
src/hotspot/cpu/sparc/stubGenerator_sparc.cpp





1059 
1060       __ inccc(count, count_dec>>1 ); // + 8 bytes
1061       __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1062       __ delayed()->inc(count, count_dec>>1); // restore 'count'
1063 
1064       // copy 8 bytes, part of them already loaded in O3
1065       __ ldx(end_from, -8, O4);
1066       __ dec(end_to, 8);
1067       __ dec(end_from, 8);
1068       __ srlx(O3, right_shift, O3);
1069       __ sllx(O4, left_shift,  G3);
1070       __ bset(O3, G3);
1071       __ stx(G3, end_to, 0);
1072 
1073     __ BIND(L_copy_last_bytes);
1074       __ srl(left_shift, LogBitsPerByte, left_shift);    // misaligned bytes
1075       __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1076       __ delayed()->add(end_from, left_shift, end_from); // restore address
1077   }
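The helper above ends the shift-and-merge path: each aligned 8-byte store is assembled from two neighbouring 8-byte loads with srlx/sllx/bset, so source data that is misaligned mod 8 can still be moved with aligned doubleword accesses. Roughly the same idea restated in portable C++, for orientation only (invented names, forward direction, big-endian byte order as on SPARC; the 16-bytes-per-iteration unrolling and register assignments are omitted):

  #include <cstddef>
  #include <cstdint>

  // Sketch: merge two neighbouring aligned source words into one aligned store.
  static void copy_words_with_shift(uint64_t* dst, const uint64_t* src_words,
                                    size_t words, unsigned byte_offset /* 1..7 */) {
    const unsigned hi_shift = byte_offset * 8;   // bits taken from the older word
    const unsigned lo_shift = 64 - hi_shift;     // bits taken from the newer word
    uint64_t prev = src_words[0];                // word kept live across iterations
    for (size_t i = 0; i < words; i++) {
      uint64_t next = src_words[i + 1];          // next aligned 8-byte load
      // Big-endian: the tail bytes of 'prev' form the high part of the store,
      // the head bytes of 'next' form the low part.
      dst[i] = (prev << hi_shift) | (next >> lo_shift);
      prev = next;
    }
    // The caller must ensure words + 1 aligned source words are readable; the
    // real stubs arrange this by handling leading/trailing bytes separately.
  }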
1078 
1079   address generate_unsafecopy_common_error_exit() {
1080     address start_pc = __ pc();
1081     if (UseBlockCopy) {
1082       __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
1083       __ membar(Assembler::StoreLoad);
1084     }
1085     __ retl();
1086     __ delayed()->mov(G0, O0); // return 0
1087     return start_pc;
1088   }
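This shared exit is where a copy interrupted by a page fault resumes: it resets the ASI and orders memory if block copy was in use, then returns 0. The UnsafeCopyMemoryMark blocks added throughout this change record which PC ranges may be redirected here. The following is only a conceptual sketch of such a PC-range table, with invented names (FaultRange, record_range, lookup_continuation), not the actual UnsafeCopyMemory interface:

  #include <cstddef>
  #include <cstdint>

  // Hypothetical fault-range table; the real one lives in UnsafeCopyMemory.
  struct FaultRange {
    uintptr_t start_pc;    // first instruction covered by a mark
    uintptr_t end_pc;      // one past the last covered instruction
    uintptr_t error_exit;  // PC of the shared error exit generated above
  };

  static FaultRange fault_ranges[8];   // cf. UCM_TABLE_MAX_ENTRIES below
  static size_t     fault_range_count = 0;

  // Conceptually what the mark does while the stub is being generated.
  static void record_range(uintptr_t start, uintptr_t end, uintptr_t exit_pc) {
    if (fault_range_count < sizeof(fault_ranges) / sizeof(fault_ranges[0])) {
      fault_ranges[fault_range_count++] = { start, end, exit_pc };
    }
  }

  // Consulted on a fault: if the faulting PC lies inside a recorded copy stub,
  // resume at the common error exit instead of treating the fault as fatal.
  static uintptr_t lookup_continuation(uintptr_t faulting_pc) {
    for (size_t i = 0; i < fault_range_count; i++) {
      if (faulting_pc >= fault_ranges[i].start_pc && faulting_pc < fault_ranges[i].end_pc) {
        return fault_ranges[i].error_exit;
      }
    }
    return 0;  // not an unsafe-copy fault
  }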
1089 
1090   //
1091   //  Generate stub for disjoint byte copy.  If "aligned" is true, the
1092   //  "from" and "to" addresses are assumed to be heapword aligned.
1093   //
1094   // Arguments for generated stub:
1095   //      from:  O0
1096   //      to:    O1
1097   //      count: O2 treated as signed
1098   //
1099   address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
1100     __ align(CodeEntryAlignment);
1101     StubCodeMark mark(this, "StubRoutines", name);
1102     address start = __ pc();
1103 
1104     Label L_skip_alignment, L_align;
1105     Label L_copy_byte, L_copy_byte_loop, L_exit;
1106 
1107     const Register from      = O0;   // source array address
1108     const Register to        = O1;   // destination array address
1109     const Register count     = O2;   // elements count
1110     const Register offset    = O5;   // offset from start of arrays
1111     // O3, O4, G3, G4 are used as temp registers
1112 
1113     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1114 
1115     if (entry != NULL) {
1116       *entry = __ pc();
1117       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1118       BLOCK_COMMENT("Entry:");
1119     }
1120 
1121     {
1122       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
1123       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
1124 
1125       // for short arrays, just do single element copy
1126       __ cmp(count, 23); // 16 + 7
1127       __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1128       __ delayed()->mov(G0, offset);
1129 
1130       if (aligned) {
1131         // 'aligned' == true when it is known statically during compilation
1132         // of this arraycopy call site that both 'from' and 'to' addresses
1133         // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1134         //
1136         // Aligned arrays have 4-byte alignment in a 32-bit VM and 8-byte alignment
1137         // in a 64-bit VM, so the extra alignment step is only needed in a 32-bit VM.
1137         //
1138       } else {
1139         // copy bytes to align 'to' on 8 byte boundary
1140         __ andcc(to, 7, G1); // misaligned bytes
1141         __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1142         __ delayed()->neg(G1);
1143         __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
1144         __ sub(count, G1, count);
1145       __ BIND(L_align);
1146         __ ldub(from, 0, O3);
1147         __ deccc(G1);
1148         __ inc(from);
1149         __ stb(O3, to, 0);
1150         __ br(Assembler::notZero, false, Assembler::pt, L_align);
1151         __ delayed()->inc(to);
1152       __ BIND(L_skip_alignment);
1153       }
1154       if (!aligned) {
1155         // Copy with shift 16 bytes per iteration if arrays do not have
1156         // the same alignment mod 8, otherwise fall through to the next
1157         // code for aligned copy.
1158         // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1159         // Also jump over aligned copy after the copy with shift completed.
1160 
1161         copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1162       }
1163 
1164       // Both arrays are 8-byte aligned, copy 16 bytes at a time
1165       __ and3(count, 7, G4); // Save count
1166       __ srl(count, 3, count);
1167       generate_disjoint_long_copy_core(aligned);
1168       __ mov(G4, count);     // Restore count
1169 
1170       // copy trailing bytes
1171       __ BIND(L_copy_byte);
1172         __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1173         __ align(OptoLoopAlignment);
1174       __ BIND(L_copy_byte_loop);
1175         __ ldub(from, offset, O3);
1176         __ deccc(count);
1177         __ stb(O3, to, offset);
1178         __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1179         __ delayed()->inc(offset);
1180     }
1181 
1182     __ BIND(L_exit);
1183       // O3, O4 are used as temp registers
1184       inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1185       __ retl();
1186       __ delayed()->mov(G0, O0); // return 0
1187     return start;
1188   }
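Stripped of delay slots and register details, the stub above has a simple shape: a plain byte loop for short arrays (the 23-byte threshold), leading bytes to align 'to' on an 8-byte boundary, then either the shift-and-merge path or the 8-byte long-copy core for the bulk, and a trailing byte loop. A minimal C++ sketch of that control flow (invented names; the shifted case is simplified to a byte loop here):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  static void disjoint_byte_copy_shape(const uint8_t* from, uint8_t* to, size_t count) {
    if (count < 23) {                                      // short arrays: single-byte loop
      while (count-- > 0) *to++ = *from++;
      return;
    }
    while ((reinterpret_cast<uintptr_t>(to) & 7) != 0) {   // align 'to' on 8 bytes
      *to++ = *from++;
      count--;
    }
    if ((reinterpret_cast<uintptr_t>(from) & 7) == 0) {    // same alignment mod 8
      while (count >= 8) {                                 // bulk 8-byte moves
        uint64_t v;
        std::memcpy(&v, from, 8);
        std::memcpy(to, &v, 8);
        from += 8; to += 8; count -= 8;
      }
    }                                                      // else: shift-and-merge path in the real stub
    while (count-- > 0) *to++ = *from++;                   // trailing bytes
  }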
1189 
1190   //
1191   //  Generate stub for conjoint byte copy.  If "aligned" is true, the
1192   //  "from" and "to" addresses are assumed to be heapword aligned.
1193   //
1194   // Arguments for generated stub:
1195   //      from:  O0
1196   //      to:    O1
1197   //      count: O2 treated as signed
1198   //
1199   address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
1200                                       address *entry, const char *name) {


1206 
1207     Label L_skip_alignment, L_align, L_aligned_copy;
1208     Label L_copy_byte, L_copy_byte_loop, L_exit;
1209 
1210     const Register from      = O0;   // source array address
1211     const Register to        = O1;   // destination array address
1212     const Register count     = O2;   // elements count
1213     const Register end_from  = from; // source array end address
1214     const Register end_to    = to;   // destination array end address
1215 
1216     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1217 
1218     if (entry != NULL) {
1219       *entry = __ pc();
1220       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1221       BLOCK_COMMENT("Entry:");
1222     }
1223 
1224     array_overlap_test(nooverlap_target, 0);
1225 
1226     {
1227       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
1228       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
1229 
1230       __ add(to, count, end_to);       // offset after last copied element



1231 
1232       // for short arrays, just do single element copy
1233       __ cmp(count, 23); // 16 + 7
1234       __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1235       __ delayed()->add(from, count, end_from);
1236 
1237       {
1238         // Align the ends of the arrays since they may not be aligned even
1239         // when the arrays themselves are aligned.
1240 
1241         // copy bytes to align 'end_to' on 8 byte boundary
1242         __ andcc(end_to, 7, G1); // misaligned bytes
1243         __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1244         __ delayed()->nop();
1245         __ sub(count, G1, count);
1246       __ BIND(L_align);
1247         __ dec(end_from);
1248         __ dec(end_to);
1249         __ ldub(end_from, 0, O3);
1250         __ deccc(G1);
1251         __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1252         __ delayed()->stb(O3, end_to, 0);
1253       __ BIND(L_skip_alignment);
1254       }
1255       if (aligned) {
1256         // Both arrays are aligned to 8-bytes in 64-bits VM.
1257         // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1258         // in unaligned case.
1259         __ dec(count, 16);
1260       } else {
1261         // Copy with shift 16 bytes per iteration if arrays do not have
1262         // the same alignment mod 8, otherwise jump to the next
1263         // code for aligned copy (subtracting 16 from 'count' before the jump).
1264         // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1265         // Also jump over aligned copy after the copy with shift completed.
1266 
1267        copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1268                                           L_aligned_copy, L_copy_byte);
1269       }
1270       // copy 16 bytes at a time
1271         __ align(OptoLoopAlignment);
1272       __ BIND(L_aligned_copy);
1273         __ dec(end_from, 16);
1274         __ ldx(end_from, 8, O3);
1275         __ ldx(end_from, 0, O4);
1276         __ dec(end_to, 16);
1277         __ deccc(count, 16);
1278         __ stx(O3, end_to, 8);
1279         __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1280         __ delayed()->stx(O4, end_to, 0);
1281         __ inc(count, 16);
1282 
1283       // copy 1 element (1 byte) at a time
1284       __ BIND(L_copy_byte);
1285         __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1286         __ align(OptoLoopAlignment);
1287       __ BIND(L_copy_byte_loop);
1288         __ dec(end_from);
1289         __ dec(end_to);
1290         __ ldub(end_from, 0, O4);
1291         __ deccc(count);
1292         __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1293         __ delayed()->stb(O4, end_to, 0);
1294     }







































1295 
1296     __ BIND(L_exit);
1297     // O3, O4 are used as temp registers
1298     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1299     __ retl();
1300     __ delayed()->mov(G0, O0); // return 0
1301     return start;
1302   }
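Unlike the disjoint version, this stub can be reached with overlapping arrays ('to' above 'from', where array_overlap_test falls through), so it works from the end of the arrays toward the start: compute the end addresses, copy single bytes until 'end_to' is 8-byte aligned, move 16 bytes per iteration backwards, then finish byte by byte. A minimal sketch of that backward order (invented names, no unrolling or shifted path):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  static void conjoint_byte_copy_shape(const uint8_t* from, uint8_t* to, size_t count) {
    const uint8_t* end_from = from + count;   // one past the last source byte
    uint8_t*       end_to   = to + count;     // one past the last destination byte
    while (count > 0 && (reinterpret_cast<uintptr_t>(end_to) & 7) != 0) {
      *--end_to = *--end_from;                // align 'end_to' down to 8 bytes
      count--;
    }
    if ((reinterpret_cast<uintptr_t>(end_from) & 7) == 0) {  // same alignment mod 8
      while (count >= 8) {                                   // 8-byte moves, backwards
        end_from -= 8; end_to -= 8; count -= 8;
        uint64_t v;
        std::memcpy(&v, end_from, 8);
        std::memcpy(end_to, &v, 8);
      }
    }
    while (count > 0) {                                      // remaining leading bytes
      *--end_to = *--end_from;
      count--;
    }
  }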
1303 
1304   //
1305   //  Generate stub for disjoint short copy.  If "aligned" is true, the
1306   //  "from" and "to" addresses are assumed to be heapword aligned.
1307   //
1308   // Arguments for generated stub:
1309   //      from:  O0
1310   //      to:    O1
1311   //      count: O2 treated as signed
1312   //
1313   address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
1314     __ align(CodeEntryAlignment);
1315     StubCodeMark mark(this, "StubRoutines", name);
1316     address start = __ pc();
1317 
1318     Label L_skip_alignment, L_skip_alignment2;
1319     Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1320 
1321     const Register from      = O0;   // source array address
1322     const Register to        = O1;   // destination array address
1323     const Register count     = O2;   // elements count
1324     const Register offset    = O5;   // offset from start of arrays
1325     // O3, O4, G3, G4 are used as temp registers
1326 
1327     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1328 
1329     if (entry != NULL) {
1330       *entry = __ pc();
1331       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1332       BLOCK_COMMENT("Entry:");
1333     }
1334 
1335     {
1336       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
1337       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
1338       // for short arrays, just do single element copy
1339       __ cmp(count, 11); // 8 + 3  (22 bytes)
1340       __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1341       __ delayed()->mov(G0, offset);
1342 
1343       if (aligned) {
1344         // 'aligned' == true when it is known statically during compilation
1345         // of this arraycopy call site that both 'from' and 'to' addresses
1346         // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1347         //
1348         // Aligned arrays have 4-byte alignment in a 32-bit VM
1349         // and 8-byte alignment in a 64-bit VM.
1350         //
1351       } else {
1352         // copy 1 element if necessary to align 'to' on a 4-byte boundary
1353         __ andcc(to, 3, G0);
1354         __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1355         __ delayed()->lduh(from, 0, O3);
1356         __ inc(from, 2);
1357         __ inc(to, 2);
1358         __ dec(count);
1359         __ sth(O3, to, -2);
1360       __ BIND(L_skip_alignment);
1361 
1362         // copy 2 elements to align 'to' on an 8 byte boundary
1363         __ andcc(to, 7, G0);
1364         __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1365         __ delayed()->lduh(from, 0, O3);
1366         __ dec(count, 2);
1367         __ lduh(from, 2, O4);
1368         __ inc(from, 4);
1369         __ inc(to, 4);
1370         __ sth(O3, to, -4);
1371         __ sth(O4, to, -2);
1372       __ BIND(L_skip_alignment2);
1373       }
1374       if (!aligned) {
1375         // Copy with shift 16 bytes per iteration if arrays do not have
1376         // the same alignment mod 8, otherwise fall through to the next
1377         // code for aligned copy.
1378         // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1379         // Also jump over aligned copy after the copy with shift completed.
1380 
1381         copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1382       }
















1383 
1384       // Both arrays are 8-byte aligned, copy 16 bytes at a time
1385         __ and3(count, 3, G4); // Save
1386         __ srl(count, 2, count);
1387        generate_disjoint_long_copy_core(aligned);
1388         __ mov(G4, count); // restore
1389 
1390       // copy 1 element at a time
1391       __ BIND(L_copy_2_bytes);
1392         __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1393         __ align(OptoLoopAlignment);
1394       __ BIND(L_copy_2_bytes_loop);
1395         __ lduh(from, offset, O3);
1396         __ deccc(count);
1397         __ sth(O3, to, offset);
1398         __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1399         __ delayed()->inc(offset, 2);
1400     }


























1401 
1402     __ BIND(L_exit);
1403       // O3, O4 are used as temp registers
1404       inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1405       __ retl();
1406       __ delayed()->mov(G0, O0); // return 0
1407     return start;
1408   }
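Because jshort elements are 2 bytes wide, the stub above aligns the destination in two steps before handing the bulk to generate_disjoint_long_copy_core: one element to reach a 4-byte boundary, then two elements to reach an 8-byte boundary. A sketch of just that alignment step, assuming 'to' is at least 2-byte aligned as it is for jshort arrays (names invented):

  #include <cstddef>
  #include <cstdint>

  static void align_short_destination(const uint16_t*& from, uint16_t*& to, size_t& count) {
    if ((reinterpret_cast<uintptr_t>(to) & 3) != 0) {  // 1 element -> 4-byte boundary
      *to++ = *from++;
      count--;
    }
    if ((reinterpret_cast<uintptr_t>(to) & 7) != 0) {  // 2 elements -> 8-byte boundary
      to[0] = from[0];
      to[1] = from[1];
      to += 2; from += 2;
      count -= 2;
    }
    // 'to' is now 8-byte aligned; the stub splits the remaining count into
    // 8-byte words for the long-copy core plus a short tail loop.
  }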
1409 
1410   //
1411   //  Generate stub for disjoint short fill.  If "aligned" is true, the
1412   //  "to" address is assumed to be heapword aligned.
1413   //
1414   // Arguments for generated stub:
1415   //      to:    O0
1416   //      value: O1
1417   //      count: O2 treated as signed
1418   //
1419   address generate_fill(BasicType t, bool aligned, const char* name) {
1420     __ align(CodeEntryAlignment);


1647     Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1648 
1649     const Register from      = O0;   // source array address
1650     const Register to        = O1;   // destination array address
1651     const Register count     = O2;   // elements count
1652     const Register end_from  = from; // source array end address
1653     const Register end_to    = to;   // destination array end address
1654 
1655     const Register byte_count = O3;  // bytes count to copy
1656 
1657     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1658 
1659     if (entry != NULL) {
1660       *entry = __ pc();
1661       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1662       BLOCK_COMMENT("Entry:");
1663     }
1664 
1665     array_overlap_test(nooverlap_target, 1);
1666 
1667     {
1668       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
1669       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
1670 
1671       __ sllx(count, LogBytesPerShort, byte_count);
1672       __ add(to, byte_count, end_to);  // offset after last copied element


1673 
1674       // for short arrays, just do single element copy
1675       __ cmp(count, 11); // 8 + 3  (22 bytes)
1676       __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1677       __ delayed()->add(from, byte_count, end_from);
1678 
1679       {
1680         // Align the ends of the arrays since they may not be aligned even
1681         // when the arrays themselves are aligned.
1682 
1683         // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
1684         __ andcc(end_to, 3, G0);
1685         __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1686         __ delayed()->lduh(end_from, -2, O3);
1687         __ dec(end_from, 2);
1688         __ dec(end_to, 2);
1689         __ dec(count);
1690         __ sth(O3, end_to, 0);
1691       __ BIND(L_skip_alignment);
1692 
1693         // copy 2 elements to align 'end_to' on an 8 byte boundary
1694         __ andcc(end_to, 7, G0);
1695         __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1696         __ delayed()->lduh(end_from, -2, O3);
1697         __ dec(count, 2);
1698         __ lduh(end_from, -4, O4);
1699         __ dec(end_from, 4);
1700         __ dec(end_to, 4);
1701         __ sth(O3, end_to, 2);
1702         __ sth(O4, end_to, 0);
1703       __ BIND(L_skip_alignment2);
1704       }
1705       if (aligned) {
1706         // Both arrays are aligned to 8-bytes in 64-bits VM.
1707         // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1708         // in unaligned case.
1709         __ dec(count, 8);
1710       } else {
1711         // Copy with shift 16 bytes per iteration if arrays do not have
1712         // the same alignment mod 8, otherwise jump to the next
1713         // code for aligned copy (subtracting 8 from 'count' before the jump).
1714         // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1715         // Also jump over aligned copy after the copy with shift completed.
1716 
1717         copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1718                                         L_aligned_copy, L_copy_2_bytes);
1719       }
1720       // copy 8 elements (16 bytes) at a time
1721         __ align(OptoLoopAlignment);
1722       __ BIND(L_aligned_copy);
1723         __ dec(end_from, 16);
1724         __ ldx(end_from, 8, O3);
1725         __ ldx(end_from, 0, O4);
1726         __ dec(end_to, 16);
1727         __ deccc(count, 8);
1728         __ stx(O3, end_to, 8);
1729         __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1730         __ delayed()->stx(O4, end_to, 0);
1731         __ inc(count, 8);
1732 
1733       // copy 1 element (2 bytes) at a time
1734       __ BIND(L_copy_2_bytes);
1735         __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1736       __ BIND(L_copy_2_bytes_loop);
1737         __ dec(end_from, 2);
1738         __ dec(end_to, 2);
1739         __ lduh(end_from, 0, O4);
1740         __ deccc(count);
1741         __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1742         __ delayed()->sth(O4, end_to, 0);
1743     }
























1744     __ BIND(L_exit);
1745     // O3, O4 are used as temp registers
1746     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1747     __ retl();
1748     __ delayed()->mov(G0, O0); // return 0
1749     return start;
1750   }
1751 
1752   //
1753   // Helper methods for generate_disjoint_int_copy_core()
1754   //
1755   void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
1756                           Label& L_loop, bool use_prefetch, bool use_bis) {
1757 
1758     __ align(OptoLoopAlignment);
1759     __ BIND(L_loop);
1760     if (use_prefetch) {
1761       if (ArraycopySrcPrefetchDistance > 0) {
1762         __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
1763       }


1882   //  "from" and "to" addresses are assumed to be heapword aligned.
1883   //
1884   // Arguments for generated stub:
1885   //      from:  O0
1886   //      to:    O1
1887   //      count: O2 treated as signed
1888   //
1889   address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
1890     __ align(CodeEntryAlignment);
1891     StubCodeMark mark(this, "StubRoutines", name);
1892     address start = __ pc();
1893 
1894     const Register count = O2;
1895     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1896 
1897     if (entry != NULL) {
1898       *entry = __ pc();
1899       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1900       BLOCK_COMMENT("Entry:");
1901     }
1902     {
1903       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
1904       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
1905       generate_disjoint_int_copy_core(aligned);
1906     }
1907     // O3, O4 are used as temp registers
1908     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
1909     __ retl();
1910     __ delayed()->mov(G0, O0); // return 0
1911     return start;
1912   }
1913 
1914   //
1915   //  Generate core code for conjoint int copy (and oop copy on 32-bit).
1916   //  If "aligned" is true, the "from" and "to" addresses are assumed
1917   //  to be heapword aligned.
1918   //
1919   // Arguments:
1920   //      from:  O0
1921   //      to:    O1
1922   //      count: O2 treated as signed
1923   //
1924   void generate_conjoint_int_copy_core(bool aligned) {
1925     // Do reverse copy.
1926 


2019   // Arguments for generated stub:
2020   //      from:  O0
2021   //      to:    O1
2022   //      count: O2 treated as signed
2023   //
2024   address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2025                                      address *entry, const char *name) {
2026     __ align(CodeEntryAlignment);
2027     StubCodeMark mark(this, "StubRoutines", name);
2028     address start = __ pc();
2029 
2030     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2031 
2032     if (entry != NULL) {
2033       *entry = __ pc();
2034       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2035       BLOCK_COMMENT("Entry:");
2036     }
2037 
2038     array_overlap_test(nooverlap_target, 2);
2039     {
2040       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
2041       UnsafeCopyMemoryMark ucmm(this, !aligned, false);
2042       generate_conjoint_int_copy_core(aligned);
2043     }
2044     // O3, O4 are used as temp registers
2045     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2046     __ retl();
2047     __ delayed()->mov(G0, O0); // return 0
2048     return start;
2049   }
2050 
2051   //
2052   // Helper methods for generate_disjoint_long_copy_core()
2053   //
2054   void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
2055                           Label& L_loop, bool use_prefetch, bool use_bis) {
2056     __ align(OptoLoopAlignment);
2057     __ BIND(L_loop);
2058     for (int off = 0; off < 64; off += 16) {
2059       if (use_prefetch && (off & 31) == 0) {
2060         if (ArraycopySrcPrefetchDistance > 0) {
2061           __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
2062         }
2063         if (ArraycopyDstPrefetchDistance > 0) {


2172   //  assumption that both addresses are always 64-bit aligned.
2173   //
2174   // Arguments for generated stub:
2175   //      from:  O0
2176   //      to:    O1
2177   //      count: O2 treated as signed
2178   //
2179   address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
2180     __ align(CodeEntryAlignment);
2181     StubCodeMark mark(this, "StubRoutines", name);
2182     address start = __ pc();
2183 
2184     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2185 
2186     if (entry != NULL) {
2187       *entry = __ pc();
2188       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2189       BLOCK_COMMENT("Entry:");
2190     }
2191 
2192     {
2193       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
2194       UnsafeCopyMemoryMark ucmm(this, true, false);
2195       generate_disjoint_long_copy_core(aligned);
2196     }
2197     // O3, O4 are used as temp registers
2198     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2199     __ retl();
2200     __ delayed()->mov(G0, O0); // return 0
2201     return start;
2202   }
2203 
2204   //
2205   //  Generate core code for conjoint long copy (and oop copy on 64-bit).
2206   //  "aligned" is ignored, because we must make the stronger
2207   //  assumption that both addresses are always 64-bit aligned.
2208   //
2209   // Arguments:
2210   //      from:  O0
2211   //      to:    O1
2212   //      count: O2 treated as signed
2213   //
2214   void generate_conjoint_long_copy_core(bool aligned) {
2215     // Do reverse copy.
2216     Label L_copy_8_bytes, L_copy_16_bytes, L_exit;


2251   //      to:    O1
2252   //      count: O2 treated as signed
2253   //
2254   address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
2255                                       address *entry, const char *name) {
2256     __ align(CodeEntryAlignment);
2257     StubCodeMark mark(this, "StubRoutines", name);
2258     address start = __ pc();
2259 
2260     assert(aligned, "Should always be aligned");
2261 
2262     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2263 
2264     if (entry != NULL) {
2265       *entry = __ pc();
2266       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2267       BLOCK_COMMENT("Entry:");
2268     }
2269 
2270     array_overlap_test(nooverlap_target, 3);
2271     {
2272       // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
2273       UnsafeCopyMemoryMark ucmm(this, true, false);
2274       generate_conjoint_long_copy_core(aligned);
2275     }
2276     // O3, O4 are used as temp registers
2277     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2278     __ retl();
2279     __ delayed()->mov(G0, O0); // return 0
2280     return start;
2281   }
2282 
2283   //  Generate stub for disjoint oop copy.  If "aligned" is true, the
2284   //  "from" and "to" addresses are assumed to be heapword aligned.
2285   //
2286   // Arguments for generated stub:
2287   //      from:  O0
2288   //      to:    O1
2289   //      count: O2 treated as signed
2290   //
2291   address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
2292                                      bool dest_uninitialized = false) {
2293 
2294     const Register from  = O0;  // source array address
2295     const Register to    = O1;  // destination array address


2950 
2951     Label Ldone;
2952     __ sllx(count, LogHeapWordSize, count); // to bytes count
2953     // Use BIS for zeroing
2954     __ bis_zeroing(to, count, temp, Ldone);
2955     __ bind(Ldone);
2956     __ retl();
2957     __ delayed()->nop();
2958     return start;
2959 }
2960 
2961   void generate_arraycopy_stubs() {
2962     address entry;
2963     address entry_jbyte_arraycopy;
2964     address entry_jshort_arraycopy;
2965     address entry_jint_arraycopy;
2966     address entry_oop_arraycopy;
2967     address entry_jlong_arraycopy;
2968     address entry_checkcast_arraycopy;
2969 
2970     address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
2971     UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
2972 
2973     //*** jbyte
2974     // Always need aligned and unaligned versions
2975     StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
2976                                                                                   "jbyte_disjoint_arraycopy");
2977     StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
2978                                                                                   &entry_jbyte_arraycopy,
2979                                                                                   "jbyte_arraycopy");
2980     StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
2981                                                                                   "arrayof_jbyte_disjoint_arraycopy");
2982     StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
2983                                                                                   "arrayof_jbyte_arraycopy");
2984 
2985     //*** jshort
2986     // Always need aligned and unaligned versions
2987     StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
2988                                                                                     "jshort_disjoint_arraycopy");
2989     StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
2990                                                                                     &entry_jshort_arraycopy,
2991                                                                                     "jshort_arraycopy");
2992     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,


5845 
5846   void align(bool at_header = false) {
5847     // %%%%% move this constant somewhere else
5848     // UltraSPARC cache line size is 8 instructions:
5849     const unsigned int icache_line_size = 32;
5850     const unsigned int icache_half_line_size = 16;
5851 
5852     if (at_header) {
5853       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5854         __ emit_data(0, relocInfo::none);
5855       }
5856     } else {
5857       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5858         __ nop();
5859       }
5860     }
5861   }
5862 
5863 }; // end class declaration
5864 
5865 #define UCM_TABLE_MAX_ENTRIES 8
5866 void StubGenerator_generate(CodeBuffer* code, bool all) {
5867   if (UnsafeCopyMemory::_table == NULL) {
5868     UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
5869   }
5870   StubGenerator g(code, all);
5871 }


1059 
1060       __ inccc(count, count_dec>>1 ); // + 8 bytes
1061       __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1062       __ delayed()->inc(count, count_dec>>1); // restore 'count'
1063 
1064       // copy 8 bytes, part of them already loaded in O3
1065       __ ldx(end_from, -8, O4);
1066       __ dec(end_to, 8);
1067       __ dec(end_from, 8);
1068       __ srlx(O3, right_shift, O3);
1069       __ sllx(O4, left_shift,  G3);
1070       __ bset(O3, G3);
1071       __ stx(G3, end_to, 0);
1072 
1073     __ BIND(L_copy_last_bytes);
1074       __ srl(left_shift, LogBitsPerByte, left_shift);    // misaligned bytes
1075       __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1076       __ delayed()->add(end_from, left_shift, end_from); // restore address
1077   }
1078 











1079   //
1080   //  Generate stub for disjoint byte copy.  If "aligned" is true, the
1081   //  "from" and "to" addresses are assumed to be heapword aligned.
1082   //
1083   // Arguments for generated stub:
1084   //      from:  O0
1085   //      to:    O1
1086   //      count: O2 treated as signed
1087   //
1088   address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
1089     __ align(CodeEntryAlignment);
1090     StubCodeMark mark(this, "StubRoutines", name);
1091     address start = __ pc();
1092 
1093     Label L_skip_alignment, L_align;
1094     Label L_copy_byte, L_copy_byte_loop, L_exit;
1095 
1096     const Register from      = O0;   // source array address
1097     const Register to        = O1;   // destination array address
1098     const Register count     = O2;   // elements count
1099     const Register offset    = O5;   // offset from start of arrays
1100     // O3, O4, G3, G4 are used as temp registers
1101 
1102     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1103 
1104     if (entry != NULL) {
1105       *entry = __ pc();
1106       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1107       BLOCK_COMMENT("Entry:");
1108     }
1109 
1110     // for short arrays, just do single element copy
1111     __ cmp(count, 23); // 16 + 7
1112     __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1113     __ delayed()->mov(G0, offset);




1114 
1115     if (aligned) {
1116       // 'aligned' == true when it is known statically during compilation
1117       // of this arraycopy call site that both 'from' and 'to' addresses
1118       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1119       //
1120       // Aligned arrays have 4-byte alignment in a 32-bit VM and 8-byte alignment
1121       // in a 64-bit VM, so the extra alignment step is only needed in a 32-bit VM.
1122       //
1123     } else {
1124       // copy bytes to align 'to' on 8 byte boundary
1125       __ andcc(to, 7, G1); // misaligned bytes
1126       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1127       __ delayed()->neg(G1);
1128       __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
1129       __ sub(count, G1, count);
1130     __ BIND(L_align);
1131       __ ldub(from, 0, O3);
1132       __ deccc(G1);
1133       __ inc(from);
1134       __ stb(O3, to, 0);
1135       __ br(Assembler::notZero, false, Assembler::pt, L_align);
1136       __ delayed()->inc(to);
1137     __ BIND(L_skip_alignment);
1138     }
1139     if (!aligned) {
1140       // Copy with shift 16 bytes per iteration if arrays do not have
1141       // the same alignment mod 8, otherwise fall through to the next
1142       // code for aligned copy.
1143       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1144       // Also jump over aligned copy after the copy with shift completed.
1145 
1146       copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1147     }
1148 
1149     // Both arrays are 8-byte aligned, copy 16 bytes at a time
1150       __ and3(count, 7, G4); // Save count
1151       __ srl(count, 3, count);
1152      generate_disjoint_long_copy_core(aligned);
1153       __ mov(G4, count);     // Restore count
1154 
1155     // copy trailing bytes
1156     __ BIND(L_copy_byte);
1157       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1158       __ align(OptoLoopAlignment);
1159     __ BIND(L_copy_byte_loop);
1160       __ ldub(from, offset, O3);
1161       __ deccc(count);
1162       __ stb(O3, to, offset);
1163       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1164       __ delayed()->inc(offset);

1165 
1166     __ BIND(L_exit);
1167       // O3, O4 are used as temp registers
1168       inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1169       __ retl();
1170       __ delayed()->mov(G0, O0); // return 0
1171     return start;
1172   }
1173 
1174   //
1175   //  Generate stub for conjoint byte copy.  If "aligned" is true, the
1176   //  "from" and "to" addresses are assumed to be heapword aligned.
1177   //
1178   // Arguments for generated stub:
1179   //      from:  O0
1180   //      to:    O1
1181   //      count: O2 treated as signed
1182   //
1183   address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
1184                                       address *entry, const char *name) {


1190 
1191     Label L_skip_alignment, L_align, L_aligned_copy;
1192     Label L_copy_byte, L_copy_byte_loop, L_exit;
1193 
1194     const Register from      = O0;   // source array address
1195     const Register to        = O1;   // destination array address
1196     const Register count     = O2;   // elements count
1197     const Register end_from  = from; // source array end address
1198     const Register end_to    = to;   // destination array end address
1199 
1200     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1201 
1202     if (entry != NULL) {
1203       *entry = __ pc();
1204       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1205       BLOCK_COMMENT("Entry:");
1206     }
1207 
1208     array_overlap_test(nooverlap_target, 0);
1209 
1210     __ add(to, count, end_to);       // offset after last copied element


1211 
1212     // for short arrays, just do single element copy
1213     __ cmp(count, 23); // 16 + 7
1214     __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1215     __ delayed()->add(from, count, end_from);
1216 
1217     {
1218       // Align the ends of the arrays since they may not be aligned even
1219       // when the arrays themselves are aligned.































1220 
1221       // copy bytes to align 'end_to' on 8 byte boundary
1222       __ andcc(end_to, 7, G1); // misaligned bytes
1223       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1224       __ delayed()->nop();
1225       __ sub(count, G1, count);
1226     __ BIND(L_align);
1227       __ dec(end_from);
1228       __ dec(end_to);
1229       __ ldub(end_from, 0, O3);
1230       __ deccc(G1);
1231       __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1232       __ delayed()->stb(O3, end_to, 0);
1233     __ BIND(L_skip_alignment);














1234     }
1235     if (aligned) {
1236       // Both arrays are aligned to 8-bytes in 64-bits VM.
1237       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1238       // in unaligned case.
1239       __ dec(count, 16);
1240     } else {
1241       // Copy with shift 16 bytes per iteration if arrays do not have
1242       // the same alignment mod 8, otherwise jump to the next
1243       // code for aligned copy (subtracting 16 from 'count' before the jump).
1244       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1245       // Also jump over aligned copy after the copy with shift completed.
1246 
1247       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1248                                         L_aligned_copy, L_copy_byte);
1249     }
1250     // copy 16 bytes at a time
1251       __ align(OptoLoopAlignment);
1252     __ BIND(L_aligned_copy);
1253       __ dec(end_from, 16);
1254       __ ldx(end_from, 8, O3);
1255       __ ldx(end_from, 0, O4);
1256       __ dec(end_to, 16);
1257       __ deccc(count, 16);
1258       __ stx(O3, end_to, 8);
1259       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1260       __ delayed()->stx(O4, end_to, 0);
1261       __ inc(count, 16);
1262 
1263     // copy 1 element (1 byte) at a time
1264     __ BIND(L_copy_byte);
1265       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1266       __ align(OptoLoopAlignment);
1267     __ BIND(L_copy_byte_loop);
1268       __ dec(end_from);
1269       __ dec(end_to);
1270       __ ldub(end_from, 0, O4);
1271       __ deccc(count);
1272       __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1273       __ delayed()->stb(O4, end_to, 0);
1274 
1275     __ BIND(L_exit);
1276     // O3, O4 are used as temp registers
1277     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1278     __ retl();
1279     __ delayed()->mov(G0, O0); // return 0
1280     return start;
1281   }
1282 
1283   //
1284   //  Generate stub for disjoint short copy.  If "aligned" is true, the
1285   //  "from" and "to" addresses are assumed to be heapword aligned.
1286   //
1287   // Arguments for generated stub:
1288   //      from:  O0
1289   //      to:    O1
1290   //      count: O2 treated as signed
1291   //
1292   address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
1293     __ align(CodeEntryAlignment);
1294     StubCodeMark mark(this, "StubRoutines", name);
1295     address start = __ pc();
1296 
1297     Label L_skip_alignment, L_skip_alignment2;
1298     Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1299 
1300     const Register from      = O0;   // source array address
1301     const Register to        = O1;   // destination array address
1302     const Register count     = O2;   // elements count
1303     const Register offset    = O5;   // offset from start of arrays
1304     // O3, O4, G3, G4 are used as temp registers
1305 
1306     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1307 
1308     if (entry != NULL) {
1309       *entry = __ pc();
1310       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1311       BLOCK_COMMENT("Entry:");
1312     }
1313 
1314     // for short arrays, just do single element copy
1315     __ cmp(count, 11); // 8 + 3  (22 bytes)
1316     __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1317     __ delayed()->mov(G0, offset);









































1318 
1319     if (aligned) {
1320       // 'aligned' == true when it is known statically during compilation
1321       // of this arraycopy call site that both 'from' and 'to' addresses
1322       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1323       //
1324       // Aligned arrays have 4-byte alignment in a 32-bit VM
1325       // and 8-byte alignment in a 64-bit VM.
1326       //
1327     } else {
1328       // copy 1 element if necessary to align 'to' on a 4-byte boundary
1329       __ andcc(to, 3, G0);
1330       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1331       __ delayed()->lduh(from, 0, O3);
1332       __ inc(from, 2);
1333       __ inc(to, 2);
1334       __ dec(count);
1335       __ sth(O3, to, -2);
1336     __ BIND(L_skip_alignment);
1337 
1338       // copy 2 elements to align 'to' on an 8 byte boundary
1339       __ andcc(to, 7, G0);
1340       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1341       __ delayed()->lduh(from, 0, O3);
1342       __ dec(count, 2);
1343       __ lduh(from, 2, O4);
1344       __ inc(from, 4);
1345       __ inc(to, 4);
1346       __ sth(O3, to, -4);
1347       __ sth(O4, to, -2);
1348     __ BIND(L_skip_alignment2);





1349     }
1350     if (!aligned) {
1351       // Copy with shift 16 bytes per iteration if arrays do not have
1352       // the same alignment mod 8, otherwise fall through to the next
1353       // code for aligned copy.
1354       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1355       // Also jump over aligned copy after the copy with shift completed.
1356 
1357       copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1358     }
1359 
1360     // Both arrays are 8-byte aligned, copy 16 bytes at a time
1361       __ and3(count, 3, G4); // Save
1362       __ srl(count, 2, count);
1363      generate_disjoint_long_copy_core(aligned);
1364       __ mov(G4, count); // restore
1365 
1366     // copy 1 element at a time
1367     __ BIND(L_copy_2_bytes);
1368       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1369       __ align(OptoLoopAlignment);
1370     __ BIND(L_copy_2_bytes_loop);
1371       __ lduh(from, offset, O3);
1372       __ deccc(count);
1373       __ sth(O3, to, offset);
1374       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1375       __ delayed()->inc(offset, 2);
1376 
1377     __ BIND(L_exit);
1378       // O3, O4 are used as temp registers
1379       inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1380       __ retl();
1381       __ delayed()->mov(G0, O0); // return 0
1382     return start;
1383   }
1384 
1385   //
1386   //  Generate stub for disjoint short fill.  If "aligned" is true, the
1387   //  "to" address is assumed to be heapword aligned.
1388   //
1389   // Arguments for generated stub:
1390   //      to:    O0
1391   //      value: O1
1392   //      count: O2 treated as signed
1393   //
1394   address generate_fill(BasicType t, bool aligned, const char* name) {
1395     __ align(CodeEntryAlignment);


1622     Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1623 
1624     const Register from      = O0;   // source array address
1625     const Register to        = O1;   // destination array address
1626     const Register count     = O2;   // elements count
1627     const Register end_from  = from; // source array end address
1628     const Register end_to    = to;   // destination array end address
1629 
1630     const Register byte_count = O3;  // bytes count to copy
1631 
1632     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1633 
1634     if (entry != NULL) {
1635       *entry = __ pc();
1636       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1637       BLOCK_COMMENT("Entry:");
1638     }
1639 
1640     array_overlap_test(nooverlap_target, 1);
1641 
1642     __ sllx(count, LogBytesPerShort, byte_count);
1643     __ add(to, byte_count, end_to);  // offset after last copied element

1644 
1645     // for short arrays, just do single element copy
1646     __ cmp(count, 11); // 8 + 3  (22 bytes)
1647     __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1648     __ delayed()->add(from, byte_count, end_from);
1649 
1650     {
1651       // Align the ends of the arrays since they may not be aligned even
1652       // when the arrays themselves are aligned.

1653 
1654       // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
1655       __ andcc(end_to, 3, G0);
1656       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1657       __ delayed()->lduh(end_from, -2, O3);
1658       __ dec(end_from, 2);
1659       __ dec(end_to, 2);
1660       __ dec(count);
1661       __ sth(O3, end_to, 0);
1662     __ BIND(L_skip_alignment);
1663 
1664       // copy 2 elements to align 'end_to' on an 8 byte boundary
1665       __ andcc(end_to, 7, G0);
1666       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1667       __ delayed()->lduh(end_from, -2, O3);
1668       __ dec(count, 2);
1669       __ lduh(end_from, -4, O4);
1670       __ dec(end_from, 4);
1671       __ dec(end_to, 4);
1672       __ sth(O3, end_to, 2);
1673       __ sth(O4, end_to, 0);
1674     __ BIND(L_skip_alignment2);
1675     }
1676     if (aligned) {
1677       // Both arrays are aligned to 8-bytes in 64-bits VM.
1678       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1679       // in unaligned case.
1680       __ dec(count, 8);
1681     } else {
1682       // Copy with shift 16 bytes per iteration if arrays do not have
1683       // the same alignment mod 8, otherwise jump to the next
1684       // code for aligned copy (and substracting 8 from 'count' before jump).
1685       // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
1686       // Also jump over aligned copy after the copy with shift completed.




1687 
1688       copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1689                                         L_aligned_copy, L_copy_2_bytes);
























1690     }
1691     // copy 8 elements (16 bytes) at a time
1692       __ align(OptoLoopAlignment);
1693     __ BIND(L_aligned_copy);
1694       __ dec(end_from, 16);
1695       __ ldx(end_from, 8, O3);
1696       __ ldx(end_from, 0, O4);
1697       __ dec(end_to, 16);
1698       __ deccc(count, 8);
1699       __ stx(O3, end_to, 8);
1700       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1701       __ delayed()->stx(O4, end_to, 0);
1702       __ inc(count, 8);
1703 
1704     // copy 1 element (2 bytes) at a time
1705     __ BIND(L_copy_2_bytes);
1706       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1707     __ BIND(L_copy_2_bytes_loop);
1708       __ dec(end_from, 2);
1709       __ dec(end_to, 2);
1710       __ lduh(end_from, 0, O4);
1711       __ deccc(count);
1712       __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1713       __ delayed()->sth(O4, end_to, 0);
1714 
1715     __ BIND(L_exit);
1716     // O3, O4 are used as temp registers
1717     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1718     __ retl();
1719     __ delayed()->mov(G0, O0); // return 0
1720     return start;
1721   }
1722 
1723   //
1724   // Helper methods for generate_disjoint_int_copy_core()
1725   //
1726   void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
1727                           Label& L_loop, bool use_prefetch, bool use_bis) {
1728 
1729     __ align(OptoLoopAlignment);
1730     __ BIND(L_loop);
1731     if (use_prefetch) {
1732       if (ArraycopySrcPrefetchDistance > 0) {
1733         __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
1734       }


1853   //  "from" and "to" addresses are assumed to be heapword aligned.
1854   //
1855   // Arguments for generated stub:
1856   //      from:  O0
1857   //      to:    O1
1858   //      count: O2 treated as signed
1859   //
1860   address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
1861     __ align(CodeEntryAlignment);
1862     StubCodeMark mark(this, "StubRoutines", name);
1863     address start = __ pc();
1864 
1865     const Register count = O2;
1866     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
1867 
1868     if (entry != NULL) {
1869       *entry = __ pc();
1870       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1871       BLOCK_COMMENT("Entry:");
1872     }
1873 
1874     generate_disjoint_int_copy_core(aligned);
1875 


1876     // O3, O4 are used as temp registers
1877     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
1878     __ retl();
1879     __ delayed()->mov(G0, O0); // return 0
1880     return start;
1881   }
1882 
1883   //
1884   //  Generate core code for conjoint int copy (and oop copy on 32-bit).
1885   //  If "aligned" is true, the "from" and "to" addresses are assumed
1886   //  to be heapword aligned.
1887   //
1888   // Arguments:
1889   //      from:  O0
1890   //      to:    O1
1891   //      count: O2 treated as signed
1892   //
1893   void generate_conjoint_int_copy_core(bool aligned) {
1894     // Do reverse copy.
1895 


1988   // Arguments for generated stub:
1989   //      from:  O0
1990   //      to:    O1
1991   //      count: O2 treated as signed
1992   //
1993   address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
1994                                      address *entry, const char *name) {
1995     __ align(CodeEntryAlignment);
1996     StubCodeMark mark(this, "StubRoutines", name);
1997     address start = __ pc();
1998 
1999     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2000 
2001     if (entry != NULL) {
2002       *entry = __ pc();
2003       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2004       BLOCK_COMMENT("Entry:");
2005     }
2006 
2007     array_overlap_test(nooverlap_target, 2);
2008 
2009     generate_conjoint_int_copy_core(aligned);
2010 


2011     // O3, O4 are used as temp registers
2012     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2013     __ retl();
2014     __ delayed()->mov(G0, O0); // return 0
2015     return start;
2016   }
2017 
2018   //
2019   // Helper methods for generate_disjoint_long_copy_core()
2020   //
2021   void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
2022                           Label& L_loop, bool use_prefetch, bool use_bis) {
2023     __ align(OptoLoopAlignment);
2024     __ BIND(L_loop);
2025     for (int off = 0; off < 64; off += 16) {
2026       if (use_prefetch && (off & 31) == 0) {
2027         if (ArraycopySrcPrefetchDistance > 0) {
2028           __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
2029         }
2030         if (ArraycopyDstPrefetchDistance > 0) {


2139   //  assumption that both addresses are always 64-bit aligned.
2140   //
2141   // Arguments for generated stub:
2142   //      from:  O0
2143   //      to:    O1
2144   //      count: O2 treated as signed
2145   //
2146   address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
2147     __ align(CodeEntryAlignment);
2148     StubCodeMark mark(this, "StubRoutines", name);
2149     address start = __ pc();
2150 
2151     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2152 
2153     if (entry != NULL) {
2154       *entry = __ pc();
2155       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2156       BLOCK_COMMENT("Entry:");
2157     }
2158 
2159     generate_disjoint_long_copy_core(aligned);
2160 



2161     // O3, O4 are used as temp registers
2162     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2163     __ retl();
2164     __ delayed()->mov(G0, O0); // return 0
2165     return start;
2166   }
2167 
2168   //
2169   //  Generate core code for conjoint long copy (and oop copy on 64-bit).
2170   //  "aligned" is ignored, because we must make the stronger
2171   //  assumption that both addresses are always 64-bit aligned.
2172   //
2173   // Arguments:
2174   //      from:  O0
2175   //      to:    O1
2176   //      count: O2 treated as signed
2177   //
2178   void generate_conjoint_long_copy_core(bool aligned) {
2179     // Do reverse copy.
2180     Label L_copy_8_bytes, L_copy_16_bytes, L_exit;


2215   //      to:    O1
2216   //      count: O2 treated as signed
2217   //
2218   address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
2219                                       address *entry, const char *name) {
2220     __ align(CodeEntryAlignment);
2221     StubCodeMark mark(this, "StubRoutines", name);
2222     address start = __ pc();
2223 
2224     assert(aligned, "Should always be aligned");
2225 
2226     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
2227 
2228     if (entry != NULL) {
2229       *entry = __ pc();
2230       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2231       BLOCK_COMMENT("Entry:");
2232     }
2233 
2234     array_overlap_test(nooverlap_target, 3);
2235 
2236     generate_conjoint_long_copy_core(aligned);
2237 


2238     // O3, O4 are used as temp registers
2239     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2240     __ retl();
2241     __ delayed()->mov(G0, O0); // return 0
2242     return start;
2243   }
2244 
2245   //  Generate stub for disjoint oop copy.  If "aligned" is true, the
2246   //  "from" and "to" addresses are assumed to be heapword aligned.
2247   //
2248   // Arguments for generated stub:
2249   //      from:  O0
2250   //      to:    O1
2251   //      count: O2 treated as signed
2252   //
2253   address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
2254                                      bool dest_uninitialized = false) {
2255 
2256     const Register from  = O0;  // source array address
2257     const Register to    = O1;  // destination array address


2912 
2913     Label Ldone;
2914     __ sllx(count, LogHeapWordSize, count); // to bytes count
2915     // Use BIS for zeroing
2916     __ bis_zeroing(to, count, temp, Ldone);
2917     __ bind(Ldone);
2918     __ retl();
2919     __ delayed()->nop();
2920     return start;
2921 }
2922 
2923   void generate_arraycopy_stubs() {
2924     address entry;
2925     address entry_jbyte_arraycopy;
2926     address entry_jshort_arraycopy;
2927     address entry_jint_arraycopy;
2928     address entry_oop_arraycopy;
2929     address entry_jlong_arraycopy;
2930     address entry_checkcast_arraycopy;
2931 



2932     //*** jbyte
2933     // Always need aligned and unaligned versions
2934     StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
2935                                                                                   "jbyte_disjoint_arraycopy");
2936     StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
2937                                                                                   &entry_jbyte_arraycopy,
2938                                                                                   "jbyte_arraycopy");
2939     StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
2940                                                                                   "arrayof_jbyte_disjoint_arraycopy");
2941     StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
2942                                                                                   "arrayof_jbyte_arraycopy");
2943 
2944     //*** jshort
2945     // Always need aligned and unaligned versions
2946     StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
2947                                                                                     "jshort_disjoint_arraycopy");
2948     StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
2949                                                                                     &entry_jshort_arraycopy,
2950                                                                                     "jshort_arraycopy");
2951     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,


5804 
5805   void align(bool at_header = false) {
5806     // %%%%% move this constant somewhere else
5807     // UltraSPARC cache line size is 8 instructions:
5808     const unsigned int icache_line_size = 32;
5809     const unsigned int icache_half_line_size = 16;
5810 
5811     if (at_header) {
5812       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5813         __ emit_data(0, relocInfo::none);
5814       }
5815     } else {
5816       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5817         __ nop();
5818       }
5819     }
5820   }
5821 
5822 }; // end class declaration
5823 

5824 void StubGenerator_generate(CodeBuffer* code, bool all) {



5825   StubGenerator g(code, all);
5826 }