src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

 262   __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
 263                    Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
 264 #else
 265 #if !INCLUDE_JVMCI
 266   assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
 267 #endif
 268   __ pop_CPU_state(_save_vectors);
 269 #endif
 270   __ leave();
 271 
 272 }
 273 
 274 // Is the vector's size (in bytes) bigger than the size saved by default?
 275 // 8-byte vector registers are saved by default on AArch64.
 276 // The minimum vector size supported by SVE is 8 bytes, and we need to save
 277 // predicate registers when the vector size is 8 bytes as well.
 278 bool SharedRuntime::is_wide_vector(int size) {
 279   return size > 8 || (UseSVE > 0 && size >= 8);
 280 }
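
Illustration (not from the patch): a minimal standalone sketch of the predicate above, handy for checking the boundary cases; UseSVE is reduced to an int parameter and nothing here is HotSpot API.

    #include <cstdio>

    static bool is_wide_vector_demo(int size_in_bytes, int use_sve) {
      // Anything larger than the 8 bytes saved by default is "wide"; with SVE
      // enabled an 8-byte vector is also wide, because predicate registers
      // must be saved alongside it.
      return size_in_bytes > 8 || (use_sve > 0 && size_in_bytes >= 8);
    }

    int main() {
      printf("NEON 8B: %d\n", is_wide_vector_demo(8, 0));   // 0: default save suffices
      printf("SVE  8B: %d\n", is_wide_vector_demo(8, 1));   // 1: predicates need saving
      printf("any 16B: %d\n", is_wide_vector_demo(16, 0));  // 1: wider than default save
      return 0;
    }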
 281 
 282 // The java_calling_convention describes stack locations as ideal slots on
 283 // a frame with no abi restrictions. Since we must observe abi restrictions
 284 // (like the placement of the register window) the slots must be biased by
 285 // the following value.
 286 static int reg2offset_in(VMReg r) {
 287   // Account for saved rfp and lr
 288   // This should really be in_preserve_stack_slots
 289   return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
 290 }
 291 
 292 static int reg2offset_out(VMReg r) {
 293   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 294 }
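
Illustration (not from the patch): the slot arithmetic these helpers perform, assuming the 4-byte VMRegImpl::stack_slot_size used throughout this file. The +4 bias in reg2offset_in is the 16 bytes taken by the saved rfp and lr; the outgoing preserve area is left as a parameter since its value is defined elsewhere.

    #include <cstdio>

    static const int stack_slot_size = 4;   // bytes per VMReg stack slot

    static int reg2offset_in_demo(int slot)  { return (slot + 4) * stack_slot_size; }
    static int reg2offset_out_demo(int slot, int out_preserve_slots) {
      return (slot + out_preserve_slots) * stack_slot_size;
    }

    int main() {
      // Incoming slot 0 sits just past the saved rfp/lr pair, 16 bytes above rfp.
      printf("in  slot 0 -> rfp + %d bytes\n", reg2offset_in_demo(0));      // 16
      printf("in  slot 2 -> rfp + %d bytes\n", reg2offset_in_demo(2));      // 24
      // Outgoing slots are based off sp, offset only by the preserve area.
      printf("out slot 0 -> sp  + %d bytes\n", reg2offset_out_demo(0, 0));  // 0
      return 0;
    }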
 295 
 296 // ---------------------------------------------------------------------------
 297 // Read the array of BasicTypes from a signature, and compute where the
 298 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
 299 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
 300 // refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
 301 // as framesizes are fixed.
 302 // VMRegImpl::stack0 refers to the first slot 0(sp),
 303 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
 304 // up to RegisterImpl::number_of_registers are the 64-bit
 305 // integer registers.
 306 
 307 // Note: the INPUTS in sig_bt are in units of Java argument words,
 308 // which are 64-bit.  The OUTPUTS are in 32-bit units.
 309 
 310 // The Java calling convention is a "shifted" version of the C ABI.
 311 // By skipping the first C ABI register we can call non-static jni
 312 // methods with small numbers of arguments without having to shuffle
 313 // the arguments at all. Since we control the java ABI we ought to at
 314 // least get some advantage out of it.
 315 
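
Illustration (not from the patch): one way to see the "shifted" convention. On AArch64 the Java integer argument registers are, as the editor understands it, the C argument registers rotated by one (j_rarg0 == c_rarg1, ..., j_rarg7 == c_rarg0), so placing the JNIEnv* in c_rarg0 leaves the first seven Java register arguments already where the native call wants them; only the eighth wraps around to r0. The loop below just prints that rotation.

    #include <cstdio>

    int main() {
      const int num_c_args = 8;  // r0..r7 under the AArch64 C ABI
      for (int i = 0; i < num_c_args; i++) {
        // Java arg register i corresponds to C arg register (i + 1) mod 8,
        // keeping c_rarg0 (r0) free for the JNIEnv* a native method sees first.
        printf("j_rarg%d == c_rarg%d (r%d)\n", i, (i + 1) % num_c_args, (i + 1) % num_c_args);
      }
      return 0;
    }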

 901   return stk_args;
 902 }
 903 
 904 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 905                                              uint num_bits,
 906                                              uint total_args_passed) {
 907   Unimplemented();
 908   return 0;
 909 }
 910 
 911 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 912                                          VMRegPair *regs,
 913                                          VMRegPair *regs2,
 914                                          int total_args_passed)
 915 {
 916   int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed);
 917   guarantee(result >= 0, "Unsupported arguments configuration");
 918   return result;
 919 }
 920 
 921 // On 64-bit we will store integer-like items to the stack as
 922 // 64-bit items (AArch64 ABI) even though Java would only store
 923 // 32 bits for a parameter. On 32-bit it would simply be 32 bits.
 924 // So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
 925 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
 926   if (src.first()->is_stack()) {
 927     if (dst.first()->is_stack()) {
 928       // stack to stack
 929       __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
 930       __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
 931     } else {
 932       // stack to reg
 933       __ ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
 934     }
 935   } else if (dst.first()->is_stack()) {
 936     // reg to stack
 937     // Do we really have to sign extend???
 938     // __ movslq(src.first()->as_Register(), src.first()->as_Register());
 939     __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
 940   } else {
 941     if (dst.first() != src.first()) {
 942       __ sxtw(dst.first()->as_Register(), src.first()->as_Register());
 943     }
 944   }
 945 }
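
Illustration (not from the patch): the sign extension the ldrsw/sxtw paths perform when a 32-bit Java int is widened into the 64-bit slot or register the AArch64 ABI works with.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t java_int = -2;                 // 0xFFFFFFFE as a 32-bit value
      int64_t widened  = (int64_t)java_int;  // what sxtw/ldrsw produce
      printf("%lld\n", (long long)widened);  // prints -2: the sign is preserved
      return 0;
    }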
 946 
 947 // An oop arg. Must pass a handle not the oop itself
 948 static void object_move(MacroAssembler* masm,
 949                         OopMap* map,
 950                         int oop_handle_offset,
 951                         int framesize_in_slots,
 952                         VMRegPair src,
 953                         VMRegPair dst,
 954                         bool is_receiver,
 955                         int* receiver_offset) {
 956 
 957   // must pass a handle. First figure out the location we use as a handle
 958 
 959   Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register();
 960 
 961   // See if the oop is NULL; if it is we need no handle
 962 
 963   if (src.first()->is_stack()) {
 964 
 965     // Oop is already on the stack as an argument
 966     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 967     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
 968     if (is_receiver) {
 969       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
 970     }
 971 
 972     __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
 973     __ lea(rHandle, Address(rfp, reg2offset_in(src.first())));
 974     // conditionally move a NULL
 975     __ cmp(rscratch1, zr);
 976     __ csel(rHandle, zr, rHandle, Assembler::EQ);
 977   } else {
 978 
 979     // Oop is in a register; we must store it to the space we reserve
 980     // on the stack for oop handles and pass a handle if the oop is non-NULL
 981 
 982     const Register rOop = src.first()->as_Register();
 983     int oop_slot;
 984     if (rOop == j_rarg0)
 985       oop_slot = 0;
 986     else if (rOop == j_rarg1)
 987       oop_slot = 1;
 988     else if (rOop == j_rarg2)
 989       oop_slot = 2;
 990     else if (rOop == j_rarg3)
 991       oop_slot = 3;
 992     else if (rOop == j_rarg4)
 993       oop_slot = 4;
 994     else if (rOop == j_rarg5)
 995       oop_slot = 5;
 996     else if (rOop == j_rarg6)
 997       oop_slot = 6;
 998     else {
 999       assert(rOop == j_rarg7, "wrong register");
1000       oop_slot = 7;
1001     }
1002 
1003     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
1004     int offset = oop_slot*VMRegImpl::stack_slot_size;
1005 
1006     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1007     // Store oop in handle area, may be NULL
1008     __ str(rOop, Address(sp, offset));
1009     if (is_receiver) {
1010       *receiver_offset = offset;
1011     }
1012 
1013     __ cmp(rOop, zr);
1014     __ lea(rHandle, Address(sp, offset));
1015     // conditionally move a NULL
1016     __ csel(rHandle, zr, rHandle, Assembler::EQ);
1017   }
1018 
 1019   // If the arg is on the stack then place it; otherwise it is already in the correct reg.
1020   if (dst.first()->is_stack()) {
1021     __ str(rHandle, Address(sp, reg2offset_out(dst.first())));
1022   }
1023 }
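
Illustration (not from the patch): the handle convention object_move implements, reduced to plain C++. A JNI jobject is the address of a slot holding the oop, except that a NULL oop must be passed as a NULL handle, which is exactly what the cmp/csel pair above selects.

    #include <cstdio>

    typedef void* oop_t;      // stand-in for an oop value
    typedef oop_t* handle_t;  // a handle is the address of a slot holding the oop

    static handle_t handlize(oop_t* slot) {
      // Non-NULL oop: pass the address of its slot. NULL oop: pass a NULL handle,
      // not a handle to a slot containing NULL.
      return (*slot == nullptr) ? nullptr : slot;
    }

    int main() {
      oop_t obj = (oop_t)0x1000, none = nullptr;
      printf("obj  -> %p\n", (void*)handlize(&obj));   // address of the slot
      printf("null -> %p\n", (void*)handlize(&none));  // (nil)
      return 0;
    }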
1024 
 1025 // A float arg may have to do a float reg to int reg conversion
1026 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1027   assert(src.first()->is_stack() && dst.first()->is_stack() ||
1028          src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
1029   if (src.first()->is_stack()) {
1030     if (dst.first()->is_stack()) {
1031       __ ldrw(rscratch1, Address(rfp, reg2offset_in(src.first())));
1032       __ strw(rscratch1, Address(sp, reg2offset_out(dst.first())));
1033     } else {
1034       ShouldNotReachHere();
1035     }
1036   } else if (src.first() != dst.first()) {
1037     if (src.is_single_phys_reg() && dst.is_single_phys_reg())
1038       __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1039     else
1040       ShouldNotReachHere();
1041   }
1042 }
1043 
1044 // A long move
1045 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1046   if (src.first()->is_stack()) {
1047     if (dst.first()->is_stack()) {
1048       // stack to stack
1049       __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
1050       __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
1051     } else {
1052       // stack to reg
1053       __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
1054     }
1055   } else if (dst.first()->is_stack()) {
1056     // reg to stack
1057     // Do we really have to sign extend???
1058     // __ movslq(src.first()->as_Register(), src.first()->as_Register());
1059     __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
1060   } else {
1061     if (dst.first() != src.first()) {
1062       __ mov(dst.first()->as_Register(), src.first()->as_Register());
1063     }
1064   }
1065 }
1066 
1067 
1068 // A double move
1069 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1070   assert(src.first()->is_stack() && dst.first()->is_stack() ||
1071          src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
1072   if (src.first()->is_stack()) {
1073     if (dst.first()->is_stack()) {
1074       __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
1075       __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
1076     } else {
1077       ShouldNotReachHere();
1078     }
1079   } else if (src.first() != dst.first()) {
1080     if (src.is_single_phys_reg() && dst.is_single_phys_reg())
1081       __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1082     else
1083       ShouldNotReachHere();
1084   }
1085 }
1086 
1087 
1088 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
 1089   // We always ignore the frame_slots arg and just use the space just below the frame pointer,
 1090   // which by this time is free to use
1091   switch (ret_type) {
1092   case T_FLOAT:
1093     __ strs(v0, Address(rfp, -wordSize));
1094     break;
1095   case T_DOUBLE:
1096     __ strd(v0, Address(rfp, -wordSize));
1097     break;
1098   case T_VOID:  break;
1099   default: {
1100     __ str(r0, Address(rfp, -wordSize));
1101     }
1102   }
1103 }
1104 
1105 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1106   // We always ignore the frame_slots arg and just use the space just below frame pointer

1132 
1133 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1134   RegSet x;
1135   for ( int i = first_arg ; i < arg_count ; i++ ) {
1136     if (args[i].first()->is_Register()) {
1137       x = x + args[i].first()->as_Register();
1138     } else {
1139       ;
1140     }
1141   }
1142   __ pop(x, sp);
1143   for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1144     if (args[i].first()->is_Register()) {
1145       ;
1146     } else if (args[i].first()->is_FloatRegister()) {
1147       __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
1148     }
1149   }
1150 }
1151 
1152 static void rt_call(MacroAssembler* masm, address dest) {
1153   CodeBlob *cb = CodeCache::find_blob(dest);
1154   if (cb) {
1155     __ far_call(RuntimeAddress(dest));
1156   } else {
1157     __ lea(rscratch1, RuntimeAddress(dest));
1158     __ blr(rscratch1);
1159   }
1160 }
1161 
1162 static void verify_oop_args(MacroAssembler* masm,
1163                             const methodHandle& method,
1164                             const BasicType* sig_bt,
1165                             const VMRegPair* regs) {
1166   Register temp_reg = r19;  // not part of any compiled calling seq
1167   if (VerifyOops) {
1168     for (int i = 0; i < method->size_of_parameters(); i++) {
1169       if (sig_bt[i] == T_OBJECT ||
1170           sig_bt[i] == T_ARRAY) {
1171         VMReg r = regs[i].first();
1172         assert(r->is_valid(), "bad oop arg");
1173         if (r->is_stack()) {
1174           __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1175           __ verify_oop(temp_reg);
1176         } else {
1177           __ verify_oop(r->as_Register());
1178         }
1179       }
1180     }
1181   }
1182 }
1183 
1184 static void gen_special_dispatch(MacroAssembler* masm,
1185                                  const methodHandle& method,
1186                                  const BasicType* sig_bt,
1187                                  const VMRegPair* regs) {
1188   verify_oop_args(masm, method, sig_bt, regs);
1189   vmIntrinsics::ID iid = method->intrinsic_id();
1190 
1191   // Now write the args into the outgoing interpreter space
1192   bool     has_receiver   = false;
1193   Register receiver_reg   = noreg;
1194   int      member_arg_pos = -1;
1195   Register member_reg     = noreg;
1196   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1197   if (ref_kind != 0) {
1198     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1199     member_reg = r19;  // known to be free at this point
1200     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1201   } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
1202     has_receiver = true;



1203   } else {
1204     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1205   }
1206 
1207   if (member_reg != noreg) {
1208     // Load the member_arg into register, if necessary.
1209     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1210     VMReg r = regs[member_arg_pos].first();
1211     if (r->is_stack()) {
1212       __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1213     } else {
1214       // no data motion is needed
1215       member_reg = r->as_Register();
1216     }
1217   }
1218 
1219   if (has_receiver) {
1220     // Make sure the receiver is loaded into a register.
1221     assert(method->size_of_parameters() > 0, "oob");
1222     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

1515   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1516     int i = arg_order.at(ai);
1517     int c_arg = arg_order.at(ai + 1);
1518     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1519     assert(c_arg != -1 && i != -1, "wrong order");
1520 #ifdef ASSERT
1521     if (in_regs[i].first()->is_Register()) {
1522       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1523     } else if (in_regs[i].first()->is_FloatRegister()) {
1524       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1525     }
1526     if (out_regs[c_arg].first()->is_Register()) {
1527       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1528     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1529       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1530     }
1531 #endif /* ASSERT */
1532     switch (in_sig_bt[i]) {
1533       case T_ARRAY:
1534       case T_OBJECT:
1535         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1536                     ((i == 0) && (!is_static)),
1537                     &receiver_offset);
1538         int_args++;
1539         break;
1540       case T_VOID:
1541         break;
1542 
1543       case T_FLOAT:
1544         float_move(masm, in_regs[i], out_regs[c_arg]);
1545         float_args++;
1546         break;
1547 
1548       case T_DOUBLE:
1549         assert( i + 1 < total_in_args &&
1550                 in_sig_bt[i + 1] == T_VOID &&
1551                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1552         double_move(masm, in_regs[i], out_regs[c_arg]);
1553         float_args++;
1554         break;
1555 
1556       case T_LONG :
1557         long_move(masm, in_regs[i], out_regs[c_arg]);
1558         int_args++;
1559         break;
1560 
1561       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1562 
1563       default:
1564         move32_64(masm, in_regs[i], out_regs[c_arg]);
1565         int_args++;
1566     }
1567   }
1568 
1569   // point c_arg at the first arg that is already loaded in case we
1570   // need to spill before we call out
1571   int c_arg = total_c_args - total_in_args;
1572 
1573   // Pre-load a static method's oop into c_rarg1.
1574   if (method->is_static()) {
1575 
1576     //  load oop into a register
1577     __ movoop(c_rarg1,
1578               JNIHandles::make_local(method->method_holder()->java_mirror()),
1579               /*immediate*/true);
1580 
 1581     // Now handlize the static class mirror; it's known to be not-null.
1582     __ str(c_rarg1, Address(sp, klass_offset));
1583     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1584 

1672     // Save the test result, for recursive case, the result is zero
1673     __ str(swap_reg, Address(lock_reg, mark_word_offset));
1674     __ br(Assembler::NE, slow_path_lock);
1675 
1676     // Slow path will re-enter here
1677 
1678     __ bind(lock_done);
1679   }
1680 
1681 
1682   // Finally just about ready to make the JNI call
1683 
1684   // get JNIEnv* which is first argument to native
1685   __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1686 
1687   // Now set thread in native
1688   __ mov(rscratch1, _thread_in_native);
1689   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1690   __ stlrw(rscratch1, rscratch2);
1691 
1692   rt_call(masm, native_func);
1693 
1694   __ bind(native_return);
1695 
1696   intptr_t return_pc = (intptr_t) __ pc();
1697   oop_maps->add_gc_map(return_pc - start, map);
1698 
1699   // Unpack native results.
1700   switch (ret_type) {
1701   case T_BOOLEAN: __ c2bool(r0);                     break;
1702   case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
1703   case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
1704   case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
1705   case T_INT    : __ sbfx(r0, r0, 0, 32);            break;
1706   case T_DOUBLE :
1707   case T_FLOAT  :
 1708     // Result is in v0; we'll save it as needed
1709     break;
1710   case T_ARRAY:                 // Really a handle
1711   case T_OBJECT:                // Really a handle
1712       break; // can't de-handlize until after safepoint check

1881 
1882     __ block_comment("Slow path unlock {");
1883     __ bind(slow_path_unlock);
1884 
 1885     // If we haven't already saved the native result we must save it now, as the
 1886     // floating-point result registers are still exposed.
1887 
1888     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1889       save_native_result(masm, ret_type, stack_slots);
1890     }
1891 
1892     __ mov(c_rarg2, rthread);
1893     __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1894     __ mov(c_rarg0, obj_reg);
1895 
1896     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1897     // NOTE that obj_reg == r19 currently
1898     __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1899     __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1900 
1901     rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1902 
1903 #ifdef ASSERT
1904     {
1905       Label L;
1906       __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1907       __ cbz(rscratch1, L);
1908       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1909       __ bind(L);
1910     }
1911 #endif /* ASSERT */
1912 
1913     __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1914 
1915     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1916       restore_native_result(masm, ret_type, stack_slots);
1917     }
1918     __ b(unlock_done);
1919 
1920     __ block_comment("} Slow path unlock");
1921 
1922   } // synchronized
1923 
1924   // SLOW PATH Reguard the stack if needed
1925 
1926   __ bind(reguard);
1927   save_native_result(masm, ret_type, stack_slots);
1928   rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1929   restore_native_result(masm, ret_type, stack_slots);
1930   // and continue
1931   __ b(reguard_done);
1932 
1933   // SLOW PATH safepoint
1934   {
1935     __ block_comment("safepoint {");
1936     __ bind(safepoint_in_progress);
1937 
1938     // Don't use call_VM as it will see a possible pending exception and forward it
1939     // and never return here preventing us from clearing _last_native_pc down below.
1940     //
1941     save_native_result(masm, ret_type, stack_slots);
1942     __ mov(c_rarg0, rthread);
1943 #ifndef PRODUCT
1944   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
1945 #endif
1946     __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1947     __ blr(rscratch1);
1948 

2916   __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
2917   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2918 #endif
2919   // Clear the exception oop so GC no longer processes it as a root.
2920   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2921 
2922   // r0: exception oop
2923   // r8:  exception handler
2924   // r4: exception pc
2925   // Jump to handler
2926 
2927   __ br(r8);
2928 
2929   // Make sure all code is generated
2930   masm->flush();
2931 
2932   // Set exception blob
2933   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
2934 }
2935 
2936 // ---------------------------------------------------------------
2937 
2938 class NativeInvokerGenerator : public StubCodeGenerator {
2939   address _call_target;
2940   int _shadow_space_bytes;
2941 
2942   const GrowableArray<VMReg>& _input_registers;
2943   const GrowableArray<VMReg>& _output_registers;
2944 
2945   int _frame_complete;
2946   int _framesize;
2947   OopMapSet* _oop_maps;
2948 public:
2949   NativeInvokerGenerator(CodeBuffer* buffer,
2950                          address call_target,
2951                          int shadow_space_bytes,
2952                          const GrowableArray<VMReg>& input_registers,
2953                          const GrowableArray<VMReg>& output_registers)
2954    : StubCodeGenerator(buffer, PrintMethodHandleStubs),
2955      _call_target(call_target),
2956      _shadow_space_bytes(shadow_space_bytes),
2957      _input_registers(input_registers),
2958      _output_registers(output_registers),
2959      _frame_complete(0),
2960      _framesize(0),
2961      _oop_maps(NULL) {
2962     assert(_output_registers.length() <= 1
2963            || (_output_registers.length() == 2 && !_output_registers.at(1)->is_valid()), "no multi-reg returns");
2964   }
2965 
2966   void generate();
2967 
2968   int spill_size_in_bytes() const {
2969     if (_output_registers.length() == 0) {
2970       return 0;
2971     }
2972     VMReg reg = _output_registers.at(0);
2973     assert(reg->is_reg(), "must be a register");
2974     if (reg->is_Register()) {
2975       return 8;
2976     } else if (reg->is_FloatRegister()) {
2977       bool use_sve = Matcher::supports_scalable_vector();
2978       if (use_sve) {
2979         return Matcher::scalable_vector_reg_size(T_BYTE);
2980       }
2981       return 16;
2982     } else {
2983       ShouldNotReachHere();
2984     }
2985     return 0;
2986   }
2987 
2988   void spill_output_registers() {
2989     if (_output_registers.length() == 0) {
2990       return;
2991     }
2992     VMReg reg = _output_registers.at(0);
2993     assert(reg->is_reg(), "must be a register");
2994     MacroAssembler* masm = _masm;
2995     if (reg->is_Register()) {
2996       __ spill(reg->as_Register(), true, 0);
2997     } else if (reg->is_FloatRegister()) {
2998       bool use_sve = Matcher::supports_scalable_vector();
2999       if (use_sve) {
3000         __ spill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
3001       } else {
3002         __ spill(reg->as_FloatRegister(), __ Q, 0);
3003       }
3004     } else {
3005       ShouldNotReachHere();
3006     }
3007   }
3008 
3009   void fill_output_registers() {
3010     if (_output_registers.length() == 0) {
3011       return;
3012     }
3013     VMReg reg = _output_registers.at(0);
3014     assert(reg->is_reg(), "must be a register");
3015     MacroAssembler* masm = _masm;
3016     if (reg->is_Register()) {
3017       __ unspill(reg->as_Register(), true, 0);
3018     } else if (reg->is_FloatRegister()) {
3019       bool use_sve = Matcher::supports_scalable_vector();
3020       if (use_sve) {
3021         __ unspill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
3022       } else {
3023         __ unspill(reg->as_FloatRegister(), __ Q, 0);
3024       }
3025     } else {
3026       ShouldNotReachHere();
3027     }
3028   }
3029 
3030   int frame_complete() const {
3031     return _frame_complete;
3032   }
3033 
3034   int framesize() const {
3035     return (_framesize >> (LogBytesPerWord - LogBytesPerInt));
3036   }
3037 
3038   OopMapSet* oop_maps() const {
3039     return _oop_maps;
3040   }
3041 
3042 private:
3043 #ifdef ASSERT
3044   bool target_uses_register(VMReg reg) {
3045     return _input_registers.contains(reg) || _output_registers.contains(reg);
3046   }
3047 #endif
3048 };
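
Illustration (not from the patch): the frame-size bookkeeping used by spill_size_in_bytes(), framesize() and generate() below, assuming 4-byte VMReg slots and 8-byte words. The saved rfp/lr pair costs four slots, a general-purpose result spill two more, and the total is rounded up to a multiple of four slots so sp stays 16-byte aligned; framesize() then halves the slot count because RuntimeStub expects words.

    #include <cstdio>

    int main() {
      const int LogBytesPerInt  = 2;  // a VMReg stack slot is 4 bytes
      const int LogBytesPerWord = 3;  // a machine word is 8 bytes on AArch64
      int spill_bytes = 8;                                        // one GPR result
      int slots = (4 + (spill_bytes >> LogBytesPerInt) + 3) & ~3; // align_up(6, 4) = 8
      int words = slots >> (LogBytesPerWord - LogBytesPerInt);    // 8 slots -> 4 words
      printf("%d slots = %d words = %d bytes\n", slots, words, slots << LogBytesPerInt);
      return 0;
    }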
3049 
3050 static const int native_invoker_code_size = 1024;
3051 
3052 RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
3053                                                 int shadow_space_bytes,
3054                                                 const GrowableArray<VMReg>& input_registers,
3055                                                 const GrowableArray<VMReg>& output_registers) {
3056   int locs_size  = 64;
3057   CodeBuffer code("nep_invoker_blob", native_invoker_code_size, locs_size);
3058   NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers);
3059   g.generate();
3060   code.log_section_sizes("nep_invoker_blob");
3061 
3062   RuntimeStub* stub =
3063     RuntimeStub::new_runtime_stub("nep_invoker_blob",
3064                                   &code,
3065                                   g.frame_complete(),
3066                                   g.framesize(),
3067                                   g.oop_maps(), false);
3068   return stub;
3069 }
3070 
3071 void NativeInvokerGenerator::generate() {
3072   assert(!(target_uses_register(rscratch1->as_VMReg())
3073            || target_uses_register(rscratch2->as_VMReg())
3074            || target_uses_register(rthread->as_VMReg())),
3075          "Register conflict");
3076 
3077   enum layout {
3078     rbp_off,
3079     rbp_off2,
3080     return_off,
3081     return_off2,
3082     framesize // inclusive of return address
3083   };
3084 
3085   assert(_shadow_space_bytes == 0, "not expecting shadow space on AArch64");
3086   _framesize = align_up(framesize + (spill_size_in_bytes() >> LogBytesPerInt), 4);
3087   assert(is_even(_framesize/2), "sp not 16-byte aligned");
3088 
3089   _oop_maps  = new OopMapSet();
3090   MacroAssembler* masm = _masm;
3091 
3092   address start = __ pc();
3093 
3094   __ enter();
3095 
3096   // lr and fp are already in place
3097   __ sub(sp, rfp, ((unsigned)_framesize-4) << LogBytesPerInt); // prolog
3098 
3099   _frame_complete = __ pc() - start;
3100 
3101   address the_pc = __ pc();
3102   __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
3103   OopMap* map = new OopMap(_framesize, 0);
3104   _oop_maps->add_gc_map(the_pc - start, map);
3105 
3106   // State transition
3107   __ mov(rscratch1, _thread_in_native);
3108   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
3109   __ stlrw(rscratch1, rscratch2);
3110 
3111   rt_call(masm, _call_target);
3112 
3113   __ mov(rscratch1, _thread_in_native_trans);
3114   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
3115 
3116   // Force this write out before the read below
3117   __ membar(Assembler::LoadLoad | Assembler::LoadStore |
3118             Assembler::StoreLoad | Assembler::StoreStore);
3119 
3120   __ verify_sve_vector_length();
3121 
3122   Label L_after_safepoint_poll;
3123   Label L_safepoint_poll_slow_path;
3124 
3125   __ safepoint_poll(L_safepoint_poll_slow_path, true /* at_return */, true /* acquire */, false /* in_nmethod */);
3126 
3127   __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
3128   __ cbnzw(rscratch1, L_safepoint_poll_slow_path);
3129 
3130   __ bind(L_after_safepoint_poll);
3131 
3132   // change thread state
3133   __ mov(rscratch1, _thread_in_Java);
3134   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
3135   __ stlrw(rscratch1, rscratch2);
3136 
3137   __ block_comment("reguard stack check");
3138   Label L_reguard;
3139   Label L_after_reguard;
3140   __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
3141   __ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled);
3142   __ br(Assembler::EQ, L_reguard);
3143   __ bind(L_after_reguard);
3144 
3145   __ reset_last_Java_frame(true);
3146 
3147   __ leave(); // required for proper stackwalking of RuntimeStub frame
3148   __ ret(lr);
3149 
3150   //////////////////////////////////////////////////////////////////////////////
3151 
3152   __ block_comment("{ L_safepoint_poll_slow_path");
3153   __ bind(L_safepoint_poll_slow_path);
3154 
3155   // Need to save the native result registers around any runtime calls.
3156   spill_output_registers();
3157 
3158   __ mov(c_rarg0, rthread);
3159   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
3160   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
3161   __ blr(rscratch1);
3162 
3163   fill_output_registers();
3164 
3165   __ b(L_after_safepoint_poll);
3166   __ block_comment("} L_safepoint_poll_slow_path");
3167 
3168   //////////////////////////////////////////////////////////////////////////////
3169 
3170   __ block_comment("{ L_reguard");
3171   __ bind(L_reguard);
3172 
3173   spill_output_registers();
3174 
3175   rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
3176 
3177   fill_output_registers();
3178 
3179   __ b(L_after_reguard);
3180 
3181   __ block_comment("} L_reguard");
3182 
3183   //////////////////////////////////////////////////////////////////////////////
3184 
3185   __ flush();
3186 }
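
Illustration (not from the patch): the thread-state handshake generate() wraps around the native call, modeled with plain enums. The constant names mirror the HotSpot states used above, the safepoint poll is reduced to a boolean, and nothing here is HotSpot API.

    #include <cstdio>

    enum ThreadState { thread_in_Java, thread_in_native, thread_in_native_trans };

    static ThreadState invoke(bool poll_hits_slow_path) {
      ThreadState state = thread_in_native;  // stlrw: release-store before the call
      // ... native target runs here ...
      state = thread_in_native_trans;        // strw + full membar before the poll
      if (poll_hits_slow_path) {
        // slow path: spill the result registers, call
        // check_special_condition_for_native_trans, then refill them
      }
      state = thread_in_Java;                // stlrw: back to Java once the poll is clean
      return state;
    }

    int main() {
      printf("%d %d\n", (int)invoke(false), (int)invoke(true));  // both end at thread_in_Java
      return 0;
    }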
3187 #endif // COMPILER2


 262   __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
 263                    Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
 264 #else
 265 #if !INCLUDE_JVMCI
 266   assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
 267 #endif
 268   __ pop_CPU_state(_save_vectors);
 269 #endif
 270   __ leave();
 271 
 272 }
 273 
 274 // Is the vector's size (in bytes) bigger than the size saved by default?
 275 // 8-byte vector registers are saved by default on AArch64.
 276 // The minimum vector size supported by SVE is 8 bytes, and we need to save
 277 // predicate registers when the vector size is 8 bytes as well.
 278 bool SharedRuntime::is_wide_vector(int size) {
 279   return size > 8 || (UseSVE > 0 && size >= 8);
 280 }
 281 
 282 // ---------------------------------------------------------------------------
 283 // Read the array of BasicTypes from a signature, and compute where the
 284 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
 285 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
 286 // refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
 287 // as framesizes are fixed.
 288 // VMRegImpl::stack0 refers to the first slot 0(sp),
 289 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
 290 // up to RegisterImpl::number_of_registers are the 64-bit
 291 // integer registers.
 292 
 293 // Note: the INPUTS in sig_bt are in units of Java argument words,
 294 // which are 64-bit.  The OUTPUTS are in 32-bit units.
 295 
 296 // The Java calling convention is a "shifted" version of the C ABI.
 297 // By skipping the first C ABI register we can call non-static jni
 298 // methods with small numbers of arguments without having to shuffle
 299 // the arguments at all. Since we control the java ABI we ought to at
 300 // least get some advantage out of it.
 301 

 887   return stk_args;
 888 }
 889 
 890 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 891                                              uint num_bits,
 892                                              uint total_args_passed) {
 893   Unimplemented();
 894   return 0;
 895 }
 896 
 897 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 898                                          VMRegPair *regs,
 899                                          VMRegPair *regs2,
 900                                          int total_args_passed)
 901 {
 902   int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed);
 903   guarantee(result >= 0, "Unsupported arguments configuration");
 904   return result;
 905 }
 906 
 907 
 908 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
 909   // We always ignore the frame_slots arg and just use the space just below the frame pointer,
 910   // which by this time is free to use
 911   switch (ret_type) {
 912   case T_FLOAT:
 913     __ strs(v0, Address(rfp, -wordSize));
 914     break;
 915   case T_DOUBLE:
 916     __ strd(v0, Address(rfp, -wordSize));
 917     break;
 918   case T_VOID:  break;
 919   default: {
 920     __ str(r0, Address(rfp, -wordSize));
 921     }
 922   }
 923 }
 924 
 925 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
 926   // We always ignore the frame_slots arg and just use the space just below the frame pointer

 952 
 953 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
 954   RegSet x;
 955   for ( int i = first_arg ; i < arg_count ; i++ ) {
 956     if (args[i].first()->is_Register()) {
 957       x = x + args[i].first()->as_Register();
 958     } else {
 959       ;
 960     }
 961   }
 962   __ pop(x, sp);
 963   for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
 964     if (args[i].first()->is_Register()) {
 965       ;
 966     } else if (args[i].first()->is_FloatRegister()) {
 967       __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
 968     }
 969   }
 970 }
 971 
 972 static void verify_oop_args(MacroAssembler* masm,
 973                             const methodHandle& method,
 974                             const BasicType* sig_bt,
 975                             const VMRegPair* regs) {
 976   Register temp_reg = r19;  // not part of any compiled calling seq
 977   if (VerifyOops) {
 978     for (int i = 0; i < method->size_of_parameters(); i++) {
 979       if (sig_bt[i] == T_OBJECT ||
 980           sig_bt[i] == T_ARRAY) {
 981         VMReg r = regs[i].first();
 982         assert(r->is_valid(), "bad oop arg");
 983         if (r->is_stack()) {
 984           __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
 985           __ verify_oop(temp_reg);
 986         } else {
 987           __ verify_oop(r->as_Register());
 988         }
 989       }
 990     }
 991   }
 992 }
 993 
 994 static void gen_special_dispatch(MacroAssembler* masm,
 995                                  const methodHandle& method,
 996                                  const BasicType* sig_bt,
 997                                  const VMRegPair* regs) {
 998   verify_oop_args(masm, method, sig_bt, regs);
 999   vmIntrinsics::ID iid = method->intrinsic_id();
1000 
1001   // Now write the args into the outgoing interpreter space
1002   bool     has_receiver   = false;
1003   Register receiver_reg   = noreg;
1004   int      member_arg_pos = -1;
1005   Register member_reg     = noreg;
1006   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1007   if (ref_kind != 0) {
1008     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1009     member_reg = r19;  // known to be free at this point
1010     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1011   } else if (iid == vmIntrinsics::_invokeBasic) {
1012     has_receiver = true;
1013   } else if (iid == vmIntrinsics::_linkToNative) {
1014     member_arg_pos = method->size_of_parameters() - 1;  // trailing NativeEntryPoint argument
1015     member_reg = r19;  // known to be free at this point
1016   } else {
1017     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1018   }
1019 
1020   if (member_reg != noreg) {
1021     // Load the member_arg into register, if necessary.
1022     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1023     VMReg r = regs[member_arg_pos].first();
1024     if (r->is_stack()) {
1025       __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1026     } else {
1027       // no data motion is needed
1028       member_reg = r->as_Register();
1029     }
1030   }
1031 
1032   if (has_receiver) {
1033     // Make sure the receiver is loaded into a register.
1034     assert(method->size_of_parameters() > 0, "oob");
1035     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

1328   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1329     int i = arg_order.at(ai);
1330     int c_arg = arg_order.at(ai + 1);
1331     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1332     assert(c_arg != -1 && i != -1, "wrong order");
1333 #ifdef ASSERT
1334     if (in_regs[i].first()->is_Register()) {
1335       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1336     } else if (in_regs[i].first()->is_FloatRegister()) {
1337       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1338     }
1339     if (out_regs[c_arg].first()->is_Register()) {
1340       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1341     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1342       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1343     }
1344 #endif /* ASSERT */
1345     switch (in_sig_bt[i]) {
1346       case T_ARRAY:
1347       case T_OBJECT:
1348         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1349                        ((i == 0) && (!is_static)),
1350                        &receiver_offset);
1351         int_args++;
1352         break;
1353       case T_VOID:
1354         break;
1355 
1356       case T_FLOAT:
1357         __ float_move(in_regs[i], out_regs[c_arg]);
1358         float_args++;
1359         break;
1360 
1361       case T_DOUBLE:
1362         assert( i + 1 < total_in_args &&
1363                 in_sig_bt[i + 1] == T_VOID &&
1364                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1365         __ double_move(in_regs[i], out_regs[c_arg]);
1366         float_args++;
1367         break;
1368 
1369       case T_LONG :
1370         __ long_move(in_regs[i], out_regs[c_arg]);
1371         int_args++;
1372         break;
1373 
1374       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1375 
1376       default:
1377         __ move32_64(in_regs[i], out_regs[c_arg]);
1378         int_args++;
1379     }
1380   }
1381 
1382   // point c_arg at the first arg that is already loaded in case we
1383   // need to spill before we call out
1384   int c_arg = total_c_args - total_in_args;
1385 
1386   // Pre-load a static method's oop into c_rarg1.
1387   if (method->is_static()) {
1388 
1389     //  load oop into a register
1390     __ movoop(c_rarg1,
1391               JNIHandles::make_local(method->method_holder()->java_mirror()),
1392               /*immediate*/true);
1393 
 1394     // Now handlize the static class mirror; it's known to be not-null.
1395     __ str(c_rarg1, Address(sp, klass_offset));
1396     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1397 

1485     // Save the test result, for recursive case, the result is zero
1486     __ str(swap_reg, Address(lock_reg, mark_word_offset));
1487     __ br(Assembler::NE, slow_path_lock);
1488 
1489     // Slow path will re-enter here
1490 
1491     __ bind(lock_done);
1492   }
1493 
1494 
1495   // Finally just about ready to make the JNI call
1496 
1497   // get JNIEnv* which is first argument to native
1498   __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1499 
1500   // Now set thread in native
1501   __ mov(rscratch1, _thread_in_native);
1502   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1503   __ stlrw(rscratch1, rscratch2);
1504 
1505   __ rt_call(native_func);
1506 
1507   __ bind(native_return);
1508 
1509   intptr_t return_pc = (intptr_t) __ pc();
1510   oop_maps->add_gc_map(return_pc - start, map);
1511 
1512   // Unpack native results.
1513   switch (ret_type) {
1514   case T_BOOLEAN: __ c2bool(r0);                     break;
1515   case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
1516   case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
1517   case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
1518   case T_INT    : __ sbfx(r0, r0, 0, 32);            break;
1519   case T_DOUBLE :
1520   case T_FLOAT  :
 1521     // Result is in v0; we'll save it as needed
1522     break;
1523   case T_ARRAY:                 // Really a handle
1524   case T_OBJECT:                // Really a handle
1525       break; // can't de-handlize until after safepoint check

1694 
1695     __ block_comment("Slow path unlock {");
1696     __ bind(slow_path_unlock);
1697 
 1698     // If we haven't already saved the native result we must save it now, as the
 1699     // floating-point result registers are still exposed.
1700 
1701     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1702       save_native_result(masm, ret_type, stack_slots);
1703     }
1704 
1705     __ mov(c_rarg2, rthread);
1706     __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1707     __ mov(c_rarg0, obj_reg);
1708 
1709     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1710     // NOTE that obj_reg == r19 currently
1711     __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1712     __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1713 
1714     __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1715 
1716 #ifdef ASSERT
1717     {
1718       Label L;
1719       __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1720       __ cbz(rscratch1, L);
1721       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1722       __ bind(L);
1723     }
1724 #endif /* ASSERT */
1725 
1726     __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1727 
1728     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1729       restore_native_result(masm, ret_type, stack_slots);
1730     }
1731     __ b(unlock_done);
1732 
1733     __ block_comment("} Slow path unlock");
1734 
1735   } // synchronized
1736 
1737   // SLOW PATH Reguard the stack if needed
1738 
1739   __ bind(reguard);
1740   save_native_result(masm, ret_type, stack_slots);
1741   __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1742   restore_native_result(masm, ret_type, stack_slots);
1743   // and continue
1744   __ b(reguard_done);
1745 
1746   // SLOW PATH safepoint
1747   {
1748     __ block_comment("safepoint {");
1749     __ bind(safepoint_in_progress);
1750 
1751     // Don't use call_VM as it will see a possible pending exception and forward it
1752     // and never return here preventing us from clearing _last_native_pc down below.
1753     //
1754     save_native_result(masm, ret_type, stack_slots);
1755     __ mov(c_rarg0, rthread);
1756 #ifndef PRODUCT
1757   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
1758 #endif
1759     __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1760     __ blr(rscratch1);
1761 

2729   __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
2730   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2731 #endif
2732   // Clear the exception oop so GC no longer processes it as a root.
2733   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2734 
2735   // r0: exception oop
2736   // r8:  exception handler
2737   // r4: exception pc
2738   // Jump to handler
2739 
2740   __ br(r8);
2741 
2742   // Make sure all code is generated
2743   masm->flush();
2744 
2745   // Set exception blob
2746   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
2747 }
 2748 
2749 #endif // COMPILER2
2750 