262 __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
263 Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
264 #else
265 #if !INCLUDE_JVMCI
266 assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
267 #endif
268 __ pop_CPU_state(_save_vectors);
269 #endif
270 __ leave();
271
272 }
273
274 // Is the vector's size (in bytes) bigger than the size saved by default?
275 // 8-byte vector registers are saved by default on AArch64.
276 // The minimum vector size supported by SVE is 8 bytes, so we also need to
277 // save the predicate registers when the vector size is only 8 bytes.
278 bool SharedRuntime::is_wide_vector(int size) {
279 return size > 8 || (UseSVE > 0 && size >= 8);
280 }
281
282 // The java_calling_convention describes stack locations as ideal slots on
283 // a frame with no ABI restrictions. Since we must observe ABI restrictions
284 // (such as the space reserved for the saved rfp and lr) the slots must be
285 // biased by the following value.
286 static int reg2offset_in(VMReg r) {
287 // Account for saved rfp and lr
288 // This should really be in_preserve_stack_slots
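  // Illustrative example: incoming stack slot 0 maps to (0 + 4) * 4 = 16,
  // i.e. 16 bytes above rfp, just past the two saved 8-byte words (rfp, lr).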
289 return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
290 }
291
292 static int reg2offset_out(VMReg r) {
293 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
294 }
295
296 // ---------------------------------------------------------------------------
297 // Read the array of BasicTypes from a signature, and compute where the
298 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
299 // quantities. Values less than VMRegImpl::stack0 are registers, those above
300 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
301 // as framesizes are fixed.
302 // VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
303 // refers to the memory word 4 bytes higher.
304 // Register values up to RegisterImpl::number_of_registers are the 64-bit
305 // integer registers.
306
307 // Note: the INPUTS in sig_bt are in units of Java argument words,
308 // which are 64-bit. The OUTPUTS are in 32-bit units.
309
310 // The Java calling convention is a "shifted" version of the C ABI.
311 // By skipping the first C ABI register we can call non-static jni
312 // methods with small numbers of arguments without having to shuffle
313 // the arguments at all. Since we control the java ABI we ought to at
314 // least get some advantage out of it.
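// As a rough illustration: on AArch64 the j_rargN Java argument registers are
// declared as a rotation of the C argument registers (j_rarg0 aliases c_rarg1
// and so on, with j_rarg7 wrapping around to c_rarg0; see the j_rarg register
// declarations), so for a non-static JNI call only the JNIEnv* has to be
// inserted in c_rarg0 while the Java arguments already sit in the right
// C registers.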
315
902 return stk_args;
903 }
904
905 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
906 uint num_bits,
907 uint total_args_passed) {
908 Unimplemented();
909 return 0;
910 }
911
912 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
913 VMRegPair *regs,
914 VMRegPair *regs2,
915 int total_args_passed)
916 {
917 int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed);
918 guarantee(result >= 0, "Unsupported arguments configuration");
919 return result;
920 }
921
922 // On 64 bit we will store integer-like items to the stack as 64-bit items
923 // (AArch64 ABI) even though Java would only store 32 bits for a parameter.
924 // On 32 bit it would simply be 32 bits.
925 // So this routine will do 32->32 on 32 bit and 32->64 on 64 bit.
926 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
927 if (src.first()->is_stack()) {
928 if (dst.first()->is_stack()) {
929 // stack to stack
930 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
931 __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
932 } else {
933 // stack to reg
934 __ ldrsw(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
935 }
936 } else if (dst.first()->is_stack()) {
937 // reg to stack
938 // Do we really have to sign extend???
939 // __ movslq(src.first()->as_Register(), src.first()->as_Register());
940 __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
941 } else {
942 if (dst.first() != src.first()) {
943 __ sxtw(dst.first()->as_Register(), src.first()->as_Register());
944 }
945 }
946 }
947
948 // An oop arg. Must pass a handle not the oop itself
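// The "handle" passed to the native code is the address of a stack slot that
// holds the oop; that slot is recorded in the oop map so the GC can update it.
// When the oop itself is NULL, a NULL handle is passed instead, which is what
// the csel below arranges.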
949 static void object_move(MacroAssembler* masm,
950 OopMap* map,
951 int oop_handle_offset,
952 int framesize_in_slots,
953 VMRegPair src,
954 VMRegPair dst,
955 bool is_receiver,
956 int* receiver_offset) {
957
958 // must pass a handle. First figure out the location we use as a handle
959
960 Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register();
961
962 // See if the oop is NULL; if it is we need no handle
963
964 if (src.first()->is_stack()) {
965
966 // Oop is already on the stack as an argument
967 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
968 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
969 if (is_receiver) {
970 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
971 }
972
973 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
974 __ lea(rHandle, Address(rfp, reg2offset_in(src.first())));
975 // conditionally move a NULL
976 __ cmp(rscratch1, zr);
977 __ csel(rHandle, zr, rHandle, Assembler::EQ);
978 } else {
979
980 // Oop is in a register; we must store it to the space we reserve
981 // on the stack for oop_handles and pass a handle if the oop is non-NULL
982
983 const Register rOop = src.first()->as_Register();
984 int oop_slot;
985 if (rOop == j_rarg0)
986 oop_slot = 0;
987 else if (rOop == j_rarg1)
988 oop_slot = 1;
989 else if (rOop == j_rarg2)
990 oop_slot = 2;
991 else if (rOop == j_rarg3)
992 oop_slot = 3;
993 else if (rOop == j_rarg4)
994 oop_slot = 4;
995 else if (rOop == j_rarg5)
996 oop_slot = 5;
997 else if (rOop == j_rarg6)
998 oop_slot = 6;
999 else {
1000 assert(rOop == j_rarg7, "wrong register");
1001 oop_slot = 7;
1002 }
1003
1004 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
1005 int offset = oop_slot*VMRegImpl::stack_slot_size;
1006
1007 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1008 // Store oop in handle area, may be NULL
1009 __ str(rOop, Address(sp, offset));
1010 if (is_receiver) {
1011 *receiver_offset = offset;
1012 }
1013
1014 __ cmp(rOop, zr);
1015 __ lea(rHandle, Address(sp, offset));
1016 // conditionally move a NULL
1017 __ csel(rHandle, zr, rHandle, Assembler::EQ);
1018 }
1019
1020 // If the arg is on the stack then place it, otherwise it is already in the correct reg.
1021 if (dst.first()->is_stack()) {
1022 __ str(rHandle, Address(sp, reg2offset_out(dst.first())));
1023 }
1024 }
1025
1026 // A float arg may have to do a float reg to int reg conversion
1027 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1028 assert(src.first()->is_stack() && dst.first()->is_stack() ||
1029 src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
1030 if (src.first()->is_stack()) {
1031 if (dst.first()->is_stack()) {
1032 __ ldrw(rscratch1, Address(rfp, reg2offset_in(src.first())));
1033 __ strw(rscratch1, Address(sp, reg2offset_out(dst.first())));
1034 } else {
1035 ShouldNotReachHere();
1036 }
1037 } else if (src.first() != dst.first()) {
1038 if (src.is_single_phys_reg() && dst.is_single_phys_reg())
1039 __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1040 else
1041 ShouldNotReachHere();
1042 }
1043 }
1044
1045 // A long move
1046 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1047 if (src.first()->is_stack()) {
1048 if (dst.first()->is_stack()) {
1049 // stack to stack
1050 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
1051 __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
1052 } else {
1053 // stack to reg
1054 __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
1055 }
1056 } else if (dst.first()->is_stack()) {
1057 // reg to stack
1058 // Do we really have to sign extend???
1059 // __ movslq(src.first()->as_Register(), src.first()->as_Register());
1060 __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
1061 } else {
1062 if (dst.first() != src.first()) {
1063 __ mov(dst.first()->as_Register(), src.first()->as_Register());
1064 }
1065 }
1066 }
1067
1068
1069 // A double move
1070 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1071 assert(src.first()->is_stack() && dst.first()->is_stack() ||
1072 src.first()->is_reg() && dst.first()->is_reg(), "Unexpected error");
1073 if (src.first()->is_stack()) {
1074 if (dst.first()->is_stack()) {
1075 __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
1076 __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
1077 } else {
1078 ShouldNotReachHere();
1079 }
1080 } else if (src.first() != dst.first()) {
1081 if (src.is_single_phys_reg() && dst.is_single_phys_reg())
1082 __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1083 else
1084 ShouldNotReachHere();
1085 }
1086 }
1087
1088
1089 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1090 // We always ignore the frame_slots arg and just use the space just below
1091 // the frame pointer, which by this time is free to use.
1092 switch (ret_type) {
1093 case T_FLOAT:
1094 __ strs(v0, Address(rfp, -wordSize));
1095 break;
1096 case T_DOUBLE:
1097 __ strd(v0, Address(rfp, -wordSize));
1098 break;
1099 case T_VOID: break;
1100 default: {
1101 __ str(r0, Address(rfp, -wordSize));
1102 }
1103 }
1104 }
1105
1106 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1107 // We always ignore the frame_slots arg and just use the space just below frame pointer
1133
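// Restore the argument registers spilled by the matching save routine (not
// shown in this excerpt): the integer argument registers come back with a
// single pop, then the floating-point argument registers are reloaded in
// reverse order, popping two words for each.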
1134 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
1135 RegSet x;
1136 for ( int i = first_arg ; i < arg_count ; i++ ) {
1137 if (args[i].first()->is_Register()) {
1138 x = x + args[i].first()->as_Register();
1139 } else {
1140 ;
1141 }
1142 }
1143 __ pop(x, sp);
1144 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1145 if (args[i].first()->is_Register()) {
1146 ;
1147 } else if (args[i].first()->is_FloatRegister()) {
1148 __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
1149 }
1150 }
1151 }
1152
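// Call a runtime entry point. Destinations that live in the code cache go
// through far_call; arbitrary C addresses are loaded into rscratch1 and
// called with blr.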
1153 static void rt_call(MacroAssembler* masm, address dest) {
1154 CodeBlob *cb = CodeCache::find_blob(dest);
1155 if (cb) {
1156 __ far_call(RuntimeAddress(dest));
1157 } else {
1158 __ lea(rscratch1, RuntimeAddress(dest));
1159 __ blr(rscratch1);
1160 }
1161 }
1162
1163 static void verify_oop_args(MacroAssembler* masm,
1164 const methodHandle& method,
1165 const BasicType* sig_bt,
1166 const VMRegPair* regs) {
1167 Register temp_reg = r19; // not part of any compiled calling seq
1168 if (VerifyOops) {
1169 for (int i = 0; i < method->size_of_parameters(); i++) {
1170 if (sig_bt[i] == T_OBJECT ||
1171 sig_bt[i] == T_ARRAY) {
1172 VMReg r = regs[i].first();
1173 assert(r->is_valid(), "bad oop arg");
1174 if (r->is_stack()) {
1175 __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1176 __ verify_oop(temp_reg);
1177 } else {
1178 __ verify_oop(r->as_Register());
1179 }
1180 }
1181 }
1182 }
1183 }
1184
1185 static void gen_special_dispatch(MacroAssembler* masm,
1186 const methodHandle& method,
1187 const BasicType* sig_bt,
1188 const VMRegPair* regs) {
1189 verify_oop_args(masm, method, sig_bt, regs);
1190 vmIntrinsics::ID iid = method->intrinsic_id();
1191
1192 // Now write the args into the outgoing interpreter space
1193 bool has_receiver = false;
1194 Register receiver_reg = noreg;
1195 int member_arg_pos = -1;
1196 Register member_reg = noreg;
1197 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1198 if (ref_kind != 0) {
1199 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1200 member_reg = r19; // known to be free at this point
1201 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1202 } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
1203 has_receiver = true;
1204 } else {
1205 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1206 }
1207
1208 if (member_reg != noreg) {
1209 // Load the member_arg into register, if necessary.
1210 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1211 VMReg r = regs[member_arg_pos].first();
1212 if (r->is_stack()) {
1213 __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1214 } else {
1215 // no data motion is needed
1216 member_reg = r->as_Register();
1217 }
1218 }
1219
1220 if (has_receiver) {
1221 // Make sure the receiver is loaded into a register.
1222 assert(method->size_of_parameters() > 0, "oob");
1223 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1516 for (int ai = 0; ai < arg_order.length(); ai += 2) {
1517 int i = arg_order.at(ai);
1518 int c_arg = arg_order.at(ai + 1);
1519 __ block_comment(err_msg("move %d -> %d", i, c_arg));
1520 assert(c_arg != -1 && i != -1, "wrong order");
1521 #ifdef ASSERT
1522 if (in_regs[i].first()->is_Register()) {
1523 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1524 } else if (in_regs[i].first()->is_FloatRegister()) {
1525 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1526 }
1527 if (out_regs[c_arg].first()->is_Register()) {
1528 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1529 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1530 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1531 }
1532 #endif /* ASSERT */
1533 switch (in_sig_bt[i]) {
1534 case T_ARRAY:
1535 case T_OBJECT:
1536 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1537 ((i == 0) && (!is_static)),
1538 &receiver_offset);
1539 int_args++;
1540 break;
1541 case T_VOID:
1542 break;
1543
1544 case T_FLOAT:
1545 float_move(masm, in_regs[i], out_regs[c_arg]);
1546 float_args++;
1547 break;
1548
1549 case T_DOUBLE:
1550 assert( i + 1 < total_in_args &&
1551 in_sig_bt[i + 1] == T_VOID &&
1552 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1553 double_move(masm, in_regs[i], out_regs[c_arg]);
1554 float_args++;
1555 break;
1556
1557 case T_LONG :
1558 long_move(masm, in_regs[i], out_regs[c_arg]);
1559 int_args++;
1560 break;
1561
1562 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1563
1564 default:
1565 move32_64(masm, in_regs[i], out_regs[c_arg]);
1566 int_args++;
1567 }
1568 }
1569
1570 // point c_arg at the first arg that is already loaded in case we
1571 // need to spill before we call out
1572 int c_arg = total_c_args - total_in_args;
1573
1574 // Pre-load a static method's oop into c_rarg1.
1575 if (method->is_static()) {
1576
1577 // load oop into a register
1578 __ movoop(c_rarg1,
1579 JNIHandles::make_local(method->method_holder()->java_mirror()),
1580 /*immediate*/true);
1581
1582 // Now handlize the static class mirror; it's known to be non-null.
1583 __ str(c_rarg1, Address(sp, klass_offset));
1584 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1585
1676 __ br(Assembler::NE, slow_path_lock);
1677 } else {
1678 __ b(slow_path_lock);
1679 }
1680
1681 // Slow path will re-enter here
1682 __ bind(lock_done);
1683 }
1684
1685
1686 // Finally just about ready to make the JNI call
1687
1688 // get JNIEnv* which is first argument to native
1689 __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1690
1691 // Now set thread in native
1692 __ mov(rscratch1, _thread_in_native);
1693 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1694 __ stlrw(rscratch1, rscratch2);
1695
1696 rt_call(masm, native_func);
1697
1698 __ bind(native_return);
1699
1700 intptr_t return_pc = (intptr_t) __ pc();
1701 oop_maps->add_gc_map(return_pc - start, map);
1702
1703 // Unpack native results.
1704 switch (ret_type) {
1705 case T_BOOLEAN: __ c2bool(r0); break;
1706 case T_CHAR : __ ubfx(r0, r0, 0, 16); break;
1707 case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
1708 case T_SHORT : __ sbfx(r0, r0, 0, 16); break;
1709 case T_INT : __ sbfx(r0, r0, 0, 32); break;
1710 case T_DOUBLE :
1711 case T_FLOAT :
1712 // Result is in v0; we'll save it as needed
1713 break;
1714 case T_ARRAY: // Really a handle
1715 case T_OBJECT: // Really a handle
1716 break; // can't de-handlize until after safepoint check
1890
1891 __ block_comment("Slow path unlock {");
1892 __ bind(slow_path_unlock);
1893
1894 // If we haven't already saved the native result we must save it now, as the
1895 // floating-point result register is still exposed.
1896
1897 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1898 save_native_result(masm, ret_type, stack_slots);
1899 }
1900
1901 __ mov(c_rarg2, rthread);
1902 __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1903 __ mov(c_rarg0, obj_reg);
1904
1905 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1906 // NOTE that obj_reg == r19 currently
1907 __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1908 __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1909
1910 rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1911
1912 #ifdef ASSERT
1913 {
1914 Label L;
1915 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1916 __ cbz(rscratch1, L);
1917 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1918 __ bind(L);
1919 }
1920 #endif /* ASSERT */
1921
1922 __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1923
1924 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1925 restore_native_result(masm, ret_type, stack_slots);
1926 }
1927 __ b(unlock_done);
1928
1929 __ block_comment("} Slow path unlock");
1930
1931 } // synchronized
1932
1933 // SLOW PATH Reguard the stack if needed
1934
1935 __ bind(reguard);
1936 save_native_result(masm, ret_type, stack_slots);
1937 rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1938 restore_native_result(masm, ret_type, stack_slots);
1939 // and continue
1940 __ b(reguard_done);
1941
1942 // SLOW PATH safepoint
1943 {
1944 __ block_comment("safepoint {");
1945 __ bind(safepoint_in_progress);
1946
1947 // Don't use call_VM, as it will see a possible pending exception, forward it,
1948 // and never return here, preventing us from clearing _last_native_pc down below.
1949 //
1950 save_native_result(masm, ret_type, stack_slots);
1951 __ mov(c_rarg0, rthread);
1952 #ifndef PRODUCT
1953 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
1954 #endif
1955 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1956 __ blr(rscratch1);
1957
2938 __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
2939 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2940 #endif
2941 // Clear the exception oop so GC no longer processes it as a root.
2942 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2943
2944 // r0: exception oop
2945 // r8: exception handler
2946 // r4: exception pc
2947 // Jump to handler
2948
2949 __ br(r8);
2950
2951 // Make sure all code is generated
2952 masm->flush();
2953
2954 // Set exception blob
2955 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
2956 }
2957
2958 // ---------------------------------------------------------------
2959
2960 class NativeInvokerGenerator : public StubCodeGenerator {
2961 address _call_target;
2962 int _shadow_space_bytes;
2963
2964 const GrowableArray<VMReg>& _input_registers;
2965 const GrowableArray<VMReg>& _output_registers;
2966
2967 int _frame_complete;
2968 int _framesize;
2969 OopMapSet* _oop_maps;
2970 public:
2971 NativeInvokerGenerator(CodeBuffer* buffer,
2972 address call_target,
2973 int shadow_space_bytes,
2974 const GrowableArray<VMReg>& input_registers,
2975 const GrowableArray<VMReg>& output_registers)
2976 : StubCodeGenerator(buffer, PrintMethodHandleStubs),
2977 _call_target(call_target),
2978 _shadow_space_bytes(shadow_space_bytes),
2979 _input_registers(input_registers),
2980 _output_registers(output_registers),
2981 _frame_complete(0),
2982 _framesize(0),
2983 _oop_maps(NULL) {
2984 assert(_output_registers.length() <= 1
2985 || (_output_registers.length() == 2 && !_output_registers.at(1)->is_valid()), "no multi-reg returns");
2986 }
2987
2988 void generate();
2989
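  // Number of bytes needed to preserve the (single) return value register
  // across runtime calls made on the slow paths: 8 for a general-purpose
  // register, 16 for a NEON Q register, or the full vector length when
  // scalable (SVE) vectors are supported.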
2990 int spill_size_in_bytes() const {
2991 if (_output_registers.length() == 0) {
2992 return 0;
2993 }
2994 VMReg reg = _output_registers.at(0);
2995 assert(reg->is_reg(), "must be a register");
2996 if (reg->is_Register()) {
2997 return 8;
2998 } else if (reg->is_FloatRegister()) {
2999 bool use_sve = Matcher::supports_scalable_vector();
3000 if (use_sve) {
3001 return Matcher::scalable_vector_reg_size(T_BYTE);
3002 }
3003 return 16;
3004 } else {
3005 ShouldNotReachHere();
3006 }
3007 return 0;
3008 }
3009
3010 void spill_output_registers() {
3011 if (_output_registers.length() == 0) {
3012 return;
3013 }
3014 VMReg reg = _output_registers.at(0);
3015 assert(reg->is_reg(), "must be a register");
3016 MacroAssembler* masm = _masm;
3017 if (reg->is_Register()) {
3018 __ spill(reg->as_Register(), true, 0);
3019 } else if (reg->is_FloatRegister()) {
3020 bool use_sve = Matcher::supports_scalable_vector();
3021 if (use_sve) {
3022 __ spill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
3023 } else {
3024 __ spill(reg->as_FloatRegister(), __ Q, 0);
3025 }
3026 } else {
3027 ShouldNotReachHere();
3028 }
3029 }
3030
3031 void fill_output_registers() {
3032 if (_output_registers.length() == 0) {
3033 return;
3034 }
3035 VMReg reg = _output_registers.at(0);
3036 assert(reg->is_reg(), "must be a register");
3037 MacroAssembler* masm = _masm;
3038 if (reg->is_Register()) {
3039 __ unspill(reg->as_Register(), true, 0);
3040 } else if (reg->is_FloatRegister()) {
3041 bool use_sve = Matcher::supports_scalable_vector();
3042 if (use_sve) {
3043 __ unspill_sve_vector(reg->as_FloatRegister(), 0, Matcher::scalable_vector_reg_size(T_BYTE));
3044 } else {
3045 __ unspill(reg->as_FloatRegister(), __ Q, 0);
3046 }
3047 } else {
3048 ShouldNotReachHere();
3049 }
3050 }
3051
3052 int frame_complete() const {
3053 return _frame_complete;
3054 }
3055
3056 int framesize() const {
3057 return (_framesize >> (LogBytesPerWord - LogBytesPerInt));
3058 }
3059
3060 OopMapSet* oop_maps() const {
3061 return _oop_maps;
3062 }
3063
3064 private:
3065 #ifdef ASSERT
3066 bool target_uses_register(VMReg reg) {
3067 return _input_registers.contains(reg) || _output_registers.contains(reg);
3068 }
3069 #endif
3070 };
3071
3072 static const int native_invoker_code_size = 1024;
3073
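// Build the native-invoker RuntimeStub: NativeInvokerGenerator::generate()
// below emits the Java -> native thread-state transition, the call to
// call_target, and the safepoint-poll and stack-reguard slow paths for the
// return transition.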
3074 RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
3075 int shadow_space_bytes,
3076 const GrowableArray<VMReg>& input_registers,
3077 const GrowableArray<VMReg>& output_registers) {
3078 int locs_size = 64;
3079 CodeBuffer code("nep_invoker_blob", native_invoker_code_size, locs_size);
3080 NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers);
3081 g.generate();
3082 code.log_section_sizes("nep_invoker_blob");
3083
3084 RuntimeStub* stub =
3085 RuntimeStub::new_runtime_stub("nep_invoker_blob",
3086 &code,
3087 g.frame_complete(),
3088 g.framesize(),
3089 g.oop_maps(), false);
3090 return stub;
3091 }
3092
3093 void NativeInvokerGenerator::generate() {
3094 assert(!(target_uses_register(rscratch1->as_VMReg())
3095 || target_uses_register(rscratch2->as_VMReg())
3096 || target_uses_register(rthread->as_VMReg())),
3097 "Register conflict");
3098
3099 enum layout {
3100 rbp_off,
3101 rbp_off2,
3102 return_off,
3103 return_off2,
3104 framesize // inclusive of return address
3105 };
3106
3107 assert(_shadow_space_bytes == 0, "not expecting shadow space on AArch64");
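  // _framesize is counted in 32-bit stack slots: the layout enum above
  // contributes 4 slots for the saved rfp and lr, the spill area is converted
  // from bytes to slots, and the total is rounded up to a multiple of 4 slots
  // so sp stays 16-byte aligned.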
3108 _framesize = align_up(framesize + (spill_size_in_bytes() >> LogBytesPerInt), 4);
3109 assert(is_even(_framesize/2), "sp not 16-byte aligned");
3110
3111 _oop_maps = new OopMapSet();
3112 MacroAssembler* masm = _masm;
3113
3114 address start = __ pc();
3115
3116 __ enter();
3117
3118 // lr and fp are already in place
3119 __ sub(sp, rfp, ((unsigned)_framesize-4) << LogBytesPerInt); // prolog
3120
3121 _frame_complete = __ pc() - start;
3122
3123 address the_pc = __ pc();
3124 __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
3125 OopMap* map = new OopMap(_framesize, 0);
3126 _oop_maps->add_gc_map(the_pc - start, map);
3127
3128 // State transition
3129 __ mov(rscratch1, _thread_in_native);
3130 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
3131 __ stlrw(rscratch1, rscratch2);
3132
3133 rt_call(masm, _call_target);
3134
3135 __ mov(rscratch1, _thread_in_native_trans);
3136 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
3137
3138 // Force this write out before the read below
3139 __ membar(Assembler::LoadLoad | Assembler::LoadStore |
3140 Assembler::StoreLoad | Assembler::StoreStore);
3141
3142 __ verify_sve_vector_length();
3143
3144 Label L_after_safepoint_poll;
3145 Label L_safepoint_poll_slow_path;
3146
3147 __ safepoint_poll(L_safepoint_poll_slow_path, true /* at_return */, true /* acquire */, false /* in_nmethod */);
3148
3149 __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
3150 __ cbnzw(rscratch1, L_safepoint_poll_slow_path);
3151
3152 __ bind(L_after_safepoint_poll);
3153
3154 // change thread state
3155 __ mov(rscratch1, _thread_in_Java);
3156 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
3157 __ stlrw(rscratch1, rscratch2);
3158
3159 __ block_comment("reguard stack check");
3160 Label L_reguard;
3161 Label L_after_reguard;
3162 __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
3163 __ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled);
3164 __ br(Assembler::EQ, L_reguard);
3165 __ bind(L_after_reguard);
3166
3167 __ reset_last_Java_frame(true);
3168
3169 __ leave(); // required for proper stackwalking of RuntimeStub frame
3170 __ ret(lr);
3171
3172 //////////////////////////////////////////////////////////////////////////////
3173
3174 __ block_comment("{ L_safepoint_poll_slow_path");
3175 __ bind(L_safepoint_poll_slow_path);
3176
3177 // Need to save the native result registers around any runtime calls.
3178 spill_output_registers();
3179
3180 __ mov(c_rarg0, rthread);
3181 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
3182 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
3183 __ blr(rscratch1);
3184
3185 fill_output_registers();
3186
3187 __ b(L_after_safepoint_poll);
3188 __ block_comment("} L_safepoint_poll_slow_path");
3189
3190 //////////////////////////////////////////////////////////////////////////////
3191
3192 __ block_comment("{ L_reguard");
3193 __ bind(L_reguard);
3194
3195 spill_output_registers();
3196
3197 rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
3198
3199 fill_output_registers();
3200
3201 __ b(L_after_reguard);
3202
3203 __ block_comment("} L_reguard");
3204
3205 //////////////////////////////////////////////////////////////////////////////
3206
3207 __ flush();
3208 }
3209 #endif // COMPILER2
|
262 __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
263 Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
264 #else
265 #if !INCLUDE_JVMCI
266 assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
267 #endif
268 __ pop_CPU_state(_save_vectors);
269 #endif
270 __ leave();
271
272 }
273
274 // Is the vector's size (in bytes) bigger than the size saved by default?
275 // 8-byte vector registers are saved by default on AArch64.
276 // The minimum vector size supported by SVE is 8 bytes, so we also need to
277 // save the predicate registers when the vector size is only 8 bytes.
278 bool SharedRuntime::is_wide_vector(int size) {
279 return size > 8 || (UseSVE > 0 && size >= 8);
280 }
281
282 // ---------------------------------------------------------------------------
283 // Read the array of BasicTypes from a signature, and compute where the
284 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
285 // quantities. Values less than VMRegImpl::stack0 are registers, those above
286 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
287 // as framesizes are fixed.
288 // VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
289 // refers to the memory word 4 bytes higher.
290 // Register values up to RegisterImpl::number_of_registers are the 64-bit
291 // integer registers.
292
293 // Note: the INPUTS in sig_bt are in units of Java argument words,
294 // which are 64-bit. The OUTPUTS are in 32-bit units.
295
296 // The Java calling convention is a "shifted" version of the C ABI.
297 // By skipping the first C ABI register we can call non-static jni
298 // methods with small numbers of arguments without having to shuffle
299 // the arguments at all. Since we control the java ABI we ought to at
300 // least get some advantage out of it.
301
888 return stk_args;
889 }
890
891 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
892 uint num_bits,
893 uint total_args_passed) {
894 Unimplemented();
895 return 0;
896 }
897
898 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
899 VMRegPair *regs,
900 VMRegPair *regs2,
901 int total_args_passed)
902 {
903 int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed);
904 guarantee(result >= 0, "Unsupported arguments configuration");
905 return result;
906 }
907
908
909 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
910 // We always ignore the frame_slots arg and just use the space just below
911 // the frame pointer, which by this time is free to use.
912 switch (ret_type) {
913 case T_FLOAT:
914 __ strs(v0, Address(rfp, -wordSize));
915 break;
916 case T_DOUBLE:
917 __ strd(v0, Address(rfp, -wordSize));
918 break;
919 case T_VOID: break;
920 default: {
921 __ str(r0, Address(rfp, -wordSize));
922 }
923 }
924 }
925
926 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
927 // We always ignore the frame_slots arg and just use the space just below frame pointer
953
954 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
955 RegSet x;
956 for ( int i = first_arg ; i < arg_count ; i++ ) {
957 if (args[i].first()->is_Register()) {
958 x = x + args[i].first()->as_Register();
959 } else {
960 ;
961 }
962 }
963 __ pop(x, sp);
964 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
965 if (args[i].first()->is_Register()) {
966 ;
967 } else if (args[i].first()->is_FloatRegister()) {
968 __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
969 }
970 }
971 }
972
973 static void verify_oop_args(MacroAssembler* masm,
974 const methodHandle& method,
975 const BasicType* sig_bt,
976 const VMRegPair* regs) {
977 Register temp_reg = r19; // not part of any compiled calling seq
978 if (VerifyOops) {
979 for (int i = 0; i < method->size_of_parameters(); i++) {
980 if (sig_bt[i] == T_OBJECT ||
981 sig_bt[i] == T_ARRAY) {
982 VMReg r = regs[i].first();
983 assert(r->is_valid(), "bad oop arg");
984 if (r->is_stack()) {
985 __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
986 __ verify_oop(temp_reg);
987 } else {
988 __ verify_oop(r->as_Register());
989 }
990 }
991 }
992 }
993 }
994
995 static void gen_special_dispatch(MacroAssembler* masm,
996 const methodHandle& method,
997 const BasicType* sig_bt,
998 const VMRegPair* regs) {
999 verify_oop_args(masm, method, sig_bt, regs);
1000 vmIntrinsics::ID iid = method->intrinsic_id();
1001
1002 // Now write the args into the outgoing interpreter space
1003 bool has_receiver = false;
1004 Register receiver_reg = noreg;
1005 int member_arg_pos = -1;
1006 Register member_reg = noreg;
1007 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1008 if (ref_kind != 0) {
1009 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
1010 member_reg = r19; // known to be free at this point
1011 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1012 } else if (iid == vmIntrinsics::_invokeBasic) {
1013 has_receiver = true;
1014 } else if (iid == vmIntrinsics::_linkToNative) {
1015 member_arg_pos = method->size_of_parameters() - 1; // trailing NativeEntryPoint argument
1016 member_reg = r19; // known to be free at this point
1017 } else {
1018 fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1019 }
1020
1021 if (member_reg != noreg) {
1022 // Load the member_arg into register, if necessary.
1023 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1024 VMReg r = regs[member_arg_pos].first();
1025 if (r->is_stack()) {
1026 __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1027 } else {
1028 // no data motion is needed
1029 member_reg = r->as_Register();
1030 }
1031 }
1032
1033 if (has_receiver) {
1034 // Make sure the receiver is loaded into a register.
1035 assert(method->size_of_parameters() > 0, "oob");
1036 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1329 for (int ai = 0; ai < arg_order.length(); ai += 2) {
1330 int i = arg_order.at(ai);
1331 int c_arg = arg_order.at(ai + 1);
1332 __ block_comment(err_msg("move %d -> %d", i, c_arg));
1333 assert(c_arg != -1 && i != -1, "wrong order");
1334 #ifdef ASSERT
1335 if (in_regs[i].first()->is_Register()) {
1336 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1337 } else if (in_regs[i].first()->is_FloatRegister()) {
1338 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1339 }
1340 if (out_regs[c_arg].first()->is_Register()) {
1341 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1342 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1343 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1344 }
1345 #endif /* ASSERT */
1346 switch (in_sig_bt[i]) {
1347 case T_ARRAY:
1348 case T_OBJECT:
1349 __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1350 ((i == 0) && (!is_static)),
1351 &receiver_offset);
1352 int_args++;
1353 break;
1354 case T_VOID:
1355 break;
1356
1357 case T_FLOAT:
1358 __ float_move(in_regs[i], out_regs[c_arg]);
1359 float_args++;
1360 break;
1361
1362 case T_DOUBLE:
1363 assert( i + 1 < total_in_args &&
1364 in_sig_bt[i + 1] == T_VOID &&
1365 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1366 __ double_move(in_regs[i], out_regs[c_arg]);
1367 float_args++;
1368 break;
1369
1370 case T_LONG :
1371 __ long_move(in_regs[i], out_regs[c_arg]);
1372 int_args++;
1373 break;
1374
1375 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1376
1377 default:
1378 __ move32_64(in_regs[i], out_regs[c_arg]);
1379 int_args++;
1380 }
1381 }
1382
1383 // point c_arg at the first arg that is already loaded in case we
1384 // need to spill before we call out
1385 int c_arg = total_c_args - total_in_args;
1386
1387 // Pre-load a static method's oop into c_rarg1.
1388 if (method->is_static()) {
1389
1390 // load oop into a register
1391 __ movoop(c_rarg1,
1392 JNIHandles::make_local(method->method_holder()->java_mirror()),
1393 /*immediate*/true);
1394
1395 // Now handlize the static class mirror; it's known to be non-null.
1396 __ str(c_rarg1, Address(sp, klass_offset));
1397 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1398
1489 __ br(Assembler::NE, slow_path_lock);
1490 } else {
1491 __ b(slow_path_lock);
1492 }
1493
1494 // Slow path will re-enter here
1495 __ bind(lock_done);
1496 }
1497
1498
1499 // Finally just about ready to make the JNI call
1500
1501 // get JNIEnv* which is first argument to native
1502 __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1503
1504 // Now set thread in native
1505 __ mov(rscratch1, _thread_in_native);
1506 __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1507 __ stlrw(rscratch1, rscratch2);
1508
1509 __ rt_call(native_func);
1510
1511 __ bind(native_return);
1512
1513 intptr_t return_pc = (intptr_t) __ pc();
1514 oop_maps->add_gc_map(return_pc - start, map);
1515
1516 // Unpack native results.
1517 switch (ret_type) {
1518 case T_BOOLEAN: __ c2bool(r0); break;
1519 case T_CHAR : __ ubfx(r0, r0, 0, 16); break;
1520 case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
1521 case T_SHORT : __ sbfx(r0, r0, 0, 16); break;
1522 case T_INT : __ sbfx(r0, r0, 0, 32); break;
1523 case T_DOUBLE :
1524 case T_FLOAT :
1525 // Result is in v0; we'll save it as needed
1526 break;
1527 case T_ARRAY: // Really a handle
1528 case T_OBJECT: // Really a handle
1529 break; // can't de-handlize until after safepoint check
1703
1704 __ block_comment("Slow path unlock {");
1705 __ bind(slow_path_unlock);
1706
1707 // If we haven't already saved the native result we must save it now, as the
1708 // floating-point result register is still exposed.
1709
1710 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1711 save_native_result(masm, ret_type, stack_slots);
1712 }
1713
1714 __ mov(c_rarg2, rthread);
1715 __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1716 __ mov(c_rarg0, obj_reg);
1717
1718 // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1719 // NOTE that obj_reg == r19 currently
1720 __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1721 __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1722
1723 __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1724
1725 #ifdef ASSERT
1726 {
1727 Label L;
1728 __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1729 __ cbz(rscratch1, L);
1730 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1731 __ bind(L);
1732 }
1733 #endif /* ASSERT */
1734
1735 __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1736
1737 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1738 restore_native_result(masm, ret_type, stack_slots);
1739 }
1740 __ b(unlock_done);
1741
1742 __ block_comment("} Slow path unlock");
1743
1744 } // synchronized
1745
1746 // SLOW PATH Reguard the stack if needed
1747
1748 __ bind(reguard);
1749 save_native_result(masm, ret_type, stack_slots);
1750 __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1751 restore_native_result(masm, ret_type, stack_slots);
1752 // and continue
1753 __ b(reguard_done);
1754
1755 // SLOW PATH safepoint
1756 {
1757 __ block_comment("safepoint {");
1758 __ bind(safepoint_in_progress);
1759
1760 // Don't use call_VM, as it will see a possible pending exception, forward it,
1761 // and never return here, preventing us from clearing _last_native_pc down below.
1762 //
1763 save_native_result(masm, ret_type, stack_slots);
1764 __ mov(c_rarg0, rthread);
1765 #ifndef PRODUCT
1766 assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
1767 #endif
1768 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1769 __ blr(rscratch1);
1770
2751 __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
2752 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2753 #endif
2754 // Clear the exception oop so GC no longer processes it as a root.
2755 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2756
2757 // r0: exception oop
2758 // r8: exception handler
2759 // r4: exception pc
2760 // Jump to handler
2761
2762 __ br(r8);
2763
2764 // Make sure all code is generated
2765 masm->flush();
2766
2767 // Set exception blob
2768 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
2769 }
2770
2771 #endif // COMPILER2
2772
|