610
611 static int reg2offset(VMReg r) {
612 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
613 }
614
615 // ---------------------------------------------------------------------------
616 // Read the array of BasicTypes from a signature, and compute where the
617 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
618 // quantities. Values less than VMRegImpl::stack0 are registers, those above
619 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
620 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
// Registers (numbered up to Register::number_of_registers) are the 64-bit
// integer registers.
625
626 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
627 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
628 // units regardless of build. Of course for i486 there is no 64 bit build
629
// In contrast to other platforms, the Java calling convention here is *NOT*
// a "shifted" version of the C ABI: the Java integer argument registers
// listed below start at R3, the same first register that
// c_calling_convention() uses for C calls.
635
636 const VMReg java_iarg_reg[8] = {
637 R3->as_VMReg(),
638 R4->as_VMReg(),
639 R5->as_VMReg(),
640 R6->as_VMReg(),
641 R7->as_VMReg(),
642 R8->as_VMReg(),
643 R9->as_VMReg(),
644 R10->as_VMReg()
645 };
646
647 const VMReg java_farg_reg[13] = {
648 F1->as_VMReg(),
649 F2->as_VMReg(),
650 F3->as_VMReg(),
651 F4->as_VMReg(),
652 F5->as_VMReg(),
653 F6->as_VMReg(),
654 F7->as_VMReg(),
754 ++freg;
755 } else {
756 // Put double on stack. They must be aligned to 2 slots.
757 if (stk & 0x1) ++stk;
758 reg = VMRegImpl::stack2reg(stk);
759 stk += inc_stk_for_longdouble;
760 }
761 regs[i].set2(reg);
762 break;
763 case T_VOID:
764 // Do not count halves.
765 regs[i].set_bad();
766 break;
767 default:
768 ShouldNotReachHere();
769 }
770 }
771 return stk;
772 }
773
774 // Calling convention for calling C code.
775 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
776 VMRegPair *regs,
777 int total_args_passed) {
778 // Calling conventions for C runtime calls and calls to JNI native methods.
779 //
780 // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
781 // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
782 // the first 13 flt/dbl's in the first 13 fp regs but additionally
783 // copy flt/dbl to the stack if they are beyond the 8th argument.
784
785 const VMReg iarg_reg[8] = {
786 R3->as_VMReg(),
787 R4->as_VMReg(),
788 R5->as_VMReg(),
789 R6->as_VMReg(),
790 R7->as_VMReg(),
791 R8->as_VMReg(),
792 R9->as_VMReg(),
793 R10->as_VMReg()
899 // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
900 #if defined(ABI_ELFv2)
901 assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
902 // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
903 // indicates that all parameters can be passed in registers.
904 return stack_used ? (arg * 2) : 0;
905 #else
906 // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
907 // We have to add extra slots because ABIv1 uses a larger header.
908 return MAX2(arg, 8) * 2 + additional_frame_header_slots;
909 #endif
910 }
911
912 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
913 uint num_bits,
914 uint total_args_passed) {
915 Unimplemented();
916 return 0;
917 }
918
919 static address gen_c2i_adapter(MacroAssembler *masm,
920 int total_args_passed,
921 int comp_args_on_stack,
922 const BasicType *sig_bt,
923 const VMRegPair *regs,
924 Label& call_interpreter,
925 const Register& ientry) {
926
927 address c2i_entrypoint;
928
929 const Register sender_SP = R21_sender_SP; // == R21_tmp1
930 const Register code = R22_tmp2;
931 //const Register ientry = R23_tmp3;
932 const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
933 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
934 int value_regs_index = 0;
935
936 const Register return_pc = R27_tmp7;
937 const Register tmp = R28_tmp8;
938
939 assert_different_registers(sender_SP, code, ientry, return_pc, tmp);
940
941 // Adapter needs TOP_IJAVA_FRAME_ABI.
942 const int adapter_size = frame::top_ijava_frame_abi_size +
943 align_up(total_args_passed * wordSize, frame::alignment_in_bytes);
944
945 // regular (verified) c2i entry point
946 c2i_entrypoint = __ pc();
947
948 // Does compiled code exists? If yes, patch the caller's callsite.
949 __ ld(code, method_(code));
950 __ cmpdi(CR0, code, 0);
951 __ ld(ientry, method_(interpreter_entry)); // preloaded
952 __ beq(CR0, call_interpreter);
953
954
955 // Patch caller's callsite, method_(code) was not null which means that
956 // compiled code exists.
957 __ mflr(return_pc);
958 __ std(return_pc, _abi0(lr), R1_SP);
959 RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
960
961 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
962
963 RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
964 __ ld(return_pc, _abi0(lr), R1_SP);
965 __ ld(ientry, method_(interpreter_entry)); // preloaded
966 __ mtlr(return_pc);
967
968
969 // Call the interpreter.
970 __ BIND(call_interpreter);
971 __ mtctr(ientry);
972
973 // Get a copy of the current SP for loading caller's arguments.
974 __ mr(sender_SP, R1_SP);
975
976 // Add space for the adapter.
977 __ resize_frame(-adapter_size, R12_scratch2);
978
979 int st_off = adapter_size - wordSize;
980
981 // Write the args into the outgoing interpreter space.
982 for (int i = 0; i < total_args_passed; i++) {
983 VMReg r_1 = regs[i].first();
984 VMReg r_2 = regs[i].second();
985 if (!r_1->is_valid()) {
986 assert(!r_2->is_valid(), "");
987 continue;
988 }
989 if (r_1->is_stack()) {
990 Register tmp_reg = value_regs[value_regs_index];
991 value_regs_index = (value_regs_index + 1) % num_value_regs;
992 // The calling convention produces OptoRegs that ignore the out
993 // preserve area (JIT's ABI). We must account for it here.
994 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
995 if (!r_2->is_valid()) {
996 __ lwz(tmp_reg, ld_off, sender_SP);
997 } else {
998 __ ld(tmp_reg, ld_off, sender_SP);
999 }
1000 // Pretend stack targets were loaded into tmp_reg.
1001 r_1 = tmp_reg->as_VMReg();
1002 }
1003
1004 if (r_1->is_Register()) {
1005 Register r = r_1->as_Register();
1006 if (!r_2->is_valid()) {
1007 __ stw(r, st_off, R1_SP);
1008 st_off-=wordSize;
1009 } else {
1010 // Longs are given 2 64-bit slots in the interpreter, but the
1011 // data is passed in only 1 slot.
1012 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
1013 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1014 st_off-=wordSize;
1015 }
1016 __ std(r, st_off, R1_SP);
1017 st_off-=wordSize;
1018 }
1019 } else {
1020 assert(r_1->is_FloatRegister(), "");
1021 FloatRegister f = r_1->as_FloatRegister();
1022 if (!r_2->is_valid()) {
1023 __ stfs(f, st_off, R1_SP);
1024 st_off-=wordSize;
1025 } else {
1026 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
1027 // data is passed in only 1 slot.
1028 // One of these should get known junk...
1029 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1030 st_off-=wordSize;
1031 __ stfd(f, st_off, R1_SP);
1032 st_off-=wordSize;
1033 }
1034 }
1035 }
1036
1037 // Jump to the interpreter just as if interpreter was doing it.
1038
1039 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
1040
1041 // load TOS
1042 __ addi(R15_esp, R1_SP, st_off);
1043
1044 // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
1045 assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
1046 __ bctr();
1047
1048 return c2i_entrypoint;
1049 }
1050
1051 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1052 int total_args_passed,
1053 int comp_args_on_stack,
1054 const BasicType *sig_bt,
1055 const VMRegPair *regs) {
1056
1057 // Load method's entry-point from method.
1058 __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
1059 __ mtctr(R12_scratch2);
1060
1061 // We will only enter here from an interpreted frame and never from after
1062 // passing thru a c2i. Azul allowed this but we do not. If we lose the
1063 // race and use a c2i we will remain interpreted for the race loser(s).
1064 // This removes all sorts of headaches on the x86 side and also eliminates
1065 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
1066
1067 // Note: r13 contains the senderSP on entry. We must preserve it since
1068 // we may do a i2c -> c2i transition if we lose a race where compiled
1069 // code goes non-entrant while we get args ready.
1070 // In addition we use r13 to locate all the interpreter args as
1071 // we must align the stack to 16 bytes on an i2c entry else we
1072 // lose alignment we expect in all compiled code and register
1073 // save code can segv when fxsave instructions find improperly
1074 // aligned stack pointer.
1075
1076 const Register ld_ptr = R15_esp;
1077 const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
1078 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1079 int value_regs_index = 0;
1080
1081 int ld_offset = total_args_passed*wordSize;
1082
1083 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
1084 // in registers, we will occasionally have no stack args.
1085 int comp_words_on_stack = 0;
1086 if (comp_args_on_stack) {
1087 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
1088 // registers are below. By subtracting stack0, we either get a negative
1089 // number (all values in registers) or the maximum stack slot accessed.
1090
1091 // Convert 4-byte c2 stack slots to words.
1092 comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1093 // Round up to miminum stack alignment, in wordSize.
1094 comp_words_on_stack = align_up(comp_words_on_stack, 2);
1095 __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
1096 }
1097
1098 // Now generate the shuffle code. Pick up all register args and move the
1099 // rest through register value=Z_R12.
1100 BLOCK_COMMENT("Shuffle arguments");
1101 for (int i = 0; i < total_args_passed; i++) {
1102 if (sig_bt[i] == T_VOID) {
1103 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
1104 continue;
1105 }
1106
1107 // Pick up 0, 1 or 2 words from ld_ptr.
1108 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1109 "scrambled load targets?");
1110 VMReg r_1 = regs[i].first();
1111 VMReg r_2 = regs[i].second();
1112 if (!r_1->is_valid()) {
1113 assert(!r_2->is_valid(), "");
1114 continue;
1115 }
1116 if (r_1->is_FloatRegister()) {
1117 if (!r_2->is_valid()) {
1118 __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
1119 ld_offset-=wordSize;
1120 } else {
1121 // Skip the unused interpreter slot.
1122 __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
1123 ld_offset-=2*wordSize;
1124 }
1125 } else {
1126 Register r;
1127 if (r_1->is_stack()) {
1128 // Must do a memory to memory move thru "value".
1129 r = value_regs[value_regs_index];
1130 value_regs_index = (value_regs_index + 1) % num_value_regs;
1131 } else {
1132 r = r_1->as_Register();
1133 }
1134 if (!r_2->is_valid()) {
1135 // Not sure we need to do this but it shouldn't hurt.
1136 if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
1137 __ ld(r, ld_offset, ld_ptr);
1138 ld_offset-=wordSize;
1139 } else {
1140 __ lwz(r, ld_offset, ld_ptr);
1141 ld_offset-=wordSize;
1142 }
1143 } else {
1144 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
1145 // data is passed in only 1 slot.
1146 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
1147 ld_offset-=wordSize;
1148 }
1149 __ ld(r, ld_offset, ld_ptr);
1150 ld_offset-=wordSize;
1151 }
1152
1153 if (r_1->is_stack()) {
1154 // Now store value where the compiler expects it
1155 int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
1156
1157 if (sig_bt[i] == T_INT || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN ||
1158 sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR || sig_bt[i] == T_BYTE) {
1159 __ stw(r, st_off, R1_SP);
1160 } else {
1161 __ std(r, st_off, R1_SP);
1162 }
1163 }
1164 }
1165 }
1166
1167 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1168
1169 BLOCK_COMMENT("Store method");
1170 // Store method into thread->callee_target.
1171 // We might end up in handle_wrong_method if the callee is
1172 // deoptimized as we race thru here. If that happens we don't want
1173 // to take a safepoint because the caller frame will look
1174 // interpreted and arguments are now "compiled" so it is much better
1175 // to make this transition invisible to the stack walking
1176 // code. Unfortunately if we try and find the callee by normal means
1177 // a safepoint is possible. So we stash the desired callee in the
1178 // thread and the vm will find there should this case occur.
1179 __ std(R19_method, thread_(callee_target));
1180
1181 // Jump to the compiled code just as if compiled code was doing it.
1182 __ bctr();
1183 }
1184
1185 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
1186 int total_args_passed,
1187 int comp_args_on_stack,
1188 const BasicType *sig_bt,
1189 const VMRegPair *regs,
1190 address entry_address[AdapterBlob::ENTRY_COUNT]) {
1191 // entry: i2c
1192
1193 __ align(CodeEntryAlignment);
1194 entry_address[AdapterBlob::I2C] = __ pc();
1195 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
1196
1197
1198 // entry: c2i unverified
1199
1200 __ align(CodeEntryAlignment);
1201 BLOCK_COMMENT("c2i unverified entry");
1202 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
1203
1204 // inline_cache contains a CompiledICData
1205 const Register ic = R19_inline_cache_reg;
1206 const Register ic_klass = R11_scratch1;
1207 const Register receiver_klass = R12_scratch2;
1208 const Register code = R21_tmp1;
1209 const Register ientry = R23_tmp3;
1210
1211 assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
1212 assert(R11_scratch1 == R11, "need prologue scratch register");
1213
1214 Label call_interpreter;
1215
1216 __ ic_check(4 /* end_alignment */);
1217 __ ld(R19_method, CompiledICData::speculated_method_offset(), ic);
1218 // Argument is valid and klass is as expected, continue.
1219
1220 __ ld(code, method_(code));
1221 __ cmpdi(CR0, code, 0);
1222 __ ld(ientry, method_(interpreter_entry)); // preloaded
1223 __ beq_predict_taken(CR0, call_interpreter);
1224
1225 // Branch to ic_miss_stub.
1226 __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1227
1228 // entry: c2i
1229
1230 entry_address[AdapterBlob::C2I] = __ pc();
1231
1232 // Class initialization barrier for static methods
1233 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
1234 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1235 Label L_skip_barrier;
1236
1237 // Bypass the barrier for non-static methods
1238 __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1239 __ andi_(R0, R0, JVM_ACC_STATIC);
1240 __ beq(CR0, L_skip_barrier); // non-static
1241
1242 Register klass = R11_scratch1;
1243 __ load_method_holder(klass, R19_method);
1244 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
1245
1246 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
1247 __ mtctr(klass);
1248 __ bctr();
1249
1250 __ bind(L_skip_barrier);
1251 entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();
1252
1253 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1254 bs->c2i_entry_barrier(masm, /* tmp register*/ ic_klass, /* tmp register*/ receiver_klass, /* tmp register*/ code);
1255
1256 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);
1257 return;
1258 }
1259
1260 // An oop arg. Must pass a handle not the oop itself.
1261 static void object_move(MacroAssembler* masm,
1262 int frame_size_in_slots,
1263 OopMap* oop_map, int oop_handle_offset,
1264 bool is_receiver, int* receiver_offset,
1265 VMRegPair src, VMRegPair dst,
1266 Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
1267 assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
1268 "receiver has already been moved");
1269
1270 // We must pass a handle. First figure out the location we use as a handle.
1271
1272 if (src.first()->is_stack()) {
1273 // stack to stack or reg
1274
1275 const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1276 Label skip;
1277 const int oop_slot_in_callers_frame = reg2slot(src.first());
3692 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3693
3694 // Local scratch arrays
3695 unsigned long
3696 *a = scratch + 0 * longwords,
3697 *n = scratch + 1 * longwords,
3698 *m = scratch + 2 * longwords;
3699
3700 reverse_words((unsigned long *)a_ints, a, longwords);
3701 reverse_words((unsigned long *)n_ints, n, longwords);
3702
3703 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3704 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3705 } else {
3706 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3707 }
3708
3709 reverse_words(m, (unsigned long *)m_ints, longwords);
3710 }
3711
3712 #if INCLUDE_JFR
3713
3714 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
3715 // It returns a jobject handle to the event writer.
3716 // The handle is dereferenced and the return value is the event writer oop.
3717 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3718 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
3719 CodeBuffer code(name, 512, 64);
3720 MacroAssembler* masm = new MacroAssembler(&code);
3721
3722 Register tmp1 = R10_ARG8;
3723 Register tmp2 = R9_ARG7;
3724
3725 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3726 address start = __ pc();
3727 __ mflr(tmp1);
3728 __ std(tmp1, _abi0(lr), R1_SP); // save return pc
3729 __ push_frame_reg_args(0, tmp1);
3730 int frame_complete = __ pc() - start;
3731 __ set_last_Java_frame(R1_SP, noreg);
3767 int frame_complete = __ pc() - start;
3768 __ set_last_Java_frame(R1_SP, noreg);
3769 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
3770 address calls_return_pc = __ last_calls_return_pc();
3771 __ reset_last_Java_frame();
3772 __ pop_frame();
3773 __ ld(tmp1, _abi0(lr), R1_SP);
3774 __ mtlr(tmp1);
3775 __ blr();
3776
3777 OopMapSet* oop_maps = new OopMapSet();
3778 OopMap* map = new OopMap(framesize, 0);
3779 oop_maps->add_gc_map(calls_return_pc - start, map);
3780
3781 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3782 RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3783 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3784 oop_maps, false);
3785 return stub;
3786 }
3787
3788 #endif // INCLUDE_JFR
|
610
611 static int reg2offset(VMReg r) {
612 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
613 }
614
615 // ---------------------------------------------------------------------------
616 // Read the array of BasicTypes from a signature, and compute where the
617 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
618 // quantities. Values less than VMRegImpl::stack0 are registers, those above
619 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
620 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
// Registers (numbered up to Register::number_of_registers) are the 64-bit
// integer registers.
625
626 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
627 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
628 // units regardless of build. Of course for i486 there is no 64 bit build
629
630 // In contrast to other platforms the Java calling convention is *NOT* a
631 // "shifted" version of the C ABI.
632
633 const VMReg java_iarg_reg[8] = {
634 R3->as_VMReg(),
635 R4->as_VMReg(),
636 R5->as_VMReg(),
637 R6->as_VMReg(),
638 R7->as_VMReg(),
639 R8->as_VMReg(),
640 R9->as_VMReg(),
641 R10->as_VMReg()
642 };
643
644 const VMReg java_farg_reg[13] = {
645 F1->as_VMReg(),
646 F2->as_VMReg(),
647 F3->as_VMReg(),
648 F4->as_VMReg(),
649 F5->as_VMReg(),
650 F6->as_VMReg(),
651 F7->as_VMReg(),
751 ++freg;
752 } else {
753 // Put double on stack. They must be aligned to 2 slots.
754 if (stk & 0x1) ++stk;
755 reg = VMRegImpl::stack2reg(stk);
756 stk += inc_stk_for_longdouble;
757 }
758 regs[i].set2(reg);
759 break;
760 case T_VOID:
761 // Do not count halves.
762 regs[i].set_bad();
763 break;
764 default:
765 ShouldNotReachHere();
766 }
767 }
768 return stk;
769 }
770
771 // Similar to java_calling_convention() but for multiple return
772 // values. There's no way to store them on the stack so if we don't
773 // have enough registers, multiple values can't be returned.
774 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
775 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
776 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
777 VMRegPair *regs,
778 int total_args_passed) {
779 // Create the mapping between argument positions and
780 // registers.
781 static const Register INT_ArgReg[java_return_convention_max_int] = {
782 R3_RET, R10_ARG8, R9_ARG7, R8_ARG6, R7_ARG5, R6_ARG4, R5_ARG3, R4_ARG2
783 };
784 static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
785 F1_RET, F2_ARG2, F3_ARG3, F4_ARG4, F5_ARG5, F6_ARG6, F7_ARG7, F8_ARG8,
786 F9_ARG9, F10_ARG10, F11_ARG11, F12_ARG12, F13_ARG13
787 };
788
789
790 uint int_args = 0;
791 uint fp_args = 0;
792
793 for (int i = 0; i < total_args_passed; i++) {
794 switch (sig_bt[i]) {
795 case T_BOOLEAN:
796 case T_CHAR:
797 case T_BYTE:
798 case T_SHORT:
799 case T_INT:
800 if (int_args < java_return_convention_max_int) {
801 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
802 int_args++;
803 } else {
804 return -1;
805 }
806 break;
807 case T_VOID:
808 // halves of T_LONG or T_DOUBLE
809 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
810 regs[i].set_bad();
811 break;
812 case T_LONG:
813 assert(sig_bt[i + 1] == T_VOID, "expecting half");
814 // fall through
815 case T_OBJECT:
816 case T_ARRAY:
817 case T_ADDRESS:
818 case T_METADATA:
819 if (int_args < java_return_convention_max_int) {
820 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
821 int_args++;
822 } else {
823 return -1;
824 }
825 break;
826 case T_FLOAT:
827 if (fp_args < java_return_convention_max_float) {
828 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
829 fp_args++;
830 } else {
831 return -1;
832 }
833 break;
834 case T_DOUBLE:
835 assert(sig_bt[i + 1] == T_VOID, "expecting half");
836 if (fp_args < java_return_convention_max_float) {
837 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
838 fp_args++;
839 } else {
840 return -1;
841 }
842 break;
843 default:
844 ShouldNotReachHere();
845 break;
846 }
847 }
848
849 return int_args + fp_args;
850 }
851
852 // Calling convention for calling C code.
853 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
854 VMRegPair *regs,
855 int total_args_passed) {
856 // Calling conventions for C runtime calls and calls to JNI native methods.
857 //
858 // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
859 // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
860 // the first 13 flt/dbl's in the first 13 fp regs but additionally
861 // copy flt/dbl to the stack if they are beyond the 8th argument.
862
863 const VMReg iarg_reg[8] = {
864 R3->as_VMReg(),
865 R4->as_VMReg(),
866 R5->as_VMReg(),
867 R6->as_VMReg(),
868 R7->as_VMReg(),
869 R8->as_VMReg(),
870 R9->as_VMReg(),
871 R10->as_VMReg()
977 // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
978 #if defined(ABI_ELFv2)
979 assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
980 // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
981 // indicates that all parameters can be passed in registers.
982 return stack_used ? (arg * 2) : 0;
983 #else
984 // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
985 // We have to add extra slots because ABIv1 uses a larger header.
986 return MAX2(arg, 8) * 2 + additional_frame_header_slots;
987 #endif
988 }
989
990 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
991 uint num_bits,
992 uint total_args_passed) {
993 Unimplemented();
994 return 0;
995 }
996
997 // Patch the callers callsite with entry to compiled code if it exists.
998 static void patch_callers_callsite(MacroAssembler *masm, int adapter_size, int total_args_passed, const VMRegPair *regs) {
999 Label L;
1000 __ ld(R0, in_bytes(Method::code_offset()), R19_method);
1001 __ cmpdi(CR0, R0, 0);
1002 __ beq(CR0, L);
1003
1004 // Patch caller's callsite, method_(code) was not null which means that
1005 // compiled code exists.
1006 const Register return_pc = R11_scratch1;
1007 const Register tmp = R12_scratch2;
1008 __ mflr(return_pc);
1009 __ std(return_pc, _abi0(lr), R1_SP);
1010 RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
1011
1012 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
1013
1014 RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
1015 __ ld(return_pc, _abi0(lr), R1_SP);
1016 __ mtlr(return_pc);
1017
1018 // callsite->set_to_clean() uses icache flush including isync
1019
1020 __ bind(L);
1021 }
1022
1023 // For each inline type argument, sig includes the list of fields of
1024 // the inline type. This utility function computes the number of
1025 // arguments for the call if inline types are passed by reference (the
1026 // calling convention the interpreter expects).
1027 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
1028 int total_args_passed = 0;
1029 if (InlineTypePassFieldsAsArgs) {
1030 for (int i = 0; i < sig_extended->length(); i++) {
1031 BasicType bt = sig_extended->at(i)._bt;
1032 if (bt == T_METADATA) {
1033 // In sig_extended, an inline type argument starts with:
1034 // T_METADATA, followed by the types of the fields of the
1035 // inline type and T_VOID to mark the end of the value
1036 // type. Inline types are flattened so, for instance, in the
1037 // case of an inline type with an int field and an inline type
1038 // field that itself has 2 fields, an int and a long:
1039 // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
1040 // slot for the T_LONG) T_VOID (inner inline type) T_VOID
1041 // (outer inline type)
1042 total_args_passed++;
1043 int vt = 1;
1044 do {
1045 i++;
1046 BasicType bt = sig_extended->at(i)._bt;
1047 BasicType prev_bt = sig_extended->at(i-1)._bt;
1048 if (bt == T_METADATA) {
1049 vt++;
1050 } else if (bt == T_VOID &&
1051 prev_bt != T_LONG &&
1052 prev_bt != T_DOUBLE) {
1053 vt--;
1054 }
1055 } while (vt != 0);
1056 } else {
1057 total_args_passed++;
1058 }
1059 }
1060 } else {
1061 total_args_passed = sig_extended->length();
1062 }
1063 return total_args_passed;
1064 }
1065
1066 static void gen_c2i_adapter(MacroAssembler *masm,
1067 const GrowableArray<SigEntry>* sig_extended,
1068 const VMRegPair *regs,
1069 bool requires_clinit_barrier,
1070 address& c2i_no_clinit_check_entry,
1071 Label& skip_fixup,
1072 address start,
1073 OopMapSet* oop_maps,
1074 int& frame_complete,
1075 int& frame_size_in_words,
1076 bool alloc_inline_receiver) {
1077 if (requires_clinit_barrier) {
1078 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1079 Label L_skip_barrier;
1080
1081 // Bypass the barrier for non-static methods
1082 __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1083 __ andi_(R0, R0, JVM_ACC_STATIC);
1084 __ beq(CR0, L_skip_barrier); // non-static
1085
1086 Register klass = R11_scratch1;
1087 __ load_method_holder(klass, R19_method);
1088 __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
1089
1090 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
1091 __ mtctr(klass);
1092 __ bctr();
1093
1094 __ bind(L_skip_barrier);
1095 c2i_no_clinit_check_entry = __ pc();
1096 }
1097
1098 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1099 bs->c2i_entry_barrier(masm, R11_scratch1, R12_scratch2, R21_tmp1);
1100
1101 // Since all args are passed on the stack, total_args_passed *
1102 // Interpreter::stackElementSize is the space we need.
1103 int total_args_passed = compute_total_args_passed_int(sig_extended);
1104 assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
1105
1106 // Adapter needs TOP_IJAVA_FRAME_ABI.
1107 const int adapter_size = frame::top_ijava_frame_abi_size +
1108 align_up(total_args_passed * wordSize, frame::alignment_in_bytes);
1109
1110 // Before we get into the guts of the C2I adapter, see if we should be here
1111 // at all. We've come from compiled code and are attempting to jump to the
1112 // interpreter, which means the caller made a static call to get here
1113 // (vcalls always get a compiled target if there is one). Check for a
1114 // compiled target. If there is one, we need to patch the caller's call.
1115 patch_callers_callsite(masm, adapter_size, total_args_passed, regs);
1116
1117 __ bind(skip_fixup);
1118
1119 if (InlineTypePassFieldsAsArgs) {
1120 // Is there an inline type argument?
1121 bool has_inline_argument = false;
1122 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
1123 has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
1124 }
1125 if (has_inline_argument) {
1126 __ unimplemented("c2i has_inline_argument");
1127 }
1128 }
1129
1130 // Call the interpreter.
1131 const Register tmp = R22_tmp2, ientry = R23_tmp3;
1132 const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
1133 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1134 int value_regs_index = 0;
1135
1136 __ ld(ientry, method_(interpreter_entry)); // preloaded
1137 __ mtctr(ientry);
1138
1139 // Get a copy of the current SP for loading caller's arguments.
1140 __ mr(R21_sender_SP, R1_SP);
1141
1142 // Add space for the adapter.
1143 __ resize_frame(-adapter_size, R12_scratch2);
1144
1145 int st_off = adapter_size - wordSize;
1146
1147 // Write the args into the outgoing interpreter space.
1148 // TODO: support for InlineTypePassFieldsAsArgs
1149 for (int i = 0; i < total_args_passed; i++) {
1150 BasicType bt = sig_extended->at(i)._bt;
1151
1152 VMReg r_1 = regs[i].first();
1153 VMReg r_2 = regs[i].second();
1154 if (!r_1->is_valid()) {
1155 assert(!r_2->is_valid(), "");
1156 continue;
1157 }
1158 if (r_1->is_stack()) {
1159 Register tmp_reg = value_regs[value_regs_index];
1160 value_regs_index = (value_regs_index + 1) % num_value_regs;
1161 // The calling convention produces OptoRegs that ignore the out
1162 // preserve area (JIT's ABI). We must account for it here.
1163 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1164 if (!r_2->is_valid()) {
1165 __ lwz(tmp_reg, ld_off, R21_sender_SP);
1166 } else {
1167 __ ld(tmp_reg, ld_off, R21_sender_SP);
1168 }
1169 // Pretend stack targets were loaded into tmp_reg.
1170 r_1 = tmp_reg->as_VMReg();
1171 }
1172
1173 if (r_1->is_Register()) {
1174 Register r = r_1->as_Register();
1175 if (!r_2->is_valid()) {
1176 __ stw(r, st_off, R1_SP);
1177 st_off-=wordSize;
1178 } else {
1179 // Longs are given 2 64-bit slots in the interpreter, but the
1180 // data is passed in only 1 slot.
1181 if (bt == T_LONG || bt == T_DOUBLE) {
1182 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1183 st_off-=wordSize;
1184 }
1185 __ std(r, st_off, R1_SP);
1186 st_off-=wordSize;
1187 }
1188 } else {
1189 assert(r_1->is_FloatRegister(), "");
1190 FloatRegister f = r_1->as_FloatRegister();
1191 if (!r_2->is_valid()) {
1192 __ stfs(f, st_off, R1_SP);
1193 st_off-=wordSize;
1194 } else {
1195 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
1196 // data is passed in only 1 slot.
1197 // One of these should get known junk...
1198 DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1199 st_off-=wordSize;
1200 __ stfd(f, st_off, R1_SP);
1201 st_off-=wordSize;
1202 }
1203 }
1204 }
1205
1206 // Jump to the interpreter just as if interpreter was doing it.
1207
1208 __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
1209
1210 // load TOS
1211 __ addi(R15_esp, R1_SP, st_off);
1212
1213 __ bctr();
1214 }
1215
1216 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1217 int comp_args_on_stack,
1218 const GrowableArray<SigEntry>* sig,
1219 const VMRegPair *regs) {
1220
1221 // Load method's entry-point from method.
1222 __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
1223 __ mtctr(R12_scratch2);
1224
1225 // We will only enter here from an interpreted frame and never from after
1226 // passing thru a c2i. Azul allowed this but we do not. If we lose the
1227 // race and use a c2i we will remain interpreted for the race loser(s).
1228 // This removes all sorts of headaches on the x86 side and also eliminates
1229 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
1230
1231 // Note: r13 contains the senderSP on entry. We must preserve it since
1232 // we may do a i2c -> c2i transition if we lose a race where compiled
1233 // code goes non-entrant while we get args ready.
1234 // In addition we use r13 to locate all the interpreter args as
1235 // we must align the stack to 16 bytes on an i2c entry else we
1236 // lose alignment we expect in all compiled code and register
1237 // save code can segv when fxsave instructions find improperly
1238 // aligned stack pointer.
1239
1240 const Register ld_ptr = R15_esp;
1241 const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
1242 const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1243 int value_regs_index = 0;
1244
1245 int total_args_passed = sig->length();
1246 int ld_offset = total_args_passed*wordSize;
1247
1248 // Cut-out for having no stack args. Since up to 2 int/oop args are passed
1249 // in registers, we will occasionally have no stack args.
1250 int comp_words_on_stack = 0;
1251 if (comp_args_on_stack) {
1252 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
1253 // registers are below. By subtracting stack0, we either get a negative
1254 // number (all values in registers) or the maximum stack slot accessed.
1255
1256 // Convert 4-byte c2 stack slots to words.
1257 comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1258 // Round up to miminum stack alignment, in wordSize.
1259 comp_words_on_stack = align_up(comp_words_on_stack, 2);
1260 __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
1261 }
1262
1263 // Now generate the shuffle code. Pick up all register args and move the
1264 // rest through register value=Z_R12.
1265 BLOCK_COMMENT("Shuffle arguments");
1266
1267 for (int i = 0; i < total_args_passed; i++) {
1268 BasicType bt = sig->at(i)._bt;
1269 if (bt == T_VOID) {
1270 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
1271 continue;
1272 }
1273
1274 // Pick up 0, 1 or 2 words from ld_ptr.
1275 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1276 "scrambled load targets?");
1277 VMReg r_1 = regs[i].first();
1278 VMReg r_2 = regs[i].second();
1279 if (!r_1->is_valid()) {
1280 assert(!r_2->is_valid(), "");
1281 continue;
1282 }
1283 if (r_1->is_FloatRegister()) {
1284 if (!r_2->is_valid()) {
1285 __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
1286 ld_offset-=wordSize;
1287 } else {
1288 // Skip the unused interpreter slot.
1289 __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
1290 ld_offset-=2*wordSize;
1291 }
1292 } else {
1293 Register r;
1294 if (r_1->is_stack()) {
1295 // Must do a memory to memory move thru "value".
1296 r = value_regs[value_regs_index];
1297 value_regs_index = (value_regs_index + 1) % num_value_regs;
1298 } else {
1299 r = r_1->as_Register();
1300 }
1301 if (!r_2->is_valid()) {
1302 // Not sure we need to do this but it shouldn't hurt.
1303 if (is_reference_type(bt) || bt == T_ADDRESS) {
1304 __ ld(r, ld_offset, ld_ptr);
1305 ld_offset-=wordSize;
1306 } else {
1307 __ lwz(r, ld_offset, ld_ptr);
1308 ld_offset-=wordSize;
1309 }
1310 } else {
1311 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
1312 // data is passed in only 1 slot.
1313 if (bt == T_LONG || bt == T_DOUBLE) {
1314 ld_offset-=wordSize;
1315 }
1316 __ ld(r, ld_offset, ld_ptr);
1317 ld_offset-=wordSize;
1318 }
1319
1320 if (r_1->is_stack()) {
1321 // Now store value where the compiler expects it
1322 int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
1323
1324 if (bt == T_INT || bt == T_FLOAT || bt == T_BOOLEAN ||
1325 bt == T_SHORT || bt == T_CHAR || bt == T_BYTE) {
1326 __ stw(r, st_off, R1_SP);
1327 } else {
1328 __ std(r, st_off, R1_SP);
1329 }
1330 }
1331 }
1332 }
1333
1334 __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1335
1336 BLOCK_COMMENT("Store method");
1337 // Store method into thread->callee_target.
1338 // We might end up in handle_wrong_method if the callee is
1339 // deoptimized as we race thru here. If that happens we don't want
1340 // to take a safepoint because the caller frame will look
1341 // interpreted and arguments are now "compiled" so it is much better
1342 // to make this transition invisible to the stack walking
1343 // code. Unfortunately if we try and find the callee by normal means
1344 // a safepoint is possible. So we stash the desired callee in the
1345 // thread and the vm will find there should this case occur.
1346 __ std(R19_method, thread_(callee_target));
1347
1348 // Jump to the compiled code just as if compiled code was doing it.
1349 __ bctr();
1350 }
1351
1352 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1353 __ ic_check(BytesPerInstWord /* end_alignment */);
1354 __ ld(R19_method, CompiledICData::speculated_method_offset(), R19_inline_cache_reg);
1355
1356 // Method might have been compiled since the call site was patched to
1357 // interpreted; if that is the case treat it as a miss so we can get
1358 // the call site corrected.
1359 __ ld(R0, method_(code));
1360 __ cmpdi(CR0, R0, 0);
1361 __ beq_predict_taken(CR0, skip_fixup);
1362
1363 // Branch to ic_miss_stub.
1364 __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1365 }
1366
1367 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
1368 int comp_args_on_stack,
1369 const GrowableArray<SigEntry>* sig,
1370 const VMRegPair* regs,
1371 const GrowableArray<SigEntry>* sig_cc,
1372 const VMRegPair* regs_cc,
1373 const GrowableArray<SigEntry>* sig_cc_ro,
1374 const VMRegPair* regs_cc_ro,
1375 address entry_address[AdapterBlob::ENTRY_COUNT],
1376 AdapterBlob*& new_adapter,
1377 bool allocate_code_blob) {
1378
1379 entry_address[AdapterBlob::I2C] = __ pc();
1380 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
1381
1382 // -------------------------------------------------------------------------
1383 // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
1384 // to the interpreter. The args start out packed in the compiled layout. They
1385 // need to be unpacked into the interpreter layout. This will almost always
1386 // require some stack space. We grow the current (compiled) stack, then repack
1387 // the args. We finally end in a jump to the generic interpreter entry point.
1388 // On exit from the interpreter, the interpreter will restore our SP (lest the
1389 // compiled code, which relies solely on SP and not FP, get sick).
1390
1391 entry_address[AdapterBlob::C2I_Unverified] = __ pc();
1392 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1393 Label skip_fixup;
1394
1395 gen_inline_cache_check(masm, skip_fixup);
1396
1397 OopMapSet* oop_maps = new OopMapSet();
1398 int frame_complete = CodeOffsets::frame_never_safe;
1399 int frame_size_in_words = 0;
1400
1401 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1402 entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
1403 entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
1404 if (regs_cc != regs_cc_ro) {
1405 // No class init barrier needed because method is guaranteed to be non-static
1406 __ unimplemented("C2I_Inline_RO");
1407 #if 0
1408 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1409 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1410 #endif
1411 skip_fixup.reset();
1412 }
1413
1414 // Scalarized c2i adapter
1415 entry_address[AdapterBlob::C2I] = __ pc();
1416 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1417 gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1418 skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
1419
1420 // Non-scalarized c2i adapter
1421 if (regs != regs_cc) {
1422 entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1423 Label inline_entry_skip_fixup;
1424 __ unimplemented("C2I_Unverified_Inline");
1425 #if 0
1426 gen_inline_cache_check(masm, inline_entry_skip_fixup);
1427 #endif
1428
1429 entry_address[AdapterBlob::C2I_Inline] = __ pc();
1430 __ unimplemented("C2I_Inline2");
1431 #if 0
1432 gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1433 inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1434 #endif
1435 }
1436 // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1437 // the GC knows about the location of oop argument locations passed to the c2i adapter.
1438 if (allocate_code_blob) {
1439 bool caller_must_gc_arguments = (regs != regs_cc);
1440 int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
1441 assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
1442 AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
1443 new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1444 }
1445 }
1446
1447 // An oop arg. Must pass a handle not the oop itself.
1448 static void object_move(MacroAssembler* masm,
1449 int frame_size_in_slots,
1450 OopMap* oop_map, int oop_handle_offset,
1451 bool is_receiver, int* receiver_offset,
1452 VMRegPair src, VMRegPair dst,
1453 Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
1454 assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
1455 "receiver has already been moved");
1456
1457 // We must pass a handle. First figure out the location we use as a handle.
1458
1459 if (src.first()->is_stack()) {
1460 // stack to stack or reg
1461
1462 const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1463 Label skip;
1464 const int oop_slot_in_callers_frame = reg2slot(src.first());
3879 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3880
3881 // Local scratch arrays
3882 unsigned long
3883 *a = scratch + 0 * longwords,
3884 *n = scratch + 1 * longwords,
3885 *m = scratch + 2 * longwords;
3886
3887 reverse_words((unsigned long *)a_ints, a, longwords);
3888 reverse_words((unsigned long *)n_ints, n, longwords);
3889
3890 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3891 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3892 } else {
3893 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3894 }
3895
3896 reverse_words(m, (unsigned long *)m_ints, longwords);
3897 }
3898
3899 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
3900 Unimplemented();
3901 return nullptr;
3902 }
3903
3904 #if INCLUDE_JFR
3905
3906 // For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
3907 // It returns a jobject handle to the event writer.
3908 // The handle is dereferenced and the return value is the event writer oop.
3909 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3910 const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
3911 CodeBuffer code(name, 512, 64);
3912 MacroAssembler* masm = new MacroAssembler(&code);
3913
3914 Register tmp1 = R10_ARG8;
3915 Register tmp2 = R9_ARG7;
3916
3917 int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3918 address start = __ pc();
3919 __ mflr(tmp1);
3920 __ std(tmp1, _abi0(lr), R1_SP); // save return pc
3921 __ push_frame_reg_args(0, tmp1);
3922 int frame_complete = __ pc() - start;
3923 __ set_last_Java_frame(R1_SP, noreg);
3959 int frame_complete = __ pc() - start;
3960 __ set_last_Java_frame(R1_SP, noreg);
3961 __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
3962 address calls_return_pc = __ last_calls_return_pc();
3963 __ reset_last_Java_frame();
3964 __ pop_frame();
3965 __ ld(tmp1, _abi0(lr), R1_SP);
3966 __ mtlr(tmp1);
3967 __ blr();
3968
3969 OopMapSet* oop_maps = new OopMapSet();
3970 OopMap* map = new OopMap(framesize, 0);
3971 oop_maps->add_gc_map(calls_return_pc - start, map);
3972
3973 RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3974 RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3975 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3976 oop_maps, false);
3977 return stub;
3978 }
3979 #endif // INCLUDE_JFR
|