12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/macroAssembler.hpp"
26 #include "code/codeBlob.hpp"
27 #include "code/codeBlob.hpp"
28 #include "code/vmreg.inline.hpp"
29 #include "compiler/disassembler.hpp"
30 #include "logging/logStream.hpp"
31 #include "memory/resourceArea.hpp"
32 #include "prims/universalUpcallHandler.hpp"
33 #include "runtime/sharedRuntime.hpp"
34 #include "runtime/signature.hpp"
35 #include "runtime/stubRoutines.hpp"
36 #include "utilities/formatBuffer.hpp"
37 #include "utilities/globalDefinitions.hpp"
38
39 #define __ _masm->
40
41 // 1. Create buffer according to layout
42 // 2. Load registers & stack args into buffer
43 // 3. Call upcall helper with upcall handler instance & buffer pointer (C++ ABI)
44 // 4. Load return value from buffer into foreign ABI registers
45 // 5. Return
46 address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) {
47 ResourceMark rm;
48 const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
49 const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout);
50
51 CodeBuffer buffer("upcall_stub", 1024, upcall_stub_size);
52
53 MacroAssembler* _masm = new MacroAssembler(&buffer);
54 int stack_alignment_C = 16; // bytes
55 int register_size = sizeof(uintptr_t);
56 int buffer_alignment = xmm_reg_size;
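  // The shuffle buffer is aligned to xmm_reg_size because vector arguments are copied into it in xmm-sized slots (see the movdqu stores below).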
57
58 // stub code
59 __ enter();
60
61 // save pointer to JNI receiver handle into constant segment
62 Address rec_adr = __ as_Address(InternalAddress(__ address_constant((address)rec)));
63
64 __ subptr(rsp, (int) align_up(layout.buffer_size, buffer_alignment));
65
66 Register used[] = { c_rarg0, c_rarg1, rax, rbx, rdi, rsi, r12, r13, r14, r15 };
67 GrowableArray<Register> preserved;
68 // TODO need to preserve anything killed by the upcall that is non-volatile, needs XMM regs as well, probably
69 for (size_t i = 0; i < sizeof(used)/sizeof(Register); i++) {
70 Register reg = used[i];
71 if (!abi.is_volatile_reg(reg)) {
72 preserved.push(reg);
73 }
74 }
75
76 int preserved_size = align_up(preserved.length() * register_size, stack_alignment_C); // includes register alignment
77 int buffer_offset = preserved_size; // offset from rsp
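  // After the subptr below, the preserved registers occupy [rsp, rsp + preserved_size) and the shuffle buffer starts at rsp + buffer_offset.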
78
79 __ subptr(rsp, preserved_size);
80 for (int i = 0; i < preserved.length(); i++) {
81 __ movptr(Address(rsp, i * register_size), preserved.at(i));
82 }
83
84 for (int i = 0; i < abi._integer_argument_registers.length(); i++) {
85 size_t offs = buffer_offset + layout.arguments_integer + i * sizeof(uintptr_t);
86 __ movptr(Address(rsp, (int)offs), abi._integer_argument_registers.at(i));
87 }
88
89 for (int i = 0; i < abi._vector_argument_registers.length(); i++) {
90 XMMRegister reg = abi._vector_argument_registers.at(i);
91 size_t offs = buffer_offset + layout.arguments_vector + i * xmm_reg_size;
92 __ movdqu(Address(rsp, (int)offs), reg);
93 }
94
95 // Capture prev stack pointer (stack arguments base)
96 #ifndef _WIN64
97 __ lea(rax, Address(rbp, 16)); // skip frame+return address
98 #else
99 __ lea(rax, Address(rbp, 16 + 32)); // also skip shadow space
100 #endif
101 __ movptr(Address(rsp, buffer_offset + (int) layout.stack_args), rax);
102 #ifndef PRODUCT
103 __ movptr(Address(rsp, buffer_offset + (int) layout.stack_args_bytes), -1); // unknown
104 #endif
105
106 // Call upcall helper
107
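  // Pass the receiver handle (c_rarg0) and the shuffle buffer address (c_rarg1) to the helper using the native C++ ABI.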
108 __ movptr(c_rarg0, rec_adr);
109 __ lea(c_rarg1, Address(rsp, buffer_offset));
110
111 #ifdef _WIN64
112 __ block_comment("allocate shadow space for argument register spill");
113 __ subptr(rsp, 32);
114 #endif
115
116 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::attach_thread_and_do_upcall)));
117
118 #ifdef _WIN64
119 __ block_comment("pop shadow space");
120 __ addptr(rsp, 32);
121 #endif
122
123 for (int i = 0; i < abi._integer_return_registers.length(); i++) {
124 size_t offs = buffer_offset + layout.returns_integer + i * sizeof(uintptr_t);
125 __ movptr(abi._integer_return_registers.at(i), Address(rsp, (int)offs));
126 }
127
128 for (int i = 0; i < abi._vector_return_registers.length(); i++) {
129 XMMRegister reg = abi._vector_return_registers.at(i);
130 size_t offs = buffer_offset + layout.returns_vector + i * xmm_reg_size;
131 __ movdqu(reg, Address(rsp, (int)offs));
132 }
133
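  // fld_x pushes onto the x87 stack, so reload the x87 return values in reverse order so that the first one ends up in st(0).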
134 for (size_t i = abi._X87_return_registers_noof; i > 0 ; i--) {
135 ssize_t offs = buffer_offset + layout.returns_x87 + (i - 1) * (sizeof(long double));
136 __ fld_x (Address(rsp, (int)offs));
137 }
138
139 // Restore preserved registers
140 for (int i = 0; i < preserved.length(); i++) {
141 __ movptr(preserved.at(i), Address(rsp, i * register_size));
142 }
143
144 __ leave();
145 __ ret(0);
146
147 _masm->flush();
148
149 BufferBlob* blob = BufferBlob::create("upcall_stub", &buffer);
150
151 return blob->code_begin();
152 }
153
154 struct ArgMove {
155 BasicType bt;
156 VMRegPair from;
157 VMRegPair to;
158
159 bool is_identity() const {
160 return from.first() == to.first() && from.second() == to.second();
161 }
162 };
163
164 static GrowableArray<ArgMove> compute_argument_shuffle(Method* entry, int& out_arg_size_bytes, const CallRegs& conv, BasicType& ret_type) {
165 assert(entry->is_static(), "");
166
167 // Fill in the signature array, for the calling-convention call.
168 const int total_out_args = entry->size_of_parameters();
169 assert(total_out_args > 0, "receiver arg ");
170
171 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_out_args);
172 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_out_args);
173
174 {
175 int i = 0;
176 SignatureStream ss(entry->signature());
177 for (; !ss.at_return_type(); ss.next()) {
178 out_sig_bt[i++] = ss.type(); // Collect remaining bits of signature
179 if (ss.type() == T_LONG || ss.type() == T_DOUBLE)
180 out_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
181 }
182 assert(i == total_out_args, "");
183 ret_type = ss.type();
184 }
185
186 int out_arg_slots = SharedRuntime::java_calling_convention(out_sig_bt, out_regs, total_out_args);
187
188 const int total_in_args = total_out_args - 1; // skip receiver
189 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
190 VMRegPair* in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_in_args);
191
192 for (int i = 0; i < total_in_args ; i++ ) {
193 in_sig_bt[i] = out_sig_bt[i+1]; // skip receiver
194 }
195
196 // Now figure out where the args must be stored and how much stack space they require.
197 conv.calling_convention(in_sig_bt, in_regs, total_in_args);
198
199 GrowableArray<int> arg_order(2 * total_in_args);
200
201 VMRegPair tmp_vmreg;
202 tmp_vmreg.set2(rbx->as_VMReg());
203
204 // Compute a valid move order, using tmp_vmreg to break any cycles
205 SharedRuntime::compute_move_order(in_sig_bt,
206 total_in_args, in_regs,
207 total_out_args, out_regs,
208 arg_order,
209 tmp_vmreg);
210
211 GrowableArray<ArgMove> arg_order_vmreg(total_in_args); // conservative
212
213 #ifdef ASSERT
214 bool reg_destroyed[RegisterImpl::number_of_registers];
215 bool freg_destroyed[XMMRegisterImpl::number_of_registers];
216 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
217 reg_destroyed[r] = false;
218 }
219 for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
220 freg_destroyed[f] = false;
221 }
222 #endif // ASSERT
223
224 for (int i = 0; i < arg_order.length(); i += 2) {
225 int in_arg = arg_order.at(i);
226 int out_arg = arg_order.at(i + 1);
227
228 assert(in_arg != -1 || out_arg != -1, "");
229 BasicType arg_bt = (in_arg != -1 ? in_sig_bt[in_arg] : out_sig_bt[out_arg]);
230 switch (arg_bt) {
231 case T_BOOLEAN:
232 case T_BYTE:
233 case T_SHORT:
234 case T_CHAR:
235 case T_INT:
236 case T_FLOAT:
237 break; // process
238
239 case T_LONG:
240 case T_DOUBLE:
241 assert(in_arg == -1 || (in_arg + 1 < total_in_args && in_sig_bt[in_arg + 1] == T_VOID), "bad arg list: %d", in_arg);
242 assert(out_arg == -1 || (out_arg + 1 < total_out_args && out_sig_bt[out_arg + 1] == T_VOID), "bad arg list: %d", out_arg);
243 break; // process
244
245 case T_VOID:
246 continue; // skip
247
248 default:
249 fatal("found in upcall args: %s", type2name(arg_bt));
250 }
251
252 ArgMove move;
253 move.bt = arg_bt;
254 move.from = (in_arg != -1 ? in_regs[in_arg] : tmp_vmreg);
255 move.to = (out_arg != -1 ? out_regs[out_arg] : tmp_vmreg);
256
257 if (move.is_identity()) {
258 continue; // useless move
259 }
260
261 #ifdef ASSERT
262 if (in_arg != -1) {
263 if (in_regs[in_arg].first()->is_Register()) {
264 assert(!reg_destroyed[in_regs[in_arg].first()->as_Register()->encoding()], "destroyed reg!");
265 } else if (in_regs[in_arg].first()->is_XMMRegister()) {
266 assert(!freg_destroyed[in_regs[in_arg].first()->as_XMMRegister()->encoding()], "destroyed reg!");
267 }
268 }
269 if (out_arg != -1) {
270 if (out_regs[out_arg].first()->is_Register()) {
271 reg_destroyed[out_regs[out_arg].first()->as_Register()->encoding()] = true;
272 } else if (out_regs[out_arg].first()->is_XMMRegister()) {
273 freg_destroyed[out_regs[out_arg].first()->as_XMMRegister()->encoding()] = true;
274 }
275 }
276 #endif /* ASSERT */
277
278 arg_order_vmreg.push(move);
279 }
280
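  // Convert the outgoing-argument slot count to bytes (VM stack slots are 4 bytes) and round up to the required stack alignment.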
281 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
282 out_arg_size_bytes = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
283
284 return arg_order_vmreg;
285 }
286
287 static const char* null_safe_string(const char* str) {
288 return str == nullptr ? "NULL" : str;
289 }
290
291 #ifdef ASSERT
292 static void print_arg_moves(const GrowableArray<ArgMove>& arg_moves, Method* entry) {
293 LogTarget(Trace, foreign) lt;
294 if (lt.is_enabled()) {
295 ResourceMark rm;
296 LogStream ls(lt);
297 ls.print_cr("Argument shuffle for %s {", entry->name_and_sig_as_C_string());
298 for (int i = 0; i < arg_moves.length(); i++) {
299 ArgMove arg_mv = arg_moves.at(i);
300 BasicType arg_bt = arg_mv.bt;
301 VMRegPair from_vmreg = arg_mv.from;
302 VMRegPair to_vmreg = arg_mv.to;
303
304 ls.print("Move a %s from (", null_safe_string(type2name(arg_bt)));
305 from_vmreg.first()->print_on(&ls);
306 ls.print(",");
307 from_vmreg.second()->print_on(&ls);
308 ls.print(") to (");
309 to_vmreg.first()->print_on(&ls);
310 ls.print(",");
311 to_vmreg.second()->print_on(&ls);
312 ls.print_cr(")");
313 }
314 ls.print_cr("}");
315 }
316 }
317 #endif
318
319 static void save_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) {
320 __ block_comment("{ save_native_args ");
321 int store_offset = arg_save_area_offset;
322 for (int i = 0; i < conv._args_length; i++) {
323 VMReg reg = conv._arg_regs[i];
324 if (reg->is_Register()) {
325 __ movptr(Address(rsp, store_offset), reg->as_Register());
326 store_offset += 8;
327 } else if (reg->is_XMMRegister()) {
328 // Java API doesn't support vector args
329 __ movdqu(Address(rsp, store_offset), reg->as_XMMRegister());
330 store_offset += 16;
331 }
332 // do nothing for stack
333 }
334 __ block_comment("} save_native_args ");
335 }
336
337 static void restore_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) {
338 __ block_comment("{ restore_native_args ");
339 int load_offset = arg_save_area_offset;
340 for (int i = 0; i < conv._args_length; i++) {
341 VMReg reg = conv._arg_regs[i];
342 if (reg->is_Register()) {
343 __ movptr(reg->as_Register(), Address(rsp, load_offset));
344 load_offset += 8;
345 } else if (reg->is_XMMRegister()) {
346 // Java API doesn't support vector args
347 __ movdqu(reg->as_XMMRegister(), Address(rsp, load_offset));
348 load_offset += 16;
349 }
350 // do nothing for stack
351 }
352 __ block_comment("} restore_native_args ");
353 }
354
355 static bool is_valid_XMM(XMMRegister reg) {
356 return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
357 }
358
359 // for callee saved regs, according to the caller's ABI
360 static int compute_reg_save_area_size(const ABIDescriptor& abi) {
361 int size = 0;
362 for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
363 if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
364 if (!abi.is_volatile_reg(reg)) {
365 size += 8; // bytes
366 }
367 }
368
369 for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
370 if (!abi.is_volatile_reg(reg)) {
371 if (UseAVX >= 3) {
372 size += 64; // bytes
373 } else if (UseAVX >= 1) {
374 size += 32;
375 } else {
376 size += 16;
377 }
378 }
379 }
380
381 #ifndef _WIN64
382 // for mxcsr
383 size += 8;
384 #endif
385
386 return size;
387 }
388
389 static int compute_arg_save_area_size(const CallRegs& conv) {
390 int result_size = 0;
391 for (int i = 0; i < conv._args_length; i++) {
392 VMReg reg = conv._arg_regs[i];
393 if (reg->is_Register()) {
394 result_size += 8;
395 } else if (reg->is_XMMRegister()) {
396 // Java API doesn't support vector args
397 result_size += 16;
398 }
399 // do nothing for stack
400 }
401 return result_size;
402 }
403
404 static int compute_res_save_area_size(const CallRegs& conv) {
405 int result_size = 0;
406 for (int i = 0; i < conv._rets_length; i++) {
407 VMReg reg = conv._ret_regs[i];
408 if (reg->is_Register()) {
409 result_size += 8;
410 } else if (reg->is_XMMRegister()) {
411 // Java API doesn't support vector returns
412 result_size += 16;
413 } else {
414 ShouldNotReachHere(); // unhandled type
415 }
416 }
417 return result_size;
418 }
419
420 static void save_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
421 int offset = res_save_area_offset;
422 __ block_comment("{ save java result ");
423 for (int i = 0; i < conv._rets_length; i++) {
424 VMReg reg = conv._ret_regs[i];
425 if (reg->is_Register()) {
426 __ movptr(Address(rsp, offset), reg->as_Register());
427 offset += 8;
428 } else if (reg->is_XMMRegister()) {
429 // Java API doesn't support vector returns
430 __ movdqu(Address(rsp, offset), reg->as_XMMRegister());
431 offset += 16;
432 } else {
433 ShouldNotReachHere(); // unhandled type
434 }
435 }
436 __ block_comment("} save java result ");
437 }
438
439 static void restore_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
440 int offset = res_save_area_offset;
441 __ block_comment("{ restore java result ");
442 for (int i = 0; i < conv._rets_length; i++) {
443 VMReg reg = conv._ret_regs[i];
444 if (reg->is_Register()) {
445 __ movptr(reg->as_Register(), Address(rsp, offset));
446 offset += 8;
447 } else if (reg->is_XMMRegister()) {
448 // Java API doesn't support vector returns
449 __ movdqu(reg->as_XMMRegister(), Address(rsp, offset));
450 offset += 16;
451 } else {
452 ShouldNotReachHere(); // unhandled type
453 }
454 }
455 __ block_comment("} restore java result ");
456 }
457
458 constexpr int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
459
460 static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
461 // 1. iterate all registers in the architecture
462 // - check if they are volatile or not for the given abi
463 // - if NOT, we need to save it here
464 // 2. save mxcsr on non-windows platforms
465
466 int offset = reg_save_area_offset;
467
468 __ block_comment("{ preserve_callee_saved_regs ");
469 for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
470 if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
471 if (!abi.is_volatile_reg(reg)) {
472 __ movptr(Address(rsp, offset), reg);
473 offset += 8;
474 }
475 }
476
477 for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(Address(rsp, offset), reg, Assembler::AVX_512bit);
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(Address(rsp, offset), reg);
        offset += 32;
      } else {
        __ movdqu(Address(rsp, offset), reg);
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  {
    // save the caller's mxcsr so it can be restored on the way out;
    // MXCSR_MASK drops any pending exception flags from the saved value
    const Address mxcsr_save(rsp, offset);
    __ stmxcsr(mxcsr_save);
    __ andl(mxcsr_save, MXCSR_MASK);
  }
#endif

  __ block_comment("} preserve_callee_saved_regs ");
}

static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to restore it here
  // 2. restore mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ restore_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(reg, Address(rsp, offset));
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
529 if (UseAVX >= 3) {
530 __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
531 offset += 64;
532 } else if (UseAVX >= 1) {
533 __ vmovdqu(reg, Address(rsp, offset));
534 offset += 32;
535 } else {
536 __ movdqu(reg, Address(rsp, offset));
537 offset += 16;
538 }
539 }
540 }
541
542 #ifndef _WIN64
543 const Address mxcsr_save(rsp, offset);
544 __ ldmxcsr(mxcsr_save);
545 #endif
546
547 __ block_comment("} restore_callee_saved_regs ");
548 }
549
550 static void shuffle_arguments(MacroAssembler* _masm, const GrowableArray<ArgMove>& arg_moves) {
551 for (int i = 0; i < arg_moves.length(); i++) {
552 ArgMove arg_mv = arg_moves.at(i);
553 BasicType arg_bt = arg_mv.bt;
554 VMRegPair from_vmreg = arg_mv.from;
555 VMRegPair to_vmreg = arg_mv.to;
556
557 assert(
558 !((from_vmreg.first()->is_Register() && to_vmreg.first()->is_XMMRegister())
559 || (from_vmreg.first()->is_XMMRegister() && to_vmreg.first()->is_Register())),
560 "move between gp and fp reg not supported");
561
562 __ block_comment(err_msg("bt=%s", null_safe_string(type2name(arg_bt))));
563 switch (arg_bt) {
564 case T_BOOLEAN:
565 case T_BYTE:
566 case T_SHORT:
567 case T_CHAR:
568 case T_INT:
569 __ move32_64(from_vmreg, to_vmreg);
570 break;
571
572 case T_FLOAT:
573 __ float_move(from_vmreg, to_vmreg);
574 break;
575
576 case T_DOUBLE:
577 __ double_move(from_vmreg, to_vmreg);
578 break;
579
580 case T_LONG :
581 __ long_move(from_vmreg, to_vmreg);
582 break;
583
584 default:
585 fatal("found in upcall args: %s", type2name(arg_bt));
586 }
587 }
588 }
589
590 // Register is a class, but it is assigned a numerical value.
591 // "0" is used for both rax and xmm0, so we need to ignore -Wnonnull here.
592 PRAGMA_DIAG_PUSH
593 PRAGMA_NONNULL_IGNORED
594 address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry, jobject jabi, jobject jconv) {
595 ResourceMark rm;
596 const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
597 const CallRegs conv = ForeignGlobals::parse_call_regs(jconv);
598 assert(conv._rets_length <= 1, "no multi reg returns");
599 CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);
600
601 int register_size = sizeof(uintptr_t);
602 int buffer_alignment = xmm_reg_size;
603
604 int out_arg_area = -1;
605 BasicType ret_type;
606 GrowableArray<ArgMove> arg_moves = compute_argument_shuffle(entry, out_arg_area, conv, ret_type);
607 assert(out_arg_area != -1, "Should have been set");
608 DEBUG_ONLY(print_arg_moves(arg_moves, entry);)
609
610 // out_arg_area (for stack arguments) doubles as shadow space for native calls.
611 // make sure it is big enough.
612 if (out_arg_area < frame::arg_reg_save_area_bytes) {
613 out_arg_area = frame::arg_reg_save_area_bytes;
614 }
615
616 int reg_save_area_size = compute_reg_save_area_size(abi);
617 int arg_save_area_size = compute_arg_save_area_size(conv);
618 int res_save_area_size = compute_res_save_area_size(conv);
619
620 int shuffle_area_offset = 0;
621 int res_save_area_offset = shuffle_area_offset + out_arg_area;
622 int arg_save_area_offset = res_save_area_offset + res_save_area_size;
623 int reg_save_area_offset = arg_save_area_offset + arg_save_area_size;
624 int frame_data_offset = reg_save_area_offset + reg_save_area_size;
625 int frame_bottom_offset = frame_data_offset + sizeof(OptimizedEntryBlob::FrameData);
626
627 int frame_size = frame_bottom_offset;
628 frame_size = align_up(frame_size, StackAlignmentInBytes);
629
630 // Ok, the space we have allocated will look like:
631 //
632 //
633 // FP-> |                     |
634 //      |---------------------| = frame_bottom_offset = frame_size
635 //      |                     |
636 //      | FrameData           |
637 //      |---------------------| = frame_data_offset
638 //      |                     |
639 //      | reg_save_area       |
640 //      |---------------------| = reg_save_area_offset
641 //      |                     |
642 //      | arg_save_area       |
643 //      |---------------------| = arg_save_area_offset
644 //      |                     |
645 //      | res_save_area       |
646 //      |---------------------| = res_save_area_offset
647 //      |                     |
648 // SP-> | out_arg_area        |   needs to be at end for shadow space
649 //
650 //
651
652 //////////////////////////////////////////////////////////////////////////////
653
654 MacroAssembler* _masm = new MacroAssembler(&buffer);
655 address start = __ pc();
656 __ enter(); // set up frame
657 if ((abi._stack_alignment_bytes % 16) != 0) {
658 // stack alignment of caller is not a multiple of 16
659 __ andptr(rsp, -StackAlignmentInBytes); // align stack
660 }
661 // allocate frame (frame_size is also aligned, so stack is still aligned)
662 __ subptr(rsp, frame_size);
663
664 // we have to always spill args since we need to do a call to get the thread
665 // (and maybe attach it).
666 save_native_arguments(_masm, conv, arg_save_area_offset);
667
668 preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
669
670 __ block_comment("{ on_entry");
671 __ vzeroupper();
672 __ lea(c_rarg0, Address(rsp, frame_data_offset));
673 // stack already aligned
674 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
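  // on_entry returns the current JavaThread*; install it in r15, the VM's thread register.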
675 __ movptr(r15_thread, rax);
676 __ reinit_heapbase();
677 __ block_comment("} on_entry");
678
679 __ block_comment("{ argument shuffle");
680 // TODO merge these somehow
681 restore_native_arguments(_masm, conv, arg_save_area_offset);
682 shuffle_arguments(_masm, arg_moves);
683 __ block_comment("} argument shuffle");
684
685 __ block_comment("{ receiver ");
686 __ movptr(rscratch1, (intptr_t)receiver);
687 __ resolve_jobject(rscratch1, r15_thread, rscratch2);
688 __ movptr(j_rarg0, rscratch1);
689 __ block_comment("} receiver ");
690
691 __ mov_metadata(rbx, entry);
692 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
693
694 __ call(Address(rbx, Method::from_compiled_offset()));
695
696 save_java_result(_masm, conv, res_save_area_offset);
697
698 __ block_comment("{ on_exit");
699 __ vzeroupper();
700 __ lea(c_rarg0, Address(rsp, frame_data_offset));
701 // stack already aligned
702 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
703 __ reinit_heapbase();
704 __ block_comment("} on_exit");
705
706 restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
707
708 restore_java_result(_masm, conv, res_save_area_offset);
709
710 // return value shuffle
711 #ifdef ASSERT
712 if (conv._rets_length == 1) { // 0 or 1
713 VMReg j_expected_result_reg;
714 switch (ret_type) {
715 case T_BOOLEAN:
716 case T_BYTE:
717 case T_SHORT:
718 case T_CHAR:
719 case T_INT:
720 case T_LONG:
721 j_expected_result_reg = rax->as_VMReg();
722 break;
723 case T_FLOAT:
724 case T_DOUBLE:
725 j_expected_result_reg = xmm0->as_VMReg();
726 break;
727 default:
728 fatal("unexpected return type: %s", type2name(ret_type));
729 }
730 // No need to move for now, since CallArranger can pick a return type
731 // that goes in the same reg for both CCs. But, at least assert they are the same
732 assert(conv._ret_regs[0] == j_expected_result_reg,
733 "unexpected result register: %s != %s", conv._ret_regs[0]->name(), j_expected_result_reg->name());
734 }
735 #endif
736
737 __ leave();
738 __ ret(0);
739
740 //////////////////////////////////////////////////////////////////////////////
741
742 __ block_comment("{ exception handler");
743
744 intptr_t exception_handler_offset = __ pc() - start;
745
746 // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?
747
748 // native caller has no idea how to handle exceptions
749 // we just crash here. Up to callee to catch exceptions.
750 __ verify_oop(rax);
751 __ vzeroupper();
752 __ mov(c_rarg0, rax);
753 __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
754 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
755 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
756 __ should_not_reach_here();
757
758 __ block_comment("} exception handler");
759
760 _masm->flush();
761
762
763 #ifndef PRODUCT
764 stringStream ss;
765 ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
766 const char* name = _masm->code_string(ss.as_string());
767 #else // PRODUCT
768 const char* name = "optimized_upcall_stub";
769 #endif // PRODUCT
770
771 OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, in_ByteSize(frame_data_offset));
772
773 if (TraceOptimizedUpcallStubs) {
774 blob->print_on(tty);
775 Disassembler::decode(blob, tty);
776 }
777
778 return blob->code_begin();
779 }
780 PRAGMA_DIAG_POP
781
782 bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
783 return true;
784 }
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/macroAssembler.hpp"
26 #include "code/codeBlob.hpp"
27 #include "code/codeBlob.hpp"
28 #include "code/vmreg.inline.hpp"
29 #include "compiler/disassembler.hpp"
30 #include "logging/logStream.hpp"
31 #include "memory/resourceArea.hpp"
32 #include "prims/foreign_globals.inline.hpp"
33 #include "prims/universalUpcallHandler.hpp"
34 #include "runtime/sharedRuntime.hpp"
35 #include "runtime/signature.hpp"
36 #include "runtime/stubRoutines.hpp"
37 #include "utilities/formatBuffer.hpp"
38 #include "utilities/globalDefinitions.hpp"
39
40 #define __ _masm->
41
42 static bool is_valid_XMM(XMMRegister reg) {
43 return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
44 }
45
46 // for callee saved regs, according to the caller's ABI
47 static int compute_reg_save_area_size(const ABIDescriptor& abi) {
48 int size = 0;
49 for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
50 if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
51 if (!abi.is_volatile_reg(reg)) {
52 size += 8; // bytes
53 }
54 }
55
56 for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
57 if (!abi.is_volatile_reg(reg)) {
58 if (UseAVX >= 3) {
59 size += 64; // bytes
60 } else if (UseAVX >= 1) {
61 size += 32;
62 } else {
63 size += 16;
64 }
65 }
66 }
67
68 #ifndef _WIN64
69 // for mxcsr
70 size += 8;
71 #endif
72
73 return size;
74 }
75
76 constexpr int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
77
78 static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
79 // 1. iterate all registers in the architecture
80 // - check if they are volatile or not for the given abi
81 // - if NOT, we need to save it here
82 // 2. save mxcsr on non-windows platforms
83
84 int offset = reg_save_area_offset;
85
86 __ block_comment("{ preserve_callee_saved_regs ");
87 for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
88 if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
89 if (!abi.is_volatile_reg(reg)) {
90 __ movptr(Address(rsp, offset), reg);
91 offset += 8;
92 }
93 }
94
95 for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(Address(rsp, offset), reg, Assembler::AVX_512bit);
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(Address(rsp, offset), reg);
        offset += 32;
      } else {
        __ movdqu(Address(rsp, offset), reg);
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  {
    // save the caller's mxcsr so it can be restored on the way out;
    // MXCSR_MASK drops any pending exception flags from the saved value
    const Address mxcsr_save(rsp, offset);
    __ stmxcsr(mxcsr_save);
    __ andl(mxcsr_save, MXCSR_MASK);
  }
#endif

  __ block_comment("} preserve_callee_saved_regs ");
}

static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to restore it here
  // 2. restore mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ restore_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(reg, Address(rsp, offset));
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
147 if (UseAVX >= 3) {
148 __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
149 offset += 64;
150 } else if (UseAVX >= 1) {
151 __ vmovdqu(reg, Address(rsp, offset));
152 offset += 32;
153 } else {
154 __ movdqu(reg, Address(rsp, offset));
155 offset += 16;
156 }
157 }
158 }
159
160 #ifndef _WIN64
161 const Address mxcsr_save(rsp, offset);
162 __ ldmxcsr(mxcsr_save);
163 #endif
164
165 __ block_comment("} restore_callee_saved_regs ");
166 }
167 // Register is a class, but it is assigned a numerical value.
168 // "0" is used for both rax and xmm0, so we need to ignore -Wnonnull here.
169 PRAGMA_DIAG_PUSH
170 PRAGMA_NONNULL_IGNORED
171 address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
172 BasicType* in_sig_bt, int total_in_args,
173 BasicType* out_sig_bt, int total_out_args,
174 BasicType ret_type,
175 jobject jabi, jobject jconv,
176 bool needs_return_buffer, int ret_buf_size) {
177 const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
178 const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
179 CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);
180
181 Register shuffle_reg = rbx;
182 JavaCallConv out_conv;
183 NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
184 ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
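  // Convert the outgoing-argument slot count to bytes (VM stack slots are 4 bytes) and round up to the required stack alignment.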
185 int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
186 int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
187
188 #ifdef ASSERT
189 LogTarget(Trace, panama) lt;
190 if (lt.is_enabled()) {
191 ResourceMark rm;
192 LogStream ls(lt);
193 arg_shuffle.print_on(&ls);
194 }
195 #endif
196
197 // out_arg_area (for stack arguments) doubles as shadow space for native calls.
198 // make sure it is big enough.
199 if (out_arg_area < frame::arg_reg_save_area_bytes) {
200 out_arg_area = frame::arg_reg_save_area_bytes;
201 }
202
203 int reg_save_area_size = compute_reg_save_area_size(abi);
204 RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
205 RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);
206
207 int shuffle_area_offset = 0;
208 int res_save_area_offset = shuffle_area_offset + out_arg_area;
209 int arg_save_area_offset = res_save_area_offset + result_spiller.spill_size_bytes();
210 int reg_save_area_offset = arg_save_area_offset + arg_spiller.spill_size_bytes();
211 int frame_data_offset = reg_save_area_offset + reg_save_area_size;
212 int frame_bottom_offset = frame_data_offset + sizeof(OptimizedEntryBlob::FrameData);
213
214 int ret_buf_offset = -1;
215 if (needs_return_buffer) {
216 ret_buf_offset = frame_bottom_offset;
217 frame_bottom_offset += ret_buf_size;
218 }
219
220 int frame_size = frame_bottom_offset;
221 frame_size = align_up(frame_size, StackAlignmentInBytes);
222
223 // Ok, the space we have allocated will look like:
224 //
225 //
226 // FP-> |                     |
227 //      |---------------------| = frame_bottom_offset = frame_size
228 //      | (optional)          |
229 //      | ret_buf             |
230 //      |---------------------| = ret_buf_offset
231 //      |                     |
232 //      | FrameData           |
233 //      |---------------------| = frame_data_offset
234 //      |                     |
235 //      | reg_save_area       |
236 //      |---------------------| = reg_save_area_offset
237 //      |                     |
238 //      | arg_save_area       |
239 //      |---------------------| = arg_save_area_offset
240 //      |                     |
241 //      | res_save_area       |
242 //      |---------------------| = res_save_area_offset
243 //      |                     |
244 // SP-> | out_arg_area        |   needs to be at end for shadow space
245 //
246 //
247
248 //////////////////////////////////////////////////////////////////////////////
249
250 MacroAssembler* _masm = new MacroAssembler(&buffer);
251 address start = __ pc();
252 __ enter(); // set up frame
253 if ((abi._stack_alignment_bytes % 16) != 0) {
254 // stack alignment of caller is not a multiple of 16
255 __ andptr(rsp, -StackAlignmentInBytes); // align stack
256 }
257 // allocate frame (frame_size is also aligned, so stack is still aligned)
258 __ subptr(rsp, frame_size);
259
260 // we have to always spill args since we need to do a call to get the thread
261 // (and maybe attach it).
262 arg_spiller.generate_spill(_masm, arg_save_area_offset);
263
264 preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
265
266 __ block_comment("{ on_entry");
267 __ vzeroupper();
268 __ lea(c_rarg0, Address(rsp, frame_data_offset));
269 // stack already aligned
270 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
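  // on_entry returns the current JavaThread*; install it in r15, the VM's thread register.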
271 __ movptr(r15_thread, rax);
272 __ reinit_heapbase();
273 __ block_comment("} on_entry");
274
275 __ block_comment("{ argument shuffle");
276 arg_spiller.generate_fill(_masm, arg_save_area_offset);
277 if (needs_return_buffer) {
278 assert(ret_buf_offset != -1, "no return buffer allocated");
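    // Pass the return-buffer address to the Java side; the callee stores its return value(s) there.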
279 __ lea(abi._ret_buf_addr_reg, Address(rsp, ret_buf_offset));
280 }
281 arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
282 __ block_comment("} argument shuffle");
283
284 __ block_comment("{ receiver ");
285 __ movptr(rscratch1, (intptr_t)receiver);
286 __ resolve_jobject(rscratch1, r15_thread, rscratch2);
287 __ movptr(j_rarg0, rscratch1);
288 __ block_comment("} receiver ");
289
290 __ mov_metadata(rbx, entry);
291 __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
292
293 __ call(Address(rbx, Method::from_compiled_offset()));
294
295 // return value shuffle
296 if (!needs_return_buffer) {
297 #ifdef ASSERT
298 if (call_regs._rets_length == 1) { // 0 or 1
299 VMReg j_expected_result_reg;
300 switch (ret_type) {
301 case T_BOOLEAN:
302 case T_BYTE:
303 case T_SHORT:
304 case T_CHAR:
305 case T_INT:
306 case T_LONG:
307 j_expected_result_reg = rax->as_VMReg();
308 break;
309 case T_FLOAT:
310 case T_DOUBLE:
311 j_expected_result_reg = xmm0->as_VMReg();
312 break;
313 default:
314 fatal("unexpected return type: %s", type2name(ret_type));
315 }
316 // No need to move for now, since CallArranger can pick a return type
317 // that goes in the same reg for both CCs. But, at least assert they are the same
318 assert(call_regs._ret_regs[0] == j_expected_result_reg,
319 "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
320 }
321 #endif
322 } else {
323 assert(ret_buf_offset != -1, "no return buffer allocated");
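    // Copy the return value(s) from the return buffer into the native ABI return registers.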
324 __ lea(rscratch1, Address(rsp, ret_buf_offset));
325 int offset = 0;
326 for (int i = 0; i < call_regs._rets_length; i++) {
327 VMReg reg = call_regs._ret_regs[i];
328 if (reg->is_Register()) {
329 __ movptr(reg->as_Register(), Address(rscratch1, offset));
330 offset += 8;
331 } else if (reg->is_XMMRegister()) {
332 __ movdqu(reg->as_XMMRegister(), Address(rscratch1, offset));
333 offset += 16;
334 } else {
335 ShouldNotReachHere();
336 }
337 }
338 }
339
340 result_spiller.generate_spill(_masm, res_save_area_offset);
341
342 __ block_comment("{ on_exit");
343 __ vzeroupper();
344 __ lea(c_rarg0, Address(rsp, frame_data_offset));
345 // stack already aligned
346 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
347 __ reinit_heapbase();
348 __ block_comment("} on_exit");
349
350 restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
351
352 result_spiller.generate_fill(_masm, res_save_area_offset);
353
354 __ leave();
355 __ ret(0);
356
357 //////////////////////////////////////////////////////////////////////////////
358
359 __ block_comment("{ exception handler");
360
361 intptr_t exception_handler_offset = __ pc() - start;
362
363 // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?
364
365 // native caller has no idea how to handle exceptions
366 // we just crash here. Up to callee to catch exceptions.
367 __ verify_oop(rax);
368 __ vzeroupper();
369 __ mov(c_rarg0, rax);
370 __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
371 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
372 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
373 __ should_not_reach_here();
374
375 __ block_comment("} exception handler");
376
377 _masm->flush();
378
379
380 #ifndef PRODUCT
381 stringStream ss;
382 ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
383 const char* name = _masm->code_string(ss.as_string());
384 #else // PRODUCT
385 const char* name = "optimized_upcall_stub";
386 #endif // PRODUCT
387
388 OptimizedEntryBlob* blob
389 = OptimizedEntryBlob::create(name,
390 &buffer,
391 exception_handler_offset,
392 receiver,
393 in_ByteSize(frame_data_offset));
394
395 if (TraceOptimizedUpcallStubs) {
396 blob->print_on(tty);
397 }
398
399 return blob->code_begin();
400 }
401 PRAGMA_DIAG_POP