src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp

--- Old version ---

 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 #include "precompiled.hpp"
 25 #include "asm/macroAssembler.hpp"
 26 #include "code/codeBlob.hpp"
 28 #include "code/vmreg.inline.hpp"
 29 #include "compiler/disassembler.hpp"
 30 #include "logging/logStream.hpp"
 31 #include "memory/resourceArea.hpp"
 32 #include "prims/universalUpcallHandler.hpp"
 33 #include "runtime/sharedRuntime.hpp"
 34 #include "runtime/signature.hpp"
 35 #include "runtime/stubRoutines.hpp"
 36 #include "utilities/formatBuffer.hpp"
 37 #include "utilities/globalDefinitions.hpp"
 38 
 39 #define __ _masm->
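// HotSpot assembler shorthand: "__ foo()" expands to "_masm->foo()".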
 40 
 41 // 1. Create buffer according to layout
 42 // 2. Load registers & stack args into buffer
 43 // 3. Call upcall helper with upcall handler instance & buffer pointer (C++ ABI)
 44 // 4. Load return value from buffer into foreign ABI registers
 45 // 5. Return
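// This is the "universal" (unoptimized) flavor: no per-signature register
// shuffling is generated. Every argument register, plus a pointer to the
// caller's stack arguments, is stored into a single buffer whose layout was
// parsed from jlayout, and attach_thread_and_do_upcall unboxes the arguments
// and performs the actual Java call on the Java side. The optimized variant
// further down avoids this detour.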
 46 address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) {
 47   ResourceMark rm;
 48   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
 49   const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout);
 50 
 51   CodeBuffer buffer("upcall_stub", 1024, upcall_stub_size);
 52 
 53   MacroAssembler* _masm = new MacroAssembler(&buffer);
 54   int stack_alignment_C = 16; // bytes
 55   int register_size = sizeof(uintptr_t);
 56   int buffer_alignment = xmm_reg_size;
 57 
 58   // stub code
 59   __ enter();
 60 
 61   // save pointer to JNI receiver handle into constant segment
 62   Address rec_adr = __ as_Address(InternalAddress(__ address_constant((address)rec)));
 63 
 64   __ subptr(rsp, (int) align_up(layout.buffer_size, buffer_alignment));
 65 
 66   Register used[] = { c_rarg0, c_rarg1, rax, rbx, rdi, rsi, r12, r13, r14, r15 };
 67   GrowableArray<Register> preserved;
 68   // TODO need to preserve anything killed by the upcall that is non-volatile, needs XMM regs as well, probably
 69   for (size_t i = 0; i < sizeof(used)/sizeof(Register); i++) {
 70     Register reg = used[i];
 71     if (!abi.is_volatile_reg(reg)) {
 72       preserved.push(reg);
 73     }
 74   }
 75 
 76   int preserved_size = align_up(preserved.length() * register_size, stack_alignment_C); // includes register alignment
 77   int buffer_offset = preserved_size; // offset from rsp
 78 
 79   __ subptr(rsp, preserved_size);
 80   for (int i = 0; i < preserved.length(); i++) {
 81     __ movptr(Address(rsp, i * register_size), preserved.at(i));
 82   }
 83 
 84   for (int i = 0; i < abi._integer_argument_registers.length(); i++) {
 85     size_t offs = buffer_offset + layout.arguments_integer + i * sizeof(uintptr_t);
 86     __ movptr(Address(rsp, (int)offs), abi._integer_argument_registers.at(i));
 87   }
 88 
 89   for (int i = 0; i < abi._vector_argument_registers.length(); i++) {
 90     XMMRegister reg = abi._vector_argument_registers.at(i);
 91     size_t offs = buffer_offset + layout.arguments_vector + i * xmm_reg_size;
 92     __ movdqu(Address(rsp, (int)offs), reg);
 93   }
 94 
 95   // Capture prev stack pointer (stack arguments base)
 96 #ifndef _WIN64
 97   __ lea(rax, Address(rbp, 16)); // skip frame+return address
 98 #else
 99   __ lea(rax, Address(rbp, 16 + 32)); // also skip shadow space
100 #endif
101   __ movptr(Address(rsp, buffer_offset + (int) layout.stack_args), rax);
102 #ifndef PRODUCT
103   __ movptr(Address(rsp, buffer_offset + (int) layout.stack_args_bytes), -1); // unknown
104 #endif
105 
106   // Call upcall helper
107 
108   __ movptr(c_rarg0, rec_adr);
109   __ lea(c_rarg1, Address(rsp, buffer_offset));
110 
111 #ifdef _WIN64
112   __ block_comment("allocate shadow space for argument register spill");
113   __ subptr(rsp, 32);
114 #endif
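  // The Windows x64 ABI requires the caller to reserve 32 bytes of "shadow
  // space" (spill slots for the four register arguments) immediately before
  // every call; System V has no such requirement, hence the #ifdef.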
115 
116   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::attach_thread_and_do_upcall)));
117 
118 #ifdef _WIN64
119   __ block_comment("pop shadow space");
120   __ addptr(rsp, 32);
121 #endif
122 
123   for (int i = 0; i < abi._integer_return_registers.length(); i++) {
124     size_t offs = buffer_offset + layout.returns_integer + i * sizeof(uintptr_t);
125     __ movptr(abi._integer_return_registers.at(i), Address(rsp, (int)offs));
126   }
127 
128   for (int i = 0; i < abi._vector_return_registers.length(); i++) {
129     XMMRegister reg = abi._vector_return_registers.at(i);
130     size_t offs = buffer_offset + layout.returns_vector + i * xmm_reg_size;
131     __ movdqu(reg, Address(rsp, (int)offs));
132   }
133 
 134   for (size_t i = abi._X87_return_registers_noof; i > 0; i--) {
 135     ssize_t offs = buffer_offset + layout.returns_x87 + (i - 1) * sizeof(long double);
 136     __ fld_x(Address(rsp, (int)offs));
137   }
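  // x87 values are returned on the FPU register stack, so they are pushed in
  // reverse order: the last fld_x executed loads returns_x87[0], leaving it
  // on top of the stack as st(0).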
138 
139   // Restore preserved registers
140   for (int i = 0; i < preserved.length(); i++) {
141     __ movptr(preserved.at(i), Address(rsp, i * register_size));
142   }
143 
144   __ leave();
145   __ ret(0);
146 
147   _masm->flush();
148 
149   BufferBlob* blob = BufferBlob::create("upcall_stub", &buffer);
150 
151   return blob->code_begin();
152 }
153 
154 struct ArgMove {
155   BasicType bt;
156   VMRegPair from;
157   VMRegPair to;
158 
159   bool is_identity() const {
160       return from.first() == to.first() && from.second() == to.second();
161   }
162 };
163 
164 static GrowableArray<ArgMove> compute_argument_shuffle(Method* entry, int& out_arg_size_bytes, const CallRegs& conv, BasicType& ret_type) {
165   assert(entry->is_static(), "");
166 
167   // Fill in the signature array, for the calling-convention call.
168   const int total_out_args = entry->size_of_parameters();
169   assert(total_out_args > 0, "receiver arg ");
170 
171   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_out_args);
172   VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_out_args);
173 
174   {
175     int i = 0;
176     SignatureStream ss(entry->signature());
177     for (; !ss.at_return_type(); ss.next()) {
178       out_sig_bt[i++] = ss.type();  // Collect remaining bits of signature
179       if (ss.type() == T_LONG || ss.type() == T_DOUBLE)
180         out_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
181     }
182     assert(i == total_out_args, "");
183     ret_type = ss.type();
184   }
185 
186   int out_arg_slots = SharedRuntime::java_calling_convention(out_sig_bt, out_regs, total_out_args);
187 
188   const int total_in_args = total_out_args - 1; // skip receiver
189   BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
190   VMRegPair* in_regs    = NEW_RESOURCE_ARRAY(VMRegPair, total_in_args);
191 
192   for (int i = 0; i < total_in_args ; i++ ) {
193     in_sig_bt[i] = out_sig_bt[i+1]; // skip receiver
194   }
195 
196   // Now figure out where the args must be stored and how much stack space they require.
197   conv.calling_convention(in_sig_bt, in_regs, total_in_args);
198 
199   GrowableArray<int> arg_order(2 * total_in_args);
200 
201   VMRegPair tmp_vmreg;
202   tmp_vmreg.set2(rbx->as_VMReg());
203 
204   // Compute a valid move order, using tmp_vmreg to break any cycles
205   SharedRuntime::compute_move_order(in_sig_bt,
206                                     total_in_args, in_regs,
207                                     total_out_args, out_regs,
208                                     arg_order,
209                                     tmp_vmreg);
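  // A register can be the source of one move and the destination of another
  // (e.g. two arguments swapping registers), so the moves are ordered such
  // that no value is overwritten before it has been read, with tmp_vmreg
  // (rbx) used as scratch to break genuine cycles.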
210 
211   GrowableArray<ArgMove> arg_order_vmreg(total_in_args); // conservative
212 
213 #ifdef ASSERT
214   bool reg_destroyed[RegisterImpl::number_of_registers];
215   bool freg_destroyed[XMMRegisterImpl::number_of_registers];
216   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
217     reg_destroyed[r] = false;
218   }
219   for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
220     freg_destroyed[f] = false;
221   }
222 #endif // ASSERT
223 
224   for (int i = 0; i < arg_order.length(); i += 2) {
225     int in_arg  = arg_order.at(i);
226     int out_arg = arg_order.at(i + 1);
227 
228     assert(in_arg != -1 || out_arg != -1, "");
229     BasicType arg_bt = (in_arg != -1 ? in_sig_bt[in_arg] : out_sig_bt[out_arg]);
230     switch (arg_bt) {
231       case T_BOOLEAN:
232       case T_BYTE:
233       case T_SHORT:
234       case T_CHAR:
235       case T_INT:
236       case T_FLOAT:
237         break; // process
238 
239       case T_LONG:
240       case T_DOUBLE:
241         assert(in_arg  == -1 || (in_arg  + 1 < total_in_args  &&  in_sig_bt[in_arg  + 1] == T_VOID), "bad arg list: %d", in_arg);
242         assert(out_arg == -1 || (out_arg + 1 < total_out_args && out_sig_bt[out_arg + 1] == T_VOID), "bad arg list: %d", out_arg);
243         break; // process
244 
245       case T_VOID:
246         continue; // skip
247 
248       default:
249         fatal("found in upcall args: %s", type2name(arg_bt));
250     }
251 
252     ArgMove move;
253     move.bt   = arg_bt;
254     move.from = (in_arg != -1 ? in_regs[in_arg] : tmp_vmreg);
255     move.to   = (out_arg != -1 ? out_regs[out_arg] : tmp_vmreg);
256 
 257     if (move.is_identity()) {
258       continue; // useless move
259     }
260 
261 #ifdef ASSERT
262     if (in_arg != -1) {
263       if (in_regs[in_arg].first()->is_Register()) {
264         assert(!reg_destroyed[in_regs[in_arg].first()->as_Register()->encoding()], "destroyed reg!");
265       } else if (in_regs[in_arg].first()->is_XMMRegister()) {
266         assert(!freg_destroyed[in_regs[in_arg].first()->as_XMMRegister()->encoding()], "destroyed reg!");
267       }
268     }
269     if (out_arg != -1) {
270       if (out_regs[out_arg].first()->is_Register()) {
271         reg_destroyed[out_regs[out_arg].first()->as_Register()->encoding()] = true;
272       } else if (out_regs[out_arg].first()->is_XMMRegister()) {
273         freg_destroyed[out_regs[out_arg].first()->as_XMMRegister()->encoding()] = true;
274       }
275     }
276 #endif /* ASSERT */
277 
278     arg_order_vmreg.push(move);
279   }
280 
281   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
282   out_arg_size_bytes = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
283 
284   return arg_order_vmreg;
285 }
286 
287 static const char* null_safe_string(const char* str) {
288   return str == nullptr ? "NULL" : str;
289 }
290 
291 #ifdef ASSERT
292 static void print_arg_moves(const GrowableArray<ArgMove>& arg_moves, Method* entry) {
293   LogTarget(Trace, foreign) lt;
294   if (lt.is_enabled()) {
295     ResourceMark rm;
296     LogStream ls(lt);
297     ls.print_cr("Argument shuffle for %s {", entry->name_and_sig_as_C_string());
298     for (int i = 0; i < arg_moves.length(); i++) {
299       ArgMove arg_mv = arg_moves.at(i);
300       BasicType arg_bt     = arg_mv.bt;
301       VMRegPair from_vmreg = arg_mv.from;
302       VMRegPair to_vmreg   = arg_mv.to;
303 
304       ls.print("Move a %s from (", null_safe_string(type2name(arg_bt)));
305       from_vmreg.first()->print_on(&ls);
306       ls.print(",");
307       from_vmreg.second()->print_on(&ls);
308       ls.print(") to ");
309       to_vmreg.first()->print_on(&ls);
310       ls.print(",");
311       to_vmreg.second()->print_on(&ls);
312       ls.print_cr(")");
313     }
314     ls.print_cr("}");
315   }
316 }
317 #endif
318 
319 static void save_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) {
320   __ block_comment("{ save_native_args ");
321   int store_offset = arg_save_area_offset;
322   for (int i = 0; i < conv._args_length; i++) {
323     VMReg reg = conv._arg_regs[i];
324     if (reg->is_Register()) {
325       __ movptr(Address(rsp, store_offset), reg->as_Register());
326       store_offset += 8;
327     } else if (reg->is_XMMRegister()) {
328       // Java API doesn't support vector args
329       __ movdqu(Address(rsp, store_offset), reg->as_XMMRegister());
330       store_offset += 16;
331     }
332     // do nothing for stack
333   }
334   __ block_comment("} save_native_args ");
335 }
336 
337 static void restore_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) {
338   __ block_comment("{ restore_native_args ");
339   int load_offset = arg_save_area_offset;
340   for (int i = 0; i < conv._args_length; i++) {
341     VMReg reg = conv._arg_regs[i];
342     if (reg->is_Register()) {
343       __ movptr(reg->as_Register(), Address(rsp, load_offset));
344       load_offset += 8;
345     } else if (reg->is_XMMRegister()) {
346       // Java API doesn't support vector args
347       __ movdqu(reg->as_XMMRegister(), Address(rsp, load_offset));
348       load_offset += 16;
349     }
350     // do nothing for stack
351   }
352   __ block_comment("} restore_native_args ");
353 }
354 
355 static bool is_valid_XMM(XMMRegister reg) {
356   return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
357 }
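// xmm16-xmm31 exist only with AVX-512 (EVEX encoding), so unless UseAVX >= 3
// only the first 16 XMM registers may be used, even though the register
// objects themselves pass is_valid().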
358 
359 // for callee saved regs, according to the caller's ABI
360 static int compute_reg_save_area_size(const ABIDescriptor& abi) {
361   int size = 0;
362   for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
363     if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
364     if (!abi.is_volatile_reg(reg)) {
365       size += 8; // bytes
366     }
367   }
368 
369   for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
370     if (!abi.is_volatile_reg(reg)) {
371       if (UseAVX >= 3) {
372         size += 64; // bytes
373       } else if (UseAVX >= 1) {
374         size += 32;
375       } else {
376         size += 16;
377       }
378     }
379   }
380 
381 #ifndef _WIN64
382   // for mxcsr
383   size += 8;
384 #endif
385 
386   return size;
387 }
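// The 64/32/16-byte slots correspond to the full zmm/ymm/xmm register width;
// presumably the widest available state is saved because the upcalled Java
// code may use any vector width the JIT supports.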
388 
389 static int compute_arg_save_area_size(const CallRegs& conv) {
390   int result_size = 0;
391   for (int i = 0; i < conv._args_length; i++) {
392     VMReg reg = conv._arg_regs[i];
393     if (reg->is_Register()) {
394       result_size += 8;
395     } else if (reg->is_XMMRegister()) {
396       // Java API doesn't support vector args
397       result_size += 16;
398     }
399     // do nothing for stack
400   }
401   return result_size;
402 }
403 
404 static int compute_res_save_area_size(const CallRegs& conv) {
405   int result_size = 0;
406   for (int i = 0; i < conv._rets_length; i++) {
407     VMReg reg = conv._ret_regs[i];
408     if (reg->is_Register()) {
409       result_size += 8;
410     } else if (reg->is_XMMRegister()) {
411       // Java API doesn't support vector args
412       result_size += 16;
413     } else {
414       ShouldNotReachHere(); // unhandled type
415     }
416   }
417   return result_size;
418 }
419 
420 static void save_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
421   int offset = res_save_area_offset;
422   __ block_comment("{ save java result ");
423   for (int i = 0; i < conv._rets_length; i++) {
424     VMReg reg = conv._ret_regs[i];
425     if (reg->is_Register()) {
426       __ movptr(Address(rsp, offset), reg->as_Register());
427       offset += 8;
428     } else if (reg->is_XMMRegister()) {
429       // Java API doesn't support vector args
430       __ movdqu(Address(rsp, offset), reg->as_XMMRegister());
431       offset += 16;
432     } else {
433       ShouldNotReachHere(); // unhandled type
434     }
435   }
436   __ block_comment("} save java result ");
437 }
438 
439 static void restore_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
440   int offset = res_save_area_offset;
441   __ block_comment("{ restore java result ");
442   for (int i = 0; i < conv._rets_length; i++) {
443     VMReg reg = conv._ret_regs[i];
444     if (reg->is_Register()) {
445       __ movptr(reg->as_Register(), Address(rsp, offset));
446       offset += 8;
447     } else if (reg->is_XMMRegister()) {
448       // Java API doesn't support vector args
449       __ movdqu(reg->as_XMMRegister(), Address(rsp, offset));
450       offset += 16;
451     } else {
452       ShouldNotReachHere(); // unhandled type
453     }
454   }
455   __ block_comment("} restore java result ");
456 }
457 
458 constexpr int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
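// MXCSR bits 0-5 are the sticky exception flags; 0xFFC0 masks them out so
// that only the control bits (rounding mode and exception masks) count when
// the saved MXCSR is checked against the standard value.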
459 
460 static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
461   // 1. iterate all registers in the architecture
462   //     - check if they are volatile or not for the given abi
463   //     - if NOT, we need to save it here
464   // 2. save mxcsr on non-windows platforms
465 
466   int offset = reg_save_area_offset;
467 
468   __ block_comment("{ preserve_callee_saved_regs ");
469   for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
470     if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
471     if (!abi.is_volatile_reg(reg)) {
472       __ movptr(Address(rsp, offset), reg);
473       offset += 8;
474     }
475   }
476 
477   for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
 // [file lines 478-528 unchanged and not shown: the remainder of preserve_callee_saved_registers
 //  and the head of restore_callee_saved_registers; the lines below are its XMM-restore loop]
529       if (UseAVX >= 3) {
530         __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
531         offset += 64;
532       } else if (UseAVX >= 1) {
533         __ vmovdqu(reg, Address(rsp, offset));
534         offset += 32;
535       } else {
536         __ movdqu(reg, Address(rsp, offset));
537         offset += 16;
538       }
539     }
540   }
541 
542 #ifndef _WIN64
543   const Address mxcsr_save(rsp, offset);
544   __ ldmxcsr(mxcsr_save);
545 #endif
546 
547   __ block_comment("} restore_callee_saved_regs ");
548 }
549 
550 static void shuffle_arguments(MacroAssembler* _masm, const GrowableArray<ArgMove>& arg_moves) {
551   for (int i = 0; i < arg_moves.length(); i++) {
552     ArgMove arg_mv = arg_moves.at(i);
553     BasicType arg_bt     = arg_mv.bt;
554     VMRegPair from_vmreg = arg_mv.from;
555     VMRegPair to_vmreg   = arg_mv.to;
556 
557     assert(
558       !((from_vmreg.first()->is_Register() && to_vmreg.first()->is_XMMRegister())
559       || (from_vmreg.first()->is_XMMRegister() && to_vmreg.first()->is_Register())),
560        "move between gp and fp reg not supported");
561 
562     __ block_comment(err_msg("bt=%s", null_safe_string(type2name(arg_bt))));
563     switch (arg_bt) {
564       case T_BOOLEAN:
565       case T_BYTE:
566       case T_SHORT:
567       case T_CHAR:
568       case T_INT:
569        __ move32_64(from_vmreg, to_vmreg);
570        break;
571 
572       case T_FLOAT:
573         __ float_move(from_vmreg, to_vmreg);
574         break;
575 
576       case T_DOUBLE:
577         __ double_move(from_vmreg, to_vmreg);
578         break;
579 
580       case T_LONG :
581         __ long_move(from_vmreg, to_vmreg);
582         break;
583 
584       default:
585         fatal("found in upcall args: %s", type2name(arg_bt));
586     }
587   }
588 }
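// shuffle_arguments relies on the MacroAssembler move helpers (move32_64,
// float_move, double_move, long_move) for each Java type. Its gp<->fp assert
// is presumably safe because both calling conventions pass integral types in
// general-purpose registers and T_FLOAT/T_DOUBLE in XMM registers, so such a
// move never arises.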
589 
 590 // Register is a class, but registers are identified by numerical values:
 591 // "0" denotes both rax and xmm0, which spuriously triggers -Wnonnull, so that warning is ignored here.
592 PRAGMA_DIAG_PUSH
593 PRAGMA_NONNULL_IGNORED
594 address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry, jobject jabi, jobject jconv) {
595   ResourceMark rm;
596   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
597   const CallRegs conv = ForeignGlobals::parse_call_regs(jconv);
598   assert(conv._rets_length <= 1, "no multi reg returns");
599   CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);
600 
601   int register_size = sizeof(uintptr_t);
602   int buffer_alignment = xmm_reg_size;
603 
604   int out_arg_area = -1;
605   BasicType ret_type;
606   GrowableArray<ArgMove> arg_moves = compute_argument_shuffle(entry, out_arg_area, conv, ret_type);
607   assert(out_arg_area != -1, "Should have been set");
608   DEBUG_ONLY(print_arg_moves(arg_moves, entry);)
609 
610   // out_arg_area (for stack arguments) doubles as shadow space for native calls.
611   // make sure it is big enough.
612   if (out_arg_area < frame::arg_reg_save_area_bytes) {
613     out_arg_area = frame::arg_reg_save_area_bytes;
614   }
615 
616   int reg_save_area_size = compute_reg_save_area_size(abi);
617   int arg_save_area_size = compute_arg_save_area_size(conv);
618   int res_save_area_size = compute_res_save_area_size(conv);
619 
620   int shuffle_area_offset    = 0;
621   int res_save_area_offset   = shuffle_area_offset    + out_arg_area;
622   int arg_save_area_offset   = res_save_area_offset   + res_save_area_size;
623   int reg_save_area_offset   = arg_save_area_offset   + arg_save_area_size;
624   int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
625   int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);
626 
627   int frame_size = frame_bottom_offset;
628   frame_size = align_up(frame_size, StackAlignmentInBytes);
629 
 630   // OK, the space we have allocated will look like:
631   //
632   //
633   // FP-> |                     |
634   //      |---------------------| = frame_bottom_offset = frame_size
635   //      |                     |
636   //      | FrameData           |
637   //      |---------------------| = frame_data_offset
638   //      |                     |
639   //      | reg_save_area       |
 640   //      |---------------------| = reg_save_area_offset
641   //      |                     |
642   //      | arg_save_area       |
 643   //      |---------------------| = arg_save_area_offset
644   //      |                     |
645   //      | res_save_area       |
 646   //      |---------------------| = res_save_area_offset
647   //      |                     |
648   // SP-> | out_arg_area        |   needs to be at end for shadow space
649   //
650   //
651 
652   //////////////////////////////////////////////////////////////////////////////
653 
654   MacroAssembler* _masm = new MacroAssembler(&buffer);
655   address start = __ pc();
656   __ enter(); // set up frame
657   if ((abi._stack_alignment_bytes % 16) != 0) {
658     // stack alignment of caller is not a multiple of 16
659     __ andptr(rsp, -StackAlignmentInBytes); // align stack
660   }
661   // allocate frame (frame_size is also aligned, so stack is still aligned)
662   __ subptr(rsp, frame_size);
663 
664   // we have to always spill args since we need to do a call to get the thread
665   // (and maybe attach it).
666   save_native_arguments(_masm, conv, arg_save_area_offset);
667 
668   preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
669 
670   __ block_comment("{ on_entry");
671   __ vzeroupper();
672   __ lea(c_rarg0, Address(rsp, frame_data_offset));
673   // stack already aligned
674   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
675   __ movptr(r15_thread, rax);
676   __ reinit_heapbase();
677   __ block_comment("} on_entry");
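  // on_entry returns the current JavaThread in rax, attaching this native
  // thread to the VM first if necessary; r15 is the dedicated thread
  // register, and reinit_heapbase() reloads r12, the compressed-oops base,
  // which the foreign caller is free to clobber.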
678 
679   __ block_comment("{ argument shuffle");
680   // TODO merge these somehow
681   restore_native_arguments(_masm, conv, arg_save_area_offset);
682   shuffle_arguments(_masm, arg_moves);
683   __ block_comment("} argument shuffle");
684 
685   __ block_comment("{ receiver ");
686   __ movptr(rscratch1, (intptr_t)receiver);
687   __ resolve_jobject(rscratch1, r15_thread, rscratch2);
688   __ movptr(j_rarg0, rscratch1);
689   __ block_comment("} receiver ");
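  // The receiver is a JNI handle, so it must be resolved to a raw oop on
  // every upcall: the referenced object may have been moved by the GC since
  // the stub was generated.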
690 
691   __ mov_metadata(rbx, entry);
692   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
693 
694   __ call(Address(rbx, Method::from_compiled_offset()));
695 
696   save_java_result(_masm, conv, res_save_area_offset);
697 
698   __ block_comment("{ on_exit");
699   __ vzeroupper();
700   __ lea(c_rarg0, Address(rsp, frame_data_offset));
701   // stack already aligned
702   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
703   __ reinit_heapbase();
704   __ block_comment("} on_exit");
705 
706   restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
707 
708   restore_java_result(_masm, conv, res_save_area_offset);
709 
710   // return value shuffle
711 #ifdef ASSERT
712   if (conv._rets_length == 1) { // 0 or 1
713     VMReg j_expected_result_reg;
714     switch (ret_type) {
715       case T_BOOLEAN:
716       case T_BYTE:
717       case T_SHORT:
718       case T_CHAR:
719       case T_INT:
720       case T_LONG:
721        j_expected_result_reg = rax->as_VMReg();
722        break;
723       case T_FLOAT:
724       case T_DOUBLE:
725         j_expected_result_reg = xmm0->as_VMReg();
726         break;
727       default:
728         fatal("unexpected return type: %s", type2name(ret_type));
729     }
730     // No need to move for now, since CallArranger can pick a return type
 731     // that goes in the same reg for both CCs. But at least assert that they are the same.
732     assert(conv._ret_regs[0] == j_expected_result_reg,
733      "unexpected result register: %s != %s", conv._ret_regs[0]->name(), j_expected_result_reg->name());
734   }
735 #endif
736 
737   __ leave();
738   __ ret(0);
739 
740   //////////////////////////////////////////////////////////////////////////////
741 
742   __ block_comment("{ exception handler");
743 
744   intptr_t exception_handler_offset = __ pc() - start;
745 
746   // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?
747 
748   // native caller has no idea how to handle exceptions
749   // we just crash here. Up to callee to catch exceptions.
750   __ verify_oop(rax);
751   __ vzeroupper();
752   __ mov(c_rarg0, rax);
753   __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
754   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
755   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
756   __ should_not_reach_here();
757 
758   __ block_comment("} exception handler");
759 
760   _masm->flush();
761 
762 
763 #ifndef PRODUCT
764   stringStream ss;
765   ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
766   const char* name = _masm->code_string(ss.as_string());
767 #else // PRODUCT
768   const char* name = "optimized_upcall_stub";
769 #endif // PRODUCT
770 
771   OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, in_ByteSize(frame_data_offset));
772 
773   if (TraceOptimizedUpcallStubs) {
774     blob->print_on(tty);
775     Disassembler::decode(blob, tty);
776   }
777 
778   return blob->code_begin();
779 }
780 PRAGMA_DIAG_POP
781 
782 bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
783   return true;
784 }

--- New version ---

 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 #include "precompiled.hpp"
 25 #include "asm/macroAssembler.hpp"
 26 #include "code/codeBlob.hpp"
 28 #include "code/vmreg.inline.hpp"
 29 #include "compiler/disassembler.hpp"
 30 #include "logging/logStream.hpp"
 31 #include "memory/resourceArea.hpp"
 32 #include "prims/foreign_globals.inline.hpp"
 33 #include "prims/universalUpcallHandler.hpp"
 34 #include "runtime/sharedRuntime.hpp"
 35 #include "runtime/signature.hpp"
 36 #include "runtime/stubRoutines.hpp"
 37 #include "utilities/formatBuffer.hpp"
 38 #include "utilities/globalDefinitions.hpp"
 39 
 40 #define __ _masm->
 41 
 42 static bool is_valid_XMM(XMMRegister reg) {
 43   return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16)); // why is this not covered by is_valid()?
 44 }
 45 
 46 // for callee saved regs, according to the caller's ABI
 47 static int compute_reg_save_area_size(const ABIDescriptor& abi) {
 48   int size = 0;
 49   for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
 50     if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
 51     if (!abi.is_volatile_reg(reg)) {
 52       size += 8; // bytes
 53     }
 54   }
 55 
 56   for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
 57     if (!abi.is_volatile_reg(reg)) {
 58       if (UseAVX >= 3) {
 59         size += 64; // bytes
 60       } else if (UseAVX >= 1) {
 61         size += 32;
 62       } else {
 63         size += 16;
 64       }
 65     }
 66   }
 67 
 68 #ifndef _WIN64
 69   // for mxcsr
 70   size += 8;
 71 #endif
 72 
 73   return size;
 74 }
 75 
 76 constexpr int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
 77 
 78 static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
 79   // 1. iterate all registers in the architecture
 80   //     - check if they are volatile or not for the given abi
 81   //     - if NOT, we need to save it here
 82   // 2. save mxcsr on non-windows platforms
 83 
 84   int offset = reg_save_area_offset;
 85 
 86   __ block_comment("{ preserve_callee_saved_regs ");
 87   for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
 88     if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
 89     if (!abi.is_volatile_reg(reg)) {
 90       __ movptr(Address(rsp, offset), reg);
 91       offset += 8;
 92     }
 93   }
 94 
 95   for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
 // [file lines 96-146 unchanged and not shown: the remainder of preserve_callee_saved_registers
 //  and the head of restore_callee_saved_registers; the lines below are its XMM-restore loop]
147       if (UseAVX >= 3) {
148         __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
149         offset += 64;
150       } else if (UseAVX >= 1) {
151         __ vmovdqu(reg, Address(rsp, offset));
152         offset += 32;
153       } else {
154         __ movdqu(reg, Address(rsp, offset));
155         offset += 16;
156       }
157     }
158   }
159 
160 #ifndef _WIN64
161   const Address mxcsr_save(rsp, offset);
162   __ ldmxcsr(mxcsr_save);
163 #endif
164 
165   __ block_comment("} restore_callee_saved_regs ");
166 }
 167 // Register is a class, but registers are identified by numerical values:
 168 // "0" denotes both rax and xmm0, which spuriously triggers -Wnonnull, so that warning is ignored here.
169 PRAGMA_DIAG_PUSH
170 PRAGMA_NONNULL_IGNORED
171 address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
172                                                                   BasicType* in_sig_bt, int total_in_args,
173                                                                   BasicType* out_sig_bt, int total_out_args,
174                                                                   BasicType ret_type,
175                                                                   jobject jabi, jobject jconv,
176                                                                   bool needs_return_buffer, int ret_buf_size) {
177   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
178   const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
179   CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);
180 
181   Register shuffle_reg = rbx;
182   JavaCallConv out_conv;
183   NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
184   ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
185   int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
186   int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
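  // Compared to the old version, the hand-rolled compute_argument_shuffle is
  // replaced by the shared ArgumentShuffle helper: in_conv describes where
  // the native caller placed the arguments, out_conv where the Java calling
  // convention expects them, and shuffle_reg (rbx) apparently serves the same
  // cycle-breaking role rbx had before.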
187 
188 #ifdef ASSERT
189   LogTarget(Trace, panama) lt;
190   if (lt.is_enabled()) {
191     ResourceMark rm;
192     LogStream ls(lt);
193     arg_shuffle.print_on(&ls);
194   }
195 #endif
196 
197   // out_arg_area (for stack arguments) doubles as shadow space for native calls.
198   // make sure it is big enough.
199   if (out_arg_area < frame::arg_reg_save_area_bytes) {
200     out_arg_area = frame::arg_reg_save_area_bytes;
201   }
202 
203   int reg_save_area_size = compute_reg_save_area_size(abi);
204   RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
205   RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);
206 
207   int shuffle_area_offset    = 0;
208   int res_save_area_offset   = shuffle_area_offset    + out_arg_area;
209   int arg_save_area_offset   = res_save_area_offset   + result_spiller.spill_size_bytes();
210   int reg_save_area_offset   = arg_save_area_offset   + arg_spiller.spill_size_bytes();
211   int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
212   int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);
213 
214   int ret_buf_offset = -1;
215   if (needs_return_buffer) {
216     ret_buf_offset = frame_bottom_offset;
217     frame_bottom_offset += ret_buf_size;
218   }
219 
220   int frame_size = frame_bottom_offset;
221   frame_size = align_up(frame_size, StackAlignmentInBytes);
222 
 223   // OK, the space we have allocated will look like:
224   //
225   //
226   // FP-> |                     |
227   //      |---------------------| = frame_bottom_offset = frame_size
228   //      | (optional)          |
229   //      | ret_buf             |
230   //      |---------------------| = ret_buf_offset
231   //      |                     |
232   //      | FrameData           |
233   //      |---------------------| = frame_data_offset
234   //      |                     |
235   //      | reg_save_area       |
 236   //      |---------------------| = reg_save_area_offset
237   //      |                     |
238   //      | arg_save_area       |
 239   //      |---------------------| = arg_save_area_offset
240   //      |                     |
241   //      | res_save_area       |
 242   //      |---------------------| = res_save_area_offset
243   //      |                     |
244   // SP-> | out_arg_area        |   needs to be at end for shadow space
245   //
246   //
247 
248   //////////////////////////////////////////////////////////////////////////////
249 
250   MacroAssembler* _masm = new MacroAssembler(&buffer);
251   address start = __ pc();
252   __ enter(); // set up frame
253   if ((abi._stack_alignment_bytes % 16) != 0) {
254     // stack alignment of caller is not a multiple of 16
255     __ andptr(rsp, -StackAlignmentInBytes); // align stack
256   }
257   // allocate frame (frame_size is also aligned, so stack is still aligned)
258   __ subptr(rsp, frame_size);
259 
260   // we have to always spill args since we need to do a call to get the thread
261   // (and maybe attach it).
262   arg_spiller.generate_spill(_masm, arg_save_area_offset);
263 
264   preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
265 
266   __ block_comment("{ on_entry");
267   __ vzeroupper();
268   __ lea(c_rarg0, Address(rsp, frame_data_offset));
269   // stack already aligned
270   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
271   __ movptr(r15_thread, rax);
272   __ reinit_heapbase();
273   __ block_comment("} on_entry");
274 
275   __ block_comment("{ argument shuffle");
276   arg_spiller.generate_fill(_masm, arg_save_area_offset);
277   if (needs_return_buffer) {
278     assert(ret_buf_offset != -1, "no return buffer allocated");
279     __ lea(abi._ret_buf_addr_reg, Address(rsp, ret_buf_offset));
280   }
281   arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
282   __ block_comment("} argument shuffle");
283 
284   __ block_comment("{ receiver ");
285   __ movptr(rscratch1, (intptr_t)receiver);
286   __ resolve_jobject(rscratch1, r15_thread, rscratch2);
287   __ movptr(j_rarg0, rscratch1);
288   __ block_comment("} receiver ");
289 
290   __ mov_metadata(rbx, entry);
291   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
292 
293   __ call(Address(rbx, Method::from_compiled_offset()));
294 
295   // return value shuffle
296   if (!needs_return_buffer) {
297 #ifdef ASSERT
298     if (call_regs._rets_length == 1) { // 0 or 1
299       VMReg j_expected_result_reg;
300       switch (ret_type) {
301         case T_BOOLEAN:
302         case T_BYTE:
303         case T_SHORT:
304         case T_CHAR:
305         case T_INT:
306         case T_LONG:
 307           j_expected_result_reg = rax->as_VMReg();
 308           break;
309         case T_FLOAT:
310         case T_DOUBLE:
311           j_expected_result_reg = xmm0->as_VMReg();
312           break;
313         default:
314           fatal("unexpected return type: %s", type2name(ret_type));
315       }
316       // No need to move for now, since CallArranger can pick a return type
 317       // that goes in the same reg for both CCs. But at least assert that they are the same.
318       assert(call_regs._ret_regs[0] == j_expected_result_reg,
319       "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
320     }
321 #endif
322   } else {
323     assert(ret_buf_offset != -1, "no return buffer allocated");
324     __ lea(rscratch1, Address(rsp, ret_buf_offset));
325     int offset = 0;
326     for (int i = 0; i < call_regs._rets_length; i++) {
327       VMReg reg = call_regs._ret_regs[i];
328       if (reg->is_Register()) {
329         __ movptr(reg->as_Register(), Address(rscratch1, offset));
330         offset += 8;
331       } else if (reg->is_XMMRegister()) {
332         __ movdqu(reg->as_XMMRegister(), Address(rscratch1, offset));
333         offset += 16;
334       } else {
335         ShouldNotReachHere();
336       }
337     }
338   }
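  // With needs_return_buffer the Java code does not return the value in
  // registers: the stub passed the address of the ret_buf area in
  // abi._ret_buf_addr_reg before the call, and the loop above reloads the
  // native ABI's return registers from that buffer.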
339 
340   result_spiller.generate_spill(_masm, res_save_area_offset);
341 
342   __ block_comment("{ on_exit");
343   __ vzeroupper();
344   __ lea(c_rarg0, Address(rsp, frame_data_offset));
345   // stack already aligned
346   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
347   __ reinit_heapbase();
348   __ block_comment("} on_exit");
349 
350   restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
351 
352   result_spiller.generate_fill(_masm, res_save_area_offset);
353 
354   __ leave();
355   __ ret(0);
356 
357   //////////////////////////////////////////////////////////////////////////////
358 
359   __ block_comment("{ exception handler");
360 
361   intptr_t exception_handler_offset = __ pc() - start;
362 
363   // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?
364 
365   // native caller has no idea how to handle exceptions
366   // we just crash here. Up to callee to catch exceptions.
367   __ verify_oop(rax);
368   __ vzeroupper();
369   __ mov(c_rarg0, rax);
370   __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
371   __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
372   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
373   __ should_not_reach_here();
374 
375   __ block_comment("} exception handler");
376 
377   _masm->flush();
378 
379 
380 #ifndef PRODUCT
381   stringStream ss;
382   ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
383   const char* name = _masm->code_string(ss.as_string());
384 #else // PRODUCT
385   const char* name = "optimized_upcall_stub";
386 #endif // PRODUCT
387 
388   OptimizedEntryBlob* blob
389     = OptimizedEntryBlob::create(name,
390                                  &buffer,
391                                  exception_handler_offset,
392                                  receiver,
393                                  in_ByteSize(frame_data_offset));
394 
395   if (TraceOptimizedUpcallStubs) {
396     blob->print_on(tty);
397   }
398 
399   return blob->code_begin();
400 }
401 PRAGMA_DIAG_POP