/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "compiler/disassembler.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/foreign_globals.inline.hpp"
#include "prims/universalUpcallHandler.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"

#define __ _masm->

static bool is_valid_XMM(XMMRegister reg) {
  // is_valid() only checks the encoding range; xmm16-xmm31 additionally
  // require AVX-512 (the EVEX encoding), so we also check UseAVX here.
  return reg->is_valid() && (UseAVX >= 3 || (reg->encoding() < 16));
}

// for callee saved regs, according to the caller's ABI
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
  int size = 0;
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      size += 8; // bytes
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        size += 64; // bytes, full ZMM register
      } else if (UseAVX >= 1) {
        size += 32; // bytes, full YMM register
      } else {
        size += 16; // bytes, full XMM register
      }
    }
  }

#ifndef _WIN64
  // for mxcsr
  size += 8;
#endif

  return size;
}
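// For example, assuming the descriptor matches the standard SysV ABI (rbx and
// r12-r15 callee-saved among the registers handled here, and all XMM registers
// volatile), compute_reg_save_area_size() yields 5 * 8 + 8 (mxcsr) = 48 bytes.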

constexpr int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
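// (Bits 0-5 of MXCSR are the sticky exception status flags; the mask keeps
// only the control bits: DAZ, the exception masks, rounding control, and FTZ.)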

static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to save it here
  // 2. save mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ preserve_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(Address(rsp, offset), reg);
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(Address(rsp, offset), reg, Assembler::AVX_512bit); // full ZMM
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(Address(rsp, offset), reg); // full YMM
        offset += 32;
      } else {
        __ movdqu(Address(rsp, offset), reg);
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  {
    // Save the caller's MXCSR, then load the standard value if the caller's
    // control and mask bits differ from it.
    const Address mxcsr_save(rsp, offset);
    Label skip_ldmx;
    __ stmxcsr(mxcsr_save);
    __ movl(rax, mxcsr_save);
    __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
    ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
    __ cmp32(rax, mxcsr_std);
    __ jcc(Assembler::equal, skip_ldmx);
    __ ldmxcsr(mxcsr_std);
    __ bind(skip_ldmx);
  }
#endif

  __ block_comment("} preserve_callee_saved_regs ");
}

static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // 1. iterate all registers in the architecture
  //     - check if they are volatile or not for the given abi
  //     - if NOT, we need to restore it here
  // 2. restore mxcsr on non-windows platforms

  int offset = reg_save_area_offset;

  __ block_comment("{ restore_callee_saved_regs ");
  for (Register reg = as_Register(0); reg->is_valid(); reg = reg->successor()) {
    if (reg == rbp || reg == rsp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ movptr(reg, Address(rsp, offset));
      offset += 8;
    }
  }

  for (XMMRegister reg = as_XMMRegister(0); is_valid_XMM(reg); reg = reg->successor()) {
    if (!abi.is_volatile_reg(reg)) {
      if (UseAVX >= 3) {
        __ evmovdqul(reg, Address(rsp, offset), Assembler::AVX_512bit);
        offset += 64;
      } else if (UseAVX >= 1) {
        __ vmovdqu(reg, Address(rsp, offset));
        offset += 32;
      } else {
        __ movdqu(reg, Address(rsp, offset));
        offset += 16;
      }
    }
  }

#ifndef _WIN64
  const Address mxcsr_save(rsp, offset);
  __ ldmxcsr(mxcsr_save);
#endif

  __ block_comment("} restore_callee_saved_regs ");
}

// Register is a class, but registers are identified by numerical value:
// "0" is the encoding of both rax and xmm0, which -Wnonnull mistakes for a
// null pointer, so we need to ignore it here.
PRAGMA_DIAG_PUSH
PRAGMA_NONNULL_IGNORED
address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
                                                                  BasicType* in_sig_bt, int total_in_args,
                                                                  BasicType* out_sig_bt, int total_out_args,
                                                                  BasicType ret_type,
                                                                  jobject jabi, jobject jconv,
                                                                  bool needs_return_buffer, int ret_buf_size) {
  const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
  const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
  CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);

  Register shuffle_reg = rbx;
  JavaCallConv out_conv;
  NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
  ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
  int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
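  // (VMRegImpl::stack_slot_size is 4 bytes, so this converts slots to bytes
  // and rounds up to the stack alignment.)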

#ifdef ASSERT
  LogTarget(Trace, panama) lt;
  if (lt.is_enabled()) {
    ResourceMark rm;
    LogStream ls(lt);
    arg_shuffle.print_on(&ls);
  }
#endif

  // out_arg_area (for stack arguments) doubles as shadow space for native calls.
  // Make sure it is big enough.
  if (out_arg_area < frame::arg_reg_save_area_bytes) {
    out_arg_area = frame::arg_reg_save_area_bytes;
  }
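  // (On Windows, frame::arg_reg_save_area_bytes is the 32-byte shadow space a
  // callee may use to spill its register arguments; elsewhere it is 0.)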

  int reg_save_area_size = compute_reg_save_area_size(abi);
  RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
  RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);

  int shuffle_area_offset    = 0;
  int res_save_area_offset   = shuffle_area_offset    + out_arg_area;
  int arg_save_area_offset   = res_save_area_offset   + result_spiller.spill_size_bytes();
  int reg_save_area_offset   = arg_save_area_offset   + arg_spiller.spill_size_bytes();
  int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
  int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);

  int ret_buf_offset = -1;
  if (needs_return_buffer) {
    ret_buf_offset = frame_bottom_offset;
    frame_bottom_offset += ret_buf_size;
  }
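  // (A return buffer is used when the Java and native conventions disagree on
  // where the result lives, e.g. when the native ABI returns a value in
  // multiple registers. The Java code fills the buffer, and it is unpacked
  // into the native return registers after the call below.)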

  int frame_size = frame_bottom_offset;
  frame_size = align_up(frame_size, StackAlignmentInBytes);

  // The space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------| = frame_bottom_offset = frame_size
  //      | (optional)          |
  //      | ret_buf             |
  //      |---------------------| = ret_buf_offset
  //      |                     |
  //      | FrameData           |
  //      |---------------------| = frame_data_offset
  //      |                     |
  //      | reg_save_area       |
  //      |---------------------| = reg_save_area_offset
  //      |                     |
  //      | arg_save_area       |
  //      |---------------------| = arg_save_area_offset
  //      |                     |
  //      | res_save_area       |
  //      |---------------------| = res_save_area_offset
  //      |                     |
  // SP-> | out_arg_area        |   needs to be at end for shadow space
  //
  //

  //////////////////////////////////////////////////////////////////////////////

  MacroAssembler* _masm = new MacroAssembler(&buffer);
  address start = __ pc();
  __ enter(); // set up frame
  if ((abi._stack_alignment_bytes % 16) != 0) {
    // stack alignment of caller is not a multiple of 16
    __ andptr(rsp, -StackAlignmentInBytes); // align stack
  }
  // allocate frame (frame_size is also aligned, so stack is still aligned)
  __ subptr(rsp, frame_size);

  // we have to always spill args since we need to do a call to get the thread
  // (and maybe attach it).
  arg_spiller.generate_spill(_masm, arg_save_area_offset);

  preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);

  __ block_comment("{ on_entry");
  __ vzeroupper();
  __ lea(c_rarg0, Address(rsp, frame_data_offset));
  // stack already aligned
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
  __ movptr(r15_thread, rax); // on_entry returns the (possibly freshly attached) JavaThread*
  __ reinit_heapbase();       // restore r12, the compressed-oops heap base
  __ block_comment("} on_entry");

  __ block_comment("{ argument shuffle");
  arg_spiller.generate_fill(_masm, arg_save_area_offset);
  if (needs_return_buffer) {
    assert(ret_buf_offset != -1, "no return buffer allocated");
    __ lea(abi._ret_buf_addr_reg, Address(rsp, ret_buf_offset));
  }
  arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
  __ block_comment("} argument shuffle");

  __ block_comment("{ receiver ");
  __ movptr(rscratch1, (intptr_t)receiver);
  __ resolve_jobject(rscratch1, r15_thread, rscratch2); // JNI handle -> oop
  __ movptr(j_rarg0, rscratch1);
  __ block_comment("} receiver ");

  __ mov_metadata(rbx, entry);
  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized

  __ call(Address(rbx, Method::from_compiled_offset()));
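  // (from_compiled_offset() is the entry point used by compiled callers; it
  // dispatches through an adapter if the method has no compiled code.)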

  // return value shuffle
  if (!needs_return_buffer) {
#ifdef ASSERT
    if (call_regs._rets_length == 1) { // _rets_length is 0 or 1
      VMReg j_expected_result_reg;
      switch (ret_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_SHORT:
        case T_CHAR:
        case T_INT:
        case T_LONG:
          j_expected_result_reg = rax->as_VMReg();
          break;
        case T_FLOAT:
        case T_DOUBLE:
          j_expected_result_reg = xmm0->as_VMReg();
          break;
        default:
          fatal("unexpected return type: %s", type2name(ret_type));
      }
      // No need to move for now, since CallArranger can pick a return type
      // that goes in the same reg for both CCs, but at least assert that they are the same.
      assert(call_regs._ret_regs[0] == j_expected_result_reg,
             "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
    }
#endif
  } else {
    assert(ret_buf_offset != -1, "no return buffer allocated");
    // Unpack the return buffer into the native return registers.
    __ lea(rscratch1, Address(rsp, ret_buf_offset));
    int offset = 0;
    for (int i = 0; i < call_regs._rets_length; i++) {
      VMReg reg = call_regs._ret_regs[i];
      if (reg->is_Register()) {
        __ movptr(reg->as_Register(), Address(rscratch1, offset));
        offset += 8;
      } else if (reg->is_XMMRegister()) {
        __ movdqu(reg->as_XMMRegister(), Address(rscratch1, offset));
        offset += 16;
      } else {
        ShouldNotReachHere();
      }
    }
  }

  result_spiller.generate_spill(_masm, res_save_area_offset);

  __ block_comment("{ on_exit");
  __ vzeroupper();
  __ lea(c_rarg0, Address(rsp, frame_data_offset));
  // stack already aligned
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
  __ reinit_heapbase();
  __ block_comment("} on_exit");

  restore_callee_saved_registers(_masm, abi, reg_save_area_offset);

  result_spiller.generate_fill(_masm, res_save_area_offset);

  __ leave();
  __ ret(0);

  //////////////////////////////////////////////////////////////////////////////

  __ block_comment("{ exception handler");

  intptr_t exception_handler_offset = __ pc() - start;

  // TODO: this is always the same, can we bypass and call handle_uncaught_exception directly?

  // The native caller has no idea how to handle exceptions,
  // so we just crash here. It is up to the Java callee to catch exceptions
  // before returning to native code.
  __ verify_oop(rax);
  __ vzeroupper();
  __ mov(c_rarg0, rax);
  __ andptr(rsp, -StackAlignmentInBytes); // align stack as required by ABI
  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows (not really needed)
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::handle_uncaught_exception)));
  __ should_not_reach_here();

  __ block_comment("} exception handler");

  _masm->flush();

#ifndef PRODUCT
  stringStream ss;
  ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
  const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
  const char* name = "optimized_upcall_stub";
#endif // PRODUCT

  OptimizedEntryBlob* blob
    = OptimizedEntryBlob::create(name,
                                 &buffer,
                                 exception_handler_offset,
                                 receiver,
                                 in_ByteSize(frame_data_offset));

  if (TraceOptimizedUpcallStubs) {
    blob->print_on(tty);
  }

  return blob->code_begin();
}
PRAGMA_DIAG_POP