/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2019, 2021, Arm Limited. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "prims/universalUpcallHandler.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"
#include "vmreg_aarch64.inline.hpp"

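// Standard HotSpot stub-generator shorthand: '__' expands to the current
// MacroAssembler, so '__ insn(...)' reads like assembly.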
#define __ _masm->

// Size (in bytes) of the save area for registers that are callee-saved
// according to the caller's ABI.
static int compute_reg_save_area_size(const ABIDescriptor& abi) {
  int size = 0;
  for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
    Register reg = as_Register(i);
    if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      size += 8; // bytes
    }
  }

  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
    FloatRegister reg = as_FloatRegister(i);
    if (!abi.is_volatile_reg(reg)) {
      // Only the lower 64 bits of vector registers need to be preserved.
      size += 8; // bytes
    }
  }

  return size;
}

static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // Iterate over all registers in the architecture, check whether each one is
  // volatile for the given ABI, and if it is NOT, save it here.

  int offset = reg_save_area_offset;

  __ block_comment("{ preserve_callee_saved_regs ");
  for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
    Register reg = as_Register(i);
    if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ str(reg, Address(sp, offset));
      offset += 8;
    }
  }

  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
    FloatRegister reg = as_FloatRegister(i);
    if (!abi.is_volatile_reg(reg)) {
      __ strd(reg, Address(sp, offset));
      offset += 8;
    }
  }

  __ block_comment("} preserve_callee_saved_regs ");
}

static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
  // Iterate over all registers in the architecture, check whether each one is
  // volatile for the given ABI, and if it is NOT, restore it here.

  int offset = reg_save_area_offset;

  __ block_comment("{ restore_callee_saved_regs ");
  for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
    Register reg = as_Register(i);
    if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
    if (!abi.is_volatile_reg(reg)) {
      __ ldr(reg, Address(sp, offset));
      offset += 8;
    }
  }

  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
    FloatRegister reg = as_FloatRegister(i);
    if (!abi.is_volatile_reg(reg)) {
      __ ldrd(reg, Address(sp, offset));
      offset += 8;
    }
  }

  __ block_comment("} restore_callee_saved_regs ");
}

address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
                                                                  BasicType* in_sig_bt, int total_in_args,
                                                                  BasicType* out_sig_bt, int total_out_args,
                                                                  BasicType ret_type,
                                                                  jobject jabi, jobject jconv,
                                                                  bool needs_return_buffer, int ret_buf_size) {
  ResourceMark rm;
  const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
  const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
  CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);

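  // Compute the moves needed to shuffle the incoming native arguments (in_conv)
  // into the outgoing Java calling convention (out_conv), using shuffle_reg as
  // a temporary.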
  Register shuffle_reg = r19;
  JavaCallConv out_conv;
  NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
  ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
  int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);

#ifdef ASSERT
  LogTarget(Trace, panama) lt;
  if (lt.is_enabled()) {
    ResourceMark rm;
    LogStream ls(lt);
    arg_shuffle.print_on(&ls);
  }
#endif

  // out_arg_area (for stack arguments) doubles as shadow space for native calls.
  // Make sure it is big enough.
  if (out_arg_area < frame::arg_reg_save_area_bytes) {
    out_arg_area = frame::arg_reg_save_area_bytes;
  }

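  // Size the callee-save area and set up spillers for the incoming argument
  // registers (live across the on_entry call) and the Java return registers
  // (live across the on_exit call).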
  int reg_save_area_size = compute_reg_save_area_size(abi);
  RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
  RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);
  // To spill the receiver during deopt
  int deopt_spill_size = 1 * BytesPerWord;

  int shuffle_area_offset    = 0;
  int deopt_spill_offset     = shuffle_area_offset    + out_arg_area;
  int res_save_area_offset   = deopt_spill_offset     + deopt_spill_size;
  int arg_save_area_offset   = res_save_area_offset   + result_spiller.spill_size_bytes();
  int reg_save_area_offset   = arg_save_area_offset   + arg_spiller.spill_size_bytes();
  int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
  int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);

  int ret_buf_offset = -1;
  if (needs_return_buffer) {
    ret_buf_offset = frame_bottom_offset;
    frame_bottom_offset += ret_buf_size;
  }

  int frame_size = frame_bottom_offset;
  frame_size = align_up(frame_size, StackAlignmentInBytes);

  // The space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------| = frame_bottom_offset = frame_size
  //      | (optional)          |
  //      | ret_buf             |
  //      |---------------------| = ret_buf_offset
  //      |                     |
  //      | FrameData           |
  //      |---------------------| = frame_data_offset
  //      |                     |
  //      | reg_save_area       |
  //      |---------------------| = reg_save_area_offset
  //      |                     |
  //      | arg_save_area       |
  //      |---------------------| = arg_save_area_offset
  //      |                     |
  //      | res_save_area       |
  //      |---------------------| = res_save_area_offset
  //      |                     |
  //      | deopt_spill         |
  //      |---------------------| = deopt_spill_offset
  //      |                     |
  // SP-> | out_arg_area        |   needs to be at the end for shadow space
  //
  //

  //////////////////////////////////////////////////////////////////////////////

  MacroAssembler* _masm = new MacroAssembler(&buffer);
  address start = __ pc();
  __ enter(); // set up frame
  assert((abi._stack_alignment_bytes % 16) == 0, "must be 16 byte aligned");
  // allocate frame (frame_size is also aligned, so stack is still aligned)
  __ sub(sp, sp, frame_size);

  // We always have to spill the argument registers, since we need to make a
  // call to get the thread (and may also need to attach it).
  arg_spiller.generate_spill(_masm, arg_save_area_offset);
  preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);

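  // on_entry returns the JavaThread* for the current thread (attaching it to
  // the VM first if needed); it is installed in rthread below. reinit_heapbase()
  // restores the heap base register, which the native caller may have clobbered.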
  __ block_comment("{ on_entry");
  __ lea(c_rarg0, Address(sp, frame_data_offset));
  __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::on_entry));
  __ blr(rscratch1);
  __ mov(rthread, r0);
  __ reinit_heapbase();
  __ block_comment("} on_entry");

  __ block_comment("{ argument shuffle");
  arg_spiller.generate_fill(_masm, arg_save_area_offset);
  if (needs_return_buffer) {
    assert(ret_buf_offset != -1, "no return buffer allocated");
    __ lea(abi._ret_buf_addr_reg, Address(sp, ret_buf_offset));
  }
  arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
  __ block_comment("} argument shuffle");

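  // The receiver is passed in as a JNI handle; resolve it to an oop and pass
  // it as the first Java argument.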
  __ block_comment("{ receiver ");
  __ movptr(shuffle_reg, (intptr_t)receiver);
  __ resolve_jobject(shuffle_reg, rthread, rscratch2);
  __ mov(j_rarg0, shuffle_reg);
  __ block_comment("} receiver ");

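  // The Java calling convention expects the target Method* in rmethod.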
  __ mov_metadata(rmethod, entry);
  __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized

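  // Call the target method through its from-compiled entry point.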
  __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
  __ blr(rscratch1);

  // return value shuffle
  if (!needs_return_buffer) {
#ifdef ASSERT
    if (call_regs._rets_length == 1) { // 0 or 1 return values
      VMReg j_expected_result_reg;
      switch (ret_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_SHORT:
        case T_CHAR:
        case T_INT:
        case T_LONG:
          j_expected_result_reg = r0->as_VMReg();
          break;
        case T_FLOAT:
        case T_DOUBLE:
          j_expected_result_reg = v0->as_VMReg();
          break;
        default:
          fatal("unexpected return type: %s", type2name(ret_type));
      }
      // No need to move anything for now, since CallArranger can pick a return
      // type that goes in the same register for both CCs, but at least assert
      // that they are the same.
      assert(call_regs._ret_regs[0] == j_expected_result_reg,
             "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
    }
#endif
  } else {
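    // Copy the Java return value(s) out of the return buffer into the native
    // ABI's return registers.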
    assert(ret_buf_offset != -1, "no return buffer allocated");
    __ lea(rscratch1, Address(sp, ret_buf_offset));
    int offset = 0;
    for (int i = 0; i < call_regs._rets_length; i++) {
      VMReg reg = call_regs._ret_regs[i];
      if (reg->is_Register()) {
        __ ldr(reg->as_Register(), Address(rscratch1, offset));
        offset += 8;
      } else if (reg->is_FloatRegister()) {
        __ ldrd(reg->as_FloatRegister(), Address(rscratch1, offset));
        offset += 16; // needs to match VECTOR_REG_SIZE in AArch64Architecture (Java)
      } else {
        ShouldNotReachHere();
      }
    }
  }

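  // Spill the return value(s) around the on_exit call, which may clobber them.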
  result_spiller.generate_spill(_masm, res_save_area_offset);

  __ block_comment("{ on_exit");
  __ lea(c_rarg0, Address(sp, frame_data_offset));
  // stack already aligned
  __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::on_exit));
  __ blr(rscratch1);
  __ block_comment("} on_exit");

  restore_callee_saved_registers(_masm, abi, reg_save_area_offset);

  result_spiller.generate_fill(_masm, res_save_area_offset);

  __ leave();
  __ ret(lr);

  //////////////////////////////////////////////////////////////////////////////

  __ block_comment("{ exception handler");

  intptr_t exception_handler_offset = __ pc() - start;

  // The native caller has no idea how to handle exceptions, so we just crash
  // here. It is up to the Java callee to catch any exceptions.
  __ verify_oop(r0);
  __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::handle_uncaught_exception));
  __ blr(rscratch1);
  __ should_not_reach_here();

  __ block_comment("} exception handler");

  _masm->flush();

#ifndef PRODUCT
  stringStream ss;
  ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
  const char* name = _masm->code_string(ss.as_string());
#else // PRODUCT
  const char* name = "optimized_upcall_stub";
#endif // PRODUCT

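  // Wrap the generated code in a blob; frame_data_offset lets the runtime
  // locate the FrameData within the stub's frame.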
  OptimizedEntryBlob* blob
    = OptimizedEntryBlob::create(name,
                                 &buffer,
                                 exception_handler_offset,
                                 receiver,
                                 in_ByteSize(frame_data_offset));

  if (TraceOptimizedUpcallStubs) {
    blob->print_on(tty);
  }

  return blob->code_begin();
}