
src/hotspot/cpu/aarch64/universalUpcallHandler_aarch64.cpp

@@ -1,8 +1,8 @@
  /*
-  * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
-  * Copyright (c) 2019, Arm Limited. All rights reserved.
+  * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+  * Copyright (c) 2019, 2021, Arm Limited. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.

@@ -22,89 +22,322 @@
   * questions.
   */
  
  #include "precompiled.hpp"
  #include "asm/macroAssembler.hpp"
+ #include "logging/logStream.hpp"
  #include "memory/resourceArea.hpp"
  #include "prims/universalUpcallHandler.hpp"
+ #include "runtime/sharedRuntime.hpp"
+ #include "runtime/signature.hpp"
+ #include "runtime/signature.hpp"
+ #include "runtime/stubRoutines.hpp"
+ #include "utilities/formatBuffer.hpp"
+ #include "utilities/globalDefinitions.hpp"
+ #include "vmreg_aarch64.inline.hpp"
  
  #define __ _masm->
  
- // 1. Create buffer according to layout
- // 2. Load registers & stack args into buffer
- // 3. Call upcall helper with upcall handler instance & buffer pointer (C++ ABI)
- // 4. Load return value from buffer into foreign ABI registers
- // 5. Return
- address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) {
-   ResourceMark rm;
-   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
-   const BufferLayout layout = ForeignGlobals::parse_buffer_layout(jlayout);
+ // Size of the save area for registers that are callee-saved according to the caller's ABI.
+ static int compute_reg_save_area_size(const ABIDescriptor& abi) {
+   int size = 0;
+   for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
+     Register reg = as_Register(i);
+     if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
+     if (!abi.is_volatile_reg(reg)) {
+       size += 8; // bytes
+     }
+   }
  
-   CodeBuffer buffer("upcall_stub", 1024, upcall_stub_size);
+   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+     FloatRegister reg = as_FloatRegister(i);
+     if (!abi.is_volatile_reg(reg)) {
+       // Only the lower 64 bits of vector registers need to be preserved.
+       size += 8; // bytes
+     }
+   }
  
-   MacroAssembler* _masm = new MacroAssembler(&buffer);
+   return size;
+ }
  
-   // stub code
-   __ enter();
+ static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
+   // 1. iterate all registers in the architecture
+   //     - check if they are volatile or not for the given abi
+   //     - if NOT, we need to save them here
  
-   // save pointer to JNI receiver handle into constant segment
-   Address rec_adr = InternalAddress(__ address_constant((address)rec));
+   int offset = reg_save_area_offset;
  
-   assert(abi._stack_alignment_bytes % 16 == 0, "stack must be 16 byte aligned");
+   __ block_comment("{ preserve_callee_saved_regs ");
+   for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
+     Register reg = as_Register(i);
+     if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
+     if (!abi.is_volatile_reg(reg)) {
+       __ str(reg, Address(sp, offset));
+       offset += 8;
+     }
+   }
  
-   __ sub(sp, sp, (int) align_up(layout.buffer_size, abi._stack_alignment_bytes));
+   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+     FloatRegister reg = as_FloatRegister(i);
+     if (!abi.is_volatile_reg(reg)) {
+       __ strd(reg, Address(sp, offset));
+       offset += 8;
+     }
+   }
+ 
+   __ block_comment("} preserve_callee_saved_regs ");
+ }
  
-   // TODO: This stub only uses registers which are caller-save in the
-   //       standard C ABI. If this is called from a different ABI then
-   //       we need to save registers here according to abi.is_volatile_reg.
+ static void restore_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
+   // 1. iterate all registers in the architecture
+   //     - check if they are volatile or not for the given abi
+   //     - if NOT, we need to restore them here
+ 
+   int offset = reg_save_area_offset;
+ 
+   __ block_comment("{ restore_callee_saved_regs ");
+   for (int i = 0; i < RegisterImpl::number_of_registers; i++) {
+     Register reg = as_Register(i);
+     if (reg == rfp || reg == sp) continue; // saved/restored by prologue/epilogue
+     if (!abi.is_volatile_reg(reg)) {
+       __ ldr(reg, Address(sp, offset));
+       offset += 8;
+     }
+   }
  
-   for (int i = 0; i < abi._integer_argument_registers.length(); i++) {
-     Register reg = abi._integer_argument_registers.at(i);
-     ssize_t offset = layout.arguments_integer + i * sizeof(uintptr_t);
-     __ str(reg, Address(sp, offset));
+   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+     FloatRegister reg = as_FloatRegister(i);
+     if (!abi.is_volatile_reg(reg)) {
+       __ ldrd(reg, Address(sp, offset));
+       offset += 8;
+     }
    }
  
-   for (int i = 0; i < abi._vector_argument_registers.length(); i++) {
-     FloatRegister reg = abi._vector_argument_registers.at(i);
-     ssize_t offset = layout.arguments_vector + i * float_reg_size;
-     __ strq(reg, Address(sp, offset));
+   __ block_comment("} restore_callee_saved_regs ");
+ }
+ 
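+ // An optimized upcall stub, in outline:
+ // 1. Allocate the frame and spill the native argument registers
+ // 2. Call on_entry to get the current thread (attaching it if needed)
+ // 3. Shuffle arguments from the native calling convention to the Java one
+ // 4. Call the target method through its from_compiled entry point
+ // 5. Shuffle the result, call on_exit, restore callee-saved registers, and return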
+ address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry,
+                                                                   BasicType* in_sig_bt, int total_in_args,
+                                                                   BasicType* out_sig_bt, int total_out_args,
+                                                                   BasicType ret_type,
+                                                                   jobject jabi, jobject jconv,
+                                                                   bool needs_return_buffer, int ret_buf_size) {
+   ResourceMark rm;
+   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
+   const CallRegs call_regs = ForeignGlobals::parse_call_regs(jconv);
+   CodeBuffer buffer("upcall_stub_linkToNative", /* code_size = */ 2048, /* locs_size = */ 1024);
+ 
+   Register shuffle_reg = r19;
+   JavaCallConv out_conv;
+   NativeCallConv in_conv(call_regs._arg_regs, call_regs._args_length);
+   ArgumentShuffle arg_shuffle(in_sig_bt, total_in_args, out_sig_bt, total_out_args, &in_conv, &out_conv, shuffle_reg->as_VMReg());
+   int stack_slots = SharedRuntime::out_preserve_stack_slots() + arg_shuffle.out_arg_stack_slots();
+   int out_arg_area = align_up(stack_slots * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
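+   // For example, assuming 4-byte stack slots and 16-byte stack alignment,
+   // 4 out-arg slots and no preserved slots give align_up(4 * 4, 16) = 16 bytes.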
+ 
+ #ifdef ASSERT
+   LogTarget(Trace, panama) lt;
+   if (lt.is_enabled()) {
+     ResourceMark rm;
+     LogStream ls(lt);
+     arg_shuffle.print_on(&ls);
+   }
+ #endif
+ 
+   // out_arg_area (for stack arguments) doubles as shadow space for native calls.
+   // Make sure it is big enough.
+   if (out_arg_area < frame::arg_reg_save_area_bytes) {
+     out_arg_area = frame::arg_reg_save_area_bytes;
+   }
+ 
+   int reg_save_area_size = compute_reg_save_area_size(abi);
+   RegSpiller arg_spiller(call_regs._arg_regs, call_regs._args_length);
+   RegSpiller result_spiller(call_regs._ret_regs, call_regs._rets_length);
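+   // Each RegSpiller computes the size of the spill area for its register set
+   // and emits the matching spill/fill code (see generate_spill/generate_fill below).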
+   // To spill receiver during deopt
+   int deopt_spill_size = 1 * BytesPerWord;
+ 
+   int shuffle_area_offset    = 0;
+   int deopt_spill_offset     = shuffle_area_offset    + out_arg_area;
+   int res_save_area_offset   = deopt_spill_offset     + deopt_spill_size;
+   int arg_save_area_offset   = res_save_area_offset   + result_spiller.spill_size_bytes();
+   int reg_save_area_offset   = arg_save_area_offset   + arg_spiller.spill_size_bytes();
+   int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
+   int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);
+ 
+   int ret_buf_offset = -1;
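+   // If needed, a return buffer is reserved at the bottom of the frame. The Java
+   // callee stores its result there, and the return value shuffle below copies
+   // it into the native return registers.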
+   if (needs_return_buffer) {
+     ret_buf_offset = frame_bottom_offset;
+     frame_bottom_offset += ret_buf_size;
    }
  
-   // Capture prev stack pointer (stack arguments base)
-   __ add(rscratch1, rfp, 16);   // Skip saved FP and LR
-   __ str(rscratch1, Address(sp, layout.stack_args));
+   int frame_size = frame_bottom_offset;
+   frame_size = align_up(frame_size, StackAlignmentInBytes);
  
-   // Call upcall helper
-   __ ldr(c_rarg0, rec_adr);
-   __ mov(c_rarg1, sp);
-   __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::attach_thread_and_do_upcall));
+   // The space we have allocated will look like:
+   //
+   //
+   // FP-> |                     |
+   //      |---------------------| = frame_bottom_offset = frame_size
+   //      | (optional)          |
+   //      | ret_buf             |
+   //      |---------------------| = ret_buf_offset
+   //      |                     |
+   //      | FrameData           |
+   //      |---------------------| = frame_data_offset
+   //      |                     |
+   //      | reg_save_area       |
+   //      |---------------------| = reg_save_area_offset
+   //      |                     |
+   //      | arg_save_area       |
+   //      |---------------------| = arg_save_area_offset
+   //      |                     |
+   //      | res_save_area       |
+   //      |---------------------| = res_save_area_offset
+   //      |                     |
+   //      | deopt_spill         |
+   //      |---------------------| = deopt_spill_offset
+   //      |                     |
+   // SP-> | out_arg_area        |   needs to be at end for shadow space
+   //
+   //
+ 
+   //////////////////////////////////////////////////////////////////////////////
+ 
+   MacroAssembler* _masm = new MacroAssembler(&buffer);
+   address start = __ pc();
+   __ enter(); // set up frame
+   assert((abi._stack_alignment_bytes % 16) == 0, "must be 16 byte aligned");
+   // allocate frame (frame_size is also aligned, so stack is still aligned)
+   __ sub(sp, sp, frame_size);
+ 
+   // We always have to spill the args, since we need to do a call to get the
+   // thread (and maybe attach it).
+   arg_spiller.generate_spill(_masm, arg_save_area_offset);
+   preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
+ 
+   __ block_comment("{ on_entry");
+   __ lea(c_rarg0, Address(sp, frame_data_offset));
+   __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::on_entry));
    __ blr(rscratch1);
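+   // on_entry returns the JavaThread* in r0; install it in rthread and
+   // reinitialize the heap base register for this thread.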
+   __ mov(rthread, r0);
+   __ reinit_heapbase();
+   __ block_comment("} on_entry");
  
-   for (int i = 0; i < abi._integer_return_registers.length(); i++) {
-     ssize_t offs = layout.returns_integer + i * sizeof(uintptr_t);
-     __ ldr(abi._integer_return_registers.at(i), Address(sp, offs));
+   __ block_comment("{ argument shuffle");
+   arg_spiller.generate_fill(_masm, arg_save_area_offset);
+   if (needs_return_buffer) {
+     assert(ret_buf_offset != -1, "no return buffer allocated");
+     __ lea(abi._ret_buf_addr_reg, Address(sp, ret_buf_offset));
    }
+   arg_shuffle.generate(_masm, shuffle_reg->as_VMReg(), abi._shadow_space_bytes, 0);
+   __ block_comment("} argument shuffle");
+ 
+   __ block_comment("{ receiver ");
+   __ movptr(shuffle_reg, (intptr_t)receiver);
+   __ resolve_jobject(shuffle_reg, rthread, rscratch2);
+   __ mov(j_rarg0, shuffle_reg);
+   __ block_comment("} receiver ");
+ 
+   __ mov_metadata(rmethod, entry);
+   __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); // just in case callee is deoptimized
  
-   for (int i = 0; i < abi._vector_return_registers.length(); i++) {
-     FloatRegister reg = abi._vector_return_registers.at(i);
-     ssize_t offs = layout.returns_vector + i * float_reg_size;
-     __ ldrq(reg, Address(sp, offs));
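+   // Call the Java method via its from_compiled entry point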
+   __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset()));
+   __ blr(rscratch1);
+ 
+   // return value shuffle
+   if (!needs_return_buffer) {
+ #ifdef ASSERT
+     if (call_regs._rets_length == 1) { // _rets_length is 0 or 1
+       VMReg j_expected_result_reg;
+       switch (ret_type) {
+         case T_BOOLEAN:
+         case T_BYTE:
+         case T_SHORT:
+         case T_CHAR:
+         case T_INT:
+         case T_LONG:
+           j_expected_result_reg = r0->as_VMReg();
+           break;
+         case T_FLOAT:
+         case T_DOUBLE:
+           j_expected_result_reg = v0->as_VMReg();
+           break;
+         default:
+           fatal("unexpected return type: %s", type2name(ret_type));
+       }
+       // No need to move for now, since the CallArranger can pick a return type
+       // that goes in the same reg for both CCs. But at least assert that they are the same.
+       assert(call_regs._ret_regs[0] == j_expected_result_reg,
+              "unexpected result register: %s != %s", call_regs._ret_regs[0]->name(), j_expected_result_reg->name());
+     }
+ #endif
+   } else {
+     assert(ret_buf_offset != -1, "no return buffer allocated");
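+     // Copy the result from the return buffer into the native return registers.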
+     __ lea(rscratch1, Address(sp, ret_buf_offset));
+     int offset = 0;
+     for (int i = 0; i < call_regs._rets_length; i++) {
+       VMReg reg = call_regs._ret_regs[i];
+       if (reg->is_Register()) {
+         __ ldr(reg->as_Register(), Address(rscratch1, offset));
+         offset += 8;
+       } else if (reg->is_FloatRegister()) {
+         __ ldrd(reg->as_FloatRegister(), Address(rscratch1, offset));
+         offset += 16; // needs to match VECTOR_REG_SIZE in AArch64Architecture (Java)
+       } else {
+         ShouldNotReachHere();
+       }
+     }
    }
  
+   result_spiller.generate_spill(_masm, res_save_area_offset);
+ 
+   __ block_comment("{ on_exit");
+   __ lea(c_rarg0, Address(sp, frame_data_offset));
+   // stack already aligned
+   __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::on_exit));
+   __ blr(rscratch1);
+   __ block_comment("} on_exit");
+ 
+   restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
+ 
+   result_spiller.generate_fill(_masm, res_save_area_offset);
+ 
    __ leave();
    __ ret(lr);
  
-   __ flush();
+   //////////////////////////////////////////////////////////////////////////////
  
-   BufferBlob* blob = BufferBlob::create("upcall_stub", &buffer);
+   __ block_comment("{ exception handler");
  
-   return blob->code_begin();
- }
+   intptr_t exception_handler_offset = __ pc() - start;
  
- address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) {
-   ShouldNotCallThis();
-   return nullptr;
- }
+   // The native caller has no idea how to handle exceptions,
+   // so we just crash here. It is up to the Java callee to catch them.
+   __ verify_oop(r0);
+   __ movptr(rscratch1, CAST_FROM_FN_PTR(uint64_t, ProgrammableUpcallHandler::handle_uncaught_exception));
+   __ blr(rscratch1);
+   __ should_not_reach_here();
+ 
+   __ block_comment("} exception handler");
  
- bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
-   return false;
+   _masm->flush();
+ 
+ #ifndef PRODUCT
+   stringStream ss;
+   ss.print("optimized_upcall_stub_%s", entry->signature()->as_C_string());
+   const char* name = _masm->code_string(ss.as_string());
+ #else // PRODUCT
+   const char* name = "optimized_upcall_stub";
+ #endif // PRODUCT
+ 
+   OptimizedEntryBlob* blob
+     = OptimizedEntryBlob::create(name,
+                                  &buffer,
+                                  exception_handler_offset,
+                                  receiver,
+                                  in_ByteSize(frame_data_offset));
+ 
+   if (TraceOptimizedUpcallStubs) {
+     blob->print_on(tty);
+   }
+ 
+   return blob->code_begin();
  }