
src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp

*** 26,10 ***
--- 26,11 ---
  #ifndef _WINDOWS
  #include "alloca.h"
  #endif
  #include "asm/macroAssembler.hpp"
  #include "asm/macroAssembler.inline.hpp"
+ #include "classfile/symbolTable.hpp"
  #include "code/compiledIC.hpp"
  #include "code/debugInfoRec.hpp"
  #include "code/icBuffer.hpp"
  #include "code/nativeInst.hpp"
  #include "code/vtableStubs.hpp"

*** 556,10 ***
--- 557,91 ---
    }
  
    return align_up(stk_args, 2);
  }
  
+ // Same as java_calling_convention() but for multiple return
+ // values. There's no way to store them on the stack, so if we don't
+ // have enough registers, multiple values can't be returned.
+ const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
+ const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
+ int SharedRuntime::java_return_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed) {
+   // Create the mapping between argument positions and
+   // registers.
+   static const Register INT_ArgReg[java_return_convention_max_int] = {
+     rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
+   };
+   static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
+     j_farg0, j_farg1, j_farg2, j_farg3,
+     j_farg4, j_farg5, j_farg6, j_farg7
+   };
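+ 
+   // The first integer/oop value is returned in rax as usual; any
+   // additional ones consume the java argument registers in reverse
+   // order (j_rarg5 first). Floats/doubles use j_farg0..j_farg7. For
+   // example, returning (int, long, float) uses rax, j_rarg5 and j_farg0.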
+ 
+ 
+   uint int_args = 0;
+   uint fp_args = 0;
+ 
+   for (int i = 0; i < total_args_passed; i++) {
+     switch (sig_bt[i]) {
+     case T_BOOLEAN:
+     case T_CHAR:
+     case T_BYTE:
+     case T_SHORT:
+     case T_INT:
+       if (int_args < Argument::n_int_register_parameters_j+1) {
+         regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
+         int_args++;
+       } else {
+         return -1;
+       }
+       break;
+     case T_VOID:
+       // halves of T_LONG or T_DOUBLE
+       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+       regs[i].set_bad();
+       break;
+     case T_LONG:
+       assert(sig_bt[i + 1] == T_VOID, "expecting half");
+       // fall through
+     case T_OBJECT:
+     case T_ARRAY:
+     case T_ADDRESS:
+     case T_METADATA:
+       if (int_args < Argument::n_int_register_parameters_j+1) {
+         regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
+         int_args++;
+       } else {
+         return -1;
+       }
+       break;
+     case T_FLOAT:
+       if (fp_args < Argument::n_float_register_parameters_j) {
+         regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
+         fp_args++;
+       } else {
+         return -1;
+       }
+       break;
+     case T_DOUBLE:
+       assert(sig_bt[i + 1] == T_VOID, "expecting half");
+       if (fp_args < Argument::n_float_register_parameters_j) {
+         regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+         fp_args++;
+       } else {
+         return -1;
+       }
+       break;
+     default:
+       ShouldNotReachHere();
+       break;
+     }
+   }
+ 
+   return int_args + fp_args;
+ }
+ 
  // Patch the callers callsite with entry to compiled code if it exists.
  static void patch_callers_callsite(MacroAssembler *masm) {
    Label L;
    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
    __ jcc(Assembler::equal, L);

*** 598,29 ***
    // restore sp
    __ mov(rsp, r13);
    __ bind(L);
  }
  
  
  static void gen_c2i_adapter(MacroAssembler *masm,
!                             int total_args_passed,
-                             int comp_args_on_stack,
-                             const BasicType *sig_bt,
                              const VMRegPair *regs,
!                             Label& skip_fixup) {
    // Before we get into the guts of the C2I adapter, see if we should be here
    // at all.  We've come from compiled code and are attempting to jump to the
    // interpreter, which means the caller made a static call to get here
    // (vcalls always get a compiled target if there is one).  Check for a
    // compiled target.  If there is one, we need to patch the caller's call.
    patch_callers_callsite(masm);
  
    __ bind(skip_fixup);
  
    // Since all args are passed on the stack, total_args_passed *
    // Interpreter::stackElementSize is the space we need.
! 
    assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
  
    int extraspace = (total_args_passed * Interpreter::stackElementSize);
  
    // stack is aligned, keep it that way
--- 680,205 ---
    // restore sp
    __ mov(rsp, r13);
    __ bind(L);
  }
  
+ // For each inline type argument, sig includes the list of fields of
+ // the inline type. This utility function computes the number of
+ // arguments for the call if inline types are passed by reference (the
+ // calling convention the interpreter expects).
+ static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
+   int total_args_passed = 0;
+   if (InlineTypePassFieldsAsArgs) {
+     for (int i = 0; i < sig_extended->length(); i++) {
+       BasicType bt = sig_extended->at(i)._bt;
+       if (bt == T_METADATA) {
+         // In sig_extended, an inline type argument starts with:
+         // T_METADATA, followed by the types of the fields of the
+         // inline type and T_VOID to mark the end of the inline
+         // type. Inline types are flattened so, for instance, in the
+         // case of an inline type with an int field and an inline type
+         // field that itself has 2 fields, an int and a long:
+         // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
+         // slot for the T_LONG) T_VOID (inner inline type) T_VOID
+         // (outer inline type)
+         total_args_passed++;
+         int vt = 1;
+         do {
+           i++;
+           BasicType bt = sig_extended->at(i)._bt;
+           BasicType prev_bt = sig_extended->at(i-1)._bt;
+           if (bt == T_METADATA) {
+             vt++;
+           } else if (bt == T_VOID &&
+                      prev_bt != T_LONG &&
+                      prev_bt != T_DOUBLE) {
+             vt--;
+           }
+         } while (vt != 0);
+       } else {
+         total_args_passed++;
+       }
+     }
+   } else {
+     total_args_passed = sig_extended->length();
+   }
+   return total_args_passed;
+ }
+ 
+ 
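+ // Stores a single argument of type bt, located in the register or stack
+ // slot described by reg_pair, into the interpreter slot or inline type
+ // buffer field at 'to'. T_VOID halves of longs/doubles are skipped;
+ // oop stores go through the GC barrier when is_oop is set.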
+ static void gen_c2i_adapter_helper(MacroAssembler* masm,
+                                    BasicType bt,
+                                    BasicType prev_bt,
+                                    size_t size_in_bytes,
+                                    const VMRegPair& reg_pair,
+                                    const Address& to,
+                                    int extraspace,
+                                    bool is_oop) {
+   if (bt == T_VOID) {
+     assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
+     return;
+   }
+ 
+   // Say 4 args:
+   // i   st_off
+   // 0   32 T_LONG
+   // 1   24 T_VOID
+   // 2   16 T_OBJECT
+   // 3    8 T_BOOL
+   // -    0 return address
+   //
+   // However, to make things extra confusing, because we can fit a long/double in
+   // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
+   // leaves one slot empty and only stores to a single slot. In this case the
+   // slot that is occupied is the T_VOID slot. See, I said it was confusing.
+ 
+   bool wide = (size_in_bytes == wordSize);
+   VMReg r_1 = reg_pair.first();
+   VMReg r_2 = reg_pair.second();
+   assert(r_2->is_valid() == wide, "invalid size");
+   if (!r_1->is_valid()) {
+     assert(!r_2->is_valid(), "must be invalid");
+     return;
+   }
+ 
+   if (!r_1->is_XMMRegister()) {
+     Register val = rax;
+     if (r_1->is_stack()) {
+       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+       __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
+     } else {
+       val = r_1->as_Register();
+     }
+     assert_different_registers(to.base(), val, rscratch1);
+     if (is_oop) {
+       __ push(r13);
+       __ push(rbx);
+       __ store_heap_oop(to, val, rscratch1, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
+       __ pop(rbx);
+       __ pop(r13);
+     } else {
+       __ store_sized_value(to, val, size_in_bytes);
+     }
+   } else {
+     if (wide) {
+       __ movdbl(to, r_1->as_XMMRegister());
+     } else {
+       __ movflt(to, r_1->as_XMMRegister());
+     }
+   }
+ }
  
  static void gen_c2i_adapter(MacroAssembler *masm,
!                             const GrowableArray<SigEntry>* sig_extended,
                              const VMRegPair *regs,
!                             bool requires_clinit_barrier,
+                             address& c2i_no_clinit_check_entry,
+                             Label& skip_fixup,
+                             address start,
+                             OopMapSet* oop_maps,
+                             int& frame_complete,
+                             int& frame_size_in_words,
+                             bool alloc_inline_receiver) {
+   if (requires_clinit_barrier && VM_Version::supports_fast_class_init_checks()) {
+     Label L_skip_barrier;
+     Register method = rbx;
+ 
+     { // Bypass the barrier for non-static methods
+       Register flags = rscratch1;
+       __ movl(flags, Address(method, Method::access_flags_offset()));
+       __ testl(flags, JVM_ACC_STATIC);
+       __ jcc(Assembler::zero, L_skip_barrier); // non-static
+     }
+ 
+     Register klass = rscratch1;
+     __ load_method_holder(klass, method);
+     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
+ 
+     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
+ 
+     __ bind(L_skip_barrier);
+     c2i_no_clinit_check_entry = __ pc();
+   }
+ 
+   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+   bs->c2i_entry_barrier(masm);
+ 
    // Before we get into the guts of the C2I adapter, see if we should be here
    // at all.  We've come from compiled code and are attempting to jump to the
    // interpreter, which means the caller made a static call to get here
    // (vcalls always get a compiled target if there is one).  Check for a
    // compiled target.  If there is one, we need to patch the caller's call.
    patch_callers_callsite(masm);
  
    __ bind(skip_fixup);
  
+   if (InlineTypePassFieldsAsArgs) {
+     // Is there an inline type argument?
+     bool has_inline_argument = false;
+     for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
+       has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
+     }
+     if (has_inline_argument) {
+       // There is at least an inline type argument: we're coming from
+       // compiled code so we have no buffers to back the inline types.
+       // Allocate the buffers here with a runtime call.
+       OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, /*save_vectors*/ false);
+ 
+       frame_complete = __ offset();
+ 
+       __ set_last_Java_frame(noreg, noreg, nullptr, rscratch1);
+ 
+       __ mov(c_rarg0, r15_thread);
+       __ mov(c_rarg1, rbx);
+       __ mov64(c_rarg2, (int64_t)alloc_inline_receiver);
+       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
+ 
+       oop_maps->add_gc_map((int)(__ pc() - start), map);
+       __ reset_last_Java_frame(false);
+ 
+       RegisterSaver::restore_live_registers(masm);
+ 
+       Label no_exception;
+       __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), NULL_WORD);
+       __ jcc(Assembler::equal, no_exception);
+ 
+       __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), NULL_WORD);
+       __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
+       __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ 
+       __ bind(no_exception);
+ 
+       // We get an array of objects from the runtime call
+       __ get_vm_result(rscratch2, r15_thread); // Use rscratch2 (r11) as temporary because rscratch1 (r10) is trashed by movptr()
+       __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
+     }
+   }
+ 
    // Since all args are passed on the stack, total_args_passed *
    // Interpreter::stackElementSize is the space we need.
!   int total_args_passed = compute_total_args_passed_int(sig_extended);
    assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
  
    int extraspace = (total_args_passed * Interpreter::stackElementSize);
  
    // stack is aligned, keep it that way

*** 651,100 ***
  #ifdef ASSERT
    __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
  #endif
  
    // Now write the args into the outgoing interpreter space
-   for (int i = 0; i < total_args_passed; i++) {
-     if (sig_bt[i] == T_VOID) {
-       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-       continue;
-     }
- 
-     // offset to start parameters
-     int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
-     int next_off = st_off - Interpreter::stackElementSize;
  
!     // Say 4 args:
!     // i   st_off
!     // 0   32 T_LONG
!     // 1   24 T_VOID
!     // 2   16 T_OBJECT
!     // 3    8 T_BOOL
!     // -    0 return address
!     //
!     // However to make thing extra confusing. Because we can fit a long/double in
!     // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter
!     // leaves one slot empty and only stores to a single slot. In this case the
!     // slot that is occupied is the T_VOID slot. See I said it was confusing.
! 
!     VMReg r_1 = regs[i].first();
!     VMReg r_2 = regs[i].second();
!     if (!r_1->is_valid()) {
!       assert(!r_2->is_valid(), "");
!       continue;
!     }
!     if (r_1->is_stack()) {
!       // memory to memory use rax
!       int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
!       if (!r_2->is_valid()) {
!         // sign extend??
-         __ movl(rax, Address(rsp, ld_off));
-         __ movptr(Address(rsp, st_off), rax);
- 
-       } else {
- 
-         __ movq(rax, Address(rsp, ld_off));
- 
-         // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-         // T_DOUBLE and T_LONG use two slots in the interpreter
-         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-           // ld_off == LSW, ld_off+wordSize == MSW
-           // st_off == MSW, next_off == LSW
-           __ movq(Address(rsp, next_off), rax);
  #ifdef ASSERT
!           // Overwrite the unused slot with known junk
!           __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
!           __ movptr(Address(rsp, st_off), rax);
! #endif /* ASSERT */
-         } else {
-           __ movq(Address(rsp, st_off), rax);
-         }
        }
-     } else if (r_1->is_Register()) {
-       Register r = r_1->as_Register();
-       if (!r_2->is_valid()) {
-         // must be only an int (or less ) so move only 32bits to slot
-         // why not sign extend??
-         __ movl(Address(rsp, st_off), r);
-       } else {
-         // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-         // T_DOUBLE and T_LONG use two slots in the interpreter
-         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-           // long/double in gpr
- #ifdef ASSERT
-           // Overwrite the unused slot with known junk
-           __ mov64(rax, CONST64(0xdeadffffdeadaaab));
-           __ movptr(Address(rsp, st_off), rax);
  #endif /* ASSERT */
!           __ movq(Address(rsp, next_off), r);
          } else {
!           __ movptr(Address(rsp, st_off), r);
          }
!       }
!     } else {
!       assert(r_1->is_XMMRegister(), "");
!       if (!r_2->is_valid()) {
-         // only a float use just part of the slot
-         __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
-       } else {
- #ifdef ASSERT
-         // Overwrite the unused slot with known junk
-         __ mov64(rax, CONST64(0xdeadffffdeadaaac));
-         __ movptr(Address(rsp, st_off), rax);
- #endif /* ASSERT */
-         __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
-       }
      }
    }
  
    // Schedule the branch target address early.
    __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
--- 909,96 ---
  #ifdef ASSERT
    __ check_stack_alignment(rsp, "callee stack not aligned", wordSize, rax);
  #endif
  
    // Now write the args into the outgoing interpreter space
  
!   // next_arg_comp is the next argument from the compiler point of
!   // view (inline type fields are passed in registers/on the stack). In
!   // sig_extended, an inline type argument starts with: T_METADATA,
!   // followed by the types of the fields of the inline type and T_VOID
!   // to mark the end of the inline type. ignored counts the number of
!   // T_METADATA/T_VOID. next_vt_arg is the next inline type argument:
!   // used to get the buffer for that argument from the pool of buffers
!   // we allocated above and want to pass to the
!   // interpreter. next_arg_int is the next argument from the
!   // interpreter point of view (inline types are passed by reference).
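+   //
+   // For example, for a signature (int, MyValue{int}) sig_extended is
+   // T_INT T_METADATA T_INT T_VOID: the compiler passes two values in
+   // registers/stack slots (indexed by next_arg_comp - ignored) while
+   // the interpreter receives two arguments, the second one as a
+   // reference to a buffer allocated above.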
!   for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
!        next_arg_comp < sig_extended->length(); next_arg_comp++) {
!     assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
!     assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
!     BasicType bt = sig_extended->at(next_arg_comp)._bt;
!     int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
!     if (!InlineTypePassFieldsAsArgs || bt != T_METADATA) {
!       int next_off = st_off - Interpreter::stackElementSize;
!       const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
!       const VMRegPair reg_pair = regs[next_arg_comp-ignored];
!       size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
!       gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
!                              size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
!       next_arg_int++;
  #ifdef ASSERT
!       if (bt == T_LONG || bt == T_DOUBLE) {
!         // Overwrite the unused slot with known junk
!         __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
!         __ movptr(Address(rsp, st_off), rax);
        }
  #endif /* ASSERT */
!     } else {
+       ignored++;
+       // get the buffer from the just allocated pool of buffers
+       int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_OBJECT);
+       __ load_heap_oop(r14, Address(rscratch2, index));
+       next_vt_arg++; next_arg_int++;
+       int vt = 1;
+       // write fields we get from compiled code in registers/stack
+       // slots to the buffer: we know we are done with that inline type
+       // argument when we hit the T_VOID that marks the end of this
+       // inline type. Inline types are flattened,
+       // so we might encounter embedded inline types. Each entry in
+       // sig_extended contains a field offset in the buffer.
+       Label L_null;
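+       // L_null is bound after the buffer is stored to the interpreter
+       // slot below: if the argument turns out to be null, null is
+       // written to the slot instead and the buffer store is skipped.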
+       do {
+         next_arg_comp++;
+         BasicType bt = sig_extended->at(next_arg_comp)._bt;
+         BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
+         if (bt == T_METADATA) {
+           vt++;
+           ignored++;
+         } else if (bt == T_VOID &&
+                    prev_bt != T_LONG &&
+                    prev_bt != T_DOUBLE) {
+           vt--;
+           ignored++;
          } else {
!           int off = sig_extended->at(next_arg_comp)._offset;
+           if (off == -1) {
+             // Nullable inline type argument, emit null check
+             VMReg reg = regs[next_arg_comp-ignored].first();
+             Label L_notNull;
+             if (reg->is_stack()) {
+               int ld_off = reg->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+               __ testb(Address(rsp, ld_off), 1);
+             } else {
+               __ testb(reg->as_Register(), 1);
+             }
+             __ jcc(Assembler::notZero, L_notNull);
+             __ movptr(Address(rsp, st_off), 0);
+             __ jmp(L_null);
+             __ bind(L_notNull);
+             continue;
+           }
+           assert(off > 0, "offset in object should be positive");
+           size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+           bool is_oop = is_reference_type(bt);
+           gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
+                                  size_in_bytes, regs[next_arg_comp-ignored], Address(r14, off), extraspace, is_oop);
          }
!       } while (vt != 0);
!       // pass the buffer to the interpreter
!       __ movptr(Address(rsp, st_off), r14);
!       __ bind(L_null);
      }
    }
  
    // Schedule the branch target address early.
    __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));

*** 763,13 ***
    __ jcc(Assembler::below, L_ok);
    __ bind(L_fail);
  }
  
  void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
-                                     int total_args_passed,
                                      int comp_args_on_stack,
!                                     const BasicType *sig_bt,
                                      const VMRegPair *regs) {
  
    // Note: r13 contains the senderSP on entry. We must preserve it since
    // we may do a i2c -> c2i transition if we lose a race where compiled
    // code goes non-entrant while we get args ready.
--- 1017,12 ---
    __ jcc(Assembler::below, L_ok);
    __ bind(L_fail);
  }
  
  void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                      int comp_args_on_stack,
!                                     const GrowableArray<SigEntry>* sig,
                                      const VMRegPair *regs) {
  
    // Note: r13 contains the senderSP on entry. We must preserve it since
    // we may do a i2c -> c2i transition if we lose a race where compiled
    // code goes non-entrant while we get args ready.

*** 856,11 ***
    const Register saved_sp = rax;
    __ movptr(saved_sp, r11);
  
    // Will jump to the compiled code just as if compiled code was doing it.
    // Pre-load the register-jump target early, to schedule it better.
!   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
  
  #if INCLUDE_JVMCI
    if (EnableJVMCI) {
      // check if this call should be routed towards a specific entry point
      __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
--- 1109,11 ---
    const Register saved_sp = rax;
    __ movptr(saved_sp, r11);
  
    // Will jump to the compiled code just as if compiled code was doing it.
    // Pre-load the register-jump target early, to schedule it better.
!   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_inline_offset())));
  
  #if INCLUDE_JVMCI
    if (EnableJVMCI) {
      // check if this call should be routed towards a specific entry point
      __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);

*** 870,17 ***
      __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
      __ bind(no_alternative_target);
    }
  #endif // INCLUDE_JVMCI
  
    // Now generate the shuffle code.  Pick up all register args and move the
    // rest through the floating point stack top.
    for (int i = 0; i < total_args_passed; i++) {
!     if (sig_bt[i] == T_VOID) {
        // Longs and doubles are passed in native word order, but misaligned
        // in the 32-bit build.
!       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
        continue;
      }
  
      // Pick up 0, 1 or 2 words from SP+offset.
  
--- 1123,21 ---
      __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
      __ bind(no_alternative_target);
    }
  #endif // INCLUDE_JVMCI
  
+   int total_args_passed = sig->length();
+ 
    // Now generate the shuffle code.  Pick up all register args and move the
    // rest through the floating point stack top.
    for (int i = 0; i < total_args_passed; i++) {
!     BasicType bt = sig->at(i)._bt;
+     if (bt == T_VOID) {
        // Longs and doubles are passed in native word order, but misaligned
        // in the 32-bit build.
!       BasicType prev_bt = (i > 0) ? sig->at(i-1)._bt : T_ILLEGAL;
+       assert(i > 0 && (prev_bt == T_LONG || prev_bt == T_DOUBLE), "missing half");
        continue;
      }
  
      // Pick up 0, 1 or 2 words from SP+offset.
  

*** 918,11 ***
          //
          // Interpreter local[n] == MSW, local[n+1] == LSW however locals
          // are accessed as negative so LSW is at LOW address
  
          // ld_off is MSW so get LSW
!         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                             next_off : ld_off;
          __ movq(r13, Address(saved_sp, offset));
          // st_off is LSW (i.e. reg.first())
          __ movq(Address(rsp, st_off), r13);
        }
--- 1175,11 ---
          //
          // Interpreter local[n] == MSW, local[n+1] == LSW however locals
          // are accessed as negative so LSW is at LOW address
  
          // ld_off is MSW so get LSW
!         const int offset = (bt == T_LONG || bt == T_DOUBLE) ?
                             next_off : ld_off;
          __ movq(r13, Address(saved_sp, offset));
          // st_off is LSW (i.e. reg.first())
          __ movq(Address(rsp, st_off), r13);
        }

*** 933,11 ***
          //
          // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
          // So we must adjust where to pick up the data to match the interpreter.
  
!         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                             next_off : ld_off;
  
          // this can be a misaligned move
          __ movq(r, Address(saved_sp, offset));
        } else {
--- 1190,11 ---
          //
          // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
        // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case
          // So we must adjust where to pick up the data to match the interpreter.
  
!         const int offset = (bt == T_LONG || bt == T_DOUBLE) ?
                             next_off : ld_off;
  
          // this can be a misaligned move
          __ movq(r, Address(saved_sp, offset));
        } else {

*** 966,91 ***
    // and the vm will find there should this case occur.
  
    __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
  
    // put Method* where a c2i would expect should we end up there
!   // only needed because eof c2 resolve stubs return Method* as a result in
    // rax
    __ mov(rax, rbx);
    __ jmp(r11);
  }
  
  // ---------------------------------------------------------------
! AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
-                                                             int total_args_passed,
                                                              int comp_args_on_stack,
!                                                             const BasicType *sig_bt,
!                                                             const VMRegPair *regs,
!                                                             AdapterFingerPrint* fingerprint) {
    address i2c_entry = __ pc();
! 
-   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
  
    // -------------------------------------------------------------------------
    // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
    // to the interpreter.  The args start out packed in the compiled layout.  They
    // need to be unpacked into the interpreter layout.  This will almost always
    // require some stack space.  We grow the current (compiled) stack, then repack
    // the args.  We  finally end in a jump to the generic interpreter entry point.
    // On exit from the interpreter, the interpreter will restore our SP (lest the
    // compiled code, which relies solely on SP and not RBP, get sick).
  
!   address c2i_unverified_entry = __ pc();
    Label skip_fixup;
-   Label ok;
- 
-   Register holder = rax;
-   Register receiver = j_rarg0;
-   Register temp = rbx;
  
!   {
-     __ load_klass(temp, receiver, rscratch1);
-     __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
-     __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
-     __ jcc(Assembler::equal, ok);
-     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  
!     __ bind(ok);
!     // Method might have been compiled since the call site was patched to
!     // interpreted if that is the case treat it as a miss so we can get
-     // the call site corrected.
-     __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
-     __ jcc(Assembler::equal, skip_fixup);
-     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
-   }
- 
-   address c2i_entry = __ pc();
  
!   // Class initialization barrier for static methods
    address c2i_no_clinit_check_entry = nullptr;
!   if (VM_Version::supports_fast_class_init_checks()) {
!     Label L_skip_barrier;
!     Register method = rbx;
! 
!     { // Bypass the barrier for non-static methods
!       Register flags = rscratch1;
!       __ movl(flags, Address(method, Method::access_flags_offset()));
-       __ testl(flags, JVM_ACC_STATIC);
-       __ jcc(Assembler::zero, L_skip_barrier); // non-static
-     }
- 
-     Register klass = rscratch1;
-     __ load_method_holder(klass, method);
-     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
- 
-     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  
!     __ bind(L_skip_barrier);
!     c2i_no_clinit_check_entry = __ pc();
    }
  
-   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-   bs->c2i_entry_barrier(masm);
  
!   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  
!   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
  }
  
  int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {
--- 1223,108 ---
    // and the vm will find there should this case occur.
  
    __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
  
    // put Method* where a c2i would expect should we end up there
!   // only needed because c2 resolve stubs return Method* as a result in
    // rax
    __ mov(rax, rbx);
    __ jmp(r11);
  }
  
+ static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
+   Label ok;
+ 
+   Register holder = rax;
+   Register receiver = j_rarg0;
+   Register temp = rbx;
+ 
+   __ load_klass(temp, receiver, rscratch1);
+   __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
+   __ movptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset()));
+   __ jcc(Assembler::equal, ok);
+   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ 
+   __ bind(ok);
+   // Method might have been compiled since the call site was patched to
+   // interpreted; if that is the case, treat it as a miss so we can get
+   // the call site corrected.
+   __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
+   __ jcc(Assembler::equal, skip_fixup);
+   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ }
+ 
  // ---------------------------------------------------------------
! AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                                              int comp_args_on_stack,
!                                                             const GrowableArray<SigEntry>* sig,
!                                                             const VMRegPair* regs,
!                                                             const GrowableArray<SigEntry>* sig_cc,
+                                                             const VMRegPair* regs_cc,
+                                                             const GrowableArray<SigEntry>* sig_cc_ro,
+                                                             const VMRegPair* regs_cc_ro,
+                                                             AdapterFingerPrint* fingerprint,
+                                                             AdapterBlob*& new_adapter,
+                                                             bool allocate_code_blob) {
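+   // sig/regs describe the non-scalarized calling convention, sig_cc/regs_cc
+   // the scalarized convention (inline type arguments passed as their fields),
+   // and sig_cc_ro/regs_cc_ro the scalarized convention with a non-scalarized
+   // (buffered) receiver, used for the c2i_inline_ro_entry below.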
    address i2c_entry = __ pc();
!   gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
  
    // -------------------------------------------------------------------------
    // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
    // to the interpreter.  The args start out packed in the compiled layout.  They
    // need to be unpacked into the interpreter layout.  This will almost always
    // require some stack space.  We grow the current (compiled) stack, then repack
    // the args.  We  finally end in a jump to the generic interpreter entry point.
    // On exit from the interpreter, the interpreter will restore our SP (lest the
    // compiled code, which relies solely on SP and not RBP, get sick).
  
!   address c2i_unverified_entry        = __ pc();
+   address c2i_unverified_inline_entry = __ pc();
    Label skip_fixup;
  
!   gen_inline_cache_check(masm, skip_fixup);
  
!   OopMapSet* oop_maps = new OopMapSet();
!   int frame_complete = CodeOffsets::frame_never_safe;
!   int frame_size_in_words = 0;
  
!   // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
    address c2i_no_clinit_check_entry = nullptr;
!   address c2i_inline_ro_entry = __ pc();
!   if (regs_cc != regs_cc_ro) {
!     // No class init barrier needed because method is guaranteed to be non-static
!     gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, c2i_no_clinit_check_entry,
!                     skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
!     skip_fixup.reset();
!   }
  
!   // Scalarized c2i adapter
!   address c2i_entry        = __ pc();
+   address c2i_inline_entry = __ pc();
+   gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
+                   skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
+ 
+   // Non-scalarized c2i adapter
+   if (regs != regs_cc) {
+     c2i_unverified_inline_entry = __ pc();
+     Label inline_entry_skip_fixup;
+     gen_inline_cache_check(masm, inline_entry_skip_fixup);
+ 
+     c2i_inline_entry = __ pc();
+     gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, c2i_no_clinit_check_entry,
+                     inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
    }
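+ 
+   // The blob now has several c2i entry points: c2i_entry (scalarized),
+   // c2i_inline_entry (non-scalarized, used when regs != regs_cc) and
+   // c2i_inline_ro_entry (scalarized except for the receiver), plus the
+   // corresponding unverified entries for the inline cache checks.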
  
  
!   // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
+   // the GC knows about the oop argument locations passed to the c2i adapter.
+   if (allocate_code_blob) {
+     bool caller_must_gc_arguments = (regs != regs_cc);
+     new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
+   }
  
!   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
  }
  
  int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {

*** 2155,10 ***
--- 2429,14 ---
        // Load immediate 1 into swap_reg %rax
        __ movl(swap_reg, 1);
  
        // Load (object->mark() | 1) into swap_reg %rax
        __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+       if (EnableValhalla) {
+         // Mask inline_type bit such that we go to the slow path if the object is an inline type
+         __ andptr(swap_reg, ~((int) markWord::inline_type_bit_in_place));
+       }
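+       // Clearing the bit in the expected value makes the cmpxchg below
+       // fail if the object's mark word has the inline_type bit set, so
+       // inline types always take the slow path, which rejects locking
+       // on them.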
  
        // Save (object->mark() | 1) into BasicLock's displaced header
        __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
  
        // src -> dest iff dest == rax else rax <- dest

*** 3716,5 ***
--- 3994,115 ---
    // Set exception blob
    _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
  }
  #endif // COMPILER2
  
+ BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
+   BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
+   CodeBuffer buffer(buf);
+   short buffer_locs[20];
+   buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
+                                          sizeof(buffer_locs)/sizeof(relocInfo));
+ 
+   MacroAssembler* masm = new MacroAssembler(&buffer);
+ 
+   const Array<SigEntry>* sig_vk = vk->extended_sig();
+   const Array<VMRegPair>* regs = vk->return_regs();
+ 
+   int pack_fields_jobject_off = __ offset();
+   // Resolve pre-allocated buffer from JNI handle.
+   // We cannot do this in generate_call_stub() because it requires GC code to be initialized.
+   __ movptr(rax, Address(r13, 0));
+   __ resolve_jobject(rax /* value */,
+                      r15_thread /* thread */,
+                      r12 /* tmp */);
+   __ movptr(Address(r13, 0), rax);
+ 
+   int pack_fields_off = __ offset();
+ 
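+   // Fields arrive in the same registers that java_return_convention()
+   // assigns for a multi-value return: regs->at(0) is the inline type
+   // oop itself (rax, the T_METADATA entry), so field values start at
+   // index 1 and rax doubles as the buffer to pack into.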
+   int j = 1;
+   for (int i = 0; i < sig_vk->length(); i++) {
+     BasicType bt = sig_vk->at(i)._bt;
+     if (bt == T_METADATA) {
+       continue;
+     }
+     if (bt == T_VOID) {
+       if (sig_vk->at(i-1)._bt == T_LONG ||
+           sig_vk->at(i-1)._bt == T_DOUBLE) {
+         j++;
+       }
+       continue;
+     }
+     int off = sig_vk->at(i)._offset;
+     assert(off > 0, "offset in object should be positive");
+     VMRegPair pair = regs->at(j);
+     VMReg r_1 = pair.first();
+     VMReg r_2 = pair.second();
+     Address to(rax, off);
+     if (bt == T_FLOAT) {
+       __ movflt(to, r_1->as_XMMRegister());
+     } else if (bt == T_DOUBLE) {
+       __ movdbl(to, r_1->as_XMMRegister());
+     } else {
+       Register val = r_1->as_Register();
+       assert_different_registers(to.base(), val, r14, r13, rbx, rscratch1);
+       if (is_reference_type(bt)) {
+         __ store_heap_oop(to, val, r14, r13, rbx, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
+       } else {
+         __ store_sized_value(to, r_1->as_Register(), type2aelembytes(bt));
+       }
+     }
+     j++;
+   }
+   assert(j == regs->length(), "missed a field?");
+ 
+   __ ret(0);
+ 
+   int unpack_fields_off = __ offset();
+ 
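+   // The unpack entry reloads the fields of the buffered inline type in
+   // rax into the return registers; a null rax skips the loads and
+   // leaves the registers untouched.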
+   Label skip;
+   __ testptr(rax, rax);
+   __ jcc(Assembler::zero, skip);
+ 
+   j = 1;
+   for (int i = 0; i < sig_vk->length(); i++) {
+     BasicType bt = sig_vk->at(i)._bt;
+     if (bt == T_METADATA) {
+       continue;
+     }
+     if (bt == T_VOID) {
+       if (sig_vk->at(i-1)._bt == T_LONG ||
+           sig_vk->at(i-1)._bt == T_DOUBLE) {
+         j++;
+       }
+       continue;
+     }
+     int off = sig_vk->at(i)._offset;
+     assert(off > 0, "offset in object should be positive");
+     VMRegPair pair = regs->at(j);
+     VMReg r_1 = pair.first();
+     VMReg r_2 = pair.second();
+     Address from(rax, off);
+     if (bt == T_FLOAT) {
+       __ movflt(r_1->as_XMMRegister(), from);
+     } else if (bt == T_DOUBLE) {
+       __ movdbl(r_1->as_XMMRegister(), from);
+     } else if (bt == T_OBJECT || bt == T_ARRAY) {
+       assert_different_registers(rax, r_1->as_Register());
+       __ load_heap_oop(r_1->as_Register(), from);
+     } else {
+       assert(is_java_primitive(bt), "unexpected basic type");
+       assert_different_registers(rax, r_1->as_Register());
+       size_t size_in_bytes = type2aelembytes(bt);
+       __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
+     }
+     j++;
+   }
+   assert(j == regs->length(), "missed a field?");
+ 
+   __ bind(skip);
+   __ ret(0);
+ 
+   __ flush();
+ 
+   return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, pack_fields_jobject_off, unpack_fields_off);
+ }