< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page
@@ -27,10 +27,11 @@
  
  #include "precompiled.hpp"
  #include "asm/assembler.hpp"
  #include "asm/assembler.inline.hpp"
  #include "ci/ciEnv.hpp"
+ #include "ci/ciInlineKlass.hpp"
  #include "compiler/compileTask.hpp"
  #include "compiler/disassembler.hpp"
  #include "compiler/oopMap.hpp"
  #include "gc/shared/barrierSet.hpp"
  #include "gc/shared/barrierSetAssembler.hpp"

@@ -46,18 +47,21 @@
  #include "nativeInst_aarch64.hpp"
  #include "oops/accessDecorators.hpp"
  #include "oops/compressedKlass.inline.hpp"
  #include "oops/compressedOops.inline.hpp"
  #include "oops/klass.inline.hpp"
+ #include "oops/resolvedFieldEntry.hpp"
  #include "runtime/continuation.hpp"
  #include "runtime/icache.hpp"
  #include "runtime/interfaceSupport.inline.hpp"
  #include "runtime/javaThread.hpp"
  #include "runtime/jniHandles.inline.hpp"
  #include "runtime/sharedRuntime.hpp"
+ #include "runtime/signature_cc.hpp"
  #include "runtime/stubRoutines.hpp"
  #include "utilities/powerOfTwo.hpp"
+ #include "vmreg_aarch64.inline.hpp"
  #ifdef COMPILER1
  #include "c1/c1_LIRAssembler.hpp"
  #endif
  #ifdef COMPILER2
  #include "oops/oop.hpp"

@@ -1119,10 +1123,45 @@
  
  void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
  
  void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
  
+ // Load the pre-allocated default instance of an inline klass into 'obj'.
+ //   inline_klass - klass to query; it is also handed to the oop-load helpers
+ //                  as a temp, so do not rely on its value afterwards
+ //   temp_reg     - scratch; carries the default-value offset
+ //   obj          - result register
+ // rscratch2 is passed as an additional temp to load_heap_oop.
+ void MacroAssembler::get_default_value_oop(Register inline_klass, Register temp_reg, Register obj) {
+ #ifdef ASSERT
+   {
+     // Debug builds only: stop when handed a klass that is not an inline type.
+     Label klass_is_inline;
+     test_klass_is_inline_type(inline_klass, temp_reg, klass_is_inline);
+     stop("get_default_value_oop from non inline type klass");
+     bind(klass_is_inline);
+   }
+ #endif
+   const Register default_off = temp_reg;
+ 
+   // Fetch the offset of the pre-allocated default value: first the pointer
+   // stored at the inline-klass fixed block offset, then the offset kept there.
+   ldr(default_off, Address(inline_klass, in_bytes(InstanceKlass::adr_inlineklass_fixed_block_offset())));
+   ldr(default_off, Address(default_off, in_bytes(InlineKlass::default_value_offset_offset())));
+ 
+   // Fetch the mirror and resolve its handle.
+   // NOTE(review): temp_reg still holds the offset here yet is passed as a temp;
+   // this presumes resolve_oop_handle leaves it intact - confirm.
+   ldr(obj, Address(inline_klass, in_bytes(Klass::java_mirror_offset())));
+   resolve_oop_handle(obj, inline_klass, temp_reg);
+ 
+   // Read the pre-allocated default value out of the mirror.
+   const Address default_value_slot(obj, default_off);
+   load_heap_oop(obj, default_value_slot, inline_klass, rscratch2);
+ }
+ 
+ // Load the instance representing an empty inline type into 'obj'. This is
+ // simply the klass' default value; debug builds first verify that the klass
+ // really is an empty inline type. 'temp_reg' is scratch.
+ void MacroAssembler::get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj) {
+ #ifdef ASSERT
+   {
+     Label klass_is_empty;
+     test_klass_is_empty_inline_type(inline_klass, temp_reg, klass_is_empty);
+     stop("get_empty_value from non-empty inline klass");
+     bind(klass_is_empty);
+   }
+ #endif
+   get_default_value_oop(inline_klass, temp_reg, obj);
+ }
+ 
  // Look up the method for a megamorphic invokeinterface call.
  // The target method is determined by <intf_klass, itable_index>.
  // The receiver klass is in recv_klass.
  // On success, the result will be in method_result, and execution falls through.
  // On failure, execution transfers to the given label.

@@ -1572,11 +1611,15 @@
      Unimplemented();
    }
  }
  
  void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
-   if (!VerifyOops) return;
+   if (!VerifyOops || VerifyAdapterSharing) {
+     // Below address of the code string confuses VerifyAdapterSharing
+     // because it may differ between otherwise equivalent adapters.
+     return;
+   }
  
    // Pass register number to verify_oop_subroutine
    const char* b = nullptr;
    {
      ResourceMark rm;

@@ -1605,11 +1648,15 @@
  
    BLOCK_COMMENT("} verify_oop");
  }
  
  void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
-   if (!VerifyOops) return;
+   if (!VerifyOops || VerifyAdapterSharing) {
+     // Below address of the code string confuses VerifyAdapterSharing
+     // because it may differ between otherwise equivalent adapters.
+     return;
+   }
  
    const char* b = nullptr;
    {
      ResourceMark rm;
      stringStream ss;

@@ -1703,10 +1750,14 @@
    pass_arg1(this, arg_1);
    pass_arg2(this, arg_2);
    call_VM_leaf_base(entry_point, 3);
  }
  
+ // Leaf call without staging any argument. The explicitly qualified call
+ // always selects MacroAssembler's own call_VM_leaf_base.
+ // NOTE(review): an argument count of 1 is passed although no argument is
+ // set up here; this mirrors the one-argument overload - confirm intended.
+ void MacroAssembler::super_call_VM_leaf(address entry_point) {
+   const int number_of_arguments = 1;
+   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+ }
+ 
  void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
    pass_arg0(this, arg_0);
    MacroAssembler::call_VM_leaf_base(entry_point, 1);
  }
  

@@ -1748,10 +1799,120 @@
      // nothing to do, (later) access of M[reg + offset]
      // will provoke OS null exception if reg is null
    }
  }
  
+ // Branch to 'is_inline_type' when the mark word, masked with the inline type
+ // mask, equals the inline type pattern. Falls through otherwise.
+ // Overwrites 'markword' with the masked value and clobbers rscratch2.
+ void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
+   const Register expected = rscratch2;
+   assert_different_registers(markword, expected);
+   andr(markword, markword, markWord::inline_type_mask_in_place);
+   mov(expected, markWord::inline_type_pattern);
+   cmp(markword, expected);
+   br(Assembler::EQ, is_inline_type);
+ }
+ 
+ // Branch to 'is_inline_type' when the klass' access flags do not have
+ // JVM_ACC_IDENTITY set. Falls through otherwise. 'temp_reg' is clobbered.
+ void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
+   const Address access_flags(klass, Klass::access_flags_offset());
+   ldrw(temp_reg, access_flags);
+   // Isolate the identity bit; zero means "no identity".
+   andr(temp_reg, temp_reg, JVM_ACC_IDENTITY);
+   cbz(temp_reg, is_inline_type);
+ }
+ 
+ // Branch to 'not_inline_type' when 'object' is null or when its mark word
+ // does not contain all bits of the inline type pattern. Falls through for
+ // inline type instances. Clobbers 'tmp' and rscratch1.
+ void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type) {
+   assert_different_registers(tmp, rscratch1);
+   // A null reference is never an inline type instance.
+   cbz(object, not_inline_type);
+ 
+   // The pattern doubles as the mask: every pattern bit must be set.
+   const int pattern_bits = markWord::inline_type_pattern;
+   const Address mark_word(object, oopDesc::mark_offset_in_bytes());
+   ldr(tmp, mark_word);
+   mov(rscratch1, pattern_bits);
+   andr(tmp, tmp, rscratch1);
+   cmp(tmp, rscratch1);
+   br(Assembler::NE, not_inline_type);
+ }
+ 
+ // Branch to 'is_empty_inline_type' when the klass' misc flags carry the
+ // "empty inline type" bit. Falls through otherwise. 'temp_reg' is clobbered.
+ // Debug builds stop if 'klass' is not an inline type klass at all.
+ void MacroAssembler::test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type) {
+ #ifdef ASSERT
+   {
+     Label klass_is_inline;
+     test_klass_is_inline_type(klass, temp_reg, klass_is_inline);
+     stop("test_klass_is_empty_inline_type with non inline type klass");
+     bind(klass_is_inline);
+   }
+ #endif
+   const Address misc_flags(klass, InstanceKlass::misc_flags_offset());
+   ldrw(temp_reg, misc_flags);
+   andr(temp_reg, temp_reg, InstanceKlassFlags::is_empty_inline_type_value());
+   cbnz(temp_reg, is_empty_inline_type);
+ }
+ 
+ // Branch to 'is_null_free_inline_type' when the null-free inline type bit of
+ // the resolved field entry flags in 'flags' is set. 'flags' is left intact.
+ void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
+   assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
+   const int flag_bit = ResolvedFieldEntry::is_null_free_inline_type_shift;
+   tbnz(flags, flag_bit, is_null_free_inline_type);
+ }
+ 
+ // Branch to 'not_null_free_inline_type' when the null-free inline type bit of
+ // the resolved field entry flags in 'flags' is clear. 'flags' is left intact.
+ void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
+   assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
+   const int flag_bit = ResolvedFieldEntry::is_null_free_inline_type_shift;
+   tbz(flags, flag_bit, not_null_free_inline_type);
+ }
+ 
+ // Branch to 'is_flat' when the flat bit of the resolved field entry flags in
+ // 'flags' is set. 'flags' is left intact.
+ void MacroAssembler::test_field_is_flat(Register flags, Register temp_reg, Label& is_flat) {
+   assert(temp_reg == noreg, "not needed"); // keep signature uniform with x86
+   const int flag_bit = ResolvedFieldEntry::is_flat_shift;
+   tbnz(flags, flag_bit, is_flat);
+ }
+ 
+ // Test 'test_bit' in the header of 'oop' and branch to 'jmp_label' when the
+ // bit is set (jmp_set == true) or clear (jmp_set == false).
+ // The object's own mark word is used when its unlocked bit is set; otherwise
+ // the prototype header of the object's klass is consulted instead.
+ // 'temp_reg' is clobbered.
+ void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
+   Label header_ready;
+ 
+   // Start with the mark word stored in the object.
+   ldr(temp_reg, Address(oop, oopDesc::mark_offset_in_bytes()));
+   // If the unlocked bit is set the mark word is not displaced and is usable.
+   tst(temp_reg, markWord::unlocked_value);
+   br(Assembler::NE, header_ready);
+   // Slow path: fall back to the prototype header kept in the klass.
+   load_prototype_header(temp_reg, oop);
+ 
+   bind(header_ready);
+   andr(temp_reg, temp_reg, test_bit);
+   if (!jmp_set) {
+     cbz(temp_reg, jmp_label);
+   } else {
+     cbnz(temp_reg, jmp_label);
+   }
+ }
+ 
+ // Branch to 'is_flat_array' when the flat array bit is set in the header of
+ // 'oop'. 'temp_reg' is clobbered.
+ void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array) {
+   const bool jump_if_set = true;
+   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, jump_if_set, is_flat_array);
+ }
+ 
+ // Branch to 'is_non_flat_array' when the flat array bit is clear in the
+ // header of 'oop'. 'temp_reg' is clobbered.
+ void MacroAssembler::test_non_flat_array_oop(Register oop, Register temp_reg, Label& is_non_flat_array) {
+   const bool jump_if_set = false;
+   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, jump_if_set, is_non_flat_array);
+ }
+ 
+ // Branch to 'is_null_free_array' when the null-free array bit is set in the
+ // header of 'oop'. 'temp_reg' is clobbered.
+ void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
+   const bool jump_if_set = true;
+   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, jump_if_set, is_null_free_array);
+ }
+ 
+ // Branch to 'is_non_null_free_array' when the null-free array bit is clear in
+ // the header of 'oop'. 'temp_reg' is clobbered.
+ void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array) {
+   const bool jump_if_set = false;
+   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, jump_if_set, is_non_null_free_array);
+ }
+ 
+ // Branch to 'is_flat_array' when the flat-value tag bit is set in the layout
+ // helper value held in 'lh'. 'lh' is left intact; condition flags are set.
+ void MacroAssembler::test_flat_array_layout(Register lh, Label& is_flat_array) {
+   const Assembler::Condition bit_is_set = Assembler::NE;
+   tst(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
+   br(bit_is_set, is_flat_array);
+ }
+ 
+ // Branch to 'is_non_flat_array' when the flat-value tag bit is clear in the
+ // layout helper value held in 'lh'. 'lh' is left intact; condition flags are set.
+ void MacroAssembler::test_non_flat_array_layout(Register lh, Label& is_non_flat_array) {
+   const Assembler::Condition bit_is_clear = Assembler::EQ;
+   tst(lh, Klass::_lh_array_tag_flat_value_bit_inplace);
+   br(bit_is_clear, is_non_flat_array);
+ }
+ 
+ // Branch to 'is_null_free_array' when the null-free array bit is set in the
+ // layout helper value held in 'lh'. 'lh' is left intact; condition flags are set.
+ void MacroAssembler::test_null_free_array_layout(Register lh, Label& is_null_free_array) {
+   const Assembler::Condition bit_is_set = Assembler::NE;
+   tst(lh, Klass::_lh_null_free_array_bit_inplace);
+   br(bit_is_set, is_null_free_array);
+ }
+ 
+ // Branch to 'is_non_null_free_array' when the null-free array bit is clear in
+ // the layout helper value held in 'lh'. 'lh' is left intact; condition flags are set.
+ void MacroAssembler::test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array) {
+   const Assembler::Condition bit_is_clear = Assembler::EQ;
+   tst(lh, Klass::_lh_null_free_array_bit_inplace);
+   br(bit_is_clear, is_non_null_free_array);
+ }
+ 
  // MacroAssembler protected routines needed to implement
  // public methods
  
  void MacroAssembler::mov(Register r, Address dest) {
    code_section()->relocate(pc(), dest.rspec());

@@ -4417,10 +4578,18 @@
    ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    ldr(holder, Address(holder, ConstantPool::pool_holder_offset()));          // InstanceKlass*
  }
  
+ // Load the raw contents of the klass field of object 'src' into 'dst'.
+ // With compressed class pointers this is the 32-bit value as stored; unlike
+ // load_klass, no decoding is performed.
+ void MacroAssembler::load_metadata(Register dst, Register src) {
+   const Address klass_field(src, oopDesc::klass_offset_in_bytes());
+   if (!UseCompressedClassPointers) {
+     ldr(dst, klass_field);
+   } else {
+     ldrw(dst, klass_field);
+   }
+ }
+ 
  void MacroAssembler::load_klass(Register dst, Register src) {
    if (UseCompressedClassPointers) {
      ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
      decode_klass_not_null(dst);
    } else {

@@ -4475,10 +4644,15 @@
      ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
    }
    cmp(trial_klass, tmp);
  }
  
+ // Load into 'dst' the prototype header of the klass of object 'src'.
+ // 'dst' first receives the klass pointer and is then overwritten.
+ void MacroAssembler::load_prototype_header(Register dst, Register src) {
+   load_klass(dst, src);
+   const Address prototype_header(dst, Klass::prototype_header_offset());
+   ldr(dst, prototype_header);
+ }
+ 
  void MacroAssembler::store_klass(Register dst, Register src) {
    // FIXME: Should this be a store release?  concurrent gcs assumes
    // klass length is valid if klass field is not null.
    if (UseCompressedClassPointers) {
      encode_klass_not_null(src);

@@ -4799,10 +4973,50 @@
    } else {
      bs->store_at(this, decorators, type, dst, val, tmp1, tmp2, tmp3);
    }
  }
  
+ // Delegate an inline type value copy from 'src' to 'dst' to the barrier set
+ // assembler of the active barrier set, forwarding the decorators and klass.
+ void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Register dst,
+                                        Register inline_klass) {
+   BarrierSet::barrier_set()->barrier_set_assembler()->value_copy(this, decorators, src, dst, inline_klass);
+ }
+ 
+ // Load the first-field offset of 'inline_klass' into 'offset'. 'offset'
+ // temporarily holds the pointer read from the inline-klass fixed block slot.
+ void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
+   const Address fixed_block(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset());
+   ldr(offset, fixed_block);
+   // The offset itself is read as a 32-bit value.
+   ldrw(offset, Address(offset, InlineKlass::first_field_offset_offset()));
+ }
+ 
+ // Compute the address of the payload of an inline type instance:
+ //   data = oop + inline_klass->first_field_offset()
+ // 'data' may alias 'oop'; in that case rscratch1 is used for the offset.
+ void MacroAssembler::data_for_oop(Register oop, Register data, Register inline_klass) {
+   if (data == oop) {
+     // In-place update: the offset needs a register of its own.
+     first_field_offset(inline_klass, rscratch1);
+     add(data, data, rscratch1);
+   } else {
+     // 'data' doubles as the offset register before receiving the result.
+     first_field_offset(inline_klass, data);
+     lea(data, Address(oop, data));
+   }
+ }
+ 
+ // Compute the address of element 'index' of a flat value array:
+ //   data = array + (index << log2_element_size) + array base offset
+ // where log2_element_size is extracted from the array klass' layout helper.
+ // 'index' is overwritten with the scaled index; rscratch1 is clobbered.
+ void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
+                                                 Register index, Register data) {
+   const Register log2_esize = rscratch1;
+   assert_different_registers(array, array_klass, index);
+   assert_different_registers(log2_esize, array, index);
+ 
+   // Fetch the layout helper of the array klass.
+   ldrw(log2_esize, Address(array_klass, Klass::layout_helper_offset()));
+ 
+   // Klass::layout_helper_log2_element_size(lh):
+   //   (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask
+   lsr(log2_esize, log2_esize, Klass::_lh_log2_element_size_shift);
+   andr(log2_esize, log2_esize, Klass::_lh_log2_element_size_mask);
+ 
+   // Scale the index to a byte offset.
+   lslv(index, index, log2_esize);
+ 
+   // array->base() + scaled index
+   add(data, array, index);
+   add(data, data, arrayOopDesc::base_offset_in_bytes(T_PRIMITIVE_OBJECT));
+ }
+ 
  void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
                                     Register tmp2, DecoratorSet decorators) {
    access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
  }
  

@@ -4875,10 +5089,106 @@
  #endif
    int oop_index = oop_recorder()->find_index(obj);
    return Address((address)obj, oop_Relocation::spec(oop_index));
  }
  
+ // Object / value buffer allocation.
+ //
+ // Emits the fast path for allocating an instance of 'klass' in the TLAB.
+ //   klass        - klass of the instance; preserved (saved on the stack
+ //                  around the allocation and restored on every path)
+ //   new_obj      - receives the new object, must be r0
+ //   t1, t2       - scratch registers; rscratch1 is clobbered as well
+ //   clear_fields - when true (and ZeroTLAB is off) the instance fields are
+ //                  zeroed here
+ //   alloc_failed - branched to when the layout helper requests the slow
+ //                  path, when the TLAB allocation fails, or unconditionally
+ //                  when UseTLAB is off
+ // On success execution falls through with the header (mark word, klass gap
+ // and klass) initialized.
+ void MacroAssembler::allocate_instance(Register klass, Register new_obj,
+                                        Register t1, Register t2,
+                                        bool clear_fields, Label& alloc_failed)
+ {
+   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
+   Register layout_size = t1;
+   assert(new_obj == r0, "needs to be r0");
+   assert_different_registers(klass, new_obj, t1, t2);
+ 
+   // get instance_size in InstanceKlass (scaled to a count of bytes)
+   ldrw(layout_size, Address(klass, Klass::layout_helper_offset()));
+   // test to see if it has a finalizer or is malformed in some way
+   tst(layout_size, Klass::_lh_instance_slow_path_bit);
+   br(Assembler::NE, slow_case_no_pop);
+ 
+   // Allocate the instance:
+   //  If TLAB is enabled:
+   //    Try to allocate in the TLAB.
+   //    If fails, go to the slow path.
+   //    Initialize the allocation.
+   //    Exit.
+   //
+   //  Go to slow path.
+ 
+   if (UseTLAB) {
+     // klass is passed to tlab_allocate as a temp, so keep a copy on the stack.
+     push(klass);
+     tlab_allocate(new_obj, layout_size, 0, klass, t2, slow_case);
+     if (ZeroTLAB || (!clear_fields)) {
+       // the fields are either already zero (ZeroTLAB) or the caller did not
+       // ask for them to be cleared
+       b(initialize_header);
+     } else {
+       // initialize both the header and fields
+       b(initialize_object);
+     }
+ 
+     if (clear_fields) {
+       // The object is initialized before the header.  If the object size is
+       // zero, go directly to the header initialization.
+       bind(initialize_object);
+       subs(layout_size, layout_size, sizeof(oopDesc));
+       br(Assembler::EQ, initialize_header);
+ 
+       // Initialize topmost object field, divide size by 8, check if odd and
+       // test if zero.
+ 
+   #ifdef ASSERT
+       // make sure instance_size was multiple of 8
+       Label L;
+       tst(layout_size, 7);
+       br(Assembler::EQ, L);
+       stop("object size is not multiple of 8 - adjust this code");
+       bind(L);
+       // must be > 0, no extra check needed here
+   #endif
+ 
+       lsr(layout_size, layout_size, LogBytesPerLong);
+ 
+       // initialize remaining object fields: instance_size was a multiple of 8
+       {
+         // Walks from the last 8-byte word down to the first one past the
+         // header; layout_size counts the words still to be zeroed.
+         Label loop;
+ 
+         bind(loop);
+         add(rscratch1, new_obj, layout_size, Assembler::LSL, LogBytesPerLong);
+         str(zr, Address(rscratch1, sizeof(oopDesc) - 1*oopSize));
+         subs(layout_size, layout_size, 1);
+         br(Assembler::NE, loop);
+       }
+     } // clear_fields
+ 
+     // initialize object header only.
+     bind(initialize_header);
+     pop(klass);
+     Register mark_word = t2;
+     ldr(mark_word, Address(klass, Klass::prototype_header_offset()));
+     str(mark_word, Address(new_obj, oopDesc::mark_offset_in_bytes ()));
+     store_klass_gap(new_obj, zr);  // zero klass gap for compressed oops
+     mov(t2, klass);         // preserve klass
+     store_klass(new_obj, t2);  // src klass reg is potentially compressed
+ 
+     // TODO: Valhalla removed SharedRuntime::dtrace_object_alloc from here ?
+ 
+     b(done);
+ 
+     // TLAB allocation failed: restore klass before leaving.
+     bind(slow_case);
+     pop(klass);
+   }
+   bind(slow_case_no_pop);
+   b(alloc_failed);
+ 
+   bind(done);
+ }
+ 
  // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  void MacroAssembler::tlab_allocate(Register obj,
                                     Register var_size_in_bytes,
                                     int con_size_in_bytes,
                                     Register t1,

@@ -4914,10 +5224,24 @@
      ldp(rscratch2, rscratch1, Address(post(sp, 16)));
    }
  #endif
  }
  
+ // Load into 'inline_klass' the entry at position 'index' of the inline type
+ // field klasses array of 'klass'. Entries are addressed with the index
+ // scaled by 8. Debug builds stop when 'klass' has no such array.
+ void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
+   const Address field_klasses(klass, InstanceKlass::inline_type_field_klasses_offset());
+   ldr(inline_klass, field_klasses);
+ #ifdef ASSERT
+   {
+     Label has_array;
+     cbnz(inline_klass, has_array);
+     stop("get_inline_type_field_klass contains no inline klass");
+     bind(has_array);
+   }
+ #endif
+   // Step over the array header to element 0, then pick element 'index'.
+   lea(inline_klass, Address(inline_klass, Array<InlineKlass*>::base_offset_in_bytes()));
+   const Address element(inline_klass, index, Address::lsl(3));
+   ldr(inline_klass, element);
+ }
+ 
  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages.  This clobbers tmp.
  void MacroAssembler::bang_stack_size(Register size, Register tmp) {
    assert_different_registers(tmp, size, rscratch1);
    mov(tmp, sp);

@@ -5039,10 +5363,61 @@
      ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
    }
    authenticate_return_address();
  }
  
+ // Tear down the current frame. Without stack repair this is the plain
+ // remove_frame(initial_framesize). With stack repair the frame may have been
+ // extended for inline type argument (un)packing, so the amount to pop is read
+ // from the sp_inc stack slot instead.
+ void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
+   if (!needs_stack_repair) {
+     remove_frame(initial_framesize);
+     return;
+   }
+ 
+   // Remove the extension of the caller's frame used for inline type unpacking.
+   //
+   // Stack layout at this point:
+   //
+   // | Arguments from caller     |
+   // |---------------------------|  <-- caller's SP
+   // | Saved LR #1               |
+   // | Saved FP #1               |
+   // |---------------------------|
+   // | Extension space for       |
+   // |   inline arg (un)packing  |
+   // |---------------------------|  <-- start of this method's frame
+   // | Saved LR #2               |
+   // | Saved FP #2               |
+   // |---------------------------|  <-- FP
+   // | sp_inc                    |
+   // | method locals             |
+   // |---------------------------|  <-- SP
+   //
+   // FP and LR are stored twice. Both copies are identical unless the caller
+   // has been deoptimized: then LR #1 is patched to point at the deopt blob
+   // while LR #2 still points into the old method.
+   //
+   // The sp_inc slot holds the total frame size including the extension
+   // space, minus the two words of saved FP and LR.
+ 
+   const int sp_inc_slot = initial_framesize - 3 * wordSize;  // Immediately below saved LR and FP
+ 
+   ldr(rscratch1, Address(sp, sp_inc_slot));
+   add(sp, sp, rscratch1);
+   // Pop copy #1 of FP/LR, i.e. the pair that deoptimization may have patched.
+   ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+ }
+ 
+ // Record in the sp_inc stack slot how much remove_frame (with stack repair)
+ // has to pop: frame_size + sp_inc, less the two words of saved FP and LR
+ // which are popped separately. Clobbers rscratch1.
+ void MacroAssembler::save_stack_increment(int sp_inc, int frame_size) {
+   const int total_size = frame_size + sp_inc;
+   assert(sp_inc == 0 || sp_inc > 2*wordSize, "invalid sp_inc value");
+   assert(total_size >= 2*wordSize, "frame size must include FP/LR space");
+   assert((total_size & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ 
+   const int sp_inc_slot = frame_size - 3 * wordSize;  // Immediately below saved LR and FP
+   const Address slot(sp, sp_inc_slot);
+ 
+   // FP and LR are not part of the stored amount, see remove_frame.
+   mov(rscratch1, total_size - 2*wordSize);
+   str(rscratch1, slot);
+ }
  
  // This method counts leading positive bytes (highest bit not set) in provided byte array
  address MacroAssembler::count_positives(Register ary1, Register len, Register result) {
      // Simple and most common case of aligned small array which is not at the
      // end of memory page is placed here. All other cases are in stub.

@@ -5953,10 +6328,451 @@
  
    pop(saved_regs, sp);
    authenticate_return_address();
  }
  
+ #ifdef COMPILER2
+ // C2 compiled method's prolog code.
+ // Moved here from aarch64.ad to support the Valhalla code below.
+ //
+ // Emits, in order: the optional class initialization barrier, the predicate
+ // register re-initialization for vector code, the optional stack bang, the
+ // frame setup and, for methods needing stack repair, the sp_inc slot store.
+ void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+   if (C->clinit_barrier_on_entry()) {
+     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
+ 
+     Label barrier_passed;
+ 
+     mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
+     clinit_barrier(rscratch2, rscratch1, &barrier_passed);
+     // Barrier not passed: go through the handle-wrong-method stub.
+     far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+     bind(barrier_passed);
+   }
+ 
+   if (C->max_vector_size() > 0) {
+     reinitialize_ptrue();
+   }
+ 
+   const int bang_bytes = C->output()->bang_size_in_bytes();
+   if (C->output()->need_stack_bang(bang_bytes)) {
+     generate_stack_overflow_check(bang_bytes);
+   }
+ 
+   // n.b. frame size includes space for return pc and rfp
+   const long framesize = C->output()->frame_size_in_bytes();
+   build_frame(framesize);
+ 
+   if (C->needs_stack_repair()) {
+     save_stack_increment(sp_inc, framesize);
+   }
+ 
+   if (VerifyStackAtCalls) {
+     Unimplemented();
+   }
+ }
+ #endif // COMPILER2
+ 
+ // Buffer an inline type that is being returned as fields.
+ //   vk               - statically known return klass, or nullptr when the
+ //                      klass is only known at run time (tagged in r0)
+ //   from_interpreter - selects how the slow path stub is invoked
+ // Nothing is done at run time when bit 0 of r0 is clear. Otherwise a buffer is
+ // allocated in the TLAB and filled by the klass' pack handler; if that is not
+ // possible the store_inline_type_fields_to_buf stub is called instead.
+ // Returns the code offset right after the slow path far_call when
+ // !from_interpreter, and -1 otherwise.
+ // Uses r12-r15 and rscratch1 as temps.
+ int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
+   assert(InlineTypeReturnedAsFields, "Inline types should never be returned as fields");
+   // An inline type might be returned. If fields are in registers we
+   // need to allocate an inline type instance and initialize it with
+   // the value of the fields.
+   Label skip;
+   // We only need a new buffered inline type if a new one is not returned
+   tbz(r0, 0, skip);
+   int call_offset = -1;
+ 
+   // Be careful not to clobber r1-7 which hold returned fields
+   // Also do not use callee-saved registers as these may be live in the interpreter
+   Register tmp1 = r13, tmp2 = r14, klass = r15, r0_preserved = r12;
+ 
+   // The following code is similar to allocate_instance but has some slight differences,
+   // e.g. object size is always not zero, sometimes it's constant; storing klass ptr after
+   // allocating is not necessary if vk != nullptr, etc. allocate_instance is not aware of these.
+   Label slow_case;
+   // 1. Try to allocate a new buffered inline instance from the TLAB
+   mov(r0_preserved, r0); // save r0 for slow_case since *_allocate may corrupt it when allocation failed
+ 
+   // Size of the buffer: a compile-time constant when the klass is known,
+   // otherwise read from the klass' layout helper into tmp2.
+   Register var_size = noreg;
+   jint con_size = 0;
+   if (vk != nullptr) {
+     // Called from C1, where the return type is statically known.
+     movptr(klass, (intptr_t)vk->get_InlineKlass());
+     con_size = vk->layout_helper();
+     assert(con_size != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
+   } else {
+     // Call from interpreter. R0 contains ((the InlineKlass* of the return type) | 0x01)
+     andr(klass, r0, -2);
+     ldrw(tmp2, Address(klass, Klass::layout_helper_offset()));
+     var_size = tmp2;
+   }
+ 
+   if (!UseTLAB) {
+     // No fast path available: always take the runtime call.
+     b(slow_case);
+     DEBUG_ONLY(should_not_reach_here());
+   } else {
+     tlab_allocate(r0, var_size, con_size, tmp1, tmp2, slow_case);
+ 
+     // 2. Initialize buffered inline instance header
+     Register buffer_obj = r0;
+     mov(rscratch1, (intptr_t)markWord::inline_type_prototype().value());
+     str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes()));
+     store_klass_gap(buffer_obj, zr);
+     if (vk == nullptr) {
+       // store_klass corrupts klass, so save it for later use (interpreter case only).
+       mov(tmp1, klass);
+     }
+     store_klass(buffer_obj, klass);
+ 
+     // 3. Initialize its fields with an inline class specific handler
+     if (vk != nullptr) {
+       far_call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
+     } else {
+       // tmp1 holds klass preserved above
+       ldr(tmp1, Address(tmp1, InstanceKlass::adr_inlineklass_fixed_block_offset()));
+       ldr(tmp1, Address(tmp1, InlineKlass::pack_handler_offset()));
+       blr(tmp1);
+     }
+ 
+     membar(Assembler::StoreStore);
+     b(skip);
+   }
+ 
+   bind(slow_case);
+   // We failed to allocate a new inline type, fall back to a runtime
+   // call. Some oop field may be live in some registers but we can't
+   // tell. That runtime call will take care of preserving them
+   // across a GC if there's one.
+   mov(r0, r0_preserved);
+ 
+   if (!from_interpreter) {
+     far_call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
+     call_offset = offset();
+   } else {
+     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
+   }
+   membar(Assembler::StoreStore);
+ 
+   bind(skip);
+   return call_offset;
+ }
+ 
+ // Move a value between registers/stack slots and update the reg_state.
+ // Returns true when 'to' counts as written on return (already written before,
+ // moved now, or no move needed because from == to or bt == T_VOID), and false
+ // when 'to' is still read-only so the move has to be retried later.
+ // Stack-to-stack moves go through rscratch1.
+ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
+   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
+   if (reg_state[to->value()] == reg_written) {
+     return true; // Already written
+   }
+ 
+   const bool needs_move = (from != to) && (bt != T_VOID);
+   if (needs_move) {
+     if (reg_state[to->value()] == reg_readonly) {
+       return false; // Not yet writable
+     }
+ 
+     const bool src_in_reg = from->is_reg();
+     const bool dst_in_reg = to->is_reg();
+ 
+     if (src_in_reg && dst_in_reg) {
+       // register -> register; both must be of the same kind
+       if (from->is_Register() && to->is_Register()) {
+         mov(to->as_Register(), from->as_Register());
+       } else if (from->is_FloatRegister() && to->is_FloatRegister()) {
+         fmovd(to->as_FloatRegister(), from->as_FloatRegister());
+       } else {
+         ShouldNotReachHere();
+       }
+     } else if (src_in_reg) {
+       // register -> stack
+       const int dst_off = to->reg2stack() * VMRegImpl::stack_slot_size;
+       Address dst_slot = Address(sp, dst_off);
+       if (!from->is_FloatRegister()) {
+         str(from->as_Register(), dst_slot);
+       } else if (bt == T_DOUBLE) {
+         strd(from->as_FloatRegister(), dst_slot);
+       } else {
+         assert(bt == T_FLOAT, "must be float");
+         strs(from->as_FloatRegister(), dst_slot);
+       }
+     } else {
+       Address src_slot = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size);
+       if (!dst_in_reg) {
+         // stack -> stack, bounced through rscratch1
+         const int dst_off = to->reg2stack() * VMRegImpl::stack_slot_size;
+         ldr(rscratch1, src_slot);
+         str(rscratch1, Address(sp, dst_off));
+       } else if (!to->is_FloatRegister()) {
+         // stack -> general purpose register
+         ldr(to->as_Register(), src_slot);
+       } else if (bt == T_DOUBLE) {
+         // stack -> floating point register
+         ldrd(to->as_FloatRegister(), src_slot);
+       } else {
+         assert(bt == T_FLOAT, "must be float");
+         ldrs(to->as_FloatRegister(), src_slot);
+       }
+     }
+   }
+ 
+   // Update register states: the source may now be overwritten, the
+   // destination holds its final value.
+   reg_state[from->value()] = reg_writable;
+   reg_state[to->value()] = reg_written;
+   return true;
+ }
+ 
+ // Calculate the extra stack space required for packing or unpacking inline
+ // args and adjust the stack pointer accordingly.
+ // A copy of FP/LR is pushed first, then SP is lowered by the stack-aligned
+ // size of 'args_on_stack' slots. Returns the total number of bytes SP was
+ // moved by, including the two words for FP/LR. May clobber rscratch1.
+ int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
+   const int arg_bytes = args_on_stack * VMRegImpl::stack_slot_size;
+   const int sp_inc = align_up(arg_bytes, StackAlignmentInBytes);
+   assert(sp_inc > 0, "sanity");
+ 
+   // Save a copy of the FP and LR here for deoptimization patching and frame walking
+   stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+ 
+   // Adjust the stack pointer. This will be repaired on return by MacroAssembler::remove_frame
+   const bool use_immediate = sp_inc < (1 << 9);
+   if (!use_immediate) {
+     mov(rscratch1, sp_inc);
+     sub(sp, sp, rscratch1);
+   } else {
+     sub(sp, sp, sp_inc);   // Fits in an immediate
+   }
+ 
+   return sp_inc + 2 * wordSize;  // Account for the FP/LR space
+ }
+ 
+ // Read all fields from an inline type oop and store the values in registers/stack slots.
+ //
+ //   sig, sig_index     - scalarized signature and the position of the end
+ //                        delimiter (T_VOID) of the inline type to unpack;
+ //                        sig_index is updated from the field stream
+ //   from, from_index   - location of the inline type oop; from_index is
+ //                        decremented on every call
+ //   to, to_count,
+ //   to_index           - destination locations; to_index is updated from the
+ //                        field stream
+ //   reg_state          - per-location state; destinations written here are
+ //                        marked reg_written
+ //
+ // Only destinations that are currently writable are filled in. The return
+ // value is true when no destination had to be skipped for being read-only,
+ // false when another pass is required. No code is emitted when no progress
+ // could be made. Uses r10/r11 and rscratch1/rscratch2 as temps.
+ bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
+                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
+                                           RegState reg_state[]) {
+   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+   assert(from->is_valid(), "source must be valid");
+   bool progress = false;
+ #ifdef ASSERT
+   const int start_offset = offset();
+ #endif
+ 
+   Label L_null, L_notNull;
+   // Don't use r14 as tmp because it's used for spilling (see MacroAssembler::spill_reg_for)
+   Register tmp1 = r10;
+   Register tmp2 = r11;
+   Register fromReg = noreg;
+   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
+   bool done = true;
+   bool mark_done = true;
+   VMReg toReg;
+   BasicType bt;
+   // Check if argument requires a null check: this is the case when one of the
+   // signature entries has offset -1 (the IsInit marker).
+   bool null_check = false;
+   VMReg nullCheckReg;
+   while (stream.next(nullCheckReg, bt)) {
+     if (sig->at(stream.sig_index())._offset == -1) {
+       null_check = true;
+       break;
+     }
+   }
+   stream.reset(sig_index, to_index);
+   while (stream.next(toReg, bt)) {
+     assert(toReg->is_valid(), "destination must be valid");
+     int idx = (int)toReg->value();
+     if (reg_state[idx] == reg_readonly) {
+       // Destination still holds a live value. If it is the source itself it
+       // does not prevent the source from being released below.
+       if (idx != from->value()) {
+         mark_done = false;
+       }
+       done = false;
+       continue;
+     } else if (reg_state[idx] == reg_written) {
+       continue;
+     }
+     assert(reg_state[idx] == reg_writable, "must be writable");
+     reg_state[idx] = reg_written;
+     progress = true;
+ 
+     if (fromReg == noreg) {
+       // First field written in this pass: materialize the source oop.
+       if (from->is_reg()) {
+         fromReg = from->as_Register();
+       } else {
+         int st_off = from->reg2stack() * VMRegImpl::stack_slot_size;
+         ldr(tmp1, Address(sp, st_off));
+         fromReg = tmp1;
+       }
+       if (null_check) {
+         // Nullable inline type argument, emit null check
+         cbz(fromReg, L_null);
+       }
+     }
+     int off = sig->at(stream.sig_index())._offset;
+     if (off == -1) {
+       // IsInit marker: the oop is known to be non-null here, so store 1.
+       assert(null_check, "Missing null check for nullable inline type argument");
+       if (toReg->is_stack()) {
+         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
+         mov(tmp2, 1);
+         str(tmp2, Address(sp, st_off));
+       } else {
+         mov(toReg->as_Register(), 1);
+       }
+       continue;
+     }
+     assert(off > 0, "offset in object should be positive");
+     Address fromAddr = Address(fromReg, off);
+     if (!toReg->is_FloatRegister()) {
+       // Integral or reference field; stack destinations go through tmp2.
+       Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register();
+       if (is_reference_type(bt)) {
+         load_heap_oop(dst, fromAddr, rscratch1, rscratch2);
+       } else {
+         bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+         load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
+       }
+       if (toReg->is_stack()) {
+         int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
+         str(dst, Address(sp, st_off));
+       }
+     } else if (bt == T_DOUBLE) {
+       ldrd(toReg->as_FloatRegister(), fromAddr);
+     } else {
+       assert(bt == T_FLOAT, "must be float");
+       ldrs(toReg->as_FloatRegister(), fromAddr);
+     }
+   }
+   if (progress && null_check) {
+     if (done) {
+       b(L_notNull);
+       bind(L_null);
+       // Set IsInit field to zero to signal that the argument is null.
+       // Also set all oop fields to zero to make the GC happy.
+       stream.reset(sig_index, to_index);
+       while (stream.next(toReg, bt)) {
+         if (sig->at(stream.sig_index())._offset == -1 ||
+             bt == T_OBJECT || bt == T_ARRAY) {
+           if (toReg->is_stack()) {
+             int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size;
+             str(zr, Address(sp, st_off));
+           } else {
+             mov(toReg->as_Register(), zr);
+           }
+         }
+       }
+       bind(L_notNull);
+     } else {
+       // Not all fields could be written yet: the null case just falls
+       // through here, no zeroing is emitted in this pass.
+       bind(L_null);
+     }
+   }
+ 
+   sig_index = stream.sig_index();
+   to_index = stream.regs_index();
+ 
+   if (mark_done && reg_state[from->value()] != reg_written) {
+     // This is okay because no one else will write to that slot
+     reg_state[from->value()] = reg_writable;
+   }
+   from_index--;
+   assert(progress || (start_offset == offset()), "should not emit code");
+   return done;
+ }
+ 
+ /* Pack fields back into an inline type oop.
+  *
+  * Emits code that loads the oop at element 'vtarg_index' of the object array
+  * in 'val_array', stores every scalarized field yielded by the
+  * ScalarizedInlineArgsStream over 'from' into it at the offset recorded in
+  * 'sig', and then hands the oop to move_helper() to reach 'to'.
+  *
+  * A sig entry with _offset == -1 is the null marker: when its value is zero
+  * the generated code sets the result to null and skips the field stores.
+  *
+  * On entry sig->at(sig_index) must be a T_METADATA delimiter. On the main
+  * path 'sig_index' and 'from_index' are updated from the stream position;
+  * on the early-return paths they are passed to skip_unpacked_fields()
+  * (presumably advancing them past this argument - confirm there).
+  * Each consumed source register is marked reg_writable in 'reg_state'.
+  *
+  * Returns true if 'to' was already written or the packing code was emitted,
+  * false if 'to' is reg_readonly and is_reg_in_unpacked_fields() reports
+  * false (not yet writable).
+  */
+ bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
+                                         VMRegPair* from, int from_count, int& from_index, VMReg to,
+                                         RegState reg_state[], Register val_array) {
+   assert(sig->at(sig_index)._bt == T_METADATA, "should be at delimiter");
+   assert(to->is_valid(), "destination must be valid");
+ 
+   if (reg_state[to->value()] == reg_written) {
+     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
+     return true; // Already written
+   }
+ 
+   // The GC barrier expanded by store_heap_oop below may call into the
+   // runtime so use callee-saved registers for any values that need to be
+   // preserved. The GC barrier assembler should take care of saving the
+   // Java argument registers.
+   // TODO 8284443 Isn't it an issue if below code uses r14 as tmp when it contains a spilled value?
+   // Be careful with r14 because it's used for spilling (see MacroAssembler::spill_reg_for).
+   Register val_obj_tmp = r21;  // holds the oop when 'to' cannot be used directly
+   Register from_reg_tmp = r22;  // holds a field value reloaded from the stack
+   Register tmp1 = r14;
+   Register tmp2 = r13;
+   Register tmp3 = r12;
+   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();  // build in 'to' when it is a register
+ 
+   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
+ 
+   if (reg_state[to->value()] == reg_readonly) {
+     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
+       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
+       return false; // Not yet writable
+     }
+     val_obj = val_obj_tmp;  // 'to' is presumably one of this argument's own sources: build in the temp
+   }
+ 
+   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_OBJECT);  // byte offset of element vtarg_index
+   load_heap_oop(val_obj, Address(val_array, index), tmp1, tmp2);  // val_obj = val_array[vtarg_index]
+ 
+   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
+   VMReg fromReg;
+   BasicType bt;
+   Label L_null;
+   while (stream.next(fromReg, bt)) {
+     assert(fromReg->is_valid(), "source must be valid");
+     reg_state[fromReg->value()] = reg_writable;  // source is consumed by the code emitted below
+ 
+     int off = sig->at(stream.sig_index())._offset;
+     if (off == -1) {
+       // Nullable inline type argument, emit null check: this entry is the
+       // null marker, a non-zero value means the argument is not null.
+       Label L_notNull;
+       if (fromReg->is_stack()) {
+         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;
+         ldrb(tmp2, Address(sp, ld_off));  // only one byte of the marker slot is read
+         cbnz(tmp2, L_notNull);
+       } else {
+         cbnz(fromReg->as_Register(), L_notNull);
+       }
+       mov(val_obj, 0);  // null argument: the packed result is a null oop
+       b(L_null);  // skip the remaining field stores
+       bind(L_notNull);
+       continue;
+     }
+ 
+     assert(off > 0, "offset in object should be positive");
+     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;  // non-primitives use a full word
+ 
+     // Pack the scalarized field into the value object.
+     Address dst(val_obj, off);
+ 
+     if (!fromReg->is_FloatRegister()) {
+       Register src;
+       if (fromReg->is_stack()) {
+         src = from_reg_tmp;
+         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;
+         load_sized_value(src, Address(sp, ld_off), size_in_bytes, /* is_signed */ false);
+       } else {
+         src = fromReg->as_Register();
+       }
+       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
+       if (is_reference_type(bt)) {  // oop field: stored via store_heap_oop (GC barrier, see note above)
+         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
+       } else {
+         store_sized_value(dst, src, size_in_bytes);
+       }
+     } else if (bt == T_DOUBLE) {
+       strd(fromReg->as_FloatRegister(), dst);
+     } else {
+       assert(bt == T_FLOAT, "must be float");
+       strs(fromReg->as_FloatRegister(), dst);
+     }
+   }
+   bind(L_null);  // the null path rejoins here with val_obj == 0
+   sig_index = stream.sig_index();  // report the stream position back to the caller
+   from_index = stream.regs_index();
+ 
+   assert(reg_state[to->value()] == reg_writable, "must have already been read");
+   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);  // presumably emits the move of val_obj into 'to'
+   assert(success, "to register must be writeable");
+ 
+   return true;
+ }
+ 
+ // Select the register used for spilling 'reg': v8 when 'reg' is a
+ // floating-point register, r14 for everything else.
+ VMReg MacroAssembler::spill_reg_for(VMReg reg) {
+   if (reg->is_FloatRegister()) {
+     return v8->as_VMReg();
+   }
+   return r14->as_VMReg();
+ }
+ 
  void MacroAssembler::cache_wb(Address line) {
    assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset");
    assert(line.index() == noreg, "index should be noreg");
    assert(line.offset() == 0, "offset should be 0");
    // would like to assert this

@@ -6326,10 +7142,15 @@
    cmpw(t1, (unsigned)LockStack::end_offset() - 1);
    br(Assembler::GT, slow);
  
    // Load (object->mark() | 1) into hdr
    orr(hdr, hdr, markWord::unlocked_value);
+   if (EnableValhalla) {
+     // Mask inline_type bit such that we go to the slow path if object is an inline type
+     andr(hdr, hdr, ~((int) markWord::inline_type_bit_in_place));
+   }
+ 
    // Clear lock-bits, into t2
    eor(t2, hdr, markWord::unlocked_value);
    // Try to swing header from unlocked to locked
    // Clobbers rscratch1 when UseLSE is false
    cmpxchg(/*addr*/ obj, /*expected*/ hdr, /*new*/ t2, Assembler::xword,
< prev index next >