< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Print this page
@@ -27,10 +27,11 @@
  
  #include "precompiled.hpp"
  #include "jvm.h"
  #include "asm/assembler.hpp"
  #include "asm/assembler.inline.hpp"
+ #include "ci/ciInlineKlass.hpp"
  #include "gc/shared/barrierSet.hpp"
  #include "gc/shared/barrierSetAssembler.hpp"
  #include "gc/shared/cardTableBarrierSet.hpp"
  #include "gc/shared/cardTable.hpp"
  #include "gc/shared/collectedHeap.hpp"

@@ -47,13 +48,15 @@
  #include "oops/klass.inline.hpp"
  #include "runtime/icache.hpp"
  #include "runtime/interfaceSupport.inline.hpp"
  #include "runtime/jniHandles.inline.hpp"
  #include "runtime/sharedRuntime.hpp"
+ #include "runtime/signature_cc.hpp"
  #include "runtime/stubRoutines.hpp"
  #include "runtime/thread.hpp"
  #include "utilities/powerOfTwo.hpp"
+ #include "vmreg_aarch64.inline.hpp"
  #ifdef COMPILER1
  #include "c1/c1_LIRAssembler.hpp"
  #endif
  #ifdef COMPILER2
  #include "oops/oop.hpp"

@@ -769,10 +772,45 @@
  
  // Emits no code: the body is empty. 'java_thread' is unused.
  void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
  
  // Emits no code: the body is empty. 'java_thread' is unused.
  void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
  
+ // Emits code that loads the pre-allocated default value of an inline klass.
+ //   inline_klass - register holding the inline klass; it is also passed to
+ //                  resolve_oop_handle() as its second (temp) argument
+ //   temp_reg     - scratch; receives the offset of the default value field
+ //   obj          - result: first the mirror, finally the default value oop
+ // ASSERT builds first emit a check that stops when the klass is not an
+ // inline type klass.
+ void MacroAssembler::get_default_value_oop(Register inline_klass, Register temp_reg, Register obj) {
+ #ifdef ASSERT
+   {
+     Label klass_is_inline;
+     test_klass_is_inline_type(inline_klass, temp_reg, klass_is_inline);
+     stop("get_default_value_oop from non inline type klass");
+     bind(klass_is_inline);
+   }
+ #endif
+   Register default_value_offset = temp_reg;
+   Address fixed_block_addr(inline_klass, in_bytes(InstanceKlass::adr_inlineklass_fixed_block_offset()));
+   Address mirror_handle_addr(inline_klass, in_bytes(Klass::java_mirror_offset()));
+ 
+   // Fixed block pointer first, then the default value offset stored in it.
+   ldr(default_value_offset, fixed_block_addr);
+   ldr(default_value_offset, Address(default_value_offset, in_bytes(InlineKlass::default_value_offset_offset())));
+ 
+   // Load the mirror handle and resolve it to the mirror oop.
+   ldr(obj, mirror_handle_addr);
+   resolve_oop_handle(obj, inline_klass);
+ 
+   // The default value lives in the mirror at the offset loaded above.
+   load_heap_oop(obj, Address(obj, default_value_offset));
+ }
+ 
+ // Emits code that loads into 'obj' the default value oop of the inline klass
+ // in 'inline_klass' by forwarding to get_default_value_oop().
+ // ASSERT builds first emit a check that stops when the klass is not flagged
+ // as an empty inline type.
+ void MacroAssembler::get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj) {
+ #ifdef ASSERT
+   {
+     Label klass_is_empty;
+     test_klass_is_empty_inline_type(inline_klass, temp_reg, klass_is_empty);
+     stop("get_empty_value from non-empty inline klass");
+     bind(klass_is_empty);
+   }
+ #endif
+   get_default_value_oop(inline_klass, temp_reg, obj);
+ }
+ 
  // Look up the method for a megamorphic invokeinterface call.
  // The target method is determined by <intf_klass, itable_index>.
  // The receiver klass is in recv_klass.
  // On success, the result will be in method_result, and execution falls through.
  // On failure, execution transfers to the given label.

@@ -1119,11 +1157,15 @@
      Unimplemented();
    }
  }
  
  void MacroAssembler::verify_oop(Register reg, const char* s) {
-   if (!VerifyOops) return;
+   if (!VerifyOops || VerifyAdapterSharing) {
+     // The address of the code string used below confuses VerifyAdapterSharing
+     // because it may differ between otherwise equivalent adapters.
+     return;
+   }
  
    // Pass register number to verify_oop_subroutine
    const char* b = NULL;
    {
      ResourceMark rm;

@@ -1149,11 +1191,15 @@
  
    BLOCK_COMMENT("} verify_oop");
  }
  
  void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-   if (!VerifyOops) return;
+   if (!VerifyOops || VerifyAdapterSharing) {
+     // The address of the code string used below confuses VerifyAdapterSharing
+     // because it may differ between otherwise equivalent adapters.
+     return;
+   }
  
    const char* b = NULL;
    {
      ResourceMark rm;
      stringStream ss;

@@ -1228,23 +1274,30 @@
    pass_arg0(this, arg_0);
    call_VM_leaf_base(entry_point, 1);
  }
  
  // Two-argument leaf call: arg_0 and arg_1 are handed to pass_arg0() and
  // pass_arg1() in that order, then call_VM_leaf_base() is invoked with an
  // argument count of 2.
  void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+   assert_different_registers(arg_1, c_rarg0);  // presumably pass_arg0() writes c_rarg0 before arg_1 is read - confirm
    pass_arg0(this, arg_0);
    pass_arg1(this, arg_1);
    call_VM_leaf_base(entry_point, 2);
  }
  
  // Three-argument leaf call: the arguments are handed to pass_arg0(),
  // pass_arg1() and pass_arg2() in that order, then call_VM_leaf_base() is
  // invoked with an argument count of 3.
  void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                    Register arg_1, Register arg_2) {
+   assert_different_registers(arg_1, c_rarg0);            // presumably c_rarg0 is written by pass_arg0() first - confirm
+   assert_different_registers(arg_2, c_rarg0, c_rarg1);   // likewise for c_rarg0/c_rarg1 before arg_2 is read - confirm
    pass_arg0(this, arg_0);
    pass_arg1(this, arg_1);
    pass_arg2(this, arg_2);
    call_VM_leaf_base(entry_point, 3);
  }
  
+ // Variant that sets up no argument register itself: it only forwards to
+ // MacroAssembler::call_VM_leaf_base() (explicitly qualified), passing an
+ // argument count of 1.
+ void MacroAssembler::super_call_VM_leaf(address entry_point) {
+   const int number_of_arguments = 1;
+   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+ }
+ 
  // One-argument variant: hands arg_0 to pass_arg0() and then calls the
  // explicitly qualified MacroAssembler::call_VM_leaf_base() with a count of 1.
  void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
    pass_arg0(this, arg_0);
    MacroAssembler::call_VM_leaf_base(entry_point, 1);
  }
  

@@ -1290,10 +1343,119 @@
      // nothing to do, (later) access of M[reg + offset]
      // will provoke OS NULL exception if reg = NULL
    }
  }
  
+ // Branches to 'is_inline_type' when the bits of 'markword' selected by
+ // markWord::inline_type_mask_in_place equal markWord::inline_type_pattern;
+ // falls through otherwise.
+ // Overwrites 'markword' with its masked value and clobbers rscratch2.
+ void MacroAssembler::test_markword_is_inline_type(Register markword, Label& is_inline_type) {
+   Register pattern = rscratch2;
+   assert_different_registers(markword, pattern);
+   andr(markword, markword, markWord::inline_type_mask_in_place);
+   mov(pattern, markWord::inline_type_pattern);
+   cmp(markword, pattern);
+   br(Assembler::EQ, is_inline_type);
+ }
+ 
+ // Branches to 'is_inline_type' when JVM_ACC_INLINE is set in the access
+ // flags of 'klass'; falls through otherwise. Clobbers 'temp_reg'.
+ void MacroAssembler::test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type) {
+   Address access_flags(klass, Klass::access_flags_offset());
+   ldrw(temp_reg, access_flags);                 // 32-bit load of the flags word
+   andr(temp_reg, temp_reg, JVM_ACC_INLINE);     // isolate the inline bit
+   cbnz(temp_reg, is_inline_type);
+ }
+ 
+ // Branches to 'not_inline_type' when 'object' is NULL or when the bits of its
+ // mark word selected by markWord::inline_type_pattern are not all set; falls
+ // through otherwise.
+ //   object - oop to test (may be NULL)
+ //   tmp    - scratch, receives the masked mark word; must not be rscratch1
+ // Clobbers rscratch1, which holds the pattern for both the mask and compare.
+ void MacroAssembler::test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type) {
+   // If tmp aliased rscratch1 the mov below would overwrite the loaded mark
+   // word, the compare would always succeed and the NE branch never be taken.
+   assert_different_registers(tmp, rscratch1);
+   cbz(object, not_inline_type);
+   const int is_inline_type_mask = markWord::inline_type_pattern;
+   ldr(tmp, Address(object, oopDesc::mark_offset_in_bytes()));
+   mov(rscratch1, is_inline_type_mask);
+   andr(tmp, tmp, rscratch1);
+   cmp(tmp, rscratch1);
+   br(Assembler::NE, not_inline_type);
+ }
+ 
+ // Branches to 'is_empty_inline_type' when the empty-inline-type bit is set in
+ // the misc flags of 'klass'; falls through otherwise. Clobbers 'temp_reg'.
+ // ASSERT builds first emit a check that stops when 'klass' is not an inline
+ // type klass at all.
+ void MacroAssembler::test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type) {
+ #ifdef ASSERT
+   {
+     Label klass_is_inline;
+     test_klass_is_inline_type(klass, temp_reg, klass_is_inline);
+     stop("test_klass_is_empty_inline_type with non inline type klass");
+     bind(klass_is_inline);
+   }
+ #endif
+   Address misc_flags(klass, InstanceKlass::misc_flags_offset());
+   ldrw(temp_reg, misc_flags);
+   andr(temp_reg, temp_reg, InstanceKlass::misc_flags_is_empty_inline_type());
+   cbnz(temp_reg, is_empty_inline_type);
+ }
+ 
+ // Branches to 'is_null_free_inline_type' when the null-free-inline-type bit
+ // of the field 'flags' is set. 'flags' is not modified.
+ // 'temp_reg' must be noreg; it exists only to keep the signature uniform
+ // with x86.
+ void MacroAssembler::test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free_inline_type) {
+   assert(temp_reg == noreg, "not needed");
+   const int flag_bit = ConstantPoolCacheEntry::is_null_free_inline_type_shift;
+   tbnz(flags, flag_bit, is_null_free_inline_type);
+ }
+ 
+ // Branches to 'not_null_free_inline_type' when the null-free-inline-type bit
+ // of the field 'flags' is clear. 'flags' is not modified.
+ // 'temp_reg' must be noreg; it exists only to keep the signature uniform
+ // with x86.
+ void MacroAssembler::test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type) {
+   assert(temp_reg == noreg, "not needed");
+   const int flag_bit = ConstantPoolCacheEntry::is_null_free_inline_type_shift;
+   tbz(flags, flag_bit, not_null_free_inline_type);
+ }
+ 
+ // Branches to 'is_flattened' when the is_inlined bit of the field 'flags' is
+ // set. 'flags' is not modified.
+ // 'temp_reg' must be noreg; it exists only to keep the signature uniform
+ // with x86.
+ void MacroAssembler::test_field_is_inlined(Register flags, Register temp_reg, Label& is_flattened) {
+   assert(temp_reg == noreg, "not needed");
+   const int flag_bit = ConstantPoolCacheEntry::is_inlined_shift;
+   tbnz(flags, flag_bit, is_flattened);
+ }
+ 
+ // Tests the bits 'test_bit' in the header of 'oop' and conditionally branches
+ // to 'jmp_label'.
+ //   temp_reg - scratch; ends up holding the masked header bits
+ //   jmp_set  - true: branch when any tested bit is set;
+ //              false: branch when all tested bits are clear
+ // The mark word is used directly when markWord::unlocked_value is set in it;
+ // otherwise the prototype header of the oop's klass is loaded instead.
+ void MacroAssembler::test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label) {
+   Label header_loaded;
+   Address mark_addr(oop, oopDesc::mark_offset_in_bytes());
+ 
+   ldr(temp_reg, mark_addr);
+   // Unlocked mark word: its bits can be tested as they are.
+   tst(temp_reg, markWord::unlocked_value);
+   br(Assembler::NE, header_loaded);
+   // Otherwise fall back to the klass prototype header.
+   load_prototype_header(temp_reg, oop);
+ 
+   bind(header_loaded);
+   andr(temp_reg, temp_reg, test_bit);
+   if (!jmp_set) {
+     cbz(temp_reg, jmp_label);
+     return;
+   }
+   cbnz(temp_reg, jmp_label);
+ }
+ 
+ // Branches to 'is_flattened_array' when markWord::flat_array_bit_in_place is
+ // set in the header of 'oop' (see test_oop_prototype_bit). Clobbers 'temp_reg'.
+ void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg, Label& is_flattened_array) {
+   const bool branch_if_set = true;
+   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, branch_if_set, is_flattened_array);
+ }
+ 
+ // Branches to 'is_non_flattened_array' when markWord::flat_array_bit_in_place
+ // is clear in the header of 'oop' (see test_oop_prototype_bit).
+ // Clobbers 'temp_reg'.
+ void MacroAssembler::test_non_flattened_array_oop(Register oop, Register temp_reg,
+                                                   Label& is_non_flattened_array) {
+   const bool branch_if_set = false;
+   test_oop_prototype_bit(oop, temp_reg, markWord::flat_array_bit_in_place, branch_if_set, is_non_flattened_array);
+ }
+ 
+ // Branches to 'is_null_free_array' when markWord::null_free_array_bit_in_place
+ // is set in the header of 'oop' (see test_oop_prototype_bit).
+ // Clobbers 'temp_reg'.
+ void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
+   const bool branch_if_set = true;
+   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, branch_if_set, is_null_free_array);
+ }
+ 
+ // Branches to 'is_non_null_free_array' when
+ // markWord::null_free_array_bit_in_place is clear in the header of 'oop'
+ // (see test_oop_prototype_bit). Clobbers 'temp_reg'.
+ void MacroAssembler::test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array) {
+   const bool branch_if_set = false;
+   test_oop_prototype_bit(oop, temp_reg, markWord::null_free_array_bit_in_place, branch_if_set, is_non_null_free_array);
+ }
+ 
+ // Branches to 'is_flattened_array' when Klass::_lh_array_tag_vt_value_bit_inplace
+ // is set in the layout helper value 'lh'. 'lh' is only read; the condition
+ // flags are overwritten by the tst.
+ void MacroAssembler::test_flattened_array_layout(Register lh, Label& is_flattened_array) {
+   const Assembler::Condition bit_is_set = Assembler::NE;
+   tst(lh, Klass::_lh_array_tag_vt_value_bit_inplace);
+   br(bit_is_set, is_flattened_array);
+ }
+ 
+ // Branches to 'is_non_flattened_array' when
+ // Klass::_lh_array_tag_vt_value_bit_inplace is clear in the layout helper
+ // value 'lh'. 'lh' is only read; the condition flags are overwritten.
+ void MacroAssembler::test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array) {
+   const Assembler::Condition bit_is_clear = Assembler::EQ;
+   tst(lh, Klass::_lh_array_tag_vt_value_bit_inplace);
+   br(bit_is_clear, is_non_flattened_array);
+ }
+ 
+ // Branches to 'is_null_free_array' when Klass::_lh_null_free_bit_inplace is
+ // set in the layout helper value 'lh'. 'lh' is only read; the condition flags
+ // are overwritten.
+ void MacroAssembler::test_null_free_array_layout(Register lh, Label& is_null_free_array) {
+   const Assembler::Condition bit_is_set = Assembler::NE;
+   tst(lh, Klass::_lh_null_free_bit_inplace);
+   br(bit_is_set, is_null_free_array);
+ }
+ 
+ // Branches to 'is_non_null_free_array' when Klass::_lh_null_free_bit_inplace
+ // is clear in the layout helper value 'lh'. 'lh' is only read; the condition
+ // flags are overwritten.
+ void MacroAssembler::test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array) {
+   const Assembler::Condition bit_is_clear = Assembler::EQ;
+   tst(lh, Klass::_lh_null_free_bit_inplace);
+   br(bit_is_clear, is_non_null_free_array);
+ }
+ 
  // MacroAssembler protected routines needed to implement
  // public methods
  
  void MacroAssembler::mov(Register r, Address dest) {
    code_section()->relocate(pc(), dest.rspec());

@@ -3604,10 +3766,18 @@
    ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
    ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
    ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
  }
  
+ // Loads the raw klass field of the object in 'src' into 'dst' without
+ // decoding it: a 32-bit load with UseCompressedClassPointers, a 64-bit load
+ // otherwise.
+ void MacroAssembler::load_metadata(Register dst, Register src) {
+   Address klass_field(src, oopDesc::klass_offset_in_bytes());
+   if (!UseCompressedClassPointers) {
+     ldr(dst, klass_field);
+     return;
+   }
+   ldrw(dst, klass_field);
+ }
+ 
  void MacroAssembler::load_klass(Register dst, Register src) {
    if (UseCompressedClassPointers) {
      ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
      decode_klass_not_null(dst);
    } else {

@@ -3663,10 +3833,15 @@
      ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
    }
    cmp(trial_klass, tmp);
  }
  
+ // Loads into 'dst' the prototype header of the klass of the object in 'src'.
+ // 'dst' temporarily holds the klass pointer.
+ void MacroAssembler::load_prototype_header(Register dst, Register src) {
+   load_klass(dst, src);
+   Address prototype_header(dst, Klass::prototype_header_offset());
+   ldr(dst, prototype_header);
+ }
+ 
  void MacroAssembler::store_klass(Register dst, Register src) {
    // FIXME: Should this be a store release?  concurrent gcs assumes
    // klass length is valid if klass field is not null.
    if (UseCompressedClassPointers) {
      encode_klass_not_null(src);

@@ -3976,21 +4151,62 @@
    }
  }
  
  // Emits a store of 'src' to 'dst' through the current barrier set assembler.
  // The decorators are first run through AccessInternal::decorator_fixup().
  // With AS_RAW set, BarrierSetAssembler::store_at is called with explicit
  // class qualification; otherwise bs->store_at is called.
  void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                       Address dst, Register src,
-                                      Register tmp1, Register thread_tmp) {
+                                      Register tmp1, Register thread_tmp, Register tmp3) {
+ 
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    decorators = AccessInternal::decorator_fixup(decorators);
    bool as_raw = (decorators & AS_RAW) != 0;
    if (as_raw) {
-     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3);
+   } else {
+     bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3);
+   }
+ }
+ 
+ // Forwards an inline type value copy to value_copy() of the current barrier
+ // set assembler, passing the arguments through unchanged.
+ void MacroAssembler::access_value_copy(DecoratorSet decorators, Register src, Register dst,
+                                        Register inline_klass) {
+   BarrierSet::barrier_set()->barrier_set_assembler()->value_copy(this, decorators, src, dst, inline_klass);
+ }
+ 
+ // Loads the first-field offset of 'inline_klass' into 'offset': first the
+ // fixed block pointer (64-bit load), then the 32-bit offset stored in it.
+ // 'offset' doubles as the temporary holding the fixed block pointer.
+ void MacroAssembler::first_field_offset(Register inline_klass, Register offset) {
+   Address fixed_block_addr(inline_klass, InstanceKlass::adr_inlineklass_fixed_block_offset());
+   ldr(offset, fixed_block_addr);
+   Address first_field_offset_addr(offset, InlineKlass::first_field_offset_offset());
+   ldrw(offset, first_field_offset_addr);
+ }
+ 
+ void MacroAssembler::data_for_oop(Register oop, Register data, Register inline_klass) {
+   // data = ((address) (void*) oop) + inline_klass->first_field_offset(); 'data' may be the same register as 'oop'
+   Register offset = (data == oop) ? rscratch1 : data;  // rscratch1 holds the offset when 'data' aliases 'oop'
+   first_field_offset(inline_klass, offset);
+   if (data == oop) {
+     add(data, data, offset);
    } else {
-     bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+     lea(data, Address(oop, offset));
    }
  }
  
+ // Computes the address of an element of an inline type array:
+ //   data = array + (index << log2_element_size) + base offset for T_INLINE_TYPE
+ // where log2_element_size is extracted from the layout helper of
+ // 'array_klass'. Destroys 'index' (left holding the scaled index) and
+ // clobbers rscratch1.
+ void MacroAssembler::data_for_value_array_index(Register array, Register array_klass,
+                                                 Register index, Register data) {
+   Register log2_elem_size = rscratch1;
+   assert_different_registers(array, array_klass, index);
+   assert_different_registers(log2_elem_size, array, index);
+ 
+   // lh = array_klass->layout_helper()
+   Address layout_helper(array_klass, Klass::layout_helper_offset());
+   ldrw(log2_elem_size, layout_helper);
+ 
+   // (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask
+   lsr(log2_elem_size, log2_elem_size, Klass::_lh_log2_element_size_shift);
+   andr(log2_elem_size, log2_elem_size, Klass::_lh_log2_element_size_mask);
+ 
+   // Scale the index in place, then add the array start and the base offset.
+   lslv(index, index, log2_elem_size);
+   add(data, array, index);
+   add(data, data, arrayOopDesc::base_offset_in_bytes(T_INLINE_TYPE));
+ }
+ 
  // Emits a T_OBJECT load from 'src' into 'dst' via access_load_at(), adding
  // IN_HEAP to the caller supplied decorators.
  void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
                                     Register thread_tmp, DecoratorSet decorators) {
    access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
  }
  

@@ -3998,17 +4214,17 @@
                                              Register thread_tmp, DecoratorSet decorators) {
    access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
  }
  
  // Emits a T_OBJECT store of 'src' to 'dst' via access_store_at(), adding
  // IN_HEAP to the caller supplied decorators.
  void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
-                                     Register thread_tmp, DecoratorSet decorators) {
-   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+                                     Register thread_tmp, Register tmp3, DecoratorSet decorators) {
+   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp, tmp3);
  }
  
  // Used for storing NULLs: an IN_HEAP T_OBJECT store to 'dst' through
  // access_store_at() with noreg as the source and for every temp register.
  void MacroAssembler::store_heap_oop_null(Address dst) {
-   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
  }
  
  Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
    assert(oop_recorder() != NULL, "this assembler needs a Recorder");
    int index = oop_recorder()->allocate_metadata_index(obj);

@@ -4068,10 +4284,118 @@
  #endif
    int oop_index = oop_recorder()->find_index(obj);
    return Address((address)obj, oop_Relocation::spec(oop_index));
  }
  
+ // Object / value buffer allocation...
+ //
+ // Emits the inline fast path that allocates and initializes an instance of
+ // 'klass', leaving the new object in 'new_obj'. Control falls through on
+ // success and branches to 'alloc_failed' when the layout helper has
+ // Klass::_lh_instance_slow_path_bit set or the inline allocation fails.
+ //   klass        - klass of the instance; preserved (saved with push() and
+ //                  restored with pop(), since it is handed to tlab_allocate()
+ //                  as a temp register)
+ //   new_obj      - result register, must be r0
+ //   t1           - receives the layout helper (instance size); clobbered
+ //   t2           - temp; clobbered
+ //   clear_fields - when true the fields after the header are zeroed here,
+ //                  unless the object came from a TLAB and ZeroTLAB is set
+ // rscratch1 is clobbered by the field clearing loop.
+ void MacroAssembler::allocate_instance(Register klass, Register new_obj,
+                                        Register t1, Register t2,
+                                        bool clear_fields, Label& alloc_failed)
+ {
+   Label done, initialize_header, initialize_object, slow_case, slow_case_no_pop;
+   Register layout_size = t1;
+   assert(new_obj == r0, "needs to be r0, according to barrier asm eden_allocate");
+   assert_different_registers(klass, new_obj, t1, t2);
+ 
+   // get instance_size in InstanceKlass (scaled to a count of bytes)
+   ldrw(layout_size, Address(klass, Klass::layout_helper_offset()));
+   // test to see if it has a finalizer or is malformed in some way
+   tst(layout_size, Klass::_lh_instance_slow_path_bit);
+   br(Assembler::NE, slow_case_no_pop);
+ 
+   // Allocate the instance:
+   //  If TLAB is enabled:
+   //    Try to allocate in the TLAB.
+   //    If fails, go to the slow path.
+   //  Else If inline contiguous allocations are enabled:
+   //    Try to allocate in eden.
+   //    If fails due to heap end, go to slow path.
+   //
+   //  If TLAB is enabled OR inline contiguous is enabled:
+   //    Initialize the allocation.
+   //    Exit.
+   //
+   //  Go to slow path.
+   const bool allow_shared_alloc =
+     Universe::heap()->supports_inline_contig_alloc();
+ 
+   // klass is used as a temp by tlab_allocate() below; every path that leaves
+   // this point pops it again (initialize_header or slow_case).
+   push(klass);
+ 
+   if (UseTLAB) {
+     tlab_allocate(new_obj, layout_size, 0, klass, t2, slow_case);
+     if (ZeroTLAB || (!clear_fields)) {
+       // the fields are already zero (ZeroTLAB) or were not requested to be
+       // cleared: only the header is left to initialize
+       b(initialize_header);
+     } else {
+       // initialize both the header and fields
+       b(initialize_object);
+     }
+   } else {
+     // Allocation in the shared Eden, if allowed.
+     //
+     eden_allocate(new_obj, layout_size, 0, t2, slow_case);
+   }
+ 
+   // If UseTLAB or allow_shared_alloc are true, the object is created above and
+   // there is an initialize need. Otherwise, skip and go to the slow path.
+   if (UseTLAB || allow_shared_alloc) {
+     if (clear_fields) {
+       // The object is initialized before the header.  If the object size is
+       // zero, go directly to the header initialization.
+       bind(initialize_object);
+       subs(layout_size, layout_size, sizeof(oopDesc));
+       br(Assembler::EQ, initialize_header);
+ 
+       // Initialize topmost object field, divide size by 8, check if odd and
+       // test if zero.
+ 
+ #ifdef ASSERT
+       // make sure instance_size was multiple of 8
+       Label L;
+       tst(layout_size, 7);
+       br(Assembler::EQ, L);
+       stop("object size is not multiple of 8 - adjust this code");
+       bind(L);
+       // must be > 0, no extra check needed here
+ #endif
+ 
+       lsr(layout_size, layout_size, LogBytesPerLong);
+ 
+       // initialize remaining object fields: instance_size was a multiple of 8
+       {
+         // Zero the body one 8-byte word at a time, from the last word down
+         // to the first word after the header; layout_size counts the words
+         // still to be cleared.
+         Label loop;
+ 
+         bind(loop);
+         add(rscratch1, new_obj, layout_size, Assembler::LSL, LogBytesPerLong);
+         str(zr, Address(rscratch1, sizeof(oopDesc) - 1*oopSize));
+         subs(layout_size, layout_size, 1);
+         br(Assembler::NE, loop);
+       }
+     } // clear_fields
+ 
+     // initialize object header only.
+     bind(initialize_header);
+     pop(klass);
+     Register mark_word = t2;
+     ldr(mark_word, Address(klass, Klass::prototype_header_offset()));
+     str(mark_word, Address(new_obj, oopDesc::mark_offset_in_bytes ()));
+     store_klass_gap(new_obj, zr);  // zero klass gap for compressed oops
+     mov(t2, klass);         // preserve klass
+     store_klass(new_obj, t2);  // src klass reg is potentially compressed
+ 
+     b(done);
+   }
+ 
+   bind(slow_case);
+   pop(klass);
+   bind(slow_case_no_pop);
+   b(alloc_failed);
+ 
+   bind(done);
+ }
+ 
  // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
  void MacroAssembler::tlab_allocate(Register obj,
                                     Register var_size_in_bytes,
                                     int con_size_in_bytes,
                                     Register t1,

@@ -4117,10 +4441,23 @@
      ldp(rscratch2, rscratch1, Address(post(sp, 16)));
    }
  #endif
  }
  
+ // Loads into 'inline_klass' the entry number 'index' of the inline type
+ // field klasses table of 'klass' (entries are 8 bytes wide: index << 3).
+ // 'inline_klass' temporarily holds the table pointer. ASSERT builds emit a
+ // check that stops when that table pointer is NULL.
+ void MacroAssembler::get_inline_type_field_klass(Register klass, Register index, Register inline_klass) {
+   Address field_klasses(klass, InstanceKlass::inline_type_field_klasses_offset());
+   ldr(inline_klass, field_klasses);
+ #ifdef ASSERT
+   {
+     Label table_present;
+     cbnz(inline_klass, table_present);
+     stop("get_inline_type_field_klass contains no inline klass");
+     bind(table_present);
+   }
+ #endif
+   Address table_entry(inline_klass, index, Address::lsl(3));
+   ldr(inline_klass, table_entry);
+ }
+ 
  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages.  This clobbers tmp.
  void MacroAssembler::bang_stack_size(Register size, Register tmp) {
    assert_different_registers(tmp, size, rscratch1);
    mov(tmp, sp);

@@ -4240,10 +4577,61 @@
      }
      ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
    }
  }
  
+ // Tears down the current frame. Without stack repair this is simply
+ // remove_frame(initial_framesize). With stack repair the amount to pop is
+ // read from the sp_inc stack slot, so that the extension of the caller's
+ // frame used for inline type unpacking is removed as well.
+ // Clobbers rscratch1 in the stack repair case.
+ void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
+   if (!needs_stack_repair) {
+     remove_frame(initial_framesize);
+     return;
+   }
+ 
+   // Stack layout at this point:
+   //
+   // | Arguments from caller     |
+   // |---------------------------|  <-- caller's SP
+   // | Saved LR #1               |
+   // | Saved FP #1               |
+   // |---------------------------|
+   // | Extension space for       |
+   // |   inline arg (un)packing  |
+   // |---------------------------|  <-- start of this method's frame
+   // | Saved LR #2               |
+   // | Saved FP #2               |
+   // |---------------------------|  <-- FP
+   // | sp_inc                    |
+   // | method locals             |
+   // |---------------------------|  <-- SP
+   //
+   // FP and LR are on the stack twice. Both copies are identical unless the
+   // caller has been deoptimized: then LR #1 is patched to point at the deopt
+   // blob while LR #2 still points into the old method.
+   //
+   // The sp_inc slot holds the total frame size, extension space included,
+   // minus the two words of the saved FP and LR.
+ 
+   // The slot sits immediately below the saved LR and FP.
+   const int sp_inc_offset = initial_framesize - 3 * wordSize;
+ 
+   ldr(rscratch1, Address(sp, sp_inc_offset));
+   add(sp, sp, rscratch1);
+   // Pop FP/LR copy #1.
+   ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+ }
+ 
+ // Stores the value later consumed by remove_frame(int, bool) into the sp_inc
+ // stack slot: frame_size + sp_inc minus the two words of the saved FP and LR,
+ // which are popped separately. Clobbers rscratch1.
+ //   sp_inc     - size of the stack extension, 0 or more than two words
+ //   frame_size - size of the method's own frame
+ void MacroAssembler::save_stack_increment(int sp_inc, int frame_size) {
+   const int total_frame_size = frame_size + sp_inc;
+   assert(sp_inc == 0 || sp_inc > 2*wordSize, "invalid sp_inc value");
+   assert(total_frame_size >= 2*wordSize, "frame size must include FP/LR space");
+   assert((total_frame_size & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ 
+   // The slot sits immediately below the saved LR and FP.
+   const int sp_inc_offset = frame_size - 3 * wordSize;
+   Address sp_inc_slot(sp, sp_inc_offset);
+ 
+   mov(rscratch1, total_frame_size - 2*wordSize);
+   str(rscratch1, sp_inc_slot);
+ }
  
  // This method checks if provided byte array contains byte with highest bit set.
  address MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
      // Simple and most common case of aligned small array which is not at the
      // end of memory page is placed here. All other cases are in stub.

@@ -5083,10 +5471,362 @@
    }
  
    pop(saved_regs, sp);
  }
  
+ #ifdef COMPILER2
+ // Prolog of a C2 compiled method.
+ // Moved here from aarch64.ad to support the Valhalla code below.
+ //   C      - current compilation; provides frame and bang sizes
+ //   sp_inc - stack extension size recorded via save_stack_increment() when
+ //            the compilation needs stack repair
+ void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+ 
+   // n.b. frame size includes space for return pc and rfp
+   const long framesize = C->output()->frame_size_in_bytes();
+ 
+   // The leading nop can later be patched into a branch should the method
+   // have to be invalidated.
+   nop();
+ 
+   const int bangsize = C->output()->bang_size_in_bytes();
+   if (C->output()->need_stack_bang(bangsize)) {
+     generate_stack_overflow_check(bangsize);
+   }
+ 
+   build_frame(framesize);
+ 
+   if (C->needs_stack_repair()) {
+     save_stack_increment(sp_inc, framesize);
+   }
+ 
+   if (VerifyStackAtCalls) {
+     Unimplemented();
+   }
+ }
+ #endif // COMPILER2
+ 
+ // Emits the code that buffers an inline type returned as fields: when bit 0
+ // of r0 is set, a new inline type instance is allocated and filled from the
+ // field values; when it is clear nothing is done.
+ //   vk               - statically known inline klass (C1), or NULL when r0
+ //                      holds (InlineKlass* | 0x01) (interpreter)
+ //   from_interpreter - selects how the slow path runtime stub is called
+ // Returns offset() taken right after the slow path far_call when
+ // !from_interpreter, -1 otherwise.
+ // Uses r12-r15 and rscratch1 as temporaries.
+ int MacroAssembler::store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter) {
+   Label skip, slow_case;
+   // Bit 0 of r0 clear: no new buffer is needed.
+   tbz(r0, 0, skip);
+   int call_offset = -1;
+ 
+   const bool klass_is_known = (vk != NULL);
+ 
+   // Be careful not to clobber r1-7 which hold returned fields
+   // Also do not use callee-saved registers as these may be live in the interpreter
+   Register alloc_tmp1 = r13;
+   Register alloc_tmp2 = r14;
+   Register klass_reg  = r15;
+   Register saved_r0   = r12;
+ 
+   // This resembles allocate_instance, with some differences that
+   // allocate_instance is not aware of: the object size is never zero and is
+   // sometimes a constant, and so on.
+ 
+   // 1. Try to allocate a new buffered inline instance either from TLAB or eden space
+   mov(saved_r0, r0); // the slow case needs r0 back; *_allocate may corrupt it on failure
+ 
+   Register var_size = noreg;
+   int con_size = 0;
+   if (klass_is_known) {
+     // Called from C1, where the return type is statically known.
+     movptr(klass_reg, (intptr_t)vk->get_InlineKlass());
+     con_size = vk->layout_helper();
+     assert(con_size != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
+   } else {
+     // Call from interpreter. R0 contains ((the InlineKlass* of the return type) | 0x01)
+     andr(klass_reg, r0, -2);
+     ldrw(alloc_tmp2, Address(klass_reg, Klass::layout_helper_offset()));
+     var_size = alloc_tmp2;
+   }
+   if (UseTLAB) {
+     tlab_allocate(r0, var_size, con_size, alloc_tmp1, alloc_tmp2, slow_case);
+   } else {
+     eden_allocate(r0, var_size, con_size, alloc_tmp1, slow_case);
+   }
+ 
+   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
+     // 2. Initialize buffered inline instance header
+     Register buffer_obj = r0;
+     mov(rscratch1, (intptr_t)markWord::inline_type_prototype().value());
+     str(rscratch1, Address(buffer_obj, oopDesc::mark_offset_in_bytes()));
+     store_klass_gap(buffer_obj, zr);
+     if (!klass_is_known) {
+       // store_klass corrupts its klass register, keep a copy (interpreter case only).
+       mov(alloc_tmp1, klass_reg);
+     }
+     store_klass(buffer_obj, klass_reg);
+     // 3. Initialize its fields with an inline class specific handler
+     if (klass_is_known) {
+       far_call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
+     } else {
+       // alloc_tmp1 holds the klass copy made above
+       ldr(alloc_tmp1, Address(alloc_tmp1, InstanceKlass::adr_inlineklass_fixed_block_offset()));
+       ldr(alloc_tmp1, Address(alloc_tmp1, InlineKlass::pack_handler_offset()));
+       blr(alloc_tmp1);
+     }
+ 
+     membar(Assembler::StoreStore);
+     b(skip);
+   } else {
+     // Must have already branched to slow_case in eden_allocate() above.
+     DEBUG_ONLY(should_not_reach_here());
+   }
+ 
+   bind(slow_case);
+   // Inline allocation failed: fall back to a runtime call. Oop fields may be
+   // live in registers without us being able to tell; the runtime call takes
+   // care of preserving them across a GC if there is one.
+   mov(r0, saved_r0);
+ 
+   if (from_interpreter) {
+     super_call_VM_leaf(StubRoutines::store_inline_type_fields_to_buf());
+   } else {
+     far_call(RuntimeAddress(StubRoutines::store_inline_type_fields_to_buf()));
+     call_offset = offset();
+   }
+   membar(Assembler::StoreStore);
+ 
+   bind(skip);
+   return call_offset;
+ }
+ 
+ // Emits a move of one value between registers and/or stack slots and records
+ // the outcome in reg_state.
+ // Returns true when 'to' already holds the value or the move was emitted
+ // (then 'from' is marked writable and 'to' written); returns false, emitting
+ // nothing, when a move is needed but 'to' is still read-only.
+ // No instruction is emitted when from == to or bt == T_VOID.
+ // A stack-to-stack move goes through rscratch1.
+ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]) {
+   assert(from->is_valid() && to->is_valid(), "source and destination must be valid");
+   if (reg_state[to->value()] == reg_written) {
+     return true; // Already written
+   }
+ 
+   const bool needs_move = (from != to) && (bt != T_VOID);
+   if (needs_move) {
+     if (reg_state[to->value()] == reg_readonly) {
+       return false; // Not yet writable
+     }
+ 
+     const bool src_in_reg = from->is_reg();
+     const bool dst_in_reg = to->is_reg();
+ 
+     if (src_in_reg && dst_in_reg) {
+       // register -> register
+       if (from->is_Register() && to->is_Register()) {
+         mov(to->as_Register(), from->as_Register());
+       } else if (from->is_FloatRegister() && to->is_FloatRegister()) {
+         fmovd(to->as_FloatRegister(), from->as_FloatRegister());
+       } else {
+         ShouldNotReachHere();
+       }
+     } else if (src_in_reg) {
+       // register -> stack
+       int st_off = to->reg2stack() * VMRegImpl::stack_slot_size;
+       Address to_addr = Address(sp, st_off);
+       if (!from->is_FloatRegister()) {
+         str(from->as_Register(), to_addr);
+       } else if (bt == T_DOUBLE) {
+         strd(from->as_FloatRegister(), to_addr);
+       } else {
+         assert(bt == T_FLOAT, "must be float");
+         strs(from->as_FloatRegister(), to_addr);
+       }
+     } else {
+       Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size);
+       if (!dst_in_reg) {
+         // stack -> stack
+         int st_off = to->reg2stack() * VMRegImpl::stack_slot_size;
+         ldr(rscratch1, from_addr);
+         str(rscratch1, Address(sp, st_off));
+       } else if (!to->is_FloatRegister()) {
+         // stack -> general register
+         ldr(to->as_Register(), from_addr);
+       } else if (bt == T_DOUBLE) {
+         // stack -> float register
+         ldrd(to->as_FloatRegister(), from_addr);
+       } else {
+         assert(bt == T_FLOAT, "must be float");
+         ldrs(to->as_FloatRegister(), from_addr);
+       }
+     }
+   }
+ 
+   // Update register states
+   reg_state[from->value()] = reg_writable;
+   reg_state[to->value()] = reg_written;
+   return true;
+ }
+ 
+ // Emits code that extends the stack for packing or unpacking inline args:
+ // a copy of FP and LR is pushed, then SP is lowered by the space needed for
+ // 'args_on_stack' stack slots, rounded up to StackAlignmentInBytes.
+ // Returns that space plus the two words of the FP/LR copy.
+ // Clobbers rscratch1 when the extension does not fit the immediate form.
+ int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
+   int extension = args_on_stack * VMRegImpl::stack_slot_size;
+   extension = align_up(extension, StackAlignmentInBytes);
+   assert(extension > 0, "sanity");
+ 
+   // Save a copy of the FP and LR here for deoptimization patching and frame walking
+   stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+ 
+   // Adjust the stack pointer. This will be repaired on return by MacroAssembler::remove_frame
+   if (extension >= (1 << 9)) {
+     mov(rscratch1, extension);
+     sub(sp, sp, rscratch1);
+   } else {
+     sub(sp, sp, extension);   // Fits in an immediate
+   }
+ 
+   return extension + 2 * wordSize;  // Account for the FP/LR space
+ }
+ 
+ // Emits loads of the fields of an inline type oop into the registers / stack
+ // slots described by 'to', driven by a ScalarizedInlineArgsStream over 'sig'.
+ //   from      - location of the oop (register or stack slot; a stack slot is
+ //               first loaded into r10)
+ //   reg_state - per-location state; destinations that are still read-only
+ //               are skipped, written ones are left alone, writable ones are
+ //               loaded and marked written
+ // On return sig_index and to_index are advanced to the stream position and
+ // from_index is decremented. Returns true when no destination had to be
+ // skipped for being read-only.
+ // Uses r10 and r11 as temporaries.
+ bool MacroAssembler::unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
+                                           VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
+                                           RegState reg_state[]) {
+   assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+   assert(from->is_valid(), "source must be valid");
+   Register oop_tmp = r10;
+   Register val_tmp = r11;
+ 
+   // Register holding the source oop.
+   Register src_oop = oop_tmp;
+   if (from->is_reg()) {
+     src_oop = from->as_Register();
+   } else {
+     int st_off = from->reg2stack() * VMRegImpl::stack_slot_size;
+     ldr(oop_tmp, Address(sp, st_off));
+   }
+ 
+   ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1);
+   bool all_unpacked = true;
+   bool release_source = true;
+   VMReg dst_vmreg;
+   BasicType bt;
+   while (stream.next(dst_vmreg, bt)) {
+     assert(dst_vmreg->is_valid(), "destination must be valid");
+     int off = sig->at(stream.sig_index())._offset;
+     assert(off > 0, "offset in object should be positive");
+     Address field_addr = Address(src_oop, off);
+ 
+     int idx = (int)dst_vmreg->value();
+     if (reg_state[idx] == reg_readonly) {
+       // Destination not yet free. The source may only be released when the
+       // blocked destination is the source location itself.
+       if (idx != from->value()) {
+         release_source = false;
+       }
+       all_unpacked = false;
+       continue;
+     }
+     if (reg_state[idx] == reg_written) {
+       continue;
+     }
+     assert(reg_state[idx] == reg_writable, "must be writable");
+     reg_state[idx] = reg_written;
+ 
+     if (dst_vmreg->is_FloatRegister()) {
+       if (bt == T_DOUBLE) {
+         ldrd(dst_vmreg->as_FloatRegister(), field_addr);
+       } else {
+         assert(bt == T_FLOAT, "must be float");
+         ldrs(dst_vmreg->as_FloatRegister(), field_addr);
+       }
+       continue;
+     }
+ 
+     // General register or stack slot destination; stack slots go via r11.
+     Register dst = dst_vmreg->is_stack() ? val_tmp : dst_vmreg->as_Register();
+     if (is_reference_type(bt)) {
+       load_heap_oop(dst, field_addr);
+     } else {
+       bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+       load_sized_value(dst, field_addr, type2aelembytes(bt), is_signed);
+     }
+     if (dst_vmreg->is_stack()) {
+       int st_off = dst_vmreg->reg2stack() * VMRegImpl::stack_slot_size;
+       str(dst, Address(sp, st_off));
+     }
+   }
+   sig_index = stream.sig_index();
+   to_index = stream.regs_index();
+ 
+   if (release_source && reg_state[from->value()] != reg_written) {
+     // This is okay because no one else will write to that slot
+     reg_state[from->value()] = reg_writable;
+   }
+   from_index--;
+   return all_unpacked;
+ }
+ 
+ // Pack fields back into an inline type oop: the oop is loaded from element 'vtarg_index' of 'val_array', filled from 'from' and then moved to 'to'
+ bool MacroAssembler::pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,  // sig_index: in/out, must index a T_INLINE_TYPE entry on entry
+                                         VMRegPair* from, int from_count, int& from_index, VMReg to,  // from_index: in/out position in 'from'
+                                         RegState reg_state[], Register val_array) {  // returns false only when 'to' is read-only and not one of the fields being packed
+   assert(sig->at(sig_index)._bt == T_INLINE_TYPE, "should be at end delimiter");  // NOTE(review): message says "end delimiter" but the check is for T_INLINE_TYPE - confirm wording
+   assert(to->is_valid(), "destination must be valid");
+ 
+   if (reg_state[to->value()] == reg_written) {
+     skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
+     return true; // Already written
+   }
+ 
+   // The GC barrier expanded by store_heap_oop below may call into the
+   // runtime so use callee-saved registers for any values that need to be
+   // preserved. The GC barrier assembler should take care of saving the
+   // Java argument registers.
+   Register val_obj_tmp = r21;  // holds the oop being filled when 'to' itself cannot be used for that
+   Register from_reg_tmp = r22;  // receives field values that are read from stack slots
+   Register tmp1 = r14;  // NOTE(review): r14 is also what spill_reg_for returns for non-float registers - confirm there is no conflict
+   Register tmp2 = r13;
+   Register tmp3 = r12;  // tmp1..tmp3 are handed to store_heap_oop as temporaries
+   Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
+ 
+   assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array);
+ 
+   if (reg_state[to->value()] == reg_readonly) {
+     if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) {
+       skip_unpacked_fields(sig, sig_index, from, from_count, from_index);
+       return false; // Not yet writable
+     }
+     val_obj = val_obj_tmp;  // 'to' is one of the field sources here; presumably this keeps it intact until it has been read - confirm
+   }
+ 
+   int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_INLINE_TYPE);  // byte offset of element vtarg_index in val_array
+   load_heap_oop(val_obj, Address(val_array, index));
+ 
+   ScalarizedInlineArgsStream stream(sig, sig_index, from, from_count, from_index);
+   VMReg fromReg;
+   BasicType bt;
+   while (stream.next(fromReg, bt)) {
+     assert(fromReg->is_valid(), "source must be valid");
+     int off = sig->at(stream.sig_index())._offset;  // field offset taken from the current signature entry
+     assert(off > 0, "offset in object should be positive");
+     size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;  // non-primitive fields are treated as word sized
+ 
+     // Pack the scalarized field into the value object.
+     Address dst(val_obj, off);
+ 
+     if (!fromReg->is_FloatRegister()) {
+       Register src;
+       if (fromReg->is_stack()) {
+         src = from_reg_tmp;
+         int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size;  // byte offset of the source slot relative to sp
+         load_sized_value(src, Address(sp, ld_off), size_in_bytes, /* is_signed */ false);
+       } else {
+         src = fromReg->as_Register();
+       }
+       assert_different_registers(dst.base(), src, tmp1, tmp2, tmp3, val_array);
+       if (is_reference_type(bt)) {
+         store_heap_oop(dst, src, tmp1, tmp2, tmp3, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED);
+       } else {
+         store_sized_value(dst, src, size_in_bytes);
+       }
+     } else if (bt == T_DOUBLE) {
+       strd(fromReg->as_FloatRegister(), dst);
+     } else {
+       assert(bt == T_FLOAT, "must be float");
+       strs(fromReg->as_FloatRegister(), dst);
+     }
+     reg_state[fromReg->value()] = reg_writable;  // the source has been consumed, so its location may now be overwritten
+   }
+   sig_index = stream.sig_index();  // hand the stream position back to the caller
+   from_index = stream.regs_index();
+ 
+   assert(reg_state[to->value()] == reg_writable, "must have already been read");
+   bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state);  // place the filled oop in its final destination
+   assert(success, "to register must be writeable");
+ 
+   return true;
+ }
+ 
+ // Pick the register handed out for 'reg': v0 when 'reg' is a float register, r14 otherwise.
+ VMReg MacroAssembler::spill_reg_for(VMReg reg) {
+   if (reg->is_FloatRegister()) {
+     return v0->as_VMReg();
+   }
+   return r14->as_VMReg();
+ }
+ 
  void MacroAssembler::cache_wb(Address line) {
    assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset");
    assert(line.index() == noreg, "index should be noreg");
    assert(line.offset() == 0, "offset should be 0");
    // would like to assert this
< prev index next >