src/hotspot/cpu/x86/macroAssembler_x86.cpp


@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -43,13 +43,15 @@
 #include "runtime/objectMonitor.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
+#include "runtime/signature_cc.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
+#include "vmreg_x86.inline.hpp"
 #include "crc32c.h"
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
 #endif
 

@@ -1751,10 +1753,14 @@
   testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
   jccb(Assembler::notZero, IsInflated);
 
   // Attempt stack-locking ...
   orptr (tmpReg, markOopDesc::unlocked_value);
+  if (EnableValhalla && !UseBiasedLocking) {
+    // Clear the always_locked bit so that the CAS below fails and we take the slow path if the object is a value type
+    andptr(tmpReg, ~markOopDesc::biased_lock_bit_in_place);
+  }
   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
   lock();
   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
   if (counters != NULL) {
     cond_inc32(Assembler::equal,

@@ -2424,10 +2430,14 @@
   pass_arg1(this, arg_1);
   pass_arg0(this, arg_0);
   call_VM_leaf(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
   pass_arg0(this, arg_0);
   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 }
 

@@ -3418,10 +3428,53 @@
     // nothing to do, (later) access of M[reg + offset]
     // will provoke OS NULL exception if reg = NULL
   }
 }
 
+void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) {
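+  // JVM_ACC_VALUE in the access flags marks a value (inline) klass.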
+  movl(temp_reg, Address(klass, Klass::access_flags_offset()));
+  testl(temp_reg, JVM_ACC_VALUE);
+  jcc(Assembler::notZero, is_value);
+}
+
+void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattenable);
+}
+
+void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::zero, notFlattenable);
+}
+
+void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattened_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattened);
+}
+
+void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg,
+                                              Label& is_flattened_array) {
+  load_storage_props(temp_reg, oop);
+  testb(temp_reg, ArrayStorageProperties::flattened_value);
+  jcc(Assembler::notZero, is_flattened_array);
+}
+
+void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) {
+  load_storage_props(temp_reg, oop);
+  testb(temp_reg, ArrayStorageProperties::null_free_value);
+  jcc(Assembler::notZero, is_null_free_array);
+}
+
 void MacroAssembler::os_breakpoint() {
   // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
   // (e.g., MSVC can't call ps() otherwise)
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 }

@@ -4650,11 +4703,15 @@
     bind(L);
   }
 }
 
 void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string (generated below) confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
   {
     ResourceMark rm;

@@ -4740,11 +4797,15 @@
   return Address(rsp, scale_reg, scale_factor, offset);
 }
 
 
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // The address of the code string (generated below) confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
   {

@@ -5229,24 +5290,49 @@
 void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
   load_method_holder(rresult, rmethod);
   movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
 }
 
+void MacroAssembler::load_metadata(Register dst, Register src) {
+  if (UseCompressedClassPointers) {
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  } else {
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
+void MacroAssembler::load_storage_props(Register dst, Register src) {
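+  // The storage properties of (array) oops are encoded in the upper bits of the klass word.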
+  load_metadata(dst, src);
+  if (UseCompressedClassPointers) {
+    shrl(dst, oopDesc::narrow_storage_props_shift);
+  } else {
+    shrq(dst, oopDesc::wide_storage_props_shift);
+  }
+}
+
 void MacroAssembler::load_method_holder(Register holder, Register method) {
   movptr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
   movptr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
   movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
 }
 
 void MacroAssembler::load_klass(Register dst, Register src) {
+  load_metadata(dst, src);
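+  // The metadata word also carries the storage properties in its upper bits; mask them
+  // off to obtain the (narrow) Klass pointer.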
 #ifdef _LP64
   if (UseCompressedClassPointers) {
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    andl(dst, oopDesc::compressed_klass_mask());
     decode_klass_not_null(dst);
   } else
 #endif
-    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  {
+#ifdef _LP64
+    shlq(dst, oopDesc::storage_props_nof_bits);
+    shrq(dst, oopDesc::storage_props_nof_bits);
+#else
+    andl(dst, oopDesc::wide_klass_mask());
+#endif
+  }
 }
 
 void MacroAssembler::load_prototype_header(Register dst, Register src) {
   load_klass(dst, src);
   movptr(dst, Address(dst, Klass::prototype_header_offset()));

@@ -5273,18 +5359,18 @@
     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
   }
 }
 
 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
-                                     Register tmp1, Register tmp2) {
+                                     Register tmp1, Register tmp2, Register tmp3) {
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
   decorators = AccessInternal::decorator_fixup(decorators);
   bool as_raw = (decorators & AS_RAW) != 0;
   if (as_raw) {
-    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
+    bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
   } else {
-    bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
+    bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
   }
 }
 
 void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
   // Use stronger ACCESS_WRITE|ACCESS_READ by default.

@@ -5305,17 +5391,17 @@
                                             Register thread_tmp, DecoratorSet decorators) {
   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
 }
 
 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
-                                    Register tmp2, DecoratorSet decorators) {
-  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
+                                    Register tmp2, Register tmp3, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
 }
 
 // Used for storing NULLs.
 void MacroAssembler::store_heap_oop_null(Address dst) {
-  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
 }
 
 #ifdef _LP64
 void MacroAssembler::store_klass_gap(Register dst, Register src) {
   if (UseCompressedClassPointers) {

@@ -5642,11 +5728,16 @@
 }
 
 #endif // _LP64
 
 // C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
+void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+  int framesize = C->frame_size_in_bytes();
+  int bangsize = C->bang_size_in_bytes();
+  bool fp_mode_24b = C->in_24_bit_fp_mode();
+  int stack_bang_size = C->need_stack_bang(bangsize) ? bangsize : 0;
+  bool is_stub = C->stub_function() != NULL;
 
   // WARNING: Initial instruction MUST be 5 bytes or longer so that
   // NativeJump::patch_verified_entry will be able to patch out the entry
   // code safely. The push to verify stack depth is ok at 5 bytes,
   // the frame allocation can be either 3 or 6 bytes. So if we don't do

@@ -5695,10 +5786,16 @@
         addptr(rbp, framesize);
       }
     }
   }
 
+  if (C->needs_stack_repair()) {
+    // Save stack increment (also account for fixed framesize and rbp)
+    assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+    movptr(Address(rsp, C->sp_inc_offset()), sp_inc + framesize + wordSize);
+  }
+
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
     framesize -= wordSize;
     movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
   }
 

@@ -5731,18 +5828,20 @@
     bs->nmethod_entry_barrier(this);
   }
 }
 
 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-void MacroAssembler::xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp) {
+void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
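+  // Broadcast the 64-bit fill value into xtmp (and, with AVX2, into its upper 128 bits as well).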
+  movdq(xtmp, val);
   if (UseAVX >= 2) {
-    vpxor(xtmp, xtmp, xtmp, AVX_256bit);
+    punpcklqdq(xtmp, xtmp);
+    vinserti128_high(xtmp, xtmp);
   } else {
-    pxor(xtmp, xtmp);
+    punpcklqdq(xtmp, xtmp);
   }
   jmp(L_zero_64_bytes);
 
   BIND(L_loop);
   if (UseAVX >= 2) {

@@ -5782,26 +5881,396 @@
   decrement(cnt);
   jccb(Assembler::greaterEqual, L_sloop);
   BIND(L_end);
 }
 
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
+int MacroAssembler::store_value_type_fields_to_buf(ciValueKlass* vk, bool from_interpreter) {
+  // A value type might be returned. If fields are in registers we
+  // need to allocate a value type instance and initialize it with
+  // the value of the fields.
+  Label skip;
+  // A new buffered value is only needed if the fields are returned in registers; in that
+  // case RAX holds the ValueKlass* of the return type tagged with 1 (see below). Otherwise
+  // RAX already contains a buffered value and we can skip the allocation.
+  testptr(rax, 1);
+  jcc(Assembler::zero, skip);
+  int call_offset = -1;
+
+#ifdef _LP64
+  Label slow_case;
+
+  // Try to allocate a new buffered value (from the heap)
+  if (UseTLAB) {
+    // FIXME -- for smaller code, the inline allocation (and the slow case) should be moved inside the pack handler.
+    if (vk != NULL) {
+      // Called from C1, where the return type is statically known.
+      movptr(rbx, (intptr_t)vk->get_ValueKlass());
+      jint lh = vk->layout_helper();
+      assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved");
+      movl(r14, lh);
+    } else {
+      // Called from the interpreter. RAX contains ((the ValueKlass* of the return type) | 0x01)
+      mov(rbx, rax);
+      andptr(rbx, -2);
+      movl(r14, Address(rbx, Klass::layout_helper_offset()));
+    }
+
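+    // Inline TLAB allocation: r13 = current top (the address of the new buffered value),
+    // r14 = new top; take the slow path if the new top would exceed tlab_end.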
+    movptr(r13, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
+    lea(r14, Address(r13, r14, Address::times_1));
+    cmpptr(r14, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
+    jcc(Assembler::above, slow_case);
+    movptr(Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())), r14);
+    movptr(Address(r13, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::always_locked_prototype());
+
+    xorl(rax, rax); // use zero reg to clear memory (shorter code)
+    store_klass_gap(r13, rax);  // zero klass gap for compressed oops
+
+    if (vk == NULL) {
+      // store_klass corrupts rbx, so save it in rax for later use (interpreter case only).
+      mov(rax, rbx);
+    }
+    store_klass(r13, rbx);  // klass
+
+    // We have our new buffered value, initialize its fields with a
+    // value class specific handler
+    if (vk != NULL) {
+      // FIXME -- do the packing in-line to avoid the runtime call
+      mov(rax, r13);
+      call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint.
+    } else {
+      movptr(rbx, Address(rax, InstanceKlass::adr_valueklass_fixed_block_offset()));
+      movptr(rbx, Address(rbx, ValueKlass::pack_handler_offset()));
+      mov(rax, r13);
+      call(rbx);
+    }
+    jmp(skip);
+  }
+
+  bind(slow_case);
+  // We failed to allocate a new value, fall back to a runtime
+  // call. Some oop field may be live in some registers but we can't
+  // tell. That runtime call will take care of preserving them
+  // across a GC if there's one.
+#endif
+
+  if (from_interpreter) {
+    super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf());
+  } else {
+    call(RuntimeAddress(StubRoutines::store_value_type_fields_to_buf()));
+    call_offset = offset();
+  }
+
+  bind(skip);
+  return call_offset;
+}
+
+
+// Move a value between registers/stack slots and update the reg_state
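+// Returns false if the destination is still marked reg_readonly, i.e. its current contents
+// have not been consumed yet; the caller must retry the move later.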
+bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off, int extra_stack_offset) {
+  if (reg_state[to->value()] == reg_written) {
+    return true; // Already written
+  }
+  if (from != to && bt != T_VOID) {
+    if (reg_state[to->value()] == reg_readonly) {
+      return false; // Not yet writable
+    }
+    if (from->is_reg()) {
+      if (to->is_reg()) {
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from->as_XMMRegister());
+          }
+        } else {
+          movq(to->as_Register(), from->as_Register());
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        Address to_addr = Address(rsp, st_off);
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to_addr, from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to_addr, from->as_XMMRegister());
+          }
+        } else {
+          movq(to_addr, from->as_Register());
+        }
+      }
+    } else {
+      Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset);
+      if (to->is_reg()) {
+        if (to->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from_addr);
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from_addr);
+          }
+        } else {
+          movq(to->as_Register(), from_addr);
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        movq(r13, from_addr);
+        movq(Address(rsp, st_off), r13);
+      }
+    }
+  }
+  // Update register states
+  reg_state[from->value()] = reg_writable;
+  reg_state[to->value()] = reg_written;
+  return true;
+}
+
+// Read all fields from a value type oop and store the values in registers/stack slots
+bool MacroAssembler::unpack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, VMReg from, VMRegPair* regs_to,
+                                         int& to_index, RegState reg_state[], int ret_off, int extra_stack_offset) {
+  Register fromReg = from->is_reg() ? from->as_Register() : noreg;
+  assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+
+  int vt = 1;
+  bool done = true;
+  bool mark_done = true;
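+  // Walk the signature backwards from the trailing T_VOID delimiter; 'vt' tracks the value
+  // type nesting depth (T_VALUETYPE closes a level, a T_VOID that is not the second slot of
+  // a long/double opens one).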
+  do {
+    sig_index--;
+    BasicType bt = sig->at(sig_index)._bt;
+    if (bt == T_VALUETYPE) {
+      vt--;
+    } else if (bt == T_VOID &&
+               sig->at(sig_index-1)._bt != T_LONG &&
+               sig->at(sig_index-1)._bt != T_DOUBLE) {
+      vt++;
+    } else if (SigEntry::is_reserved_entry(sig, sig_index)) {
+      to_index--; // Ignore this
+    } else {
+      assert(to_index >= 0, "invalid to_index");
+      VMRegPair pair_to = regs_to[to_index--];
+      VMReg to = pair_to.first();
+
+      if (bt == T_VOID) continue;
+
+      int idx = (int)to->value();
+      if (reg_state[idx] == reg_readonly) {
+         if (idx != from->value()) {
+           mark_done = false;
+         }
+         done = false;
+         continue;
+      } else if (reg_state[idx] == reg_written) {
+        continue;
+      } else {
+        assert(reg_state[idx] == reg_writable, "must be writable");
+        reg_state[idx] = reg_written;
+       }
+
+      if (fromReg == noreg) {
+        int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset;
+        movq(r10, Address(rsp, st_off));
+        fromReg = r10;
+      }
+
+      int off = sig->at(sig_index)._offset;
+      assert(off > 0, "offset in object should be positive");
+      bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+
+      Address fromAddr = Address(fromReg, off);
+      bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+      if (!to->is_XMMRegister()) {
+        Register dst = to->is_stack() ? r13 : to->as_Register();
+        if (is_oop) {
+          load_heap_oop(dst, fromAddr);
+        } else {
+          load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
+        }
+        if (to->is_stack()) {
+          int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset;
+          assert(st_off != ret_off, "overwriting return address at %d", st_off);
+          movq(Address(rsp, st_off), dst);
+        }
+      } else {
+        if (bt == T_DOUBLE) {
+          movdbl(to->as_XMMRegister(), fromAddr);
+        } else {
+          assert(bt == T_FLOAT, "must be float");
+          movflt(to->as_XMMRegister(), fromAddr);
+        }
+      }
+    }
+  } while (vt != 0);
+  if (mark_done && reg_state[from->value()] != reg_written) {
+    // This is okay because no one else will write to that slot
+    reg_state[from->value()] = reg_writable;
+  }
+  return done;
+}
+
+// Pack fields back into a value type oop
+bool MacroAssembler::pack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
+                                       VMReg to, VMRegPair* regs_from, int regs_from_count, int& from_index, RegState reg_state[],
+                                       int ret_off, int extra_stack_offset) {
+  assert(sig->at(sig_index)._bt == T_VALUETYPE, "should be at end delimiter");
+  assert(to->is_valid(), "must be");
+
+  if (reg_state[to->value()] == reg_written) {
+    skip_unpacked_fields(sig, sig_index, regs_from, regs_from_count, from_index);
+    return true; // Already written
+  }
+
+  Register val_array = rax;
+  Register val_obj_tmp = r11;
+  Register from_reg_tmp = r10;
+  Register tmp1 = r14;
+  Register tmp2 = r13;
+  Register tmp3 = rbx;
+  Register val_obj = to->is_stack() ? val_obj_tmp : to->as_Register();
+
+  if (reg_state[to->value()] == reg_readonly) {
+    if (!is_reg_in_unpacked_fields(sig, sig_index, to, regs_from, regs_from_count, from_index)) {
+      skip_unpacked_fields(sig, sig_index, regs_from, regs_from_count, from_index);
+      return false; // Not yet writable
+    }
+    val_obj = val_obj_tmp;
+  }
+
+  int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_VALUETYPE);
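+  // Load the buffered value for this argument from the object array of buffered values passed in RAX (val_array).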
+  load_heap_oop(val_obj, Address(val_array, index));
+
+  ScalarizedValueArgsStream stream(sig, sig_index, regs_from, regs_from_count, from_index);
+  VMRegPair from_pair;
+  BasicType bt;
+  while (stream.next(from_pair, bt)) {
+    int off = sig->at(stream.sig_cc_index())._offset;
+    assert(off > 0, "offset in object should be positive");
+    bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+    size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+
+    VMReg from_r1 = from_pair.first();
+    VMReg from_r2 = from_pair.second();
+
+    // Pack the scalarized field into the value object.
+    Address dst(val_obj, off);
+    if (!from_r1->is_XMMRegister()) {
+      Register from_reg;
+
+      if (from_r1->is_stack()) {
+        from_reg = from_reg_tmp;
+        int ld_off = from_r1->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset;
+        load_sized_value(from_reg, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
+      } else {
+        from_reg = from_r1->as_Register();
+      }
+
+      if (is_oop) {
+        DecoratorSet decorators = IN_HEAP | ACCESS_WRITE;
+        store_heap_oop(dst, from_reg, tmp1, tmp2, tmp3, decorators);
+      } else {
+        store_sized_value(dst, from_reg, size_in_bytes);
+      }
+    } else {
+      if (from_r2->is_valid()) {
+        movdbl(dst, from_r1->as_XMMRegister());
+      } else {
+        movflt(dst, from_r1->as_XMMRegister());
+      }
+    }
+    reg_state[from_r1->value()] = reg_writable;
+  }
+  sig_index = stream.sig_cc_index();
+  from_index = stream.regs_cc_index();
+
+  assert(reg_state[to->value()] == reg_writable, "must have already been read");
+  bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state, ret_off, extra_stack_offset);
+  assert(success, "to register must be writeable");
+
+  return true;
+}
+
+// Unpack all value type arguments passed as oops
+void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
+  int sp_inc = unpack_value_args_common(C, receiver_only);
+  // Emit code for verified entry and save increment for stack repair on return
+  verified_entry(C, sp_inc);
+}
+
+int MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset,
+                                       BasicType* sig_bt, const GrowableArray<SigEntry>* sig_cc,
+                                       int args_passed, int args_on_stack, VMRegPair* regs,            // from
+                                       int args_passed_to, int args_on_stack_to, VMRegPair* regs_to) { // to
+  // Check if we need to extend the stack for packing/unpacking
+  int sp_inc = (args_on_stack_to - args_on_stack) * VMRegImpl::stack_slot_size;
+  if (sp_inc > 0) {
+    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+    if (!is_packing) {
+      // Save the return address, adjust the stack (make sure it is properly
+      // 16-byte aligned) and copy the return address to the new top of the stack.
+      // (Note: C1 does this in C1_MacroAssembler::scalarized_entry).
+      pop(r13);
+      subptr(rsp, sp_inc);
+      push(r13);
+    }
+  } else {
+    // The scalarized calling convention needs less stack space than the unscalarized one.
+    // No need to extend the stack, the caller will take care of these adjustments.
+    sp_inc = 0;
+  }
+
+  int ret_off; // make sure we don't overwrite the return address
+  if (is_packing) {
+    // For C1 code, the VVEP doesn't have reserved slots, so we store the return address at
+    // rsp[0] during shuffling.
+    ret_off = 0;
+  } else {
+    // C2 code ensures that sp_inc is a reserved slot.
+    ret_off = sp_inc;
+  }
+
+  return shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset,
+                                   sig_bt, sig_cc,
+                                   args_passed, args_on_stack, regs,
+                                   args_passed_to, args_on_stack_to, regs_to,
+                                   sp_inc, ret_off);
+}
+
+VMReg MacroAssembler::spill_reg_for(VMReg reg) {
+  return reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
+}
+
+// Restores the stack on return
+void MacroAssembler::restore_stack(Compile* C) {
+  int framesize = C->frame_size_in_bytes();
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  // Remove word for return addr already pushed and RBP
+  framesize -= 2*wordSize;
+
+  if (C->needs_stack_repair()) {
+    // Restore rbp and repair rsp by adding the stack increment
+    movq(rbp, Address(rsp, framesize));
+    addq(rsp, Address(rsp, C->sp_inc_offset()));
+  } else {
+    if (framesize > 0) {
+      addq(rsp, framesize);
+    }
+    pop(rbp);
+  }
+}
+
+void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   // is_large - if optimizers know cnt is larger than InitArrayShortSize
   assert(base==rdi, "base register must be edi for rep stos");
-  assert(tmp==rax,   "tmp register must be eax for rep stos");
+  assert(val==rax,   "val register must be eax for rep stos");
   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
   assert(InitArrayShortSize % BytesPerLong == 0,
     "InitArrayShortSize should be the multiple of BytesPerLong");
 
   Label DONE;
 
-  if (!is_large || !UseXMMForObjInit) {
-    xorptr(tmp, tmp);
-  }
-
   if (!is_large) {
     Label LOOP, LONG;
     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
     jccb(Assembler::greater, LONG);
 

@@ -5810,25 +6279,24 @@
     decrement(cnt);
     jccb(Assembler::negative, DONE); // Zero length
 
     // Use individual pointer-sized stores for small counts:
     BIND(LOOP);
-    movptr(Address(base, cnt, Address::times_ptr), tmp);
+    movptr(Address(base, cnt, Address::times_ptr), val);
     decrement(cnt);
     jccb(Assembler::greaterEqual, LOOP);
     jmpb(DONE);
 
     BIND(LONG);
   }
 
   // Use longer rep-prefixed ops for non-small counts:
-  if (UseFastStosb) {
+  if (UseFastStosb && !word_copy_only) {
     shlptr(cnt, 3); // convert to number of bytes
     rep_stosb();
   } else if (UseXMMForObjInit) {
-    movptr(tmp, base);
-    xmm_clear_mem(tmp, cnt, xtmp);
+    xmm_clear_mem(base, cnt, val, xtmp);
   } else {
     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
     rep_stos();
   }
 