1 /*
   2  * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "c1/c1_CodeStubs.hpp"
  29 #include "c1/c1_Compilation.hpp"
  30 #include "c1/c1_LIRAssembler.hpp"
  31 #include "c1/c1_MacroAssembler.hpp"
  32 #include "c1/c1_Runtime1.hpp"
  33 #include "c1/c1_ValueStack.hpp"
  34 #include "ci/ciArrayKlass.hpp"
  35 #include "ci/ciInstance.hpp"
  36 #include "compiler/oopMap.hpp"
  37 #include "gc/shared/collectedHeap.hpp"
  38 #include "gc/shared/gc_globals.hpp"
  39 #include "nativeInst_x86.hpp"
  40 #include "oops/objArrayKlass.hpp"
  41 #include "runtime/frame.inline.hpp"
  42 #include "runtime/safepointMechanism.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "runtime/stubRoutines.hpp"
  45 #include "utilities/powerOfTwo.hpp"
  46 #include "vmreg_x86.inline.hpp"
  47 
  48 
  49 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  50 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  51 // fast versions of NegF/NegD and AbsF/AbsD.
  52 
  53 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  54 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  55   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  56   // for the 128-bit operands of SSE instructions.
  57   jlong *operand = (jlong*)(((intptr_t)adr) & ((intptr_t)(~0xF)));
  58   // Store the value to a 128-bit operand.
  59   operand[0] = lo;
  60   operand[1] = hi;
  61   return operand;
  62 }
  63 
  64 // Buffer for 128-bit masks used by SSE instructions.
  65 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
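// The extra 128 bits at the start of the pool let double_quadword() round an
// entry's address down to a 16-byte boundary without leaving the pool.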
  66 
  67 // Static initialization during VM startup.
  68 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2],         CONST64(0x7FFFFFFF7FFFFFFF),         CONST64(0x7FFFFFFF7FFFFFFF));
  69 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2],         CONST64(0x7FFFFFFFFFFFFFFF),         CONST64(0x7FFFFFFFFFFFFFFF));
  70 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], (jlong)UCONST64(0x8000000080000000), (jlong)UCONST64(0x8000000080000000));
  71 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], (jlong)UCONST64(0x8000000000000000), (jlong)UCONST64(0x8000000000000000));
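// And'ing a float/double with a signmask constant clears the sign bit (AbsF/AbsD);
// xor'ing with a signflip constant toggles it (NegF/NegD).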
  72 
  73 
  74 NEEDS_CLEANUP // remove these definitions?
  75 const Register IC_Klass    = rax;   // where the IC klass is cached
  76 const Register SYNC_header = rax;   // synchronization header
  77 const Register SHIFT_count = rcx;   // where count for shift operations must be
  78 
  79 #define __ _masm->
  80 
  81 
  82 static void select_different_registers(Register preserve,
  83                                        Register extra,
  84                                        Register &tmp1,
  85                                        Register &tmp2) {
  86   if (tmp1 == preserve) {
  87     assert_different_registers(tmp1, tmp2, extra);
  88     tmp1 = extra;
  89   } else if (tmp2 == preserve) {
  90     assert_different_registers(tmp1, tmp2, extra);
  91     tmp2 = extra;
  92   }
  93   assert_different_registers(preserve, tmp1, tmp2);
  94 }
  95 
  96 
  97 
  98 static void select_different_registers(Register preserve,
  99                                        Register extra,
 100                                        Register &tmp1,
 101                                        Register &tmp2,
 102                                        Register &tmp3) {
 103   if (tmp1 == preserve) {
 104     assert_different_registers(tmp1, tmp2, tmp3, extra);
 105     tmp1 = extra;
 106   } else if (tmp2 == preserve) {
 107     assert_different_registers(tmp1, tmp2, tmp3, extra);
 108     tmp2 = extra;
 109   } else if (tmp3 == preserve) {
 110     assert_different_registers(tmp1, tmp2, tmp3, extra);
 111     tmp3 = extra;
 112   }
 113   assert_different_registers(preserve, tmp1, tmp2, tmp3);
 114 }
 115 
 116 
 117 
 118 bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
 119   if (opr->is_constant()) {
 120     LIR_Const* constant = opr->as_constant_ptr();
 121     switch (constant->type()) {
 122       case T_INT: {
 123         return true;
 124       }
 125 
 126       default:
 127         return false;
 128     }
 129   }
 130   return false;
 131 }
 132 
 133 
 134 LIR_Opr LIR_Assembler::receiverOpr() {
 135   return FrameMap::receiver_opr;
 136 }
 137 
 138 LIR_Opr LIR_Assembler::osrBufferPointer() {
 139   return FrameMap::as_pointer_opr(receiverOpr()->as_register());
 140 }
 141 
 142 //--------------fpu register translations-----------------------
 143 
 144 
 145 address LIR_Assembler::float_constant(float f) {
 146   address const_addr = __ float_constant(f);
 147   if (const_addr == NULL) {
 148     bailout("const section overflow");
 149     return __ code()->consts()->start();
 150   } else {
 151     return const_addr;
 152   }
 153 }
 154 
 155 
 156 address LIR_Assembler::double_constant(double d) {
 157   address const_addr = __ double_constant(d);
 158   if (const_addr == NULL) {
 159     bailout("const section overflow");
 160     return __ code()->consts()->start();
 161   } else {
 162     return const_addr;
 163   }
 164 }
 165 
 166 #ifndef _LP64
 167 void LIR_Assembler::fpop() {
 168   __ fpop();
 169 }
 170 
 171 void LIR_Assembler::fxch(int i) {
 172   __ fxch(i);
 173 }
 174 
 175 void LIR_Assembler::fld(int i) {
 176   __ fld_s(i);
 177 }
 178 
 179 void LIR_Assembler::ffree(int i) {
 180   __ ffree(i);
 181 }
 182 #endif // !_LP64
 183 
 184 void LIR_Assembler::breakpoint() {
 185   __ int3();
 186 }
 187 
 188 void LIR_Assembler::push(LIR_Opr opr) {
 189   if (opr->is_single_cpu()) {
 190     __ push_reg(opr->as_register());
 191   } else if (opr->is_double_cpu()) {
 192     NOT_LP64(__ push_reg(opr->as_register_hi()));
 193     __ push_reg(opr->as_register_lo());
 194   } else if (opr->is_stack()) {
 195     __ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
 196   } else if (opr->is_constant()) {
 197     LIR_Const* const_opr = opr->as_constant_ptr();
 198     if (const_opr->type() == T_OBJECT) {
 199       __ push_oop(const_opr->as_jobject());
 200     } else if (const_opr->type() == T_INT) {
 201       __ push_jint(const_opr->as_jint());
 202     } else {
 203       ShouldNotReachHere();
 204     }
 205 
 206   } else {
 207     ShouldNotReachHere();
 208   }
 209 }
 210 
 211 void LIR_Assembler::pop(LIR_Opr opr) {
 212   if (opr->is_single_cpu()) {
 213     __ pop_reg(opr->as_register());
 214   } else {
 215     ShouldNotReachHere();
 216   }
 217 }
 218 
 219 bool LIR_Assembler::is_literal_address(LIR_Address* addr) {
 220   return addr->base()->is_illegal() && addr->index()->is_illegal();
 221 }
 222 
 223 //-------------------------------------------
 224 
 225 Address LIR_Assembler::as_Address(LIR_Address* addr) {
 226   return as_Address(addr, rscratch1);
 227 }
 228 
 229 Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
 230   if (addr->base()->is_illegal()) {
 231     assert(addr->index()->is_illegal(), "must be illegal too");
 232     AddressLiteral laddr((address)addr->disp(), relocInfo::none);
 233     if (! __ reachable(laddr)) {
 234       __ movptr(tmp, laddr.addr());
 235       Address res(tmp, 0);
 236       return res;
 237     } else {
 238       return __ as_Address(laddr);
 239     }
 240   }
 241 
 242   Register base = addr->base()->as_pointer_register();
 243 
 244   if (addr->index()->is_illegal()) {
 245     return Address( base, addr->disp());
 246   } else if (addr->index()->is_cpu_register()) {
 247     Register index = addr->index()->as_pointer_register();
 248     return Address(base, index, (Address::ScaleFactor) addr->scale(), addr->disp());
 249   } else if (addr->index()->is_constant()) {
 250     intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp();
 251     assert(Assembler::is_simm32(addr_offset), "must be");
 252 
 253     return Address(base, addr_offset);
 254   } else {
 255     Unimplemented();
 256     return Address();
 257   }
 258 }
 259 
 260 
 261 Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
 262   Address base = as_Address(addr);
 263   return Address(base._base, base._index, base._scale, base._disp + BytesPerWord);
 264 }
 265 
 266 
 267 Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
 268   return as_Address(addr);
 269 }
 270 
 271 
 272 void LIR_Assembler::osr_entry() {
 273   offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
 274   BlockBegin* osr_entry = compilation()->hir()->osr_entry();
 275   ValueStack* entry_state = osr_entry->state();
 276   int number_of_locks = entry_state->locks_size();
 277 
 278   // we jump here if osr happens with the interpreter
 279   // state set up to continue at the beginning of the
 280   // loop that triggered osr - in particular, we have
 281   // the following registers set up:
 282   //
 283   // rcx: osr buffer
 284   //
 285 
 286   // build frame
 287   ciMethod* m = compilation()->method();
 288   __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
 289 
 290   // OSR buffer is
 291   //
 292   // locals[nlocals-1..0]
 293   // monitors[0..number_of_locks]
 294   //
 295   // locals is a direct copy of the interpreter frame, so in the osr buffer
 296   // the first slot in the local array is the last local from the interpreter
 297   // and the last slot is local[0] (receiver) from the interpreter
 298   //
 299   // Similarly with locks. The first lock slot in the osr buffer is the nth lock
 300   // from the interpreter frame; the nth lock slot in the osr buffer is the 0th lock
 301   // in the interpreter frame (the method lock if it is a synchronized method)
 302 
 303   // Initialize monitors in the compiled activation.
 304   //   rcx: pointer to osr buffer
 305   //
 306   // All other registers are dead at this point and the locals will be
 307   // copied into place by code emitted in the IR.
 308 
 309   Register OSR_buf = osrBufferPointer()->as_pointer_register();
 310   { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
 311     int monitor_offset = BytesPerWord * method()->max_locals() +
 312       (BasicObjectLock::size() * BytesPerWord) * (number_of_locks - 1);
 313     // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
 314     // the OSR buffer using 2 word entries: first the lock and then
 315     // the oop.
 316     for (int i = 0; i < number_of_locks; i++) {
 317       int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
 318 #ifdef ASSERT
 319       // verify the interpreter's monitor has a non-null object
 320       {
 321         Label L;
 322         __ cmpptr(Address(OSR_buf, slot_offset + 1*BytesPerWord), (int32_t)NULL_WORD);
 323         __ jcc(Assembler::notZero, L);
 324         __ stop("locked object is NULL");
 325         __ bind(L);
 326       }
 327 #endif
 328       __ movptr(rbx, Address(OSR_buf, slot_offset + 0));
 329       __ movptr(frame_map()->address_for_monitor_lock(i), rbx);
 330       __ movptr(rbx, Address(OSR_buf, slot_offset + 1*BytesPerWord));
 331       __ movptr(frame_map()->address_for_monitor_object(i), rbx);
 332     }
 333   }
 334 }
 335 
 336 
 337 // inline cache check; done before the frame is built.
 338 int LIR_Assembler::check_icache() {
 339   Register receiver = FrameMap::receiver_opr->as_register();
 340   Register ic_klass = IC_Klass;
 341   const int ic_cmp_size = LP64_ONLY(10) NOT_LP64(9);
 342   const bool do_post_padding = VerifyOops || UseCompressedClassPointers;
 343   if (!do_post_padding) {
 344     // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
 345     __ align(CodeEntryAlignment, __ offset() + ic_cmp_size);
 346   }
 347   int offset = __ offset();
 348   __ inline_cache_check(receiver, IC_Klass);
 349   assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct");
 350   if (do_post_padding) {
 351     // force alignment after the cache check.
 352     // It's been verified to be aligned if !VerifyOops
 353     __ align(CodeEntryAlignment);
 354   }
 355   return offset;
 356 }
 357 
 358 void LIR_Assembler::clinit_barrier(ciMethod* method) {
 359   assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 360   assert(!method->holder()->is_not_initialized(), "initialization should have been started");
 361 
 362   Label L_skip_barrier;
 363   Register klass = rscratch1;
 364   Register thread = LP64_ONLY( r15_thread ) NOT_LP64( noreg );
 365   assert(thread != noreg, "x86_32 not implemented");
 366 
 367   __ mov_metadata(klass, method->holder()->constant_encoding());
 368   __ clinit_barrier(klass, thread, &L_skip_barrier /*L_fast_path*/);
 369 
 370   __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 371 
 372   __ bind(L_skip_barrier);
 373 }
 374 
 375 void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
 376   jobject o = NULL;
 377   PatchingStub* patch = new PatchingStub(_masm, patching_id(info));
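  // Emit a placeholder load of a NULL oop; the PatchingStub patches in the
  // resolved constant at run time.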
 378   __ movoop(reg, o);
 379   patching_epilog(patch, lir_patch_normal, reg, info);
 380 }
 381 
 382 void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
 383   Metadata* o = NULL;
 384   PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
 385   __ mov_metadata(reg, o);
 386   patching_epilog(patch, lir_patch_normal, reg, info);
 387 }
 388 
 389 // This specifies the rsp decrement needed to build the frame
 390 int LIR_Assembler::initial_frame_size_in_bytes() const {
 391   // if rounding, must let FrameMap know!
 392 
 393   // The frame_map records size in slots (32-bit words)
 394 
 395   // subtract two words to account for return address and link
 396   return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word))  * VMRegImpl::stack_slot_size;
 397 }
 398 
 399 
 400 int LIR_Assembler::emit_exception_handler() {
 401   // if the last instruction is a call (typically to do a throw which
 402   // is coming at the end after block reordering) the return address
 403   // must still point into the code area in order to avoid assertion
 404   // failures when searching for the corresponding bci => add a nop
 405   // (was bug 5/14/1999 - gri)
 406   __ nop();
 407 
 408   // generate code for exception handler
 409   address handler_base = __ start_a_stub(exception_handler_size());
 410   if (handler_base == NULL) {
 411     // not enough space left for the handler
 412     bailout("exception handler overflow");
 413     return -1;
 414   }
 415 
 416   int offset = code_offset();
 417 
 418   // the exception oop and pc are in rax and rdx
 419   // no other registers need to be preserved, so invalidate them
 420   __ invalidate_registers(false, true, true, false, true, true);
 421 
 422   // check that there is really an exception
 423   __ verify_not_null_oop(rax);
 424 
 425   // search an exception handler (rax: exception oop, rdx: throwing pc)
 426   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
 427   __ should_not_reach_here();
 428   guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
 429   __ end_a_stub();
 430 
 431   return offset;
 432 }
 433 
 434 
 435 // Emit the code to remove the frame from the stack in the exception
 436 // unwind path.
 437 int LIR_Assembler::emit_unwind_handler() {
 438 #ifndef PRODUCT
 439   if (CommentedAssembly) {
 440     _masm->block_comment("Unwind handler");
 441   }
 442 #endif
 443 
 444   int offset = code_offset();
 445 
 446   // Fetch the exception from TLS and clear out exception related thread state
 447   Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread);
 448   NOT_LP64(__ get_thread(thread));
 449   __ movptr(rax, Address(thread, JavaThread::exception_oop_offset()));
 450   __ movptr(Address(thread, JavaThread::exception_oop_offset()), (intptr_t)NULL_WORD);
 451   __ movptr(Address(thread, JavaThread::exception_pc_offset()), (intptr_t)NULL_WORD);
 452 
 453   __ bind(_unwind_handler_entry);
 454   __ verify_not_null_oop(rax);
 455   if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
 456     __ mov(rbx, rax);  // Preserve the exception (rbx is always callee-saved)
 457   }
 458 
 459   // Perform needed unlocking
 460   MonitorExitStub* stub = NULL;
 461   if (method()->is_synchronized()) {
 462     monitor_address(0, FrameMap::rax_opr);
 463     stub = new MonitorExitStub(FrameMap::rax_opr, true, 0);
 464     __ unlock_object(rdi, rsi, rax, *stub->entry());
 465     __ bind(*stub->continuation());
 466     NOT_LP64(__ get_thread(thread);)
 467     __ dec_held_monitor_count(thread);
 468   }
 469 
 470   if (compilation()->env()->dtrace_method_probes()) {
 471 #ifdef _LP64
 472     __ mov(rdi, r15_thread);
 473     __ mov_metadata(rsi, method()->constant_encoding());
 474 #else
 475     __ get_thread(rax);
 476     __ movptr(Address(rsp, 0), rax);
 477     __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding());
 478 #endif
 479     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
 480   }
 481 
 482   if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
 483     __ mov(rax, rbx);  // Restore the exception
 484   }
 485 
 486   // remove the activation and dispatch to the unwind handler
 487   __ remove_frame(initial_frame_size_in_bytes());
 488   __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
 489 
 490   // Emit the slow path assembly
 491   if (stub != NULL) {
 492     stub->emit_code(this);
 493   }
 494 
 495   return offset;
 496 }
 497 
 498 
 499 int LIR_Assembler::emit_deopt_handler() {
 500   // if the last instruction is a call (typically to do a throw which
 501   // is coming at the end after block reordering) the return address
 502   // must still point into the code area in order to avoid assertion
 503   // failures when searching for the corresponding bci => add a nop
 504   // (was bug 5/14/1999 - gri)
 505   __ nop();
 506 
 507   // generate code for the deopt handler
 508   address handler_base = __ start_a_stub(deopt_handler_size());
 509   if (handler_base == NULL) {
 510     // not enough space left for the handler
 511     bailout("deopt handler overflow");
 512     return -1;
 513   }
 514 
 515   int offset = code_offset();
 516   InternalAddress here(__ pc());
 517 
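  // Push the address of this handler so the stack again holds a return address
  // into this nmethod, then jump to the shared deoptimization blob.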
 518   __ pushptr(here.addr());
 519   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 520   guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
 521   __ end_a_stub();
 522 
 523   return offset;
 524 }
 525 
 526 void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
 527   assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax,");
 528   if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
 529     assert(result->fpu() == 0, "result must already be on TOS");
 530   }
 531 
 532   // Pop the stack before the safepoint code
 533   __ remove_frame(initial_frame_size_in_bytes());
 534 
 535   if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
 536     __ reserved_stack_check();
 537   }
 538 
 539   // Note: we do not need to round the double result; the float result has the right precision.
 540   // The poll sets the condition code, but no data registers.
 541 
 542 #ifdef _LP64
 543   const Register thread = r15_thread;
 544 #else
 545   const Register thread = rbx;
 546   __ get_thread(thread);
 547 #endif
 548   code_stub->set_safepoint_offset(__ offset());
 549   __ relocate(relocInfo::poll_return_type);
 550   __ safepoint_poll(*code_stub->entry(), thread, true /* at_return */, true /* in_nmethod */);
 551   __ ret(0);
 552 }
 553 
 554 
 555 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
 556   guarantee(info != NULL, "Shouldn't be NULL");
 557   int offset = __ offset();
 558 #ifdef _LP64
 559   const Register poll_addr = rscratch1;
 560   __ movptr(poll_addr, Address(r15_thread, JavaThread::polling_page_offset()));
 561 #else
 562   assert(tmp->is_cpu_register(), "needed");
 563   const Register poll_addr = tmp->as_register();
 564   __ get_thread(poll_addr);
 565   __ movptr(poll_addr, Address(poll_addr, in_bytes(JavaThread::polling_page_offset())));
 566 #endif
 567   add_debug_info_for_branch(info);
 568   __ relocate(relocInfo::poll_type);
 569   address pre_pc = __ pc();
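  // Touch the thread-local polling page. When a safepoint or handshake is pending
  // the page is protected, so this access faults and the signal handler treats the
  // fault at this poll pc as a safepoint request.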
 570   __ testl(rax, Address(poll_addr, 0));
 571   address post_pc = __ pc();
 572   guarantee(pointer_delta(post_pc, pre_pc, 1) == 2 LP64_ONLY(+1), "must be exact length");
 573   return offset;
 574 }
 575 
 576 
 577 void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
 578   if (from_reg != to_reg) __ mov(to_reg, from_reg);
 579 }
 580 
 581 void LIR_Assembler::swap_reg(Register a, Register b) {
 582   __ xchgptr(a, b);
 583 }
 584 
 585 
 586 void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
 587   assert(src->is_constant(), "should not call otherwise");
 588   assert(dest->is_register(), "should not call otherwise");
 589   LIR_Const* c = src->as_constant_ptr();
 590 
 591   switch (c->type()) {
 592     case T_INT: {
 593       assert(patch_code == lir_patch_none, "no patching handled here");
 594       __ movl(dest->as_register(), c->as_jint());
 595       break;
 596     }
 597 
 598     case T_ADDRESS: {
 599       assert(patch_code == lir_patch_none, "no patching handled here");
 600       __ movptr(dest->as_register(), c->as_jint());
 601       break;
 602     }
 603 
 604     case T_LONG: {
 605       assert(patch_code == lir_patch_none, "no patching handled here");
 606 #ifdef _LP64
 607       __ movptr(dest->as_register_lo(), (intptr_t)c->as_jlong());
 608 #else
 609       __ movptr(dest->as_register_lo(), c->as_jint_lo());
 610       __ movptr(dest->as_register_hi(), c->as_jint_hi());
 611 #endif // _LP64
 612       break;
 613     }
 614 
 615     case T_OBJECT: {
 616       if (patch_code != lir_patch_none) {
 617         jobject2reg_with_patching(dest->as_register(), info);
 618       } else {
 619         __ movoop(dest->as_register(), c->as_jobject());
 620       }
 621       break;
 622     }
 623 
 624     case T_METADATA: {
 625       if (patch_code != lir_patch_none) {
 626         klass2reg_with_patching(dest->as_register(), info);
 627       } else {
 628         __ mov_metadata(dest->as_register(), c->as_metadata());
 629       }
 630       break;
 631     }
 632 
 633     case T_FLOAT: {
 634       if (dest->is_single_xmm()) {
 635         if (LP64_ONLY(UseAVX <= 2 &&) c->is_zero_float()) {
 636           __ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg());
 637         } else {
 638           __ movflt(dest->as_xmm_float_reg(),
 639                    InternalAddress(float_constant(c->as_jfloat())));
 640         }
 641       } else {
 642 #ifndef _LP64
 643         assert(dest->is_single_fpu(), "must be");
 644         assert(dest->fpu_regnr() == 0, "dest must be TOS");
 645         if (c->is_zero_float()) {
 646           __ fldz();
 647         } else if (c->is_one_float()) {
 648           __ fld1();
 649         } else {
 650           __ fld_s (InternalAddress(float_constant(c->as_jfloat())));
 651         }
 652 #else
 653         ShouldNotReachHere();
 654 #endif // !_LP64
 655       }
 656       break;
 657     }
 658 
 659     case T_DOUBLE: {
 660       if (dest->is_double_xmm()) {
 661         if (LP64_ONLY(UseAVX <= 2 &&) c->is_zero_double()) {
 662           __ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg());
 663         } else {
 664           __ movdbl(dest->as_xmm_double_reg(),
 665                     InternalAddress(double_constant(c->as_jdouble())));
 666         }
 667       } else {
 668 #ifndef _LP64
 669         assert(dest->is_double_fpu(), "must be");
 670         assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
 671         if (c->is_zero_double()) {
 672           __ fldz();
 673         } else if (c->is_one_double()) {
 674           __ fld1();
 675         } else {
 676           __ fld_d (InternalAddress(double_constant(c->as_jdouble())));
 677         }
 678 #else
 679         ShouldNotReachHere();
 680 #endif // !_LP64
 681       }
 682       break;
 683     }
 684 
 685     default:
 686       ShouldNotReachHere();
 687   }
 688 }
 689 
 690 void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
 691   assert(src->is_constant(), "should not call otherwise");
 692   assert(dest->is_stack(), "should not call otherwise");
 693   LIR_Const* c = src->as_constant_ptr();
 694 
 695   switch (c->type()) {
 696     case T_INT:  // fall through
 697     case T_FLOAT:
 698       __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
 699       break;
 700 
 701     case T_ADDRESS:
 702       __ movptr(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
 703       break;
 704 
 705     case T_OBJECT:
 706       __ movoop(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jobject());
 707       break;
 708 
 709     case T_LONG:  // fall through
 710     case T_DOUBLE:
 711 #ifdef _LP64
 712       __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
 713                                             lo_word_offset_in_bytes), (intptr_t)c->as_jlong_bits());
 714 #else
 715       __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
 716                                               lo_word_offset_in_bytes), c->as_jint_lo_bits());
 717       __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
 718                                               hi_word_offset_in_bytes), c->as_jint_hi_bits());
 719 #endif // _LP64
 720       break;
 721 
 722     default:
 723       ShouldNotReachHere();
 724   }
 725 }
 726 
 727 void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
 728   assert(src->is_constant(), "should not call otherwise");
 729   assert(dest->is_address(), "should not call otherwise");
 730   LIR_Const* c = src->as_constant_ptr();
 731   LIR_Address* addr = dest->as_address_ptr();
 732 
 733   int null_check_here = code_offset();
 734   switch (type) {
 735     case T_INT:    // fall through
 736     case T_FLOAT:
 737       __ movl(as_Address(addr), c->as_jint_bits());
 738       break;
 739 
 740     case T_ADDRESS:
 741       __ movptr(as_Address(addr), c->as_jint_bits());
 742       break;
 743 
 744     case T_OBJECT:  // fall through
 745     case T_ARRAY:
 746       if (c->as_jobject() == NULL) {
 747         if (UseCompressedOops && !wide) {
 748           __ movl(as_Address(addr), (int32_t)NULL_WORD);
 749         } else {
 750 #ifdef _LP64
 751           __ xorptr(rscratch1, rscratch1);
 752           null_check_here = code_offset();
 753           __ movptr(as_Address(addr), rscratch1);
 754 #else
 755           __ movptr(as_Address(addr), NULL_WORD);
 756 #endif
 757         }
 758       } else {
 759         if (is_literal_address(addr)) {
 760           ShouldNotReachHere();
 761           __ movoop(as_Address(addr, noreg), c->as_jobject());
 762         } else {
 763 #ifdef _LP64
 764           __ movoop(rscratch1, c->as_jobject());
 765           if (UseCompressedOops && !wide) {
 766             __ encode_heap_oop(rscratch1);
 767             null_check_here = code_offset();
 768             __ movl(as_Address_lo(addr), rscratch1);
 769           } else {
 770             null_check_here = code_offset();
 771             __ movptr(as_Address_lo(addr), rscratch1);
 772           }
 773 #else
 774           __ movoop(as_Address(addr), c->as_jobject());
 775 #endif
 776         }
 777       }
 778       break;
 779 
 780     case T_LONG:    // fall through
 781     case T_DOUBLE:
 782 #ifdef _LP64
 783       if (is_literal_address(addr)) {
 784         ShouldNotReachHere();
 785         __ movptr(as_Address(addr, r15_thread), (intptr_t)c->as_jlong_bits());
 786       } else {
 787         __ movptr(r10, (intptr_t)c->as_jlong_bits());
 788         null_check_here = code_offset();
 789         __ movptr(as_Address_lo(addr), r10);
 790       }
 791 #else
 792       // Always reachable in 32-bit mode, so this doesn't produce a useless move literal
 793       __ movptr(as_Address_hi(addr), c->as_jint_hi_bits());
 794       __ movptr(as_Address_lo(addr), c->as_jint_lo_bits());
 795 #endif // _LP64
 796       break;
 797 
 798     case T_BOOLEAN: // fall through
 799     case T_BYTE:
 800       __ movb(as_Address(addr), c->as_jint() & 0xFF);
 801       break;
 802 
 803     case T_CHAR:    // fall through
 804     case T_SHORT:
 805       __ movw(as_Address(addr), c->as_jint() & 0xFFFF);
 806       break;
 807 
 808     default:
 809       ShouldNotReachHere();
 810   };
 811 
 812   if (info != NULL) {
 813     add_debug_info_for_null_check(null_check_here, info);
 814   }
 815 }
 816 
 817 
 818 void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
 819   assert(src->is_register(), "should not call otherwise");
 820   assert(dest->is_register(), "should not call otherwise");
 821 
 822   // move between cpu-registers
 823   if (dest->is_single_cpu()) {
 824 #ifdef _LP64
 825     if (src->type() == T_LONG) {
 826       // Can do LONG -> OBJECT
 827       move_regs(src->as_register_lo(), dest->as_register());
 828       return;
 829     }
 830 #endif
 831     assert(src->is_single_cpu(), "must match");
 832     if (src->type() == T_OBJECT) {
 833       __ verify_oop(src->as_register());
 834     }
 835     move_regs(src->as_register(), dest->as_register());
 836 
 837   } else if (dest->is_double_cpu()) {
 838 #ifdef _LP64
 839     if (is_reference_type(src->type())) {
 840       // Surprising to me, but we can see a move of a long to T_OBJECT
 841       __ verify_oop(src->as_register());
 842       move_regs(src->as_register(), dest->as_register_lo());
 843       return;
 844     }
 845 #endif
 846     assert(src->is_double_cpu(), "must match");
 847     Register f_lo = src->as_register_lo();
 848     Register f_hi = src->as_register_hi();
 849     Register t_lo = dest->as_register_lo();
 850     Register t_hi = dest->as_register_hi();
 851 #ifdef _LP64
 852     assert(f_hi == f_lo, "must be same");
 853     assert(t_hi == t_lo, "must be same");
 854     move_regs(f_lo, t_lo);
 855 #else
 856     assert(f_lo != f_hi && t_lo != t_hi, "invalid register allocation");
 857 
 858 
 859     if (f_lo == t_hi && f_hi == t_lo) {
 860       swap_reg(f_lo, f_hi);
 861     } else if (f_hi == t_lo) {
 862       assert(f_lo != t_hi, "overwriting register");
 863       move_regs(f_hi, t_hi);
 864       move_regs(f_lo, t_lo);
 865     } else {
 866       assert(f_hi != t_lo, "overwriting register");
 867       move_regs(f_lo, t_lo);
 868       move_regs(f_hi, t_hi);
 869     }
 870 #endif // LP64
 871 
 872 #ifndef _LP64
 873     // special moves from fpu-register to xmm-register
 874     // necessary for method results
 875   } else if (src->is_single_xmm() && !dest->is_single_xmm()) {
 876     __ movflt(Address(rsp, 0), src->as_xmm_float_reg());
 877     __ fld_s(Address(rsp, 0));
 878   } else if (src->is_double_xmm() && !dest->is_double_xmm()) {
 879     __ movdbl(Address(rsp, 0), src->as_xmm_double_reg());
 880     __ fld_d(Address(rsp, 0));
 881   } else if (dest->is_single_xmm() && !src->is_single_xmm()) {
 882     __ fstp_s(Address(rsp, 0));
 883     __ movflt(dest->as_xmm_float_reg(), Address(rsp, 0));
 884   } else if (dest->is_double_xmm() && !src->is_double_xmm()) {
 885     __ fstp_d(Address(rsp, 0));
 886     __ movdbl(dest->as_xmm_double_reg(), Address(rsp, 0));
 887 #endif // !_LP64
 888 
 889     // move between xmm-registers
 890   } else if (dest->is_single_xmm()) {
 891     assert(src->is_single_xmm(), "must match");
 892     __ movflt(dest->as_xmm_float_reg(), src->as_xmm_float_reg());
 893   } else if (dest->is_double_xmm()) {
 894     assert(src->is_double_xmm(), "must match");
 895     __ movdbl(dest->as_xmm_double_reg(), src->as_xmm_double_reg());
 896 
 897 #ifndef _LP64
 898     // move between fpu-registers (no instruction necessary because of fpu-stack)
 899   } else if (dest->is_single_fpu() || dest->is_double_fpu()) {
 900     assert(src->is_single_fpu() || src->is_double_fpu(), "must match");
 901     assert(src->fpu() == dest->fpu(), "currently should be nothing to do");
 902 #endif // !_LP64
 903 
 904   } else {
 905     ShouldNotReachHere();
 906   }
 907 }
 908 
 909 void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
 910   assert(src->is_register(), "should not call otherwise");
 911   assert(dest->is_stack(), "should not call otherwise");
 912 
 913   if (src->is_single_cpu()) {
 914     Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
 915     if (is_reference_type(type)) {
 916       __ verify_oop(src->as_register());
 917       __ movptr (dst, src->as_register());
 918     } else if (type == T_METADATA || type == T_ADDRESS) {
 919       __ movptr (dst, src->as_register());
 920     } else {
 921       __ movl (dst, src->as_register());
 922     }
 923 
 924   } else if (src->is_double_cpu()) {
 925     Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
 926     Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
 927     __ movptr (dstLO, src->as_register_lo());
 928     NOT_LP64(__ movptr (dstHI, src->as_register_hi()));
 929 
 930   } else if (src->is_single_xmm()) {
 931     Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
 932     __ movflt(dst_addr, src->as_xmm_float_reg());
 933 
 934   } else if (src->is_double_xmm()) {
 935     Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
 936     __ movdbl(dst_addr, src->as_xmm_double_reg());
 937 
 938 #ifndef _LP64
 939   } else if (src->is_single_fpu()) {
 940     assert(src->fpu_regnr() == 0, "argument must be on TOS");
 941     Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
 942     if (pop_fpu_stack)     __ fstp_s (dst_addr);
 943     else                   __ fst_s  (dst_addr);
 944 
 945   } else if (src->is_double_fpu()) {
 946     assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
 947     Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
 948     if (pop_fpu_stack)     __ fstp_d (dst_addr);
 949     else                   __ fst_d  (dst_addr);
 950 #endif // !_LP64
 951 
 952   } else {
 953     ShouldNotReachHere();
 954   }
 955 }
 956 
 957 
 958 void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) {
 959   LIR_Address* to_addr = dest->as_address_ptr();
 960   PatchingStub* patch = NULL;
 961   Register compressed_src = rscratch1;
 962 
 963   if (is_reference_type(type)) {
 964     __ verify_oop(src->as_register());
 965 #ifdef _LP64
 966     if (UseCompressedOops && !wide) {
 967       __ movptr(compressed_src, src->as_register());
 968       __ encode_heap_oop(compressed_src);
 969       if (patch_code != lir_patch_none) {
 970         info->oop_map()->set_narrowoop(compressed_src->as_VMReg());
 971       }
 972     }
 973 #endif
 974   }
 975 
 976   if (patch_code != lir_patch_none) {
 977     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
 978     Address toa = as_Address(to_addr);
 979     assert(toa.disp() != 0, "must have");
 980   }
 981 
 982   int null_check_here = code_offset();
 983   switch (type) {
 984     case T_FLOAT: {
 985 #ifdef _LP64
 986       assert(src->is_single_xmm(), "not a float");
 987       __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
 988 #else
 989       if (src->is_single_xmm()) {
 990         __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
 991       } else {
 992         assert(src->is_single_fpu(), "must be");
 993         assert(src->fpu_regnr() == 0, "argument must be on TOS");
 994         if (pop_fpu_stack)      __ fstp_s(as_Address(to_addr));
 995         else                    __ fst_s (as_Address(to_addr));
 996       }
 997 #endif // _LP64
 998       break;
 999     }
1000 
1001     case T_DOUBLE: {
1002 #ifdef _LP64
1003       assert(src->is_double_xmm(), "not a double");
1004       __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
1005 #else
1006       if (src->is_double_xmm()) {
1007         __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
1008       } else {
1009         assert(src->is_double_fpu(), "must be");
1010         assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
1011         if (pop_fpu_stack)      __ fstp_d(as_Address(to_addr));
1012         else                    __ fst_d (as_Address(to_addr));
1013       }
1014 #endif // _LP64
1015       break;
1016     }
1017 
1018     case T_ARRAY:   // fall through
1019     case T_OBJECT:  // fall through
1020       if (UseCompressedOops && !wide) {
1021         __ movl(as_Address(to_addr), compressed_src);
1022       } else {
1023         __ movptr(as_Address(to_addr), src->as_register());
1024       }
1025       break;
1026     case T_METADATA:
1027       // We get here to store a method pointer to the stack to pass to
1028       // a dtrace runtime call. This can't work on 64 bit with
1029       // compressed klass ptrs: T_METADATA can be a compressed klass
1030       // ptr or a 64 bit method pointer.
1031       LP64_ONLY(ShouldNotReachHere());
1032       __ movptr(as_Address(to_addr), src->as_register());
1033       break;
1034     case T_ADDRESS:
1035       __ movptr(as_Address(to_addr), src->as_register());
1036       break;
1037     case T_INT:
1038       __ movl(as_Address(to_addr), src->as_register());
1039       break;
1040 
1041     case T_LONG: {
1042       Register from_lo = src->as_register_lo();
1043       Register from_hi = src->as_register_hi();
1044 #ifdef _LP64
1045       __ movptr(as_Address_lo(to_addr), from_lo);
1046 #else
1047       Register base = to_addr->base()->as_register();
1048       Register index = noreg;
1049       if (to_addr->index()->is_register()) {
1050         index = to_addr->index()->as_register();
1051       }
1052       if (base == from_lo || index == from_lo) {
1053         assert(base != from_hi, "can't be");
1054         assert(index == noreg || (index != base && index != from_hi), "can't handle this");
1055         __ movl(as_Address_hi(to_addr), from_hi);
1056         if (patch != NULL) {
1057           patching_epilog(patch, lir_patch_high, base, info);
1058           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1059           patch_code = lir_patch_low;
1060         }
1061         __ movl(as_Address_lo(to_addr), from_lo);
1062       } else {
1063         assert(index == noreg || (index != base && index != from_lo), "can't handle this");
1064         __ movl(as_Address_lo(to_addr), from_lo);
1065         if (patch != NULL) {
1066           patching_epilog(patch, lir_patch_low, base, info);
1067           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1068           patch_code = lir_patch_high;
1069         }
1070         __ movl(as_Address_hi(to_addr), from_hi);
1071       }
1072 #endif // _LP64
1073       break;
1074     }
1075 
1076     case T_BYTE:    // fall through
1077     case T_BOOLEAN: {
1078       Register src_reg = src->as_register();
1079       Address dst_addr = as_Address(to_addr);
1080       assert(VM_Version::is_P6() || src_reg->has_byte_register(), "must use byte registers if not P6");
1081       __ movb(dst_addr, src_reg);
1082       break;
1083     }
1084 
1085     case T_CHAR:    // fall through
1086     case T_SHORT:
1087       __ movw(as_Address(to_addr), src->as_register());
1088       break;
1089 
1090     default:
1091       ShouldNotReachHere();
1092   }
1093   if (info != NULL) {
1094     add_debug_info_for_null_check(null_check_here, info);
1095   }
1096 
1097   if (patch_code != lir_patch_none) {
1098     patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
1099   }
1100 }
1101 
1102 
1103 void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
1104   assert(src->is_stack(), "should not call otherwise");
1105   assert(dest->is_register(), "should not call otherwise");
1106 
1107   if (dest->is_single_cpu()) {
1108     if (is_reference_type(type)) {
1109       __ movptr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
1110       __ verify_oop(dest->as_register());
1111     } else if (type == T_METADATA || type == T_ADDRESS) {
1112       __ movptr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
1113     } else {
1114       __ movl(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
1115     }
1116 
1117   } else if (dest->is_double_cpu()) {
1118     Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
1119     Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
1120     __ movptr(dest->as_register_lo(), src_addr_LO);
1121     NOT_LP64(__ movptr(dest->as_register_hi(), src_addr_HI));
1122 
1123   } else if (dest->is_single_xmm()) {
1124     Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
1125     __ movflt(dest->as_xmm_float_reg(), src_addr);
1126 
1127   } else if (dest->is_double_xmm()) {
1128     Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
1129     __ movdbl(dest->as_xmm_double_reg(), src_addr);
1130 
1131 #ifndef _LP64
1132   } else if (dest->is_single_fpu()) {
1133     assert(dest->fpu_regnr() == 0, "dest must be TOS");
1134     Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
1135     __ fld_s(src_addr);
1136 
1137   } else if (dest->is_double_fpu()) {
1138     assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
1139     Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
1140     __ fld_d(src_addr);
1141 #endif // _LP64
1142 
1143   } else {
1144     ShouldNotReachHere();
1145   }
1146 }
1147 
1148 
1149 void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
1150   if (src->is_single_stack()) {
1151     if (is_reference_type(type)) {
1152       __ pushptr(frame_map()->address_for_slot(src ->single_stack_ix()));
1153       __ popptr (frame_map()->address_for_slot(dest->single_stack_ix()));
1154     } else {
1155 #ifndef _LP64
1156       __ pushl(frame_map()->address_for_slot(src ->single_stack_ix()));
1157       __ popl (frame_map()->address_for_slot(dest->single_stack_ix()));
1158 #else
1159       // no pushl on 64 bits
1160       __ movl(rscratch1, frame_map()->address_for_slot(src ->single_stack_ix()));
1161       __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), rscratch1);
1162 #endif
1163     }
1164 
1165   } else if (src->is_double_stack()) {
1166 #ifdef _LP64
1167     __ pushptr(frame_map()->address_for_slot(src ->double_stack_ix()));
1168     __ popptr (frame_map()->address_for_slot(dest->double_stack_ix()));
1169 #else
1170     __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 0));
1171     // push and pop the part at src + wordSize, adding wordSize for the previous push
1172     __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 2 * wordSize));
1173     __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 2 * wordSize));
1174     __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 0));
1175 #endif // _LP64
1176 
1177   } else {
1178     ShouldNotReachHere();
1179   }
1180 }
1181 
1182 
1183 void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) {
1184   assert(src->is_address(), "should not call otherwise");
1185   assert(dest->is_register(), "should not call otherwise");
1186 
1187   LIR_Address* addr = src->as_address_ptr();
1188   Address from_addr = as_Address(addr);
1189   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
1190 
1191   if (addr->base()->type() == T_OBJECT) {
1192     __ verify_oop(addr->base()->as_pointer_register());
1193   }
1194 
1195   switch (type) {
1196     case T_BOOLEAN: // fall through
1197     case T_BYTE:    // fall through
1198     case T_CHAR:    // fall through
1199     case T_SHORT:
1200       if (!VM_Version::is_P6() && !from_addr.uses(dest->as_register())) {
1201         // on pre-P6 processors we may get partial register stalls
1202         // so blow away the value of the destination register before loading a
1203         // partial word into it.  Do it here so that it precedes
1204         // the potential patch point below.
1205         __ xorptr(dest->as_register(), dest->as_register());
1206       }
1207       break;
1208    default:
1209      break;
1210   }
1211 
1212   PatchingStub* patch = NULL;
1213   if (patch_code != lir_patch_none) {
1214     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1215     assert(from_addr.disp() != 0, "must have");
1216   }
1217   if (info != NULL) {
1218     add_debug_info_for_null_check_here(info);
1219   }
1220 
1221   switch (type) {
1222     case T_FLOAT: {
1223       if (dest->is_single_xmm()) {
1224         __ movflt(dest->as_xmm_float_reg(), from_addr);
1225       } else {
1226 #ifndef _LP64
1227         assert(dest->is_single_fpu(), "must be");
1228         assert(dest->fpu_regnr() == 0, "dest must be TOS");
1229         __ fld_s(from_addr);
1230 #else
1231         ShouldNotReachHere();
1232 #endif // !LP64
1233       }
1234       break;
1235     }
1236 
1237     case T_DOUBLE: {
1238       if (dest->is_double_xmm()) {
1239         __ movdbl(dest->as_xmm_double_reg(), from_addr);
1240       } else {
1241 #ifndef _LP64
1242         assert(dest->is_double_fpu(), "must be");
1243         assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
1244         __ fld_d(from_addr);
1245 #else
1246         ShouldNotReachHere();
1247 #endif // !LP64
1248       }
1249       break;
1250     }
1251 
1252     case T_OBJECT:  // fall through
1253     case T_ARRAY:   // fall through
1254       if (UseCompressedOops && !wide) {
1255         __ movl(dest->as_register(), from_addr);
1256       } else {
1257         __ movptr(dest->as_register(), from_addr);
1258       }
1259       break;
1260 
1261     case T_ADDRESS:
1262       if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
1263         __ movl(dest->as_register(), from_addr);
1264       } else {
1265         __ movptr(dest->as_register(), from_addr);
1266       }
1267       break;
1268     case T_INT:
1269       __ movl(dest->as_register(), from_addr);
1270       break;
1271 
1272     case T_LONG: {
1273       Register to_lo = dest->as_register_lo();
1274       Register to_hi = dest->as_register_hi();
1275 #ifdef _LP64
1276       __ movptr(to_lo, as_Address_lo(addr));
1277 #else
1278       Register base = addr->base()->as_register();
1279       Register index = noreg;
1280       if (addr->index()->is_register()) {
1281         index = addr->index()->as_register();
1282       }
1283       if ((base == to_lo && index == to_hi) ||
1284           (base == to_hi && index == to_lo)) {
1285         // addresses with 2 registers are only formed as a result of
1286         // array access so this code will never have to deal with
1287         // patches or null checks.
1288         assert(info == NULL && patch == NULL, "must be");
1289         __ lea(to_hi, as_Address(addr));
1290         __ movl(to_lo, Address(to_hi, 0));
1291         __ movl(to_hi, Address(to_hi, BytesPerWord));
1292       } else if (base == to_lo || index == to_lo) {
1293         assert(base != to_hi, "can't be");
1294         assert(index == noreg || (index != base && index != to_hi), "can't handle this");
1295         __ movl(to_hi, as_Address_hi(addr));
1296         if (patch != NULL) {
1297           patching_epilog(patch, lir_patch_high, base, info);
1298           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1299           patch_code = lir_patch_low;
1300         }
1301         __ movl(to_lo, as_Address_lo(addr));
1302       } else {
1303         assert(index == noreg || (index != base && index != to_lo), "can't handle this");
1304         __ movl(to_lo, as_Address_lo(addr));
1305         if (patch != NULL) {
1306           patching_epilog(patch, lir_patch_low, base, info);
1307           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1308           patch_code = lir_patch_high;
1309         }
1310         __ movl(to_hi, as_Address_hi(addr));
1311       }
1312 #endif // _LP64
1313       break;
1314     }
1315 
1316     case T_BOOLEAN: // fall through
1317     case T_BYTE: {
1318       Register dest_reg = dest->as_register();
1319       assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
1320       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1321         __ movsbl(dest_reg, from_addr);
1322       } else {
1323         __ movb(dest_reg, from_addr);
1324         __ shll(dest_reg, 24);
1325         __ sarl(dest_reg, 24);
1326       }
1327       break;
1328     }
1329 
1330     case T_CHAR: {
1331       Register dest_reg = dest->as_register();
1332       assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
1333       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1334         __ movzwl(dest_reg, from_addr);
1335       } else {
1336         __ movw(dest_reg, from_addr);
1337       }
1338       break;
1339     }
1340 
1341     case T_SHORT: {
1342       Register dest_reg = dest->as_register();
1343       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1344         __ movswl(dest_reg, from_addr);
1345       } else {
1346         __ movw(dest_reg, from_addr);
1347         __ shll(dest_reg, 16);
1348         __ sarl(dest_reg, 16);
1349       }
1350       break;
1351     }
1352 
1353     default:
1354       ShouldNotReachHere();
1355   }
1356 
1357   if (patch != NULL) {
1358     patching_epilog(patch, patch_code, addr->base()->as_register(), info);
1359   }
1360 
1361   if (is_reference_type(type)) {
1362 #ifdef _LP64
1363     if (UseCompressedOops && !wide) {
1364       __ decode_heap_oop(dest->as_register());
1365     }
1366 #endif
1367 
1368     // Load barrier has not yet been applied, so ZGC can't verify the oop here
1369     if (!UseZGC) {
1370       __ verify_oop(dest->as_register());
1371     }
1372   } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
1373 #ifdef _LP64
1374     if (UseCompressedClassPointers) {
1375       __ decode_klass_not_null(dest->as_register(), tmp_load_klass);
1376     }
1377 #endif
1378   }
1379 }
1380 
1381 
1382 NEEDS_CLEANUP; // This could be static?
1383 Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
1384   int elem_size = type2aelembytes(type);
1385   switch (elem_size) {
1386     case 1: return Address::times_1;
1387     case 2: return Address::times_2;
1388     case 4: return Address::times_4;
1389     case 8: return Address::times_8;
1390   }
1391   ShouldNotReachHere();
1392   return Address::no_scale;
1393 }
1394 
1395 
1396 void LIR_Assembler::emit_op3(LIR_Op3* op) {
1397   switch (op->code()) {
1398     case lir_idiv:
1399     case lir_irem:
1400       arithmetic_idiv(op->code(),
1401                       op->in_opr1(),
1402                       op->in_opr2(),
1403                       op->in_opr3(),
1404                       op->result_opr(),
1405                       op->info());
1406       break;
1407     case lir_fmad:
1408       __ fmad(op->result_opr()->as_xmm_double_reg(),
1409               op->in_opr1()->as_xmm_double_reg(),
1410               op->in_opr2()->as_xmm_double_reg(),
1411               op->in_opr3()->as_xmm_double_reg());
1412       break;
1413     case lir_fmaf:
1414       __ fmaf(op->result_opr()->as_xmm_float_reg(),
1415               op->in_opr1()->as_xmm_float_reg(),
1416               op->in_opr2()->as_xmm_float_reg(),
1417               op->in_opr3()->as_xmm_float_reg());
1418       break;
1419     default:      ShouldNotReachHere(); break;
1420   }
1421 }
1422 
1423 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
1424 #ifdef ASSERT
1425   assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
1426   if (op->block() != NULL)  _branch_target_blocks.append(op->block());
1427   if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
1428 #endif
1429 
1430   if (op->cond() == lir_cond_always) {
1431     if (op->info() != NULL) add_debug_info_for_branch(op->info());
1432     __ jmp (*(op->label()));
1433   } else {
1434     Assembler::Condition acond = Assembler::zero;
1435     if (op->code() == lir_cond_float_branch) {
1436       assert(op->ublock() != NULL, "must have unordered successor");
1437       __ jcc(Assembler::parity, *(op->ublock()->label()));
1438       switch(op->cond()) {
1439         case lir_cond_equal:        acond = Assembler::equal;      break;
1440         case lir_cond_notEqual:     acond = Assembler::notEqual;   break;
1441         case lir_cond_less:         acond = Assembler::below;      break;
1442         case lir_cond_lessEqual:    acond = Assembler::belowEqual; break;
1443         case lir_cond_greaterEqual: acond = Assembler::aboveEqual; break;
1444         case lir_cond_greater:      acond = Assembler::above;      break;
1445         default:                         ShouldNotReachHere();
1446       }
1447     } else {
1448       switch (op->cond()) {
1449         case lir_cond_equal:        acond = Assembler::equal;       break;
1450         case lir_cond_notEqual:     acond = Assembler::notEqual;    break;
1451         case lir_cond_less:         acond = Assembler::less;        break;
1452         case lir_cond_lessEqual:    acond = Assembler::lessEqual;   break;
1453         case lir_cond_greaterEqual: acond = Assembler::greaterEqual;break;
1454         case lir_cond_greater:      acond = Assembler::greater;     break;
1455         case lir_cond_belowEqual:   acond = Assembler::belowEqual;  break;
1456         case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;  break;
1457         default:                         ShouldNotReachHere();
1458       }
1459     }
1460     __ jcc(acond,*(op->label()));
1461   }
1462 }
1463 
1464 void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
1465   LIR_Opr src  = op->in_opr();
1466   LIR_Opr dest = op->result_opr();
1467 
1468   switch (op->bytecode()) {
1469     case Bytecodes::_i2l:
1470 #ifdef _LP64
1471       __ movl2ptr(dest->as_register_lo(), src->as_register());
1472 #else
1473       move_regs(src->as_register(), dest->as_register_lo());
1474       move_regs(src->as_register(), dest->as_register_hi());
1475       __ sarl(dest->as_register_hi(), 31);
1476 #endif // LP64
1477       break;
1478 
1479     case Bytecodes::_l2i:
1480 #ifdef _LP64
1481       __ movl(dest->as_register(), src->as_register_lo());
1482 #else
1483       move_regs(src->as_register_lo(), dest->as_register());
1484 #endif
1485       break;
1486 
1487     case Bytecodes::_i2b:
1488       move_regs(src->as_register(), dest->as_register());
1489       __ sign_extend_byte(dest->as_register());
1490       break;
1491 
1492     case Bytecodes::_i2c:
1493       move_regs(src->as_register(), dest->as_register());
1494       __ andl(dest->as_register(), 0xFFFF);
1495       break;
1496 
1497     case Bytecodes::_i2s:
1498       move_regs(src->as_register(), dest->as_register());
1499       __ sign_extend_short(dest->as_register());
1500       break;
1501 
1502 
1503 #ifdef _LP64
1504     case Bytecodes::_f2d:
1505       __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg());
1506       break;
1507 
1508     case Bytecodes::_d2f:
1509       __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg());
1510       break;
1511 
1512     case Bytecodes::_i2f:
1513       __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register());
1514       break;
1515 
1516     case Bytecodes::_i2d:
1517       __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register());
1518       break;
1519 
1520     case Bytecodes::_l2f:
1521       __ cvtsi2ssq(dest->as_xmm_float_reg(), src->as_register_lo());
1522       break;
1523 
1524     case Bytecodes::_l2d:
1525       __ cvtsi2sdq(dest->as_xmm_double_reg(), src->as_register_lo());
1526       break;
1527 
1528     case Bytecodes::_f2i:
1529       __ convert_f2i(dest->as_register(), src->as_xmm_float_reg());
1530       break;
1531 
1532     case Bytecodes::_d2i:
1533       __ convert_d2i(dest->as_register(), src->as_xmm_double_reg());
1534       break;
1535 
1536     case Bytecodes::_f2l:
1537       __ convert_f2l(dest->as_register_lo(), src->as_xmm_float_reg());
1538       break;
1539 
1540     case Bytecodes::_d2l:
1541       __ convert_d2l(dest->as_register_lo(), src->as_xmm_double_reg());
1542       break;
1543 #else
1544     case Bytecodes::_f2d:
1545     case Bytecodes::_d2f:
1546       if (dest->is_single_xmm()) {
1547         __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg());
1548       } else if (dest->is_double_xmm()) {
1549         __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg());
1550       } else {
1551         assert(src->fpu() == dest->fpu(), "register must be equal");
1552         // do nothing (float result is rounded later through spilling)
1553       }
1554       break;
1555 
1556     case Bytecodes::_i2f:
1557     case Bytecodes::_i2d:
1558       if (dest->is_single_xmm()) {
1559         __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register());
1560       } else if (dest->is_double_xmm()) {
1561         __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register());
1562       } else {
1563         assert(dest->fpu() == 0, "result must be on TOS");
1564         __ movl(Address(rsp, 0), src->as_register());
1565         __ fild_s(Address(rsp, 0));
1566       }
1567       break;
1568 
1569     case Bytecodes::_l2f:
1570     case Bytecodes::_l2d:
1571       assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)");
1572       assert(dest->fpu() == 0, "result must be on TOS");
1573       __ movptr(Address(rsp, 0),          src->as_register_lo());
1574       __ movl(Address(rsp, BytesPerWord), src->as_register_hi());
1575       __ fild_d(Address(rsp, 0));
1576       // float result is rounded later through spilling
1577       break;
1578 
1579     case Bytecodes::_f2i:
1580     case Bytecodes::_d2i:
1581       if (src->is_single_xmm()) {
1582         __ cvttss2sil(dest->as_register(), src->as_xmm_float_reg());
1583       } else if (src->is_double_xmm()) {
1584         __ cvttsd2sil(dest->as_register(), src->as_xmm_double_reg());
1585       } else {
1586         assert(src->fpu() == 0, "input must be on TOS");
1587         __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
1588         __ fist_s(Address(rsp, 0));
1589         __ movl(dest->as_register(), Address(rsp, 0));
1590         __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
1591       }
1592       // IA32 conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
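           // cvttss2si/cvttsd2si and fist return the "integer indefinite" value
           // 0x80000000 in exactly those cases, so it serves as the sentinel that
           // routes execution into the stub.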
1593       assert(op->stub() != NULL, "stub required");
1594       __ cmpl(dest->as_register(), 0x80000000);
1595       __ jcc(Assembler::equal, *op->stub()->entry());
1596       __ bind(*op->stub()->continuation());
1597       break;
1598 
1599     case Bytecodes::_f2l:
1600     case Bytecodes::_d2l:
1601       assert(!src->is_xmm_register(), "input in xmm register not supported (no SSE instruction present)");
1602       assert(src->fpu() == 0, "input must be on TOS");
1603       assert(dest == FrameMap::long0_opr, "runtime stub places result in these registers");
1604 
1605       // instruction sequence too long to inline it here
1606       {
1607         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::fpu2long_stub_id)));
1608       }
1609       break;
1610 #endif // _LP64
1611 
1612     default: ShouldNotReachHere();
1613   }
1614 }
1615 
1616 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
1617   if (op->init_check()) {
1618     add_debug_info_for_null_check_here(op->stub()->info());
1619     __ cmpb(Address(op->klass()->as_register(),
1620                     InstanceKlass::init_state_offset()),
1621                     InstanceKlass::fully_initialized);
1622     __ jcc(Assembler::notEqual, *op->stub()->entry());
1623   }
1624   __ allocate_object(op->obj()->as_register(),
1625                      op->tmp1()->as_register(),
1626                      op->tmp2()->as_register(),
1627                      op->header_size(),
1628                      op->object_size(),
1629                      op->klass()->as_register(),
1630                      *op->stub()->entry());
1631   __ bind(*op->stub()->continuation());
1632 }
1633 
1634 void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
1635   Register len =  op->len()->as_register();
1636   LP64_ONLY( __ movslq(len, len); )
1637 
1638   if (UseSlowPath ||
1639       (!UseFastNewObjectArray && is_reference_type(op->type())) ||
1640       (!UseFastNewTypeArray   && !is_reference_type(op->type()))) {
1641     __ jmp(*op->stub()->entry());
1642   } else {
1643     Register tmp1 = op->tmp1()->as_register();
1644     Register tmp2 = op->tmp2()->as_register();
1645     Register tmp3 = op->tmp3()->as_register();
1646     if (len == tmp1) {
1647       tmp1 = tmp3;
1648     } else if (len == tmp2) {
1649       tmp2 = tmp3;
1650     } else if (len == tmp3) {
1651       // everything is ok
1652     } else {
1653       __ mov(tmp3, len);
1654     }
1655     __ allocate_array(op->obj()->as_register(),
1656                       len,
1657                       tmp1,
1658                       tmp2,
1659                       arrayOopDesc::header_size(op->type()),
1660                       array_element_size(op->type()),
1661                       op->klass()->as_register(),
1662                       *op->stub()->entry());
1663   }
1664   __ bind(*op->stub()->continuation());
1665 }
1666 
1667 void LIR_Assembler::type_profile_helper(Register mdo,
1668                                         ciMethodData *md, ciProfileData *data,
1669                                         Register recv, Label* update_done) {
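       // Two passes over the ReceiverTypeData rows: first try to find a row that
       // already records this receiver klass and bump its counter; otherwise claim
       // the first empty row. If all rows are taken, the profile is left unchanged.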
1670   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1671     Label next_test;
1672     // See if the receiver is receiver[n].
1673     __ cmpptr(recv, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
1674     __ jccb(Assembler::notEqual, next_test);
1675     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
1676     __ addptr(data_addr, DataLayout::counter_increment);
1677     __ jmp(*update_done);
1678     __ bind(next_test);
1679   }
1680 
1681   // Didn't find receiver; find next empty slot and fill it in
1682   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1683     Label next_test;
1684     Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
1685     __ cmpptr(recv_addr, (intptr_t)NULL_WORD);
1686     __ jccb(Assembler::notEqual, next_test);
1687     __ movptr(recv_addr, recv);
1688     __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment);
1689     __ jmp(*update_done);
1690     __ bind(next_test);
1691   }
1692 }
1693 
1694 void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
1695   // we always need a stub for the failure case.
1696   CodeStub* stub = op->stub();
1697   Register obj = op->object()->as_register();
1698   Register k_RInfo = op->tmp1()->as_register();
1699   Register klass_RInfo = op->tmp2()->as_register();
1700   Register dst = op->result_opr()->as_register();
1701   ciKlass* k = op->klass();
1702   Register Rtmp1 = noreg;
1703   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
1704 
1705   // check if it needs to be profiled
1706   ciMethodData* md = NULL;
1707   ciProfileData* data = NULL;
1708 
1709   if (op->should_profile()) {
1710     ciMethod* method = op->profiled_method();
1711     assert(method != NULL, "Should have method");
1712     int bci = op->profiled_bci();
1713     md = method->method_data_or_null();
1714     assert(md != NULL, "Sanity");
1715     data = md->bci_to_data(bci);
1716     assert(data != NULL,                "need data for type check");
1717     assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1718   }
1719   Label profile_cast_success, profile_cast_failure;
1720   Label *success_target = op->should_profile() ? &profile_cast_success : success;
1721   Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
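       // When profiling, successes and failures are first routed through the local
       // profile_cast_* labels so the MDO can be updated before continuing to the
       // caller-supplied success/failure labels.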
1722 
1723   if (obj == k_RInfo) {
1724     k_RInfo = dst;
1725   } else if (obj == klass_RInfo) {
1726     klass_RInfo = dst;
1727   }
1728   if (k->is_loaded() && !UseCompressedClassPointers) {
1729     select_different_registers(obj, dst, k_RInfo, klass_RInfo);
1730   } else {
1731     Rtmp1 = op->tmp3()->as_register();
1732     select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
1733   }
1734 
1735   assert_different_registers(obj, k_RInfo, klass_RInfo);
1736 
1737   __ cmpptr(obj, (int32_t)NULL_WORD);
1738   if (op->should_profile()) {
1739     Label not_null;
1740     __ jccb(Assembler::notEqual, not_null);
1741     // Object is null; update MDO and exit
1742     Register mdo  = klass_RInfo;
1743     __ mov_metadata(mdo, md->constant_encoding());
1744     Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
1745     int header_bits = BitData::null_seen_byte_constant();
1746     __ orb(data_addr, header_bits);
1747     __ jmp(*obj_is_null);
1748     __ bind(not_null);
1749   } else {
1750     __ jcc(Assembler::equal, *obj_is_null);
1751   }
1752 
1753   if (!k->is_loaded()) {
1754     klass2reg_with_patching(k_RInfo, op->info_for_patch());
1755   } else {
1756 #ifdef _LP64
1757     __ mov_metadata(k_RInfo, k->constant_encoding());
1758 #endif // _LP64
1759   }
1760   __ verify_oop(obj);
1761 
1762   if (op->fast_check()) {
1763     // get object class
1764     // not a safepoint as obj null check happens earlier
1765 #ifdef _LP64
1766     if (UseCompressedClassPointers) {
1767       __ load_klass(Rtmp1, obj, tmp_load_klass);
1768       __ cmpptr(k_RInfo, Rtmp1);
1769     } else {
1770       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1771     }
1772 #else
1773     if (k->is_loaded()) {
1774       __ cmpklass(Address(obj, oopDesc::klass_offset_in_bytes()), k->constant_encoding());
1775     } else {
1776       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1777     }
1778 #endif
1779     __ jcc(Assembler::notEqual, *failure_target);
1780     // successful cast, fall through to profile or jump
1781   } else {
1782     // get object class
1783     // not a safepoint as obj null check happens earlier
1784     __ load_klass(klass_RInfo, obj, tmp_load_klass);
1785     if (k->is_loaded()) {
1786       // See if we get an immediate positive hit
1787 #ifdef _LP64
1788       __ cmpptr(k_RInfo, Address(klass_RInfo, k->super_check_offset()));
1789 #else
1790       __ cmpklass(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
1791 #endif // _LP64
1792       if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
1793         __ jcc(Assembler::notEqual, *failure_target);
1794         // successful cast, fall through to profile or jump
1795       } else {
1796         // See if we get an immediate positive hit
1797         __ jcc(Assembler::equal, *success_target);
1798         // check for self
1799 #ifdef _LP64
1800         __ cmpptr(klass_RInfo, k_RInfo);
1801 #else
1802         __ cmpklass(klass_RInfo, k->constant_encoding());
1803 #endif // _LP64
1804         __ jcc(Assembler::equal, *success_target);
1805 
1806         __ push(klass_RInfo);
1807 #ifdef _LP64
1808         __ push(k_RInfo);
1809 #else
1810         __ pushklass(k->constant_encoding());
1811 #endif // _LP64
1812         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1813         __ pop(klass_RInfo);
1814         __ pop(klass_RInfo);
1815         // result is a boolean
1816         __ cmpl(klass_RInfo, 0);
1817         __ jcc(Assembler::equal, *failure_target);
1818         // successful cast, fall through to profile or jump
1819       }
1820     } else {
1821       // perform the fast part of the checking logic
1822       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1823       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
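           // The stub expects both klasses on the stack and writes a 0/1 result into
           // the first-pushed (sub-klass) slot; pop both words and test the result.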
1824       __ push(klass_RInfo);
1825       __ push(k_RInfo);
1826       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1827       __ pop(klass_RInfo);
1828       __ pop(k_RInfo);
1829       // result is a boolean
1830       __ cmpl(k_RInfo, 0);
1831       __ jcc(Assembler::equal, *failure_target);
1832       // successful cast, fall through to profile or jump
1833     }
1834   }
1835   if (op->should_profile()) {
1836     Register mdo  = klass_RInfo, recv = k_RInfo;
1837     __ bind(profile_cast_success);
1838     __ mov_metadata(mdo, md->constant_encoding());
1839     __ load_klass(recv, obj, tmp_load_klass);
1840     type_profile_helper(mdo, md, data, recv, success);
1841     __ jmp(*success);
1842 
1843     __ bind(profile_cast_failure);
1844     __ mov_metadata(mdo, md->constant_encoding());
1845     Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1846     __ subptr(counter_addr, DataLayout::counter_increment);
1847     __ jmp(*failure);
1848   }
1849   __ jmp(*success);
1850 }
1851 
1852 
1853 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
1854   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
1855   LIR_Code code = op->code();
1856   if (code == lir_store_check) {
1857     Register value = op->object()->as_register();
1858     Register array = op->array()->as_register();
1859     Register k_RInfo = op->tmp1()->as_register();
1860     Register klass_RInfo = op->tmp2()->as_register();
1861     Register Rtmp1 = op->tmp3()->as_register();
1862 
1863     CodeStub* stub = op->stub();
1864 
1865     // check if it needs to be profiled
1866     ciMethodData* md = NULL;
1867     ciProfileData* data = NULL;
1868 
1869     if (op->should_profile()) {
1870       ciMethod* method = op->profiled_method();
1871       assert(method != NULL, "Should have method");
1872       int bci = op->profiled_bci();
1873       md = method->method_data_or_null();
1874       assert(md != NULL, "Sanity");
1875       data = md->bci_to_data(bci);
1876       assert(data != NULL,                "need data for type check");
1877       assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1878     }
1879     Label profile_cast_success, profile_cast_failure, done;
1880     Label *success_target = op->should_profile() ? &profile_cast_success : &done;
1881     Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
1882 
1883     __ cmpptr(value, (int32_t)NULL_WORD);
1884     if (op->should_profile()) {
1885       Label not_null;
1886       __ jccb(Assembler::notEqual, not_null);
1887       // Object is null; update MDO and exit
1888       Register mdo  = klass_RInfo;
1889       __ mov_metadata(mdo, md->constant_encoding());
1890       Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
1891       int header_bits = BitData::null_seen_byte_constant();
1892       __ orb(data_addr, header_bits);
1893       __ jmp(done);
1894       __ bind(not_null);
1895     } else {
1896       __ jcc(Assembler::equal, done);
1897     }
1898 
1899     add_debug_info_for_null_check_here(op->info_for_exception());
1900     __ load_klass(k_RInfo, array, tmp_load_klass);
1901     __ load_klass(klass_RInfo, value, tmp_load_klass);
1902 
1903     // get the array's element klass (it's already uncompressed)
1904     __ movptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
1905     // perform the fast part of the checking logic
1906     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1907     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
1908     __ push(klass_RInfo);
1909     __ push(k_RInfo);
1910     __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1911     __ pop(klass_RInfo);
1912     __ pop(k_RInfo);
1913     // result is a boolean
1914     __ cmpl(k_RInfo, 0);
1915     __ jcc(Assembler::equal, *failure_target);
1916     // fall through to the success case
1917 
1918     if (op->should_profile()) {
1919       Register mdo  = klass_RInfo, recv = k_RInfo;
1920       __ bind(profile_cast_success);
1921       __ mov_metadata(mdo, md->constant_encoding());
1922       __ load_klass(recv, value, tmp_load_klass);
1923       type_profile_helper(mdo, md, data, recv, &done);
1924       __ jmpb(done);
1925 
1926       __ bind(profile_cast_failure);
1927       __ mov_metadata(mdo, md->constant_encoding());
1928       Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1929       __ subptr(counter_addr, DataLayout::counter_increment);
1930       __ jmp(*stub->entry());
1931     }
1932 
1933     __ bind(done);
1934   } else
1935     if (code == lir_checkcast) {
1936       Register obj = op->object()->as_register();
1937       Register dst = op->result_opr()->as_register();
1938       Label success;
1939       emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
1940       __ bind(success);
1941       if (dst != obj) {
1942         __ mov(dst, obj);
1943       }
1944     } else
1945       if (code == lir_instanceof) {
1946         Register obj = op->object()->as_register();
1947         Register dst = op->result_opr()->as_register();
1948         Label success, failure, done;
1949         emit_typecheck_helper(op, &success, &failure, &failure);
1950         __ bind(failure);
1951         __ xorptr(dst, dst);
1952         __ jmpb(done);
1953         __ bind(success);
1954         __ movptr(dst, 1);
1955         __ bind(done);
1956       } else {
1957         ShouldNotReachHere();
1958       }
1959 
1960 }
1961 
1962 
1963 void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
1964   if (LP64_ONLY(false &&) op->code() == lir_cas_long && VM_Version::supports_cx8()) {
1965     assert(op->cmp_value()->as_register_lo() == rax, "wrong register");
1966     assert(op->cmp_value()->as_register_hi() == rdx, "wrong register");
1967     assert(op->new_value()->as_register_lo() == rbx, "wrong register");
1968     assert(op->new_value()->as_register_hi() == rcx, "wrong register");
1969     Register addr = op->addr()->as_register();
1970     __ lock();
1971     NOT_LP64(__ cmpxchg8(Address(addr, 0)));
1972 
1973   } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj ) {
1974     NOT_LP64(assert(op->addr()->is_single_cpu(), "must be single");)
1975     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
1976     Register newval = op->new_value()->as_register();
1977     Register cmpval = op->cmp_value()->as_register();
1978     assert(cmpval == rax, "wrong register");
1979     assert(newval != NULL, "new val must be register");
1980     assert(cmpval != newval, "cmp and new values must be in different registers");
1981     assert(cmpval != addr, "cmp and addr must be in different registers");
1982     assert(newval != addr, "new value and addr must be in different registers");
1983 
1984     if ( op->code() == lir_cas_obj) {
1985 #ifdef _LP64
1986       if (UseCompressedOops) {
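             // The field holds a 32-bit narrow oop, so compress both the expected
             // value (implicitly in rax) and the new value and use a 32-bit cmpxchg.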
1987         __ encode_heap_oop(cmpval);
1988         __ mov(rscratch1, newval);
1989         __ encode_heap_oop(rscratch1);
1990         __ lock();
1991         // cmpval (rax) is implicitly used by this instruction
1992         __ cmpxchgl(rscratch1, Address(addr, 0));
1993       } else
1994 #endif
1995       {
1996         __ lock();
1997         __ cmpxchgptr(newval, Address(addr, 0));
1998       }
1999     } else {
2000       assert(op->code() == lir_cas_int, "lir_cas_int expected");
2001       __ lock();
2002       __ cmpxchgl(newval, Address(addr, 0));
2003     }
2004 #ifdef _LP64
2005   } else if (op->code() == lir_cas_long) {
2006     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
2007     Register newval = op->new_value()->as_register_lo();
2008     Register cmpval = op->cmp_value()->as_register_lo();
2009     assert(cmpval == rax, "wrong register");
2010     assert(newval != NULL, "new val must be register");
2011     assert(cmpval != newval, "cmp and new values must be in different registers");
2012     assert(cmpval != addr, "cmp and addr must be in different registers");
2013     assert(newval != addr, "new value and addr must be in different registers");
2014     __ lock();
2015     __ cmpxchgq(newval, Address(addr, 0));
2016 #endif // _LP64
2017   } else {
2018     Unimplemented();
2019   }
2020 }
2021 
2022 void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
2023   Assembler::Condition acond, ncond;
2024   switch (condition) {
2025     case lir_cond_equal:        acond = Assembler::equal;        ncond = Assembler::notEqual;     break;
2026     case lir_cond_notEqual:     acond = Assembler::notEqual;     ncond = Assembler::equal;        break;
2027     case lir_cond_less:         acond = Assembler::less;         ncond = Assembler::greaterEqual; break;
2028     case lir_cond_lessEqual:    acond = Assembler::lessEqual;    ncond = Assembler::greater;      break;
2029     case lir_cond_greaterEqual: acond = Assembler::greaterEqual; ncond = Assembler::less;         break;
2030     case lir_cond_greater:      acond = Assembler::greater;      ncond = Assembler::lessEqual;    break;
2031     case lir_cond_belowEqual:   acond = Assembler::belowEqual;   ncond = Assembler::above;        break;
2032     case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;   ncond = Assembler::below;        break;
2033     default:                    acond = Assembler::equal;        ncond = Assembler::notEqual;
2034                                 ShouldNotReachHere();
2035   }
2036 
2037   if (opr1->is_cpu_register()) {
2038     reg2reg(opr1, result);
2039   } else if (opr1->is_stack()) {
2040     stack2reg(opr1, result, result->type());
2041   } else if (opr1->is_constant()) {
2042     const2reg(opr1, result, lir_patch_none, NULL);
2043   } else {
2044     ShouldNotReachHere();
2045   }
2046 
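       // Strategy: move opr1 into the result unconditionally, then overwrite it with
       // opr2 under the negated condition - with cmov when possible, otherwise with
       // a short branch around the second move.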
2047   if (VM_Version::supports_cmov() && !opr2->is_constant()) {
2048     // optimized version that does not require a branch
2049     if (opr2->is_single_cpu()) {
2050       assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
2051       __ cmov(ncond, result->as_register(), opr2->as_register());
2052     } else if (opr2->is_double_cpu()) {
2053       assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2054       assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2055       __ cmovptr(ncond, result->as_register_lo(), opr2->as_register_lo());
2056       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), opr2->as_register_hi());)
2057     } else if (opr2->is_single_stack()) {
2058       __ cmovl(ncond, result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix()));
2059     } else if (opr2->is_double_stack()) {
2060       __ cmovptr(ncond, result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix(), lo_word_offset_in_bytes));
2061       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), frame_map()->address_for_slot(opr2->double_stack_ix(), hi_word_offset_in_bytes));)
2062     } else {
2063       ShouldNotReachHere();
2064     }
2065 
2066   } else {
2067     Label skip;
2068     __ jcc (acond, skip);
2069     if (opr2->is_cpu_register()) {
2070       reg2reg(opr2, result);
2071     } else if (opr2->is_stack()) {
2072       stack2reg(opr2, result, result->type());
2073     } else if (opr2->is_constant()) {
2074       const2reg(opr2, result, lir_patch_none, NULL);
2075     } else {
2076       ShouldNotReachHere();
2077     }
2078     __ bind(skip);
2079   }
2080 }
2081 
2082 
2083 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
2084   assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
2085 
2086   if (left->is_single_cpu()) {
2087     assert(left == dest, "left and dest must be equal");
2088     Register lreg = left->as_register();
2089 
2090     if (right->is_single_cpu()) {
2091       // cpu register - cpu register
2092       Register rreg = right->as_register();
2093       switch (code) {
2094         case lir_add: __ addl (lreg, rreg); break;
2095         case lir_sub: __ subl (lreg, rreg); break;
2096         case lir_mul: __ imull(lreg, rreg); break;
2097         default:      ShouldNotReachHere();
2098       }
2099 
2100     } else if (right->is_stack()) {
2101       // cpu register - stack
2102       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2103       switch (code) {
2104         case lir_add: __ addl(lreg, raddr); break;
2105         case lir_sub: __ subl(lreg, raddr); break;
2106         default:      ShouldNotReachHere();
2107       }
2108 
2109     } else if (right->is_constant()) {
2110       // cpu register - constant
2111       jint c = right->as_constant_ptr()->as_jint();
2112       switch (code) {
2113         case lir_add: {
2114           __ incrementl(lreg, c);
2115           break;
2116         }
2117         case lir_sub: {
2118           __ decrementl(lreg, c);
2119           break;
2120         }
2121         default: ShouldNotReachHere();
2122       }
2123 
2124     } else {
2125       ShouldNotReachHere();
2126     }
2127 
2128   } else if (left->is_double_cpu()) {
2129     assert(left == dest, "left and dest must be equal");
2130     Register lreg_lo = left->as_register_lo();
2131     Register lreg_hi = left->as_register_hi();
2132 
2133     if (right->is_double_cpu()) {
2134       // cpu register - cpu register
2135       Register rreg_lo = right->as_register_lo();
2136       Register rreg_hi = right->as_register_hi();
2137       NOT_LP64(assert_different_registers(lreg_lo, lreg_hi, rreg_lo, rreg_hi));
2138       LP64_ONLY(assert_different_registers(lreg_lo, rreg_lo));
2139       switch (code) {
2140         case lir_add:
2141           __ addptr(lreg_lo, rreg_lo);
2142           NOT_LP64(__ adcl(lreg_hi, rreg_hi));
2143           break;
2144         case lir_sub:
2145           __ subptr(lreg_lo, rreg_lo);
2146           NOT_LP64(__ sbbl(lreg_hi, rreg_hi));
2147           break;
2148         case lir_mul:
2149 #ifdef _LP64
2150           __ imulq(lreg_lo, rreg_lo);
2151 #else
2152           assert(lreg_lo == rax && lreg_hi == rdx, "must be");
2153           __ imull(lreg_hi, rreg_lo);
2154           __ imull(rreg_hi, lreg_lo);
2155           __ addl (rreg_hi, lreg_hi);
2156           __ mull (rreg_lo);
2157           __ addl (lreg_hi, rreg_hi);
2158 #endif // _LP64
2159           break;
2160         default:
2161           ShouldNotReachHere();
2162       }
2163 
2164     } else if (right->is_constant()) {
2165       // cpu register - constant
2166 #ifdef _LP64
2167       jlong c = right->as_constant_ptr()->as_jlong_bits();
2168       __ movptr(r10, (intptr_t) c);
2169       switch (code) {
2170         case lir_add:
2171           __ addptr(lreg_lo, r10);
2172           break;
2173         case lir_sub:
2174           __ subptr(lreg_lo, r10);
2175           break;
2176         default:
2177           ShouldNotReachHere();
2178       }
2179 #else
2180       jint c_lo = right->as_constant_ptr()->as_jint_lo();
2181       jint c_hi = right->as_constant_ptr()->as_jint_hi();
2182       switch (code) {
2183         case lir_add:
2184           __ addptr(lreg_lo, c_lo);
2185           __ adcl(lreg_hi, c_hi);
2186           break;
2187         case lir_sub:
2188           __ subptr(lreg_lo, c_lo);
2189           __ sbbl(lreg_hi, c_hi);
2190           break;
2191         default:
2192           ShouldNotReachHere();
2193       }
2194 #endif // _LP64
2195 
2196     } else {
2197       ShouldNotReachHere();
2198     }
2199 
2200   } else if (left->is_single_xmm()) {
2201     assert(left == dest, "left and dest must be equal");
2202     XMMRegister lreg = left->as_xmm_float_reg();
2203 
2204     if (right->is_single_xmm()) {
2205       XMMRegister rreg = right->as_xmm_float_reg();
2206       switch (code) {
2207         case lir_add: __ addss(lreg, rreg);  break;
2208         case lir_sub: __ subss(lreg, rreg);  break;
2209         case lir_mul: __ mulss(lreg, rreg);  break;
2210         case lir_div: __ divss(lreg, rreg);  break;
2211         default: ShouldNotReachHere();
2212       }
2213     } else {
2214       Address raddr;
2215       if (right->is_single_stack()) {
2216         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2217       } else if (right->is_constant()) {
2218         // hack for now
2219         raddr = __ as_Address(InternalAddress(float_constant(right->as_jfloat())));
2220       } else {
2221         ShouldNotReachHere();
2222       }
2223       switch (code) {
2224         case lir_add: __ addss(lreg, raddr);  break;
2225         case lir_sub: __ subss(lreg, raddr);  break;
2226         case lir_mul: __ mulss(lreg, raddr);  break;
2227         case lir_div: __ divss(lreg, raddr);  break;
2228         default: ShouldNotReachHere();
2229       }
2230     }
2231 
2232   } else if (left->is_double_xmm()) {
2233     assert(left == dest, "left and dest must be equal");
2234 
2235     XMMRegister lreg = left->as_xmm_double_reg();
2236     if (right->is_double_xmm()) {
2237       XMMRegister rreg = right->as_xmm_double_reg();
2238       switch (code) {
2239         case lir_add: __ addsd(lreg, rreg);  break;
2240         case lir_sub: __ subsd(lreg, rreg);  break;
2241         case lir_mul: __ mulsd(lreg, rreg);  break;
2242         case lir_div: __ divsd(lreg, rreg);  break;
2243         default: ShouldNotReachHere();
2244       }
2245     } else {
2246       Address raddr;
2247       if (right->is_double_stack()) {
2248         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2249       } else if (right->is_constant()) {
2250         // hack for now
2251         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2252       } else {
2253         ShouldNotReachHere();
2254       }
2255       switch (code) {
2256         case lir_add: __ addsd(lreg, raddr);  break;
2257         case lir_sub: __ subsd(lreg, raddr);  break;
2258         case lir_mul: __ mulsd(lreg, raddr);  break;
2259         case lir_div: __ divsd(lreg, raddr);  break;
2260         default: ShouldNotReachHere();
2261       }
2262     }
2263 
2264 #ifndef _LP64
2265   } else if (left->is_single_fpu()) {
2266     assert(dest->is_single_fpu(),  "fpu stack allocation required");
2267 
2268     if (right->is_single_fpu()) {
2269       arith_fpu_implementation(code, left->fpu_regnr(), right->fpu_regnr(), dest->fpu_regnr(), pop_fpu_stack);
2270 
2271     } else {
2272       assert(left->fpu_regnr() == 0, "left must be on TOS");
2273       assert(dest->fpu_regnr() == 0, "dest must be on TOS");
2274 
2275       Address raddr;
2276       if (right->is_single_stack()) {
2277         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2278       } else if (right->is_constant()) {
2279         address const_addr = float_constant(right->as_jfloat());
2280         assert(const_addr != NULL, "incorrect float/double constant maintenance");
2281         // hack for now
2282         raddr = __ as_Address(InternalAddress(const_addr));
2283       } else {
2284         ShouldNotReachHere();
2285       }
2286 
2287       switch (code) {
2288         case lir_add: __ fadd_s(raddr); break;
2289         case lir_sub: __ fsub_s(raddr); break;
2290         case lir_mul: __ fmul_s(raddr); break;
2291         case lir_div: __ fdiv_s(raddr); break;
2292         default:      ShouldNotReachHere();
2293       }
2294     }
2295 
2296   } else if (left->is_double_fpu()) {
2297     assert(dest->is_double_fpu(),  "fpu stack allocation required");
2298 
2299     if (code == lir_mul || code == lir_div) {
2300       // Double values require special handling for strictfp mul/div on x86
2301       __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1()));
2302       __ fmulp(left->fpu_regnrLo() + 1);
2303     }
2304 
2305     if (right->is_double_fpu()) {
2306       arith_fpu_implementation(code, left->fpu_regnrLo(), right->fpu_regnrLo(), dest->fpu_regnrLo(), pop_fpu_stack);
2307 
2308     } else {
2309       assert(left->fpu_regnrLo() == 0, "left must be on TOS");
2310       assert(dest->fpu_regnrLo() == 0, "dest must be on TOS");
2311 
2312       Address raddr;
2313       if (right->is_double_stack()) {
2314         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2315       } else if (right->is_constant()) {
2316         // hack for now
2317         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2318       } else {
2319         ShouldNotReachHere();
2320       }
2321 
2322       switch (code) {
2323         case lir_add: __ fadd_d(raddr); break;
2324         case lir_sub: __ fsub_d(raddr); break;
2325         case lir_mul: __ fmul_d(raddr); break;
2326         case lir_div: __ fdiv_d(raddr); break;
2327         default: ShouldNotReachHere();
2328       }
2329     }
2330 
2331     if (code == lir_mul || code == lir_div) {
2332       // Double values require special handling for strictfp mul/div on x86
2333       __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2()));
2334       __ fmulp(dest->fpu_regnrLo() + 1);
2335     }
2336 #endif // !_LP64
2337 
2338   } else if (left->is_single_stack() || left->is_address()) {
2339     assert(left == dest, "left and dest must be equal");
2340 
2341     Address laddr;
2342     if (left->is_single_stack()) {
2343       laddr = frame_map()->address_for_slot(left->single_stack_ix());
2344     } else if (left->is_address()) {
2345       laddr = as_Address(left->as_address_ptr());
2346     } else {
2347       ShouldNotReachHere();
2348     }
2349 
2350     if (right->is_single_cpu()) {
2351       Register rreg = right->as_register();
2352       switch (code) {
2353         case lir_add: __ addl(laddr, rreg); break;
2354         case lir_sub: __ subl(laddr, rreg); break;
2355         default:      ShouldNotReachHere();
2356       }
2357     } else if (right->is_constant()) {
2358       jint c = right->as_constant_ptr()->as_jint();
2359       switch (code) {
2360         case lir_add: {
2361           __ incrementl(laddr, c);
2362           break;
2363         }
2364         case lir_sub: {
2365           __ decrementl(laddr, c);
2366           break;
2367         }
2368         default: ShouldNotReachHere();
2369       }
2370     } else {
2371       ShouldNotReachHere();
2372     }
2373 
2374   } else {
2375     ShouldNotReachHere();
2376   }
2377 }
2378 
2379 #ifndef _LP64
2380 void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) {
2381   assert(pop_fpu_stack  || (left_index     == dest_index || right_index     == dest_index), "invalid LIR");
2382   assert(!pop_fpu_stack || (left_index - 1 == dest_index || right_index - 1 == dest_index), "invalid LIR");
2383   assert(left_index == 0 || right_index == 0, "either must be on top of stack");
2384 
2385   bool left_is_tos = (left_index == 0);
2386   bool dest_is_tos = (dest_index == 0);
2387   int non_tos_index = (left_is_tos ? right_index : left_index);
2388 
2389   switch (code) {
2390     case lir_add:
2391       if (pop_fpu_stack)       __ faddp(non_tos_index);
2392       else if (dest_is_tos)    __ fadd (non_tos_index);
2393       else                     __ fadda(non_tos_index);
2394       break;
2395 
2396     case lir_sub:
2397       if (left_is_tos) {
2398         if (pop_fpu_stack)     __ fsubrp(non_tos_index);
2399         else if (dest_is_tos)  __ fsub  (non_tos_index);
2400         else                   __ fsubra(non_tos_index);
2401       } else {
2402         if (pop_fpu_stack)     __ fsubp (non_tos_index);
2403         else if (dest_is_tos)  __ fsubr (non_tos_index);
2404         else                   __ fsuba (non_tos_index);
2405       }
2406       break;
2407 
2408     case lir_mul:
2409       if (pop_fpu_stack)       __ fmulp(non_tos_index);
2410       else if (dest_is_tos)    __ fmul (non_tos_index);
2411       else                     __ fmula(non_tos_index);
2412       break;
2413 
2414     case lir_div:
2415       if (left_is_tos) {
2416         if (pop_fpu_stack)     __ fdivrp(non_tos_index);
2417         else if (dest_is_tos)  __ fdiv  (non_tos_index);
2418         else                   __ fdivra(non_tos_index);
2419       } else {
2420         if (pop_fpu_stack)     __ fdivp (non_tos_index);
2421         else if (dest_is_tos)  __ fdivr (non_tos_index);
2422         else                   __ fdiva (non_tos_index);
2423       }
2424       break;
2425 
2426     case lir_rem:
2427       assert(left_is_tos && dest_is_tos && right_index == 1, "must be guaranteed by FPU stack allocation");
2428       __ fremr(noreg);
2429       break;
2430 
2431     default:
2432       ShouldNotReachHere();
2433   }
2434 }
2435 #endif // _LP64
2436 
2437 
2438 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
2439   if (value->is_double_xmm()) {
2440     switch(code) {
2441       case lir_abs :
2442         {
2443 #ifdef _LP64
2444           if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
2445             assert(tmp->is_valid(), "need temporary");
2446             __ vpandn(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), value->as_xmm_double_reg(), 2);
2447           } else
2448 #endif
2449           {
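                 // Clear the sign bit by ANDing with the 128-bit mask set up at VM
                 // startup (double_signmask_pool), which yields |x| without a branch.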
2450             if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
2451               __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
2452             }
2453             assert(!tmp->is_valid(), "do not need temporary");
2454             __ andpd(dest->as_xmm_double_reg(),
2455                      ExternalAddress((address)double_signmask_pool));
2456           }
2457         }
2458         break;
2459 
2460       case lir_sqrt: __ sqrtsd(dest->as_xmm_double_reg(), value->as_xmm_double_reg()); break;
2461       // all other intrinsics are not available in the SSE instruction set, so FPU is used
2462       default      : ShouldNotReachHere();
2463     }
2464 
2465 #ifndef _LP64
2466   } else if (value->is_double_fpu()) {
2467     assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
2468     switch(code) {
2469       case lir_abs   : __ fabs() ; break;
2470       case lir_sqrt  : __ fsqrt(); break;
2471       default      : ShouldNotReachHere();
2472     }
2473 #endif // !_LP64
2474   } else {
2475     Unimplemented();
2476   }
2477 }
2478 
2479 void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
2480   // assert(left->destroys_register(), "check");
2481   if (left->is_single_cpu()) {
2482     Register reg = left->as_register();
2483     if (right->is_constant()) {
2484       int val = right->as_constant_ptr()->as_jint();
2485       switch (code) {
2486         case lir_logic_and: __ andl (reg, val); break;
2487         case lir_logic_or:  __ orl  (reg, val); break;
2488         case lir_logic_xor: __ xorl (reg, val); break;
2489         default: ShouldNotReachHere();
2490       }
2491     } else if (right->is_stack()) {
2492       // added support for stack operands
2493       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2494       switch (code) {
2495         case lir_logic_and: __ andl (reg, raddr); break;
2496         case lir_logic_or:  __ orl  (reg, raddr); break;
2497         case lir_logic_xor: __ xorl (reg, raddr); break;
2498         default: ShouldNotReachHere();
2499       }
2500     } else {
2501       Register rright = right->as_register();
2502       switch (code) {
2503         case lir_logic_and: __ andptr (reg, rright); break;
2504         case lir_logic_or : __ orptr  (reg, rright); break;
2505         case lir_logic_xor: __ xorptr (reg, rright); break;
2506         default: ShouldNotReachHere();
2507       }
2508     }
2509     move_regs(reg, dst->as_register());
2510   } else {
2511     Register l_lo = left->as_register_lo();
2512     Register l_hi = left->as_register_hi();
2513     if (right->is_constant()) {
2514 #ifdef _LP64
2515       __ mov64(rscratch1, right->as_constant_ptr()->as_jlong());
2516       switch (code) {
2517         case lir_logic_and:
2518           __ andq(l_lo, rscratch1);
2519           break;
2520         case lir_logic_or:
2521           __ orq(l_lo, rscratch1);
2522           break;
2523         case lir_logic_xor:
2524           __ xorq(l_lo, rscratch1);
2525           break;
2526         default: ShouldNotReachHere();
2527       }
2528 #else
2529       int r_lo = right->as_constant_ptr()->as_jint_lo();
2530       int r_hi = right->as_constant_ptr()->as_jint_hi();
2531       switch (code) {
2532         case lir_logic_and:
2533           __ andl(l_lo, r_lo);
2534           __ andl(l_hi, r_hi);
2535           break;
2536         case lir_logic_or:
2537           __ orl(l_lo, r_lo);
2538           __ orl(l_hi, r_hi);
2539           break;
2540         case lir_logic_xor:
2541           __ xorl(l_lo, r_lo);
2542           __ xorl(l_hi, r_hi);
2543           break;
2544         default: ShouldNotReachHere();
2545       }
2546 #endif // _LP64
2547     } else {
2548 #ifdef _LP64
2549       Register r_lo;
2550       if (is_reference_type(right->type())) {
2551         r_lo = right->as_register();
2552       } else {
2553         r_lo = right->as_register_lo();
2554       }
2555 #else
2556       Register r_lo = right->as_register_lo();
2557       Register r_hi = right->as_register_hi();
2558       assert(l_lo != r_hi, "overwriting registers");
2559 #endif
2560       switch (code) {
2561         case lir_logic_and:
2562           __ andptr(l_lo, r_lo);
2563           NOT_LP64(__ andptr(l_hi, r_hi);)
2564           break;
2565         case lir_logic_or:
2566           __ orptr(l_lo, r_lo);
2567           NOT_LP64(__ orptr(l_hi, r_hi);)
2568           break;
2569         case lir_logic_xor:
2570           __ xorptr(l_lo, r_lo);
2571           NOT_LP64(__ xorptr(l_hi, r_hi);)
2572           break;
2573         default: ShouldNotReachHere();
2574       }
2575     }
2576 
2577     Register dst_lo = dst->as_register_lo();
2578     Register dst_hi = dst->as_register_hi();
2579 
2580 #ifdef _LP64
2581     move_regs(l_lo, dst_lo);
2582 #else
2583     if (dst_lo == l_hi) {
2584       assert(dst_hi != l_lo, "overwriting registers");
2585       move_regs(l_hi, dst_hi);
2586       move_regs(l_lo, dst_lo);
2587     } else {
2588       assert(dst_lo != l_hi, "overwriting registers");
2589       move_regs(l_lo, dst_lo);
2590       move_regs(l_hi, dst_hi);
2591     }
2592 #endif // _LP64
2593   }
2594 }
2595 
2596 
2597 // we assume that rax and rdx can be overwritten
2598 void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
2599 
2600   assert(left->is_single_cpu(),   "left must be register");
2601   assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
2602   assert(result->is_single_cpu(), "result must be register");
2603 
2604   //  assert(left->destroys_register(), "check");
2605   //  assert(right->destroys_register(), "check");
2606 
2607   Register lreg = left->as_register();
2608   Register dreg = result->as_register();
2609 
2610   if (right->is_constant()) {
2611     jint divisor = right->as_constant_ptr()->as_jint();
2612     assert(divisor > 0 && is_power_of_2(divisor), "must be");
2613     if (code == lir_idiv) {
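           // Power-of-two division: bias a negative dividend by (divisor - 1) so the
           // arithmetic shift rounds toward zero as the JLS requires.
           // E.g. -3 / 8: cdq leaves -1 in rdx, rdx & 7 == 7, and (-3 + 7) >> 3 == 0.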
2614       assert(lreg == rax, "must be rax");
2615       assert(temp->as_register() == rdx, "tmp register must be rdx");
2616       __ cdql(); // sign extend into rdx:rax
2617       if (divisor == 2) {
2618         __ subl(lreg, rdx);
2619       } else {
2620         __ andl(rdx, divisor - 1);
2621         __ addl(lreg, rdx);
2622       }
2623       __ sarl(lreg, log2i_exact(divisor));
2624       move_regs(lreg, dreg);
2625     } else if (code == lir_irem) {
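           // Power-of-two remainder: keep the low bits plus the sign bit; a negative,
           // non-zero intermediate is then sign-extended back to the true remainder.
           // E.g. -3 % 8: (-3 & 0x80000007) == 0x80000005, which the fixup below
           // turns into 0xFFFFFFFD == -3.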
2626       Label done;
2627       __ mov(dreg, lreg);
2628       __ andl(dreg, 0x80000000 | (divisor - 1));
2629       __ jcc(Assembler::positive, done);
2630       __ decrement(dreg);
2631       __ orl(dreg, ~(divisor - 1));
2632       __ increment(dreg);
2633       __ bind(done);
2634     } else {
2635       ShouldNotReachHere();
2636     }
2637   } else {
2638     Register rreg = right->as_register();
2639     assert(lreg == rax, "left register must be rax");
2640     assert(rreg != rdx, "right register must not be rdx");
2641     assert(temp->as_register() == rdx, "tmp register must be rdx");
2642 
2643     move_regs(lreg, rax);
2644 
2645     int idivl_offset = __ corrected_idivl(rreg);
2646     if (ImplicitDiv0Checks) {
2647       add_debug_info_for_div0(idivl_offset, info);
2648     }
2649     if (code == lir_irem) {
2650       move_regs(rdx, dreg); // result is in rdx
2651     } else {
2652       move_regs(rax, dreg);
2653     }
2654   }
2655 }
2656 
2657 
2658 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
2659   if (opr1->is_single_cpu()) {
2660     Register reg1 = opr1->as_register();
2661     if (opr2->is_single_cpu()) {
2662       // cpu register - cpu register
2663       if (is_reference_type(opr1->type())) {
2664         __ cmpoop(reg1, opr2->as_register());
2665       } else {
2666         assert(!is_reference_type(opr2->type()), "cmp int, oop?");
2667         __ cmpl(reg1, opr2->as_register());
2668       }
2669     } else if (opr2->is_stack()) {
2670       // cpu register - stack
2671       if (is_reference_type(opr1->type())) {
2672         __ cmpoop(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2673       } else {
2674         __ cmpl(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2675       }
2676     } else if (opr2->is_constant()) {
2677       // cpu register - constant
2678       LIR_Const* c = opr2->as_constant_ptr();
2679       if (c->type() == T_INT) {
2680         __ cmpl(reg1, c->as_jint());
2681       } else if (c->type() == T_METADATA) {
2682         // All we need for now is a comparison with NULL for equality.
2683         assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "oops");
2684         Metadata* m = c->as_metadata();
2685         if (m == NULL) {
2686           __ cmpptr(reg1, (int32_t)0);
2687         } else {
2688           ShouldNotReachHere();
2689         }
2690       } else if (is_reference_type(c->type())) {
2691         // In 64-bit mode oops are held in a single register
2692         jobject o = c->as_jobject();
2693         if (o == NULL) {
2694           __ cmpptr(reg1, (int32_t)NULL_WORD);
2695         } else {
2696           __ cmpoop(reg1, o);
2697         }
2698       } else {
2699         fatal("unexpected type: %s", basictype_to_str(c->type()));
2700       }
2701       // cpu register - address
2702     } else if (opr2->is_address()) {
2703       if (op->info() != NULL) {
2704         add_debug_info_for_null_check_here(op->info());
2705       }
2706       __ cmpl(reg1, as_Address(opr2->as_address_ptr()));
2707     } else {
2708       ShouldNotReachHere();
2709     }
2710 
2711   } else if(opr1->is_double_cpu()) {
2712     Register xlo = opr1->as_register_lo();
2713     Register xhi = opr1->as_register_hi();
2714     if (opr2->is_double_cpu()) {
2715 #ifdef _LP64
2716       __ cmpptr(xlo, opr2->as_register_lo());
2717 #else
2718       // cpu register - cpu register
2719       Register ylo = opr2->as_register_lo();
2720       Register yhi = opr2->as_register_hi();
2721       __ subl(xlo, ylo);
2722       __ sbbl(xhi, yhi);
2723       if (condition == lir_cond_equal || condition == lir_cond_notEqual) {
2724         __ orl(xhi, xlo);
2725       }
2726 #endif // _LP64
2727     } else if (opr2->is_constant()) {
2728       // cpu register - constant 0
2729       assert(opr2->as_jlong() == (jlong)0, "only handles zero");
2730 #ifdef _LP64
2731       __ cmpptr(xlo, (int32_t)opr2->as_jlong());
2732 #else
2733       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles equals case");
2734       __ orl(xhi, xlo);
2735 #endif // _LP64
2736     } else {
2737       ShouldNotReachHere();
2738     }
2739 
2740   } else if (opr1->is_single_xmm()) {
2741     XMMRegister reg1 = opr1->as_xmm_float_reg();
2742     if (opr2->is_single_xmm()) {
2743       // xmm register - xmm register
2744       __ ucomiss(reg1, opr2->as_xmm_float_reg());
2745     } else if (opr2->is_stack()) {
2746       // xmm register - stack
2747       __ ucomiss(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2748     } else if (opr2->is_constant()) {
2749       // xmm register - constant
2750       __ ucomiss(reg1, InternalAddress(float_constant(opr2->as_jfloat())));
2751     } else if (opr2->is_address()) {
2752       // xmm register - address
2753       if (op->info() != NULL) {
2754         add_debug_info_for_null_check_here(op->info());
2755       }
2756       __ ucomiss(reg1, as_Address(opr2->as_address_ptr()));
2757     } else {
2758       ShouldNotReachHere();
2759     }
2760 
2761   } else if (opr1->is_double_xmm()) {
2762     XMMRegister reg1 = opr1->as_xmm_double_reg();
2763     if (opr2->is_double_xmm()) {
2764       // xmm register - xmm register
2765       __ ucomisd(reg1, opr2->as_xmm_double_reg());
2766     } else if (opr2->is_stack()) {
2767       // xmm register - stack
2768       __ ucomisd(reg1, frame_map()->address_for_slot(opr2->double_stack_ix()));
2769     } else if (opr2->is_constant()) {
2770       // xmm register - constant
2771       __ ucomisd(reg1, InternalAddress(double_constant(opr2->as_jdouble())));
2772     } else if (opr2->is_address()) {
2773       // xmm register - address
2774       if (op->info() != NULL) {
2775         add_debug_info_for_null_check_here(op->info());
2776       }
2777       __ ucomisd(reg1, as_Address(opr2->pointer()->as_address()));
2778     } else {
2779       ShouldNotReachHere();
2780     }
2781 
2782 #ifndef _LP64
2783   } else if(opr1->is_single_fpu() || opr1->is_double_fpu()) {
2784     assert(opr1->is_fpu_register() && opr1->fpu() == 0, "currently left-hand side must be on TOS (relax this restriction)");
2785     assert(opr2->is_fpu_register(), "both must be registers");
2786     __ fcmp(noreg, opr2->fpu(), op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2787 #endif // !_LP64
2788 
2789   } else if (opr1->is_address() && opr2->is_constant()) {
2790     LIR_Const* c = opr2->as_constant_ptr();
2791 #ifdef _LP64
2792     if (is_reference_type(c->type())) {
2793       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "need to reverse");
2794       __ movoop(rscratch1, c->as_jobject());
2795     }
2796 #endif // _LP64
2797     if (op->info() != NULL) {
2798       add_debug_info_for_null_check_here(op->info());
2799     }
2800     // special case: address - constant
2801     LIR_Address* addr = opr1->as_address_ptr();
2802     if (c->type() == T_INT) {
2803       __ cmpl(as_Address(addr), c->as_jint());
2804     } else if (is_reference_type(c->type())) {
2805 #ifdef _LP64
2806       // %%% Make this explode if addr isn't reachable until we figure out a
2807       // better strategy by giving noreg as the temp for as_Address
2808       __ cmpoop(rscratch1, as_Address(addr, noreg));
2809 #else
2810       __ cmpoop(as_Address(addr), c->as_jobject());
2811 #endif // _LP64
2812     } else {
2813       ShouldNotReachHere();
2814     }
2815 
2816   } else {
2817     ShouldNotReachHere();
2818   }
2819 }
2820 
2821 void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
2822   if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
2823     if (left->is_single_xmm()) {
2824       assert(right->is_single_xmm(), "must match");
2825       __ cmpss2int(left->as_xmm_float_reg(), right->as_xmm_float_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2826     } else if (left->is_double_xmm()) {
2827       assert(right->is_double_xmm(), "must match");
2828       __ cmpsd2int(left->as_xmm_double_reg(), right->as_xmm_double_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2829 
2830     } else {
2831 #ifdef _LP64
2832       ShouldNotReachHere();
2833 #else
2834       assert(left->is_single_fpu() || left->is_double_fpu(), "must be");
2835       assert(right->is_single_fpu() || right->is_double_fpu(), "must match");
2836 
2837       assert(left->fpu() == 0, "left must be on TOS");
2838       __ fcmp2int(dst->as_register(), code == lir_ucmp_fd2i, right->fpu(),
2839                   op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2840 #endif // _LP64
2841     }
2842   } else {
2843     assert(code == lir_cmp_l2i, "check");
2844 #ifdef _LP64
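         // Produce -1 / 0 / +1 in dest: start with -1 and keep it when left < right;
         // otherwise set the low byte from ZF (0 if equal, 1 if greater) and zero-extend.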
2845     Label done;
2846     Register dest = dst->as_register();
2847     __ cmpptr(left->as_register_lo(), right->as_register_lo());
2848     __ movl(dest, -1);
2849     __ jccb(Assembler::less, done);
2850     __ set_byte_if_not_zero(dest);
2851     __ movzbl(dest, dest);
2852     __ bind(done);
2853 #else
2854     __ lcmp2int(left->as_register_hi(),
2855                 left->as_register_lo(),
2856                 right->as_register_hi(),
2857                 right->as_register_lo());
2858     move_regs(left->as_register_hi(), dst->as_register());
2859 #endif // _LP64
2860   }
2861 }
2862 
2863 
2864 void LIR_Assembler::align_call(LIR_Code code) {
2865   // make sure that the displacement word of the call ends up word aligned
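       // The displacement is patched at run time while other threads may be executing
       // this code, and the patch is only atomic if the 32-bit displacement word does
       // not straddle a machine word.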
2866   int offset = __ offset();
2867   switch (code) {
2868   case lir_static_call:
2869   case lir_optvirtual_call:
2870   case lir_dynamic_call:
2871     offset += NativeCall::displacement_offset;
2872     break;
2873   case lir_icvirtual_call:
2874     offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
2875     break;
2876   default: ShouldNotReachHere();
2877   }
2878   __ align(BytesPerWord, offset);
2879 }
2880 
2881 
2882 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
2883   assert((__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
2884          "must be aligned");
2885   __ call(AddressLiteral(op->addr(), rtype));
2886   add_call_info(code_offset(), op->info());
2887   __ oopmap_metadata(op->info());
2888   __ post_call_nop();
2889 }
2890 
2891 
2892 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
2893   __ ic_call(op->addr());
2894   add_call_info(code_offset(), op->info());
2895   __ oopmap_metadata(op->info());
2896   assert((__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0,
2897          "must be aligned");
2898   __ post_call_nop();
2899 }
2900 
2901 
2902 void LIR_Assembler::emit_static_call_stub() {
2903   address call_pc = __ pc();
2904   address stub = __ start_a_stub(call_stub_size());
2905   if (stub == NULL) {
2906     bailout("static call stub overflow");
2907     return;
2908   }
2909 
2910   int start = __ offset();
2911 
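       // The stub body is a metadata move (NULL placeholder, patched with the callee
       // Method* when the call is resolved) followed by a jump whose destination is
       // patched at the same time.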
2912   // make sure that the displacement word of the call ends up word aligned
2913   __ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset);
2914   __ relocate(static_stub_Relocation::spec(call_pc));
2915   __ mov_metadata(rbx, (Metadata*)NULL);
2916   // must be set to -1 at code generation time
2917   assert(((__ offset() + 1) % BytesPerWord) == 0, "must be aligned");
2918   // On 64-bit this will die if it takes a movq & jmp; it must be only a jmp
2919   __ jump(RuntimeAddress(__ pc()));
2920 
2921   assert(__ offset() - start <= call_stub_size(), "stub too big");
2922   __ end_a_stub();
2923 }
2924 
2925 
2926 void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
2927   assert(exceptionOop->as_register() == rax, "must match");
2928   assert(exceptionPC->as_register() == rdx, "must match");
2929 
2930   // exception object is not added to oop map by LinearScan
2931   // (LinearScan assumes that no oops are in fixed registers)
2932   info->add_register_oop(exceptionOop);
2933   Runtime1::StubID unwind_id;
2934 
2935   // get current pc information
2936   // pc is only needed if the method has an exception handler; the unwind code does not need it.
2937   int pc_for_athrow_offset = __ offset();
2938   InternalAddress pc_for_athrow(__ pc());
2939   __ lea(exceptionPC->as_register(), pc_for_athrow);
2940   add_call_info(pc_for_athrow_offset, info); // for exception handler
2941 
2942   __ verify_not_null_oop(rax);
2943   // search an exception handler (rax: exception oop, rdx: throwing pc)
2944   if (compilation()->has_fpu_code()) {
2945     unwind_id = Runtime1::handle_exception_id;
2946   } else {
2947     unwind_id = Runtime1::handle_exception_nofpu_id;
2948   }
2949   __ call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
2950 
2951   // enough room for a two-byte trap
2952   __ nop();
2953 }
2954 
2955 
2956 void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
2957   assert(exceptionOop->as_register() == rax, "must match");
2958 
2959   __ jmp(_unwind_handler_entry);
2960 }
2961 
2962 
2963 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
2964 
2965   // optimized version for linear scan:
2966   // * count must be already in ECX (guaranteed by LinearScan)
2967   // * left and dest must be equal
2968   // * tmp must be unused
2969   assert(count->as_register() == SHIFT_count, "count must be in ECX");
2970   assert(left == dest, "left and dest must be equal");
2971   assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
2972 
2973   if (left->is_single_cpu()) {
2974     Register value = left->as_register();
2975     assert(value != SHIFT_count, "left cannot be ECX");
2976 
2977     switch (code) {
2978       case lir_shl:  __ shll(value); break;
2979       case lir_shr:  __ sarl(value); break;
2980       case lir_ushr: __ shrl(value); break;
2981       default: ShouldNotReachHere();
2982     }
2983   } else if (left->is_double_cpu()) {
2984     Register lo = left->as_register_lo();
2985     Register hi = left->as_register_hi();
2986     assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX");
2987 #ifdef _LP64
2988     switch (code) {
2989       case lir_shl:  __ shlptr(lo);        break;
2990       case lir_shr:  __ sarptr(lo);        break;
2991       case lir_ushr: __ shrptr(lo);        break;
2992       default: ShouldNotReachHere();
2993     }
2994 #else
2995 
2996     switch (code) {
2997       case lir_shl:  __ lshl(hi, lo);        break;
2998       case lir_shr:  __ lshr(hi, lo, true);  break;
2999       case lir_ushr: __ lshr(hi, lo, false); break;
3000       default: ShouldNotReachHere();
3001     }
3002 #endif // _LP64
3003   } else {
3004     ShouldNotReachHere();
3005   }
3006 }
3007 
3008 
3009 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
3010   if (dest->is_single_cpu()) {
3011     // first move left into dest so that left is not destroyed by the shift
3012     Register value = dest->as_register();
3013     count = count & 0x1F; // Java spec
3014 
3015     move_regs(left->as_register(), value);
3016     switch (code) {
3017       case lir_shl:  __ shll(value, count); break;
3018       case lir_shr:  __ sarl(value, count); break;
3019       case lir_ushr: __ shrl(value, count); break;
3020       default: ShouldNotReachHere();
3021     }
3022   } else if (dest->is_double_cpu()) {
3023 #ifndef _LP64
3024     Unimplemented();
3025 #else
3026     // first move left into dest so that left is not destroyed by the shift
3027     Register value = dest->as_register_lo();
3028     count = count & 0x1F; // Java spec
3029 
3030     move_regs(left->as_register_lo(), value);
3031     switch (code) {
3032       case lir_shl:  __ shlptr(value, count); break;
3033       case lir_shr:  __ sarptr(value, count); break;
3034       case lir_ushr: __ shrptr(value, count); break;
3035       default: ShouldNotReachHere();
3036     }
3037 #endif // _LP64
3038   } else {
3039     ShouldNotReachHere();
3040   }
3041 }
3042 
3043 
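     // The store_parameter overloads spill outgoing arguments for stub/runtime calls
     // into the reserved argument area at the bottom of the frame; offsets are given
     // in words from rsp.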
3044 void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
3045   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3046   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3047   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3048   __ movptr (Address(rsp, offset_from_rsp_in_bytes), r);
3049 }
3050 
3051 
3052 void LIR_Assembler::store_parameter(jint c,     int offset_from_rsp_in_words) {
3053   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3054   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3055   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3056   __ movptr (Address(rsp, offset_from_rsp_in_bytes), c);
3057 }
3058 
3059 
3060 void LIR_Assembler::store_parameter(jobject o,  int offset_from_rsp_in_words) {
3061   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3062   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3063   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3064   __ movoop (Address(rsp, offset_from_rsp_in_bytes), o);
3065 }
3066 
3067 
3068 void LIR_Assembler::store_parameter(Metadata* m,  int offset_from_rsp_in_words) {
3069   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3070   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3071   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3072   __ mov_metadata(Address(rsp, offset_from_rsp_in_bytes), m);
3073 }
3074 
3075 
3076 // This code replaces a call to arraycopy; no exception may be thrown in this
3077 // code, exceptions must be thrown in the System.arraycopy activation frame;
3078 // we could save some checks if this were not the case
3079 void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
3080   ciArrayKlass* default_type = op->expected_type();
3081   Register src = op->src()->as_register();
3082   Register dst = op->dst()->as_register();
3083   Register src_pos = op->src_pos()->as_register();
3084   Register dst_pos = op->dst_pos()->as_register();
3085   Register length  = op->length()->as_register();
3086   Register tmp = op->tmp()->as_register();
3087   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
3088 
3089   CodeStub* stub = op->stub();
3090   int flags = op->flags();
3091   BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
3092   if (is_reference_type(basic_type)) basic_type = T_OBJECT;
3093 
3094   // if we don't know anything, just go through the generic arraycopy
3095   if (default_type == NULL) {
3096     // save outgoing arguments on stack in case call to System.arraycopy is needed
3097     // HACK ALERT. This code used to push the parameters in a hardwired fashion
3098     // for interpreter calling conventions. Now we have to do it in new style conventions.
3099     // For the moment until C1 gets the new register allocator I just force all the
3100     // args to the right place (except the register args) and then on the back side
3101     // reload the register args properly if we go slow path. Yuck
3102 
3103     // These are proper for the calling convention
3104     store_parameter(length, 2);
3105     store_parameter(dst_pos, 1);
3106     store_parameter(dst, 0);
3107 
3108     // these are just temporary placements until we need to reload
3109     store_parameter(src_pos, 3);
3110     store_parameter(src, 4);
3111     NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");)
3112 
3113     address copyfunc_addr = StubRoutines::generic_arraycopy();
3114     assert(copyfunc_addr != NULL, "generic arraycopy stub required");
3115 
3116     // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint
3117 #ifdef _LP64
3118     // The arguments are in java calling convention so we can trivially shift them to C
3119     // convention
3120     assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
3121     __ mov(c_rarg0, j_rarg0);
3122     assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
3123     __ mov(c_rarg1, j_rarg1);
3124     assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
3125     __ mov(c_rarg2, j_rarg2);
3126     assert_different_registers(c_rarg3, j_rarg4);
3127     __ mov(c_rarg3, j_rarg3);
3128 #ifdef _WIN64
3129     // Allocate abi space for args but be sure to keep stack aligned
3130     __ subptr(rsp, 6*wordSize);
3131     store_parameter(j_rarg4, 4);
3132 #ifndef PRODUCT
3133     if (PrintC1Statistics) {
3134       __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3135     }
3136 #endif
3137     __ call(RuntimeAddress(copyfunc_addr));
3138     __ addptr(rsp, 6*wordSize);
3139 #else
3140     __ mov(c_rarg4, j_rarg4);
3141 #ifndef PRODUCT
3142     if (PrintC1Statistics) {
3143       __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3144     }
3145 #endif
3146     __ call(RuntimeAddress(copyfunc_addr));
3147 #endif // _WIN64
3148 #else
3149     __ push(length);
3150     __ push(dst_pos);
3151     __ push(dst);
3152     __ push(src_pos);
3153     __ push(src);
3154 
3155 #ifndef PRODUCT
3156     if (PrintC1Statistics) {
3157       __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3158     }
3159 #endif
3160     __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameter from the stack
3161 
3162 #endif // _LP64
3163 
3164     __ cmpl(rax, 0);
3165     __ jcc(Assembler::equal, *stub->continuation());
3166 
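         // A zero result means everything was copied; otherwise rax is expected to be
         // the bitwise complement of the number of elements already copied, so ~rax
         // recovers the partial count used to adjust the arguments for the slow path.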
3167     __ mov(tmp, rax);
3168     __ xorl(tmp, -1);
3169 
3170     // Reload values from the stack so they are where the stub
3171     // expects them.
3172     __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3173     __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3174     __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3175     __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3176     __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3177 
3178     __ subl(length, tmp);
3179     __ addl(src_pos, tmp);
3180     __ addl(dst_pos, tmp);
3181     __ jmp(*stub->entry());
3182 
3183     __ bind(*stub->continuation());
3184     return;
3185   }
3186 
3187   assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
3188 
3189   int elem_size = type2aelembytes(basic_type);
3190   Address::ScaleFactor scale;
3191 
3192   switch (elem_size) {
3193     case 1 :
3194       scale = Address::times_1;
3195       break;
3196     case 2 :
3197       scale = Address::times_2;
3198       break;
3199     case 4 :
3200       scale = Address::times_4;
3201       break;
3202     case 8 :
3203       scale = Address::times_8;
3204       break;
3205     default:
3206       scale = Address::no_scale;
3207       ShouldNotReachHere();
3208   }
3209 
3210   Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
3211   Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
3212   Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
3213   Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
3214 
3215   // length and positions are all sign-extended at this point on 64-bit
3216 
3217   // test for NULL
3218   if (flags & LIR_OpArrayCopy::src_null_check) {
3219     __ testptr(src, src);
3220     __ jcc(Assembler::zero, *stub->entry());
3221   }
3222   if (flags & LIR_OpArrayCopy::dst_null_check) {
3223     __ testptr(dst, dst);
3224     __ jcc(Assembler::zero, *stub->entry());
3225   }
3226 
3227   // If the compiler was not able to prove that the exact type of the source or the destination
3228   // of the arraycopy is an array type, check at runtime if the source or the destination is
3229   // an instance type.
3230   if (flags & LIR_OpArrayCopy::type_check) {
3231     if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
3232       __ load_klass(tmp, dst, tmp_load_klass);
3233       __ cmpl(Address(tmp, in_bytes(Klass::layout_helper_offset())), Klass::_lh_neutral_value);
3234       __ jcc(Assembler::greaterEqual, *stub->entry());
3235     }
3236 
3237     if (!(flags & LIR_OpArrayCopy::src_objarray)) {
3238       __ load_klass(tmp, src, tmp_load_klass);
3239       __ cmpl(Address(tmp, in_bytes(Klass::layout_helper_offset())), Klass::_lh_neutral_value);
3240       __ jcc(Assembler::greaterEqual, *stub->entry());
3241     }
3242   }
3243 
3244   // check if negative
3245   if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
3246     __ testl(src_pos, src_pos);
3247     __ jcc(Assembler::less, *stub->entry());
3248   }
3249   if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
3250     __ testl(dst_pos, dst_pos);
3251     __ jcc(Assembler::less, *stub->entry());
3252   }
3253 
3254   if (flags & LIR_OpArrayCopy::src_range_check) {
3255     __ lea(tmp, Address(src_pos, length, Address::times_1, 0));
3256     __ cmpl(tmp, src_length_addr);
3257     __ jcc(Assembler::above, *stub->entry());
3258   }
3259   if (flags & LIR_OpArrayCopy::dst_range_check) {
3260     __ lea(tmp, Address(dst_pos, length, Address::times_1, 0));
3261     __ cmpl(tmp, dst_length_addr);
3262     __ jcc(Assembler::above, *stub->entry());
3263   }
3264 
3265   if (flags & LIR_OpArrayCopy::length_positive_check) {
3266     __ testl(length, length);
3267     __ jcc(Assembler::less, *stub->entry());
3268   }
3269 
3270 #ifdef _LP64
3271   __ movl2ptr(src_pos, src_pos); // upper 32 bits must be zero
3272   __ movl2ptr(dst_pos, dst_pos); // upper 32 bits must be zero
3273 #endif
3274 
3275   if (flags & LIR_OpArrayCopy::type_check) {
3276     // We don't know whether the array types are compatible
3277     if (basic_type != T_OBJECT) {
3278       // Simple test for basic type arrays
3279       if (UseCompressedClassPointers) {
3280         __ movl(tmp, src_klass_addr);
3281         __ cmpl(tmp, dst_klass_addr);
3282       } else {
3283         __ movptr(tmp, src_klass_addr);
3284         __ cmpptr(tmp, dst_klass_addr);
3285       }
3286       __ jcc(Assembler::notEqual, *stub->entry());
3287     } else {
3288       // For object arrays, if src is a sub class of dst then we can
3289       // safely do the copy.
3290       Label cont, slow;
3291 
3292       __ push(src);
3293       __ push(dst);
3294 
3295       __ load_klass(src, src, tmp_load_klass);
3296       __ load_klass(dst, dst, tmp_load_klass);
3297 
3298       __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
3299 
3300       __ push(src);
3301       __ push(dst);
3302       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
3303       __ pop(dst);
3304       __ pop(src);
3305 
3306       __ cmpl(src, 0);
3307       __ jcc(Assembler::notEqual, cont);
3308 
3309       __ bind(slow);
3310       __ pop(dst);
3311       __ pop(src);
3312 
3313       address copyfunc_addr = StubRoutines::checkcast_arraycopy();
3314       if (copyfunc_addr != NULL) { // use stub if available
3315         // src is not a sub class of dst so we have to do a
3316         // per-element check.
3317 
3318         int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
3319         if ((flags & mask) != mask) {
3320           // At least one of them is known to be an object array; check the other at runtime.
3321           assert(flags & mask, "one of the two should be known to be an object array");
3322 
3323           if (!(flags & LIR_OpArrayCopy::src_objarray)) {
3324             __ load_klass(tmp, src, tmp_load_klass);
3325           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
3326             __ load_klass(tmp, dst, tmp_load_klass);
3327           }
3328           int lh_offset = in_bytes(Klass::layout_helper_offset());
3329           Address klass_lh_addr(tmp, lh_offset);
3330           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3331           __ cmpl(klass_lh_addr, objArray_lh);
3332           __ jcc(Assembler::notEqual, *stub->entry());
3333         }
3334 
3335         // Spill because stubs can use any register they like and it's
3336         // easier to restore just those that we care about.
3337         store_parameter(dst, 0);
3338         store_parameter(dst_pos, 1);
3339         store_parameter(length, 2);
3340         store_parameter(src_pos, 3);
3341         store_parameter(src, 4);
3342 
3343 #ifndef _LP64
3344         __ movptr(tmp, dst_klass_addr);
3345         __ movptr(tmp, Address(tmp, ObjArrayKlass::element_klass_offset()));
3346         __ push(tmp);
3347         __ movl(tmp, Address(tmp, Klass::super_check_offset_offset()));
3348         __ push(tmp);
3349         __ push(length);
3350         __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3351         __ push(tmp);
3352         __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3353         __ push(tmp);
3354 
3355         __ call_VM_leaf(copyfunc_addr, 5);
3356 #else
3357         __ movl2ptr(length, length); // upper 32 bits must be zero
3358 
3359         __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3360         assert_different_registers(c_rarg0, dst, dst_pos, length);
3361         __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3362         assert_different_registers(c_rarg1, dst, length);
3363 
3364         __ mov(c_rarg2, length);
3365         assert_different_registers(c_rarg2, dst);
3366 
3367 #ifdef _WIN64
3368         // Allocate abi space for args but be sure to keep stack aligned
3369         __ subptr(rsp, 6*wordSize);
3370         __ load_klass(c_rarg3, dst, tmp_load_klass);
3371         __ movptr(c_rarg3, Address(c_rarg3, ObjArrayKlass::element_klass_offset()));
3372         store_parameter(c_rarg3, 4);
3373         __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset()));
3374         __ call(RuntimeAddress(copyfunc_addr));
3375         __ addptr(rsp, 6*wordSize);
3376 #else
3377         __ load_klass(c_rarg4, dst, tmp_load_klass);
3378         __ movptr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
3379         __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
3380         __ call(RuntimeAddress(copyfunc_addr));
3381 #endif
3382 
3383 #endif
3384 
3385 #ifndef PRODUCT
3386         if (PrintC1Statistics) {
3387           Label failed;
3388           __ testl(rax, rax);
3389           __ jcc(Assembler::notZero, failed);
3390           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
3391           __ bind(failed);
3392         }
3393 #endif
3394 
3395         __ testl(rax, rax);
3396         __ jcc(Assembler::zero, *stub->continuation());
3397 
3398 #ifndef PRODUCT
3399         if (PrintC1Statistics) {
3400           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
3401         }
3402 #endif
3403 
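             // As above, a non-zero rax is expected to be the bitwise complement of the
             // number of elements copied before an element failed the type check.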
3404         __ mov(tmp, rax);
3405 
3406         __ xorl(tmp, -1);
3407 
3408         // Restore previously spilled arguments
3409         __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3410         __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3411         __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3412         __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3413         __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3414 
3415 
3416         __ subl(length, tmp);
3417         __ addl(src_pos, tmp);
3418         __ addl(dst_pos, tmp);
3419       }
3420 
3421       __ jmp(*stub->entry());
3422 
3423       __ bind(cont);
3424       __ pop(dst);
3425       __ pop(src);
3426     }
3427   }
3428 
3429 #ifdef ASSERT
3430   if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
3431     // Sanity check the known type with the incoming class.  For the
3432     // primitive case the types must match exactly with src.klass and
3433     // dst.klass each exactly matching the default type.  For the
3434     // object array case, if no type check is needed then either the
3435     // dst type is exactly the expected type and the src type is a
3436     // subtype which we can't check or src is the same array as dst
3437     // but not necessarily exactly of type default_type.
3438     Label known_ok, halt;
3439     __ mov_metadata(tmp, default_type->constant_encoding());
3440 #ifdef _LP64
3441     if (UseCompressedClassPointers) {
3442       __ encode_klass_not_null(tmp, rscratch1);
3443     }
3444 #endif
3445 
3446     if (basic_type != T_OBJECT) {
3447 
3448       if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr);
3449       else                            __ cmpptr(tmp, dst_klass_addr);
3450       __ jcc(Assembler::notEqual, halt);
3451       if (UseCompressedClassPointers) __ cmpl(tmp, src_klass_addr);
3452       else                            __ cmpptr(tmp, src_klass_addr);
3453       __ jcc(Assembler::equal, known_ok);
3454     } else {
3455       if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr);
3456       else                            __ cmpptr(tmp, dst_klass_addr);
3457       __ jcc(Assembler::equal, known_ok);
3458       __ cmpptr(src, dst);
3459       __ jcc(Assembler::equal, known_ok);
3460     }
3461     __ bind(halt);
3462     __ stop("incorrect type information in arraycopy");
3463     __ bind(known_ok);
3464   }
3465 #endif
3466 
3467 #ifndef PRODUCT
3468   if (PrintC1Statistics) {
3469     __ incrementl(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
3470   }
3471 #endif
3472 
3473 #ifdef _LP64
3474   assert_different_registers(c_rarg0, dst, dst_pos, length);
3475   __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3476   assert_different_registers(c_rarg1, length);
3477   __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3478   __ mov(c_rarg2, length);
3479 
3480 #else
3481   __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3482   store_parameter(tmp, 0);
3483   __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3484   store_parameter(tmp, 1);
3485   store_parameter(length, 2);
3486 #endif // _LP64
3487 
3488   bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
3489   bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
3490   const char *name;
3491   address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
3492   __ call_VM_leaf(entry, 0);
3493 
3494   __ bind(*stub->continuation());
3495 }
3496 
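     // Update a CRC-32 checksum with a single byte: res temporarily holds the address
     // of the CRC table and the crc value is bit-inverted before and after the table
     // lookup, as the CRC-32 convention requires.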
3497 void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
3498   assert(op->crc()->is_single_cpu(),  "crc must be register");
3499   assert(op->val()->is_single_cpu(),  "byte value must be register");
3500   assert(op->result_opr()->is_single_cpu(), "result must be register");
3501   Register crc = op->crc()->as_register();
3502   Register val = op->val()->as_register();
3503   Register res = op->result_opr()->as_register();
3504 
3505   assert_different_registers(val, crc, res);
3506 
3507   __ lea(res, ExternalAddress(StubRoutines::crc_table_addr()));
3508   __ notl(crc); // ~crc
3509   __ update_byte_crc32(crc, val, res);
3510   __ notl(crc); // ~crc
3511   __ mov(res, crc);
3512 }
3513 
3514 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
3515   Register obj = op->obj_opr()->as_register();  // may not be an oop
3516   Register hdr = op->hdr_opr()->as_register();
3517   Register lock = op->lock_opr()->as_register();
3518   if (!UseFastLocking) {
3519     __ jmp(*op->stub()->entry());
3520   } else if (op->code() == lir_lock) {
3521     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3522     // add debug info for NullPointerException only if one is possible
3523     int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry());
3524     if (op->info() != NULL) {
3525       add_debug_info_for_null_check(null_check_offset, op->info());
3526     }
3527     // done
3528   } else if (op->code() == lir_unlock) {
3529     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3530     __ unlock_object(hdr, obj, lock, *op->stub()->entry());
3531   } else {
3532     Unimplemented();
3533   }
3534   if (op->code() == lir_lock) {
3535     // If deoptimization happens in Runtime1::monitorenter, the inc_held_monitor_count
3536     // done after returning from the slow path would be skipped. Therefore:
3537     // 1. increment the count only on the fast path here
3538     // 2. Runtime1::monitorenter increments the count itself after locking
3539 #ifndef _LP64
3540     Register thread = rsi;
3541     __ push(thread);
3542     __ get_thread(thread);
3543 #else
3544     Register thread = r15_thread;
3545 #endif
3546     __ inc_held_monitor_count(thread);
3547 #ifndef _LP64
3548     __ pop(thread);
3549 #endif
3550   }
3551   __ bind(*op->stub()->continuation());
3552   if (op->code() == lir_unlock) {
3553     // The unlock slow path is a JRT_Leaf stub, so no deoptimization can happen
3554 #ifndef _LP64
3555     Register thread = rsi;
3556     __ push(thread);
3557     __ get_thread(thread);
3558 #else
3559     Register thread = r15_thread;
3560 #endif
3561     __ dec_held_monitor_count(thread);
3562 #ifndef _LP64
3563     __ pop(thread);
3564 #endif
3565   }
3566 }
3567 
3568 
3569 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
3570   ciMethod* method = op->profiled_method();
3571   int bci          = op->profiled_bci();
3572   ciMethod* callee = op->profiled_callee();
3573   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
3574 
3575   // Update counter for all call types
3576   ciMethodData* md = method->method_data_or_null();
3577   assert(md != NULL, "Sanity");
3578   ciProfileData* data = md->bci_to_data(bci);
3579   assert(data != NULL && data->is_CounterData(), "need CounterData for calls");
3580   assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
3581   Register mdo  = op->mdo()->as_register();
3582   __ mov_metadata(mdo, md->constant_encoding());
3583   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
3584   // Perform additional virtual call profiling for invokevirtual and
3585   // invokeinterface bytecodes
3586   if (op->should_profile_receiver_type()) {
3587     assert(op->recv()->is_single_cpu(), "recv must be allocated");
3588     Register recv = op->recv()->as_register();
3589     assert_different_registers(mdo, recv);
3590     assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
3591     ciKlass* known_klass = op->known_holder();
3592     if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
3593       // We know the type that will be seen at this call site; we can
3594       // statically update the MethodData* rather than needing to do
3595       // dynamic tests on the receiver type
3596 
3597       // NOTE: we should probably put a lock around this search to
3598       // avoid collisions by concurrent compilations
3599       ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
3600       uint i;
3601       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3602         ciKlass* receiver = vc_data->receiver(i);
3603         if (known_klass->equals(receiver)) {
3604           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3605           __ addptr(data_addr, DataLayout::counter_increment);
3606           return;
3607         }
3608       }
3609 
3610       // Receiver type not found in profile data; select an empty slot
3611 
3612       // Note that this is less efficient than it should be because it
3613       // always does a write to the receiver part of the
3614       // VirtualCallData rather than just the first time
3615       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3616         ciKlass* receiver = vc_data->receiver(i);
3617         if (receiver == NULL) {
3618           Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
3619           __ mov_metadata(recv_addr, known_klass->constant_encoding());
3620           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3621           __ addptr(data_addr, DataLayout::counter_increment);
3622           return;
3623         }
3624       }
3625     } else {
3626       __ load_klass(recv, recv, tmp_load_klass);
3627       Label update_done;
3628       type_profile_helper(mdo, md, data, recv, &update_done);
3629       // Receiver did not match any saved receiver and there is no empty row for it.
3630       // Increment total counter to indicate polymorphic case.
3631       __ addptr(counter_addr, DataLayout::counter_increment);
3632 
3633       __ bind(update_done);
3634     }
3635   } else {
3636     // Static call
3637     __ addptr(counter_addr, DataLayout::counter_increment);
3638   }
3639 }
3640 
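     // Update the type-profile cell at op->mdp() with the runtime type of obj: record
     // null_seen for nulls, store the klass when a type is seen for the first time,
     // and fall back to type_unknown once conflicting types have been observed.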
3641 void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
3642   Register obj = op->obj()->as_register();
3643   Register tmp = op->tmp()->as_pointer_register();
3644   Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
3645   Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
3646   ciKlass* exact_klass = op->exact_klass();
3647   intptr_t current_klass = op->current_klass();
3648   bool not_null = op->not_null();
3649   bool no_conflict = op->no_conflict();
3650 
3651   Label update, next, none;
3652 
3653   bool do_null = !not_null;
3654   bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
3655   bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
3656 
3657   assert(do_null || do_update, "why are we here?");
3658   assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
3659 
3660   __ verify_oop(obj);
3661 
3662   if (tmp != obj) {
3663     __ mov(tmp, obj);
3664   }
3665   if (do_null) {
3666     __ testptr(tmp, tmp);
3667     __ jccb(Assembler::notZero, update);
3668     if (!TypeEntries::was_null_seen(current_klass)) {
3669       __ orptr(mdo_addr, TypeEntries::null_seen);
3670     }
3671     if (do_update) {
3672 #ifndef ASSERT
3673       __ jmpb(next);
3674     }
3675 #else
3676       __ jmp(next);
3677     }
3678   } else {
3679     __ testptr(tmp, tmp);
3680     __ jcc(Assembler::notZero, update);
3681     __ stop("unexpected null obj");
3682 #endif
3683   }
3684 
3685   __ bind(update);
3686 
3687   if (do_update) {
3688 #ifdef ASSERT
3689     if (exact_klass != NULL) {
3690       Label ok;
3691       __ load_klass(tmp, tmp, tmp_load_klass);
3692       __ push(tmp);
3693       __ mov_metadata(tmp, exact_klass->constant_encoding());
3694       __ cmpptr(tmp, Address(rsp, 0));
3695       __ jcc(Assembler::equal, ok);
3696       __ stop("exact klass and actual klass differ");
3697       __ bind(ok);
3698       __ pop(tmp);
3699     }
3700 #endif
3701     if (!no_conflict) {
3702       if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
3703         if (exact_klass != NULL) {
3704           __ mov_metadata(tmp, exact_klass->constant_encoding());
3705         } else {
3706           __ load_klass(tmp, tmp, tmp_load_klass);
3707         }
3708 
3709         __ xorptr(tmp, mdo_addr);
3710         __ testptr(tmp, TypeEntries::type_klass_mask);
3711         // klass seen before, nothing to do. The unknown bit may have been
3712         // set already but no need to check.
3713         __ jccb(Assembler::zero, next);
3714 
3715         __ testptr(tmp, TypeEntries::type_unknown);
3716         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3717 
3718         if (TypeEntries::is_type_none(current_klass)) {
3719           __ cmpptr(mdo_addr, 0);
3720           __ jccb(Assembler::equal, none);
3721           __ cmpptr(mdo_addr, TypeEntries::null_seen);
3722           __ jccb(Assembler::equal, none);
3723           // There is a chance that the checks above (re-reading profiling
3724           // data from memory) fail if another thread has just set the
3725           // profiling to this obj's klass
3726           __ xorptr(tmp, mdo_addr);
3727           __ testptr(tmp, TypeEntries::type_klass_mask);
3728           __ jccb(Assembler::zero, next);
3729         }
3730       } else {
3731         assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
3732                ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
3733 
3734         __ movptr(tmp, mdo_addr);
3735         __ testptr(tmp, TypeEntries::type_unknown);
3736         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3737       }
3738 
3739       // different from before; cannot keep an accurate profile.
3740       __ orptr(mdo_addr, TypeEntries::type_unknown);
3741 
3742       if (TypeEntries::is_type_none(current_klass)) {
3743         __ jmpb(next);
3744 
3745         __ bind(none);
3746         // first time here. Set profile type.
3747         __ movptr(mdo_addr, tmp);
3748       }
3749     } else {
3750       // There's a single possible klass at this profile point
3751       assert(exact_klass != NULL, "should be");
3752       if (TypeEntries::is_type_none(current_klass)) {
3753         __ mov_metadata(tmp, exact_klass->constant_encoding());
3754         __ xorptr(tmp, mdo_addr);
3755         __ testptr(tmp, TypeEntries::type_klass_mask);
3756 #ifdef ASSERT
3757         __ jcc(Assembler::zero, next);
3758 
3759         {
3760           Label ok;
3761           __ push(tmp);
3762           __ cmpptr(mdo_addr, 0);
3763           __ jcc(Assembler::equal, ok);
3764           __ cmpptr(mdo_addr, TypeEntries::null_seen);
3765           __ jcc(Assembler::equal, ok);
3766           // may have been set by another thread
3767           __ mov_metadata(tmp, exact_klass->constant_encoding());
3768           __ xorptr(tmp, mdo_addr);
3769           __ testptr(tmp, TypeEntries::type_mask);
3770           __ jcc(Assembler::zero, ok);
3771 
3772           __ stop("unexpected profiling mismatch");
3773           __ bind(ok);
3774           __ pop(tmp);
3775         }
3776 #else
3777         __ jccb(Assembler::zero, next);
3778 #endif
3779         // first time here. Set profile type.
3780         __ movptr(mdo_addr, tmp);
3781       } else {
3782         assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
3783                ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
3784 
3785         __ movptr(tmp, mdo_addr);
3786         __ testptr(tmp, TypeEntries::type_unknown);
3787         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3788 
3789         __ orptr(mdo_addr, TypeEntries::type_unknown);
3790       }
3791     }
3792 
3793     __ bind(next);
3794   }
3795 }
3796 
3797 void LIR_Assembler::emit_delay(LIR_OpDelay*) {
3798   Unimplemented();
3799 }
3800 
3801 
3802 void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
3803   __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
3804 }
3805 
3806 
3807 void LIR_Assembler::align_backward_branch_target() {
3808   __ align(BytesPerWord);
3809 }
3810 
3811 
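     // Integer negation uses neg; float/double negation flips the sign bit by XOR-ing
     // with the 128-bit sign-flip masks (float_signflip_pool / double_signflip_pool),
     // or with a mask supplied in tmp on AVX-512 targets without AVX-512VL.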
3812 void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
3813   if (left->is_single_cpu()) {
3814     __ negl(left->as_register());
3815     move_regs(left->as_register(), dest->as_register());
3816 
3817   } else if (left->is_double_cpu()) {
3818     Register lo = left->as_register_lo();
3819 #ifdef _LP64
3820     Register dst = dest->as_register_lo();
3821     __ movptr(dst, lo);
3822     __ negptr(dst);
3823 #else
3824     Register hi = left->as_register_hi();
3825     __ lneg(hi, lo);
3826     if (dest->as_register_lo() == hi) {
3827       assert(dest->as_register_hi() != lo, "destroying register");
3828       move_regs(hi, dest->as_register_hi());
3829       move_regs(lo, dest->as_register_lo());
3830     } else {
3831       move_regs(lo, dest->as_register_lo());
3832       move_regs(hi, dest->as_register_hi());
3833     }
3834 #endif // _LP64
3835 
3836   } else if (dest->is_single_xmm()) {
3837 #ifdef _LP64
3838     if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
3839       assert(tmp->is_valid(), "need temporary");
3840       assert_different_registers(left->as_xmm_float_reg(), tmp->as_xmm_float_reg());
3841       __ vpxor(dest->as_xmm_float_reg(), tmp->as_xmm_float_reg(), left->as_xmm_float_reg(), 2);
3842     }
3843     else
3844 #endif
3845     {
3846       assert(!tmp->is_valid(), "do not need temporary");
3847       if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
3848         __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
3849       }
3850       __ xorps(dest->as_xmm_float_reg(),
3851                ExternalAddress((address)float_signflip_pool));
3852     }
3853   } else if (dest->is_double_xmm()) {
3854 #ifdef _LP64
3855     if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
3856       assert(tmp->is_valid(), "need temporary");
3857       assert_different_registers(left->as_xmm_double_reg(), tmp->as_xmm_double_reg());
3858       __ vpxor(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), left->as_xmm_double_reg(), 2);
3859     }
3860     else
3861 #endif
3862     {
3863       assert(!tmp->is_valid(), "do not need temporary");
3864       if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
3865         __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
3866       }
3867       __ xorpd(dest->as_xmm_double_reg(),
3868                ExternalAddress((address)double_signflip_pool));
3869     }
3870 #ifndef _LP64
3871   } else if (left->is_single_fpu() || left->is_double_fpu()) {
3872     assert(left->fpu() == 0, "arg must be on TOS");
3873     assert(dest->fpu() == 0, "dest must be TOS");
3874     __ fchs();
3875 #endif // !_LP64
3876 
3877   } else {
3878     ShouldNotReachHere();
3879   }
3880 }
3881 
3882 
3883 void LIR_Assembler::leal(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
3884   assert(src->is_address(), "must be an address");
3885   assert(dest->is_register(), "must be a register");
3886 
3887   PatchingStub* patch = NULL;
3888   if (patch_code != lir_patch_none) {
3889     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
3890   }
3891 
3892   Register reg = dest->as_pointer_register();
3893   LIR_Address* addr = src->as_address_ptr();
3894   __ lea(reg, as_Address(addr));
3895 
3896   if (patch != NULL) {
3897     patching_epilog(patch, patch_code, addr->base()->as_register(), info);
3898   }
3899 }
3900 
3901 
3902 
3903 void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
3904   assert(!tmp->is_valid(), "don't need temporary");
3905   __ call(RuntimeAddress(dest));
3906   if (info != NULL) {
3907     add_call_info_here(info);
3908     __ oopmap_metadata(info);
3909   }
3910   __ post_call_nop();
3911 }
3912 
3913 
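     // Volatile long fields must be accessed atomically; the value is moved through an
     // XMM register (or, on 32-bit, the x87 stack) so that the 64-bit memory access is
     // a single operation.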
3914 void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
3915   assert(type == T_LONG, "only for volatile long fields");
3916 
3917   if (info != NULL) {
3918     add_debug_info_for_null_check_here(info);
3919   }
3920 
3921   if (src->is_double_xmm()) {
3922     if (dest->is_double_cpu()) {
3923 #ifdef _LP64
3924       __ movdq(dest->as_register_lo(), src->as_xmm_double_reg());
3925 #else
3926       __ movdl(dest->as_register_lo(), src->as_xmm_double_reg());
3927       __ psrlq(src->as_xmm_double_reg(), 32);
3928       __ movdl(dest->as_register_hi(), src->as_xmm_double_reg());
3929 #endif // _LP64
3930     } else if (dest->is_double_stack()) {
3931       __ movdbl(frame_map()->address_for_slot(dest->double_stack_ix()), src->as_xmm_double_reg());
3932     } else if (dest->is_address()) {
3933       __ movdbl(as_Address(dest->as_address_ptr()), src->as_xmm_double_reg());
3934     } else {
3935       ShouldNotReachHere();
3936     }
3937 
3938   } else if (dest->is_double_xmm()) {
3939     if (src->is_double_stack()) {
3940       __ movdbl(dest->as_xmm_double_reg(), frame_map()->address_for_slot(src->double_stack_ix()));
3941     } else if (src->is_address()) {
3942       __ movdbl(dest->as_xmm_double_reg(), as_Address(src->as_address_ptr()));
3943     } else {
3944       ShouldNotReachHere();
3945     }
3946 
3947 #ifndef _LP64
3948   } else if (src->is_double_fpu()) {
3949     assert(src->fpu_regnrLo() == 0, "must be TOS");
3950     if (dest->is_double_stack()) {
3951       __ fistp_d(frame_map()->address_for_slot(dest->double_stack_ix()));
3952     } else if (dest->is_address()) {
3953       __ fistp_d(as_Address(dest->as_address_ptr()));
3954     } else {
3955       ShouldNotReachHere();
3956     }
3957 
3958   } else if (dest->is_double_fpu()) {
3959     assert(dest->fpu_regnrLo() == 0, "must be TOS");
3960     if (src->is_double_stack()) {
3961       __ fild_d(frame_map()->address_for_slot(src->double_stack_ix()));
3962     } else if (src->is_address()) {
3963       __ fild_d(as_Address(src->as_address_ptr()));
3964     } else {
3965       ShouldNotReachHere();
3966     }
3967 #endif // !_LP64
3968 
3969   } else {
3970     ShouldNotReachHere();
3971   }
3972 }
3973 
3974 #ifdef ASSERT
3975 // emit run-time assertion
3976 void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
3977   assert(op->code() == lir_assert, "must be");
3978 
3979   if (op->in_opr1()->is_valid()) {
3980     assert(op->in_opr2()->is_valid(), "both operands must be valid");
3981     comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
3982   } else {
3983     assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
3984     assert(op->condition() == lir_cond_always, "no other conditions allowed");
3985   }
3986 
3987   Label ok;
3988   if (op->condition() != lir_cond_always) {
3989     Assembler::Condition acond = Assembler::zero;
3990     switch (op->condition()) {
3991       case lir_cond_equal:        acond = Assembler::equal;       break;
3992       case lir_cond_notEqual:     acond = Assembler::notEqual;    break;
3993       case lir_cond_less:         acond = Assembler::less;        break;
3994       case lir_cond_lessEqual:    acond = Assembler::lessEqual;   break;
3995       case lir_cond_greaterEqual: acond = Assembler::greaterEqual;break;
3996       case lir_cond_greater:      acond = Assembler::greater;     break;
3997       case lir_cond_belowEqual:   acond = Assembler::belowEqual;  break;
3998       case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;  break;
3999       default:                    ShouldNotReachHere();
4000     }
4001     __ jcc(acond, ok);
4002   }
4003   if (op->halt()) {
4004     const char* str = __ code_string(op->msg());
4005     __ stop(str);
4006   } else {
4007     breakpoint();
4008   }
4009   __ bind(ok);
4010 }
4011 #endif
4012 
4013 void LIR_Assembler::membar() {
4014   // x86 is TSO: only StoreLoad reordering requires an explicit fence
4015   __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad));
4016 }
4017 
4018 void LIR_Assembler::membar_acquire() {
4019   // No x86 machines currently require load fences
4020 }
4021 
4022 void LIR_Assembler::membar_release() {
4023   // No x86 machines currently require store fences
4024 }
4025 
4026 void LIR_Assembler::membar_loadload() {
4027   // no-op
4028   //__ membar(Assembler::Membar_mask_bits(Assembler::loadload));
4029 }
4030 
4031 void LIR_Assembler::membar_storestore() {
4032   // no-op
4033   //__ membar(Assembler::Membar_mask_bits(Assembler::storestore));
4034 }
4035 
4036 void LIR_Assembler::membar_loadstore() {
4037   // no-op
4038   //__ membar(Assembler::Membar_mask_bits(Assembler::loadstore));
4039 }
4040 
4041 void LIR_Assembler::membar_storeload() {
4042   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
4043 }
4044 
4045 void LIR_Assembler::on_spin_wait() {
4046   __ pause ();
4047 }
4048 
4049 void LIR_Assembler::get_thread(LIR_Opr result_reg) {
4050   assert(result_reg->is_register(), "check");
4051 #ifdef _LP64
4052   // __ get_thread(result_reg->as_register_lo());
4053   __ mov(result_reg->as_register(), r15_thread);
4054 #else
4055   __ get_thread(result_reg->as_register());
4056 #endif // _LP64
4057 }
4058 
4059 
4060 void LIR_Assembler::peephole(LIR_List*) {
4061   // do nothing for now
4062 }
4063 
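     // lir_xadd is implemented with LOCK XADD (atomic fetch-and-add) and lir_xchg with
     // XCHG, which is implicitly locked on x86; compressed oops are encoded before and
     // decoded after the exchange.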
4064 void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
4065   assert(data == dest, "xchg/xadd uses only 2 operands");
4066 
4067   if (data->type() == T_INT) {
4068     if (code == lir_xadd) {
4069       __ lock();
4070       __ xaddl(as_Address(src->as_address_ptr()), data->as_register());
4071     } else {
4072       __ xchgl(data->as_register(), as_Address(src->as_address_ptr()));
4073     }
4074   } else if (data->is_oop()) {
4075     assert (code == lir_xchg, "xadd for oops");
4076     Register obj = data->as_register();
4077 #ifdef _LP64
4078     if (UseCompressedOops) {
4079       __ encode_heap_oop(obj);
4080       __ xchgl(obj, as_Address(src->as_address_ptr()));
4081       __ decode_heap_oop(obj);
4082     } else {
4083       __ xchgptr(obj, as_Address(src->as_address_ptr()));
4084     }
4085 #else
4086     __ xchgl(obj, as_Address(src->as_address_ptr()));
4087 #endif
4088   } else if (data->type() == T_LONG) {
4089 #ifdef _LP64
4090     assert(data->as_register_lo() == data->as_register_hi(), "should be a single register");
4091     if (code == lir_xadd) {
4092       __ lock();
4093       __ xaddq(as_Address(src->as_address_ptr()), data->as_register_lo());
4094     } else {
4095       __ xchgq(data->as_register_lo(), as_Address(src->as_address_ptr()));
4096     }
4097 #else
4098     ShouldNotReachHere();
4099 #endif
4100   } else {
4101     ShouldNotReachHere();
4102   }
4103 }
4104 
4105 #undef __