1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
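// For example, in the reg_defs below EAX is given encoding 0 and ECX encoding 1,
// matching the register numbers used in x86 ModRM/SIB bytes.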
   61 
   62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but SOE was
// turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of emission of
// assembly for a machnode. During emission the FPU stack is pushed, making FPR1 == st(1)
// temporarily. However, at any safepoint the stack will not have this element, so
// FPR1 == st(0) from the oopMap viewpoint. This same numbering weirdness forces the
// instruction encoding to play games with the register encode to correct for the 0/1
// issue. See MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
  224 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
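// This works because the long pairs used here (EDX:EAX, EBX:ECX, EDI:EBP) are chosen so
// that the x86 encoding of the high half is the low half's encoding plus 2
// (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).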
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1 register.
    // Until that is fixed, the RA should not allocate the K1 register; this prevents
    // accidental corruption of the value held in K1.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
// Buffer for 128-bit masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
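// ANDing a float/double with the corresponding signmask constant clears the sign bit
// (AbsF/AbsD), while XORing with the signflip constant toggles it (NegF/NegD).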
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
// !!!!! Special hack to get all types of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
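//       For a static Java call the 5 bytes are the CALL rel32 instruction itself; the
//       dynamic (inline cache) call adds the 5-byte MOV of the IC constant in front of
//       it (see compute_padding below).  Both also account for any FLDCW/VZEROUPPER
//       reset emitted before the call via pre_call_resets_size().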
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
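// For example, if the displacement would otherwise start at offset 7 and the required
// alignment is 4, then align_up(7, 4) - 7 = 1 byte of padding is emitted so that the
// 4-byte displacement begins at offset 8.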
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
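// ModRM and SIB bytes share the same 2-3-3 bit layout: f1 (mod or scale) in bits 7..6,
// f2 (reg or index) in bits 5..3, and f3 (r/m or base) in bits 2..0.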
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
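// Emit the ModRM byte (plus SIB byte and displacement, as needed) for a register/memory
// operand: mod 0x0 means no displacement (not usable with an EBP base), mod 0x1 an 8-bit
// displacement, and mod 0x2 a 32-bit displacement.  A SIB byte is emitted whenever an
// index register is present or the base is ESP.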
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
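// Materialize a three-way compare result in dst from the flags left by a preceding
// comiss/ucomiss-style compare: -1 for 'less than' or unordered (NaN), 0 for 'equal',
// and 1 for 'greater than'.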
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  631 
  632   C->output()->set_frame_complete(cbuf.insts_size());
  633 
  634   if (C->has_mach_constant_base_node()) {
  635     // NOTE: We set the table base offset here because users might be
  636     // emitted before MachConstantBaseNode.
  637     ConstantTable& constant_table = C->output()->constant_table();
  638     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  639   }
  640 }
  641 
  642 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  643   return MachNode::size(ra_); // too many variables; just compute it the hard way
  644 }
  645 
  646 int MachPrologNode::reloc() const {
  647   return 0; // a large enough number
  648 }
  649 
  650 //=============================================================================
  651 #ifndef PRODUCT
  652 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  653   Compile *C = ra_->C;
  654   int framesize = C->output()->frame_size_in_bytes();
  655   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  657   framesize -= 2*wordSize;
  658 
  659   if (C->max_vector_size() > 16) {
  660     st->print("VZEROUPPER");
  661     st->cr(); st->print("\t");
  662   }
  663   if (C->in_24_bit_fp_mode()) {
  664     st->print("FLDCW  standard control word");
  665     st->cr(); st->print("\t");
  666   }
  667   if (framesize) {
  668     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  669     st->cr(); st->print("\t");
  670   }
  671   st->print_cr("POPL   EBP"); st->print("\t");
  672   if (do_polling() && C->is_method_compilation()) {
  673     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  674               "JA       #safepoint_stub\t"
  675               "# Safepoint: poll for GC");
  676   }
  677 }
  678 #endif
  679 
  680 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  681   Compile *C = ra_->C;
  682   MacroAssembler _masm(&cbuf);
  683 
  684   if (C->max_vector_size() > 16) {
  685     // Clear upper bits of YMM registers when current compiled code uses
  686     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  687     _masm.vzeroupper();
  688   }
  689   // If method set FPU control word, restore to standard control word
  690   if (C->in_24_bit_fp_mode()) {
  691     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  692   }
  693 
  694   int framesize = C->output()->frame_size_in_bytes();
  695   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  697   framesize -= 2*wordSize;
  698 
  699   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  700 
  701   if (framesize >= 128) {
  702     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  703     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  704     emit_d32(cbuf, framesize);
  705   } else if (framesize) {
  706     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  707     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  708     emit_d8(cbuf, framesize);
  709   }
  710 
  711   emit_opcode(cbuf, 0x58 | EBP_enc);
  712 
  713   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  714     __ reserved_stack_check();
  715   }
  716 
  717   if (do_polling() && C->is_method_compilation()) {
  718     Register thread = as_Register(EBX_enc);
  719     MacroAssembler masm(&cbuf);
  720     __ get_thread(thread);
  721     Label dummy_label;
  722     Label* code_stub = &dummy_label;
  723     if (!C->output()->in_scratch_emit_size()) {
  724       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  725     }
  726     __ relocate(relocInfo::poll_return_type);
  727     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  728   }
  729 }
  730 
  731 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  732   return MachNode::size(ra_); // too many variables; just compute it
  733                               // the hard way
  734 }
  735 
  736 int MachEpilogNode::reloc() const {
  737   return 0; // a large enough number
  738 }
  739 
  740 const Pipeline * MachEpilogNode::pipeline() const {
  741   return MachNode::pipeline_class();
  742 }
  743 
  744 //=============================================================================
  745 
  746 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  747 static enum RC rc_class( OptoReg::Name reg ) {
  748 
  749   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  750   if (OptoReg::is_stack(reg)) return rc_stack;
  751 
  752   VMReg r = OptoReg::as_VMReg(reg);
  753   if (r->is_Register()) return rc_int;
  754   if (r->is_FloatRegister()) {
  755     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  756     return rc_float;
  757   }
  758   if (r->is_KRegister()) return rc_kreg;
  759   assert(r->is_XMMRegister(), "must be");
  760   return rc_xmm;
  761 }
  762 
  763 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  764                         int opcode, const char *op_str, int size, outputStream* st ) {
  765   if( cbuf ) {
  766     emit_opcode  (*cbuf, opcode );
  767     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  768 #ifndef PRODUCT
  769   } else if( !do_size ) {
  770     if( size != 0 ) st->print("\n\t");
  771     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  772       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  773       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  774     } else { // FLD, FST, PUSH, POP
  775       st->print("%s [ESP + #%d]",op_str,offset);
  776     }
  777 #endif
  778   }
  779   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  780   return size+3+offset_size;
  781 }
  782 
  783 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  784 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  785                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  786   int in_size_in_bits = Assembler::EVEX_32bit;
  787   int evex_encoding = 0;
  788   if (reg_lo+1 == reg_hi) {
  789     in_size_in_bits = Assembler::EVEX_64bit;
  790     evex_encoding = Assembler::VEX_W;
  791   }
  792   if (cbuf) {
  793     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory
    //                          operations, since it maps more cases to a single-byte displacement.
  796     _masm.set_managed();
  797     if (reg_lo+1 == reg_hi) { // double move?
  798       if (is_load) {
  799         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  800       } else {
  801         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  802       }
  803     } else {
  804       if (is_load) {
  805         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  806       } else {
  807         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  808       }
  809     }
  810 #ifndef PRODUCT
  811   } else if (!do_size) {
  812     if (size != 0) st->print("\n\t");
  813     if (reg_lo+1 == reg_hi) { // double move?
  814       if (is_load) st->print("%s %s,[ESP + #%d]",
  815                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  816                               Matcher::regName[reg_lo], offset);
  817       else         st->print("MOVSD  [ESP + #%d],%s",
  818                               offset, Matcher::regName[reg_lo]);
  819     } else {
  820       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  821                               Matcher::regName[reg_lo], offset);
  822       else         st->print("MOVSS  [ESP + #%d],%s",
  823                               offset, Matcher::regName[reg_lo]);
  824     }
  825 #endif
  826   }
  827   bool is_single_byte = false;
  828   if ((UseAVX > 2) && (offset != 0)) {
  829     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  830   }
  831   int offset_size = 0;
  832   if (UseAVX > 2 ) {
  833     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  834   } else {
  835     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  836   }
  837   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  838   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  839   return size+5+offset_size;
  840 }
  841 
  842 
  843 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  844                             int src_hi, int dst_hi, int size, outputStream* st ) {
  845   if (cbuf) {
  846     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  848     _masm.set_managed();
  849     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  851                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  852     } else {
  853       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  854                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  855     }
  856 #ifndef PRODUCT
  857   } else if (!do_size) {
  858     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  860       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  861         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  862       } else {
  863         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  864       }
  865     } else {
  866       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  867         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  868       } else {
  869         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  870       }
  871     }
  872 #endif
  873   }
  874   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  875   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  876   int sz = (UseAVX > 2) ? 6 : 4;
  877   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  878       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  879   return size + sz;
  880 }
  881 
  882 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  883                             int src_hi, int dst_hi, int size, outputStream* st ) {
  884   // 32-bit
  885   if (cbuf) {
  886     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  888     _masm.set_managed();
  889     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  890              as_Register(Matcher::_regEncode[src_lo]));
  891 #ifndef PRODUCT
  892   } else if (!do_size) {
  893     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  894 #endif
  895   }
  896   return (UseAVX> 2) ? 6 : 4;
  897 }
  898 
  899 
  900 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  901                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  902   // 32-bit
  903   if (cbuf) {
  904     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  906     _masm.set_managed();
  907     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  908              as_XMMRegister(Matcher::_regEncode[src_lo]));
  909 #ifndef PRODUCT
  910   } else if (!do_size) {
  911     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  912 #endif
  913   }
  914   return (UseAVX> 2) ? 6 : 4;
  915 }
  916 
  917 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  918   if( cbuf ) {
  919     emit_opcode(*cbuf, 0x8B );
  920     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  921 #ifndef PRODUCT
  922   } else if( !do_size ) {
  923     if( size != 0 ) st->print("\n\t");
  924     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  925 #endif
  926   }
  927   return size+2;
  928 }
  929 
  930 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  931                                  int offset, int size, outputStream* st ) {
  932   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  933     if( cbuf ) {
  934       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  935       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  936 #ifndef PRODUCT
  937     } else if( !do_size ) {
  938       if( size != 0 ) st->print("\n\t");
  939       st->print("FLD    %s",Matcher::regName[src_lo]);
  940 #endif
  941     }
  942     size += 2;
  943   }
  944 
  945   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  946   const char *op_str;
  947   int op;
  948   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  949     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  950     op = 0xDD;
  951   } else {                   // 32-bit store
  952     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  953     op = 0xD9;
  954     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  955   }
  956 
  957   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  958 }
  959 
  960 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  961 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  962                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  963 
  964 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  965                             int stack_offset, int reg, uint ireg, outputStream* st);
  966 
  967 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  968                                      int dst_offset, uint ireg, outputStream* st) {
  969   if (cbuf) {
  970     MacroAssembler _masm(cbuf);
  971     switch (ireg) {
  972     case Op_VecS:
  973       __ pushl(Address(rsp, src_offset));
  974       __ popl (Address(rsp, dst_offset));
  975       break;
  976     case Op_VecD:
  977       __ pushl(Address(rsp, src_offset));
  978       __ popl (Address(rsp, dst_offset));
  979       __ pushl(Address(rsp, src_offset+4));
  980       __ popl (Address(rsp, dst_offset+4));
  981       break;
  982     case Op_VecX:
  983       __ movdqu(Address(rsp, -16), xmm0);
  984       __ movdqu(xmm0, Address(rsp, src_offset));
  985       __ movdqu(Address(rsp, dst_offset), xmm0);
  986       __ movdqu(xmm0, Address(rsp, -16));
  987       break;
  988     case Op_VecY:
  989       __ vmovdqu(Address(rsp, -32), xmm0);
  990       __ vmovdqu(xmm0, Address(rsp, src_offset));
  991       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  992       __ vmovdqu(xmm0, Address(rsp, -32));
  993       break;
  994     case Op_VecZ:
  995       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  996       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  997       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  998       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  999       break;
 1000     default:
 1001       ShouldNotReachHere();
 1002     }
 1003 #ifndef PRODUCT
 1004   } else {
 1005     switch (ireg) {
 1006     case Op_VecS:
 1007       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1008                 "popl    [rsp + #%d]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecD:
 1012       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1016                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1017       break;
 1018      case Op_VecX:
 1019       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1020                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1021                 "movdqu  [rsp + #%d], xmm0\n\t"
 1022                 "movdqu  xmm0, [rsp - #16]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     case Op_VecY:
 1026       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1027                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1028                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1029                 "vmovdqu xmm0, [rsp - #32]",
 1030                 src_offset, dst_offset);
 1031       break;
 1032     case Op_VecZ:
 1033       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1034                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1035                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1036                 "vmovdqu xmm0, [rsp - #64]",
 1037                 src_offset, dst_offset);
 1038       break;
 1039     default:
 1040       ShouldNotReachHere();
 1041     }
 1042 #endif
 1043   }
 1044 }
 1045 
 1046 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1047   // Get registers to move
 1048   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1049   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1050   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1051   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1052 
 1053   enum RC src_second_rc = rc_class(src_second);
 1054   enum RC src_first_rc = rc_class(src_first);
 1055   enum RC dst_second_rc = rc_class(dst_second);
 1056   enum RC dst_first_rc = rc_class(dst_first);
 1057 
 1058   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1059 
 1060   // Generate spill code!
 1061   int size = 0;
 1062 
 1063   if( src_first == dst_first && src_second == dst_second )
 1064     return size;            // Self copy, no move
 1065 
 1066   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1067     uint ireg = ideal_reg();
 1068     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1069     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1070     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1071     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1072       // mem -> mem
 1073       int src_offset = ra_->reg2offset(src_first);
 1074       int dst_offset = ra_->reg2offset(dst_first);
 1075       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1076     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1077       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1078     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1079       int stack_offset = ra_->reg2offset(dst_first);
 1080       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1081     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1082       int stack_offset = ra_->reg2offset(src_first);
 1083       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1084     } else {
 1085       ShouldNotReachHere();
 1086     }
 1087     return 0;
 1088   }
 1089 
 1090   // --------------------------------------
 1091   // Check for mem-mem move.  push/pop to move.
 1092   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1093     if( src_second == dst_first ) { // overlapping stack copy ranges
 1094       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1095       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1096       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1097       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1098     }
 1099     // move low bits
 1100     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1101     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1102     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1103       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1104       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1105     }
 1106     return size;
 1107   }
 1108 
 1109   // --------------------------------------
 1110   // Check for integer reg-reg copy
 1111   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1112     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1113 
 1114   // Check for integer store
 1115   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1116     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1117 
 1118   // Check for integer load
 1119   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1120     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1121 
 1122   // Check for integer reg-xmm reg copy
 1123   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1124     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1125             "no 64 bit integer-float reg moves" );
 1126     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1127   }
 1128   // --------------------------------------
 1129   // Check for float reg-reg copy
 1130   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1131     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1132             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1133     if( cbuf ) {
 1134 
 1135       // Note the mucking with the register encode to compensate for the 0/1
 1136       // indexing issue mentioned in a comment in the reg_def sections
 1137       // for FPR registers many lines above here.
 1138 
 1139       if( src_first != FPR1L_num ) {
 1140         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1141         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1142         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1143         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1144      } else {
 1145         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1146         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1147      }
 1148 #ifndef PRODUCT
 1149     } else if( !do_size ) {
 1150       if( size != 0 ) st->print("\n\t");
 1151       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1152       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1153 #endif
 1154     }
 1155     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1156   }
 1157 
 1158   // Check for float store
 1159   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1160     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1161   }
 1162 
 1163   // Check for float load
 1164   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1165     int offset = ra_->reg2offset(src_first);
 1166     const char *op_str;
 1167     int op;
 1168     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1169       op_str = "FLD_D";
 1170       op = 0xDD;
 1171     } else {                   // 32-bit load
 1172       op_str = "FLD_S";
 1173       op = 0xD9;
 1174       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1175     }
 1176     if( cbuf ) {
 1177       emit_opcode  (*cbuf, op );
 1178       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1179       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1180       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1181 #ifndef PRODUCT
 1182     } else if( !do_size ) {
 1183       if( size != 0 ) st->print("\n\t");
 1184       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1185 #endif
 1186     }
 1187     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1188     return size + 3+offset_size+2;
 1189   }
 1190 
 1191   // Check for xmm reg-reg copy
 1192   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1193     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1194             (src_first+1 == src_second && dst_first+1 == dst_second),
 1195             "no non-adjacent float-moves" );
 1196     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1197   }
 1198 
 1199   // Check for xmm reg-integer reg copy
 1200   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1201     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1202             "no 64 bit float-integer reg moves" );
 1203     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1204   }
 1205 
 1206   // Check for xmm store
 1207   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1208     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1209   }
 1210 
 1211   // Check for float xmm load
 1212   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1213     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1214   }
 1215 
 1216   // Copy from float reg to xmm reg
 1217   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1218     // copy to the top of stack from floating point reg
 1219     // and use LEA to preserve flags
 1220     if( cbuf ) {
 1221       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1222       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1223       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1224       emit_d8(*cbuf,0xF8);
 1225 #ifndef PRODUCT
 1226     } else if( !do_size ) {
 1227       if( size != 0 ) st->print("\n\t");
 1228       st->print("LEA    ESP,[ESP-8]");
 1229 #endif
 1230     }
 1231     size += 4;
 1232 
 1233     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1234 
 1235     // Copy from the temp memory to the xmm reg.
 1236     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1237 
 1238     if( cbuf ) {
 1239       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1240       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1241       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1242       emit_d8(*cbuf,0x08);
 1243 #ifndef PRODUCT
 1244     } else if( !do_size ) {
 1245       if( size != 0 ) st->print("\n\t");
 1246       st->print("LEA    ESP,[ESP+8]");
 1247 #endif
 1248     }
 1249     size += 4;
 1250     return size;
 1251   }
 1252 
 1253   // AVX-512 opmask specific spilling.
 1254   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1255     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1256     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1257     MacroAssembler _masm(cbuf);
 1258     int offset = ra_->reg2offset(src_first);
 1259     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1260     return 0;
 1261   }
 1262 
 1263   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1264     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1265     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1266     MacroAssembler _masm(cbuf);
 1267     int offset = ra_->reg2offset(dst_first);
 1268     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1269     return 0;
 1270   }
 1271 
 1272   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1273     Unimplemented();
 1274     return 0;
 1275   }
 1276 
 1277   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1278     Unimplemented();
 1279     return 0;
 1280   }
 1281 
 1282   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1283     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1284     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1285     MacroAssembler _masm(cbuf);
 1286     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1287     return 0;
 1288   }
 1289 
 1290   assert( size > 0, "missed a case" );
 1291 
 1292   // --------------------------------------------------------------------
 1293   // Check for second bits still needing moving.
 1294   if( src_second == dst_second )
 1295     return size;               // Self copy; no move
 1296   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1297 
 1298   // Check for second word int-int move
 1299   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1300     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1301 
 1302   // Check for second word integer store
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1304     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1305 
 1306   // Check for second word integer load
 1307   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1309 
 1310   Unimplemented();
 1311   return 0; // Mute compiler
 1312 }
 1313 
 1314 #ifndef PRODUCT
 1315 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1316   implementation( NULL, ra_, false, st );
 1317 }
 1318 #endif
 1319 
 1320 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1321   implementation( &cbuf, ra_, false, NULL );
 1322 }
 1323 
 1324 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1325   return MachNode::size(ra_);
 1326 }
 1327 
 1328 
 1329 //=============================================================================
 1330 #ifndef PRODUCT
 1331 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1332   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1333   int reg = ra_->get_reg_first(this);
 1334   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1335 }
 1336 #endif
 1337 
 1338 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_encode(this);
 1341   if( offset >= 128 ) {
 1342     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1343     emit_rm(cbuf, 0x2, reg, 0x04);
 1344     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1345     emit_d32(cbuf, offset);
 1346   }
 1347   else {
 1348     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1349     emit_rm(cbuf, 0x1, reg, 0x04);
 1350     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1351     emit_d8(cbuf, offset);
 1352   }
 1353 }
 1354 
 1355 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1356   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
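  // LEA reg,[ESP+offset] encodes as opcode + ModRM + SIB + disp32 (7 bytes)
  // for offsets >= 128, or opcode + ModRM + SIB + disp8 (4 bytes) otherwise,
  // matching the two forms emitted above.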
 1357   if( offset >= 128 ) {
 1358     return 7;
 1359   }
 1360   else {
 1361     return 4;
 1362   }
 1363 }
 1364 
 1365 //=============================================================================
 1366 #ifndef PRODUCT
 1367 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1368   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1369   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1370   st->print_cr("\tNOP");
 1371   st->print_cr("\tNOP");
 1372   if( !OptoBreakpoint )
 1373     st->print_cr("\tNOP");
 1374 }
 1375 #endif
 1376 
 1377 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1378   MacroAssembler masm(&cbuf);
 1379 #ifdef ASSERT
 1380   uint insts_size = cbuf.insts_size();
 1381 #endif
 1382   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1383   masm.jump_cc(Assembler::notEqual,
 1384                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1387   int nops_cnt = 2;
 1388   if( !OptoBreakpoint ) // Leave space for int3
 1389      nops_cnt += 1;
 1390   masm.nop(nops_cnt);
 1391 
 1392   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1393 }
 1394 
 1395 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
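  // CMP EAX,[ECX+4] is 3 bytes and JNE rel32 to the IC miss stub is 6 bytes,
  // plus 2 or 3 NOPs (see emit() above).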
 1396   return OptoBreakpoint ? 11 : 12;
 1397 }
 1398 
 1399 
 1400 //=============================================================================
 1401 
 1402 // Vector calling convention not supported.
 1403 const bool Matcher::supports_vector_calling_convention() {
 1404   return false;
 1405 }
 1406 
 1407 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1408   Unimplemented();
 1409   return OptoRegPair(0, 0);
 1410 }
 1411 
 1412 // Is this branch offset short enough that a short branch can be used?
 1413 //
 1414 // NOTE: If the platform does not provide any short branch variants, then
 1415 //       this method should return false for offset 0.
 1416 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
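  // For example, a 2-byte short branch whose target lies 100 bytes past the
  // branch address is passed offset == 100; the encoded rel8 displacement is
  // then 100 - 2 = 98, which fits in [-128, 127].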
 1420   offset -= br_size;
 1421 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 1424   if (rule == jmpConUCF2_rule)
 1425     return (-126 <= offset && offset <= 125);
 1426   return (-128 <= offset && offset <= 127);
 1427 }
 1428 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1433 bool Matcher::can_be_java_arg( int reg ) {
 1434   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1435   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1436   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1437   return false;
 1438 }
 1439 
 1440 bool Matcher::is_spillable_arg( int reg ) {
 1441   return can_be_java_arg(reg);
 1442 }
 1443 
 1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating a negative
  // divisor always yields a correct positive 32-bit value).
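  // For example, divisor values such as 7 or -10 pass this check,
  // while 0x100000000LL or min_jint do not.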
 1450   return VM_Version::has_fast_idiv() &&
 1451          (divisor == (int)divisor && divisor != min_jint);
 1452 }
 1453 
 1454 // Register for DIVI projection of divmodI
 1455 RegMask Matcher::divI_proj_mask() {
 1456   return EAX_REG_mask();
 1457 }
 1458 
 1459 // Register for MODI projection of divmodI
 1460 RegMask Matcher::modI_proj_mask() {
 1461   return EDX_REG_mask();
 1462 }
 1463 
 1464 // Register for DIVL projection of divmodL
 1465 RegMask Matcher::divL_proj_mask() {
 1466   ShouldNotReachHere();
 1467   return RegMask();
 1468 }
 1469 
 1470 // Register for MODL projection of divmodL
 1471 RegMask Matcher::modL_proj_mask() {
 1472   ShouldNotReachHere();
 1473   return RegMask();
 1474 }
 1475 
 1476 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1477   return NO_REG_mask();
 1478 }
 1479 
// Returns true if the high 32 bits of the value are known to be zero.
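// For example, (x & 0xFFFFFFFFL) qualifies, as does the constant 0x12345678L.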
 1481 bool is_operand_hi32_zero(Node* n) {
 1482   int opc = n->Opcode();
 1483   if (opc == Op_AndL) {
 1484     Node* o2 = n->in(2);
 1485     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1486       return true;
 1487     }
 1488   }
 1489   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1490     return true;
 1491   }
 1492   return false;
 1493 }
 1494 
 1495 %}
 1496 
 1497 //----------ENCODING BLOCK-----------------------------------------------------
 1498 // This block specifies the encoding classes used by the compiler to output
 1499 // byte streams.  Encoding classes generate functions which are called by
 1500 // Machine Instruction Nodes in order to generate the bit encoding of the
 1501 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1503 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1504 // operand to generate a function which returns its register number when
 1505 // queried.   CONST_INTER causes an operand to generate a function which
 1506 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1507 // operand to generate four functions which return the Base Register, the
 1508 // Index Register, the Scale Value, and the Offset Value of the operand when
 1509 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1511 // associated with each basic boolean condition for a conditional instruction.
 1512 // Instructions specify two basic values for encoding.  They use the
 1513 // ins_encode keyword to specify their encoding class (which must be one of
 1514 // the class names specified in the encoding block), and they use the
 1515 // opcode keyword to specify, in order, their primary, secondary, and
 1516 // tertiary opcode.  Only the opcode sections which a particular instruction
 1517 // needs for encoding need to be specified.
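//
// As an illustrative sketch only (the operand and pipeline names below are
// representative, not tied to any particular definition in this file), an
// instruction using these keywords looks like:
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);                           // primary opcode
//     ins_encode( OpcP, RegReg( dst, src) );  // encoding classes defined below
//     ins_pipe( ialu_reg_reg );
//   %}
//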
 1518 encode %{
 1519   // Build emit functions for each basic byte or larger field in the intel
 1520   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1521   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in order,
  // so that the adlc can build the emit functions automagically
 1525 
 1526   // Emit primary opcode
 1527   enc_class OpcP %{
 1528     emit_opcode(cbuf, $primary);
 1529   %}
 1530 
 1531   // Emit secondary opcode
 1532   enc_class OpcS %{
 1533     emit_opcode(cbuf, $secondary);
 1534   %}
 1535 
 1536   // Emit opcode directly
 1537   enc_class Opcode(immI d8) %{
 1538     emit_opcode(cbuf, $d8$$constant);
 1539   %}
 1540 
 1541   enc_class SizePrefix %{
 1542     emit_opcode(cbuf,0x66);
 1543   %}
 1544 
 1545   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1546     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1547   %}
 1548 
 1549   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1550     emit_opcode(cbuf,$opcode$$constant);
 1551     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1552   %}
 1553 
 1554   enc_class mov_r32_imm0( rRegI dst ) %{
 1555     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1556     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1557   %}
 1558 
 1559   enc_class cdq_enc %{
 1560     // Full implementation of Java idiv and irem; checks for
 1561     // special case as described in JVM spec., p.243 & p.271.
 1562     //
 1563     //         normal case                           special case
 1564     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
 1572     //
 1573     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1574     //  0F 85 0B 00 00 00    jne         normal_case
 1575     //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,-1
 1577     //  0F 84 03 00 00 00    je          done
 1578     //                  normal_case:
 1579     //  99                   cdq
 1580     //  F7 F9                idiv        rax,ecx
 1581     //                  done:
 1582     //
 1583     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1584     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1585     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1586     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1587     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1588     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1589     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1
 1591     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1592     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1594     // normal_case:
 1595     emit_opcode(cbuf,0x99);                                         // cdq
 1596     // idiv (note: must be emitted by the user of this rule)
 1597     // normal:
 1598   %}
 1599 
 1600   // Dense encoding for older common ops
 1601   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1602     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1603   %}
 1604 
 1605 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1607   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1608     // Check for 8-bit immediate, and set sign extend bit in opcode
 1609     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1610       emit_opcode(cbuf, $primary | 0x02);
 1611     }
 1612     else {                          // If 32-bit immediate
 1613       emit_opcode(cbuf, $primary);
 1614     }
 1615   %}
 1616 
 1617   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1618     // Emit primary opcode and set sign-extend bit
 1619     // Check for 8-bit immediate, and set sign extend bit in opcode
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1622     else {                          // If 32-bit immediate
 1623       emit_opcode(cbuf, $primary);
 1624     }
 1625     // Emit r/m byte with secondary opcode, after primary opcode.
 1626     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1627   %}
 1628 
 1629   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1630     // Check for 8-bit immediate, and set sign extend bit in opcode
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       $$$emit8$imm$$constant;
 1633     }
 1634     else {                          // If 32-bit immediate
 1635       // Output immediate
 1636       $$$emit32$imm$$constant;
 1637     }
 1638   %}
 1639 
 1640   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1641     // Emit primary opcode and set sign-extend bit
 1642     // Check for 8-bit immediate, and set sign extend bit in opcode
 1643     int con = (int)$imm$$constant; // Throw away top bits
 1644     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1645     // Emit r/m byte with secondary opcode, after primary opcode.
 1646     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1647     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1648     else                               emit_d32(cbuf,con);
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1652     // Emit primary opcode and set sign-extend bit
 1653     // Check for 8-bit immediate, and set sign extend bit in opcode
 1654     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1655     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with tertiary opcode, after primary opcode.
 1657     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1659     else                               emit_d32(cbuf,con);
 1660   %}
 1661 
 1662   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1663     emit_cc(cbuf, $secondary, $dst$$reg );
 1664   %}
 1665 
 1666   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1667     int destlo = $dst$$reg;
 1668     int desthi = HIGH_FROM_LOW(destlo);
 1669     // bswap lo
 1670     emit_opcode(cbuf, 0x0F);
 1671     emit_cc(cbuf, 0xC8, destlo);
 1672     // bswap hi
 1673     emit_opcode(cbuf, 0x0F);
 1674     emit_cc(cbuf, 0xC8, desthi);
 1675     // xchg lo and hi
 1676     emit_opcode(cbuf, 0x87);
 1677     emit_rm(cbuf, 0x3, destlo, desthi);
 1678   %}
 1679 
 1680   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1681     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1682   %}
 1683 
 1684   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1685     $$$emit8$primary;
 1686     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1687   %}
 1688 
 1689   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1690     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1691     emit_d8(cbuf, op >> 8 );
 1692     emit_d8(cbuf, op & 255);
 1693   %}
 1694 
 1695   // emulate a CMOV with a conditional branch around a MOV
 1696   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1697     // Invert sense of branch from sense of CMOV
 1698     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1699     emit_d8( cbuf, $brOffs$$constant );
 1700   %}
 1701 
 1702   enc_class enc_PartialSubtypeCheck( ) %{
 1703     Register Redi = as_Register(EDI_enc); // result register
 1704     Register Reax = as_Register(EAX_enc); // super class
 1705     Register Recx = as_Register(ECX_enc); // killed
 1706     Register Resi = as_Register(ESI_enc); // sub class
 1707     Label miss;
 1708 
 1709     MacroAssembler _masm(&cbuf);
 1710     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1711                                      NULL, &miss,
 1712                                      /*set_cond_codes:*/ true);
 1713     if ($primary) {
 1714       __ xorptr(Redi, Redi);
 1715     }
 1716     __ bind(miss);
 1717   %}
 1718 
 1719   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1720     MacroAssembler masm(&cbuf);
 1721     int start = masm.offset();
 1722     if (UseSSE >= 2) {
 1723       if (VerifyFPU) {
 1724         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1725       }
 1726     } else {
 1727       // External c_calling_convention expects the FPU stack to be 'clean'.
 1728       // Compiled code leaves it dirty.  Do cleanup now.
 1729       masm.empty_FPU_stack();
 1730     }
 1731     if (sizeof_FFree_Float_Stack_All == -1) {
 1732       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1733     } else {
 1734       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1735     }
 1736   %}
 1737 
 1738   enc_class Verify_FPU_For_Leaf %{
 1739     if( VerifyFPU ) {
 1740       MacroAssembler masm(&cbuf);
 1741       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1742     }
 1743   %}
 1744 
 1745   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1746     // This is the instruction starting address for relocation info.
 1747     cbuf.set_insts_mark();
 1748     $$$emit8$primary;
 1749     // CALL directly to the runtime
 1750     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1751                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1752 
 1753     if (UseSSE >= 2) {
 1754       MacroAssembler _masm(&cbuf);
 1755       BasicType rt = tf()->return_type();
 1756 
 1757       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1758         // A C runtime call where the return value is unused.  In SSE2+
 1759         // mode the result needs to be removed from the FPU stack.  It's
 1760         // likely that this function call could be removed by the
 1761         // optimizer if the C function is a pure function.
 1762         __ ffree(0);
 1763       } else if (rt == T_FLOAT) {
 1764         __ lea(rsp, Address(rsp, -4));
 1765         __ fstp_s(Address(rsp, 0));
 1766         __ movflt(xmm0, Address(rsp, 0));
 1767         __ lea(rsp, Address(rsp,  4));
 1768       } else if (rt == T_DOUBLE) {
 1769         __ lea(rsp, Address(rsp, -8));
 1770         __ fstp_d(Address(rsp, 0));
 1771         __ movdbl(xmm0, Address(rsp, 0));
 1772         __ lea(rsp, Address(rsp,  8));
 1773       }
 1774     }
 1775   %}
 1776 
 1777   enc_class pre_call_resets %{
    // If the method sets the FPU control word, restore the standard control word before the call
 1779     debug_only(int off0 = cbuf.insts_size());
 1780     if (ra_->C->in_24_bit_fp_mode()) {
 1781       MacroAssembler _masm(&cbuf);
 1782       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1783     }
 1784     // Clear upper bits of YMM registers when current compiled code uses
 1785     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1786     MacroAssembler _masm(&cbuf);
 1787     __ vzeroupper();
 1788     debug_only(int off1 = cbuf.insts_size());
 1789     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1790   %}
 1791 
 1792   enc_class post_call_FPU %{
    // If the method sets the FPU control word, reload the 24-bit control word here after the call
 1794     if (Compile::current()->in_24_bit_fp_mode()) {
 1795       MacroAssembler masm(&cbuf);
 1796       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1797     }
 1798   %}
 1799 
 1800   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1801     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1802     // who we intended to call.
 1803     cbuf.set_insts_mark();
 1804     $$$emit8$primary;
 1805 
 1806     if (!_method) {
 1807       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1808                      runtime_call_Relocation::spec(),
 1809                      RELOC_IMM32);
 1810     } else {
 1811       int method_index = resolved_method_index(cbuf);
 1812       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1813                                                   : static_call_Relocation::spec(method_index);
 1814       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1815                      rspec, RELOC_DISP32);
 1816       // Emit stubs for static call.
 1817       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1818       if (stub == NULL) {
 1819         ciEnv::current()->record_failure("CodeCache is full");
 1820         return;
 1821       }
 1822     }
 1823   %}
 1824 
 1825   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1826     MacroAssembler _masm(&cbuf);
 1827     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1828   %}
 1829 
 1830   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1831     int disp = in_bytes(Method::from_compiled_offset());
 1832     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1833 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1835     cbuf.set_insts_mark();
 1836     $$$emit8$primary;
 1837     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1838     emit_d8(cbuf, disp);             // Displacement
 1839 
 1840   %}
 1841 
 1842 //   Following encoding is no longer used, but may be restored if calling
 1843 //   convention changes significantly.
 1844 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1845 //
 1846 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1847 //     // int ic_reg     = Matcher::inline_cache_reg();
 1848 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1849 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1850 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1851 //
 1852 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1853 //     // // so we load it immediately before the call
 1854 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1855 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1856 //
 1857 //     // xor rbp,ebp
 1858 //     emit_opcode(cbuf, 0x33);
 1859 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1860 //
 1861 //     // CALL to interpreter.
 1862 //     cbuf.set_insts_mark();
 1863 //     $$$emit8$primary;
 1864 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1865 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1866 //   %}
 1867 
 1868   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1869     $$$emit8$primary;
 1870     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1871     $$$emit8$shift$$constant;
 1872   %}
 1873 
 1874   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1875     // Load immediate does not have a zero or sign extended version
 1876     // for 8-bit immediates
 1877     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1878     $$$emit32$src$$constant;
 1879   %}
 1880 
 1881   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1882     // Load immediate does not have a zero or sign extended version
 1883     // for 8-bit immediates
 1884     emit_opcode(cbuf, $primary + $dst$$reg);
 1885     $$$emit32$src$$constant;
 1886   %}
 1887 
 1888   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1889     // Load immediate does not have a zero or sign extended version
 1890     // for 8-bit immediates
 1891     int dst_enc = $dst$$reg;
 1892     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1893     if (src_con == 0) {
 1894       // xor dst, dst
 1895       emit_opcode(cbuf, 0x33);
 1896       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1897     } else {
 1898       emit_opcode(cbuf, $primary + dst_enc);
 1899       emit_d32(cbuf, src_con);
 1900     }
 1901   %}
 1902 
 1903   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1904     // Load immediate does not have a zero or sign extended version
 1905     // for 8-bit immediates
 1906     int dst_enc = $dst$$reg + 2;
 1907     int src_con = ((julong)($src$$constant)) >> 32;
 1908     if (src_con == 0) {
 1909       // xor dst, dst
 1910       emit_opcode(cbuf, 0x33);
 1911       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1912     } else {
 1913       emit_opcode(cbuf, $primary + dst_enc);
 1914       emit_d32(cbuf, src_con);
 1915     }
 1916   %}
 1917 
 1918 
 1919   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1920   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1921     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1922   %}
 1923 
 1924   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1925     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1926   %}
 1927 
 1928   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1929     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1930   %}
 1931 
 1932   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1933     $$$emit8$primary;
 1934     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1935   %}
 1936 
 1937   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1938     $$$emit8$secondary;
 1939     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1940   %}
 1941 
 1942   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1943     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1944   %}
 1945 
 1946   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1947     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1948   %}
 1949 
 1950   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1951     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1952   %}
 1953 
 1954   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1955     // Output immediate
 1956     $$$emit32$src$$constant;
 1957   %}
 1958 
 1959   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1960     // Output Float immediate bits
 1961     jfloat jf = $src$$constant;
 1962     int    jf_as_bits = jint_cast( jf );
 1963     emit_d32(cbuf, jf_as_bits);
 1964   %}
 1965 
 1966   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1967     // Output Float immediate bits
 1968     jfloat jf = $src$$constant;
 1969     int    jf_as_bits = jint_cast( jf );
 1970     emit_d32(cbuf, jf_as_bits);
 1971   %}
 1972 
 1973   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1974     // Output immediate
 1975     $$$emit16$src$$constant;
 1976   %}
 1977 
 1978   enc_class Con_d32(immI src) %{
 1979     emit_d32(cbuf,$src$$constant);
 1980   %}
 1981 
 1982   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1983     // Output immediate memory reference
 1984     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1985     emit_d32(cbuf, 0x00);
 1986   %}
 1987 
 1988   enc_class lock_prefix( ) %{
 1989     emit_opcode(cbuf,0xF0);         // [Lock]
 1990   %}
 1991 
 1992   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 1997   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 1998 
 1999     // XCHG  rbx,ecx
 2000     emit_opcode(cbuf,0x87);
 2001     emit_opcode(cbuf,0xD9);
 2002     // [Lock]
 2003     emit_opcode(cbuf,0xF0);
 2004     // CMPXCHG8 [Eptr]
 2005     emit_opcode(cbuf,0x0F);
 2006     emit_opcode(cbuf,0xC7);
 2007     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2008     // XCHG  rbx,ecx
 2009     emit_opcode(cbuf,0x87);
 2010     emit_opcode(cbuf,0xD9);
 2011   %}
 2012 
 2013   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2014     // [Lock]
 2015     emit_opcode(cbuf,0xF0);
 2016 
 2017     // CMPXCHG [Eptr]
 2018     emit_opcode(cbuf,0x0F);
 2019     emit_opcode(cbuf,0xB1);
 2020     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2021   %}
 2022 
 2023   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2024     // [Lock]
 2025     emit_opcode(cbuf,0xF0);
 2026 
 2027     // CMPXCHGB [Eptr]
 2028     emit_opcode(cbuf,0x0F);
 2029     emit_opcode(cbuf,0xB0);
 2030     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2031   %}
 2032 
 2033   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2034     // [Lock]
 2035     emit_opcode(cbuf,0xF0);
 2036 
    // Operand-size prefix for 16-bit operands
 2038     emit_opcode(cbuf, 0x66);
 2039 
 2040     // CMPXCHGW [Eptr]
 2041     emit_opcode(cbuf,0x0F);
 2042     emit_opcode(cbuf,0xB1);
 2043     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2044   %}
 2045 
 2046   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2047     int res_encoding = $res$$reg;
 2048 
 2049     // MOV  res,0
 2050     emit_opcode( cbuf, 0xB8 + res_encoding);
 2051     emit_d32( cbuf, 0 );
 2052     // JNE,s  fail
 2053     emit_opcode(cbuf,0x75);
 2054     emit_d8(cbuf, 5 );
 2055     // MOV  res,1
 2056     emit_opcode( cbuf, 0xB8 + res_encoding);
 2057     emit_d32( cbuf, 1 );
 2058     // fail:
 2059   %}
 2060 
 2061   enc_class set_instruction_start( ) %{
 2062     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2063   %}
 2064 
 2065   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2066     int reg_encoding = $ereg$$reg;
 2067     int base  = $mem$$base;
 2068     int index = $mem$$index;
 2069     int scale = $mem$$scale;
 2070     int displace = $mem$$disp;
 2071     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2072     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2073   %}
 2074 
 2075   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2076     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2077     int base  = $mem$$base;
 2078     int index = $mem$$index;
 2079     int scale = $mem$$scale;
 2080     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2081     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2082     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2083   %}
 2084 
 2085   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2086     int r1, r2;
 2087     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2088     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2089     emit_opcode(cbuf,0x0F);
 2090     emit_opcode(cbuf,$tertiary);
 2091     emit_rm(cbuf, 0x3, r1, r2);
 2092     emit_d8(cbuf,$cnt$$constant);
 2093     emit_d8(cbuf,$primary);
 2094     emit_rm(cbuf, 0x3, $secondary, r1);
 2095     emit_d8(cbuf,$cnt$$constant);
 2096   %}
 2097 
 2098   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2099     emit_opcode( cbuf, 0x8B ); // Move
 2100     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2101     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2102       emit_d8(cbuf,$primary);
 2103       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2104       emit_d8(cbuf,$cnt$$constant-32);
 2105     }
 2106     emit_d8(cbuf,$primary);
 2107     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2108     emit_d8(cbuf,31);
 2109   %}
 2110 
 2111   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2112     int r1, r2;
 2113     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2114     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2115 
 2116     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2117     emit_rm(cbuf, 0x3, r1, r2);
 2118     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2119       emit_opcode(cbuf,$primary);
 2120       emit_rm(cbuf, 0x3, $secondary, r1);
 2121       emit_d8(cbuf,$cnt$$constant-32);
 2122     }
 2123     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2124     emit_rm(cbuf, 0x3, r2, r2);
 2125   %}
 2126 
 2127   // Clone of RegMem but accepts an extra parameter to access each
 2128   // half of a double in memory; it never needs relocation info.
 2129   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2130     emit_opcode(cbuf,$opcode$$constant);
 2131     int reg_encoding = $rm_reg$$reg;
 2132     int base     = $mem$$base;
 2133     int index    = $mem$$index;
 2134     int scale    = $mem$$scale;
 2135     int displace = $mem$$disp + $disp_for_half$$constant;
 2136     relocInfo::relocType disp_reloc = relocInfo::none;
 2137     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2138   %}
 2139 
 2140   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2141   //
 2142   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2143   // and it never needs relocation information.
 2144   // Frequently used to move data between FPU's Stack Top and memory.
 2145   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2146     int rm_byte_opcode = $rm_opcode$$constant;
 2147     int base     = $mem$$base;
 2148     int index    = $mem$$index;
 2149     int scale    = $mem$$scale;
 2150     int displace = $mem$$disp;
 2151     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2152     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2153   %}
 2154 
 2155   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2156     int rm_byte_opcode = $rm_opcode$$constant;
 2157     int base     = $mem$$base;
 2158     int index    = $mem$$index;
 2159     int scale    = $mem$$scale;
 2160     int displace = $mem$$disp;
 2161     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2162     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2163   %}
 2164 
 2165   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2166     int reg_encoding = $dst$$reg;
 2167     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2168     int index        = 0x04;            // 0x04 indicates no index
 2169     int scale        = 0x00;            // 0x00 indicates no scale
 2170     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2171     relocInfo::relocType disp_reloc = relocInfo::none;
 2172     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2173   %}
 2174 
 2175   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2176     // Compare dst,src
 2177     emit_opcode(cbuf,0x3B);
 2178     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2179     // jmp dst < src around move
 2180     emit_opcode(cbuf,0x7C);
 2181     emit_d8(cbuf,2);
 2182     // move dst,src
 2183     emit_opcode(cbuf,0x8B);
 2184     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2185   %}
 2186 
 2187   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2188     // Compare dst,src
 2189     emit_opcode(cbuf,0x3B);
 2190     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2191     // jmp dst > src around move
 2192     emit_opcode(cbuf,0x7F);
 2193     emit_d8(cbuf,2);
 2194     // move dst,src
 2195     emit_opcode(cbuf,0x8B);
 2196     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2197   %}
 2198 
 2199   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2200     // If src is FPR1, we can just FST to store it.
 2201     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2202     int reg_encoding = 0x2; // Just store
 2203     int base  = $mem$$base;
 2204     int index = $mem$$index;
 2205     int scale = $mem$$scale;
 2206     int displace = $mem$$disp;
 2207     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2208     if( $src$$reg != FPR1L_enc ) {
 2209       reg_encoding = 0x3;  // Store & pop
 2210       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2211       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2212     }
 2213     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2214     emit_opcode(cbuf,$primary);
 2215     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2216   %}
 2217 
 2218   enc_class neg_reg(rRegI dst) %{
 2219     // NEG $dst
 2220     emit_opcode(cbuf,0xF7);
 2221     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2222   %}
 2223 
 2224   enc_class setLT_reg(eCXRegI dst) %{
 2225     // SETLT $dst
 2226     emit_opcode(cbuf,0x0F);
 2227     emit_opcode(cbuf,0x9C);
 2228     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2229   %}
 2230 
 2231   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2232     int tmpReg = $tmp$$reg;
 2233 
 2234     // SUB $p,$q
 2235     emit_opcode(cbuf,0x2B);
 2236     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2237     // SBB $tmp,$tmp
 2238     emit_opcode(cbuf,0x1B);
 2239     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2240     // AND $tmp,$y
 2241     emit_opcode(cbuf,0x23);
 2242     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2243     // ADD $p,$tmp
 2244     emit_opcode(cbuf,0x03);
 2245     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2246   %}
 2247 
 2248   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2249     // TEST shift,32
 2250     emit_opcode(cbuf,0xF7);
 2251     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2252     emit_d32(cbuf,0x20);
 2253     // JEQ,s small
 2254     emit_opcode(cbuf, 0x74);
 2255     emit_d8(cbuf, 0x04);
 2256     // MOV    $dst.hi,$dst.lo
 2257     emit_opcode( cbuf, 0x8B );
 2258     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2259     // CLR    $dst.lo
 2260     emit_opcode(cbuf, 0x33);
 2261     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2262 // small:
 2263     // SHLD   $dst.hi,$dst.lo,$shift
 2264     emit_opcode(cbuf,0x0F);
 2265     emit_opcode(cbuf,0xA5);
 2266     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2267     // SHL    $dst.lo,$shift"
 2268     emit_opcode(cbuf,0xD3);
 2269     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2270   %}
 2271 
 2272   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2273     // TEST shift,32
 2274     emit_opcode(cbuf,0xF7);
 2275     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2276     emit_d32(cbuf,0x20);
 2277     // JEQ,s small
 2278     emit_opcode(cbuf, 0x74);
 2279     emit_d8(cbuf, 0x04);
 2280     // MOV    $dst.lo,$dst.hi
 2281     emit_opcode( cbuf, 0x8B );
 2282     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2283     // CLR    $dst.hi
 2284     emit_opcode(cbuf, 0x33);
 2285     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2286 // small:
 2287     // SHRD   $dst.lo,$dst.hi,$shift
 2288     emit_opcode(cbuf,0x0F);
 2289     emit_opcode(cbuf,0xAD);
 2290     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2291     // SHR    $dst.hi,$shift"
 2292     emit_opcode(cbuf,0xD3);
 2293     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2294   %}
 2295 
 2296   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2297     // TEST shift,32
 2298     emit_opcode(cbuf,0xF7);
 2299     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2300     emit_d32(cbuf,0x20);
 2301     // JEQ,s small
 2302     emit_opcode(cbuf, 0x74);
 2303     emit_d8(cbuf, 0x05);
 2304     // MOV    $dst.lo,$dst.hi
 2305     emit_opcode( cbuf, 0x8B );
 2306     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2307     // SAR    $dst.hi,31
 2308     emit_opcode(cbuf, 0xC1);
 2309     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2310     emit_d8(cbuf, 0x1F );
 2311 // small:
 2312     // SHRD   $dst.lo,$dst.hi,$shift
 2313     emit_opcode(cbuf,0x0F);
 2314     emit_opcode(cbuf,0xAD);
 2315     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2316     // SAR    $dst.hi,$shift"
 2317     emit_opcode(cbuf,0xD3);
 2318     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2319   %}
 2320 
 2321 
 2322   // ----------------- Encodings for floating point unit -----------------
 2323   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2324   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2325     $$$emit8$primary;
 2326     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2327   %}
 2328 
 2329   // Pop argument in FPR0 with FSTP ST(0)
 2330   enc_class PopFPU() %{
 2331     emit_opcode( cbuf, 0xDD );
 2332     emit_d8( cbuf, 0xD8 );
 2333   %}
 2334 
 2335   // !!!!! equivalent to Pop_Reg_F
 2336   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2337     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2338     emit_d8( cbuf, 0xD8+$dst$$reg );
 2339   %}
 2340 
 2341   enc_class Push_Reg_DPR( regDPR dst ) %{
 2342     emit_opcode( cbuf, 0xD9 );
 2343     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2344   %}
 2345 
 2346   enc_class strictfp_bias1( regDPR dst ) %{
 2347     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2348     emit_opcode( cbuf, 0x2D );
 2349     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2350     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2351     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2352   %}
 2353 
 2354   enc_class strictfp_bias2( regDPR dst ) %{
 2355     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2356     emit_opcode( cbuf, 0x2D );
 2357     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2358     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2359     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2360   %}
 2361 
 2362   // Special case for moving an integer register to a stack slot.
 2363   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2364     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2365   %}
 2366 
 2367   // Special case for moving a register to a stack slot.
 2368   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2369     // Opcode already emitted
 2370     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2371     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2372     emit_d32(cbuf, $dst$$disp);   // Displacement
 2373   %}
 2374 
 2375   // Push the integer in stackSlot 'src' onto FP-stack
 2376   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2377     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2378   %}
 2379 
 2380   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2381   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2382     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2383   %}
 2384 
 2385   // Same as Pop_Mem_F except for opcode
 2386   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2387   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2388     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2389   %}
 2390 
 2391   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2392     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2393     emit_d8( cbuf, 0xD8+$dst$$reg );
 2394   %}
 2395 
 2396   enc_class Push_Reg_FPR( regFPR dst ) %{
 2397     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2398     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2399   %}
 2400 
 2401   // Push FPU's float to a stack-slot, and pop FPU-stack
 2402   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2403     int pop = 0x02;
 2404     if ($src$$reg != FPR1L_enc) {
 2405       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2406       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2407       pop = 0x03;
 2408     }
 2409     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2410   %}
 2411 
 2412   // Push FPU's double to a stack-slot, and pop FPU-stack
 2413   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2414     int pop = 0x02;
 2415     if ($src$$reg != FPR1L_enc) {
 2416       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2417       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2418       pop = 0x03;
 2419     }
 2420     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2421   %}
 2422 
 2423   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2424   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2425     int pop = 0xD0 - 1; // -1 since we skip FLD
 2426     if ($src$$reg != FPR1L_enc) {
 2427       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2428       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2429       pop = 0xD8;
 2430     }
 2431     emit_opcode( cbuf, 0xDD );
 2432     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2433   %}
 2434 
 2435 
 2436   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2437     // load dst in FPR0
 2438     emit_opcode( cbuf, 0xD9 );
 2439     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2440     if ($src$$reg != FPR1L_enc) {
 2441       // fincstp
 2442       emit_opcode (cbuf, 0xD9);
 2443       emit_opcode (cbuf, 0xF7);
 2444       // swap src with FPR1:
 2445       // FXCH FPR1 with src
 2446       emit_opcode(cbuf, 0xD9);
 2447       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2448       // fdecstp
 2449       emit_opcode (cbuf, 0xD9);
 2450       emit_opcode (cbuf, 0xF6);
 2451     }
 2452   %}
 2453 
 2454   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2455     MacroAssembler _masm(&cbuf);
 2456     __ subptr(rsp, 8);
 2457     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2458     __ fld_d(Address(rsp, 0));
 2459     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2460     __ fld_d(Address(rsp, 0));
 2461   %}
 2462 
 2463   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2464     MacroAssembler _masm(&cbuf);
 2465     __ subptr(rsp, 4);
 2466     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2467     __ fld_s(Address(rsp, 0));
 2468     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2469     __ fld_s(Address(rsp, 0));
 2470   %}
 2471 
 2472   enc_class Push_ResultD(regD dst) %{
 2473     MacroAssembler _masm(&cbuf);
 2474     __ fstp_d(Address(rsp, 0));
 2475     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2476     __ addptr(rsp, 8);
 2477   %}
 2478 
 2479   enc_class Push_ResultF(regF dst, immI d8) %{
 2480     MacroAssembler _masm(&cbuf);
 2481     __ fstp_s(Address(rsp, 0));
 2482     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2483     __ addptr(rsp, $d8$$constant);
 2484   %}
 2485 
 2486   enc_class Push_SrcD(regD src) %{
 2487     MacroAssembler _masm(&cbuf);
 2488     __ subptr(rsp, 8);
 2489     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2490     __ fld_d(Address(rsp, 0));
 2491   %}
 2492 
 2493   enc_class push_stack_temp_qword() %{
 2494     MacroAssembler _masm(&cbuf);
 2495     __ subptr(rsp, 8);
 2496   %}
 2497 
 2498   enc_class pop_stack_temp_qword() %{
 2499     MacroAssembler _masm(&cbuf);
 2500     __ addptr(rsp, 8);
 2501   %}
 2502 
 2503   enc_class push_xmm_to_fpr1(regD src) %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2506     __ fld_d(Address(rsp, 0));
 2507   %}
 2508 
 2509   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2510     if ($src$$reg != FPR1L_enc) {
 2511       // fincstp
 2512       emit_opcode (cbuf, 0xD9);
 2513       emit_opcode (cbuf, 0xF7);
 2514       // FXCH FPR1 with src
 2515       emit_opcode(cbuf, 0xD9);
 2516       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2517       // fdecstp
 2518       emit_opcode (cbuf, 0xD9);
 2519       emit_opcode (cbuf, 0xF6);
 2520     }
 2521     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2522     // // FSTP   FPR$dst$$reg
 2523     // emit_opcode( cbuf, 0xDD );
 2524     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2525   %}
 2526 
 2527   enc_class fnstsw_sahf_skip_parity() %{
 2528     // fnstsw ax
 2529     emit_opcode( cbuf, 0xDF );
 2530     emit_opcode( cbuf, 0xE0 );
 2531     // sahf
 2532     emit_opcode( cbuf, 0x9E );
 2533     // jnp  ::skip
 2534     emit_opcode( cbuf, 0x7B );
 2535     emit_opcode( cbuf, 0x05 );
 2536   %}
 2537 
 2538   enc_class emitModDPR() %{
 2539     // fprem must be iterative
 2540     // :: loop
 2541     // fprem
 2542     emit_opcode( cbuf, 0xD9 );
 2543     emit_opcode( cbuf, 0xF8 );
 2544     // wait
 2545     emit_opcode( cbuf, 0x9b );
 2546     // fnstsw ax
 2547     emit_opcode( cbuf, 0xDF );
 2548     emit_opcode( cbuf, 0xE0 );
 2549     // sahf
 2550     emit_opcode( cbuf, 0x9E );
 2551     // jp  ::loop
 2552     emit_opcode( cbuf, 0x0F );
 2553     emit_opcode( cbuf, 0x8A );
 2554     emit_opcode( cbuf, 0xF4 );
 2555     emit_opcode( cbuf, 0xFF );
 2556     emit_opcode( cbuf, 0xFF );
 2557     emit_opcode( cbuf, 0xFF );
 2558   %}
 2559 
 2560   enc_class fpu_flags() %{
 2561     // fnstsw_ax
 2562     emit_opcode( cbuf, 0xDF);
 2563     emit_opcode( cbuf, 0xE0);
 2564     // test ax,0x0400
 2565     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2566     emit_opcode( cbuf, 0xA9 );
 2567     emit_d16   ( cbuf, 0x0400 );
 2568     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2569     // // test rax,0x0400
 2570     // emit_opcode( cbuf, 0xA9 );
 2571     // emit_d32   ( cbuf, 0x00000400 );
 2572     //
 2573     // jz exit (no unordered comparison)
 2574     emit_opcode( cbuf, 0x74 );
 2575     emit_d8    ( cbuf, 0x02 );
 2576     // mov ah,1 - treat as LT case (set carry flag)
 2577     emit_opcode( cbuf, 0xB4 );
 2578     emit_d8    ( cbuf, 0x01 );
 2579     // sahf
 2580     emit_opcode( cbuf, 0x9E);
 2581   %}
 2582 
 2583   enc_class cmpF_P6_fixup() %{
 2584     // Fixup the integer flags in case comparison involved a NaN
 2585     //
 2586     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2587     emit_opcode( cbuf, 0x7B );
 2588     emit_d8    ( cbuf, 0x03 );
 2589     // MOV AH,1 - treat as LT case (set carry flag)
 2590     emit_opcode( cbuf, 0xB4 );
 2591     emit_d8    ( cbuf, 0x01 );
 2592     // SAHF
 2593     emit_opcode( cbuf, 0x9E);
 2594     // NOP     // target for branch to avoid branch to branch
 2595     emit_opcode( cbuf, 0x90);
 2596   %}
 2597 
 2598 //     fnstsw_ax();
 2599 //     sahf();
 2600 //     movl(dst, nan_result);
 2601 //     jcc(Assembler::parity, exit);
 2602 //     movl(dst, less_result);
 2603 //     jcc(Assembler::below, exit);
 2604 //     movl(dst, equal_result);
 2605 //     jcc(Assembler::equal, exit);
 2606 //     movl(dst, greater_result);
 2607 
 2608 // less_result     =  1;
 2609 // greater_result  = -1;
 2610 // equal_result    = 0;
 2611 // nan_result      = -1;
 2612 
 2613   enc_class CmpF_Result(rRegI dst) %{
 2614     // fnstsw_ax();
 2615     emit_opcode( cbuf, 0xDF);
 2616     emit_opcode( cbuf, 0xE0);
 2617     // sahf
 2618     emit_opcode( cbuf, 0x9E);
 2619     // movl(dst, nan_result);
 2620     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2621     emit_d32( cbuf, -1 );
 2622     // jcc(Assembler::parity, exit);
 2623     emit_opcode( cbuf, 0x7A );
 2624     emit_d8    ( cbuf, 0x13 );
 2625     // movl(dst, less_result);
 2626     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2627     emit_d32( cbuf, -1 );
 2628     // jcc(Assembler::below, exit);
 2629     emit_opcode( cbuf, 0x72 );
 2630     emit_d8    ( cbuf, 0x0C );
 2631     // movl(dst, equal_result);
 2632     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2633     emit_d32( cbuf, 0 );
 2634     // jcc(Assembler::equal, exit);
 2635     emit_opcode( cbuf, 0x74 );
 2636     emit_d8    ( cbuf, 0x05 );
 2637     // movl(dst, greater_result);
 2638     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2639     emit_d32( cbuf, 1 );
 2640   %}
 2641 
 2642 
 2643   // Compare the longs and set flags
 2644   // BROKEN!  Do Not use as-is
 2645   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2646     // CMP    $src1.hi,$src2.hi
 2647     emit_opcode( cbuf, 0x3B );
 2648     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2649     // JNE,s  done
 2650     emit_opcode(cbuf,0x75);
 2651     emit_d8(cbuf, 2 );
 2652     // CMP    $src1.lo,$src2.lo
 2653     emit_opcode( cbuf, 0x3B );
 2654     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2655 // done:
 2656   %}
 2657 
 2658   enc_class convert_int_long( regL dst, rRegI src ) %{
 2659     // mov $dst.lo,$src
 2660     int dst_encoding = $dst$$reg;
 2661     int src_encoding = $src$$reg;
 2662     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2663     // mov $dst.hi,$src
 2664     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2665     // sar $dst.hi,31
 2666     emit_opcode( cbuf, 0xC1 );
 2667     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2668     emit_d8(cbuf, 0x1F );
 2669   %}
 2670 
 2671   enc_class convert_long_double( eRegL src ) %{
 2672     // push $src.hi
 2673     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2674     // push $src.lo
 2675     emit_opcode(cbuf, 0x50+$src$$reg  );
 2676     // fild 64-bits at [SP]
 2677     emit_opcode(cbuf,0xdf);
 2678     emit_d8(cbuf, 0x6C);
 2679     emit_d8(cbuf, 0x24);
 2680     emit_d8(cbuf, 0x00);
 2681     // pop stack
 2682     emit_opcode(cbuf, 0x83); // add  SP, #8
 2683     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2684     emit_d8(cbuf, 0x8);
 2685   %}
 2686 
 2687   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2688     // IMUL   EDX:EAX,$src1
 2689     emit_opcode( cbuf, 0xF7 );
 2690     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2691     // SAR    EDX,$cnt-32
 2692     int shift_count = ((int)$cnt$$constant) - 32;
 2693     if (shift_count > 0) {
 2694       emit_opcode(cbuf, 0xC1);
 2695       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2696       emit_d8(cbuf, shift_count);
 2697     }
 2698   %}
 2699 
 2700   // This version does not have the trailing ADD ESP,8 to pop the operands
 2701   enc_class convert_long_double2( eRegL src ) %{
 2702     // push $src.hi
 2703     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2704     // push $src.lo
 2705     emit_opcode(cbuf, 0x50+$src$$reg  );
 2706     // fild 64-bits at [SP]
 2707     emit_opcode(cbuf,0xdf);
 2708     emit_d8(cbuf, 0x6C);
 2709     emit_d8(cbuf, 0x24);
 2710     emit_d8(cbuf, 0x00);
 2711   %}
 2712 
 2713   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2714     // Basic idea: long = (long)int * (long)int
 2715     // IMUL EDX:EAX, src
 2716     emit_opcode( cbuf, 0xF7 );
 2717     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2718   %}
 2719 
 2720   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2721     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2722     // MUL EDX:EAX, src
 2723     emit_opcode( cbuf, 0xF7 );
 2724     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2725   %}
 2726 
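        // The 64x64->64 multiply below relies on the identity sketched here in
        // C-like pseudocode (illustration only, not generated code):
        //   uint32_t x_lo, x_hi;                   // $dst (EDX:EAX on entry)
        //   uint32_t y_lo, y_hi;                   // $src
        //   uint64_t p  = (uint64_t)x_lo * y_lo;               // MUL  EDX:EAX,$src.lo
        //   uint32_t hi = (uint32_t)(p >> 32)
        //               + x_lo * y_hi + x_hi * y_lo;           // two IMULs + ADDs
        //   // result = ((uint64_t)hi << 32) | (uint32_t)p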
 2727   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2728     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2729     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2730     // MOV    $tmp,$src.lo
 2731     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2732     // IMUL   $tmp,EDX
 2733     emit_opcode( cbuf, 0x0F );
 2734     emit_opcode( cbuf, 0xAF );
 2735     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2736     // MOV    EDX,$src.hi
 2737     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2738     // IMUL   EDX,EAX
 2739     emit_opcode( cbuf, 0x0F );
 2740     emit_opcode( cbuf, 0xAF );
 2741     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2742     // ADD    $tmp,EDX
 2743     emit_opcode( cbuf, 0x03 );
 2744     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2745     // MUL   EDX:EAX,$src.lo
 2746     emit_opcode( cbuf, 0xF7 );
 2747     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2748     // ADD    EDX,$tmp
 2749     emit_opcode( cbuf, 0x03 );
 2750     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2751   %}
 2752 
 2753   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2754     // Basic idea: lo(result) = lo(src * y_lo)
 2755     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2756     // IMUL   $tmp,EDX,$src
 2757     emit_opcode( cbuf, 0x6B );
 2758     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2759     emit_d8( cbuf, (int)$src$$constant );
 2760     // MOV    EDX,$src
 2761     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2762     emit_d32( cbuf, (int)$src$$constant );
 2763     // MUL   EDX:EAX,EDX
 2764     emit_opcode( cbuf, 0xF7 );
 2765     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2766     // ADD    EDX,$tmp
 2767     emit_opcode( cbuf, 0x03 );
 2768     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2769   %}
 2770 
 2771   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2772     // PUSH src1.hi
 2773     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2774     // PUSH src1.lo
 2775     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2776     // PUSH src2.hi
 2777     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2778     // PUSH src2.lo
 2779     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2780     // CALL directly to the runtime
 2781     cbuf.set_insts_mark();
 2782     emit_opcode(cbuf,0xE8);       // Call into runtime
 2783     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2784     // Restore stack
 2785     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2786     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2787     emit_d8(cbuf, 4*4);
 2788   %}
 2789 
 2790   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2791     // PUSH src1.hi
 2792     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2793     // PUSH src1.lo
 2794     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2795     // PUSH src2.hi
 2796     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2797     // PUSH src2.lo
 2798     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2799     // CALL directly to the runtime
 2800     cbuf.set_insts_mark();
 2801     emit_opcode(cbuf,0xE8);       // Call into runtime
 2802     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2803     // Restore stack
 2804     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2805     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2806     emit_d8(cbuf, 4*4);
 2807   %}
 2808 
 2809   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2810     // MOV   $tmp,$src.lo
 2811     emit_opcode(cbuf, 0x8B);
 2812     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2813     // OR    $tmp,$src.hi
 2814     emit_opcode(cbuf, 0x0B);
 2815     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2816   %}
 2817 
 2818   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2819     // CMP    $src1.lo,$src2.lo
 2820     emit_opcode( cbuf, 0x3B );
 2821     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2822     // JNE,s  skip
 2823     emit_cc(cbuf, 0x70, 0x5);
 2824     emit_d8(cbuf,2);
 2825     // CMP    $src1.hi,$src2.hi
 2826     emit_opcode( cbuf, 0x3B );
 2827     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2828   %}
 2829 
 2830   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2831     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2832     emit_opcode( cbuf, 0x3B );
 2833     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2834     // MOV    $tmp,$src1.hi
 2835     emit_opcode( cbuf, 0x8B );
 2836     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2837     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2838     emit_opcode( cbuf, 0x1B );
 2839     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2840   %}
 2841 
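        // Compare a long against zero by computing the flags of (0 - src):
        // the CMP sets the borrow from the low word, and the SBB folds $src.hi
        // plus that borrow into the final flags.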
 2842   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2843     // XOR    $tmp,$tmp
 2844     emit_opcode(cbuf,0x33);  // XOR
 2845     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2846     // CMP    $tmp,$src.lo
 2847     emit_opcode( cbuf, 0x3B );
 2848     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2849     // SBB    $tmp,$src.hi
 2850     emit_opcode( cbuf, 0x1B );
 2851     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2852   %}
 2853 
 2854  // Sniff, sniff... smells like Gnu Superoptimizer
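        // 64-bit two's-complement negate: NEG lo leaves the borrow set when
        // lo != 0, and the trailing SBB hi,0 folds that borrow into the
        // already-negated high word, giving dst = -(hi:lo).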
 2855   enc_class neg_long( eRegL dst ) %{
 2856     emit_opcode(cbuf,0xF7);    // NEG hi
 2857     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2858     emit_opcode(cbuf,0xF7);    // NEG lo
 2859     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2860     emit_opcode(cbuf,0x83);    // SBB hi,0
 2861     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2862     emit_d8    (cbuf,0 );
 2863   %}
 2864 
 2865   enc_class enc_pop_rdx() %{
 2866     emit_opcode(cbuf,0x5A);
 2867   %}
 2868 
 2869   enc_class enc_rethrow() %{
 2870     cbuf.set_insts_mark();
 2871     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2872     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2873                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2874   %}
 2875 
 2876 
 2877   // Convert a double to an int.  Java semantics require we do complex
 2878   // manipulations in the corner cases.  So we set the rounding mode to
 2879   // 'zero', store the darned double down as an int, and reset the
 2880   // rounding mode to 'nearest'.  If the hardware produces its special
 2881   // 'invalid' value (0x80000000), a runtime stub patches up the correct result.
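        // Roughly the sequence the encoding below produces (a sketch; control
        // word addresses and the stub are shown symbolically):
        //   FLDCW  [trunc_cw]        ; round toward zero
        //   SUB    ESP,4
        //   FISTP  dword [ESP]       ; store the double as an int, pop FPU stack
        //   FLDCW  [std_cw]          ; restore std/24-bit mode
        //   POP    EAX
        //   CMP    EAX,0x80000000    ; Intel's "invalid" int result?
        //   JNE    done
        //   FLD    ST(i)             ; reload $src for the slow path
        //   CALL   StubRoutines::x86::d2i_wrapper()
        // done: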
 2882   enc_class DPR2I_encoding( regDPR src ) %{
 2883     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2884     // exceptions here, so that a NaN or other corner-case value will
 2885     // throw an exception (but normal values get converted at full speed).
 2886     // However, I2C adapters and other float-stack manglers leave pending
 2887     // invalid-op exceptions hanging.  We would have to clear them before
 2888     // enabling them and that is more expensive than just testing for the
 2889     // invalid value Intel stores down in the corner cases.
 2890     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2891     emit_opcode(cbuf,0x2D);
 2892     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2893     // Allocate a word
 2894     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2895     emit_opcode(cbuf,0xEC);
 2896     emit_d8(cbuf,0x04);
 2897     // Encoding assumes a double has been pushed into FPR0.
 2898     // Store down the double as an int, popping the FPU stack
 2899     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2900     emit_opcode(cbuf,0x1C);
 2901     emit_d8(cbuf,0x24);
 2902     // Restore the rounding mode; mask the exception
 2903     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2904     emit_opcode(cbuf,0x2D);
 2905     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2906         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2907         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2908 
 2909     // Load the converted int; adjust CPU stack
 2910     emit_opcode(cbuf,0x58);       // POP EAX
 2911     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2912     emit_d32   (cbuf,0x80000000); //         0x80000000
 2913     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2914     emit_d8    (cbuf,0x07);       // Size of slow_call
 2915     // Push src onto stack slow-path
 2916     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2917     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2918     // CALL directly to the runtime
 2919     cbuf.set_insts_mark();
 2920     emit_opcode(cbuf,0xE8);       // Call into runtime
 2921     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2922     // Carry on here...
 2923   %}
 2924 
 2925   enc_class DPR2L_encoding( regDPR src ) %{
 2926     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2927     emit_opcode(cbuf,0x2D);
 2928     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2929     // Allocate two words (8 bytes)
 2930     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2931     emit_opcode(cbuf,0xEC);
 2932     emit_d8(cbuf,0x08);
 2933     // Encoding assumes a double has been pushed into FPR0.
 2934     // Store down the double as a long, popping the FPU stack
 2935     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2936     emit_opcode(cbuf,0x3C);
 2937     emit_d8(cbuf,0x24);
 2938     // Restore the rounding mode; mask the exception
 2939     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2940     emit_opcode(cbuf,0x2D);
 2941     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2942         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2943         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2944 
 2945     // Load the converted long; adjust CPU stack
 2946     emit_opcode(cbuf,0x58);       // POP EAX
 2947     emit_opcode(cbuf,0x5A);       // POP EDX
 2948     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2949     emit_d8    (cbuf,0xFA);       // rdx
 2950     emit_d32   (cbuf,0x80000000); //         0x80000000
 2951     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2952     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2953     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2954     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2955     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2956     emit_d8    (cbuf,0x07);       // Size of slow_call
 2957     // Push src onto stack slow-path
 2958     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2959     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2960     // CALL directly to the runtime
 2961     cbuf.set_insts_mark();
 2962     emit_opcode(cbuf,0xE8);       // Call into runtime
 2963     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2964     // Carry on here...
 2965   %}
 2966 
 2967   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2968     // Operand was loaded from memory into fp ST (stack top)
 2969     // FMUL   ST,$src  /* D8 C8+i */
 2970     emit_opcode(cbuf, 0xD8);
 2971     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2972   %}
 2973 
 2974   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2975     // FADD   ST,src2  /* D8 C0+i */
 2976     emit_opcode(cbuf, 0xD8);
 2977     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2978     //could use FADDP  src2,fpST  /* DE C0+i */
 2979   %}
 2980 
 2981   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2982     // FADDP  src2,ST  /* DE C0+i */
 2983     emit_opcode(cbuf, 0xDE);
 2984     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2985   %}
 2986 
 2987   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2988     // Operand has been loaded into fp ST (stack top)
 2989       // FSUB   ST,$src1
 2990       emit_opcode(cbuf, 0xD8);
 2991       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 2992 
 2993       // FDIV
 2994       emit_opcode(cbuf, 0xD8);
 2995       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 2996   %}
 2997 
 2998   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 2999     // Operand was loaded from memory into fp ST (stack top)
 3000     // FADD   ST,$src  /* D8 C0+i */
 3001     emit_opcode(cbuf, 0xD8);
 3002     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3003 
 3004     // FMUL  ST,src2  /* D8 C8+i */
 3005     emit_opcode(cbuf, 0xD8);
 3006     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3007   %}
 3008 
 3009 
 3010   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3011     // Operand was loaded from memory into fp ST (stack top)
 3012     // FADD   ST,$src  /* D8 C0+i */
 3013     emit_opcode(cbuf, 0xD8);
 3014     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3015 
 3016     // FMULP  src2,ST  /* DE C8+i */
 3017     emit_opcode(cbuf, 0xDE);
 3018     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3019   %}
 3020 
 3021   // Atomically load the volatile long
 3022   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3023     emit_opcode(cbuf,0xDF);
 3024     int rm_byte_opcode = 0x05;
 3025     int base     = $mem$$base;
 3026     int index    = $mem$$index;
 3027     int scale    = $mem$$scale;
 3028     int displace = $mem$$disp;
 3029     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3030     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3031     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3032   %}
 3033 
 3034   // Volatile Store Long.  Must be atomic, so move it into
 3035   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3036   // target address before the store (for null-ptr checks)
 3037   // so the memory operand is used twice in the encoding.
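        // Roughly what the encoding below emits (a sketch; src_disp stands for
        // the source stack-slot displacement):
        //   FILD   qword [ESP+src_disp]   ; DF /5 : lift the long onto the FP stack
        //   FISTP  qword [$mem]           ; DF /7 : one 64-bit store to the target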
 3038   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3039     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3040     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3041     emit_opcode(cbuf,0xDF);
 3042     int rm_byte_opcode = 0x07;
 3043     int base     = $mem$$base;
 3044     int index    = $mem$$index;
 3045     int scale    = $mem$$scale;
 3046     int displace = $mem$$disp;
 3047     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3048     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3049   %}
 3050 
 3051 %}
 3052 
 3053 
 3054 //----------FRAME--------------------------------------------------------------
 3055 // Definition of frame structure and management information.
 3056 //
 3057 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3058 //                             |   (to get allocators register number
 3059 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3060 //  r   CALLER     |        |
 3061 //  o     |        +--------+      pad to even-align allocators stack-slot
 3062 //  w     V        |  pad0  |        numbers; owned by CALLER
 3063 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3064 //  h     ^        |   in   |  5
 3065 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3066 //  |     |        |        |  3
 3067 //  |     |        +--------+
 3068 //  V     |        | old out|      Empty on Intel, window on Sparc
 3069 //        |    old |preserve|      Must be even aligned.
 3070 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3071 //        |        |   in   |  3   area for Intel ret address
 3072 //     Owned by    |preserve|      Empty on Sparc.
 3073 //       SELF      +--------+
 3074 //        |        |  pad2  |  2   pad to align old SP
 3075 //        |        +--------+  1
 3076 //        |        | locks  |  0
 3077 //        |        +--------+----> OptoReg::stack0(), even aligned
 3078 //        |        |  pad1  | 11   pad to align new SP
 3079 //        |        +--------+
 3080 //        |        |        | 10
 3081 //        |        | spills |  9   spills
 3082 //        V        |        |  8   (pad0 slot for callee)
 3083 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3084 //        ^        |  out   |  7
 3085 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3086 //     Owned by    +--------+
 3087 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3088 //        |    new |preserve|      Must be even-aligned.
 3089 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3090 //        |        |        |
 3091 //
 3092 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3093 //         known from SELF's arguments and the Java calling convention.
 3094 //         Region 6-7 is determined per call site.
 3095 // Note 2: If the calling convention leaves holes in the incoming argument
 3096 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3097 //         are owned by the CALLEE.  Holes should not be necessary in the
 3098 //         incoming area, as the Java calling convention is completely under
 3099 //         the control of the AD file.  Doubles can be sorted and packed to
 3100 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3101 //         varargs C calling conventions.
 3102 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3103 //         even aligned with pad0 as needed.
 3104 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3105 //         region 6-11 is even aligned; it may be padded out more so that
 3106 //         the region from SP to FP meets the minimum stack alignment.
 3107 
 3108 frame %{
 3109   // These three registers define part of the calling convention
 3110   // between compiled code and the interpreter.
 3111   inline_cache_reg(EAX);                // Inline Cache Register
 3112 
 3113   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3114   cisc_spilling_operand_name(indOffset32);
 3115 
 3116   // Number of stack slots consumed by locking an object
 3117   sync_stack_slots(1);
 3118 
 3119   // Compiled code's Frame Pointer
 3120   frame_pointer(ESP);
 3121   // Interpreter stores its frame pointer in a register which is
 3122   // stored to the stack by I2CAdaptors.
 3123   // I2CAdaptors convert from interpreted java to compiled java.
 3124   interpreter_frame_pointer(EBP);
 3125 
 3126   // Stack alignment requirement
 3127   // Alignment size in bytes (128-bit -> 16 bytes)
 3128   stack_alignment(StackAlignmentInBytes);
 3129 
 3130   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3131   // for calls to C.  Supports the var-args backing area for register parms.
 3132   varargs_C_out_slots_killed(0);
 3133 
 3134   // The after-PROLOG location of the return address.  Location of
 3135   // return address specifies a type (REG or STACK) and a number
 3136   // representing the register number (i.e. - use a register name) or
 3137   // stack slot.
 3138   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3139   // Otherwise, it is above the locks and verification slot and alignment word
 3140   return_addr(STACK - 1 +
 3141               align_up((Compile::current()->in_preserve_stack_slots() +
 3142                         Compile::current()->fixed_slots()),
 3143                        stack_alignment_in_slots()));
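        // e.g. (hypothetical values): if in_preserve_stack_slots() == 3 and
        // fixed_slots() == 1 with stack_alignment_in_slots() == 4, then
        // align_up(3 + 1, 4) == 4 and the return address sits at STACK - 1 + 4,
        // i.e. stack slot 3.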
 3144 
 3145   // Location of C & interpreter return values
 3146   c_return_value %{
 3147     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3148     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3149     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3150 
 3151     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3152     // that C functions return float and double results in XMM0.
 3153     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3154       return OptoRegPair(XMM0b_num,XMM0_num);
 3155     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3156       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3157 
 3158     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3159   %}
 3160 
 3161   // Location of return values
 3162   return_value %{
 3163     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3164     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3165     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3166     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3167       return OptoRegPair(XMM0b_num,XMM0_num);
 3168     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3169       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3170     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3171   %}
 3172 
 3173 %}
 3174 
 3175 //----------ATTRIBUTES---------------------------------------------------------
 3176 //----------Operand Attributes-------------------------------------------------
 3177 op_attrib op_cost(0);        // Required cost attribute
 3178 
 3179 //----------Instruction Attributes---------------------------------------------
 3180 ins_attrib ins_cost(100);       // Required cost attribute
 3181 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3182 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3183                                 // non-matching short branch variant of some
 3184                                 // long branch?
 3185 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3186                                 // specifies the alignment that some part of the instruction (not
 3187                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3188                                 // function must be provided for the instruction
 3189 
 3190 //----------OPERANDS-----------------------------------------------------------
 3191 // Operand definitions must precede instruction definitions for correct parsing
 3192 // in the ADLC because operands constitute user defined types which are used in
 3193 // instruction definitions.
 3194 
 3195 //----------Simple Operands----------------------------------------------------
 3196 // Immediate Operands
 3197 // Integer Immediate
 3198 operand immI() %{
 3199   match(ConI);
 3200 
 3201   op_cost(10);
 3202   format %{ %}
 3203   interface(CONST_INTER);
 3204 %}
 3205 
 3206 // Constant for test vs zero
 3207 operand immI_0() %{
 3208   predicate(n->get_int() == 0);
 3209   match(ConI);
 3210 
 3211   op_cost(0);
 3212   format %{ %}
 3213   interface(CONST_INTER);
 3214 %}
 3215 
 3216 // Constant for increment
 3217 operand immI_1() %{
 3218   predicate(n->get_int() == 1);
 3219   match(ConI);
 3220 
 3221   op_cost(0);
 3222   format %{ %}
 3223   interface(CONST_INTER);
 3224 %}
 3225 
 3226 // Constant for decrement
 3227 operand immI_M1() %{
 3228   predicate(n->get_int() == -1);
 3229   match(ConI);
 3230 
 3231   op_cost(0);
 3232   format %{ %}
 3233   interface(CONST_INTER);
 3234 %}
 3235 
 3236 // Valid scale values for addressing modes
 3237 operand immI2() %{
 3238   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3239   match(ConI);
 3240 
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 operand immI8() %{
 3246   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3247   match(ConI);
 3248 
 3249   op_cost(5);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 operand immU8() %{
 3255   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3256   match(ConI);
 3257 
 3258   op_cost(5);
 3259   format %{ %}
 3260   interface(CONST_INTER);
 3261 %}
 3262 
 3263 operand immI16() %{
 3264   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3265   match(ConI);
 3266 
 3267   op_cost(10);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Int Immediate non-negative
 3273 operand immU31()
 3274 %{
 3275   predicate(n->get_int() >= 0);
 3276   match(ConI);
 3277 
 3278   op_cost(0);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 // Constant for long shifts
 3284 operand immI_32() %{
 3285   predicate( n->get_int() == 32 );
 3286   match(ConI);
 3287 
 3288   op_cost(0);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 operand immI_1_31() %{
 3294   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3295   match(ConI);
 3296 
 3297   op_cost(0);
 3298   format %{ %}
 3299   interface(CONST_INTER);
 3300 %}
 3301 
 3302 operand immI_32_63() %{
 3303   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3304   match(ConI);
 3305   op_cost(0);
 3306 
 3307   format %{ %}
 3308   interface(CONST_INTER);
 3309 %}
 3310 
 3311 operand immI_2() %{
 3312   predicate( n->get_int() == 2 );
 3313   match(ConI);
 3314 
 3315   op_cost(0);
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 operand immI_3() %{
 3321   predicate( n->get_int() == 3 );
 3322   match(ConI);
 3323 
 3324   op_cost(0);
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 operand immI_4()
 3330 %{
 3331   predicate(n->get_int() == 4);
 3332   match(ConI);
 3333 
 3334   op_cost(0);
 3335   format %{ %}
 3336   interface(CONST_INTER);
 3337 %}
 3338 
 3339 operand immI_8()
 3340 %{
 3341   predicate(n->get_int() == 8);
 3342   match(ConI);
 3343 
 3344   op_cost(0);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 // Pointer Immediate
 3350 operand immP() %{
 3351   match(ConP);
 3352 
 3353   op_cost(10);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 // NULL Pointer Immediate
 3359 operand immP0() %{
 3360   predicate( n->get_ptr() == 0 );
 3361   match(ConP);
 3362   op_cost(0);
 3363 
 3364   format %{ %}
 3365   interface(CONST_INTER);
 3366 %}
 3367 
 3368 // Long Immediate
 3369 operand immL() %{
 3370   match(ConL);
 3371 
 3372   op_cost(20);
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 // Long Immediate zero
 3378 operand immL0() %{
 3379   predicate( n->get_long() == 0L );
 3380   match(ConL);
 3381   op_cost(0);
 3382 
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Long Immediate minus-one
 3388 operand immL_M1() %{
 3389   predicate( n->get_long() == -1L );
 3390   match(ConL);
 3391   op_cost(0);
 3392 
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long immediate from 0 to 127.
 3398 // Used for a shorter form of long mul by 10.
 3399 operand immL_127() %{
 3400   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3401   match(ConL);
 3402   op_cost(0);
 3403 
 3404   format %{ %}
 3405   interface(CONST_INTER);
 3406 %}
 3407 
 3408 // Long Immediate: low 32-bit mask
 3409 operand immL_32bits() %{
 3410   predicate(n->get_long() == 0xFFFFFFFFL);
 3411   match(ConL);
 3412   op_cost(0);
 3413 
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long Immediate: 32-bit signed value
 3419 operand immL32() %{
 3420   predicate(n->get_long() == (int)(n->get_long()));
 3421   match(ConL);
 3422   op_cost(20);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
 3428 // Double Immediate zero
 3429 operand immDPR0() %{
 3430   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3431   // bug that generates code such that NaNs compare equal to 0.0
 3432   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3433   match(ConD);
 3434 
 3435   op_cost(5);
 3436   format %{ %}
 3437   interface(CONST_INTER);
 3438 %}
 3439 
 3440 // Double Immediate one
 3441 operand immDPR1() %{
 3442   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3443   match(ConD);
 3444 
 3445   op_cost(5);
 3446   format %{ %}
 3447   interface(CONST_INTER);
 3448 %}
 3449 
 3450 // Double Immediate
 3451 operand immDPR() %{
 3452   predicate(UseSSE<=1);
 3453   match(ConD);
 3454 
 3455   op_cost(5);
 3456   format %{ %}
 3457   interface(CONST_INTER);
 3458 %}
 3459 
 3460 operand immD() %{
 3461   predicate(UseSSE>=2);
 3462   match(ConD);
 3463 
 3464   op_cost(5);
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
 3469 // Double Immediate zero
 3470 operand immD0() %{
 3471   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3472   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3473   // compare equal to -0.0.
 3474   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3475   match(ConD);
 3476 
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 // Float Immediate zero
 3482 operand immFPR0() %{
 3483   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3484   match(ConF);
 3485 
 3486   op_cost(5);
 3487   format %{ %}
 3488   interface(CONST_INTER);
 3489 %}
 3490 
 3491 // Float Immediate one
 3492 operand immFPR1() %{
 3493   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3494   match(ConF);
 3495 
 3496   op_cost(5);
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 // Float Immediate
 3502 operand immFPR() %{
 3503   predicate( UseSSE == 0 );
 3504   match(ConF);
 3505 
 3506   op_cost(5);
 3507   format %{ %}
 3508   interface(CONST_INTER);
 3509 %}
 3510 
 3511 // Float Immediate
 3512 operand immF() %{
 3513   predicate(UseSSE >= 1);
 3514   match(ConF);
 3515 
 3516   op_cost(5);
 3517   format %{ %}
 3518   interface(CONST_INTER);
 3519 %}
 3520 
 3521 // Float Immediate zero.  Zero and not -0.0
 3522 operand immF0() %{
 3523   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3524   match(ConF);
 3525 
 3526   op_cost(5);
 3527   format %{ %}
 3528   interface(CONST_INTER);
 3529 %}
 3530 
 3531 // Immediates for special shifts (sign extend)
 3532 
 3533 // Constants for sign-extend shifts
 3534 operand immI_16() %{
 3535   predicate( n->get_int() == 16 );
 3536   match(ConI);
 3537 
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 operand immI_24() %{
 3543   predicate( n->get_int() == 24 );
 3544   match(ConI);
 3545 
 3546   format %{ %}
 3547   interface(CONST_INTER);
 3548 %}
 3549 
 3550 // Constant for byte-wide masking
 3551 operand immI_255() %{
 3552   predicate( n->get_int() == 255 );
 3553   match(ConI);
 3554 
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Constant for short-wide masking
 3560 operand immI_65535() %{
 3561   predicate(n->get_int() == 65535);
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 operand kReg()
 3569 %{
 3570   constraint(ALLOC_IN_RC(vectmask_reg));
 3571   match(RegVectMask);
 3572   format %{%}
 3573   interface(REG_INTER);
 3574 %}
 3575 
 3576 operand kReg_K1()
 3577 %{
 3578   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3579   match(RegVectMask);
 3580   format %{%}
 3581   interface(REG_INTER);
 3582 %}
 3583 
 3584 operand kReg_K2()
 3585 %{
 3586   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3587   match(RegVectMask);
 3588   format %{%}
 3589   interface(REG_INTER);
 3590 %}
 3591 
 3592 // Special Registers
 3593 operand kReg_K3()
 3594 %{
 3595   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3596   match(RegVectMask);
 3597   format %{%}
 3598   interface(REG_INTER);
 3599 %}
 3600 
 3601 operand kReg_K4()
 3602 %{
 3603   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3604   match(RegVectMask);
 3605   format %{%}
 3606   interface(REG_INTER);
 3607 %}
 3608 
 3609 operand kReg_K5()
 3610 %{
 3611   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3612   match(RegVectMask);
 3613   format %{%}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand kReg_K6()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 // Special Registers
 3626 operand kReg_K7()
 3627 %{
 3628   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3629   match(RegVectMask);
 3630   format %{%}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 // Register Operands
 3635 // Integer Register
 3636 operand rRegI() %{
 3637   constraint(ALLOC_IN_RC(int_reg));
 3638   match(RegI);
 3639   match(xRegI);
 3640   match(eAXRegI);
 3641   match(eBXRegI);
 3642   match(eCXRegI);
 3643   match(eDXRegI);
 3644   match(eDIRegI);
 3645   match(eSIRegI);
 3646 
 3647   format %{ %}
 3648   interface(REG_INTER);
 3649 %}
 3650 
 3651 // Subset of Integer Register
 3652 operand xRegI(rRegI reg) %{
 3653   constraint(ALLOC_IN_RC(int_x_reg));
 3654   match(reg);
 3655   match(eAXRegI);
 3656   match(eBXRegI);
 3657   match(eCXRegI);
 3658   match(eDXRegI);
 3659 
 3660   format %{ %}
 3661   interface(REG_INTER);
 3662 %}
 3663 
 3664 // Special Registers
 3665 operand eAXRegI(xRegI reg) %{
 3666   constraint(ALLOC_IN_RC(eax_reg));
 3667   match(reg);
 3668   match(rRegI);
 3669 
 3670   format %{ "EAX" %}
 3671   interface(REG_INTER);
 3672 %}
 3673 
 3674 // Special Registers
 3675 operand eBXRegI(xRegI reg) %{
 3676   constraint(ALLOC_IN_RC(ebx_reg));
 3677   match(reg);
 3678   match(rRegI);
 3679 
 3680   format %{ "EBX" %}
 3681   interface(REG_INTER);
 3682 %}
 3683 
 3684 operand eCXRegI(xRegI reg) %{
 3685   constraint(ALLOC_IN_RC(ecx_reg));
 3686   match(reg);
 3687   match(rRegI);
 3688 
 3689   format %{ "ECX" %}
 3690   interface(REG_INTER);
 3691 %}
 3692 
 3693 operand eDXRegI(xRegI reg) %{
 3694   constraint(ALLOC_IN_RC(edx_reg));
 3695   match(reg);
 3696   match(rRegI);
 3697 
 3698   format %{ "EDX" %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand eDIRegI(xRegI reg) %{
 3703   constraint(ALLOC_IN_RC(edi_reg));
 3704   match(reg);
 3705   match(rRegI);
 3706 
 3707   format %{ "EDI" %}
 3708   interface(REG_INTER);
 3709 %}
 3710 
 3711 operand naxRegI() %{
 3712   constraint(ALLOC_IN_RC(nax_reg));
 3713   match(RegI);
 3714   match(eCXRegI);
 3715   match(eDXRegI);
 3716   match(eSIRegI);
 3717   match(eDIRegI);
 3718 
 3719   format %{ %}
 3720   interface(REG_INTER);
 3721 %}
 3722 
 3723 operand nadxRegI() %{
 3724   constraint(ALLOC_IN_RC(nadx_reg));
 3725   match(RegI);
 3726   match(eBXRegI);
 3727   match(eCXRegI);
 3728   match(eSIRegI);
 3729   match(eDIRegI);
 3730 
 3731   format %{ %}
 3732   interface(REG_INTER);
 3733 %}
 3734 
 3735 operand ncxRegI() %{
 3736   constraint(ALLOC_IN_RC(ncx_reg));
 3737   match(RegI);
 3738   match(eAXRegI);
 3739   match(eDXRegI);
 3740   match(eSIRegI);
 3741   match(eDIRegI);
 3742 
 3743   format %{ %}
 3744   interface(REG_INTER);
 3745 %}
 3746 
 3747 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3748 //
 3749 operand eSIRegI(xRegI reg) %{
 3750    constraint(ALLOC_IN_RC(esi_reg));
 3751    match(reg);
 3752    match(rRegI);
 3753 
 3754    format %{ "ESI" %}
 3755    interface(REG_INTER);
 3756 %}
 3757 
 3758 // Pointer Register
 3759 operand anyRegP() %{
 3760   constraint(ALLOC_IN_RC(any_reg));
 3761   match(RegP);
 3762   match(eAXRegP);
 3763   match(eBXRegP);
 3764   match(eCXRegP);
 3765   match(eDIRegP);
 3766   match(eRegP);
 3767 
 3768   format %{ %}
 3769   interface(REG_INTER);
 3770 %}
 3771 
 3772 operand eRegP() %{
 3773   constraint(ALLOC_IN_RC(int_reg));
 3774   match(RegP);
 3775   match(eAXRegP);
 3776   match(eBXRegP);
 3777   match(eCXRegP);
 3778   match(eDIRegP);
 3779 
 3780   format %{ %}
 3781   interface(REG_INTER);
 3782 %}
 3783 
 3784 operand rRegP() %{
 3785   constraint(ALLOC_IN_RC(int_reg));
 3786   match(RegP);
 3787   match(eAXRegP);
 3788   match(eBXRegP);
 3789   match(eCXRegP);
 3790   match(eDIRegP);
 3791 
 3792   format %{ %}
 3793   interface(REG_INTER);
 3794 %}
 3795 
 3796 // On Windows 95, EBP is not safe to use for implicit null tests.
 3797 operand eRegP_no_EBP() %{
 3798   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3799   match(RegP);
 3800   match(eAXRegP);
 3801   match(eBXRegP);
 3802   match(eCXRegP);
 3803   match(eDIRegP);
 3804 
 3805   op_cost(100);
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand naxRegP() %{
 3811   constraint(ALLOC_IN_RC(nax_reg));
 3812   match(RegP);
 3813   match(eBXRegP);
 3814   match(eDXRegP);
 3815   match(eCXRegP);
 3816   match(eSIRegP);
 3817   match(eDIRegP);
 3818 
 3819   format %{ %}
 3820   interface(REG_INTER);
 3821 %}
 3822 
 3823 operand nabxRegP() %{
 3824   constraint(ALLOC_IN_RC(nabx_reg));
 3825   match(RegP);
 3826   match(eCXRegP);
 3827   match(eDXRegP);
 3828   match(eSIRegP);
 3829   match(eDIRegP);
 3830 
 3831   format %{ %}
 3832   interface(REG_INTER);
 3833 %}
 3834 
 3835 operand pRegP() %{
 3836   constraint(ALLOC_IN_RC(p_reg));
 3837   match(RegP);
 3838   match(eBXRegP);
 3839   match(eDXRegP);
 3840   match(eSIRegP);
 3841   match(eDIRegP);
 3842 
 3843   format %{ %}
 3844   interface(REG_INTER);
 3845 %}
 3846 
 3847 // Special Registers
 3848 // Return a pointer value
 3849 operand eAXRegP(eRegP reg) %{
 3850   constraint(ALLOC_IN_RC(eax_reg));
 3851   match(reg);
 3852   format %{ "EAX" %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 // Used in AtomicAdd
 3857 operand eBXRegP(eRegP reg) %{
 3858   constraint(ALLOC_IN_RC(ebx_reg));
 3859   match(reg);
 3860   format %{ "EBX" %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 // Tail-call (interprocedural jump) to interpreter
 3865 operand eCXRegP(eRegP reg) %{
 3866   constraint(ALLOC_IN_RC(ecx_reg));
 3867   match(reg);
 3868   format %{ "ECX" %}
 3869   interface(REG_INTER);
 3870 %}
 3871 
 3872 operand eDXRegP(eRegP reg) %{
 3873   constraint(ALLOC_IN_RC(edx_reg));
 3874   match(reg);
 3875   format %{ "EDX" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 
 3879 operand eSIRegP(eRegP reg) %{
 3880   constraint(ALLOC_IN_RC(esi_reg));
 3881   match(reg);
 3882   format %{ "ESI" %}
 3883   interface(REG_INTER);
 3884 %}
 3885 
 3886 // Used in rep stosw
 3887 operand eDIRegP(eRegP reg) %{
 3888   constraint(ALLOC_IN_RC(edi_reg));
 3889   match(reg);
 3890   format %{ "EDI" %}
 3891   interface(REG_INTER);
 3892 %}
 3893 
 3894 operand eRegL() %{
 3895   constraint(ALLOC_IN_RC(long_reg));
 3896   match(RegL);
 3897   match(eADXRegL);
 3898 
 3899   format %{ %}
 3900   interface(REG_INTER);
 3901 %}
 3902 
 3903 operand eADXRegL( eRegL reg ) %{
 3904   constraint(ALLOC_IN_RC(eadx_reg));
 3905   match(reg);
 3906 
 3907   format %{ "EDX:EAX" %}
 3908   interface(REG_INTER);
 3909 %}
 3910 
 3911 operand eBCXRegL( eRegL reg ) %{
 3912   constraint(ALLOC_IN_RC(ebcx_reg));
 3913   match(reg);
 3914 
 3915   format %{ "EBX:ECX" %}
 3916   interface(REG_INTER);
 3917 %}
 3918 
 3919 // Special case for integer high multiply
 3920 operand eADXRegL_low_only() %{
 3921   constraint(ALLOC_IN_RC(eadx_reg));
 3922   match(RegL);
 3923 
 3924   format %{ "EAX" %}
 3925   interface(REG_INTER);
 3926 %}
 3927 
 3928 // Flags register, used as output of compare instructions
 3929 operand rFlagsReg() %{
 3930   constraint(ALLOC_IN_RC(int_flags));
 3931   match(RegFlags);
 3932 
 3933   format %{ "EFLAGS" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 // Flags register, used as output of compare instructions
 3938 operand eFlagsReg() %{
 3939   constraint(ALLOC_IN_RC(int_flags));
 3940   match(RegFlags);
 3941 
 3942   format %{ "EFLAGS" %}
 3943   interface(REG_INTER);
 3944 %}
 3945 
 3946 // Flags register, used as output of FLOATING POINT compare instructions
 3947 operand eFlagsRegU() %{
 3948   constraint(ALLOC_IN_RC(int_flags));
 3949   match(RegFlags);
 3950 
 3951   format %{ "EFLAGS_U" %}
 3952   interface(REG_INTER);
 3953 %}
 3954 
 3955 operand eFlagsRegUCF() %{
 3956   constraint(ALLOC_IN_RC(int_flags));
 3957   match(RegFlags);
 3958   predicate(false);
 3959 
 3960   format %{ "EFLAGS_U_CF" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 // Condition Code Register used by long compare
 3965 operand flagsReg_long_LTGE() %{
 3966   constraint(ALLOC_IN_RC(int_flags));
 3967   match(RegFlags);
 3968   format %{ "FLAGS_LTGE" %}
 3969   interface(REG_INTER);
 3970 %}
 3971 operand flagsReg_long_EQNE() %{
 3972   constraint(ALLOC_IN_RC(int_flags));
 3973   match(RegFlags);
 3974   format %{ "FLAGS_EQNE" %}
 3975   interface(REG_INTER);
 3976 %}
 3977 operand flagsReg_long_LEGT() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980   format %{ "FLAGS_LEGT" %}
 3981   interface(REG_INTER);
 3982 %}
 3983 
 3984 // Condition Code Register used by unsigned long compare
 3985 operand flagsReg_ulong_LTGE() %{
 3986   constraint(ALLOC_IN_RC(int_flags));
 3987   match(RegFlags);
 3988   format %{ "FLAGS_U_LTGE" %}
 3989   interface(REG_INTER);
 3990 %}
 3991 operand flagsReg_ulong_EQNE() %{
 3992   constraint(ALLOC_IN_RC(int_flags));
 3993   match(RegFlags);
 3994   format %{ "FLAGS_U_EQNE" %}
 3995   interface(REG_INTER);
 3996 %}
 3997 operand flagsReg_ulong_LEGT() %{
 3998   constraint(ALLOC_IN_RC(int_flags));
 3999   match(RegFlags);
 4000   format %{ "FLAGS_U_LEGT" %}
 4001   interface(REG_INTER);
 4002 %}
 4003 
 4004 // Float register operands
 4005 operand regDPR() %{
 4006   predicate( UseSSE < 2 );
 4007   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4008   match(RegD);
 4009   match(regDPR1);
 4010   match(regDPR2);
 4011   format %{ %}
 4012   interface(REG_INTER);
 4013 %}
 4014 
 4015 operand regDPR1(regDPR reg) %{
 4016   predicate( UseSSE < 2 );
 4017   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4018   match(reg);
 4019   format %{ "FPR1" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 
 4023 operand regDPR2(regDPR reg) %{
 4024   predicate( UseSSE < 2 );
 4025   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4026   match(reg);
 4027   format %{ "FPR2" %}
 4028   interface(REG_INTER);
 4029 %}
 4030 
 4031 operand regnotDPR1(regDPR reg) %{
 4032   predicate( UseSSE < 2 );
 4033   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4034   match(reg);
 4035   format %{ %}
 4036   interface(REG_INTER);
 4037 %}
 4038 
 4039 // Float register operands
 4040 operand regFPR() %{
 4041   predicate( UseSSE < 2 );
 4042   constraint(ALLOC_IN_RC(fp_flt_reg));
 4043   match(RegF);
 4044   match(regFPR1);
 4045   format %{ %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 // Float register operands
 4050 operand regFPR1(regFPR reg) %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4053   match(reg);
 4054   format %{ "FPR1" %}
 4055   interface(REG_INTER);
 4056 %}
 4057 
 4058 // XMM Float register operands
 4059 operand regF() %{
 4060   predicate( UseSSE>=1 );
 4061   constraint(ALLOC_IN_RC(float_reg_legacy));
 4062   match(RegF);
 4063   format %{ %}
 4064   interface(REG_INTER);
 4065 %}
 4066 
 4067 operand legRegF() %{
 4068   predicate( UseSSE>=1 );
 4069   constraint(ALLOC_IN_RC(float_reg_legacy));
 4070   match(RegF);
 4071   format %{ %}
 4072   interface(REG_INTER);
 4073 %}
 4074 
 4075 // Float register operands
 4076 operand vlRegF() %{
 4077    constraint(ALLOC_IN_RC(float_reg_vl));
 4078    match(RegF);
 4079 
 4080    format %{ %}
 4081    interface(REG_INTER);
 4082 %}
 4083 
 4084 // XMM Double register operands
 4085 operand regD() %{
 4086   predicate( UseSSE>=2 );
 4087   constraint(ALLOC_IN_RC(double_reg_legacy));
 4088   match(RegD);
 4089   format %{ %}
 4090   interface(REG_INTER);
 4091 %}
 4092 
 4093 // Double register operands
 4094 operand legRegD() %{
 4095   predicate( UseSSE>=2 );
 4096   constraint(ALLOC_IN_RC(double_reg_legacy));
 4097   match(RegD);
 4098   format %{ %}
 4099   interface(REG_INTER);
 4100 %}
 4101 
 4102 operand vlRegD() %{
 4103    constraint(ALLOC_IN_RC(double_reg_vl));
 4104    match(RegD);
 4105 
 4106    format %{ %}
 4107    interface(REG_INTER);
 4108 %}
 4109 
 4110 //----------Memory Operands----------------------------------------------------
 4111 // Direct Memory Operand
 4112 operand direct(immP addr) %{
 4113   match(addr);
 4114 
 4115   format %{ "[$addr]" %}
 4116   interface(MEMORY_INTER) %{
 4117     base(0xFFFFFFFF);
 4118     index(0x4);
 4119     scale(0x0);
 4120     disp($addr);
 4121   %}
 4122 %}
 4123 
 4124 // Indirect Memory Operand
 4125 operand indirect(eRegP reg) %{
 4126   constraint(ALLOC_IN_RC(int_reg));
 4127   match(reg);
 4128 
 4129   format %{ "[$reg]" %}
 4130   interface(MEMORY_INTER) %{
 4131     base($reg);
 4132     index(0x4);
 4133     scale(0x0);
 4134     disp(0x0);
 4135   %}
 4136 %}
 4137 
 4138 // Indirect Memory Plus Short Offset Operand
 4139 operand indOffset8(eRegP reg, immI8 off) %{
 4140   match(AddP reg off);
 4141 
 4142   format %{ "[$reg + $off]" %}
 4143   interface(MEMORY_INTER) %{
 4144     base($reg);
 4145     index(0x4);
 4146     scale(0x0);
 4147     disp($off);
 4148   %}
 4149 %}
 4150 
 4151 // Indirect Memory Plus Long Offset Operand
 4152 operand indOffset32(eRegP reg, immI off) %{
 4153   match(AddP reg off);
 4154 
 4155   format %{ "[$reg + $off]" %}
 4156   interface(MEMORY_INTER) %{
 4157     base($reg);
 4158     index(0x4);
 4159     scale(0x0);
 4160     disp($off);
 4161   %}
 4162 %}
 4163 
 4164 // Indirect Memory Plus Long Offset Operand
 4165 operand indOffset32X(rRegI reg, immP off) %{
 4166   match(AddP off reg);
 4167 
 4168   format %{ "[$reg + $off]" %}
 4169   interface(MEMORY_INTER) %{
 4170     base($reg);
 4171     index(0x4);
 4172     scale(0x0);
 4173     disp($off);
 4174   %}
 4175 %}
 4176 
 4177 // Indirect Memory Plus Index Register Plus Offset Operand
 4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4179   match(AddP (AddP reg ireg) off);
 4180 
 4181   op_cost(10);
 4182   format %{"[$reg + $off + $ireg]" %}
 4183   interface(MEMORY_INTER) %{
 4184     base($reg);
 4185     index($ireg);
 4186     scale(0x0);
 4187     disp($off);
 4188   %}
 4189 %}
 4190 
 4191 // Indirect Memory Plus Index Register Plus Offset Operand
 4192 operand indIndex(eRegP reg, rRegI ireg) %{
 4193   match(AddP reg ireg);
 4194 
 4195   op_cost(10);
 4196   format %{"[$reg + $ireg]" %}
 4197   interface(MEMORY_INTER) %{
 4198     base($reg);
 4199     index($ireg);
 4200     scale(0x0);
 4201     disp(0x0);
 4202   %}
 4203 %}
 4204 
 4205 // // -------------------------------------------------------------------------
 4206 // // 486 architecture doesn't support "scale * index + offset" without a base
 4207 // // -------------------------------------------------------------------------
 4208 // // Scaled Memory Operands
 4209 // // Indirect Memory Times Scale Plus Offset Operand
 4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4211 //   match(AddP off (LShiftI ireg scale));
 4212 //
 4213 //   op_cost(10);
 4214 //   format %{"[$off + $ireg << $scale]" %}
 4215 //   interface(MEMORY_INTER) %{
 4216 //     base(0x4);
 4217 //     index($ireg);
 4218 //     scale($scale);
 4219 //     disp($off);
 4220 //   %}
 4221 // %}
 4222 
 4223 // Indirect Memory Times Scale Plus Index Register
 4224 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4225   match(AddP reg (LShiftI ireg scale));
 4226 
 4227   op_cost(10);
 4228   format %{"[$reg + $ireg << $scale]" %}
 4229   interface(MEMORY_INTER) %{
 4230     base($reg);
 4231     index($ireg);
 4232     scale($scale);
 4233     disp(0x0);
 4234   %}
 4235 %}
 4236 
 4237 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4238 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4239   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4240 
 4241   op_cost(10);
 4242   format %{"[$reg + $off + $ireg << $scale]" %}
 4243   interface(MEMORY_INTER) %{
 4244     base($reg);
 4245     index($ireg);
 4246     scale($scale);
 4247     disp($off);
 4248   %}
 4249 %}
 4250 
 4251 //----------Load Long Memory Operands------------------------------------------
 4252 // The load-long idiom will use its address expression again after loading
 4253 // the first word of the long.  If the load-long destination overlaps with
 4254 // registers used in the addressing expression, the 2nd half will be loaded
 4255 // from a clobbered address.  Fix this by requiring that load-long use
 4256 // address registers that do not overlap with the load-long target.
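      // A sketch of the hazard being avoided (register choice is illustrative):
      //   MOV EAX,[EAX]      ; load the low word -- the base register is clobbered
      //   MOV EDX,[EAX+4]    ; WRONG: the high word's address is now garbage
      // Constraining the base to ESI below keeps the address live across both loads.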
 4257 
 4258 // load-long support
 4259 operand load_long_RegP() %{
 4260   constraint(ALLOC_IN_RC(esi_reg));
 4261   match(RegP);
 4262   match(eSIRegP);
 4263   op_cost(100);
 4264   format %{  %}
 4265   interface(REG_INTER);
 4266 %}
 4267 
 4268 // Indirect Memory Operand Long
 4269 operand load_long_indirect(load_long_RegP reg) %{
 4270   constraint(ALLOC_IN_RC(esi_reg));
 4271   match(reg);
 4272 
 4273   format %{ "[$reg]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index(0x4);
 4277     scale(0x0);
 4278     disp(0x0);
 4279   %}
 4280 %}
 4281 
 4282 // Indirect Memory Plus Long Offset Operand
 4283 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4284   match(AddP reg off);
 4285 
 4286   format %{ "[$reg + $off]" %}
 4287   interface(MEMORY_INTER) %{
 4288     base($reg);
 4289     index(0x4);
 4290     scale(0x0);
 4291     disp($off);
 4292   %}
 4293 %}
 4294 
 4295 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4296 
 4297 
 4298 //----------Special Memory Operands--------------------------------------------
 4299 // Stack Slot Operand - This operand is used for loading and storing temporary
 4300 //                      values on the stack where a match requires a value to
 4301 //                      flow through memory.
 4302 operand stackSlotP(sRegP reg) %{
 4303   constraint(ALLOC_IN_RC(stack_slots));
 4304   // No match rule because this operand is only generated in matching
 4305   format %{ "[$reg]" %}
 4306   interface(MEMORY_INTER) %{
 4307     base(0x4);   // ESP
 4308     index(0x4);  // No Index
 4309     scale(0x0);  // No Scale
 4310     disp($reg);  // Stack Offset
 4311   %}
 4312 %}
 4313 
 4314 operand stackSlotI(sRegI reg) %{
 4315   constraint(ALLOC_IN_RC(stack_slots));
 4316   // No match rule because this operand is only generated in matching
 4317   format %{ "[$reg]" %}
 4318   interface(MEMORY_INTER) %{
 4319     base(0x4);   // ESP
 4320     index(0x4);  // No Index
 4321     scale(0x0);  // No Scale
 4322     disp($reg);  // Stack Offset
 4323   %}
 4324 %}
 4325 
 4326 operand stackSlotF(sRegF reg) %{
 4327   constraint(ALLOC_IN_RC(stack_slots));
 4328   // No match rule because this operand is only generated in matching
 4329   format %{ "[$reg]" %}
 4330   interface(MEMORY_INTER) %{
 4331     base(0x4);   // ESP
 4332     index(0x4);  // No Index
 4333     scale(0x0);  // No Scale
 4334     disp($reg);  // Stack Offset
 4335   %}
 4336 %}
 4337 
 4338 operand stackSlotD(sRegD reg) %{
 4339   constraint(ALLOC_IN_RC(stack_slots));
 4340   // No match rule because this operand is only generated in matching
 4341   format %{ "[$reg]" %}
 4342   interface(MEMORY_INTER) %{
 4343     base(0x4);   // ESP
 4344     index(0x4);  // No Index
 4345     scale(0x0);  // No Scale
 4346     disp($reg);  // Stack Offset
 4347   %}
 4348 %}
 4349 
 4350 operand stackSlotL(sRegL reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 //----------Conditional Branch Operands----------------------------------------
 4363 // Comparison Op  - This is the operation of the comparison, and is limited to
 4364 //                  the following set of codes:
 4365 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4366 //
 4367 // Other attributes of the comparison, such as unsignedness, are specified
 4368 // by the comparison instruction that sets a condition code flags register.
 4369 // That result is represented by a flags operand whose subtype is appropriate
 4370 // to the unsignedness (etc.) of the comparison.
 4371 //
 4372 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4373 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4374 // by matching a specific subtype of Bool operand below, such as cmpOpU.
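      // For example, a Jcc is emitted by OR'ing the condition code below into an
      // opcode base: not_equal (0x5) yields 0x70|0x5 = 0x75 (JNE rel8), or
      // 0x0F 0x80|0x5 = 0x0F 0x85 for the rel32 form.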
 4375 
 4376 // Comparison Code
 4377 operand cmpOp() %{
 4378   match(Bool);
 4379 
 4380   format %{ "" %}
 4381   interface(COND_INTER) %{
 4382     equal(0x4, "e");
 4383     not_equal(0x5, "ne");
 4384     less(0xC, "l");
 4385     greater_equal(0xD, "ge");
 4386     less_equal(0xE, "le");
 4387     greater(0xF, "g");
 4388     overflow(0x0, "o");
 4389     no_overflow(0x1, "no");
 4390   %}
 4391 %}
 4392 
 4393 // Comparison Code, unsigned compare.  Used by FP also, with
 4394 // C2 (unordered) turned into GT or LT already.  The other bits
 4395 // C0 and C3 are turned into Carry & Zero flags.
 4396 operand cmpOpU() %{
 4397   match(Bool);
 4398 
 4399   format %{ "" %}
 4400   interface(COND_INTER) %{
 4401     equal(0x4, "e");
 4402     not_equal(0x5, "ne");
 4403     less(0x2, "b");
 4404     greater_equal(0x3, "nb");
 4405     less_equal(0x6, "be");
 4406     greater(0x7, "nbe");
 4407     overflow(0x0, "o");
 4408     no_overflow(0x1, "no");
 4409   %}
 4410 %}
 4411 
 4412 // Floating comparisons that don't require any fixup for the unordered case
 4413 operand cmpOpUCF() %{
 4414   match(Bool);
 4415   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4416             n->as_Bool()->_test._test == BoolTest::ge ||
 4417             n->as_Bool()->_test._test == BoolTest::le ||
 4418             n->as_Bool()->_test._test == BoolTest::gt);
 4419   format %{ "" %}
 4420   interface(COND_INTER) %{
 4421     equal(0x4, "e");
 4422     not_equal(0x5, "ne");
 4423     less(0x2, "b");
 4424     greater_equal(0x3, "nb");
 4425     less_equal(0x6, "be");
 4426     greater(0x7, "nbe");
 4427     overflow(0x0, "o");
 4428     no_overflow(0x1, "no");
 4429   %}
 4430 %}
 4431 
 4432 
 4433 // Floating comparisons that can be fixed up with extra conditional jumps
 4434 operand cmpOpUCF2() %{
 4435   match(Bool);
 4436   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4437             n->as_Bool()->_test._test == BoolTest::eq);
 4438   format %{ "" %}
 4439   interface(COND_INTER) %{
 4440     equal(0x4, "e");
 4441     not_equal(0x5, "ne");
 4442     less(0x2, "b");
 4443     greater_equal(0x3, "nb");
 4444     less_equal(0x6, "be");
 4445     greater(0x7, "nbe");
 4446     overflow(0x0, "o");
 4447     no_overflow(0x1, "no");
 4448   %}
 4449 %}
 4450 
 4451 // Comparison Code for FP conditional move
 4452 operand cmpOp_fcmov() %{
 4453   match(Bool);
 4454 
 4455   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4456             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4457   format %{ "" %}
 4458   interface(COND_INTER) %{
 4459     equal        (0x0C8);
 4460     not_equal    (0x1C8);
 4461     less         (0x0C0);
 4462     greater_equal(0x1C0);
 4463     less_equal   (0x0D0);
 4464     greater      (0x1D0);
 4465     overflow(0x0, "o"); // not really supported by the instruction
 4466     no_overflow(0x1, "no"); // not really supported by the instruction
 4467   %}
 4468 %}
 4469 
 4470 // Comparison Code used in long compares
 4471 operand cmpOp_commute() %{
 4472   match(Bool);
 4473 
 4474   format %{ "" %}
 4475   interface(COND_INTER) %{
 4476     equal(0x4, "e");
 4477     not_equal(0x5, "ne");
 4478     less(0xF, "g");
 4479     greater_equal(0xE, "le");
 4480     less_equal(0xD, "ge");
 4481     greater(0xC, "l");
 4482     overflow(0x0, "o");
 4483     no_overflow(0x1, "no");
 4484   %}
 4485 %}
 4486 
 4487 // Comparison Code used in unsigned long compares
 4488 operand cmpOpU_commute() %{
 4489   match(Bool);
 4490 
 4491   format %{ "" %}
 4492   interface(COND_INTER) %{
 4493     equal(0x4, "e");
 4494     not_equal(0x5, "ne");
 4495     less(0x7, "nbe");
 4496     greater_equal(0x6, "be");
 4497     less_equal(0x3, "nb");
 4498     greater(0x2, "b");
 4499     overflow(0x0, "o");
 4500     no_overflow(0x1, "no");
 4501   %}
 4502 %}
 4503 
 4504 //----------OPERAND CLASSES----------------------------------------------------
 4505 // Operand Classes are groups of operands that are used to simplify
 4506 // instruction definitions by not requiring the AD writer to specify separate
 4507 // instructions for every form of operand when the instruction accepts
 4508 // multiple operand types with the same basic encoding and format.  The classic
 4509 // case of this is memory operands.
 4510 
 4511 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4512                indIndex, indIndexScale, indIndexScaleOffset);
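      // For example, a single load definition written against this class, such
      // as loadI(rRegI dst, memory mem) in the Load Instructions section below,
      // covers every addressing form listed above without needing a separate
      // instruct per form.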
 4513 
 4514 // Long memory operations are encoded as 2 instructions, the second using a
 4515 // +4 offset.  This means some kind of offset is always required, so an oop
 4516 // cannot be used as the offset (as is done when working on static globals).
 4517 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4518                     indIndex, indIndexScale, indIndexScaleOffset);
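      // For example, loadL below builds its two 32-bit moves from the same
      // base/index/scale with displacements $disp and $disp+4, which is why an
      // oop cannot serve as the displacement here.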
 4519 
 4520 
 4521 //----------PIPELINE-----------------------------------------------------------
 4522 // Rules which define the behavior of the target architecture's pipeline.
 4523 pipeline %{
 4524 
 4525 //----------ATTRIBUTES---------------------------------------------------------
 4526 attributes %{
 4527   variable_size_instructions;        // Variable-size instructions
 4528   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4529   instruction_unit_size = 1;         // An instruction is 1 byte long
 4530   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4531   instruction_fetch_units = 1;       // of 16 bytes
 4532 
 4533   // List of nop instructions
 4534   nops( MachNop );
 4535 %}
 4536 
 4537 //----------RESOURCES----------------------------------------------------------
 4538 // Resources are the functional units available to the machine
 4539 
 4540 // Generic P2/P3 pipeline
 4541 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4542 // 3 instructions decoded per cycle.
 4543 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4544 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4545 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4546            MS0, MS1, MEM = MS0 | MS1,
 4547            BR, FPU,
 4548            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4549 
 4550 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4551 // Pipeline Description specifies the stages in the machine's pipeline
 4552 
 4553 // Generic P2/P3 pipeline
 4554 pipe_desc(S0, S1, S2, S3, S4, S5);
 4555 
 4556 //----------PIPELINE CLASSES---------------------------------------------------
 4557 // Pipeline Classes describe the stages in which input and output are
 4558 // referenced by the hardware pipeline.
 4559 
 4560 // Naming convention: ialu or fpu
 4561 // Then: _reg
 4562 // Then: _reg if there is a 2nd register
 4563 // Then: _long if it's a pair of instructions implementing a long
 4564 // Then: _fat if it requires the big decoder
 4565 //   Or: _mem if it requires the big decoder and a memory unit.
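      // Reading ialu_reg_mem below by this convention: an integer ALU operation
      // ("ialu") with a register destination ("_reg") and a memory source
      // ("_mem"), which therefore needs the big decoder D0 and a MEM unit.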
 4566 
 4567 // Integer ALU reg operation
 4568 pipe_class ialu_reg(rRegI dst) %{
 4569     single_instruction;
 4570     dst    : S4(write);
 4571     dst    : S3(read);
 4572     DECODE : S0;        // any decoder
 4573     ALU    : S3;        // any alu
 4574 %}
 4575 
 4576 // Long ALU reg operation
 4577 pipe_class ialu_reg_long(eRegL dst) %{
 4578     instruction_count(2);
 4579     dst    : S4(write);
 4580     dst    : S3(read);
 4581     DECODE : S0(2);     // any 2 decoders
 4582     ALU    : S3(2);     // both alus
 4583 %}
 4584 
 4585 // Integer ALU reg operation using big decoder
 4586 pipe_class ialu_reg_fat(rRegI dst) %{
 4587     single_instruction;
 4588     dst    : S4(write);
 4589     dst    : S3(read);
 4590     D0     : S0;        // big decoder only
 4591     ALU    : S3;        // any alu
 4592 %}
 4593 
 4594 // Long ALU reg operation using big decoder
 4595 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4596     instruction_count(2);
 4597     dst    : S4(write);
 4598     dst    : S3(read);
 4599     D0     : S0(2);     // big decoder only; twice
 4600     ALU    : S3(2);     // any 2 alus
 4601 %}
 4602 
 4603 // Integer ALU reg-reg operation
 4604 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4605     single_instruction;
 4606     dst    : S4(write);
 4607     src    : S3(read);
 4608     DECODE : S0;        // any decoder
 4609     ALU    : S3;        // any alu
 4610 %}
 4611 
 4612 // Long ALU reg-reg operation
 4613 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0(2);     // any 2 decoders
 4618     ALU    : S3(2);     // both alus
 4619 %}
 4620 
 4621 // Integer ALU reg-reg operation
 4622 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4623     single_instruction;
 4624     dst    : S4(write);
 4625     src    : S3(read);
 4626     D0     : S0;        // big decoder only
 4627     ALU    : S3;        // any alu
 4628 %}
 4629 
 4630 // Long ALU reg-reg operation
 4631 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4632     instruction_count(2);
 4633     dst    : S4(write);
 4634     src    : S3(read);
 4635     D0     : S0(2);     // big decoder only; twice
 4636     ALU    : S3(2);     // both alus
 4637 %}
 4638 
 4639 // Integer ALU reg-mem operation
 4640 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4641     single_instruction;
 4642     dst    : S5(write);
 4643     mem    : S3(read);
 4644     D0     : S0;        // big decoder only
 4645     ALU    : S4;        // any alu
 4646     MEM    : S3;        // any mem
 4647 %}
 4648 
 4649 // Long ALU reg-mem operation
 4650 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4651     instruction_count(2);
 4652     dst    : S5(write);
 4653     mem    : S3(read);
 4654     D0     : S0(2);     // big decoder only; twice
 4655     ALU    : S4(2);     // any 2 alus
 4656     MEM    : S3(2);     // both mems
 4657 %}
 4658 
 4659 // Integer mem operation (prefetch)
 4660 pipe_class ialu_mem(memory mem)
 4661 %{
 4662     single_instruction;
 4663     mem    : S3(read);
 4664     D0     : S0;        // big decoder only
 4665     MEM    : S3;        // any mem
 4666 %}
 4667 
 4668 // Integer Store to Memory
 4669 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4670     single_instruction;
 4671     mem    : S3(read);
 4672     src    : S5(read);
 4673     D0     : S0;        // big decoder only
 4674     ALU    : S4;        // any alu
 4675     MEM    : S3;
 4676 %}
 4677 
 4678 // Long Store to Memory
 4679 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4680     instruction_count(2);
 4681     mem    : S3(read);
 4682     src    : S5(read);
 4683     D0     : S0(2);     // big decoder only; twice
 4684     ALU    : S4(2);     // any 2 alus
 4685     MEM    : S3(2);     // Both mems
 4686 %}
 4687 
 4688 // Integer Store to Memory
 4689 pipe_class ialu_mem_imm(memory mem) %{
 4690     single_instruction;
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     ALU    : S4;        // any alu
 4694     MEM    : S3;
 4695 %}
 4696 
 4697 // Integer ALU0 reg-reg operation
 4698 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4699     single_instruction;
 4700     dst    : S4(write);
 4701     src    : S3(read);
 4702     D0     : S0;        // Big decoder only
 4703     ALU0   : S3;        // only alu0
 4704 %}
 4705 
 4706 // Integer ALU0 reg-mem operation
 4707 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4708     single_instruction;
 4709     dst    : S5(write);
 4710     mem    : S3(read);
 4711     D0     : S0;        // big decoder only
 4712     ALU0   : S4;        // ALU0 only
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 // Integer ALU reg-reg operation
 4717 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4718     single_instruction;
 4719     cr     : S4(write);
 4720     src1   : S3(read);
 4721     src2   : S3(read);
 4722     DECODE : S0;        // any decoder
 4723     ALU    : S3;        // any alu
 4724 %}
 4725 
 4726 // Integer ALU reg-imm operation
 4727 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4728     single_instruction;
 4729     cr     : S4(write);
 4730     src1   : S3(read);
 4731     DECODE : S0;        // any decoder
 4732     ALU    : S3;        // any alu
 4733 %}
 4734 
 4735 // Integer ALU reg-mem operation
 4736 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4737     single_instruction;
 4738     cr     : S4(write);
 4739     src1   : S3(read);
 4740     src2   : S3(read);
 4741     D0     : S0;        // big decoder only
 4742     ALU    : S4;        // any alu
 4743     MEM    : S3;
 4744 %}
 4745 
 4746 // Conditional move reg-reg
 4747 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4748     instruction_count(4);
 4749     y      : S4(read);
 4750     q      : S3(read);
 4751     p      : S3(read);
 4752     DECODE : S0(4);     // any decoder
 4753 %}
 4754 
 4755 // Conditional move reg-reg
 4756 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4757     single_instruction;
 4758     dst    : S4(write);
 4759     src    : S3(read);
 4760     cr     : S3(read);
 4761     DECODE : S0;        // any decoder
 4762 %}
 4763 
 4764 // Conditional move reg-mem
 4765 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4766     single_instruction;
 4767     dst    : S4(write);
 4768     src    : S3(read);
 4769     cr     : S3(read);
 4770     DECODE : S0;        // any decoder
 4771     MEM    : S3;
 4772 %}
 4773 
 4774 // Conditional move reg-reg long
 4775 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4776     single_instruction;
 4777     dst    : S4(write);
 4778     src    : S3(read);
 4779     cr     : S3(read);
 4780     DECODE : S0(2);     // any 2 decoders
 4781 %}
 4782 
 4783 // Conditional move double reg-reg
 4784 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4785     single_instruction;
 4786     dst    : S4(write);
 4787     src    : S3(read);
 4788     cr     : S3(read);
 4789     DECODE : S0;        // any decoder
 4790 %}
 4791 
 4792 // Float reg-reg operation
 4793 pipe_class fpu_reg(regDPR dst) %{
 4794     instruction_count(2);
 4795     dst    : S3(read);
 4796     DECODE : S0(2);     // any 2 decoders
 4797     FPU    : S3;
 4798 %}
 4799 
 4800 // Float reg-reg operation
 4801 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4802     instruction_count(2);
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     DECODE : S0(2);     // any 2 decoders
 4806     FPU    : S3;
 4807 %}
 4808 
 4809 // Float reg-reg operation
 4810 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4811     instruction_count(3);
 4812     dst    : S4(write);
 4813     src1   : S3(read);
 4814     src2   : S3(read);
 4815     DECODE : S0(3);     // any 3 decoders
 4816     FPU    : S3(2);
 4817 %}
 4818 
 4819 // Float reg-reg operation
 4820 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4821     instruction_count(4);
 4822     dst    : S4(write);
 4823     src1   : S3(read);
 4824     src2   : S3(read);
 4825     src3   : S3(read);
 4826     DECODE : S0(4);     // 4 decode slots
 4827     FPU    : S3(2);
 4828 %}
 4829 
 4830 // Float reg-reg operation
 4831 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4832     instruction_count(4);
 4833     dst    : S4(write);
 4834     src1   : S3(read);
 4835     src2   : S3(read);
 4836     src3   : S3(read);
 4837     DECODE : S1(3);     // any 3 decoders
 4838     D0     : S0;        // Big decoder only
 4839     FPU    : S3(2);
 4840     MEM    : S3;
 4841 %}
 4842 
 4843 // Float reg-mem operation
 4844 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4845     instruction_count(2);
 4846     dst    : S5(write);
 4847     mem    : S3(read);
 4848     D0     : S0;        // big decoder only
 4849     DECODE : S1;        // any decoder for FPU POP
 4850     FPU    : S4;
 4851     MEM    : S3;        // any mem
 4852 %}
 4853 
 4854 // Float reg-mem operation
 4855 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4856     instruction_count(3);
 4857     dst    : S5(write);
 4858     src1   : S3(read);
 4859     mem    : S3(read);
 4860     D0     : S0;        // big decoder only
 4861     DECODE : S1(2);     // any decoder for FPU POP
 4862     FPU    : S4;
 4863     MEM    : S3;        // any mem
 4864 %}
 4865 
 4866 // Float mem-reg operation
 4867 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4868     instruction_count(2);
 4869     src    : S5(read);
 4870     mem    : S3(read);
 4871     DECODE : S0;        // any decoder for FPU PUSH
 4872     D0     : S1;        // big decoder only
 4873     FPU    : S4;
 4874     MEM    : S3;        // any mem
 4875 %}
 4876 
 4877 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4878     instruction_count(3);
 4879     src1   : S3(read);
 4880     src2   : S3(read);
 4881     mem    : S3(read);
 4882     DECODE : S0(2);     // any decoder for FPU PUSH
 4883     D0     : S1;        // big decoder only
 4884     FPU    : S4;
 4885     MEM    : S3;        // any mem
 4886 %}
 4887 
 4888 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4889     instruction_count(3);
 4890     src1   : S3(read);
 4891     src2   : S3(read);
 4892     mem    : S4(read);
 4893     DECODE : S0;        // any decoder for FPU PUSH
 4894     D0     : S0(2);     // big decoder only
 4895     FPU    : S4;
 4896     MEM    : S3(2);     // any mem
 4897 %}
 4898 
 4899 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4900     instruction_count(2);
 4901     src1   : S3(read);
 4902     dst    : S4(read);
 4903     D0     : S0(2);     // big decoder only
 4904     MEM    : S3(2);     // any mem
 4905 %}
 4906 
 4907 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4908     instruction_count(3);
 4909     src1   : S3(read);
 4910     src2   : S3(read);
 4911     dst    : S4(read);
 4912     D0     : S0(3);     // big decoder only
 4913     FPU    : S4;
 4914     MEM    : S3(3);     // any mem
 4915 %}
 4916 
 4917 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4918     instruction_count(3);
 4919     src1   : S4(read);
 4920     mem    : S4(read);
 4921     DECODE : S0;        // any decoder for FPU PUSH
 4922     D0     : S0(2);     // big decoder only
 4923     FPU    : S4;
 4924     MEM    : S3(2);     // any mem
 4925 %}
 4926 
 4927 // Float load constant
 4928 pipe_class fpu_reg_con(regDPR dst) %{
 4929     instruction_count(2);
 4930     dst    : S5(write);
 4931     D0     : S0;        // big decoder only for the load
 4932     DECODE : S1;        // any decoder for FPU POP
 4933     FPU    : S4;
 4934     MEM    : S3;        // any mem
 4935 %}
 4936 
 4937 // Float load constant
 4938 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4939     instruction_count(3);
 4940     dst    : S5(write);
 4941     src    : S3(read);
 4942     D0     : S0;        // big decoder only for the load
 4943     DECODE : S1(2);     // any decoder for FPU POP
 4944     FPU    : S4;
 4945     MEM    : S3;        // any mem
 4946 %}
 4947 
 4948 // Unconditional branch
 4949 pipe_class pipe_jmp( label labl ) %{
 4950     single_instruction;
 4951     BR   : S3;
 4952 %}
 4953 
 4954 // Conditional branch
 4955 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4956     single_instruction;
 4957     cr    : S1(read);
 4958     BR    : S3;
 4959 %}
 4960 
 4961 // Allocation idiom
 4962 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4963     instruction_count(1); force_serialization;
 4964     fixed_latency(6);
 4965     heap_ptr : S3(read);
 4966     DECODE   : S0(3);
 4967     D0       : S2;
 4968     MEM      : S3;
 4969     ALU      : S3(2);
 4970     dst      : S5(write);
 4971     BR       : S5;
 4972 %}
 4973 
 4974 // Generic big/slow expanded idiom
 4975 pipe_class pipe_slow(  ) %{
 4976     instruction_count(10); multiple_bundles; force_serialization;
 4977     fixed_latency(100);
 4978     D0  : S0(2);
 4979     MEM : S3(2);
 4980 %}
 4981 
 4982 // The real do-nothing guy
 4983 pipe_class empty( ) %{
 4984     instruction_count(0);
 4985 %}
 4986 
 4987 // Define the class for the Nop node
 4988 define %{
 4989    MachNop = empty;
 4990 %}
 4991 
 4992 %}
 4993 
 4994 //----------INSTRUCTIONS-------------------------------------------------------
 4995 //
 4996 // match      -- States which machine-independent subtree may be replaced
 4997 //               by this instruction.
 4998 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4999 //               selection to identify a minimum cost tree of machine
 5000 //               instructions that matches a tree of machine-independent
 5001 //               instructions.
 5002 // format     -- A string providing the disassembly for this instruction.
 5003 //               The value of an instruction's operand may be inserted
 5004 //               by referring to it with a '$' prefix.
 5005 // opcode     -- Up to three instruction opcodes may be provided.  These are referred
 5006 //               to within an encode class as $primary, $secondary, and $tertiary
 5007 //               respectively.  The primary opcode is commonly used to
 5008 //               indicate the type of machine instruction, while secondary
 5009 //               and tertiary are often used for prefix options or addressing
 5010 //               modes.
 5011 // ins_encode -- A list of encode classes with parameters. The encode class
 5012 //               name must have been defined in an 'enc_class' specification
 5013 //               in the encode section of the architecture description.
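      //
      // As a worked example of how these pieces fit together, the first BSWAP
      // instruct below matches (Set dst (ReverseBytesI dst)), supplies the two
      // opcode bytes 0x0F and 0xC8 as $primary and $secondary, and emits them
      // through the OpcP and OpcSReg encode classes; many later instructs use
      // an ins_encode %{ ... %} block that calls the assembler directly instead.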
 5014 
 5015 //----------BSWAP-Instruction--------------------------------------------------
 5016 instruct bytes_reverse_int(rRegI dst) %{
 5017   match(Set dst (ReverseBytesI dst));
 5018 
 5019   format %{ "BSWAP  $dst" %}
 5020   opcode(0x0F, 0xC8);
 5021   ins_encode( OpcP, OpcSReg(dst) );
 5022   ins_pipe( ialu_reg );
 5023 %}
 5024 
 5025 instruct bytes_reverse_long(eRegL dst) %{
 5026   match(Set dst (ReverseBytesL dst));
 5027 
 5028   format %{ "BSWAP  $dst.lo\n\t"
 5029             "BSWAP  $dst.hi\n\t"
 5030             "XCHG   $dst.lo $dst.hi" %}
 5031 
 5032   ins_cost(125);
 5033   ins_encode( bswap_long_bytes(dst) );
 5034   ins_pipe( ialu_reg_reg);
 5035 %}
 5036 
 5037 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5038   match(Set dst (ReverseBytesUS dst));
 5039   effect(KILL cr);
 5040 
 5041   format %{ "BSWAP  $dst\n\t"
 5042             "SHR    $dst,16\n\t" %}
 5043   ins_encode %{
 5044     __ bswapl($dst$$Register);
 5045     __ shrl($dst$$Register, 16);
 5046   %}
 5047   ins_pipe( ialu_reg );
 5048 %}
 5049 
 5050 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5051   match(Set dst (ReverseBytesS dst));
 5052   effect(KILL cr);
 5053 
 5054   format %{ "BSWAP  $dst\n\t"
 5055             "SAR    $dst,16\n\t" %}
 5056   ins_encode %{
 5057     __ bswapl($dst$$Register);
 5058     __ sarl($dst$$Register, 16);
 5059   %}
 5060   ins_pipe( ialu_reg );
 5061 %}
 5062 
 5063 
 5064 //---------- Zeros Count Instructions ------------------------------------------
 5065 
 5066 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5067   predicate(UseCountLeadingZerosInstruction);
 5068   match(Set dst (CountLeadingZerosI src));
 5069   effect(KILL cr);
 5070 
 5071   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5072   ins_encode %{
 5073     __ lzcntl($dst$$Register, $src$$Register);
 5074   %}
 5075   ins_pipe(ialu_reg);
 5076 %}
 5077 
 5078 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5079   predicate(!UseCountLeadingZerosInstruction);
 5080   match(Set dst (CountLeadingZerosI src));
 5081   effect(KILL cr);
 5082 
 5083   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5084             "JNZ    skip\n\t"
 5085             "MOV    $dst, -1\n"
 5086       "skip:\n\t"
 5087             "NEG    $dst\n\t"
 5088             "ADD    $dst, 31" %}
 5089   ins_encode %{
 5090     Register Rdst = $dst$$Register;
 5091     Register Rsrc = $src$$Register;
 5092     Label skip;
 5093     __ bsrl(Rdst, Rsrc);
 5094     __ jccb(Assembler::notZero, skip);
 5095     __ movl(Rdst, -1);
 5096     __ bind(skip);
 5097     __ negl(Rdst);
 5098     __ addl(Rdst, BitsPerInt - 1);
 5099   %}
 5100   ins_pipe(ialu_reg);
 5101 %}
 5102 
 5103 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5104   predicate(UseCountLeadingZerosInstruction);
 5105   match(Set dst (CountLeadingZerosL src));
 5106   effect(TEMP dst, KILL cr);
 5107 
 5108   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5109             "JNC    done\n\t"
 5110             "LZCNT  $dst, $src.lo\n\t"
 5111             "ADD    $dst, 32\n"
 5112       "done:" %}
 5113   ins_encode %{
 5114     Register Rdst = $dst$$Register;
 5115     Register Rsrc = $src$$Register;
 5116     Label done;
 5117     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5118     __ jccb(Assembler::carryClear, done);
 5119     __ lzcntl(Rdst, Rsrc);
 5120     __ addl(Rdst, BitsPerInt);
 5121     __ bind(done);
 5122   %}
 5123   ins_pipe(ialu_reg);
 5124 %}
 5125 
 5126 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5127   predicate(!UseCountLeadingZerosInstruction);
 5128   match(Set dst (CountLeadingZerosL src));
 5129   effect(TEMP dst, KILL cr);
 5130 
 5131   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5132             "JZ     msw_is_zero\n\t"
 5133             "ADD    $dst, 32\n\t"
 5134             "JMP    not_zero\n"
 5135       "msw_is_zero:\n\t"
 5136             "BSR    $dst, $src.lo\n\t"
 5137             "JNZ    not_zero\n\t"
 5138             "MOV    $dst, -1\n"
 5139       "not_zero:\n\t"
 5140             "NEG    $dst\n\t"
 5141             "ADD    $dst, 63\n" %}
 5142  ins_encode %{
 5143     Register Rdst = $dst$$Register;
 5144     Register Rsrc = $src$$Register;
 5145     Label msw_is_zero;
 5146     Label not_zero;
 5147     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5148     __ jccb(Assembler::zero, msw_is_zero);
 5149     __ addl(Rdst, BitsPerInt);
 5150     __ jmpb(not_zero);
 5151     __ bind(msw_is_zero);
 5152     __ bsrl(Rdst, Rsrc);
 5153     __ jccb(Assembler::notZero, not_zero);
 5154     __ movl(Rdst, -1);
 5155     __ bind(not_zero);
 5156     __ negl(Rdst);
 5157     __ addl(Rdst, BitsPerLong - 1);
 5158   %}
 5159   ins_pipe(ialu_reg);
 5160 %}
 5161 
 5162 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5163   predicate(UseCountTrailingZerosInstruction);
 5164   match(Set dst (CountTrailingZerosI src));
 5165   effect(KILL cr);
 5166 
 5167   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5168   ins_encode %{
 5169     __ tzcntl($dst$$Register, $src$$Register);
 5170   %}
 5171   ins_pipe(ialu_reg);
 5172 %}
 5173 
 5174 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5175   predicate(!UseCountTrailingZerosInstruction);
 5176   match(Set dst (CountTrailingZerosI src));
 5177   effect(KILL cr);
 5178 
 5179   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5180             "JNZ    done\n\t"
 5181             "MOV    $dst, 32\n"
 5182       "done:" %}
 5183   ins_encode %{
 5184     Register Rdst = $dst$$Register;
 5185     Label done;
 5186     __ bsfl(Rdst, $src$$Register);
 5187     __ jccb(Assembler::notZero, done);
 5188     __ movl(Rdst, BitsPerInt);
 5189     __ bind(done);
 5190   %}
 5191   ins_pipe(ialu_reg);
 5192 %}
 5193 
 5194 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5195   predicate(UseCountTrailingZerosInstruction);
 5196   match(Set dst (CountTrailingZerosL src));
 5197   effect(TEMP dst, KILL cr);
 5198 
 5199   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5200             "JNC    done\n\t"
 5201             "TZCNT  $dst, $src.hi\n\t"
 5202             "ADD    $dst, 32\n"
 5203             "done:" %}
 5204   ins_encode %{
 5205     Register Rdst = $dst$$Register;
 5206     Register Rsrc = $src$$Register;
 5207     Label done;
 5208     __ tzcntl(Rdst, Rsrc);
 5209     __ jccb(Assembler::carryClear, done);
 5210     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5211     __ addl(Rdst, BitsPerInt);
 5212     __ bind(done);
 5213   %}
 5214   ins_pipe(ialu_reg);
 5215 %}
 5216 
 5217 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5218   predicate(!UseCountTrailingZerosInstruction);
 5219   match(Set dst (CountTrailingZerosL src));
 5220   effect(TEMP dst, KILL cr);
 5221 
 5222   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5223             "JNZ    done\n\t"
 5224             "BSF    $dst, $src.hi\n\t"
 5225             "JNZ    msw_not_zero\n\t"
 5226             "MOV    $dst, 32\n"
 5227       "msw_not_zero:\n\t"
 5228             "ADD    $dst, 32\n"
 5229       "done:" %}
 5230   ins_encode %{
 5231     Register Rdst = $dst$$Register;
 5232     Register Rsrc = $src$$Register;
 5233     Label msw_not_zero;
 5234     Label done;
 5235     __ bsfl(Rdst, Rsrc);
 5236     __ jccb(Assembler::notZero, done);
 5237     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5238     __ jccb(Assembler::notZero, msw_not_zero);
 5239     __ movl(Rdst, BitsPerInt);
 5240     __ bind(msw_not_zero);
 5241     __ addl(Rdst, BitsPerInt);
 5242     __ bind(done);
 5243   %}
 5244   ins_pipe(ialu_reg);
 5245 %}
 5246 
 5247 
 5248 //---------- Population Count Instructions -------------------------------------
 5249 
 5250 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5251   predicate(UsePopCountInstruction);
 5252   match(Set dst (PopCountI src));
 5253   effect(KILL cr);
 5254 
 5255   format %{ "POPCNT $dst, $src" %}
 5256   ins_encode %{
 5257     __ popcntl($dst$$Register, $src$$Register);
 5258   %}
 5259   ins_pipe(ialu_reg);
 5260 %}
 5261 
 5262 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5263   predicate(UsePopCountInstruction);
 5264   match(Set dst (PopCountI (LoadI mem)));
 5265   effect(KILL cr);
 5266 
 5267   format %{ "POPCNT $dst, $mem" %}
 5268   ins_encode %{
 5269     __ popcntl($dst$$Register, $mem$$Address);
 5270   %}
 5271   ins_pipe(ialu_reg);
 5272 %}
 5273 
 5274 // Note: Long.bitCount(long) returns an int.
 5275 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5276   predicate(UsePopCountInstruction);
 5277   match(Set dst (PopCountL src));
 5278   effect(KILL cr, TEMP tmp, TEMP dst);
 5279 
 5280   format %{ "POPCNT $dst, $src.lo\n\t"
 5281             "POPCNT $tmp, $src.hi\n\t"
 5282             "ADD    $dst, $tmp" %}
 5283   ins_encode %{
 5284     __ popcntl($dst$$Register, $src$$Register);
 5285     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5286     __ addl($dst$$Register, $tmp$$Register);
 5287   %}
 5288   ins_pipe(ialu_reg);
 5289 %}
 5290 
 5291 // Note: Long.bitCount(long) returns an int.
 5292 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5293   predicate(UsePopCountInstruction);
 5294   match(Set dst (PopCountL (LoadL mem)));
 5295   effect(KILL cr, TEMP tmp, TEMP dst);
 5296 
 5297   format %{ "POPCNT $dst, $mem\n\t"
 5298             "POPCNT $tmp, $mem+4\n\t"
 5299             "ADD    $dst, $tmp" %}
 5300   ins_encode %{
 5301     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5302     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5303     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5304     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5305     __ addl($dst$$Register, $tmp$$Register);
 5306   %}
 5307   ins_pipe(ialu_reg);
 5308 %}
 5309 
 5310 
 5311 //----------Load/Store/Move Instructions---------------------------------------
 5312 //----------Load Instructions--------------------------------------------------
 5313 // Load Byte (8bit signed)
 5314 instruct loadB(xRegI dst, memory mem) %{
 5315   match(Set dst (LoadB mem));
 5316 
 5317   ins_cost(125);
 5318   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5319 
 5320   ins_encode %{
 5321     __ movsbl($dst$$Register, $mem$$Address);
 5322   %}
 5323 
 5324   ins_pipe(ialu_reg_mem);
 5325 %}
 5326 
 5327 // Load Byte (8bit signed) into Long Register
 5328 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5329   match(Set dst (ConvI2L (LoadB mem)));
 5330   effect(KILL cr);
 5331 
 5332   ins_cost(375);
 5333   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5334             "MOV    $dst.hi,$dst.lo\n\t"
 5335             "SAR    $dst.hi,7" %}
 5336 
 5337   ins_encode %{
 5338     __ movsbl($dst$$Register, $mem$$Address);
 5339     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5340     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 MSBs are already sign-extended.
 5341   %}
 5342 
 5343   ins_pipe(ialu_reg_mem);
 5344 %}
 5345 
 5346 // Load Unsigned Byte (8bit UNsigned)
 5347 instruct loadUB(xRegI dst, memory mem) %{
 5348   match(Set dst (LoadUB mem));
 5349 
 5350   ins_cost(125);
 5351   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5352 
 5353   ins_encode %{
 5354     __ movzbl($dst$$Register, $mem$$Address);
 5355   %}
 5356 
 5357   ins_pipe(ialu_reg_mem);
 5358 %}
 5359 
 5360 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5361 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5362   match(Set dst (ConvI2L (LoadUB mem)));
 5363   effect(KILL cr);
 5364 
 5365   ins_cost(250);
 5366   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5367             "XOR    $dst.hi,$dst.hi" %}
 5368 
 5369   ins_encode %{
 5370     Register Rdst = $dst$$Register;
 5371     __ movzbl(Rdst, $mem$$Address);
 5372     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5373   %}
 5374 
 5375   ins_pipe(ialu_reg_mem);
 5376 %}
 5377 
 5378 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5379 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5380   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5384             "XOR    $dst.hi,$dst.hi\n\t"
 5385             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5386   ins_encode %{
 5387     Register Rdst = $dst$$Register;
 5388     __ movzbl(Rdst, $mem$$Address);
 5389     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5390     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5391   %}
 5392   ins_pipe(ialu_reg_mem);
 5393 %}
 5394 
 5395 // Load Short (16bit signed)
 5396 instruct loadS(rRegI dst, memory mem) %{
 5397   match(Set dst (LoadS mem));
 5398 
 5399   ins_cost(125);
 5400   format %{ "MOVSX  $dst,$mem\t# short" %}
 5401 
 5402   ins_encode %{
 5403     __ movswl($dst$$Register, $mem$$Address);
 5404   %}
 5405 
 5406   ins_pipe(ialu_reg_mem);
 5407 %}
 5408 
 5409 // Load Short (16 bit signed) to Byte (8 bit signed)
 5410 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5411   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5412 
 5413   ins_cost(125);
 5414   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5415   ins_encode %{
 5416     __ movsbl($dst$$Register, $mem$$Address);
 5417   %}
 5418   ins_pipe(ialu_reg_mem);
 5419 %}
 5420 
 5421 // Load Short (16bit signed) into Long Register
 5422 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5423   match(Set dst (ConvI2L (LoadS mem)));
 5424   effect(KILL cr);
 5425 
 5426   ins_cost(375);
 5427   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5428             "MOV    $dst.hi,$dst.lo\n\t"
 5429             "SAR    $dst.hi,15" %}
 5430 
 5431   ins_encode %{
 5432     __ movswl($dst$$Register, $mem$$Address);
 5433     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5434     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 MSBs are already sign-extended.
 5435   %}
 5436 
 5437   ins_pipe(ialu_reg_mem);
 5438 %}
 5439 
 5440 // Load Unsigned Short/Char (16bit unsigned)
 5441 instruct loadUS(rRegI dst, memory mem) %{
 5442   match(Set dst (LoadUS mem));
 5443 
 5444   ins_cost(125);
 5445   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5446 
 5447   ins_encode %{
 5448     __ movzwl($dst$$Register, $mem$$Address);
 5449   %}
 5450 
 5451   ins_pipe(ialu_reg_mem);
 5452 %}
 5453 
 5454 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5455 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5456   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5457 
 5458   ins_cost(125);
 5459   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5460   ins_encode %{
 5461     __ movsbl($dst$$Register, $mem$$Address);
 5462   %}
 5463   ins_pipe(ialu_reg_mem);
 5464 %}
 5465 
 5466 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5467 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5468   match(Set dst (ConvI2L (LoadUS mem)));
 5469   effect(KILL cr);
 5470 
 5471   ins_cost(250);
 5472   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5473             "XOR    $dst.hi,$dst.hi" %}
 5474 
 5475   ins_encode %{
 5476     __ movzwl($dst$$Register, $mem$$Address);
 5477     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5478   %}
 5479 
 5480   ins_pipe(ialu_reg_mem);
 5481 %}
 5482 
 5483 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5484 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5485   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5486   effect(KILL cr);
 5487 
 5488   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5489             "XOR    $dst.hi,$dst.hi" %}
 5490   ins_encode %{
 5491     Register Rdst = $dst$$Register;
 5492     __ movzbl(Rdst, $mem$$Address);
 5493     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5494   %}
 5495   ins_pipe(ialu_reg_mem);
 5496 %}
 5497 
 5498 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5499 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5500   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5501   effect(KILL cr);
 5502 
 5503   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5504             "XOR    $dst.hi,$dst.hi\n\t"
 5505             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5506   ins_encode %{
 5507     Register Rdst = $dst$$Register;
 5508     __ movzwl(Rdst, $mem$$Address);
 5509     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5510     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5511   %}
 5512   ins_pipe(ialu_reg_mem);
 5513 %}
 5514 
 5515 // Load Integer
 5516 instruct loadI(rRegI dst, memory mem) %{
 5517   match(Set dst (LoadI mem));
 5518 
 5519   ins_cost(125);
 5520   format %{ "MOV    $dst,$mem\t# int" %}
 5521 
 5522   ins_encode %{
 5523     __ movl($dst$$Register, $mem$$Address);
 5524   %}
 5525 
 5526   ins_pipe(ialu_reg_mem);
 5527 %}
 5528 
 5529 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5530 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5531   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5532 
 5533   ins_cost(125);
 5534   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5535   ins_encode %{
 5536     __ movsbl($dst$$Register, $mem$$Address);
 5537   %}
 5538   ins_pipe(ialu_reg_mem);
 5539 %}
 5540 
 5541 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5542 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5543   match(Set dst (AndI (LoadI mem) mask));
 5544 
 5545   ins_cost(125);
 5546   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5547   ins_encode %{
 5548     __ movzbl($dst$$Register, $mem$$Address);
 5549   %}
 5550   ins_pipe(ialu_reg_mem);
 5551 %}
 5552 
 5553 // Load Integer (32 bit signed) to Short (16 bit signed)
 5554 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5555   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5556 
 5557   ins_cost(125);
 5558   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5559   ins_encode %{
 5560     __ movswl($dst$$Register, $mem$$Address);
 5561   %}
 5562   ins_pipe(ialu_reg_mem);
 5563 %}
 5564 
 5565 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5566 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5567   match(Set dst (AndI (LoadI mem) mask));
 5568 
 5569   ins_cost(125);
 5570   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5571   ins_encode %{
 5572     __ movzwl($dst$$Register, $mem$$Address);
 5573   %}
 5574   ins_pipe(ialu_reg_mem);
 5575 %}
 5576 
 5577 // Load Integer into Long Register
 5578 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5579   match(Set dst (ConvI2L (LoadI mem)));
 5580   effect(KILL cr);
 5581 
 5582   ins_cost(375);
 5583   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5584             "MOV    $dst.hi,$dst.lo\n\t"
 5585             "SAR    $dst.hi,31" %}
 5586 
 5587   ins_encode %{
 5588     __ movl($dst$$Register, $mem$$Address);
 5589     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5590     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5591   %}
 5592 
 5593   ins_pipe(ialu_reg_mem);
 5594 %}
 5595 
 5596 // Load Integer with mask 0xFF into Long Register
 5597 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5598   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5599   effect(KILL cr);
 5600 
 5601   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5602             "XOR    $dst.hi,$dst.hi" %}
 5603   ins_encode %{
 5604     Register Rdst = $dst$$Register;
 5605     __ movzbl(Rdst, $mem$$Address);
 5606     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5607   %}
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Integer with mask 0xFFFF into Long Register
 5612 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movzwl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Integer with 31-bit mask into Long Register
 5627 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5628   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5629   effect(KILL cr);
 5630 
 5631   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5632             "XOR    $dst.hi,$dst.hi\n\t"
 5633             "AND    $dst.lo,$mask" %}
 5634   ins_encode %{
 5635     Register Rdst = $dst$$Register;
 5636     __ movl(Rdst, $mem$$Address);
 5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5638     __ andl(Rdst, $mask$$constant);
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Unsigned Integer into Long Register
 5644 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5645   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5646   effect(KILL cr);
 5647 
 5648   ins_cost(250);
 5649   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5650             "XOR    $dst.hi,$dst.hi" %}
 5651 
 5652   ins_encode %{
 5653     __ movl($dst$$Register, $mem$$Address);
 5654     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5655   %}
 5656 
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Long.  Cannot clobber address while loading, so restrict address
 5661 // register to ESI
 5662 instruct loadL(eRegL dst, load_long_memory mem) %{
 5663   predicate(!((LoadLNode*)n)->require_atomic_access());
 5664   match(Set dst (LoadL mem));
 5665 
 5666   ins_cost(250);
 5667   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5668             "MOV    $dst.hi,$mem+4" %}
 5669 
 5670   ins_encode %{
 5671     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5672     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5673     __ movl($dst$$Register, Amemlo);
 5674     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5675   %}
 5676 
 5677   ins_pipe(ialu_reg_long_mem);
 5678 %}
 5679 
 5680 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5681 // then store it down to the stack and reload on the int
 5682 // side.
 5683 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5684   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5685   match(Set dst (LoadL mem));
 5686 
 5687   ins_cost(200);
 5688   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5689             "FISTp  $dst" %}
 5690   ins_encode(enc_loadL_volatile(mem,dst));
 5691   ins_pipe( fpu_reg_mem );
 5692 %}
 5693 
 5694 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5695   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5696   match(Set dst (LoadL mem));
 5697   effect(TEMP tmp);
 5698   ins_cost(180);
 5699   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5700             "MOVSD  $dst,$tmp" %}
 5701   ins_encode %{
 5702     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5703     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5704   %}
 5705   ins_pipe( pipe_slow );
 5706 %}
 5707 
 5708 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5709   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5710   match(Set dst (LoadL mem));
 5711   effect(TEMP tmp);
 5712   ins_cost(160);
 5713   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5714             "MOVD   $dst.lo,$tmp\n\t"
 5715             "PSRLQ  $tmp,32\n\t"
 5716             "MOVD   $dst.hi,$tmp" %}
 5717   ins_encode %{
 5718     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5719     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5720     __ psrlq($tmp$$XMMRegister, 32);
 5721     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5722   %}
 5723   ins_pipe( pipe_slow );
 5724 %}
 5725 
 5726 // Load Range
 5727 instruct loadRange(rRegI dst, memory mem) %{
 5728   match(Set dst (LoadRange mem));
 5729 
 5730   ins_cost(125);
 5731   format %{ "MOV    $dst,$mem" %}
 5732   opcode(0x8B);
 5733   ins_encode( OpcP, RegMem(dst,mem));
 5734   ins_pipe( ialu_reg_mem );
 5735 %}
 5736 
 5737 
 5738 // Load Pointer
 5739 instruct loadP(eRegP dst, memory mem) %{
 5740   match(Set dst (LoadP mem));
 5741 
 5742   ins_cost(125);
 5743   format %{ "MOV    $dst,$mem" %}
 5744   opcode(0x8B);
 5745   ins_encode( OpcP, RegMem(dst,mem));
 5746   ins_pipe( ialu_reg_mem );
 5747 %}
 5748 
 5749 // Load Klass Pointer
 5750 instruct loadKlass(eRegP dst, memory mem) %{
 5751   match(Set dst (LoadKlass mem));
 5752 
 5753   ins_cost(125);
 5754   format %{ "MOV    $dst,$mem" %}
 5755   opcode(0x8B);
 5756   ins_encode( OpcP, RegMem(dst,mem));
 5757   ins_pipe( ialu_reg_mem );
 5758 %}
 5759 
 5760 // Load Float
 5761 instruct MoveF2LEG(legRegF dst, regF src) %{
 5762   match(Set dst src);
 5763   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5764   ins_encode %{
 5765     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5766   %}
 5767   ins_pipe( fpu_reg_reg );
 5768 %}
 5769 
 5770 // Load Float
 5771 instruct MoveLEG2F(regF dst, legRegF src) %{
 5772   match(Set dst src);
 5773   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5774   ins_encode %{
 5775     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5776   %}
 5777   ins_pipe( fpu_reg_reg );
 5778 %}
 5779 
 5780 // Load Double
 5781 instruct MoveD2LEG(legRegD dst, regD src) %{
 5782   match(Set dst src);
 5783   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5784   ins_encode %{
 5785     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5786   %}
 5787   ins_pipe( fpu_reg_reg );
 5788 %}
 5789 
 5790 // Load Double
 5791 instruct MoveLEG2D(regD dst, legRegD src) %{
 5792   match(Set dst src);
 5793   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5794   ins_encode %{
 5795     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5796   %}
 5797   ins_pipe( fpu_reg_reg );
 5798 %}
 5799 
 5800 // Load Double
 5801 instruct loadDPR(regDPR dst, memory mem) %{
 5802   predicate(UseSSE<=1);
 5803   match(Set dst (LoadD mem));
 5804 
 5805   ins_cost(150);
 5806   format %{ "FLD_D  ST,$mem\n\t"
 5807             "FSTP   $dst" %}
 5808   opcode(0xDD);               /* DD /0 */
 5809   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5810               Pop_Reg_DPR(dst) );
 5811   ins_pipe( fpu_reg_mem );
 5812 %}
 5813 
 5814 // Load Double to XMM
 5815 instruct loadD(regD dst, memory mem) %{
 5816   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5817   match(Set dst (LoadD mem));
 5818   ins_cost(145);
 5819   format %{ "MOVSD  $dst,$mem" %}
 5820   ins_encode %{
 5821     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5822   %}
 5823   ins_pipe( pipe_slow );
 5824 %}
 5825 
 5826 instruct loadD_partial(regD dst, memory mem) %{
 5827   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5828   match(Set dst (LoadD mem));
 5829   ins_cost(145);
 5830   format %{ "MOVLPD $dst,$mem" %}
 5831   ins_encode %{
 5832     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5833   %}
 5834   ins_pipe( pipe_slow );
 5835 %}
 5836 
 5837 // Load to XMM register (single-precision floating point)
 5838 // MOVSS instruction
 5839 instruct loadF(regF dst, memory mem) %{
 5840   predicate(UseSSE>=1);
 5841   match(Set dst (LoadF mem));
 5842   ins_cost(145);
 5843   format %{ "MOVSS  $dst,$mem" %}
 5844   ins_encode %{
 5845     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5846   %}
 5847   ins_pipe( pipe_slow );
 5848 %}
 5849 
 5850 // Load Float
 5851 instruct loadFPR(regFPR dst, memory mem) %{
 5852   predicate(UseSSE==0);
 5853   match(Set dst (LoadF mem));
 5854 
 5855   ins_cost(150);
 5856   format %{ "FLD_S  ST,$mem\n\t"
 5857             "FSTP   $dst" %}
 5858   opcode(0xD9);               /* D9 /0 */
 5859   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5860               Pop_Reg_FPR(dst) );
 5861   ins_pipe( fpu_reg_mem );
 5862 %}
 5863 
 5864 // Load Effective Address
 5865 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5866   match(Set dst mem);
 5867 
 5868   ins_cost(110);
 5869   format %{ "LEA    $dst,$mem" %}
 5870   opcode(0x8D);
 5871   ins_encode( OpcP, RegMem(dst,mem));
 5872   ins_pipe( ialu_reg_reg_fat );
 5873 %}
 5874 
 5875 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5876   match(Set dst mem);
 5877 
 5878   ins_cost(110);
 5879   format %{ "LEA    $dst,$mem" %}
 5880   opcode(0x8D);
 5881   ins_encode( OpcP, RegMem(dst,mem));
 5882   ins_pipe( ialu_reg_reg_fat );
 5883 %}
 5884 
 5885 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5886   match(Set dst mem);
 5887 
 5888   ins_cost(110);
 5889   format %{ "LEA    $dst,$mem" %}
 5890   opcode(0x8D);
 5891   ins_encode( OpcP, RegMem(dst,mem));
 5892   ins_pipe( ialu_reg_reg_fat );
 5893 %}
 5894 
 5895 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5896   match(Set dst mem);
 5897 
 5898   ins_cost(110);
 5899   format %{ "LEA    $dst,$mem" %}
 5900   opcode(0x8D);
 5901   ins_encode( OpcP, RegMem(dst,mem));
 5902   ins_pipe( ialu_reg_reg_fat );
 5903 %}
 5904 
 5905 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5906   match(Set dst mem);
 5907 
 5908   ins_cost(110);
 5909   format %{ "LEA    $dst,$mem" %}
 5910   opcode(0x8D);
 5911   ins_encode( OpcP, RegMem(dst,mem));
 5912   ins_pipe( ialu_reg_reg_fat );
 5913 %}
 5914 
 5915 // Load Constant
 5916 instruct loadConI(rRegI dst, immI src) %{
 5917   match(Set dst src);
 5918 
 5919   format %{ "MOV    $dst,$src" %}
 5920   ins_encode( LdImmI(dst, src) );
 5921   ins_pipe( ialu_reg_fat );
 5922 %}
 5923 
 5924 // Load Constant zero
 5925 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5926   match(Set dst src);
 5927   effect(KILL cr);
 5928 
 5929   ins_cost(50);
 5930   format %{ "XOR    $dst,$dst" %}
 5931   opcode(0x33);  /* + rd */
 5932   ins_encode( OpcP, RegReg( dst, dst ) );
 5933   ins_pipe( ialu_reg );
 5934 %}
 5935 
 5936 instruct loadConP(eRegP dst, immP src) %{
 5937   match(Set dst src);
 5938 
 5939   format %{ "MOV    $dst,$src" %}
 5940   opcode(0xB8);  /* + rd */
 5941   ins_encode( LdImmP(dst, src) );
 5942   ins_pipe( ialu_reg_fat );
 5943 %}
 5944 
 5945 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5946   match(Set dst src);
 5947   effect(KILL cr);
 5948   ins_cost(200);
 5949   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5950             "MOV    $dst.hi,$src.hi" %}
 5951   opcode(0xB8);
 5952   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5953   ins_pipe( ialu_reg_long_fat );
 5954 %}
 5955 
 5956 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 5957   match(Set dst src);
 5958   effect(KILL cr);
 5959   ins_cost(150);
 5960   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5961             "XOR    $dst.hi,$dst.hi" %}
 5962   opcode(0x33,0x33);
 5963   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5964   ins_pipe( ialu_reg_long );
 5965 %}
 5966 
 5967 // The instruction usage is guarded by predicate in operand immFPR().
 5968 instruct loadConFPR(regFPR dst, immFPR con) %{
 5969   match(Set dst con);
 5970   ins_cost(125);
 5971   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5972             "FSTP   $dst" %}
 5973   ins_encode %{
 5974     __ fld_s($constantaddress($con));
 5975     __ fstp_d($dst$$reg);
 5976   %}
 5977   ins_pipe(fpu_reg_con);
 5978 %}
 5979 
 5980 // The instruction usage is guarded by predicate in operand immFPR0().
 5981 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5982   match(Set dst con);
 5983   ins_cost(125);
 5984   format %{ "FLDZ   ST\n\t"
 5985             "FSTP   $dst" %}
 5986   ins_encode %{
 5987     __ fldz();
 5988     __ fstp_d($dst$$reg);
 5989   %}
 5990   ins_pipe(fpu_reg_con);
 5991 %}
 5992 
 5993 // The instruction usage is guarded by predicate in operand immFPR1().
 5994 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5995   match(Set dst con);
 5996   ins_cost(125);
 5997   format %{ "FLD1   ST\n\t"
 5998             "FSTP   $dst" %}
 5999   ins_encode %{
 6000     __ fld1();
 6001     __ fstp_d($dst$$reg);
 6002   %}
 6003   ins_pipe(fpu_reg_con);
 6004 %}
 6005 
 6006 // The instruction usage is guarded by predicate in operand immF().
 6007 instruct loadConF(regF dst, immF con) %{
 6008   match(Set dst con);
 6009   ins_cost(125);
 6010   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6011   ins_encode %{
 6012     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6013   %}
 6014   ins_pipe(pipe_slow);
 6015 %}
 6016 
 6017 // The instruction usage is guarded by predicate in operand immF0().
 6018 instruct loadConF0(regF dst, immF0 src) %{
 6019   match(Set dst src);
 6020   ins_cost(100);
 6021   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6022   ins_encode %{
 6023     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6024   %}
 6025   ins_pipe(pipe_slow);
 6026 %}
 6027 
 6028 // The instruction usage is guarded by predicate in operand immDPR().
 6029 instruct loadConDPR(regDPR dst, immDPR con) %{
 6030   match(Set dst con);
 6031   ins_cost(125);
 6032 
 6033   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6034             "FSTP   $dst" %}
 6035   ins_encode %{
 6036     __ fld_d($constantaddress($con));
 6037     __ fstp_d($dst$$reg);
 6038   %}
 6039   ins_pipe(fpu_reg_con);
 6040 %}
 6041 
 6042 // The instruction usage is guarded by predicate in operand immDPR0().
 6043 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6044   match(Set dst con);
 6045   ins_cost(125);
 6046 
 6047   format %{ "FLDZ   ST\n\t"
 6048             "FSTP   $dst" %}
 6049   ins_encode %{
 6050     __ fldz();
 6051     __ fstp_d($dst$$reg);
 6052   %}
 6053   ins_pipe(fpu_reg_con);
 6054 %}
 6055 
 6056 // The instruction usage is guarded by predicate in operand immDPR1().
 6057 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6058   match(Set dst con);
 6059   ins_cost(125);
 6060 
 6061   format %{ "FLD1   ST\n\t"
 6062             "FSTP   $dst" %}
 6063   ins_encode %{
 6064     __ fld1();
 6065     __ fstp_d($dst$$reg);
 6066   %}
 6067   ins_pipe(fpu_reg_con);
 6068 %}
 6069 
 6070 // The instruction usage is guarded by predicate in operand immD().
 6071 instruct loadConD(regD dst, immD con) %{
 6072   match(Set dst con);
 6073   ins_cost(125);
 6074   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6075   ins_encode %{
 6076     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6077   %}
 6078   ins_pipe(pipe_slow);
 6079 %}
 6080 
 6081 // The instruction usage is guarded by predicate in operand immD0().
 6082 instruct loadConD0(regD dst, immD0 src) %{
 6083   match(Set dst src);
 6084   ins_cost(100);
 6085   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6086   ins_encode %{
 6087     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6088   %}
 6089   ins_pipe( pipe_slow );
 6090 %}
 6091 
 6092 // Load Stack Slot
 6093 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6094   match(Set dst src);
 6095   ins_cost(125);
 6096 
 6097   format %{ "MOV    $dst,$src" %}
 6098   opcode(0x8B);
 6099   ins_encode( OpcP, RegMem(dst,src));
 6100   ins_pipe( ialu_reg_mem );
 6101 %}
 6102 
 6103 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6104   match(Set dst src);
 6105 
 6106   ins_cost(200);
 6107   format %{ "MOV    $dst,$src.lo\n\t"
 6108             "MOV    $dst+4,$src.hi" %}
 6109   opcode(0x8B, 0x8B);
 6110   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6111   ins_pipe( ialu_mem_long_reg );
 6112 %}
 6113 
 6114 // Load Stack Slot
 6115 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6116   match(Set dst src);
 6117   ins_cost(125);
 6118 
 6119   format %{ "MOV    $dst,$src" %}
 6120   opcode(0x8B);
 6121   ins_encode( OpcP, RegMem(dst,src));
 6122   ins_pipe( ialu_reg_mem );
 6123 %}
 6124 
 6125 // Load Stack Slot
 6126 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6127   match(Set dst src);
 6128   ins_cost(125);
 6129 
 6130   format %{ "FLD_S  $src\n\t"
 6131             "FSTP   $dst" %}
 6132   opcode(0xD9);               /* D9 /0, FLD m32real */
 6133   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6134               Pop_Reg_FPR(dst) );
 6135   ins_pipe( fpu_reg_mem );
 6136 %}
 6137 
 6138 // Load Stack Slot
 6139 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6140   match(Set dst src);
 6141   ins_cost(125);
 6142 
 6143   format %{ "FLD_D  $src\n\t"
 6144             "FSTP   $dst" %}
 6145   opcode(0xDD);               /* DD /0, FLD m64real */
 6146   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6147               Pop_Reg_DPR(dst) );
 6148   ins_pipe( fpu_reg_mem );
 6149 %}
 6150 
 6151 // Prefetch instructions for allocation.
 6152 // Must be safe to execute with invalid address (cannot fault).
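      // Flavor selection: AllocatePrefetchInstr==3 emits PREFETCHW; with UseSSE>=1,
      // values 0/1/2 emit PREFETCHNTA/PREFETCHT0/PREFETCHT2; with UseSSE==0 and
      // AllocatePrefetchInstr!=3 the prefetch degenerates to an empty encoding.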
 6153 
 6154 instruct prefetchAlloc0( memory mem ) %{
 6155   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6156   match(PrefetchAllocation mem);
 6157   ins_cost(0);
 6158   size(0);
 6159   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6160   ins_encode();
 6161   ins_pipe(empty);
 6162 %}
 6163 
 6164 instruct prefetchAlloc( memory mem ) %{
 6165   predicate(AllocatePrefetchInstr==3);
 6166   match( PrefetchAllocation mem );
 6167   ins_cost(100);
 6168 
 6169   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6170   ins_encode %{
 6171     __ prefetchw($mem$$Address);
 6172   %}
 6173   ins_pipe(ialu_mem);
 6174 %}
 6175 
 6176 instruct prefetchAllocNTA( memory mem ) %{
 6177   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6178   match(PrefetchAllocation mem);
 6179   ins_cost(100);
 6180 
 6181   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6182   ins_encode %{
 6183     __ prefetchnta($mem$$Address);
 6184   %}
 6185   ins_pipe(ialu_mem);
 6186 %}
 6187 
 6188 instruct prefetchAllocT0( memory mem ) %{
 6189   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6190   match(PrefetchAllocation mem);
 6191   ins_cost(100);
 6192 
 6193   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6194   ins_encode %{
 6195     __ prefetcht0($mem$$Address);
 6196   %}
 6197   ins_pipe(ialu_mem);
 6198 %}
 6199 
 6200 instruct prefetchAllocT2( memory mem ) %{
 6201   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6202   match(PrefetchAllocation mem);
 6203   ins_cost(100);
 6204 
 6205   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6206   ins_encode %{
 6207     __ prefetcht2($mem$$Address);
 6208   %}
 6209   ins_pipe(ialu_mem);
 6210 %}
 6211 
 6212 //----------Store Instructions-------------------------------------------------
 6213 
 6214 // Store Byte
 6215 instruct storeB(memory mem, xRegI src) %{
 6216   match(Set mem (StoreB mem src));
 6217 
 6218   ins_cost(125);
 6219   format %{ "MOV8   $mem,$src" %}
 6220   opcode(0x88);
 6221   ins_encode( OpcP, RegMem( src, mem ) );
 6222   ins_pipe( ialu_mem_reg );
 6223 %}
 6224 
 6225 // Store Char/Short
 6226 instruct storeC(memory mem, rRegI src) %{
 6227   match(Set mem (StoreC mem src));
 6228 
 6229   ins_cost(125);
 6230   format %{ "MOV16  $mem,$src" %}
 6231   opcode(0x89, 0x66);
 6232   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6233   ins_pipe( ialu_mem_reg );
 6234 %}
 6235 
 6236 // Store Integer
 6237 instruct storeI(memory mem, rRegI src) %{
 6238   match(Set mem (StoreI mem src));
 6239 
 6240   ins_cost(125);
 6241   format %{ "MOV    $mem,$src" %}
 6242   opcode(0x89);
 6243   ins_encode( OpcP, RegMem( src, mem ) );
 6244   ins_pipe( ialu_mem_reg );
 6245 %}
 6246 
 6247 // Store Long
 6248 instruct storeL(long_memory mem, eRegL src) %{
 6249   predicate(!((StoreLNode*)n)->require_atomic_access());
 6250   match(Set mem (StoreL mem src));
 6251 
 6252   ins_cost(200);
 6253   format %{ "MOV    $mem,$src.lo\n\t"
 6254             "MOV    $mem+4,$src.hi" %}
 6255   opcode(0x89, 0x89);
 6256   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6257   ins_pipe( ialu_mem_long_reg );
 6258 %}
 6259 
 6260 // Store Long to Integer
 6261 instruct storeL2I(memory mem, eRegL src) %{
 6262   match(Set mem (StoreI mem (ConvL2I src)));
 6263 
 6264   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6265   ins_encode %{
 6266     __ movl($mem$$Address, $src$$Register);
 6267   %}
 6268   ins_pipe(ialu_mem_reg);
 6269 %}
 6270 
 6271 // Volatile Store Long.  Must be atomic, so move it into
 6272 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6273 // target address before the store (for null-ptr checks)
 6274 // so the memory operand is used twice in the encoding.
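      // With UseSSE>=2 the storeLX variants that follow get the same atomicity
      // from a single 64-bit MOVSD through an XMM temporary instead of the x87
      // FILD/FISTP pair.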
 6275 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6276   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6277   match(Set mem (StoreL mem src));
 6278   effect( KILL cr );
 6279   ins_cost(400);
 6280   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6281             "FILD   $src\n\t"
 6282             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6283   opcode(0x3B);
 6284   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6285   ins_pipe( fpu_reg_mem );
 6286 %}
 6287 
 6288 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6289   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6290   match(Set mem (StoreL mem src));
 6291   effect( TEMP tmp, KILL cr );
 6292   ins_cost(380);
 6293   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6294             "MOVSD  $tmp,$src\n\t"
 6295             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6296   ins_encode %{
 6297     __ cmpl(rax, $mem$$Address);
 6298     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6299     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6300   %}
 6301   ins_pipe( pipe_slow );
 6302 %}
 6303 
 6304 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6305   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6306   match(Set mem (StoreL mem src));
 6307   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6308   ins_cost(360);
 6309   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6310             "MOVD   $tmp,$src.lo\n\t"
 6311             "MOVD   $tmp2,$src.hi\n\t"
 6312             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6313             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6314   ins_encode %{
 6315     __ cmpl(rax, $mem$$Address);
 6316     __ movdl($tmp$$XMMRegister, $src$$Register);
 6317     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6318     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6319     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6320   %}
 6321   ins_pipe( pipe_slow );
 6322 %}
 6323 
 6324 // Store Pointer; for storing unknown oops and raw pointers
 6325 instruct storeP(memory mem, anyRegP src) %{
 6326   match(Set mem (StoreP mem src));
 6327 
 6328   ins_cost(125);
 6329   format %{ "MOV    $mem,$src" %}
 6330   opcode(0x89);
 6331   ins_encode( OpcP, RegMem( src, mem ) );
 6332   ins_pipe( ialu_mem_reg );
 6333 %}
 6334 
 6335 // Store Integer Immediate
 6336 instruct storeImmI(memory mem, immI src) %{
 6337   match(Set mem (StoreI mem src));
 6338 
 6339   ins_cost(150);
 6340   format %{ "MOV    $mem,$src" %}
 6341   opcode(0xC7);               /* C7 /0 */
 6342   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6343   ins_pipe( ialu_mem_imm );
 6344 %}
 6345 
 6346 // Store Short/Char Immediate
 6347 instruct storeImmI16(memory mem, immI16 src) %{
 6348   predicate(UseStoreImmI16);
 6349   match(Set mem (StoreC mem src));
 6350 
 6351   ins_cost(150);
 6352   format %{ "MOV16  $mem,$src" %}
 6353   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6354   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6355   ins_pipe( ialu_mem_imm );
 6356 %}
 6357 
 6358 // Store Pointer Immediate; null pointers or constant oops that do not
 6359 // need card-mark barriers.
 6360 instruct storeImmP(memory mem, immP src) %{
 6361   match(Set mem (StoreP mem src));
 6362 
 6363   ins_cost(150);
 6364   format %{ "MOV    $mem,$src" %}
 6365   opcode(0xC7);               /* C7 /0 */
 6366   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6367   ins_pipe( ialu_mem_imm );
 6368 %}
 6369 
 6370 // Store Byte Immediate
 6371 instruct storeImmB(memory mem, immI8 src) %{
 6372   match(Set mem (StoreB mem src));
 6373 
 6374   ins_cost(150);
 6375   format %{ "MOV8   $mem,$src" %}
 6376   opcode(0xC6);               /* C6 /0 */
 6377   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6378   ins_pipe( ialu_mem_imm );
 6379 %}
 6380 
 6381 // Store CMS card-mark Immediate
 6382 instruct storeImmCM(memory mem, immI8 src) %{
 6383   match(Set mem (StoreCM mem src));
 6384 
 6385   ins_cost(150);
 6386   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6387   opcode(0xC6);               /* C6 /0 */
 6388   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6389   ins_pipe( ialu_mem_imm );
 6390 %}
 6391 
 6392 // Store Double
 6393 instruct storeDPR( memory mem, regDPR1 src) %{
 6394   predicate(UseSSE<=1);
 6395   match(Set mem (StoreD mem src));
 6396 
 6397   ins_cost(100);
 6398   format %{ "FST_D  $mem,$src" %}
 6399   opcode(0xDD);       /* DD /2 */
 6400   ins_encode( enc_FPR_store(mem,src) );
 6401   ins_pipe( fpu_mem_reg );
 6402 %}
 6403 
 6404 // Store Double does rounding on x86
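      // FST_D to a 64-bit memory slot narrows the 80-bit x87 value to double
      // precision, which is why the RoundDouble node can be folded into the store.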
 6405 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6406   predicate(UseSSE<=1);
 6407   match(Set mem (StoreD mem (RoundDouble src)));
 6408 
 6409   ins_cost(100);
 6410   format %{ "FST_D  $mem,$src\t# round" %}
 6411   opcode(0xDD);       /* DD /2 */
 6412   ins_encode( enc_FPR_store(mem,src) );
 6413   ins_pipe( fpu_mem_reg );
 6414 %}
 6415 
 6416 // Store XMM register to memory (double-precision floating point)
 6417 // MOVSD instruction
 6418 instruct storeD(memory mem, regD src) %{
 6419   predicate(UseSSE>=2);
 6420   match(Set mem (StoreD mem src));
 6421   ins_cost(95);
 6422   format %{ "MOVSD  $mem,$src" %}
 6423   ins_encode %{
 6424     __ movdbl($mem$$Address, $src$$XMMRegister);
 6425   %}
 6426   ins_pipe( pipe_slow );
 6427 %}
 6428 
 6429 // Move Double (regD to vlRegD)
 6430 instruct MoveD2VL(vlRegD dst, regD src) %{
 6431   match(Set dst src);
 6432   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6433   ins_encode %{
 6434     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6435   %}
 6436   ins_pipe( fpu_reg_reg );
 6437 %}
 6438 
 6439 // Move Double (vlRegD to regD)
 6440 instruct MoveVL2D(regD dst, vlRegD src) %{
 6441   match(Set dst src);
 6442   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6443   ins_encode %{
 6444     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6445   %}
 6446   ins_pipe( fpu_reg_reg );
 6447 %}
 6448 
 6449 // Store XMM register to memory (single-precision floating point)
 6450 // MOVSS instruction
 6451 instruct storeF(memory mem, regF src) %{
 6452   predicate(UseSSE>=1);
 6453   match(Set mem (StoreF mem src));
 6454   ins_cost(95);
 6455   format %{ "MOVSS  $mem,$src" %}
 6456   ins_encode %{
 6457     __ movflt($mem$$Address, $src$$XMMRegister);
 6458   %}
 6459   ins_pipe( pipe_slow );
 6460 %}
 6461 
 6462 // Move Float (regF to vlRegF)
 6463 instruct MoveF2VL(vlRegF dst, regF src) %{
 6464   match(Set dst src);
 6465   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6466   ins_encode %{
 6467     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6468   %}
 6469   ins_pipe( fpu_reg_reg );
 6470 %}
 6471 
 6472 // Move Float (vlRegF to regF)
 6473 instruct MoveVL2F(regF dst, vlRegF src) %{
 6474   match(Set dst src);
 6475   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6476   ins_encode %{
 6477     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6478   %}
 6479   ins_pipe( fpu_reg_reg );
 6480 %}
 6481 
 6482 // Store Float
 6483 instruct storeFPR( memory mem, regFPR1 src) %{
 6484   predicate(UseSSE==0);
 6485   match(Set mem (StoreF mem src));
 6486 
 6487   ins_cost(100);
 6488   format %{ "FST_S  $mem,$src" %}
 6489   opcode(0xD9);       /* D9 /2 */
 6490   ins_encode( enc_FPR_store(mem,src) );
 6491   ins_pipe( fpu_mem_reg );
 6492 %}
 6493 
 6494 // Store Float does rounding on x86
 6495 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6496   predicate(UseSSE==0);
 6497   match(Set mem (StoreF mem (RoundFloat src)));
 6498 
 6499   ins_cost(100);
 6500   format %{ "FST_S  $mem,$src\t# round" %}
 6501   opcode(0xD9);       /* D9 /2 */
 6502   ins_encode( enc_FPR_store(mem,src) );
 6503   ins_pipe( fpu_mem_reg );
 6504 %}
 6505 
 6506 // Store Float from Double register does rounding on x86 (ConvD2F folded into the store)
 6507 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6508   predicate(UseSSE<=1);
 6509   match(Set mem (StoreF mem (ConvD2F src)));
 6510 
 6511   ins_cost(100);
 6512   format %{ "FST_S  $mem,$src\t# D-round" %}
 6513   opcode(0xD9);       /* D9 /2 */
 6514   ins_encode( enc_FPR_store(mem,src) );
 6515   ins_pipe( fpu_mem_reg );
 6516 %}
 6517 
 6518 // Store immediate Float value (it is faster than store from FPU register)
 6519 // The instruction usage is guarded by predicate in operand immFPR().
 6520 instruct storeFPR_imm( memory mem, immFPR src) %{
 6521   match(Set mem (StoreF mem src));
 6522 
 6523   ins_cost(50);
 6524   format %{ "MOV    $mem,$src\t# store float" %}
 6525   opcode(0xC7);               /* C7 /0 */
 6526   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6527   ins_pipe( ialu_mem_imm );
 6528 %}
 6529 
 6530 // Store immediate Float value (it is faster than store from XMM register)
 6531 // The instruction usage is guarded by predicate in operand immF().
 6532 instruct storeF_imm( memory mem, immF src) %{
 6533   match(Set mem (StoreF mem src));
 6534 
 6535   ins_cost(50);
 6536   format %{ "MOV    $mem,$src\t# store float" %}
 6537   opcode(0xC7);               /* C7 /0 */
 6538   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6539   ins_pipe( ialu_mem_imm );
 6540 %}
 6541 
 6542 // Store Integer to stack slot
 6543 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6544   match(Set dst src);
 6545 
 6546   ins_cost(100);
 6547   format %{ "MOV    $dst,$src" %}
 6548   opcode(0x89);
 6549   ins_encode( OpcPRegSS( dst, src ) );
 6550   ins_pipe( ialu_mem_reg );
 6551 %}
 6552 
 6553 // Store Pointer to stack slot
 6554 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6555   match(Set dst src);
 6556 
 6557   ins_cost(100);
 6558   format %{ "MOV    $dst,$src" %}
 6559   opcode(0x89);
 6560   ins_encode( OpcPRegSS( dst, src ) );
 6561   ins_pipe( ialu_mem_reg );
 6562 %}
 6563 
 6564 // Store Long to stack slot
 6565 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6566   match(Set dst src);
 6567 
 6568   ins_cost(200);
 6569   format %{ "MOV    $dst,$src.lo\n\t"
 6570             "MOV    $dst+4,$src.hi" %}
 6571   opcode(0x89, 0x89);
 6572   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6573   ins_pipe( ialu_mem_long_reg );
 6574 %}
 6575 
 6576 //----------MemBar Instructions-----------------------------------------------
 6577 // Memory barrier flavors
 6578 
 6579 instruct membar_acquire() %{
 6580   match(MemBarAcquire);
 6581   match(LoadFence);
 6582   ins_cost(400);
 6583 
 6584   size(0);
 6585   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6586   ins_encode();
 6587   ins_pipe(empty);
 6588 %}
 6589 
 6590 instruct membar_acquire_lock() %{
 6591   match(MemBarAcquireLock);
 6592   ins_cost(0);
 6593 
 6594   size(0);
 6595   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6596   ins_encode( );
 6597   ins_pipe(empty);
 6598 %}
 6599 
 6600 instruct membar_release() %{
 6601   match(MemBarRelease);
 6602   match(StoreFence);
 6603   ins_cost(400);
 6604 
 6605   size(0);
 6606   format %{ "MEMBAR-release ! (empty encoding)" %}
 6607   ins_encode( );
 6608   ins_pipe(empty);
 6609 %}
 6610 
 6611 instruct membar_release_lock() %{
 6612   match(MemBarReleaseLock);
 6613   ins_cost(0);
 6614 
 6615   size(0);
 6616   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6617   ins_encode( );
 6618   ins_pipe(empty);
 6619 %}
 6620 
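      // A locked ADD of zero to the top-of-stack word provides the StoreLoad
      // barrier; it is typically cheaper than MFENCE and clobbers only the
      // flags, hence the KILL cr effect below.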
 6621 instruct membar_volatile(eFlagsReg cr) %{
 6622   match(MemBarVolatile);
 6623   effect(KILL cr);
 6624   ins_cost(400);
 6625 
 6626   format %{
 6627     $$template
 6628     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6629   %}
 6630   ins_encode %{
 6631     __ membar(Assembler::StoreLoad);
 6632   %}
 6633   ins_pipe(pipe_slow);
 6634 %}
 6635 
 6636 instruct unnecessary_membar_volatile() %{
 6637   match(MemBarVolatile);
 6638   predicate(Matcher::post_store_load_barrier(n));
 6639   ins_cost(0);
 6640 
 6641   size(0);
 6642   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6643   ins_encode( );
 6644   ins_pipe(empty);
 6645 %}
 6646 
 6647 instruct membar_storestore() %{
 6648   match(MemBarStoreStore);
 6649   ins_cost(0);
 6650 
 6651   size(0);
 6652   format %{ "MEMBAR-storestore (empty encoding)" %}
 6653   ins_encode( );
 6654   ins_pipe(empty);
 6655 %}
 6656 
 6657 //----------Move Instructions--------------------------------------------------
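      // castX2P emits no code: both operands are constrained to EAX, so the
      // int-to-pointer cast is purely a register-allocation identity.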
 6658 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6659   match(Set dst (CastX2P src));
 6660   format %{ "# X2P  $dst, $src" %}
 6661   ins_encode( /*empty encoding*/ );
 6662   ins_cost(0);
 6663   ins_pipe(empty);
 6664 %}
 6665 
 6666 instruct castP2X(rRegI dst, eRegP src ) %{
 6667   match(Set dst (CastP2X src));
 6668   ins_cost(50);
 6669   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6670   ins_encode( enc_Copy( dst, src) );
 6671   ins_pipe( ialu_reg_reg );
 6672 %}
 6673 
 6674 //----------Conditional Move---------------------------------------------------
 6675 // Conditional move
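      // On CPUs without CMOV the move is emulated by branching around a MOV with
      // the opposite condition; XOR-ing the low bit of the condition code gives
      // that negation because x86 encodes each condition and its complement as
      // an adjacent pair (e.g. 0x4 = equal, 0x5 = not-equal).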
 6676 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6677   predicate(!VM_Version::supports_cmov() );
 6678   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6679   ins_cost(200);
 6680   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6681             "MOV    $dst,$src\n"
 6682       "skip:" %}
 6683   ins_encode %{
 6684     Label Lskip;
 6685     // Invert sense of branch from sense of CMOV
 6686     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6687     __ movl($dst$$Register, $src$$Register);
 6688     __ bind(Lskip);
 6689   %}
 6690   ins_pipe( pipe_cmov_reg );
 6691 %}
 6692 
 6693 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6694   predicate(!VM_Version::supports_cmov() );
 6695   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6696   ins_cost(200);
 6697   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6698             "MOV    $dst,$src\n"
 6699       "skip:" %}
 6700   ins_encode %{
 6701     Label Lskip;
 6702     // Invert sense of branch from sense of CMOV
 6703     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6704     __ movl($dst$$Register, $src$$Register);
 6705     __ bind(Lskip);
 6706   %}
 6707   ins_pipe( pipe_cmov_reg );
 6708 %}
 6709 
 6710 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6711   predicate(VM_Version::supports_cmov() );
 6712   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6713   ins_cost(200);
 6714   format %{ "CMOV$cop $dst,$src" %}
 6715   opcode(0x0F,0x40);
 6716   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6717   ins_pipe( pipe_cmov_reg );
 6718 %}
 6719 
 6720 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6721   predicate(VM_Version::supports_cmov() );
 6722   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6723   ins_cost(200);
 6724   format %{ "CMOV$cop $dst,$src" %}
 6725   opcode(0x0F,0x40);
 6726   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6727   ins_pipe( pipe_cmov_reg );
 6728 %}
 6729 
 6730 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6731   predicate(VM_Version::supports_cmov() );
 6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6733   ins_cost(200);
 6734   expand %{
 6735     cmovI_regU(cop, cr, dst, src);
 6736   %}
 6737 %}
 6738 
 6739 // Conditional move
 6740 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6741   predicate(VM_Version::supports_cmov() );
 6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6743   ins_cost(250);
 6744   format %{ "CMOV$cop $dst,$src" %}
 6745   opcode(0x0F,0x40);
 6746   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6747   ins_pipe( pipe_cmov_mem );
 6748 %}
 6749 
 6750 // Conditional move
 6751 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6752   predicate(VM_Version::supports_cmov() );
 6753   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6754   ins_cost(250);
 6755   format %{ "CMOV$cop $dst,$src" %}
 6756   opcode(0x0F,0x40);
 6757   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6758   ins_pipe( pipe_cmov_mem );
 6759 %}
 6760 
 6761 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6762   predicate(VM_Version::supports_cmov() );
 6763   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6764   ins_cost(250);
 6765   expand %{
 6766     cmovI_memU(cop, cr, dst, src);
 6767   %}
 6768 %}
 6769 
 6770 // Conditional move
 6771 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6772   predicate(VM_Version::supports_cmov() );
 6773   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6774   ins_cost(200);
 6775   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6776   opcode(0x0F,0x40);
 6777   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6778   ins_pipe( pipe_cmov_reg );
 6779 %}
 6780 
 6781 // Conditional move (non-P6 version)
 6782 // Note: a CMoveP is generated for stubs and native wrappers
 6783 //       regardless of whether we are on a P6, so we
 6784 //       emulate a cmov here.
 6785 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6786   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6787   ins_cost(300);
 6788   format %{ "Jn$cop   skip\n\t"
 6789           "MOV    $dst,$src\t# pointer\n"
 6790       "skip:" %}
 6791   opcode(0x8b);
 6792   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6793   ins_pipe( pipe_cmov_reg );
 6794 %}
 6795 
 6796 // Conditional move
 6797 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6798   predicate(VM_Version::supports_cmov() );
 6799   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6800   ins_cost(200);
 6801   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6802   opcode(0x0F,0x40);
 6803   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6804   ins_pipe( pipe_cmov_reg );
 6805 %}
 6806 
 6807 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6808   predicate(VM_Version::supports_cmov() );
 6809   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6810   ins_cost(200);
 6811   expand %{
 6812     cmovP_regU(cop, cr, dst, src);
 6813   %}
 6814 %}
 6815 
 6816 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6817 // correctly meets the two pointer arguments; one is an incoming
 6818 // register but the other is a memory operand.  ALSO appears to
 6819 // be buggy with implicit null checks.
 6820 //
 6821 //// Conditional move
 6822 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6823 //  predicate(VM_Version::supports_cmov() );
 6824 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6825 //  ins_cost(250);
 6826 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6827 //  opcode(0x0F,0x40);
 6828 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6829 //  ins_pipe( pipe_cmov_mem );
 6830 //%}
 6831 //
 6832 //// Conditional move
 6833 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6834 //  predicate(VM_Version::supports_cmov() );
 6835 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6836 //  ins_cost(250);
 6837 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6838 //  opcode(0x0F,0x40);
 6839 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6840 //  ins_pipe( pipe_cmov_mem );
 6841 //%}
 6842 
 6843 // Conditional move
 6844 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6845   predicate(UseSSE<=1);
 6846   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6847   ins_cost(200);
 6848   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6849   opcode(0xDA);
 6850   ins_encode( enc_cmov_dpr(cop,src) );
 6851   ins_pipe( pipe_cmovDPR_reg );
 6852 %}
 6853 
 6854 // Conditional move
 6855 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6856   predicate(UseSSE==0);
 6857   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6858   ins_cost(200);
 6859   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6860   opcode(0xDA);
 6861   ins_encode( enc_cmov_dpr(cop,src) );
 6862   ins_pipe( pipe_cmovDPR_reg );
 6863 %}
 6864 
 6865 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6866 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6867   predicate(UseSSE<=1);
 6868   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6869   ins_cost(200);
 6870   format %{ "Jn$cop   skip\n\t"
 6871             "MOV    $dst,$src\t# double\n"
 6872       "skip:" %}
 6873   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6874   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6875   ins_pipe( pipe_cmovDPR_reg );
 6876 %}
 6877 
 6878 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6879 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6880   predicate(UseSSE==0);
 6881   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6882   ins_cost(200);
 6883   format %{ "Jn$cop    skip\n\t"
 6884             "MOV    $dst,$src\t# float\n"
 6885       "skip:" %}
 6886   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6887   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6888   ins_pipe( pipe_cmovDPR_reg );
 6889 %}
 6890 
 6891 // No CMOV for XMM registers with SSE/SSE2; emulate with a branch around a move
 6892 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6893   predicate (UseSSE>=1);
 6894   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6895   ins_cost(200);
 6896   format %{ "Jn$cop   skip\n\t"
 6897             "MOVSS  $dst,$src\t# float\n"
 6898       "skip:" %}
 6899   ins_encode %{
 6900     Label skip;
 6901     // Invert sense of branch from sense of CMOV
 6902     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6903     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6904     __ bind(skip);
 6905   %}
 6906   ins_pipe( pipe_slow );
 6907 %}
 6908 
 6909 // No CMOV for XMM registers with SSE/SSE2; emulate with a branch around a move
 6910 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6911   predicate (UseSSE>=2);
 6912   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6913   ins_cost(200);
 6914   format %{ "Jn$cop   skip\n\t"
 6915             "MOVSD  $dst,$src\t# double\n"
 6916       "skip:" %}
 6917   ins_encode %{
 6918     Label skip;
 6919     // Invert sense of branch from sense of CMOV
 6920     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6921     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6922     __ bind(skip);
 6923   %}
 6924   ins_pipe( pipe_slow );
 6925 %}
 6926 
 6927 // unsigned version
 6928 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6929   predicate (UseSSE>=1);
 6930   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6931   ins_cost(200);
 6932   format %{ "Jn$cop   skip\n\t"
 6933             "MOVSS  $dst,$src\t# float\n"
 6934       "skip:" %}
 6935   ins_encode %{
 6936     Label skip;
 6937     // Invert sense of branch from sense of CMOV
 6938     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6939     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6940     __ bind(skip);
 6941   %}
 6942   ins_pipe( pipe_slow );
 6943 %}
 6944 
 6945 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6946   predicate (UseSSE>=1);
 6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   expand %{
 6950     fcmovF_regU(cop, cr, dst, src);
 6951   %}
 6952 %}
 6953 
 6954 // unsigned version
 6955 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6956   predicate (UseSSE>=2);
 6957   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6958   ins_cost(200);
 6959   format %{ "Jn$cop   skip\n\t"
 6960             "MOVSD  $dst,$src\t# double\n"
 6961       "skip:" %}
 6962   ins_encode %{
 6963     Label skip;
 6964     // Invert sense of branch from sense of CMOV
 6965     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6966     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6967     __ bind(skip);
 6968   %}
 6969   ins_pipe( pipe_slow );
 6970 %}
 6971 
 6972 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6973   predicate (UseSSE>=2);
 6974   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6975   ins_cost(200);
 6976   expand %{
 6977     fcmovD_regU(cop, cr, dst, src);
 6978   %}
 6979 %}
 6980 
 6981 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6982   predicate(VM_Version::supports_cmov() );
 6983   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6986             "CMOV$cop $dst.hi,$src.hi" %}
 6987   opcode(0x0F,0x40);
 6988   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6989   ins_pipe( pipe_cmov_reg_long );
 6990 %}
 6991 
 6992 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 6993   predicate(VM_Version::supports_cmov() );
 6994   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6995   ins_cost(200);
 6996   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6997             "CMOV$cop $dst.hi,$src.hi" %}
 6998   opcode(0x0F,0x40);
 6999   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7000   ins_pipe( pipe_cmov_reg_long );
 7001 %}
 7002 
 7003 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7004   predicate(VM_Version::supports_cmov() );
 7005   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7006   ins_cost(200);
 7007   expand %{
 7008     cmovL_regU(cop, cr, dst, src);
 7009   %}
 7010 %}
 7011 
 7012 //----------Arithmetic Instructions--------------------------------------------
 7013 //----------Addition Instructions----------------------------------------------
 7014 
 7015 // Integer Addition Instructions
 7016 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7017   match(Set dst (AddI dst src));
 7018   effect(KILL cr);
 7019 
 7020   size(2);
 7021   format %{ "ADD    $dst,$src" %}
 7022   opcode(0x03);
 7023   ins_encode( OpcP, RegReg( dst, src) );
 7024   ins_pipe( ialu_reg_reg );
 7025 %}
 7026 
 7027 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7028   match(Set dst (AddI dst src));
 7029   effect(KILL cr);
 7030 
 7031   format %{ "ADD    $dst,$src" %}
 7032   opcode(0x81, 0x00); /* /0 id */
 7033   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7034   ins_pipe( ialu_reg );
 7035 %}
 7036 
 7037 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7038   predicate(UseIncDec);
 7039   match(Set dst (AddI dst src));
 7040   effect(KILL cr);
 7041 
 7042   size(1);
 7043   format %{ "INC    $dst" %}
 7044   opcode(0x40); /*  */
 7045   ins_encode( Opc_plus( primary, dst ) );
 7046   ins_pipe( ialu_reg );
 7047 %}
 7048 
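      // Add via LEA: the three-operand form leaves the source register intact
      // and does not modify the flags, so no KILL cr effect is needed.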
 7049 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7050   match(Set dst (AddI src0 src1));
 7051   ins_cost(110);
 7052 
 7053   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7054   opcode(0x8D); /* 0x8D /r */
 7055   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7056   ins_pipe( ialu_reg_reg );
 7057 %}
 7058 
 7059 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7060   match(Set dst (AddP src0 src1));
 7061   ins_cost(110);
 7062 
 7063   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7064   opcode(0x8D); /* 0x8D /r */
 7065   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7066   ins_pipe( ialu_reg_reg );
 7067 %}
 7068 
 7069 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7070   predicate(UseIncDec);
 7071   match(Set dst (AddI dst src));
 7072   effect(KILL cr);
 7073 
 7074   size(1);
 7075   format %{ "DEC    $dst" %}
 7076   opcode(0x48); /*  */
 7077   ins_encode( Opc_plus( primary, dst ) );
 7078   ins_pipe( ialu_reg );
 7079 %}
 7080 
 7081 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7082   match(Set dst (AddP dst src));
 7083   effect(KILL cr);
 7084 
 7085   size(2);
 7086   format %{ "ADD    $dst,$src" %}
 7087   opcode(0x03);
 7088   ins_encode( OpcP, RegReg( dst, src) );
 7089   ins_pipe( ialu_reg_reg );
 7090 %}
 7091 
 7092 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7093   match(Set dst (AddP dst src));
 7094   effect(KILL cr);
 7095 
 7096   format %{ "ADD    $dst,$src" %}
 7097   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7098   // ins_encode( RegImm( dst, src) );
 7099   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7100   ins_pipe( ialu_reg );
 7101 %}
 7102 
 7103 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7104   match(Set dst (AddI dst (LoadI src)));
 7105   effect(KILL cr);
 7106 
 7107   ins_cost(125);
 7108   format %{ "ADD    $dst,$src" %}
 7109   opcode(0x03);
 7110   ins_encode( OpcP, RegMem( dst, src) );
 7111   ins_pipe( ialu_reg_mem );
 7112 %}
 7113 
 7114 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7115   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7116   effect(KILL cr);
 7117 
 7118   ins_cost(150);
 7119   format %{ "ADD    $dst,$src" %}
 7120   opcode(0x01);  /* Opcode 01 /r */
 7121   ins_encode( OpcP, RegMem( src, dst ) );
 7122   ins_pipe( ialu_mem_reg );
 7123 %}
 7124 
 7125 // Add Memory with Immediate
 7126 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7127   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7128   effect(KILL cr);
 7129 
 7130   ins_cost(125);
 7131   format %{ "ADD    $dst,$src" %}
 7132   opcode(0x81);               /* Opcode 81 /0 id */
 7133   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7134   ins_pipe( ialu_mem_imm );
 7135 %}
 7136 
 7137 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7138   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7139   effect(KILL cr);
 7140 
 7141   ins_cost(125);
 7142   format %{ "INC    $dst" %}
 7143   opcode(0xFF);               /* Opcode FF /0 */
 7144   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7145   ins_pipe( ialu_mem_imm );
 7146 %}
 7147 
 7148 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7149   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7150   effect(KILL cr);
 7151 
 7152   ins_cost(125);
 7153   format %{ "DEC    $dst" %}
 7154   opcode(0xFF);               /* Opcode FF /1 */
 7155   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7156   ins_pipe( ialu_mem_imm );
 7157 %}
 7158 
 7159 
 7160 instruct checkCastPP( eRegP dst ) %{
 7161   match(Set dst (CheckCastPP dst));
 7162 
 7163   size(0);
 7164   format %{ "#checkcastPP of $dst" %}
 7165   ins_encode( /*empty encoding*/ );
 7166   ins_pipe( empty );
 7167 %}
 7168 
 7169 instruct castPP( eRegP dst ) %{
 7170   match(Set dst (CastPP dst));
 7171   format %{ "#castPP of $dst" %}
 7172   ins_encode( /*empty encoding*/ );
 7173   ins_pipe( empty );
 7174 %}
 7175 
 7176 instruct castII( rRegI dst ) %{
 7177   match(Set dst (CastII dst));
 7178   format %{ "#castII of $dst" %}
 7179   ins_encode( /*empty encoding*/ );
 7180   ins_cost(0);
 7181   ins_pipe( empty );
 7182 %}
 7183 
 7184 instruct castLL( eRegL dst ) %{
 7185   match(Set dst (CastLL dst));
 7186   format %{ "#castLL of $dst" %}
 7187   ins_encode( /*empty encoding*/ );
 7188   ins_cost(0);
 7189   ins_pipe( empty );
 7190 %}
 7191 
 7192 instruct castFF( regF dst ) %{
 7193   predicate(UseSSE >= 1);
 7194   match(Set dst (CastFF dst));
 7195   format %{ "#castFF of $dst" %}
 7196   ins_encode( /*empty encoding*/ );
 7197   ins_cost(0);
 7198   ins_pipe( empty );
 7199 %}
 7200 
 7201 instruct castDD( regD dst ) %{
 7202   predicate(UseSSE >= 2);
 7203   match(Set dst (CastDD dst));
 7204   format %{ "#castDD of $dst" %}
 7205   ins_encode( /*empty encoding*/ );
 7206   ins_cost(0);
 7207   ins_pipe( empty );
 7208 %}
 7209 
 7210 instruct castFF_PR( regFPR dst ) %{
 7211   predicate(UseSSE < 1);
 7212   match(Set dst (CastFF dst));
 7213   format %{ "#castFF of $dst" %}
 7214   ins_encode( /*empty encoding*/ );
 7215   ins_cost(0);
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castDD_PR( regDPR dst ) %{
 7220   predicate(UseSSE < 2);
 7221   match(Set dst (CastDD dst));
 7222   format %{ "#castDD of $dst" %}
 7223   ins_encode( /*empty encoding*/ );
 7224   ins_cost(0);
 7225   ins_pipe( empty );
 7226 %}
 7227 
 7228 // Load-locked - same as a regular pointer load when used with compare-swap
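      // x86 has no load-linked/store-conditional; LoadPLocked is an ordinary MOV
      // and the paired StorePConditional below supplies the atomicity via
      // LOCK CMPXCHG against EAX.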
 7229 instruct loadPLocked(eRegP dst, memory mem) %{
 7230   match(Set dst (LoadPLocked mem));
 7231 
 7232   ins_cost(125);
 7233   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7234   opcode(0x8B);
 7235   ins_encode( OpcP, RegMem(dst,mem));
 7236   ins_pipe( ialu_reg_mem );
 7237 %}
 7238 
 7239 // Conditional-store of the updated heap-top.
 7240 // Used during allocation of the shared heap.
 7241 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
 7242 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7243   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7244   // EAX is killed if there is contention, but then it's also unused.
 7245   // In the common case of no contention, EAX holds the new oop address.
 7246   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7247   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7248   ins_pipe( pipe_cmpxchg );
 7249 %}
 7250 
 7251 // Conditional-store of an int value.
 7252 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7253 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7254   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7255   effect(KILL oldval);
 7256   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7257   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7258   ins_pipe( pipe_cmpxchg );
 7259 %}
 7260 
 7261 // Conditional-store of a long value.
 7262 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7263 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7264   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7265   effect(KILL oldval);
 7266   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7267             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7268             "XCHG   EBX,ECX"
 7269   %}
 7270   ins_encode %{
 7271     // Note: we need to swap rbx, and rcx before and after the
 7272     //       cmpxchg8 instruction because the instruction uses
 7273     //       rcx as the high order word of the new value to store but
 7274     //       our register encoding uses rbx.
 7275     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7276     __ lock();
 7277     __ cmpxchg8($mem$$Address);
 7278     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7279   %}
 7280   ins_pipe( pipe_cmpxchg );
 7281 %}
 7282 
 7283 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7284 
 7285 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7286   predicate(VM_Version::supports_cx8());
 7287   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7288   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7289   effect(KILL cr, KILL oldval);
 7290   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7291             "MOV    $res,0\n\t"
 7292             "JNE,s  fail\n\t"
 7293             "MOV    $res,1\n"
 7294           "fail:" %}
 7295   ins_encode( enc_cmpxchg8(mem_ptr),
 7296               enc_flags_ne_to_boolean(res) );
 7297   ins_pipe( pipe_cmpxchg );
 7298 %}
 7299 
 7300 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7301   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7302   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7303   effect(KILL cr, KILL oldval);
 7304   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7305             "MOV    $res,0\n\t"
 7306             "JNE,s  fail\n\t"
 7307             "MOV    $res,1\n"
 7308           "fail:" %}
 7309   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7310   ins_pipe( pipe_cmpxchg );
 7311 %}
 7312 
 7313 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7314   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7315   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7316   effect(KILL cr, KILL oldval);
 7317   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7318             "MOV    $res,0\n\t"
 7319             "JNE,s  fail\n\t"
 7320             "MOV    $res,1\n"
 7321           "fail:" %}
 7322   ins_encode( enc_cmpxchgb(mem_ptr),
 7323               enc_flags_ne_to_boolean(res) );
 7324   ins_pipe( pipe_cmpxchg );
 7325 %}
 7326 
 7327 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7328   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7329   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7330   effect(KILL cr, KILL oldval);
 7331   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7332             "MOV    $res,0\n\t"
 7333             "JNE,s  fail\n\t"
 7334             "MOV    $res,1\n"
 7335           "fail:" %}
 7336   ins_encode( enc_cmpxchgw(mem_ptr),
 7337               enc_flags_ne_to_boolean(res) );
 7338   ins_pipe( pipe_cmpxchg );
 7339 %}
 7340 
 7341 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7342   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7343   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7344   effect(KILL cr, KILL oldval);
 7345   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7346             "MOV    $res,0\n\t"
 7347             "JNE,s  fail\n\t"
 7348             "MOV    $res,1\n"
 7349           "fail:" %}
 7350   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7351   ins_pipe( pipe_cmpxchg );
 7352 %}
 7353 
 7354 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7355   predicate(VM_Version::supports_cx8());
 7356   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7357   effect(KILL cr);
 7358   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7359   ins_encode( enc_cmpxchg8(mem_ptr) );
 7360   ins_pipe( pipe_cmpxchg );
 7361 %}
 7362 
 7363 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7364   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7365   effect(KILL cr);
 7366   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7367   ins_encode( enc_cmpxchg(mem_ptr) );
 7368   ins_pipe( pipe_cmpxchg );
 7369 %}
 7370 
 7371 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7372   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7373   effect(KILL cr);
 7374   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7375   ins_encode( enc_cmpxchgb(mem_ptr) );
 7376   ins_pipe( pipe_cmpxchg );
 7377 %}
 7378 
 7379 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7380   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7381   effect(KILL cr);
 7382   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7383   ins_encode( enc_cmpxchgw(mem_ptr) );
 7384   ins_pipe( pipe_cmpxchg );
 7385 %}
 7386 
 7387 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7388   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7389   effect(KILL cr);
 7390   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7391   ins_encode( enc_cmpxchg(mem_ptr) );
 7392   ins_pipe( pipe_cmpxchg );
 7393 %}
 7394 
 7395 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7396   predicate(n->as_LoadStore()->result_not_used());
 7397   match(Set dummy (GetAndAddB mem add));
 7398   effect(KILL cr);
 7399   format %{ "ADDB  [$mem],$add" %}
 7400   ins_encode %{
 7401     __ lock();
 7402     __ addb($mem$$Address, $add$$constant);
 7403   %}
 7404   ins_pipe( pipe_cmpxchg );
 7405 %}
 7406 
 7407 // Important to match to xRegI: only 8-bit regs.
 7408 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7409   match(Set newval (GetAndAddB mem newval));
 7410   effect(KILL cr);
 7411   format %{ "XADDB  [$mem],$newval" %}
 7412   ins_encode %{
 7413     __ lock();
 7414     __ xaddb($mem$$Address, $newval$$Register);
 7415   %}
 7416   ins_pipe( pipe_cmpxchg );
 7417 %}
 7418 
 7419 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7420   predicate(n->as_LoadStore()->result_not_used());
 7421   match(Set dummy (GetAndAddS mem add));
 7422   effect(KILL cr);
 7423   format %{ "ADDS  [$mem],$add" %}
 7424   ins_encode %{
 7425     __ lock();
 7426     __ addw($mem$$Address, $add$$constant);
 7427   %}
 7428   ins_pipe( pipe_cmpxchg );
 7429 %}
 7430 
 7431 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7432   match(Set newval (GetAndAddS mem newval));
 7433   effect(KILL cr);
 7434   format %{ "XADDS  [$mem],$newval" %}
 7435   ins_encode %{
 7436     __ lock();
 7437     __ xaddw($mem$$Address, $newval$$Register);
 7438   %}
 7439   ins_pipe( pipe_cmpxchg );
 7440 %}
 7441 
 7442 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7443   predicate(n->as_LoadStore()->result_not_used());
 7444   match(Set dummy (GetAndAddI mem add));
 7445   effect(KILL cr);
 7446   format %{ "ADDL  [$mem],$add" %}
 7447   ins_encode %{
 7448     __ lock();
 7449     __ addl($mem$$Address, $add$$constant);
 7450   %}
 7451   ins_pipe( pipe_cmpxchg );
 7452 %}
 7453 
 7454 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7455   match(Set newval (GetAndAddI mem newval));
 7456   effect(KILL cr);
 7457   format %{ "XADDL  [$mem],$newval" %}
 7458   ins_encode %{
 7459     __ lock();
 7460     __ xaddl($mem$$Address, $newval$$Register);
 7461   %}
 7462   ins_pipe( pipe_cmpxchg );
 7463 %}
 7464 
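      // XCHG with a memory operand is implicitly locked, so the GetAndSet
      // variants below need no explicit lock() and, unlike XADD, do not
      // modify the flags.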
 7465 // Important to match to xRegI: only 8-bit regs.
 7466 instruct xchgB( memory mem, xRegI newval) %{
 7467   match(Set newval (GetAndSetB mem newval));
 7468   format %{ "XCHGB  $newval,[$mem]" %}
 7469   ins_encode %{
 7470     __ xchgb($newval$$Register, $mem$$Address);
 7471   %}
 7472   ins_pipe( pipe_cmpxchg );
 7473 %}
 7474 
 7475 instruct xchgS( memory mem, rRegI newval) %{
 7476   match(Set newval (GetAndSetS mem newval));
 7477   format %{ "XCHGW  $newval,[$mem]" %}
 7478   ins_encode %{
 7479     __ xchgw($newval$$Register, $mem$$Address);
 7480   %}
 7481   ins_pipe( pipe_cmpxchg );
 7482 %}
 7483 
 7484 instruct xchgI( memory mem, rRegI newval) %{
 7485   match(Set newval (GetAndSetI mem newval));
 7486   format %{ "XCHGL  $newval,[$mem]" %}
 7487   ins_encode %{
 7488     __ xchgl($newval$$Register, $mem$$Address);
 7489   %}
 7490   ins_pipe( pipe_cmpxchg );
 7491 %}
 7492 
 7493 instruct xchgP( memory mem, pRegP newval) %{
 7494   match(Set newval (GetAndSetP mem newval));
 7495   format %{ "XCHGL  $newval,[$mem]" %}
 7496   ins_encode %{
 7497     __ xchgl($newval$$Register, $mem$$Address);
 7498   %}
 7499   ins_pipe( pipe_cmpxchg );
 7500 %}
 7501 
 7502 //----------Subtraction Instructions-------------------------------------------
 7503 
 7504 // Integer Subtraction Instructions
 7505 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7506   match(Set dst (SubI dst src));
 7507   effect(KILL cr);
 7508 
 7509   size(2);
 7510   format %{ "SUB    $dst,$src" %}
 7511   opcode(0x2B);
 7512   ins_encode( OpcP, RegReg( dst, src) );
 7513   ins_pipe( ialu_reg_reg );
 7514 %}
 7515 
 7516 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7517   match(Set dst (SubI dst src));
 7518   effect(KILL cr);
 7519 
 7520   format %{ "SUB    $dst,$src" %}
 7521   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7522   // ins_encode( RegImm( dst, src) );
 7523   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7524   ins_pipe( ialu_reg );
 7525 %}
 7526 
 7527 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7528   match(Set dst (SubI dst (LoadI src)));
 7529   effect(KILL cr);
 7530 
 7531   ins_cost(125);
 7532   format %{ "SUB    $dst,$src" %}
 7533   opcode(0x2B);
 7534   ins_encode( OpcP, RegMem( dst, src) );
 7535   ins_pipe( ialu_reg_mem );
 7536 %}
 7537 
 7538 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7539   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7540   effect(KILL cr);
 7541 
 7542   ins_cost(150);
 7543   format %{ "SUB    $dst,$src" %}
 7544   opcode(0x29);  /* Opcode 29 /r */
 7545   ins_encode( OpcP, RegMem( src, dst ) );
 7546   ins_pipe( ialu_mem_reg );
 7547 %}
 7548 
 7549 // Subtract from a pointer
 7550 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7551   match(Set dst (AddP dst (SubI zero src)));
 7552   effect(KILL cr);
 7553 
 7554   size(2);
 7555   format %{ "SUB    $dst,$src" %}
 7556   opcode(0x2B);
 7557   ins_encode( OpcP, RegReg( dst, src) );
 7558   ins_pipe( ialu_reg_reg );
 7559 %}
 7560 
 7561 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7562   match(Set dst (SubI zero dst));
 7563   effect(KILL cr);
 7564 
 7565   size(2);
 7566   format %{ "NEG    $dst" %}
 7567   opcode(0xF7,0x03);  // Opcode F7 /3
 7568   ins_encode( OpcP, RegOpc( dst ) );
 7569   ins_pipe( ialu_reg );
 7570 %}
 7571 
 7572 //----------Multiplication/Division Instructions-------------------------------
 7573 // Integer Multiplication Instructions
 7574 // Multiply Register
 7575 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7576   match(Set dst (MulI dst src));
 7577   effect(KILL cr);
 7578 
 7579   size(3);
 7580   ins_cost(300);
 7581   format %{ "IMUL   $dst,$src" %}
 7582   opcode(0xAF, 0x0F);
 7583   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7584   ins_pipe( ialu_reg_reg_alu0 );
 7585 %}
 7586 
 7587 // Multiply 32-bit Immediate
 7588 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7589   match(Set dst (MulI src imm));
 7590   effect(KILL cr);
 7591 
 7592   ins_cost(300);
 7593   format %{ "IMUL   $dst,$src,$imm" %}
 7594   opcode(0x69);  /* 69 /r id */
 7595   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7596   ins_pipe( ialu_reg_reg_alu0 );
 7597 %}
 7598 
 7599 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7600   match(Set dst src);
 7601   effect(KILL cr);
 7602 
 7603   // Note that this is artificially increased to make it more expensive than loadConL
 7604   ins_cost(250);
 7605   format %{ "MOV    EAX,$src\t// low word only" %}
 7606   opcode(0xB8);
 7607   ins_encode( LdImmL_Lo(dst, src) );
 7608   ins_pipe( ialu_reg_fat );
 7609 %}
 7610 
 7611 // Multiply by 32-bit Immediate, taking the shifted high order results
 7612 //  (special case for shift by 32)
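      // The matched tree corresponds roughly to the Java expression
      //   (int)(((long)x * K) >> 32)
      // where K is a constant that fits in 32 bits (see the predicate below).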
 7613 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7614   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7615   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7616              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7617              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7618   effect(USE src1, KILL cr);
 7619 
 7620   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7621   ins_cost(0*100 + 1*400 - 150);
 7622   format %{ "IMUL   EDX:EAX,$src1" %}
 7623   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7624   ins_pipe( pipe_slow );
 7625 %}
 7626 
 7627 // Multiply by 32-bit Immediate, taking the shifted high order results
 7628 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7629   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7630   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7631              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7632              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7633   effect(USE src1, KILL cr);
 7634 
 7635   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7636   ins_cost(1*100 + 1*400 - 150);
 7637   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7638             "SAR    EDX,$cnt-32" %}
 7639   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7640   ins_pipe( pipe_slow );
 7641 %}
 7642 
 7643 // Multiply Memory 32-bit Immediate
 7644 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7645   match(Set dst (MulI (LoadI src) imm));
 7646   effect(KILL cr);
 7647 
 7648   ins_cost(300);
 7649   format %{ "IMUL   $dst,$src,$imm" %}
 7650   opcode(0x69);  /* 69 /r id */
 7651   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7652   ins_pipe( ialu_reg_mem_alu0 );
 7653 %}
 7654 
 7655 // Multiply Memory
 7656 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7657   match(Set dst (MulI dst (LoadI src)));
 7658   effect(KILL cr);
 7659 
 7660   ins_cost(350);
 7661   format %{ "IMUL   $dst,$src" %}
 7662   opcode(0xAF, 0x0F);
 7663   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7664   ins_pipe( ialu_reg_mem_alu0 );
 7665 %}
 7666 
 7667 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7668 %{
 7669   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7670   effect(KILL cr, KILL src2);
 7671 
 7672   expand %{ mulI_eReg(dst, src1, cr);
 7673            mulI_eReg(src2, src3, cr);
 7674            addI_eReg(dst, src2, cr); %}
 7675 %}
 7676 
 7677 // Multiply Register Int to Long
 7678 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7679   // Basic Idea: long = (long)int * (long)int
 7680   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7681   effect(DEF dst, USE src, USE src1, KILL flags);
 7682 
 7683   ins_cost(300);
 7684   format %{ "IMUL   $dst,$src1" %}
 7685 
 7686   ins_encode( long_int_multiply( dst, src1 ) );
 7687   ins_pipe( ialu_reg_reg_alu0 );
 7688 %}
 7689 
 7690 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7691   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7692   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7693   effect(KILL flags);
 7694 
 7695   ins_cost(300);
 7696   format %{ "MUL    $dst,$src1" %}
 7697 
 7698   ins_encode( long_uint_multiply(dst, src1) );
 7699   ins_pipe( ialu_reg_reg_alu0 );
 7700 %}
 7701 
 7702 // Multiply Register Long
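      // Writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
      //   x*y = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_hi*y_hi*2^64
      // The 2^64 term falls outside a 64-bit result, which gives the lo/hi
      // recipe used by the encodings below.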
 7703 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7704   match(Set dst (MulL dst src));
 7705   effect(KILL cr, TEMP tmp);
 7706   ins_cost(4*100+3*400);
 7707 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7708 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7709   format %{ "MOV    $tmp,$src.lo\n\t"
 7710             "IMUL   $tmp,EDX\n\t"
 7711             "MOV    EDX,$src.hi\n\t"
 7712             "IMUL   EDX,EAX\n\t"
 7713             "ADD    $tmp,EDX\n\t"
 7714             "MUL    EDX:EAX,$src.lo\n\t"
 7715             "ADD    EDX,$tmp" %}
 7716   ins_encode( long_multiply( dst, src, tmp ) );
 7717   ins_pipe( pipe_slow );
 7718 %}
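
      // A minimal C++ sketch of the decomposition above (illustration only; mul64 is a
      // hypothetical helper, needs <cstdint>):
      //   uint64_t mul64(uint64_t x, uint64_t y) {
      //     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
      //     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
      //     uint64_t p  = (uint64_t)x_lo * y_lo;                 // MUL  -> EDX:EAX
      //     uint32_t hi = (uint32_t)(p >> 32)
      //                 + x_hi * y_lo + x_lo * y_hi;             // the two IMULs + ADDs
      //     return ((uint64_t)hi << 32) | (uint32_t)p;
      //   }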
 7719 
 7720 // Multiply Register Long where the left operand's high 32 bits are zero
 7721 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7722   predicate(is_operand_hi32_zero(n->in(1)));
 7723   match(Set dst (MulL dst src));
 7724   effect(KILL cr, TEMP tmp);
 7725   ins_cost(2*100+2*400);
 7726 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7727 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7728   format %{ "MOV    $tmp,$src.hi\n\t"
 7729             "IMUL   $tmp,EAX\n\t"
 7730             "MUL    EDX:EAX,$src.lo\n\t"
 7731             "ADD    EDX,$tmp" %}
 7732   ins_encode %{
 7733     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7734     __ imull($tmp$$Register, rax);
 7735     __ mull($src$$Register);
 7736     __ addl(rdx, $tmp$$Register);
 7737   %}
 7738   ins_pipe( pipe_slow );
 7739 %}
 7740 
 7741 // Multiply Register Long where the right operand's high 32 bits are zero
 7742 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7743   predicate(is_operand_hi32_zero(n->in(2)));
 7744   match(Set dst (MulL dst src));
 7745   effect(KILL cr, TEMP tmp);
 7746   ins_cost(2*100+2*400);
 7747 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7748 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7749   format %{ "MOV    $tmp,$src.lo\n\t"
 7750             "IMUL   $tmp,EDX\n\t"
 7751             "MUL    EDX:EAX,$src.lo\n\t"
 7752             "ADD    EDX,$tmp" %}
 7753   ins_encode %{
 7754     __ movl($tmp$$Register, $src$$Register);
 7755     __ imull($tmp$$Register, rdx);
 7756     __ mull($src$$Register);
 7757     __ addl(rdx, $tmp$$Register);
 7758   %}
 7759   ins_pipe( pipe_slow );
 7760 %}
 7761 
 7762 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7763 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7764   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7765   match(Set dst (MulL dst src));
 7766   effect(KILL cr);
 7767   ins_cost(1*400);
 7768 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7769 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7770   format %{ "MUL    EDX:EAX,$src.lo" %}
 7771   ins_encode %{
 7772     __ mull($src$$Register);
 7773   %}
 7774   ins_pipe( pipe_slow );
 7775 %}
 7776 
 7777 // Multiply Register Long by small constant
 7778 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7779   match(Set dst (MulL dst src));
 7780   effect(KILL cr, TEMP tmp);
 7781   ins_cost(2*100+2*400);
 7782   size(12);
 7783 // Basic idea: lo(result) = lo(src * EAX)
 7784 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7785   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7786             "MOV    EDX,$src\n\t"
 7787             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7788             "ADD    EDX,$tmp" %}
 7789   ins_encode( long_multiply_con( dst, src, tmp ) );
 7790   ins_pipe( pipe_slow );
 7791 %}
 7792 
 7793 // Integer DIV with Register
 7794 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7795   match(Set rax (DivI rax div));
 7796   effect(KILL rdx, KILL cr);
 7797   size(26);
 7798   ins_cost(30*100+10*100);
 7799   format %{ "CMP    EAX,0x80000000\n\t"
 7800             "JNE,s  normal\n\t"
 7801             "XOR    EDX,EDX\n\t"
 7802             "CMP    ECX,-1\n\t"
 7803             "JE,s   done\n"
 7804     "normal: CDQ\n\t"
 7805             "IDIV   $div\n\t"
 7806     "done:"        %}
 7807   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7808   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7809   ins_pipe( ialu_reg_reg_alu0 );
 7810 %}
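
      // Why the guard above exists: IDIV raises a hardware divide error for
      // min_jint / -1 (the true quotient 2^31 does not fit in 32 bits), while Java
      // defines the result as min_jint with remainder 0.  A C++ sketch of the intended
      // behaviour (hypothetical helper, illustration only):
      //   int32_t java_idiv(int32_t x, int32_t y) {
      //     if (x == INT32_MIN && y == -1) return x;   // guarded path: skip IDIV, EDX already zeroed
      //     return x / y;                              // normal path: CDQ + IDIV
      //   }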
 7811 
 7812 // Divide Register Long
 7813 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7814   match(Set dst (DivL src1 src2));
 7815   effect(CALL);
 7816   ins_cost(10000);
 7817   format %{ "PUSH   $src1.hi\n\t"
 7818             "PUSH   $src1.lo\n\t"
 7819             "PUSH   $src2.hi\n\t"
 7820             "PUSH   $src2.lo\n\t"
 7821             "CALL   SharedRuntime::ldiv\n\t"
 7822             "ADD    ESP,16" %}
 7823   ins_encode( long_div(src1,src2) );
 7824   ins_pipe( pipe_slow );
 7825 %}
 7826 
 7827 // Integer DIVMOD with Register, both quotient and mod results
 7828 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7829   match(DivModI rax div);
 7830   effect(KILL cr);
 7831   size(26);
 7832   ins_cost(30*100+10*100);
 7833   format %{ "CMP    EAX,0x80000000\n\t"
 7834             "JNE,s  normal\n\t"
 7835             "XOR    EDX,EDX\n\t"
 7836             "CMP    ECX,-1\n\t"
 7837             "JE,s   done\n"
 7838     "normal: CDQ\n\t"
 7839             "IDIV   $div\n\t"
 7840     "done:"        %}
 7841   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7842   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7843   ins_pipe( pipe_slow );
 7844 %}
 7845 
 7846 // Integer MOD with Register
 7847 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7848   match(Set rdx (ModI rax div));
 7849   effect(KILL rax, KILL cr);
 7850 
 7851   size(26);
 7852   ins_cost(300);
 7853   format %{ "CDQ\n\t"
 7854             "IDIV   $div" %}
 7855   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7856   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7857   ins_pipe( ialu_reg_reg_alu0 );
 7858 %}
 7859 
 7860 // Remainder Register Long
 7861 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7862   match(Set dst (ModL src1 src2));
 7863   effect(CALL);
 7864   ins_cost(10000);
 7865   format %{ "PUSH   $src1.hi\n\t"
 7866             "PUSH   $src1.lo\n\t"
 7867             "PUSH   $src2.hi\n\t"
 7868             "PUSH   $src2.lo\n\t"
 7869             "CALL   SharedRuntime::lrem\n\t"
 7870             "ADD    ESP,16" %}
 7871   ins_encode( long_mod(src1,src2) );
 7872   ins_pipe( pipe_slow );
 7873 %}
 7874 
 7875 // Divide Register Long (no special case since divisor != -1)
 7876 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7877   match(Set dst (DivL dst imm));
 7878   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7879   ins_cost(1000);
 7880   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7881             "XOR    $tmp2,$tmp2\n\t"
 7882             "CMP    $tmp,EDX\n\t"
 7883             "JA,s   fast\n\t"
 7884             "MOV    $tmp2,EAX\n\t"
 7885             "MOV    EAX,EDX\n\t"
 7886             "MOV    EDX,0\n\t"
 7887             "JLE,s  pos\n\t"
 7888             "LNEG   EAX : $tmp2\n\t"
 7889             "DIV    $tmp # unsigned division\n\t"
 7890             "XCHG   EAX,$tmp2\n\t"
 7891             "DIV    $tmp\n\t"
 7892             "LNEG   $tmp2 : EAX\n\t"
 7893             "JMP,s  done\n"
 7894     "pos:\n\t"
 7895             "DIV    $tmp\n\t"
 7896             "XCHG   EAX,$tmp2\n"
 7897     "fast:\n\t"
 7898             "DIV    $tmp\n"
 7899     "done:\n\t"
 7900             "MOV    EDX,$tmp2\n\t"
 7901             "NEG    EDX:EAX # if $imm < 0" %}
 7902   ins_encode %{
 7903     int con = (int)$imm$$constant;
 7904     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7905     int pcon = (con > 0) ? con : -con;
 7906     Label Lfast, Lpos, Ldone;
 7907 
 7908     __ movl($tmp$$Register, pcon);
 7909     __ xorl($tmp2$$Register,$tmp2$$Register);
 7910     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7911     __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7912 
 7913     __ movl($tmp2$$Register, $dst$$Register); // save
 7914     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7915     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7916     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7917 
 7918     // Negative dividend.
 7919     // convert value to positive to use unsigned division
 7920     __ lneg($dst$$Register, $tmp2$$Register);
 7921     __ divl($tmp$$Register);
 7922     __ xchgl($dst$$Register, $tmp2$$Register);
 7923     __ divl($tmp$$Register);
 7924     // revert result back to negative
 7925     __ lneg($tmp2$$Register, $dst$$Register);
 7926     __ jmpb(Ldone);
 7927 
 7928     __ bind(Lpos);
 7929     __ divl($tmp$$Register); // Use unsigned division
 7930     __ xchgl($dst$$Register, $tmp2$$Register);
 7931     // Fall through for final divide, tmp2 has the 32-bit hi result
 7932 
 7933     __ bind(Lfast);
 7934     // fast path: src is positive
 7935     __ divl($tmp$$Register); // Use unsigned division
 7936 
 7937     __ bind(Ldone);
 7938     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7939     if (con < 0) {
 7940       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7941     }
 7942   %}
 7943   ins_pipe( pipe_slow );
 7944 %}
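
      // The encoding above divides the 64-bit magnitude by abs($imm) with at most two
      // 32-bit unsigned DIVs.  A C++ sketch of the core step (hypothetical helper,
      // illustration only, assumes the divisor d fits in 32 bits):
      //   void udiv64_by_32(uint32_t n_hi, uint32_t n_lo, uint32_t d,
      //                     uint32_t* q_hi, uint32_t* q_lo) {
      //     if (n_hi < d) {                                              // "fast": quotient fits in 32 bits
      //       *q_hi = 0;
      //       *q_lo = (uint32_t)(((uint64_t)n_hi << 32 | n_lo) / d);
      //     } else {                                                     // schoolbook: high word first
      //       *q_hi = n_hi / d;
      //       *q_lo = (uint32_t)(((uint64_t)(n_hi % d) << 32 | n_lo) / d);
      //     }
      //   }
      // Signs are handled separately: a negative dividend is negated before the divides
      // and the quotient negated afterwards, and a negative $imm negates the final result.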
 7945 
 7946 // Remainder Register Long (remainder fits into 32 bits)
 7947 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7948   match(Set dst (ModL dst imm));
 7949   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7950   ins_cost(1000);
 7951   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7952             "CMP    $tmp,EDX\n\t"
 7953             "JA,s   fast\n\t"
 7954             "MOV    $tmp2,EAX\n\t"
 7955             "MOV    EAX,EDX\n\t"
 7956             "MOV    EDX,0\n\t"
 7957             "JLE,s  pos\n\t"
 7958             "LNEG   EAX : $tmp2\n\t"
 7959             "DIV    $tmp # unsigned division\n\t"
 7960             "MOV    EAX,$tmp2\n\t"
 7961             "DIV    $tmp\n\t"
 7962             "NEG    EDX\n\t"
 7963             "JMP,s  done\n"
 7964     "pos:\n\t"
 7965             "DIV    $tmp\n\t"
 7966             "MOV    EAX,$tmp2\n"
 7967     "fast:\n\t"
 7968             "DIV    $tmp\n"
 7969     "done:\n\t"
 7970             "MOV    EAX,EDX\n\t"
 7971             "SAR    EDX,31" %}
 7972   ins_encode %{
 7973     int con = (int)$imm$$constant;
 7974     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7975     int pcon = (con > 0) ? con : -con;
 7976     Label  Lfast, Lpos, Ldone;
 7977 
 7978     __ movl($tmp$$Register, pcon);
 7979     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7980     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7981 
 7982     __ movl($tmp2$$Register, $dst$$Register); // save
 7983     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7984     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7985     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7986 
 7987     // Negative dividend.
 7988     // convert value to positive to use unsigned division
 7989     __ lneg($dst$$Register, $tmp2$$Register);
 7990     __ divl($tmp$$Register);
 7991     __ movl($dst$$Register, $tmp2$$Register);
 7992     __ divl($tmp$$Register);
 7993     // revert remainder back to negative
 7994     __ negl(HIGH_FROM_LOW($dst$$Register));
 7995     __ jmpb(Ldone);
 7996 
 7997     __ bind(Lpos);
 7998     __ divl($tmp$$Register);
 7999     __ movl($dst$$Register, $tmp2$$Register);
 8000 
 8001     __ bind(Lfast);
 8002     // fast path: src is positive
 8003     __ divl($tmp$$Register);
 8004 
 8005     __ bind(Ldone);
 8006     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8007     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8008 
 8009   %}
 8010   ins_pipe( pipe_slow );
 8011 %}
 8012 
 8013 // Integer Shift Instructions
 8014 // Shift Left by one
 8015 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8016   match(Set dst (LShiftI dst shift));
 8017   effect(KILL cr);
 8018 
 8019   size(2);
 8020   format %{ "SHL    $dst,$shift" %}
 8021   opcode(0xD1, 0x4);  /* D1 /4 */
 8022   ins_encode( OpcP, RegOpc( dst ) );
 8023   ins_pipe( ialu_reg );
 8024 %}
 8025 
 8026 // Shift Left by 8-bit immediate
 8027 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8028   match(Set dst (LShiftI dst shift));
 8029   effect(KILL cr);
 8030 
 8031   size(3);
 8032   format %{ "SHL    $dst,$shift" %}
 8033   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8034   ins_encode( RegOpcImm( dst, shift) );
 8035   ins_pipe( ialu_reg );
 8036 %}
 8037 
 8038 // Shift Left by variable
 8039 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8040   match(Set dst (LShiftI dst shift));
 8041   effect(KILL cr);
 8042 
 8043   size(2);
 8044   format %{ "SHL    $dst,$shift" %}
 8045   opcode(0xD3, 0x4);  /* D3 /4 */
 8046   ins_encode( OpcP, RegOpc( dst ) );
 8047   ins_pipe( ialu_reg_reg );
 8048 %}
 8049 
 8050 // Arithmetic shift right by one
 8051 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8052   match(Set dst (RShiftI dst shift));
 8053   effect(KILL cr);
 8054 
 8055   size(2);
 8056   format %{ "SAR    $dst,$shift" %}
 8057   opcode(0xD1, 0x7);  /* D1 /7 */
 8058   ins_encode( OpcP, RegOpc( dst ) );
 8059   ins_pipe( ialu_reg );
 8060 %}
 8061 
 8062 // Arithmetic shift right memory by one
 8063 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8064   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8065   effect(KILL cr);
 8066   format %{ "SAR    $dst,$shift" %}
 8067   opcode(0xD1, 0x7);  /* D1 /7 */
 8068   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8069   ins_pipe( ialu_mem_imm );
 8070 %}
 8071 
 8072 // Arithmetic Shift Right by 8-bit immediate
 8073 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8074   match(Set dst (RShiftI dst shift));
 8075   effect(KILL cr);
 8076 
 8077   size(3);
 8078   format %{ "SAR    $dst,$shift" %}
 8079   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8080   ins_encode( RegOpcImm( dst, shift ) );
 8081   ins_pipe( ialu_mem_imm );
 8082 %}
 8083 
 8084 // Arithmetic Shift Right memory by 8-bit immediate
 8085 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8086   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8087   effect(KILL cr);
 8088 
 8089   format %{ "SAR    $dst,$shift" %}
 8090   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8091   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8092   ins_pipe( ialu_mem_imm );
 8093 %}
 8094 
 8095 // Arithmetic Shift Right by variable
 8096 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8097   match(Set dst (RShiftI dst shift));
 8098   effect(KILL cr);
 8099 
 8100   size(2);
 8101   format %{ "SAR    $dst,$shift" %}
 8102   opcode(0xD3, 0x7);  /* D3 /7 */
 8103   ins_encode( OpcP, RegOpc( dst ) );
 8104   ins_pipe( ialu_reg_reg );
 8105 %}
 8106 
 8107 // Logical shift right by one
 8108 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8109   match(Set dst (URShiftI dst shift));
 8110   effect(KILL cr);
 8111 
 8112   size(2);
 8113   format %{ "SHR    $dst,$shift" %}
 8114   opcode(0xD1, 0x5);  /* D1 /5 */
 8115   ins_encode( OpcP, RegOpc( dst ) );
 8116   ins_pipe( ialu_reg );
 8117 %}
 8118 
 8119 // Logical Shift Right by 8-bit immediate
 8120 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8121   match(Set dst (URShiftI dst shift));
 8122   effect(KILL cr);
 8123 
 8124   size(3);
 8125   format %{ "SHR    $dst,$shift" %}
 8126   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8127   ins_encode( RegOpcImm( dst, shift) );
 8128   ins_pipe( ialu_reg );
 8129 %}
 8130 
 8131 
 8132 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8133 // This idiom is used by the compiler for the i2b bytecode.
 8134 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8135   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8136 
 8137   size(3);
 8138   format %{ "MOVSX  $dst,$src :8" %}
 8139   ins_encode %{
 8140     __ movsbl($dst$$Register, $src$$Register);
 8141   %}
 8142   ins_pipe(ialu_reg_reg);
 8143 %}
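
      // Worked example: (x << 24) >> 24 keeps only the low byte and sign-extends it,
      // which is exactly MOVSX r8->r32.  C++ sketch (illustration only):
      //   int32_t i2b(int32_t x) { return (int8_t)(uint8_t)x; }   // e.g. 0x000000FF -> -1
      // The i2s rule below is the analogous 16-bit case (MOVSX r16->r32).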
 8144 
 8145 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8146 // This idiom is used by the compiler for the i2s bytecode.
 8147 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8148   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8149 
 8150   size(3);
 8151   format %{ "MOVSX  $dst,$src :16" %}
 8152   ins_encode %{
 8153     __ movswl($dst$$Register, $src$$Register);
 8154   %}
 8155   ins_pipe(ialu_reg_reg);
 8156 %}
 8157 
 8158 
 8159 // Logical Shift Right by variable
 8160 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8161   match(Set dst (URShiftI dst shift));
 8162   effect(KILL cr);
 8163 
 8164   size(2);
 8165   format %{ "SHR    $dst,$shift" %}
 8166   opcode(0xD3, 0x5);  /* D3 /5 */
 8167   ins_encode( OpcP, RegOpc( dst ) );
 8168   ins_pipe( ialu_reg_reg );
 8169 %}
 8170 
 8171 
 8172 //----------Logical Instructions-----------------------------------------------
 8173 //----------Integer Logical Instructions---------------------------------------
 8174 // And Instructions
 8175 // And Register with Register
 8176 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8177   match(Set dst (AndI dst src));
 8178   effect(KILL cr);
 8179 
 8180   size(2);
 8181   format %{ "AND    $dst,$src" %}
 8182   opcode(0x23);
 8183   ins_encode( OpcP, RegReg( dst, src) );
 8184   ins_pipe( ialu_reg_reg );
 8185 %}
 8186 
 8187 // And Register with Immediate
 8188 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8189   match(Set dst (AndI dst src));
 8190   effect(KILL cr);
 8191 
 8192   format %{ "AND    $dst,$src" %}
 8193   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8194   // ins_encode( RegImm( dst, src) );
 8195   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8196   ins_pipe( ialu_reg );
 8197 %}
 8198 
 8199 // And Register with Memory
 8200 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8201   match(Set dst (AndI dst (LoadI src)));
 8202   effect(KILL cr);
 8203 
 8204   ins_cost(125);
 8205   format %{ "AND    $dst,$src" %}
 8206   opcode(0x23);
 8207   ins_encode( OpcP, RegMem( dst, src) );
 8208   ins_pipe( ialu_reg_mem );
 8209 %}
 8210 
 8211 // And Memory with Register
 8212 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8213   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8214   effect(KILL cr);
 8215 
 8216   ins_cost(150);
 8217   format %{ "AND    $dst,$src" %}
 8218   opcode(0x21);  /* Opcode 21 /r */
 8219   ins_encode( OpcP, RegMem( src, dst ) );
 8220   ins_pipe( ialu_mem_reg );
 8221 %}
 8222 
 8223 // And Memory with Immediate
 8224 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8226   effect(KILL cr);
 8227 
 8228   ins_cost(125);
 8229   format %{ "AND    $dst,$src" %}
 8230   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8231   // ins_encode( MemImm( dst, src) );
 8232   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8233   ins_pipe( ialu_mem_imm );
 8234 %}
 8235 
 8236 // BMI1 instructions
 8237 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8238   match(Set dst (AndI (XorI src1 minus_1) src2));
 8239   predicate(UseBMI1Instructions);
 8240   effect(KILL cr);
 8241 
 8242   format %{ "ANDNL  $dst, $src1, $src2" %}
 8243 
 8244   ins_encode %{
 8245     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8246   %}
 8247   ins_pipe(ialu_reg);
 8248 %}
 8249 
 8250 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8251   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8252   predicate(UseBMI1Instructions);
 8253   effect(KILL cr);
 8254 
 8255   ins_cost(125);
 8256   format %{ "ANDNL  $dst, $src1, $src2" %}
 8257 
 8258   ins_encode %{
 8259     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8260   %}
 8261   ins_pipe(ialu_reg_mem);
 8262 %}
 8263 
 8264 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8265   match(Set dst (AndI (SubI imm_zero src) src));
 8266   predicate(UseBMI1Instructions);
 8267   effect(KILL cr);
 8268 
 8269   format %{ "BLSIL  $dst, $src" %}
 8270 
 8271   ins_encode %{
 8272     __ blsil($dst$$Register, $src$$Register);
 8273   %}
 8274   ins_pipe(ialu_reg);
 8275 %}
 8276 
 8277 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8278   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8279   predicate(UseBMI1Instructions);
 8280   effect(KILL cr);
 8281 
 8282   ins_cost(125);
 8283   format %{ "BLSIL  $dst, $src" %}
 8284 
 8285   ins_encode %{
 8286     __ blsil($dst$$Register, $src$$Address);
 8287   %}
 8288   ins_pipe(ialu_reg_mem);
 8289 %}
 8290 
 8291 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8292 %{
 8293   match(Set dst (XorI (AddI src minus_1) src));
 8294   predicate(UseBMI1Instructions);
 8295   effect(KILL cr);
 8296 
 8297   format %{ "BLSMSKL $dst, $src" %}
 8298 
 8299   ins_encode %{
 8300     __ blsmskl($dst$$Register, $src$$Register);
 8301   %}
 8302 
 8303   ins_pipe(ialu_reg);
 8304 %}
 8305 
 8306 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8307 %{
 8308   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8309   predicate(UseBMI1Instructions);
 8310   effect(KILL cr);
 8311 
 8312   ins_cost(125);
 8313   format %{ "BLSMSKL $dst, $src" %}
 8314 
 8315   ins_encode %{
 8316     __ blsmskl($dst$$Register, $src$$Address);
 8317   %}
 8318 
 8319   ins_pipe(ialu_reg_mem);
 8320 %}
 8321 
 8322 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8323 %{
 8324   match(Set dst (AndI (AddI src minus_1) src) );
 8325   predicate(UseBMI1Instructions);
 8326   effect(KILL cr);
 8327 
 8328   format %{ "BLSRL  $dst, $src" %}
 8329 
 8330   ins_encode %{
 8331     __ blsrl($dst$$Register, $src$$Register);
 8332   %}
 8333 
 8334   ins_pipe(ialu_reg);
 8335 %}
 8336 
 8337 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8338 %{
 8339   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8340   predicate(UseBMI1Instructions);
 8341   effect(KILL cr);
 8342 
 8343   ins_cost(125);
 8344   format %{ "BLSRL  $dst, $src" %}
 8345 
 8346   ins_encode %{
 8347     __ blsrl($dst$$Register, $src$$Address);
 8348   %}
 8349 
 8350   ins_pipe(ialu_reg_mem);
 8351 %}
 8352 
 8353 // Or Instructions
 8354 // Or Register with Register
 8355 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8356   match(Set dst (OrI dst src));
 8357   effect(KILL cr);
 8358 
 8359   size(2);
 8360   format %{ "OR     $dst,$src" %}
 8361   opcode(0x0B);
 8362   ins_encode( OpcP, RegReg( dst, src) );
 8363   ins_pipe( ialu_reg_reg );
 8364 %}
 8365 
 8366 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8367   match(Set dst (OrI dst (CastP2X src)));
 8368   effect(KILL cr);
 8369 
 8370   size(2);
 8371   format %{ "OR     $dst,$src" %}
 8372   opcode(0x0B);
 8373   ins_encode( OpcP, RegReg( dst, src) );
 8374   ins_pipe( ialu_reg_reg );
 8375 %}
 8376 
 8377 
 8378 // Or Register with Immediate
 8379 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8380   match(Set dst (OrI dst src));
 8381   effect(KILL cr);
 8382 
 8383   format %{ "OR     $dst,$src" %}
 8384   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8385   // ins_encode( RegImm( dst, src) );
 8386   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8387   ins_pipe( ialu_reg );
 8388 %}
 8389 
 8390 // Or Register with Memory
 8391 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8392   match(Set dst (OrI dst (LoadI src)));
 8393   effect(KILL cr);
 8394 
 8395   ins_cost(125);
 8396   format %{ "OR     $dst,$src" %}
 8397   opcode(0x0B);
 8398   ins_encode( OpcP, RegMem( dst, src) );
 8399   ins_pipe( ialu_reg_mem );
 8400 %}
 8401 
 8402 // Or Memory with Register
 8403 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8404   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8405   effect(KILL cr);
 8406 
 8407   ins_cost(150);
 8408   format %{ "OR     $dst,$src" %}
 8409   opcode(0x09);  /* Opcode 09 /r */
 8410   ins_encode( OpcP, RegMem( src, dst ) );
 8411   ins_pipe( ialu_mem_reg );
 8412 %}
 8413 
 8414 // Or Memory with Immediate
 8415 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8416   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8417   effect(KILL cr);
 8418 
 8419   ins_cost(125);
 8420   format %{ "OR     $dst,$src" %}
 8421   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8422   // ins_encode( MemImm( dst, src) );
 8423   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8424   ins_pipe( ialu_mem_imm );
 8425 %}
 8426 
 8427 // ROL/ROR
 8428 // ROL expand
 8429 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8430   effect(USE_DEF dst, USE shift, KILL cr);
 8431 
 8432   format %{ "ROL    $dst, $shift" %}
 8433   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8434   ins_encode( OpcP, RegOpc( dst ));
 8435   ins_pipe( ialu_reg );
 8436 %}
 8437 
 8438 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8439   effect(USE_DEF dst, USE shift, KILL cr);
 8440 
 8441   format %{ "ROL    $dst, $shift" %}
 8442   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8443   ins_encode( RegOpcImm(dst, shift) );
 8444   ins_pipe(ialu_reg);
 8445 %}
 8446 
 8447 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8448   effect(USE_DEF dst, USE shift, KILL cr);
 8449 
 8450   format %{ "ROL    $dst, $shift" %}
 8451   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8452   ins_encode(OpcP, RegOpc(dst));
 8453   ins_pipe( ialu_reg_reg );
 8454 %}
 8455 // end of ROL expand
 8456 
 8457 // ROL 32bit by one once
 8458 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8459   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8460 
 8461   expand %{
 8462     rolI_eReg_imm1(dst, lshift, cr);
 8463   %}
 8464 %}
 8465 
 8466 // ROL 32bit var by imm8 once
 8467 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8468   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8469   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8470 
 8471   expand %{
 8472     rolI_eReg_imm8(dst, lshift, cr);
 8473   %}
 8474 %}
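
      // Worked example for the predicate above: the rotate is only matched when the two
      // shift counts cancel mod 32, e.g. (x << 3) | (x >>> 29) is ROL x,3 because
      // (3 + 29) & 0x1f == 0.  C++ sketch of the identity (illustration only):
      //   uint32_t rotl(uint32_t x, unsigned s) { return (x << s) | (x >> (32 - s)); }   // s in 1..31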
 8475 
 8476 // ROL 32bit var by var once
 8477 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8478   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8479 
 8480   expand %{
 8481     rolI_eReg_CL(dst, shift, cr);
 8482   %}
 8483 %}
 8484 
 8485 // ROL 32bit var by var once
 8486 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8487   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8488 
 8489   expand %{
 8490     rolI_eReg_CL(dst, shift, cr);
 8491   %}
 8492 %}
 8493 
 8494 // ROR expand
 8495 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8496   effect(USE_DEF dst, USE shift, KILL cr);
 8497 
 8498   format %{ "ROR    $dst, $shift" %}
 8499   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8500   ins_encode( OpcP, RegOpc( dst ) );
 8501   ins_pipe( ialu_reg );
 8502 %}
 8503 
 8504 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8505   effect (USE_DEF dst, USE shift, KILL cr);
 8506 
 8507   format %{ "ROR    $dst, $shift" %}
 8508   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8509   ins_encode( RegOpcImm(dst, shift) );
 8510   ins_pipe( ialu_reg );
 8511 %}
 8512 
 8513 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8514   effect(USE_DEF dst, USE shift, KILL cr);
 8515 
 8516   format %{ "ROR    $dst, $shift" %}
 8517   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8518   ins_encode(OpcP, RegOpc(dst));
 8519   ins_pipe( ialu_reg_reg );
 8520 %}
 8521 // end of ROR expand
 8522 
 8523 // ROR right once
 8524 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8525   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8526 
 8527   expand %{
 8528     rorI_eReg_imm1(dst, rshift, cr);
 8529   %}
 8530 %}
 8531 
 8532 // ROR 32bit by immI8 once
 8533 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8534   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8535   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8536 
 8537   expand %{
 8538     rorI_eReg_imm8(dst, rshift, cr);
 8539   %}
 8540 %}
 8541 
 8542 // ROR 32bit var by var once
 8543 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8544   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8545 
 8546   expand %{
 8547     rorI_eReg_CL(dst, shift, cr);
 8548   %}
 8549 %}
 8550 
 8551 // ROR 32bit var by var once
 8552 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8553   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8554 
 8555   expand %{
 8556     rorI_eReg_CL(dst, shift, cr);
 8557   %}
 8558 %}
 8559 
 8560 // Xor Instructions
 8561 // Xor Register with Register
 8562 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8563   match(Set dst (XorI dst src));
 8564   effect(KILL cr);
 8565 
 8566   size(2);
 8567   format %{ "XOR    $dst,$src" %}
 8568   opcode(0x33);
 8569   ins_encode( OpcP, RegReg( dst, src) );
 8570   ins_pipe( ialu_reg_reg );
 8571 %}
 8572 
 8573 // Xor Register with Immediate -1
 8574 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8575   match(Set dst (XorI dst imm));
 8576 
 8577   size(2);
 8578   format %{ "NOT    $dst" %}
 8579   ins_encode %{
 8580      __ notl($dst$$Register);
 8581   %}
 8582   ins_pipe( ialu_reg );
 8583 %}
 8584 
 8585 // Xor Register with Immediate
 8586 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8587   match(Set dst (XorI dst src));
 8588   effect(KILL cr);
 8589 
 8590   format %{ "XOR    $dst,$src" %}
 8591   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8592   // ins_encode( RegImm( dst, src) );
 8593   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8594   ins_pipe( ialu_reg );
 8595 %}
 8596 
 8597 // Xor Register with Memory
 8598 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8599   match(Set dst (XorI dst (LoadI src)));
 8600   effect(KILL cr);
 8601 
 8602   ins_cost(125);
 8603   format %{ "XOR    $dst,$src" %}
 8604   opcode(0x33);
 8605   ins_encode( OpcP, RegMem(dst, src) );
 8606   ins_pipe( ialu_reg_mem );
 8607 %}
 8608 
 8609 // Xor Memory with Register
 8610 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8611   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8612   effect(KILL cr);
 8613 
 8614   ins_cost(150);
 8615   format %{ "XOR    $dst,$src" %}
 8616   opcode(0x31);  /* Opcode 31 /r */
 8617   ins_encode( OpcP, RegMem( src, dst ) );
 8618   ins_pipe( ialu_mem_reg );
 8619 %}
 8620 
 8621 // Xor Memory with Immediate
 8622 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8623   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8624   effect(KILL cr);
 8625 
 8626   ins_cost(125);
 8627   format %{ "XOR    $dst,$src" %}
 8628   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8629   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8630   ins_pipe( ialu_mem_imm );
 8631 %}
 8632 
 8633 //----------Convert Int to Boolean---------------------------------------------
 8634 
 8635 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8636   effect( DEF dst, USE src );
 8637   format %{ "MOV    $dst,$src" %}
 8638   ins_encode( enc_Copy( dst, src) );
 8639   ins_pipe( ialu_reg_reg );
 8640 %}
 8641 
 8642 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8643   effect( USE_DEF dst, USE src, KILL cr );
 8644 
 8645   size(4);
 8646   format %{ "NEG    $dst\n\t"
 8647             "ADC    $dst,$src" %}
 8648   ins_encode( neg_reg(dst),
 8649               OpcRegReg(0x13,dst,src) );
 8650   ins_pipe( ialu_reg_reg_long );
 8651 %}
 8652 
 8653 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8654   match(Set dst (Conv2B src));
 8655 
 8656   expand %{
 8657     movI_nocopy(dst,src);
 8658     ci2b(dst,src,cr);
 8659   %}
 8660 %}
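
      // How the NEG/ADC pair computes (src != 0): after the copy, dst == src.  NEG dst
      // sets CF exactly when src != 0 and leaves dst == -src; ADC dst,src then yields
      // (-src) + src + CF == CF, i.e. 1 for non-zero src and 0 otherwise.  C++ sketch of
      // the result (illustration only):
      //   int32_t conv2b(int32_t v) { return v != 0; }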
 8661 
 8662 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8663   effect( DEF dst, USE src );
 8664   format %{ "MOV    $dst,$src" %}
 8665   ins_encode( enc_Copy( dst, src) );
 8666   ins_pipe( ialu_reg_reg );
 8667 %}
 8668 
 8669 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8670   effect( USE_DEF dst, USE src, KILL cr );
 8671   format %{ "NEG    $dst\n\t"
 8672             "ADC    $dst,$src" %}
 8673   ins_encode( neg_reg(dst),
 8674               OpcRegReg(0x13,dst,src) );
 8675   ins_pipe( ialu_reg_reg_long );
 8676 %}
 8677 
 8678 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8679   match(Set dst (Conv2B src));
 8680 
 8681   expand %{
 8682     movP_nocopy(dst,src);
 8683     cp2b(dst,src,cr);
 8684   %}
 8685 %}
 8686 
 8687 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8688   match(Set dst (CmpLTMask p q));
 8689   effect(KILL cr);
 8690   ins_cost(400);
 8691 
 8692   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8693   format %{ "XOR    $dst,$dst\n\t"
 8694             "CMP    $p,$q\n\t"
 8695             "SETlt  $dst\n\t"
 8696             "NEG    $dst" %}
 8697   ins_encode %{
 8698     Register Rp = $p$$Register;
 8699     Register Rq = $q$$Register;
 8700     Register Rd = $dst$$Register;
 8701     Label done;
 8702     __ xorl(Rd, Rd);
 8703     __ cmpl(Rp, Rq);
 8704     __ setb(Assembler::less, Rd);
 8705     __ negl(Rd);
 8706   %}
 8707 
 8708   ins_pipe(pipe_slow);
 8709 %}
 8710 
 8711 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8712   match(Set dst (CmpLTMask dst zero));
 8713   effect(DEF dst, KILL cr);
 8714   ins_cost(100);
 8715 
 8716   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8717   ins_encode %{
 8718   __ sarl($dst$$Register, 31);
 8719   %}
 8720   ins_pipe(ialu_reg);
 8721 %}
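
      // Worked example for cmpLTMask0: an arithmetic shift by 31 smears the sign bit,
      // turning "dst < 0" into an all-ones mask.  C++ sketch (illustration only, assumes
      // arithmetic >> on signed values as on this target):
      //   int32_t lt_zero_mask(int32_t x) { return x >> 31; }   // -5 -> 0xFFFFFFFF, 7 -> 0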
 8722 
 8723 /* better to save a register than avoid a branch */
 8724 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8725   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8726   effect(KILL cr);
 8727   ins_cost(400);
 8728   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8729             "JGE    done\n\t"
 8730             "ADD    $p,$y\n"
 8731             "done:  " %}
 8732   ins_encode %{
 8733     Register Rp = $p$$Register;
 8734     Register Rq = $q$$Register;
 8735     Register Ry = $y$$Register;
 8736     Label done;
 8737     __ subl(Rp, Rq);
 8738     __ jccb(Assembler::greaterEqual, done);
 8739     __ addl(Rp, Ry);
 8740     __ bind(done);
 8741   %}
 8742 
 8743   ins_pipe(pipe_cmplt);
 8744 %}
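
      // What the rule above computes: p-q, plus y only when p < q, i.e. the masked add
      // (CmpLTMask(p,q) & y) + (p - q) folded into SUB/JGE/ADD so no mask register is
      // needed.  C++ sketch (hypothetical helper, illustration only):
      //   int32_t cadd_cmplt(int32_t p, int32_t q, int32_t y) {
      //     int32_t r = p - q;
      //     return (p < q) ? r + y : r;
      //   }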
 8745 
 8746 /* better to save a register than avoid a branch */
 8747 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8748   match(Set y (AndI (CmpLTMask p q) y));
 8749   effect(KILL cr);
 8750 
 8751   ins_cost(300);
 8752 
 8753   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8754             "JLT      done\n\t"
 8755             "XORL     $y, $y\n"
 8756             "done:  " %}
 8757   ins_encode %{
 8758     Register Rp = $p$$Register;
 8759     Register Rq = $q$$Register;
 8760     Register Ry = $y$$Register;
 8761     Label done;
 8762     __ cmpl(Rp, Rq);
 8763     __ jccb(Assembler::less, done);
 8764     __ xorl(Ry, Ry);
 8765     __ bind(done);
 8766   %}
 8767 
 8768   ins_pipe(pipe_cmplt);
 8769 %}
 8770 
 8771 /* If I enable this, I encourage spilling in the inner loop of compress.
 8772 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8773   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8774 */
 8775 //----------Overflow Math Instructions-----------------------------------------
 8776 
 8777 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8778 %{
 8779   match(Set cr (OverflowAddI op1 op2));
 8780   effect(DEF cr, USE_KILL op1, USE op2);
 8781 
 8782   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8783 
 8784   ins_encode %{
 8785     __ addl($op1$$Register, $op2$$Register);
 8786   %}
 8787   ins_pipe(ialu_reg_reg);
 8788 %}
 8789 
 8790 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8791 %{
 8792   match(Set cr (OverflowAddI op1 op2));
 8793   effect(DEF cr, USE_KILL op1, USE op2);
 8794 
 8795   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8796 
 8797   ins_encode %{
 8798     __ addl($op1$$Register, $op2$$constant);
 8799   %}
 8800   ins_pipe(ialu_reg_reg);
 8801 %}
 8802 
 8803 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8804 %{
 8805   match(Set cr (OverflowSubI op1 op2));
 8806 
 8807   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8808   ins_encode %{
 8809     __ cmpl($op1$$Register, $op2$$Register);
 8810   %}
 8811   ins_pipe(ialu_reg_reg);
 8812 %}
 8813 
 8814 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8815 %{
 8816   match(Set cr (OverflowSubI op1 op2));
 8817 
 8818   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8819   ins_encode %{
 8820     __ cmpl($op1$$Register, $op2$$constant);
 8821   %}
 8822   ins_pipe(ialu_reg_reg);
 8823 %}
 8824 
 8825 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8826 %{
 8827   match(Set cr (OverflowSubI zero op2));
 8828   effect(DEF cr, USE_KILL op2);
 8829 
 8830   format %{ "NEG    $op2\t# overflow check int" %}
 8831   ins_encode %{
 8832     __ negl($op2$$Register);
 8833   %}
 8834   ins_pipe(ialu_reg_reg);
 8835 %}
 8836 
 8837 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8838 %{
 8839   match(Set cr (OverflowMulI op1 op2));
 8840   effect(DEF cr, USE_KILL op1, USE op2);
 8841 
 8842   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8843   ins_encode %{
 8844     __ imull($op1$$Register, $op2$$Register);
 8845   %}
 8846   ins_pipe(ialu_reg_reg_alu0);
 8847 %}
 8848 
 8849 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8850 %{
 8851   match(Set cr (OverflowMulI op1 op2));
 8852   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8853 
 8854   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8855   ins_encode %{
 8856     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8857   %}
 8858   ins_pipe(ialu_reg_reg_alu0);
 8859 %}
 8860 
 8861 // Integer Absolute Instructions
 8862 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8863 %{
 8864   match(Set dst (AbsI src));
 8865   effect(TEMP dst, TEMP tmp, KILL cr);
 8866   format %{ "movl $tmp, $src\n\t"
 8867             "sarl $tmp, 31\n\t"
 8868             "movl $dst, $src\n\t"
 8869             "xorl $dst, $tmp\n\t"
 8870             "subl $dst, $tmp"
 8871           %}
 8872   ins_encode %{
 8873     __ movl($tmp$$Register, $src$$Register);
 8874     __ sarl($tmp$$Register, 31);
 8875     __ movl($dst$$Register, $src$$Register);
 8876     __ xorl($dst$$Register, $tmp$$Register);
 8877     __ subl($dst$$Register, $tmp$$Register);
 8878   %}
 8879 
 8880   ins_pipe(ialu_reg_reg);
 8881 %}
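
      // The sequence above is the classic branch-free abs: mask = x >> 31 (0 or -1), and
      // (x ^ mask) - mask negates x exactly when it is negative.  C++ sketch
      // (illustration only; like Java, abs(min_jint) wraps back to min_jint):
      //   int32_t abs_branchless(int32_t x) {
      //     int32_t m = x >> 31;
      //     return (x ^ m) - m;
      //   }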
 8882 
 8883 //----------Long Instructions------------------------------------------------
 8884 // Add Long Register with Register
 8885 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8886   match(Set dst (AddL dst src));
 8887   effect(KILL cr);
 8888   ins_cost(200);
 8889   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8890             "ADC    $dst.hi,$src.hi" %}
 8891   opcode(0x03, 0x13);
 8892   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8893   ins_pipe( ialu_reg_reg_long );
 8894 %}
 8895 
 8896 // Add Long Register with Immediate
 8897 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8898   match(Set dst (AddL dst src));
 8899   effect(KILL cr);
 8900   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8901             "ADC    $dst.hi,$src.hi" %}
 8902   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8903   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8904   ins_pipe( ialu_reg_long );
 8905 %}
 8906 
 8907 // Add Long Register with Memory
 8908 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8909   match(Set dst (AddL dst (LoadL mem)));
 8910   effect(KILL cr);
 8911   ins_cost(125);
 8912   format %{ "ADD    $dst.lo,$mem\n\t"
 8913             "ADC    $dst.hi,$mem+4" %}
 8914   opcode(0x03, 0x13);
 8915   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8916   ins_pipe( ialu_reg_long_mem );
 8917 %}
 8918 
 8919 // Subtract Long Register with Register.
 8920 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8921   match(Set dst (SubL dst src));
 8922   effect(KILL cr);
 8923   ins_cost(200);
 8924   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8925             "SBB    $dst.hi,$src.hi" %}
 8926   opcode(0x2B, 0x1B);
 8927   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8928   ins_pipe( ialu_reg_reg_long );
 8929 %}
 8930 
 8931 // Subtract Long Register with Immediate
 8932 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8933   match(Set dst (SubL dst src));
 8934   effect(KILL cr);
 8935   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8936             "SBB    $dst.hi,$src.hi" %}
 8937   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8938   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8939   ins_pipe( ialu_reg_long );
 8940 %}
 8941 
 8942 // Subtract Long Register with Memory
 8943 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8944   match(Set dst (SubL dst (LoadL mem)));
 8945   effect(KILL cr);
 8946   ins_cost(125);
 8947   format %{ "SUB    $dst.lo,$mem\n\t"
 8948             "SBB    $dst.hi,$mem+4" %}
 8949   opcode(0x2B, 0x1B);
 8950   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8951   ins_pipe( ialu_reg_long_mem );
 8952 %}
 8953 
 8954 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8955   match(Set dst (SubL zero dst));
 8956   effect(KILL cr);
 8957   ins_cost(300);
 8958   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8959   ins_encode( neg_long(dst) );
 8960   ins_pipe( ialu_reg_reg_long );
 8961 %}
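
      // The NEG/NEG/SBB sequence above is 64-bit two's-complement negation done 32 bits
      // at a time: negate both halves, then subtract the borrow out of the low half from
      // the high half.  C++ sketch (hypothetical helper, illustration only):
      //   uint64_t neg64(uint32_t lo, uint32_t hi) {
      //     uint32_t nlo = 0u - lo;
      //     uint32_t nhi = (0u - hi) - (lo != 0 ? 1u : 0u);   // the SBB $dst.hi,0
      //     return ((uint64_t)nhi << 32) | nlo;
      //   }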
 8962 
 8963 // And Long Register with Register
 8964 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8965   match(Set dst (AndL dst src));
 8966   effect(KILL cr);
 8967   format %{ "AND    $dst.lo,$src.lo\n\t"
 8968             "AND    $dst.hi,$src.hi" %}
 8969   opcode(0x23,0x23);
 8970   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8971   ins_pipe( ialu_reg_reg_long );
 8972 %}
 8973 
 8974 // And Long Register with Immediate
 8975 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8976   match(Set dst (AndL dst src));
 8977   effect(KILL cr);
 8978   format %{ "AND    $dst.lo,$src.lo\n\t"
 8979             "AND    $dst.hi,$src.hi" %}
 8980   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8981   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8982   ins_pipe( ialu_reg_long );
 8983 %}
 8984 
 8985 // And Long Register with Memory
 8986 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8987   match(Set dst (AndL dst (LoadL mem)));
 8988   effect(KILL cr);
 8989   ins_cost(125);
 8990   format %{ "AND    $dst.lo,$mem\n\t"
 8991             "AND    $dst.hi,$mem+4" %}
 8992   opcode(0x23, 0x23);
 8993   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8994   ins_pipe( ialu_reg_long_mem );
 8995 %}
 8996 
 8997 // BMI1 instructions
 8998 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8999   match(Set dst (AndL (XorL src1 minus_1) src2));
 9000   predicate(UseBMI1Instructions);
 9001   effect(KILL cr, TEMP dst);
 9002 
 9003   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9004             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9005          %}
 9006 
 9007   ins_encode %{
 9008     Register Rdst = $dst$$Register;
 9009     Register Rsrc1 = $src1$$Register;
 9010     Register Rsrc2 = $src2$$Register;
 9011     __ andnl(Rdst, Rsrc1, Rsrc2);
 9012     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9013   %}
 9014   ins_pipe(ialu_reg_reg_long);
 9015 %}
 9016 
 9017 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9018   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9019   predicate(UseBMI1Instructions);
 9020   effect(KILL cr, TEMP dst);
 9021 
 9022   ins_cost(125);
 9023   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9024             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9025          %}
 9026 
 9027   ins_encode %{
 9028     Register Rdst = $dst$$Register;
 9029     Register Rsrc1 = $src1$$Register;
 9030     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9031 
 9032     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9033     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9034   %}
 9035   ins_pipe(ialu_reg_mem);
 9036 %}
 9037 
 9038 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9039   match(Set dst (AndL (SubL imm_zero src) src));
 9040   predicate(UseBMI1Instructions);
 9041   effect(KILL cr, TEMP dst);
 9042 
 9043   format %{ "MOVL   $dst.hi, 0\n\t"
 9044             "BLSIL  $dst.lo, $src.lo\n\t"
 9045             "JNZ    done\n\t"
 9046             "BLSIL  $dst.hi, $src.hi\n"
 9047             "done:"
 9048          %}
 9049 
 9050   ins_encode %{
 9051     Label done;
 9052     Register Rdst = $dst$$Register;
 9053     Register Rsrc = $src$$Register;
 9054     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9055     __ blsil(Rdst, Rsrc);
 9056     __ jccb(Assembler::notZero, done);
 9057     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9058     __ bind(done);
 9059   %}
 9060   ins_pipe(ialu_reg);
 9061 %}
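
      // The paired BLSILs above extract the lowest set bit of a 64-bit value 32 bits at
      // a time: if the low word supplies the bit (ZF clear after the first BLSIL) the
      // high half of the result stays 0, otherwise the bit comes from the high word.
      // C++ sketch of the overall effect (illustration only):
      //   uint64_t lowest_set_bit(uint64_t x) { return x & (~x + 1); }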
 9062 
 9063 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9064   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9065   predicate(UseBMI1Instructions);
 9066   effect(KILL cr, TEMP dst);
 9067 
 9068   ins_cost(125);
 9069   format %{ "MOVL   $dst.hi, 0\n\t"
 9070             "BLSIL  $dst.lo, $src\n\t"
 9071             "JNZ    done\n\t"
 9072             "BLSIL  $dst.hi, $src+4\n"
 9073             "done:"
 9074          %}
 9075 
 9076   ins_encode %{
 9077     Label done;
 9078     Register Rdst = $dst$$Register;
 9079     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9080 
 9081     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9082     __ blsil(Rdst, $src$$Address);
 9083     __ jccb(Assembler::notZero, done);
 9084     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9085     __ bind(done);
 9086   %}
 9087   ins_pipe(ialu_reg_mem);
 9088 %}
 9089 
 9090 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9091 %{
 9092   match(Set dst (XorL (AddL src minus_1) src));
 9093   predicate(UseBMI1Instructions);
 9094   effect(KILL cr, TEMP dst);
 9095 
 9096   format %{ "MOVL    $dst.hi, 0\n\t"
 9097             "BLSMSKL $dst.lo, $src.lo\n\t"
 9098             "JNC     done\n\t"
 9099             "BLSMSKL $dst.hi, $src.hi\n"
 9100             "done:"
 9101          %}
 9102 
 9103   ins_encode %{
 9104     Label done;
 9105     Register Rdst = $dst$$Register;
 9106     Register Rsrc = $src$$Register;
 9107     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9108     __ blsmskl(Rdst, Rsrc);
 9109     __ jccb(Assembler::carryClear, done);
 9110     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9111     __ bind(done);
 9112   %}
 9113 
 9114   ins_pipe(ialu_reg);
 9115 %}
 9116 
 9117 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9118 %{
 9119   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9120   predicate(UseBMI1Instructions);
 9121   effect(KILL cr, TEMP dst);
 9122 
 9123   ins_cost(125);
 9124   format %{ "MOVL    $dst.hi, 0\n\t"
 9125             "BLSMSKL $dst.lo, $src\n\t"
 9126             "JNC     done\n\t"
 9127             "BLSMSKL $dst.hi, $src+4\n"
 9128             "done:"
 9129          %}
 9130 
 9131   ins_encode %{
 9132     Label done;
 9133     Register Rdst = $dst$$Register;
 9134     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9135 
 9136     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9137     __ blsmskl(Rdst, $src$$Address);
 9138     __ jccb(Assembler::carryClear, done);
 9139     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9140     __ bind(done);
 9141   %}
 9142 
 9143   ins_pipe(ialu_reg_mem);
 9144 %}
 9145 
 9146 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9147 %{
 9148   match(Set dst (AndL (AddL src minus_1) src) );
 9149   predicate(UseBMI1Instructions);
 9150   effect(KILL cr, TEMP dst);
 9151 
 9152   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9153             "BLSRL  $dst.lo, $src.lo\n\t"
 9154             "JNC    done\n\t"
 9155             "BLSRL  $dst.hi, $src.hi\n"
 9156             "done:"
 9157   %}
 9158 
 9159   ins_encode %{
 9160     Label done;
 9161     Register Rdst = $dst$$Register;
 9162     Register Rsrc = $src$$Register;
 9163     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9164     __ blsrl(Rdst, Rsrc);
 9165     __ jccb(Assembler::carryClear, done);
 9166     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9167     __ bind(done);
 9168   %}
 9169 
 9170   ins_pipe(ialu_reg);
 9171 %}
 9172 
 9173 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9174 %{
 9175   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9176   predicate(UseBMI1Instructions);
 9177   effect(KILL cr, TEMP dst);
 9178 
 9179   ins_cost(125);
 9180   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9181             "BLSRL  $dst.lo, $src\n\t"
 9182             "JNC    done\n\t"
 9183             "BLSRL  $dst.hi, $src+4\n"
 9184             "done:"
 9185   %}
 9186 
 9187   ins_encode %{
 9188     Label done;
 9189     Register Rdst = $dst$$Register;
 9190     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9191     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9192     __ blsrl(Rdst, $src$$Address);
 9193     __ jccb(Assembler::carryClear, done);
 9194     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9195     __ bind(done);
 9196   %}
 9197 
 9198   ins_pipe(ialu_reg_mem);
 9199 %}
 9200 
 9201 // Or Long Register with Register
 9202 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9203   match(Set dst (OrL dst src));
 9204   effect(KILL cr);
 9205   format %{ "OR     $dst.lo,$src.lo\n\t"
 9206             "OR     $dst.hi,$src.hi" %}
 9207   opcode(0x0B,0x0B);
 9208   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9209   ins_pipe( ialu_reg_reg_long );
 9210 %}
 9211 
 9212 // Or Long Register with Immediate
 9213 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9214   match(Set dst (OrL dst src));
 9215   effect(KILL cr);
 9216   format %{ "OR     $dst.lo,$src.lo\n\t"
 9217             "OR     $dst.hi,$src.hi" %}
 9218   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9219   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9220   ins_pipe( ialu_reg_long );
 9221 %}
 9222 
 9223 // Or Long Register with Memory
 9224 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9225   match(Set dst (OrL dst (LoadL mem)));
 9226   effect(KILL cr);
 9227   ins_cost(125);
 9228   format %{ "OR     $dst.lo,$mem\n\t"
 9229             "OR     $dst.hi,$mem+4" %}
 9230   opcode(0x0B,0x0B);
 9231   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9232   ins_pipe( ialu_reg_long_mem );
 9233 %}
 9234 
 9235 // Xor Long Register with Register
 9236 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9237   match(Set dst (XorL dst src));
 9238   effect(KILL cr);
 9239   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9240             "XOR    $dst.hi,$src.hi" %}
 9241   opcode(0x33,0x33);
 9242   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9243   ins_pipe( ialu_reg_reg_long );
 9244 %}
 9245 
 9246 // Xor Long Register with Immediate -1
 9247 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9248   match(Set dst (XorL dst imm));
 9249   format %{ "NOT    $dst.lo\n\t"
 9250             "NOT    $dst.hi" %}
 9251   ins_encode %{
 9252      __ notl($dst$$Register);
 9253      __ notl(HIGH_FROM_LOW($dst$$Register));
 9254   %}
 9255   ins_pipe( ialu_reg_long );
 9256 %}
 9257 
 9258 // Xor Long Register with Immediate
 9259 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9260   match(Set dst (XorL dst src));
 9261   effect(KILL cr);
 9262   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9263             "XOR    $dst.hi,$src.hi" %}
 9264   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9265   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9266   ins_pipe( ialu_reg_long );
 9267 %}
 9268 
 9269 // Xor Long Register with Memory
 9270 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9271   match(Set dst (XorL dst (LoadL mem)));
 9272   effect(KILL cr);
 9273   ins_cost(125);
 9274   format %{ "XOR    $dst.lo,$mem\n\t"
 9275             "XOR    $dst.hi,$mem+4" %}
 9276   opcode(0x33,0x33);
 9277   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9278   ins_pipe( ialu_reg_long_mem );
 9279 %}
 9280 
 9281 // Shift Left Long by 1
 9282 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9283   predicate(UseNewLongLShift);
 9284   match(Set dst (LShiftL dst cnt));
 9285   effect(KILL cr);
 9286   ins_cost(100);
 9287   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9288             "ADC    $dst.hi,$dst.hi" %}
 9289   ins_encode %{
 9290     __ addl($dst$$Register,$dst$$Register);
 9291     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9292   %}
 9293   ins_pipe( ialu_reg_long );
 9294 %}
 9295 
 9296 // Shift Left Long by 2
 9297 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9298   predicate(UseNewLongLShift);
 9299   match(Set dst (LShiftL dst cnt));
 9300   effect(KILL cr);
 9301   ins_cost(100);
 9302   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi\n\t"
 9304             "ADD    $dst.lo,$dst.lo\n\t"
 9305             "ADC    $dst.hi,$dst.hi" %}
 9306   ins_encode %{
 9307     __ addl($dst$$Register,$dst$$Register);
 9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9309     __ addl($dst$$Register,$dst$$Register);
 9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9311   %}
 9312   ins_pipe( ialu_reg_long );
 9313 %}
 9314 
 9315 // Shift Left Long by 3
 9316 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9317   predicate(UseNewLongLShift);
 9318   match(Set dst (LShiftL dst cnt));
 9319   effect(KILL cr);
 9320   ins_cost(100);
 9321   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9322             "ADC    $dst.hi,$dst.hi\n\t"
 9323             "ADD    $dst.lo,$dst.lo\n\t"
 9324             "ADC    $dst.hi,$dst.hi\n\t"
 9325             "ADD    $dst.lo,$dst.lo\n\t"
 9326             "ADC    $dst.hi,$dst.hi" %}
 9327   ins_encode %{
 9328     __ addl($dst$$Register,$dst$$Register);
 9329     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9330     __ addl($dst$$Register,$dst$$Register);
 9331     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9332     __ addl($dst$$Register,$dst$$Register);
 9333     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9334   %}
 9335   ins_pipe( ialu_reg_long );
 9336 %}
 9337 
 9338 // Shift Left Long by 1-31
 9339 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9340   match(Set dst (LShiftL dst cnt));
 9341   effect(KILL cr);
 9342   ins_cost(200);
 9343   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9344             "SHL    $dst.lo,$cnt" %}
 9345   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9346   ins_encode( move_long_small_shift(dst,cnt) );
 9347   ins_pipe( ialu_reg_long );
 9348 %}
 9349 
 9350 // Shift Left Long by 32-63
 9351 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9352   match(Set dst (LShiftL dst cnt));
 9353   effect(KILL cr);
 9354   ins_cost(300);
 9355   format %{ "MOV    $dst.hi,$dst.lo\n"
 9356           "\tSHL    $dst.hi,$cnt-32\n"
 9357           "\tXOR    $dst.lo,$dst.lo" %}
 9358   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9359   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9360   ins_pipe( ialu_reg_long );
 9361 %}
 9362 
 9363 // Shift Left Long by variable
 9364 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9365   match(Set dst (LShiftL dst shift));
 9366   effect(KILL cr);
 9367   ins_cost(500+200);
 9368   size(17);
 9369   format %{ "TEST   $shift,32\n\t"
 9370             "JEQ,s  small\n\t"
 9371             "MOV    $dst.hi,$dst.lo\n\t"
 9372             "XOR    $dst.lo,$dst.lo\n"
 9373     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9374             "SHL    $dst.lo,$shift" %}
 9375   ins_encode( shift_left_long( dst, shift ) );
 9376   ins_pipe( pipe_slow );
 9377 %}
 9378 
 9379 // Shift Right Long by 1-31
 9380 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9381   match(Set dst (URShiftL dst cnt));
 9382   effect(KILL cr);
 9383   ins_cost(200);
 9384   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9385             "SHR    $dst.hi,$cnt" %}
 9386   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9387   ins_encode( move_long_small_shift(dst,cnt) );
 9388   ins_pipe( ialu_reg_long );
 9389 %}
 9390 
 9391 // Shift Right Long by 32-63
 9392 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9393   match(Set dst (URShiftL dst cnt));
 9394   effect(KILL cr);
 9395   ins_cost(300);
 9396   format %{ "MOV    $dst.lo,$dst.hi\n"
 9397           "\tSHR    $dst.lo,$cnt-32\n"
 9398           "\tXOR    $dst.hi,$dst.hi" %}
 9399   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9400   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9401   ins_pipe( ialu_reg_long );
 9402 %}
 9403 
 9404 // Shift Right Long by variable
 9405 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9406   match(Set dst (URShiftL dst shift));
 9407   effect(KILL cr);
 9408   ins_cost(600);
 9409   size(17);
 9410   format %{ "TEST   $shift,32\n\t"
 9411             "JEQ,s  small\n\t"
 9412             "MOV    $dst.lo,$dst.hi\n\t"
 9413             "XOR    $dst.hi,$dst.hi\n"
 9414     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9415             "SHR    $dst.hi,$shift" %}
 9416   ins_encode( shift_right_long( dst, shift ) );
 9417   ins_pipe( pipe_slow );
 9418 %}
 9419 
// Shift Right arithmetic Long by 1-31
 9421 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9422   match(Set dst (RShiftL dst cnt));
 9423   effect(KILL cr);
 9424   ins_cost(200);
 9425   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9426             "SAR    $dst.hi,$cnt" %}
 9427   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9428   ins_encode( move_long_small_shift(dst,cnt) );
 9429   ins_pipe( ialu_reg_long );
 9430 %}
 9431 
// Shift Right arithmetic Long by 32-63
 9433 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9434   match(Set dst (RShiftL dst cnt));
 9435   effect(KILL cr);
 9436   ins_cost(300);
 9437   format %{ "MOV    $dst.lo,$dst.hi\n"
 9438           "\tSAR    $dst.lo,$cnt-32\n"
 9439           "\tSAR    $dst.hi,31" %}
 9440   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9441   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9442   ins_pipe( ialu_reg_long );
 9443 %}
 9444 
 9445 // Shift Right arithmetic Long by variable
 9446 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9447   match(Set dst (RShiftL dst shift));
 9448   effect(KILL cr);
 9449   ins_cost(600);
 9450   size(18);
 9451   format %{ "TEST   $shift,32\n\t"
 9452             "JEQ,s  small\n\t"
 9453             "MOV    $dst.lo,$dst.hi\n\t"
 9454             "SAR    $dst.hi,31\n"
 9455     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9456             "SAR    $dst.hi,$shift" %}
 9457   ins_encode( shift_right_arith_long( dst, shift ) );
 9458   ins_pipe( pipe_slow );
 9459 %}
 9460 
 9461 
 9462 //----------Double Instructions------------------------------------------------
 9463 // Double Math
 9464 
 9465 // Compare & branch
 9466 
// P6 version of double compare, sets condition codes in EFLAGS
 9468 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9469   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9470   match(Set cr (CmpD src1 src2));
 9471   effect(KILL rax);
 9472   ins_cost(150);
 9473   format %{ "FLD    $src1\n\t"
 9474             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9475             "JNP    exit\n\t"
 9476             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9477             "SAHF\n"
 9478      "exit:\tNOP               // avoid branch to branch" %}
 9479   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9480   ins_encode( Push_Reg_DPR(src1),
 9481               OpcP, RegOpc(src2),
 9482               cmpF_P6_fixup );
 9483   ins_pipe( pipe_slow );
 9484 %}
 9485 
 9486 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9487   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9488   match(Set cr (CmpD src1 src2));
 9489   ins_cost(150);
 9490   format %{ "FLD    $src1\n\t"
 9491             "FUCOMIP ST,$src2  // P6 instruction" %}
 9492   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9493   ins_encode( Push_Reg_DPR(src1),
 9494               OpcP, RegOpc(src2));
 9495   ins_pipe( pipe_slow );
 9496 %}
 9497 
 9498 // Compare & branch
 9499 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9500   predicate(UseSSE<=1);
 9501   match(Set cr (CmpD src1 src2));
 9502   effect(KILL rax);
 9503   ins_cost(200);
 9504   format %{ "FLD    $src1\n\t"
 9505             "FCOMp  $src2\n\t"
 9506             "FNSTSW AX\n\t"
 9507             "TEST   AX,0x400\n\t"
 9508             "JZ,s   flags\n\t"
 9509             "MOV    AH,1\t# unordered treat as LT\n"
 9510     "flags:\tSAHF" %}
 9511   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9512   ins_encode( Push_Reg_DPR(src1),
 9513               OpcP, RegOpc(src2),
 9514               fpu_flags);
 9515   ins_pipe( pipe_slow );
 9516 %}
 9517 
 9518 // Compare vs zero into -1,0,1
 9519 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9520   predicate(UseSSE<=1);
 9521   match(Set dst (CmpD3 src1 zero));
 9522   effect(KILL cr, KILL rax);
 9523   ins_cost(280);
 9524   format %{ "FTSTD  $dst,$src1" %}
 9525   opcode(0xE4, 0xD9);
 9526   ins_encode( Push_Reg_DPR(src1),
 9527               OpcS, OpcP, PopFPU,
 9528               CmpF_Result(dst));
 9529   ins_pipe( pipe_slow );
 9530 %}
 9531 
 9532 // Compare into -1,0,1
 9533 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9534   predicate(UseSSE<=1);
 9535   match(Set dst (CmpD3 src1 src2));
 9536   effect(KILL cr, KILL rax);
 9537   ins_cost(300);
 9538   format %{ "FCMPD  $dst,$src1,$src2" %}
 9539   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9540   ins_encode( Push_Reg_DPR(src1),
 9541               OpcP, RegOpc(src2),
 9542               CmpF_Result(dst));
 9543   ins_pipe( pipe_slow );
 9544 %}
 9545 
// double compare and set condition codes in EFLAGS by XMM regs
 9547 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9548   predicate(UseSSE>=2);
 9549   match(Set cr (CmpD src1 src2));
 9550   ins_cost(145);
 9551   format %{ "UCOMISD $src1,$src2\n\t"
 9552             "JNP,s   exit\n\t"
 9553             "PUSHF\t# saw NaN, set CF\n\t"
 9554             "AND     [rsp], #0xffffff2b\n\t"
 9555             "POPF\n"
 9556     "exit:" %}
 9557   ins_encode %{
 9558     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9559     emit_cmpfp_fixup(_masm);
 9560   %}
 9561   ins_pipe( pipe_slow );
 9562 %}
 9563 
 9564 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9565   predicate(UseSSE>=2);
 9566   match(Set cr (CmpD src1 src2));
 9567   ins_cost(100);
 9568   format %{ "UCOMISD $src1,$src2" %}
 9569   ins_encode %{
 9570     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9571   %}
 9572   ins_pipe( pipe_slow );
 9573 %}
 9574 
// double compare and set condition codes in EFLAGS by XMM regs
 9576 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9577   predicate(UseSSE>=2);
 9578   match(Set cr (CmpD src1 (LoadD src2)));
 9579   ins_cost(145);
 9580   format %{ "UCOMISD $src1,$src2\n\t"
 9581             "JNP,s   exit\n\t"
 9582             "PUSHF\t# saw NaN, set CF\n\t"
 9583             "AND     [rsp], #0xffffff2b\n\t"
 9584             "POPF\n"
 9585     "exit:" %}
 9586   ins_encode %{
 9587     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9588     emit_cmpfp_fixup(_masm);
 9589   %}
 9590   ins_pipe( pipe_slow );
 9591 %}
 9592 
 9593 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9594   predicate(UseSSE>=2);
 9595   match(Set cr (CmpD src1 (LoadD src2)));
 9596   ins_cost(100);
 9597   format %{ "UCOMISD $src1,$src2" %}
 9598   ins_encode %{
 9599     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9600   %}
 9601   ins_pipe( pipe_slow );
 9602 %}
 9603 
 9604 // Compare into -1,0,1 in XMM
 9605 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9606   predicate(UseSSE>=2);
 9607   match(Set dst (CmpD3 src1 src2));
 9608   effect(KILL cr);
 9609   ins_cost(255);
 9610   format %{ "UCOMISD $src1, $src2\n\t"
 9611             "MOV     $dst, #-1\n\t"
 9612             "JP,s    done\n\t"
 9613             "JB,s    done\n\t"
 9614             "SETNE   $dst\n\t"
 9615             "MOVZB   $dst, $dst\n"
 9616     "done:" %}
 9617   ins_encode %{
 9618     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9619     emit_cmpfp3(_masm, $dst$$Register);
 9620   %}
 9621   ins_pipe( pipe_slow );
 9622 %}
 9623 
 9624 // Compare into -1,0,1 in XMM and memory
 9625 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9626   predicate(UseSSE>=2);
 9627   match(Set dst (CmpD3 src1 (LoadD src2)));
 9628   effect(KILL cr);
 9629   ins_cost(275);
 9630   format %{ "UCOMISD $src1, $src2\n\t"
 9631             "MOV     $dst, #-1\n\t"
 9632             "JP,s    done\n\t"
 9633             "JB,s    done\n\t"
 9634             "SETNE   $dst\n\t"
 9635             "MOVZB   $dst, $dst\n"
 9636     "done:" %}
 9637   ins_encode %{
 9638     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9639     emit_cmpfp3(_masm, $dst$$Register);
 9640   %}
 9641   ins_pipe( pipe_slow );
 9642 %}
 9643 
 9644 
 9645 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9646   predicate (UseSSE <=1);
 9647   match(Set dst (SubD dst src));
 9648 
 9649   format %{ "FLD    $src\n\t"
 9650             "DSUBp  $dst,ST" %}
 9651   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9652   ins_cost(150);
 9653   ins_encode( Push_Reg_DPR(src),
 9654               OpcP, RegOpc(dst) );
 9655   ins_pipe( fpu_reg_reg );
 9656 %}
 9657 
 9658 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9659   predicate (UseSSE <=1);
 9660   match(Set dst (RoundDouble (SubD src1 src2)));
 9661   ins_cost(250);
 9662 
 9663   format %{ "FLD    $src2\n\t"
 9664             "DSUB   ST,$src1\n\t"
 9665             "FSTP_D $dst\t# D-round" %}
 9666   opcode(0xD8, 0x5);
 9667   ins_encode( Push_Reg_DPR(src2),
 9668               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9669   ins_pipe( fpu_mem_reg_reg );
 9670 %}
 9671 
 9672 
 9673 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9674   predicate (UseSSE <=1);
 9675   match(Set dst (SubD dst (LoadD src)));
 9676   ins_cost(150);
 9677 
 9678   format %{ "FLD    $src\n\t"
 9679             "DSUBp  $dst,ST" %}
 9680   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9681   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9682               OpcP, RegOpc(dst) );
 9683   ins_pipe( fpu_reg_mem );
 9684 %}
 9685 
 9686 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9687   predicate (UseSSE<=1);
 9688   match(Set dst (AbsD src));
 9689   ins_cost(100);
 9690   format %{ "FABS" %}
 9691   opcode(0xE1, 0xD9);
 9692   ins_encode( OpcS, OpcP );
 9693   ins_pipe( fpu_reg_reg );
 9694 %}
 9695 
 9696 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9697   predicate(UseSSE<=1);
 9698   match(Set dst (NegD src));
 9699   ins_cost(100);
 9700   format %{ "FCHS" %}
 9701   opcode(0xE0, 0xD9);
 9702   ins_encode( OpcS, OpcP );
 9703   ins_pipe( fpu_reg_reg );
 9704 %}
 9705 
 9706 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9707   predicate(UseSSE<=1);
 9708   match(Set dst (AddD dst src));
 9709   format %{ "FLD    $src\n\t"
 9710             "DADD   $dst,ST" %}
 9711   size(4);
 9712   ins_cost(150);
 9713   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9714   ins_encode( Push_Reg_DPR(src),
 9715               OpcP, RegOpc(dst) );
 9716   ins_pipe( fpu_reg_reg );
 9717 %}
 9718 
 9719 
 9720 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9721   predicate(UseSSE<=1);
 9722   match(Set dst (RoundDouble (AddD src1 src2)));
 9723   ins_cost(250);
 9724 
 9725   format %{ "FLD    $src2\n\t"
 9726             "DADD   ST,$src1\n\t"
 9727             "FSTP_D $dst\t# D-round" %}
 9728   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9729   ins_encode( Push_Reg_DPR(src2),
 9730               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9731   ins_pipe( fpu_mem_reg_reg );
 9732 %}
 9733 
 9734 
 9735 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9736   predicate(UseSSE<=1);
 9737   match(Set dst (AddD dst (LoadD src)));
 9738   ins_cost(150);
 9739 
 9740   format %{ "FLD    $src\n\t"
 9741             "DADDp  $dst,ST" %}
 9742   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9743   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9744               OpcP, RegOpc(dst) );
 9745   ins_pipe( fpu_reg_mem );
 9746 %}
 9747 
 9748 // add-to-memory
 9749 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9750   predicate(UseSSE<=1);
 9751   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9752   ins_cost(150);
 9753 
 9754   format %{ "FLD_D  $dst\n\t"
 9755             "DADD   ST,$src\n\t"
 9756             "FST_D  $dst" %}
 9757   opcode(0xDD, 0x0);
 9758   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9759               Opcode(0xD8), RegOpc(src),
 9760               set_instruction_start,
 9761               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9762   ins_pipe( fpu_reg_mem );
 9763 %}
 9764 
 9765 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9766   predicate(UseSSE<=1);
 9767   match(Set dst (AddD dst con));
 9768   ins_cost(125);
 9769   format %{ "FLD1\n\t"
 9770             "DADDp  $dst,ST" %}
 9771   ins_encode %{
 9772     __ fld1();
 9773     __ faddp($dst$$reg);
 9774   %}
 9775   ins_pipe(fpu_reg);
 9776 %}
 9777 
 9778 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9779   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9780   match(Set dst (AddD dst con));
 9781   ins_cost(200);
 9782   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9783             "DADDp  $dst,ST" %}
 9784   ins_encode %{
 9785     __ fld_d($constantaddress($con));
 9786     __ faddp($dst$$reg);
 9787   %}
 9788   ins_pipe(fpu_reg_mem);
 9789 %}
 9790 
 9791 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9792   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9793   match(Set dst (RoundDouble (AddD src con)));
 9794   ins_cost(200);
 9795   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9796             "DADD   ST,$src\n\t"
 9797             "FSTP_D $dst\t# D-round" %}
 9798   ins_encode %{
 9799     __ fld_d($constantaddress($con));
 9800     __ fadd($src$$reg);
 9801     __ fstp_d(Address(rsp, $dst$$disp));
 9802   %}
 9803   ins_pipe(fpu_mem_reg_con);
 9804 %}
 9805 
 9806 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9807   predicate(UseSSE<=1);
 9808   match(Set dst (MulD dst src));
 9809   format %{ "FLD    $src\n\t"
 9810             "DMULp  $dst,ST" %}
 9811   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9812   ins_cost(150);
 9813   ins_encode( Push_Reg_DPR(src),
 9814               OpcP, RegOpc(dst) );
 9815   ins_pipe( fpu_reg_reg );
 9816 %}
 9817 
 9818 // Strict FP instruction biases argument before multiply then
 9819 // biases result to avoid double rounding of subnormals.
 9820 //
 9821 // scale arg1 by multiplying arg1 by 2^(-15360)
 9822 // load arg2
 9823 // multiply scaled arg1 by arg2
 9824 // rescale product by 2^(15360)
 9825 //
 9826 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9827   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9828   match(Set dst (MulD dst src));
 9829   ins_cost(1);   // Select this instruction for all FP double multiplies
 9830 
 9831   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9832             "DMULp  $dst,ST\n\t"
 9833             "FLD    $src\n\t"
 9834             "DMULp  $dst,ST\n\t"
 9835             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9836             "DMULp  $dst,ST\n\t" %}
 9837   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9838   ins_encode( strictfp_bias1(dst),
 9839               Push_Reg_DPR(src),
 9840               OpcP, RegOpc(dst),
 9841               strictfp_bias2(dst) );
 9842   ins_pipe( fpu_reg_reg );
 9843 %}
 9844 
 9845 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9846   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9847   match(Set dst (MulD dst con));
 9848   ins_cost(200);
 9849   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9850             "DMULp  $dst,ST" %}
 9851   ins_encode %{
 9852     __ fld_d($constantaddress($con));
 9853     __ fmulp($dst$$reg);
 9854   %}
 9855   ins_pipe(fpu_reg_mem);
 9856 %}
 9857 
 9858 
 9859 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9860   predicate( UseSSE<=1 );
 9861   match(Set dst (MulD dst (LoadD src)));
 9862   ins_cost(200);
 9863   format %{ "FLD_D  $src\n\t"
 9864             "DMULp  $dst,ST" %}
 9865   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9866   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9867               OpcP, RegOpc(dst) );
 9868   ins_pipe( fpu_reg_mem );
 9869 %}
 9870 
 9871 //
 9872 // Cisc-alternate to reg-reg multiply
 9873 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9874   predicate( UseSSE<=1 );
 9875   match(Set dst (MulD src (LoadD mem)));
 9876   ins_cost(250);
 9877   format %{ "FLD_D  $mem\n\t"
 9878             "DMUL   ST,$src\n\t"
 9879             "FSTP_D $dst" %}
 9880   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9881   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9882               OpcReg_FPR(src),
 9883               Pop_Reg_DPR(dst) );
 9884   ins_pipe( fpu_reg_reg_mem );
 9885 %}
 9886 
 9887 
 9888 // MACRO3 -- addDPR a mulDPR
 9889 // This instruction is a '2-address' instruction in that the result goes
 9890 // back to src2.  This eliminates a move from the macro; possibly the
 9891 // register allocator will have to add it back (and maybe not).
 9892 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9893   predicate( UseSSE<=1 );
 9894   match(Set src2 (AddD (MulD src0 src1) src2));
 9895   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9896             "DMUL   ST,$src1\n\t"
 9897             "DADDp  $src2,ST" %}
 9898   ins_cost(250);
 9899   opcode(0xDD); /* LoadD DD /0 */
 9900   ins_encode( Push_Reg_FPR(src0),
 9901               FMul_ST_reg(src1),
 9902               FAddP_reg_ST(src2) );
 9903   ins_pipe( fpu_reg_reg_reg );
 9904 %}
 9905 
 9906 
 9907 // MACRO3 -- subDPR a mulDPR
 9908 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9909   predicate( UseSSE<=1 );
 9910   match(Set src2 (SubD (MulD src0 src1) src2));
 9911   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9912             "DMUL   ST,$src1\n\t"
 9913             "DSUBRp $src2,ST" %}
 9914   ins_cost(250);
 9915   ins_encode( Push_Reg_FPR(src0),
 9916               FMul_ST_reg(src1),
 9917               Opcode(0xDE), Opc_plus(0xE0,src2));
 9918   ins_pipe( fpu_reg_reg_reg );
 9919 %}
 9920 
 9921 
 9922 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9923   predicate( UseSSE<=1 );
 9924   match(Set dst (DivD dst src));
 9925 
 9926   format %{ "FLD    $src\n\t"
 9927             "FDIVp  $dst,ST" %}
 9928   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9929   ins_cost(150);
 9930   ins_encode( Push_Reg_DPR(src),
 9931               OpcP, RegOpc(dst) );
 9932   ins_pipe( fpu_reg_reg );
 9933 %}
 9934 
 9935 // Strict FP instruction biases argument before division then
 9936 // biases result, to avoid double rounding of subnormals.
 9937 //
 9938 // scale dividend by multiplying dividend by 2^(-15360)
 9939 // load divisor
 9940 // divide scaled dividend by divisor
 9941 // rescale quotient by 2^(15360)
 9942 //
 9943 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divisions
 9948 
 9949   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9950             "DMULp  $dst,ST\n\t"
 9951             "FLD    $src\n\t"
 9952             "FDIVp  $dst,ST\n\t"
 9953             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9954             "DMULp  $dst,ST\n\t" %}
 9955   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9956   ins_encode( strictfp_bias1(dst),
 9957               Push_Reg_DPR(src),
 9958               OpcP, RegOpc(dst),
 9959               strictfp_bias2(dst) );
 9960   ins_pipe( fpu_reg_reg );
 9961 %}
 9962 
 9963 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9964   predicate(UseSSE<=1);
 9965   match(Set dst (ModD dst src));
 9966   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9967 
 9968   format %{ "DMOD   $dst,$src" %}
 9969   ins_cost(250);
 9970   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9971               emitModDPR(),
 9972               Push_Result_Mod_DPR(src),
 9973               Pop_Reg_DPR(dst));
 9974   ins_pipe( pipe_slow );
 9975 %}
 9976 
 9977 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9978   predicate(UseSSE>=2);
 9979   match(Set dst (ModD src0 src1));
 9980   effect(KILL rax, KILL cr);
 9981 
 9982   format %{ "SUB    ESP,8\t # DMOD\n"
 9983           "\tMOVSD  [ESP+0],$src1\n"
 9984           "\tFLD_D  [ESP+0]\n"
 9985           "\tMOVSD  [ESP+0],$src0\n"
 9986           "\tFLD_D  [ESP+0]\n"
 9987      "loop:\tFPREM\n"
 9988           "\tFWAIT\n"
 9989           "\tFNSTSW AX\n"
 9990           "\tSAHF\n"
 9991           "\tJP     loop\n"
 9992           "\tFSTP_D [ESP+0]\n"
 9993           "\tMOVSD  $dst,[ESP+0]\n"
 9994           "\tADD    ESP,8\n"
 9995           "\tFSTP   ST0\t # Restore FPU Stack"
 9996     %}
 9997   ins_cost(250);
 9998   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9999   ins_pipe( pipe_slow );
10000 %}
10001 
10002 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10003   predicate (UseSSE<=1);
10004   match(Set dst(AtanD dst src));
10005   format %{ "DATA   $dst,$src" %}
10006   opcode(0xD9, 0xF3);
10007   ins_encode( Push_Reg_DPR(src),
10008               OpcP, OpcS, RegOpc(dst) );
10009   ins_pipe( pipe_slow );
10010 %}
10011 
10012 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10013   predicate (UseSSE>=2);
10014   match(Set dst(AtanD dst src));
10015   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10016   format %{ "DATA   $dst,$src" %}
10017   opcode(0xD9, 0xF3);
10018   ins_encode( Push_SrcD(src),
10019               OpcP, OpcS, Push_ResultD(dst) );
10020   ins_pipe( pipe_slow );
10021 %}
10022 
10023 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10024   predicate (UseSSE<=1);
10025   match(Set dst (SqrtD src));
10026   format %{ "DSQRT  $dst,$src" %}
10027   opcode(0xFA, 0xD9);
10028   ins_encode( Push_Reg_DPR(src),
10029               OpcS, OpcP, Pop_Reg_DPR(dst) );
10030   ins_pipe( pipe_slow );
10031 %}
10032 
10033 //-------------Float Instructions-------------------------------
10034 // Float Math
10035 
10036 // Code for float compare:
10037 //     fcompp();
10038 //     fwait(); fnstsw_ax();
10039 //     sahf();
10040 //     movl(dst, unordered_result);
10041 //     jcc(Assembler::parity, exit);
10042 //     movl(dst, less_result);
10043 //     jcc(Assembler::below, exit);
10044 //     movl(dst, equal_result);
10045 //     jcc(Assembler::equal, exit);
10046 //     movl(dst, greater_result);
10047 //   exit:
10048 
10049 // P6 version of float compare, sets condition codes in EFLAGS
10050 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10051   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10052   match(Set cr (CmpF src1 src2));
10053   effect(KILL rax);
10054   ins_cost(150);
10055   format %{ "FLD    $src1\n\t"
10056             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10057             "JNP    exit\n\t"
10058             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10059             "SAHF\n"
10060      "exit:\tNOP               // avoid branch to branch" %}
10061   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10062   ins_encode( Push_Reg_DPR(src1),
10063               OpcP, RegOpc(src2),
10064               cmpF_P6_fixup );
10065   ins_pipe( pipe_slow );
10066 %}
10067 
10068 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10069   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10070   match(Set cr (CmpF src1 src2));
10071   ins_cost(100);
10072   format %{ "FLD    $src1\n\t"
10073             "FUCOMIP ST,$src2  // P6 instruction" %}
10074   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10075   ins_encode( Push_Reg_DPR(src1),
10076               OpcP, RegOpc(src2));
10077   ins_pipe( pipe_slow );
10078 %}
10079 
10080 
10081 // Compare & branch
10082 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10083   predicate(UseSSE == 0);
10084   match(Set cr (CmpF src1 src2));
10085   effect(KILL rax);
10086   ins_cost(200);
10087   format %{ "FLD    $src1\n\t"
10088             "FCOMp  $src2\n\t"
10089             "FNSTSW AX\n\t"
10090             "TEST   AX,0x400\n\t"
10091             "JZ,s   flags\n\t"
10092             "MOV    AH,1\t# unordered treat as LT\n"
10093     "flags:\tSAHF" %}
10094   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10095   ins_encode( Push_Reg_DPR(src1),
10096               OpcP, RegOpc(src2),
10097               fpu_flags);
10098   ins_pipe( pipe_slow );
10099 %}
10100 
10101 // Compare vs zero into -1,0,1
10102 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10103   predicate(UseSSE == 0);
10104   match(Set dst (CmpF3 src1 zero));
10105   effect(KILL cr, KILL rax);
10106   ins_cost(280);
10107   format %{ "FTSTF  $dst,$src1" %}
10108   opcode(0xE4, 0xD9);
10109   ins_encode( Push_Reg_DPR(src1),
10110               OpcS, OpcP, PopFPU,
10111               CmpF_Result(dst));
10112   ins_pipe( pipe_slow );
10113 %}
10114 
10115 // Compare into -1,0,1
10116 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10117   predicate(UseSSE == 0);
10118   match(Set dst (CmpF3 src1 src2));
10119   effect(KILL cr, KILL rax);
10120   ins_cost(300);
10121   format %{ "FCMPF  $dst,$src1,$src2" %}
10122   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10123   ins_encode( Push_Reg_DPR(src1),
10124               OpcP, RegOpc(src2),
10125               CmpF_Result(dst));
10126   ins_pipe( pipe_slow );
10127 %}
10128 
10129 // float compare and set condition codes in EFLAGS by XMM regs
10130 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10131   predicate(UseSSE>=1);
10132   match(Set cr (CmpF src1 src2));
10133   ins_cost(145);
10134   format %{ "UCOMISS $src1,$src2\n\t"
10135             "JNP,s   exit\n\t"
10136             "PUSHF\t# saw NaN, set CF\n\t"
10137             "AND     [rsp], #0xffffff2b\n\t"
10138             "POPF\n"
10139     "exit:" %}
10140   ins_encode %{
10141     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10142     emit_cmpfp_fixup(_masm);
10143   %}
10144   ins_pipe( pipe_slow );
10145 %}
10146 
10147 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10148   predicate(UseSSE>=1);
10149   match(Set cr (CmpF src1 src2));
10150   ins_cost(100);
10151   format %{ "UCOMISS $src1,$src2" %}
10152   ins_encode %{
10153     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10154   %}
10155   ins_pipe( pipe_slow );
10156 %}
10157 
10158 // float compare and set condition codes in EFLAGS by XMM regs
10159 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10160   predicate(UseSSE>=1);
10161   match(Set cr (CmpF src1 (LoadF src2)));
10162   ins_cost(165);
10163   format %{ "UCOMISS $src1,$src2\n\t"
10164             "JNP,s   exit\n\t"
10165             "PUSHF\t# saw NaN, set CF\n\t"
10166             "AND     [rsp], #0xffffff2b\n\t"
10167             "POPF\n"
10168     "exit:" %}
10169   ins_encode %{
10170     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10171     emit_cmpfp_fixup(_masm);
10172   %}
10173   ins_pipe( pipe_slow );
10174 %}
10175 
10176 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10177   predicate(UseSSE>=1);
10178   match(Set cr (CmpF src1 (LoadF src2)));
10179   ins_cost(100);
10180   format %{ "UCOMISS $src1,$src2" %}
10181   ins_encode %{
10182     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10183   %}
10184   ins_pipe( pipe_slow );
10185 %}
10186 
10187 // Compare into -1,0,1 in XMM
10188 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10189   predicate(UseSSE>=1);
10190   match(Set dst (CmpF3 src1 src2));
10191   effect(KILL cr);
10192   ins_cost(255);
10193   format %{ "UCOMISS $src1, $src2\n\t"
10194             "MOV     $dst, #-1\n\t"
10195             "JP,s    done\n\t"
10196             "JB,s    done\n\t"
10197             "SETNE   $dst\n\t"
10198             "MOVZB   $dst, $dst\n"
10199     "done:" %}
10200   ins_encode %{
10201     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10202     emit_cmpfp3(_masm, $dst$$Register);
10203   %}
10204   ins_pipe( pipe_slow );
10205 %}
10206 
10207 // Compare into -1,0,1 in XMM and memory
10208 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10209   predicate(UseSSE>=1);
10210   match(Set dst (CmpF3 src1 (LoadF src2)));
10211   effect(KILL cr);
10212   ins_cost(275);
10213   format %{ "UCOMISS $src1, $src2\n\t"
10214             "MOV     $dst, #-1\n\t"
10215             "JP,s    done\n\t"
10216             "JB,s    done\n\t"
10217             "SETNE   $dst\n\t"
10218             "MOVZB   $dst, $dst\n"
10219     "done:" %}
10220   ins_encode %{
10221     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10222     emit_cmpfp3(_masm, $dst$$Register);
10223   %}
10224   ins_pipe( pipe_slow );
10225 %}
10226 
10227 // Spill to obtain 24-bit precision
10228 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10229   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10230   match(Set dst (SubF src1 src2));
10231 
10232   format %{ "FSUB   $dst,$src1 - $src2" %}
10233   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10234   ins_encode( Push_Reg_FPR(src1),
10235               OpcReg_FPR(src2),
10236               Pop_Mem_FPR(dst) );
10237   ins_pipe( fpu_mem_reg_reg );
10238 %}
10239 //
10240 // This instruction does not round to 24-bits
10241 instruct subFPR_reg(regFPR dst, regFPR src) %{
10242   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10243   match(Set dst (SubF dst src));
10244 
10245   format %{ "FSUB   $dst,$src" %}
10246   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10247   ins_encode( Push_Reg_FPR(src),
10248               OpcP, RegOpc(dst) );
10249   ins_pipe( fpu_reg_reg );
10250 %}
10251 
10252 // Spill to obtain 24-bit precision
10253 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10254   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10255   match(Set dst (AddF src1 src2));
10256 
10257   format %{ "FADD   $dst,$src1,$src2" %}
10258   opcode(0xD8, 0x0); /* D8 C0+i */
10259   ins_encode( Push_Reg_FPR(src2),
10260               OpcReg_FPR(src1),
10261               Pop_Mem_FPR(dst) );
10262   ins_pipe( fpu_mem_reg_reg );
10263 %}
10264 //
10265 // This instruction does not round to 24-bits
10266 instruct addFPR_reg(regFPR dst, regFPR src) %{
10267   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10268   match(Set dst (AddF dst src));
10269 
10270   format %{ "FLD    $src\n\t"
10271             "FADDp  $dst,ST" %}
10272   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10273   ins_encode( Push_Reg_FPR(src),
10274               OpcP, RegOpc(dst) );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10279   predicate(UseSSE==0);
10280   match(Set dst (AbsF src));
10281   ins_cost(100);
10282   format %{ "FABS" %}
10283   opcode(0xE1, 0xD9);
10284   ins_encode( OpcS, OpcP );
10285   ins_pipe( fpu_reg_reg );
10286 %}
10287 
10288 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10289   predicate(UseSSE==0);
10290   match(Set dst (NegF src));
10291   ins_cost(100);
10292   format %{ "FCHS" %}
10293   opcode(0xE0, 0xD9);
10294   ins_encode( OpcS, OpcP );
10295   ins_pipe( fpu_reg_reg );
10296 %}
10297 
10298 // Cisc-alternate to addFPR_reg
10299 // Spill to obtain 24-bit precision
10300 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10301   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10302   match(Set dst (AddF src1 (LoadF src2)));
10303 
10304   format %{ "FLD    $src2\n\t"
10305             "FADD   ST,$src1\n\t"
10306             "FSTP_S $dst" %}
10307   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10308   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10309               OpcReg_FPR(src1),
10310               Pop_Mem_FPR(dst) );
10311   ins_pipe( fpu_mem_reg_mem );
10312 %}
10313 //
10314 // Cisc-alternate to addFPR_reg
10315 // This instruction does not round to 24-bits
10316 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10317   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10318   match(Set dst (AddF dst (LoadF src)));
10319 
10320   format %{ "FADD   $dst,$src" %}
10321   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10322   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10323               OpcP, RegOpc(dst) );
10324   ins_pipe( fpu_reg_mem );
10325 %}
10326 
// Following two instructions for _222_mpegaudio
10328 // Spill to obtain 24-bit precision
10329 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10330   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10331   match(Set dst (AddF src1 src2));
10332 
10333   format %{ "FADD   $dst,$src1,$src2" %}
10334   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10335   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10336               OpcReg_FPR(src2),
10337               Pop_Mem_FPR(dst) );
10338   ins_pipe( fpu_mem_reg_mem );
10339 %}
10340 
10341 // Cisc-spill variant
10342 // Spill to obtain 24-bit precision
10343 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10344   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10345   match(Set dst (AddF src1 (LoadF src2)));
10346 
10347   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10348   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10349   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10350               set_instruction_start,
10351               OpcP, RMopc_Mem(secondary,src1),
10352               Pop_Mem_FPR(dst) );
10353   ins_pipe( fpu_mem_mem_mem );
10354 %}
10355 
10356 // Spill to obtain 24-bit precision
10357 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10358   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359   match(Set dst (AddF src1 src2));
10360 
10361   format %{ "FADD   $dst,$src1,$src2" %}
10362   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10363   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10364               set_instruction_start,
10365               OpcP, RMopc_Mem(secondary,src1),
10366               Pop_Mem_FPR(dst) );
10367   ins_pipe( fpu_mem_mem_mem );
10368 %}
10369 
10370 
10371 // Spill to obtain 24-bit precision
10372 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10373   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10374   match(Set dst (AddF src con));
10375   format %{ "FLD    $src\n\t"
10376             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10377             "FSTP_S $dst"  %}
10378   ins_encode %{
10379     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10380     __ fadd_s($constantaddress($con));
10381     __ fstp_s(Address(rsp, $dst$$disp));
10382   %}
10383   ins_pipe(fpu_mem_reg_con);
10384 %}
10385 //
10386 // This instruction does not round to 24-bits
10387 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10388   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10389   match(Set dst (AddF src con));
10390   format %{ "FLD    $src\n\t"
10391             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10392             "FSTP   $dst"  %}
10393   ins_encode %{
10394     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10395     __ fadd_s($constantaddress($con));
10396     __ fstp_d($dst$$reg);
10397   %}
10398   ins_pipe(fpu_reg_reg_con);
10399 %}
10400 
10401 // Spill to obtain 24-bit precision
10402 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10403   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10404   match(Set dst (MulF src1 src2));
10405 
10406   format %{ "FLD    $src1\n\t"
10407             "FMUL   $src2\n\t"
10408             "FSTP_S $dst"  %}
10409   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10410   ins_encode( Push_Reg_FPR(src1),
10411               OpcReg_FPR(src2),
10412               Pop_Mem_FPR(dst) );
10413   ins_pipe( fpu_mem_reg_reg );
10414 %}
10415 //
10416 // This instruction does not round to 24-bits
10417 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10418   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10419   match(Set dst (MulF src1 src2));
10420 
10421   format %{ "FLD    $src1\n\t"
10422             "FMUL   $src2\n\t"
10423             "FSTP_S $dst"  %}
10424   opcode(0xD8, 0x1); /* D8 C8+i */
10425   ins_encode( Push_Reg_FPR(src2),
10426               OpcReg_FPR(src1),
10427               Pop_Reg_FPR(dst) );
10428   ins_pipe( fpu_reg_reg_reg );
10429 %}
10430 
10431 
10432 // Spill to obtain 24-bit precision
10433 // Cisc-alternate to reg-reg multiply
10434 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10435   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10436   match(Set dst (MulF src1 (LoadF src2)));
10437 
10438   format %{ "FLD_S  $src2\n\t"
10439             "FMUL   $src1\n\t"
10440             "FSTP_S $dst"  %}
10441   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10442   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10443               OpcReg_FPR(src1),
10444               Pop_Mem_FPR(dst) );
10445   ins_pipe( fpu_mem_reg_mem );
10446 %}
10447 //
10448 // This instruction does not round to 24-bits
10449 // Cisc-alternate to reg-reg multiply
10450 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10451   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10452   match(Set dst (MulF src1 (LoadF src2)));
10453 
10454   format %{ "FMUL   $dst,$src1,$src2" %}
10455   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10456   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10457               OpcReg_FPR(src1),
10458               Pop_Reg_FPR(dst) );
10459   ins_pipe( fpu_reg_reg_mem );
10460 %}
10461 
10462 // Spill to obtain 24-bit precision
10463 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10464   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10465   match(Set dst (MulF src1 src2));
10466 
10467   format %{ "FMUL   $dst,$src1,$src2" %}
10468   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10469   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10470               set_instruction_start,
10471               OpcP, RMopc_Mem(secondary,src1),
10472               Pop_Mem_FPR(dst) );
10473   ins_pipe( fpu_mem_mem_mem );
10474 %}
10475 
10476 // Spill to obtain 24-bit precision
10477 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10478   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10479   match(Set dst (MulF src con));
10480 
10481   format %{ "FLD    $src\n\t"
10482             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10483             "FSTP_S $dst"  %}
10484   ins_encode %{
10485     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10486     __ fmul_s($constantaddress($con));
10487     __ fstp_s(Address(rsp, $dst$$disp));
10488   %}
10489   ins_pipe(fpu_mem_reg_con);
10490 %}
10491 //
10492 // This instruction does not round to 24-bits
10493 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10494   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10495   match(Set dst (MulF src con));
10496 
10497   format %{ "FLD    $src\n\t"
10498             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10499             "FSTP   $dst"  %}
10500   ins_encode %{
10501     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10502     __ fmul_s($constantaddress($con));
10503     __ fstp_d($dst$$reg);
10504   %}
10505   ins_pipe(fpu_reg_reg_con);
10506 %}
10507 
10508 
10509 //
10510 // MACRO1 -- subsume unshared load into mulFPR
10511 // This instruction does not round to 24-bits
10512 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10513   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10514   match(Set dst (MulF (LoadF mem1) src));
10515 
10516   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10517             "FMUL   ST,$src\n\t"
10518             "FSTP   $dst" %}
10519   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10520   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10521               OpcReg_FPR(src),
10522               Pop_Reg_FPR(dst) );
10523   ins_pipe( fpu_reg_reg_mem );
10524 %}
10525 //
10526 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10527 // This instruction does not round to 24-bits
10528 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10529   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10530   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10531   ins_cost(95);
10532 
10533   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10534             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10535             "FADD   ST,$src2\n\t"
10536             "FSTP   $dst" %}
10537   opcode(0xD9); /* LoadF D9 /0 */
10538   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10539               FMul_ST_reg(src1),
10540               FAdd_ST_reg(src2),
10541               Pop_Reg_FPR(dst) );
10542   ins_pipe( fpu_reg_mem_reg_reg );
10543 %}
10544 
10545 // MACRO3 -- addFPR a mulFPR
10546 // This instruction does not round to 24-bits.  It is a '2-address'
10547 // instruction in that the result goes back to src2.  This eliminates
10548 // a move from the macro; possibly the register allocator will have
10549 // to add it back (and maybe not).
10550 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10551   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10552   match(Set src2 (AddF (MulF src0 src1) src2));
10553 
10554   format %{ "FLD    $src0     ===MACRO3===\n\t"
10555             "FMUL   ST,$src1\n\t"
10556             "FADDP  $src2,ST" %}
10557   opcode(0xD9); /* LoadF D9 /0 */
10558   ins_encode( Push_Reg_FPR(src0),
10559               FMul_ST_reg(src1),
10560               FAddP_reg_ST(src2) );
10561   ins_pipe( fpu_reg_reg_reg );
10562 %}
10563 
10564 // MACRO4 -- divFPR subFPR
10565 // This instruction does not round to 24-bits
10566 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10567   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10568   match(Set dst (DivF (SubF src2 src1) src3));
10569 
10570   format %{ "FLD    $src2   ===MACRO4===\n\t"
10571             "FSUB   ST,$src1\n\t"
10572             "FDIV   ST,$src3\n\t"
10573             "FSTP  $dst" %}
10574   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10575   ins_encode( Push_Reg_FPR(src2),
10576               subFPR_divFPR_encode(src1,src3),
10577               Pop_Reg_FPR(dst) );
10578   ins_pipe( fpu_reg_reg_reg_reg );
10579 %}
10580 
10581 // Spill to obtain 24-bit precision
10582 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10583   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10584   match(Set dst (DivF src1 src2));
10585 
10586   format %{ "FDIV   $dst,$src1,$src2" %}
10587   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10588   ins_encode( Push_Reg_FPR(src1),
10589               OpcReg_FPR(src2),
10590               Pop_Mem_FPR(dst) );
10591   ins_pipe( fpu_mem_reg_reg );
10592 %}
10593 //
10594 // This instruction does not round to 24-bits
10595 instruct divFPR_reg(regFPR dst, regFPR src) %{
10596   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10597   match(Set dst (DivF dst src));
10598 
10599   format %{ "FDIV   $dst,$src" %}
10600   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10601   ins_encode( Push_Reg_FPR(src),
10602               OpcP, RegOpc(dst) );
10603   ins_pipe( fpu_reg_reg );
10604 %}
10605 
10606 
10607 // Spill to obtain 24-bit precision
10608 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10609   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10610   match(Set dst (ModF src1 src2));
10611   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10612 
10613   format %{ "FMOD   $dst,$src1,$src2" %}
10614   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10615               emitModDPR(),
10616               Push_Result_Mod_DPR(src2),
10617               Pop_Mem_FPR(dst));
10618   ins_pipe( pipe_slow );
10619 %}
10620 //
10621 // This instruction does not round to 24-bits
10622 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10623   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10624   match(Set dst (ModF dst src));
10625   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10626 
10627   format %{ "FMOD   $dst,$src" %}
10628   ins_encode(Push_Reg_Mod_DPR(dst, src),
10629               emitModDPR(),
10630               Push_Result_Mod_DPR(src),
10631               Pop_Reg_FPR(dst));
10632   ins_pipe( pipe_slow );
10633 %}
10634 
10635 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10636   predicate(UseSSE>=1);
10637   match(Set dst (ModF src0 src1));
10638   effect(KILL rax, KILL cr);
10639   format %{ "SUB    ESP,4\t # FMOD\n"
10640           "\tMOVSS  [ESP+0],$src1\n"
10641           "\tFLD_S  [ESP+0]\n"
10642           "\tMOVSS  [ESP+0],$src0\n"
10643           "\tFLD_S  [ESP+0]\n"
10644      "loop:\tFPREM\n"
10645           "\tFWAIT\n"
10646           "\tFNSTSW AX\n"
10647           "\tSAHF\n"
10648           "\tJP     loop\n"
10649           "\tFSTP_S [ESP+0]\n"
10650           "\tMOVSS  $dst,[ESP+0]\n"
10651           "\tADD    ESP,4\n"
10652           "\tFSTP   ST0\t # Restore FPU Stack"
10653     %}
10654   ins_cost(250);
10655   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10656   ins_pipe( pipe_slow );
10657 %}
10658 
10659 
10660 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10662 
10663 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10664   predicate(UseSSE==0);
10665   match(Set dst (RoundFloat src));
10666   ins_cost(125);
10667   format %{ "FST_S  $dst,$src\t# F-round" %}
10668   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10669   ins_pipe( fpu_mem_reg );
10670 %}
10671 
10672 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10673   predicate(UseSSE<=1);
10674   match(Set dst (RoundDouble src));
10675   ins_cost(125);
10676   format %{ "FST_D  $dst,$src\t# D-round" %}
10677   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10678   ins_pipe( fpu_mem_reg );
10679 %}
10680 
// Force rounding to 24-bit precision and 8-bit exponent
10682 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10683   predicate(UseSSE==0);
10684   match(Set dst (ConvD2F src));
10685   format %{ "FST_S  $dst,$src\t# F-round" %}
10686   expand %{
10687     roundFloat_mem_reg(dst,src);
10688   %}
10689 %}
10690 
// Force rounding to 24-bit precision and 8-bit exponent
10692 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10693   predicate(UseSSE==1);
10694   match(Set dst (ConvD2F src));
10695   effect( KILL cr );
10696   format %{ "SUB    ESP,4\n\t"
10697             "FST_S  [ESP],$src\t# F-round\n\t"
10698             "MOVSS  $dst,[ESP]\n\t"
10699             "ADD ESP,4" %}
10700   ins_encode %{
10701     __ subptr(rsp, 4);
10702     if ($src$$reg != FPR1L_enc) {
10703       __ fld_s($src$$reg-1);
10704       __ fstp_s(Address(rsp, 0));
10705     } else {
10706       __ fst_s(Address(rsp, 0));
10707     }
10708     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10709     __ addptr(rsp, 4);
10710   %}
10711   ins_pipe( pipe_slow );
10712 %}
10713 
10714 // Force rounding double precision to single precision
10715 instruct convD2F_reg(regF dst, regD src) %{
10716   predicate(UseSSE>=2);
10717   match(Set dst (ConvD2F src));
10718   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10719   ins_encode %{
10720     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10721   %}
10722   ins_pipe( pipe_slow );
10723 %}
10724 
10725 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10726   predicate(UseSSE==0);
10727   match(Set dst (ConvF2D src));
10728   format %{ "FST_S  $dst,$src\t# D-round" %}
10729   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10730   ins_pipe( fpu_reg_reg );
10731 %}
10732 
10733 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10734   predicate(UseSSE==1);
10735   match(Set dst (ConvF2D src));
10736   format %{ "FST_D  $dst,$src\t# D-round" %}
10737   expand %{
10738     roundDouble_mem_reg(dst,src);
10739   %}
10740 %}
10741 
10742 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10743   predicate(UseSSE==1);
10744   match(Set dst (ConvF2D src));
10745   effect( KILL cr );
10746   format %{ "SUB    ESP,4\n\t"
10747             "MOVSS  [ESP] $src\n\t"
10748             "FLD_S  [ESP]\n\t"
10749             "ADD    ESP,4\n\t"
10750             "FSTP   $dst\t# D-round" %}
10751   ins_encode %{
10752     __ subptr(rsp, 4);
10753     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10754     __ fld_s(Address(rsp, 0));
10755     __ addptr(rsp, 4);
10756     __ fstp_d($dst$$reg);
10757   %}
10758   ins_pipe( pipe_slow );
10759 %}
10760 
10761 instruct convF2D_reg(regD dst, regF src) %{
10762   predicate(UseSSE>=2);
10763   match(Set dst (ConvF2D src));
10764   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10765   ins_encode %{
10766     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10767   %}
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10772 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10773   predicate(UseSSE<=1);
10774   match(Set dst (ConvD2I src));
10775   effect( KILL tmp, KILL cr );
10776   format %{ "FLD    $src\t# Convert double to int \n\t"
10777             "FLDCW  trunc mode\n\t"
10778             "SUB    ESP,4\n\t"
10779             "FISTp  [ESP + #0]\n\t"
10780             "FLDCW  std/24-bit mode\n\t"
10781             "POP    EAX\n\t"
10782             "CMP    EAX,0x80000000\n\t"
10783             "JNE,s  fast\n\t"
10784             "FLD_D  $src\n\t"
10785             "CALL   d2i_wrapper\n"
10786       "fast:" %}
10787   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10788   ins_pipe( pipe_slow );
10789 %}
10790 
10791 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10792 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10793   predicate(UseSSE>=2);
10794   match(Set dst (ConvD2I src));
10795   effect( KILL tmp, KILL cr );
10796   format %{ "CVTTSD2SI $dst, $src\n\t"
10797             "CMP    $dst,0x80000000\n\t"
10798             "JNE,s  fast\n\t"
10799             "SUB    ESP, 8\n\t"
10800             "MOVSD  [ESP], $src\n\t"
10801             "FLD_D  [ESP]\n\t"
10802             "ADD    ESP, 8\n\t"
10803             "CALL   d2i_wrapper\n"
10804       "fast:" %}
10805   ins_encode %{
10806     Label fast;
10807     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10808     __ cmpl($dst$$Register, 0x80000000);
10809     __ jccb(Assembler::notEqual, fast);
10810     __ subptr(rsp, 8);
10811     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10812     __ fld_d(Address(rsp, 0));
10813     __ addptr(rsp, 8);
10814     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10815     __ bind(fast);
10816   %}
10817   ins_pipe( pipe_slow );
10818 %}
10819 
10820 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10821   predicate(UseSSE<=1);
10822   match(Set dst (ConvD2L src));
10823   effect( KILL cr );
10824   format %{ "FLD    $src\t# Convert double to long\n\t"
10825             "FLDCW  trunc mode\n\t"
10826             "SUB    ESP,8\n\t"
10827             "FISTp  [ESP + #0]\n\t"
10828             "FLDCW  std/24-bit mode\n\t"
10829             "POP    EAX\n\t"
10830             "POP    EDX\n\t"
10831             "CMP    EDX,0x80000000\n\t"
10832             "JNE,s  fast\n\t"
10833             "TEST   EAX,EAX\n\t"
10834             "JNE,s  fast\n\t"
10835             "FLD    $src\n\t"
10836             "CALL   d2l_wrapper\n"
10837       "fast:" %}
10838   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10839   ins_pipe( pipe_slow );
10840 %}
10841 
10842 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10843 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10844   predicate (UseSSE>=2);
10845   match(Set dst (ConvD2L src));
10846   effect( KILL cr );
10847   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10848             "MOVSD  [ESP],$src\n\t"
10849             "FLD_D  [ESP]\n\t"
10850             "FLDCW  trunc mode\n\t"
10851             "FISTp  [ESP + #0]\n\t"
10852             "FLDCW  std/24-bit mode\n\t"
10853             "POP    EAX\n\t"
10854             "POP    EDX\n\t"
10855             "CMP    EDX,0x80000000\n\t"
10856             "JNE,s  fast\n\t"
10857             "TEST   EAX,EAX\n\t"
10858             "JNE,s  fast\n\t"
10859             "SUB    ESP,8\n\t"
10860             "MOVSD  [ESP],$src\n\t"
10861             "FLD_D  [ESP]\n\t"
10862             "ADD    ESP,8\n\t"
10863             "CALL   d2l_wrapper\n"
10864       "fast:" %}
10865   ins_encode %{
10866     Label fast;
10867     __ subptr(rsp, 8);
10868     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10869     __ fld_d(Address(rsp, 0));
10870     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10871     __ fistp_d(Address(rsp, 0));
10872     // Restore the rounding mode, mask the exception
10873     if (Compile::current()->in_24_bit_fp_mode()) {
10874       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10875     } else {
10876       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10877     }
10878     // Load the converted long, adjust CPU stack
10879     __ pop(rax);
10880     __ pop(rdx);
10881     __ cmpl(rdx, 0x80000000);
10882     __ jccb(Assembler::notEqual, fast);
10883     __ testl(rax, rax);
10884     __ jccb(Assembler::notEqual, fast);
10885     __ subptr(rsp, 8);
10886     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10887     __ fld_d(Address(rsp, 0));
10888     __ addptr(rsp, 8);
10889     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10890     __ bind(fast);
10891   %}
10892   ins_pipe( pipe_slow );
10893 %}
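// Note on the ConvD2L rules above: after the truncating FIST store, the only
// result that needs further checking is 0x8000000000000000 (EDX==0x80000000
// and EAX==0), which the FPU also produces for NaNs and out-of-range inputs;
// in that case d2l_wrapper recomputes the result with Java semantics
// ((long)NaN == 0L, overflow clamps to Long.MIN_VALUE / Long.MAX_VALUE).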
10894 
10895 // Convert a float to an int.  Java semantics require we do complex
10896 // manglations in the corner cases.  So we set the rounding mode to
10897 // 'zero', store the darned float down as an int, and reset the
10898 // rounding mode to 'nearest'.  The hardware stores a flag value down
10899 // if we would overflow or converted a NAN; we check for this and
10900 // go the slow path if needed.
10901 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10902   predicate(UseSSE==0);
10903   match(Set dst (ConvF2I src));
10904   effect( KILL tmp, KILL cr );
10905   format %{ "FLD    $src\t# Convert float to int \n\t"
10906             "FLDCW  trunc mode\n\t"
10907             "SUB    ESP,4\n\t"
10908             "FISTp  [ESP + #0]\n\t"
10909             "FLDCW  std/24-bit mode\n\t"
10910             "POP    EAX\n\t"
10911             "CMP    EAX,0x80000000\n\t"
10912             "JNE,s  fast\n\t"
10913             "FLD    $src\n\t"
10914             "CALL   d2i_wrapper\n"
10915       "fast:" %}
10916   // DPR2I_encoding works for FPR2I
10917   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10918   ins_pipe( pipe_slow );
10919 %}
10920 
10921 // Convert a float in xmm to an int reg.
10922 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10923   predicate(UseSSE>=1);
10924   match(Set dst (ConvF2I src));
10925   effect( KILL tmp, KILL cr );
10926   format %{ "CVTTSS2SI $dst, $src\n\t"
10927             "CMP    $dst,0x80000000\n\t"
10928             "JNE,s  fast\n\t"
10929             "SUB    ESP, 4\n\t"
10930             "MOVSS  [ESP], $src\n\t"
10931             "FLD    [ESP]\n\t"
10932             "ADD    ESP, 4\n\t"
10933             "CALL   d2i_wrapper\n"
10934       "fast:" %}
10935   ins_encode %{
10936     Label fast;
10937     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10938     __ cmpl($dst$$Register, 0x80000000);
10939     __ jccb(Assembler::notEqual, fast);
10940     __ subptr(rsp, 4);
10941     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10942     __ fld_s(Address(rsp, 0));
10943     __ addptr(rsp, 4);
10944     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10945     __ bind(fast);
10946   %}
10947   ins_pipe( pipe_slow );
10948 %}
10949 
10950 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10951   predicate(UseSSE==0);
10952   match(Set dst (ConvF2L src));
10953   effect( KILL cr );
10954   format %{ "FLD    $src\t# Convert float to long\n\t"
10955             "FLDCW  trunc mode\n\t"
10956             "SUB    ESP,8\n\t"
10957             "FISTp  [ESP + #0]\n\t"
10958             "FLDCW  std/24-bit mode\n\t"
10959             "POP    EAX\n\t"
10960             "POP    EDX\n\t"
10961             "CMP    EDX,0x80000000\n\t"
10962             "JNE,s  fast\n\t"
10963             "TEST   EAX,EAX\n\t"
10964             "JNE,s  fast\n\t"
10965             "FLD    $src\n\t"
10966             "CALL   d2l_wrapper\n"
10967       "fast:" %}
10968   // DPR2L_encoding works for FPR2L
10969   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10970   ins_pipe( pipe_slow );
10971 %}
10972 
10973 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10974 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10975   predicate (UseSSE>=1);
10976   match(Set dst (ConvF2L src));
10977   effect( KILL cr );
10978   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10979             "MOVSS  [ESP],$src\n\t"
10980             "FLD_S  [ESP]\n\t"
10981             "FLDCW  trunc mode\n\t"
10982             "FISTp  [ESP + #0]\n\t"
10983             "FLDCW  std/24-bit mode\n\t"
10984             "POP    EAX\n\t"
10985             "POP    EDX\n\t"
10986             "CMP    EDX,0x80000000\n\t"
10987             "JNE,s  fast\n\t"
10988             "TEST   EAX,EAX\n\t"
10989             "JNE,s  fast\n\t"
10990             "SUB    ESP,4\t# Convert float to long\n\t"
10991             "MOVSS  [ESP],$src\n\t"
10992             "FLD_S  [ESP]\n\t"
10993             "ADD    ESP,4\n\t"
10994             "CALL   d2l_wrapper\n"
10995       "fast:" %}
10996   ins_encode %{
10997     Label fast;
10998     __ subptr(rsp, 8);
10999     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11000     __ fld_s(Address(rsp, 0));
11001     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11002     __ fistp_d(Address(rsp, 0));
11003     // Restore the rounding mode, mask the exception
11004     if (Compile::current()->in_24_bit_fp_mode()) {
11005       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11006     } else {
11007       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11008     }
11009     // Load the converted long, adjust CPU stack
11010     __ pop(rax);
11011     __ pop(rdx);
11012     __ cmpl(rdx, 0x80000000);
11013     __ jccb(Assembler::notEqual, fast);
11014     __ testl(rax, rax);
11015     __ jccb(Assembler::notEqual, fast);
11016     __ subptr(rsp, 4);
11017     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11018     __ fld_s(Address(rsp, 0));
11019     __ addptr(rsp, 4);
11020     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11021     __ bind(fast);
11022   %}
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11027   predicate( UseSSE<=1 );
11028   match(Set dst (ConvI2D src));
11029   format %{ "FILD   $src\n\t"
11030             "FSTP   $dst" %}
11031   opcode(0xDB, 0x0);  /* DB /0 */
11032   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 instruct convI2D_reg(regD dst, rRegI src) %{
11037   predicate( UseSSE>=2 && !UseXmmI2D );
11038   match(Set dst (ConvI2D src));
11039   format %{ "CVTSI2SD $dst,$src" %}
11040   ins_encode %{
11041     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11042   %}
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convI2D_mem(regD dst, memory mem) %{
11047   predicate( UseSSE>=2 );
11048   match(Set dst (ConvI2D (LoadI mem)));
11049   format %{ "CVTSI2SD $dst,$mem" %}
11050   ins_encode %{
11051     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11052   %}
11053   ins_pipe( pipe_slow );
11054 %}
11055 
11056 instruct convXI2D_reg(regD dst, rRegI src)
11057 %{
11058   predicate( UseSSE>=2 && UseXmmI2D );
11059   match(Set dst (ConvI2D src));
11060 
11061   format %{ "MOVD  $dst,$src\n\t"
11062             "CVTDQ2PD $dst,$dst\t# i2d" %}
11063   ins_encode %{
11064     __ movdl($dst$$XMMRegister, $src$$Register);
11065     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11066   %}
11067   ins_pipe(pipe_slow); // XXX
11068 %}
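// The MOVD + CVTDQ2PD form above (and the analogous MOVD + CVTDQ2PS i2f form
// further below, selected by UseXmmI2D / UseXmmI2F) performs the same
// conversion as CVTSI2SD/CVTSI2SS but writes the whole destination register,
// avoiding the false dependency CVTSI2Sx has on the previous contents of
// $dst on some CPUs.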
11069 
11070 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11071   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11072   match(Set dst (ConvI2D (LoadI mem)));
11073   format %{ "FILD   $mem\n\t"
11074             "FSTP   $dst" %}
11075   opcode(0xDB);      /* DB /0 */
11076   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11077               Pop_Reg_DPR(dst));
11078   ins_pipe( fpu_reg_mem );
11079 %}
11080 
11081 // Convert a byte-range int (the input is (AndI x 255)) to a float; no rounding step needed.
11082 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11083   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11084   match(Set dst (ConvI2F src));
11085   format %{ "FILD   $src\n\t"
11086             "FSTP   $dst" %}
11087 
11088   opcode(0xDB, 0x0);  /* DB /0 */
11089   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11090   ins_pipe( fpu_reg_mem );
11091 %}
11092 
11093 // In 24-bit mode, force exponent rounding by storing back out
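// (An int with more than 24 significant bits is not exact as a float, e.g.
// (float)16777217 must round to 16777216.0f; the store to a 32-bit stack slot
// is what forces that rounding here.)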
11094 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11095   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11096   match(Set dst (ConvI2F src));
11097   ins_cost(200);
11098   format %{ "FILD   $src\n\t"
11099             "FSTP_S $dst" %}
11100   opcode(0xDB, 0x0);  /* DB /0 */
11101   ins_encode( Push_Mem_I(src),
11102               Pop_Mem_FPR(dst));
11103   ins_pipe( fpu_mem_mem );
11104 %}
11105 
11106 // In 24-bit mode, force exponent rounding by storing back out
11107 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11108   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11109   match(Set dst (ConvI2F (LoadI mem)));
11110   ins_cost(200);
11111   format %{ "FILD   $mem\n\t"
11112             "FSTP_S $dst" %}
11113   opcode(0xDB);  /* DB /0 */
11114   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11115               Pop_Mem_FPR(dst));
11116   ins_pipe( fpu_mem_mem );
11117 %}
11118 
11119 // This instruction does not round to 24 bits
11120 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   format %{ "FILD   $src\n\t"
11124             "FSTP   $dst" %}
11125   opcode(0xDB, 0x0);  /* DB /0 */
11126   ins_encode( Push_Mem_I(src),
11127               Pop_Reg_FPR(dst));
11128   ins_pipe( fpu_reg_mem );
11129 %}
11130 
11131 // This instruction does not round to 24 bits
11132 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11133   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134   match(Set dst (ConvI2F (LoadI mem)));
11135   format %{ "FILD   $mem\n\t"
11136             "FSTP   $dst" %}
11137   opcode(0xDB);      /* DB /0 */
11138   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11139               Pop_Reg_FPR(dst));
11140   ins_pipe( fpu_reg_mem );
11141 %}
11142 
11143 // Convert an int to a float in xmm; no rounding step needed.
11144 instruct convI2F_reg(regF dst, rRegI src) %{
11145   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11146   match(Set dst (ConvI2F src));
11147   format %{ "CVTSI2SS $dst, $src" %}
11148   ins_encode %{
11149     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11150   %}
11151   ins_pipe( pipe_slow );
11152 %}
11153 
11154 instruct convXI2F_reg(regF dst, rRegI src)
11155 %{
11156   predicate( UseSSE>=2 && UseXmmI2F );
11157   match(Set dst (ConvI2F src));
11158 
11159   format %{ "MOVD  $dst,$src\n\t"
11160             "CVTDQ2PS $dst,$dst\t# i2f" %}
11161   ins_encode %{
11162     __ movdl($dst$$XMMRegister, $src$$Register);
11163     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11164   %}
11165   ins_pipe(pipe_slow); // XXX
11166 %}
11167 
11168 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11169   match(Set dst (ConvI2L src));
11170   effect(KILL cr);
11171   ins_cost(375);
11172   format %{ "MOV    $dst.lo,$src\n\t"
11173             "MOV    $dst.hi,$src\n\t"
11174             "SAR    $dst.hi,31" %}
11175   ins_encode(convert_int_long(dst,src));
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 // Zero-extend convert int to long
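// Matches the (AndL (ConvI2L x) 0xFFFFFFFF) shape produced by Java code such
// as "x & 0xFFFFFFFFL", so the high word can simply be zeroed instead of
// sign-extended with SAR.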
11180 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11181   match(Set dst (AndL (ConvI2L src) mask) );
11182   effect( KILL flags );
11183   ins_cost(250);
11184   format %{ "MOV    $dst.lo,$src\n\t"
11185             "XOR    $dst.hi,$dst.hi" %}
11186   opcode(0x33); // XOR
11187   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 // Zero-extend long
11192 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11193   match(Set dst (AndL src mask) );
11194   effect( KILL flags );
11195   ins_cost(250);
11196   format %{ "MOV    $dst.lo,$src.lo\n\t"
11197             "XOR    $dst.hi,$dst.hi" %}
11198   opcode(0x33); // XOR
11199   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11200   ins_pipe( ialu_reg_reg_long );
11201 %}
11202 
11203 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11204   predicate (UseSSE<=1);
11205   match(Set dst (ConvL2D src));
11206   effect( KILL cr );
11207   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11208             "PUSH   $src.lo\n\t"
11209             "FILD   ST,[ESP + #0]\n\t"
11210             "ADD    ESP,8\n\t"
11211             "FSTP_D $dst\t# D-round" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=2);
11219   match(Set dst (ConvL2D src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_D [ESP]\n\t"
11225             "MOVSD  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11233   predicate (UseSSE>=1);
11234   match(Set dst (ConvL2F src));
11235   effect( KILL cr );
11236   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11237             "PUSH   $src.lo\n\t"
11238             "FILD_D [ESP]\n\t"
11239             "FSTP_S [ESP]\n\t"
11240             "MOVSS  $dst,[ESP]\n\t"
11241             "ADD    ESP,8" %}
11242   opcode(0xDF, 0x5);  /* DF /5 */
11243   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11244   ins_pipe( pipe_slow );
11245 %}
11246 
11247 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11248   match(Set dst (ConvL2F src));
11249   effect( KILL cr );
11250   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11251             "PUSH   $src.lo\n\t"
11252             "FILD   ST,[ESP + #0]\n\t"
11253             "ADD    ESP,8\n\t"
11254             "FSTP_S $dst\t# F-round" %}
11255   opcode(0xDF, 0x5);  /* DF /5 */
11256   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11257   ins_pipe( pipe_slow );
11258 %}
11259 
11260 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11261   match(Set dst (ConvL2I src));
11262   effect( DEF dst, USE src );
11263   format %{ "MOV    $dst,$src.lo" %}
11264   ins_encode(enc_CopyL_Lo(dst,src));
11265   ins_pipe( ialu_reg_reg );
11266 %}
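// The MoveF2I/MoveI2F and MoveD2L/MoveL2D rules below reinterpret raw bits
// between the FP and integer worlds (the patterns behind intrinsics such as
// Float.floatToRawIntBits / Double.doubleToRawLongBits and their inverses);
// which variant matches depends on UseSSE and on whether the value lives in
// a register or a stack slot.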
11267 
11268 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11269   match(Set dst (MoveF2I src));
11270   effect( DEF dst, USE src );
11271   ins_cost(100);
11272   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11273   ins_encode %{
11274     __ movl($dst$$Register, Address(rsp, $src$$disp));
11275   %}
11276   ins_pipe( ialu_reg_mem );
11277 %}
11278 
11279 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11280   predicate(UseSSE==0);
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283 
11284   ins_cost(125);
11285   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11286   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11287   ins_pipe( fpu_mem_reg );
11288 %}
11289 
11290 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11291   predicate(UseSSE>=1);
11292   match(Set dst (MoveF2I src));
11293   effect( DEF dst, USE src );
11294 
11295   ins_cost(95);
11296   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11297   ins_encode %{
11298     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11304   predicate(UseSSE>=2);
11305   match(Set dst (MoveF2I src));
11306   effect( DEF dst, USE src );
11307   ins_cost(85);
11308   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11309   ins_encode %{
11310     __ movdl($dst$$Register, $src$$XMMRegister);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(100);
11320   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11321   ins_encode %{
11322     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11323   %}
11324   ins_pipe( ialu_mem_reg );
11325 %}
11326 
11327 
11328 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11329   predicate(UseSSE==0);
11330   match(Set dst (MoveI2F src));
11331   effect(DEF dst, USE src);
11332 
11333   ins_cost(125);
11334   format %{ "FLD_S  $src\n\t"
11335             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11336   opcode(0xD9);               /* D9 /0, FLD m32real */
11337   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11338               Pop_Reg_FPR(dst) );
11339   ins_pipe( fpu_reg_mem );
11340 %}
11341 
11342 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11343   predicate(UseSSE>=1);
11344   match(Set dst (MoveI2F src));
11345   effect( DEF dst, USE src );
11346 
11347   ins_cost(95);
11348   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11349   ins_encode %{
11350     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11356   predicate(UseSSE>=2);
11357   match(Set dst (MoveI2F src));
11358   effect( DEF dst, USE src );
11359 
11360   ins_cost(85);
11361   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11362   ins_encode %{
11363     __ movdl($dst$$XMMRegister, $src$$Register);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11369   match(Set dst (MoveD2L src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(250);
11373   format %{ "MOV    $dst.lo,$src\n\t"
11374             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11375   opcode(0x8B, 0x8B);
11376   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11377   ins_pipe( ialu_mem_long_reg );
11378 %}
11379 
11380 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11381   predicate(UseSSE<=1);
11382   match(Set dst (MoveD2L src));
11383   effect(DEF dst, USE src);
11384 
11385   ins_cost(125);
11386   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11387   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11388   ins_pipe( fpu_mem_reg );
11389 %}
11390 
11391 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveD2L src));
11394   effect(DEF dst, USE src);
11395   ins_cost(95);
11396   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11397   ins_encode %{
11398     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11404   predicate(UseSSE>=2);
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src, TEMP tmp);
11407   ins_cost(85);
11408   format %{ "MOVD   $dst.lo,$src\n\t"
11409             "PSHUFLW $tmp,$src,0x4E\n\t"
11410             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11411   ins_encode %{
11412     __ movdl($dst$$Register, $src$$XMMRegister);
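    // PSHUFLW with selector 0x4E swaps the two 32-bit halves of the low
    // quadword, leaving the high half of the double in the low dword of $tmp
    // for the following MOVD.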
11413     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11414     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11420   match(Set dst (MoveL2D src));
11421   effect(DEF dst, USE src);
11422 
11423   ins_cost(200);
11424   format %{ "MOV    $dst,$src.lo\n\t"
11425             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11426   opcode(0x89, 0x89);
11427   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11428   ins_pipe( ialu_mem_long_reg );
11429 %}
11430 
11431 
11432 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11433   predicate(UseSSE<=1);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436   ins_cost(125);
11437 
11438   format %{ "FLD_D  $src\n\t"
11439             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11440   opcode(0xDD);               /* DD /0, FLD m64real */
11441   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11442               Pop_Reg_DPR(dst) );
11443   ins_pipe( fpu_reg_mem );
11444 %}
11445 
11446 
11447 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11448   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11449   match(Set dst (MoveL2D src));
11450   effect(DEF dst, USE src);
11451 
11452   ins_cost(95);
11453   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11454   ins_encode %{
11455     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11461   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11462   match(Set dst (MoveL2D src));
11463   effect(DEF dst, USE src);
11464 
11465   ins_cost(95);
11466   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11467   ins_encode %{
11468     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11474   predicate(UseSSE>=2);
11475   match(Set dst (MoveL2D src));
11476   effect(TEMP dst, USE src, TEMP tmp);
11477   ins_cost(85);
11478   format %{ "MOVD   $dst,$src.lo\n\t"
11479             "MOVD   $tmp,$src.hi\n\t"
11480             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11481   ins_encode %{
11482     __ movdl($dst$$XMMRegister, $src$$Register);
11483     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11484     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11485   %}
11486   ins_pipe( pipe_slow );
11487 %}
11488 
11489 
11490 // =======================================================================
11491 // fast clearing of an array
11492 // Small ClearArray non-AVX512.
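// The count is in 8-byte doublewords (hence SHL by 3 to get bytes, or by 1 to
// get 4-byte words for REP STOS).  The large path picks REP STOSB when
// UseFastStosb is set, a YMM zeroing loop when UseXMMForObjInit is set, and
// plain REP STOS otherwise; the AVX-512 and large-array variants below follow
// the same structure.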
11493 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11494   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11495   match(Set dummy (ClearArray cnt base));
11496   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11497 
11498   format %{ $$template
11499     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11500     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11501     $$emit$$"JG     LARGE\n\t"
11502     $$emit$$"SHL    ECX, 1\n\t"
11503     $$emit$$"DEC    ECX\n\t"
11504     $$emit$$"JS     DONE\t# Zero length\n\t"
11505     $$emit$$"MOV    (EDI,ECX,4),EAX\t# LOOP\n\t"
11506     $$emit$$"DEC    ECX\n\t"
11507     $$emit$$"JGE    LOOP\n\t"
11508     $$emit$$"JMP    DONE\n\t"
11509     $$emit$$"# LARGE:\n\t"
11510     if (UseFastStosb) {
11511        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11512        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11513     } else if (UseXMMForObjInit) {
11514        $$emit$$"MOV     RDI,RAX\n\t"
11515        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11516        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11517        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11518        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11519        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11520        $$emit$$"ADD     0x40,RAX\n\t"
11521        $$emit$$"# L_zero_64_bytes:\n\t"
11522        $$emit$$"SUB     0x8,RCX\n\t"
11523        $$emit$$"JGE     L_loop\n\t"
11524        $$emit$$"ADD     0x4,RCX\n\t"
11525        $$emit$$"JL      L_tail\n\t"
11526        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11527        $$emit$$"ADD     0x20,RAX\n\t"
11528        $$emit$$"SUB     0x4,RCX\n\t"
11529        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11530        $$emit$$"ADD     0x4,RCX\n\t"
11531        $$emit$$"JLE     L_end\n\t"
11532        $$emit$$"DEC     RCX\n\t"
11533        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11534        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11535        $$emit$$"ADD     0x8,RAX\n\t"
11536        $$emit$$"DEC     RCX\n\t"
11537        $$emit$$"JGE     L_sloop\n\t"
11538        $$emit$$"# L_end:\n\t"
11539     } else {
11540        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11541        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11542     }
11543     $$emit$$"# DONE"
11544   %}
11545   ins_encode %{
11546     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11547                  $tmp$$XMMRegister, false, knoreg);
11548   %}
11549   ins_pipe( pipe_slow );
11550 %}
11551 
11552 // Small ClearArray AVX512 non-constant length.
11553 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11554   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11555   match(Set dummy (ClearArray cnt base));
11556   ins_cost(125);
11557   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11558 
11559   format %{ $$template
11560     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11561     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11562     $$emit$$"JG     LARGE\n\t"
11563     $$emit$$"SHL    ECX, 1\n\t"
11564     $$emit$$"DEC    ECX\n\t"
11565     $$emit$$"JS     DONE\t# Zero length\n\t"
11566     $$emit$$"MOV    (EDI,ECX,4),EAX\t# LOOP\n\t"
11567     $$emit$$"DEC    ECX\n\t"
11568     $$emit$$"JGE    LOOP\n\t"
11569     $$emit$$"JMP    DONE\n\t"
11570     $$emit$$"# LARGE:\n\t"
11571     if (UseFastStosb) {
11572        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11573        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11574     } else if (UseXMMForObjInit) {
11575        $$emit$$"MOV     RDI,RAX\n\t"
11576        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11577        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11578        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11579        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11580        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11581        $$emit$$"ADD     0x40,RAX\n\t"
11582        $$emit$$"# L_zero_64_bytes:\n\t"
11583        $$emit$$"SUB     0x8,RCX\n\t"
11584        $$emit$$"JGE     L_loop\n\t"
11585        $$emit$$"ADD     0x4,RCX\n\t"
11586        $$emit$$"JL      L_tail\n\t"
11587        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11588        $$emit$$"ADD     0x20,RAX\n\t"
11589        $$emit$$"SUB     0x4,RCX\n\t"
11590        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11591        $$emit$$"ADD     0x4,RCX\n\t"
11592        $$emit$$"JLE     L_end\n\t"
11593        $$emit$$"DEC     RCX\n\t"
11594        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11595        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11596        $$emit$$"ADD     0x8,RAX\n\t"
11597        $$emit$$"DEC     RCX\n\t"
11598        $$emit$$"JGE     L_sloop\n\t"
11599        $$emit$$"# L_end:\n\t"
11600     } else {
11601        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11602        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11603     }
11604     $$emit$$"# DONE"
11605   %}
11606   ins_encode %{
11607     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11608                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11609   %}
11610   ins_pipe( pipe_slow );
11611 %}
11612 
11613 // Large ClearArray non-AVX512.
11614 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11615   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11616   match(Set dummy (ClearArray cnt base));
11617   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11618   format %{ $$template
11619     if (UseFastStosb) {
11620        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11621        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11622        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11623     } else if (UseXMMForObjInit) {
11624        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11625        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11626        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11627        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11628        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11629        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11630        $$emit$$"ADD     0x40,RAX\n\t"
11631        $$emit$$"# L_zero_64_bytes:\n\t"
11632        $$emit$$"SUB     0x8,RCX\n\t"
11633        $$emit$$"JGE     L_loop\n\t"
11634        $$emit$$"ADD     0x4,RCX\n\t"
11635        $$emit$$"JL      L_tail\n\t"
11636        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11637        $$emit$$"ADD     0x20,RAX\n\t"
11638        $$emit$$"SUB     0x4,RCX\n\t"
11639        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11640        $$emit$$"ADD     0x4,RCX\n\t"
11641        $$emit$$"JLE     L_end\n\t"
11642        $$emit$$"DEC     RCX\n\t"
11643        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11644        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11645        $$emit$$"ADD     0x8,RAX\n\t"
11646        $$emit$$"DEC     RCX\n\t"
11647        $$emit$$"JGE     L_sloop\n\t"
11648        $$emit$$"# L_end:\n\t"
11649     } else {
11650        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11651        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11652        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11653     }
11654     $$emit$$"# DONE"
11655   %}
11656   ins_encode %{
11657     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11658                  $tmp$$XMMRegister, true, knoreg);
11659   %}
11660   ins_pipe( pipe_slow );
11661 %}
11662 
11663 // Large ClearArray AVX512.
11664 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11665   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11666   match(Set dummy (ClearArray cnt base));
11667   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11668   format %{ $$template
11669     if (UseFastStosb) {
11670        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11671        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11672        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11673     } else if (UseXMMForObjInit) {
11674        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11675        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11676        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11677        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11678        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11679        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11680        $$emit$$"ADD     0x40,RAX\n\t"
11681        $$emit$$"# L_zero_64_bytes:\n\t"
11682        $$emit$$"SUB     0x8,RCX\n\t"
11683        $$emit$$"JGE     L_loop\n\t"
11684        $$emit$$"ADD     0x4,RCX\n\t"
11685        $$emit$$"JL      L_tail\n\t"
11686        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11687        $$emit$$"ADD     0x20,RAX\n\t"
11688        $$emit$$"SUB     0x4,RCX\n\t"
11689        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11690        $$emit$$"ADD     0x4,RCX\n\t"
11691        $$emit$$"JLE     L_end\n\t"
11692        $$emit$$"DEC     RCX\n\t"
11693        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11694        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11695        $$emit$$"ADD     0x8,RAX\n\t"
11696        $$emit$$"DEC     RCX\n\t"
11697        $$emit$$"JGE     L_sloop\n\t"
11698        $$emit$$"# L_end:\n\t"
11699     } else {
11700        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11701        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11702        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11703     }
11704     $$emit$$"# DONE"
11705   %}
11706   ins_encode %{
11707     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11708                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11709   %}
11710   ins_pipe( pipe_slow );
11711 %}
11712 
11713 // Small ClearArray AVX512 constant length.
11714 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11715 %{
11716   predicate(!((ClearArrayNode*)n)->is_large() &&
11717                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11718   match(Set dummy (ClearArray cnt base));
11719   ins_cost(100);
11720   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11721   format %{ "clear_mem_imm $base,$cnt" %}
11722   ins_encode %{
11723    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11724   %}
11725   ins_pipe(pipe_slow);
11726 %}
11727 
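// String compare rules.  The LL/UU/LU/UL suffixes name the encodings of the
// two operands (L = Latin-1 bytes, U = UTF-16 chars); the _evex variants are
// selected when AVX-512 VL/BW is available and take an opmask temporary.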
11728 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11729                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11730   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11731   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11732   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11733 
11734   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11735   ins_encode %{
11736     __ string_compare($str1$$Register, $str2$$Register,
11737                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11738                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11739   %}
11740   ins_pipe( pipe_slow );
11741 %}
11742 
11743 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11744                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11745   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11746   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11747   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11748 
11749   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11750   ins_encode %{
11751     __ string_compare($str1$$Register, $str2$$Register,
11752                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11753                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11754   %}
11755   ins_pipe( pipe_slow );
11756 %}
11757 
11758 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11759                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11760   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11761   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11762   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11763 
11764   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11765   ins_encode %{
11766     __ string_compare($str1$$Register, $str2$$Register,
11767                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11768                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11769   %}
11770   ins_pipe( pipe_slow );
11771 %}
11772 
11773 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11774                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11775   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11776   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11777   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11778 
11779   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11780   ins_encode %{
11781     __ string_compare($str1$$Register, $str2$$Register,
11782                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11783                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11784   %}
11785   ins_pipe( pipe_slow );
11786 %}
11787 
11788 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11789                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11790   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11791   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11792   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11793 
11794   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11795   ins_encode %{
11796     __ string_compare($str1$$Register, $str2$$Register,
11797                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11798                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11799   %}
11800   ins_pipe( pipe_slow );
11801 %}
11802 
11803 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11804                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11805   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11806   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11807   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11808 
11809   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11810   ins_encode %{
11811     __ string_compare($str1$$Register, $str2$$Register,
11812                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11813                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11819                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11820   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11821   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11822   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11823 
11824   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11825   ins_encode %{
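    // Note the swapped operand order (str2/cnt2 first): for the UL encoding
    // str1 is bound to ESI and str2 to EDI, so passing them swapped keeps the
    // masm routine's (EDI, ESI, ECX, EDX) register convention used by the
    // other variants.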
11826     __ string_compare($str2$$Register, $str1$$Register,
11827                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11828                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11829   %}
11830   ins_pipe( pipe_slow );
11831 %}
11832 
11833 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11834                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11835   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11836   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11837   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11838 
11839   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11840   ins_encode %{
11841     __ string_compare($str2$$Register, $str1$$Register,
11842                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11843                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11844   %}
11845   ins_pipe( pipe_slow );
11846 %}
11847 
11848 // fast string equals
11849 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11850                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11851   predicate(!VM_Version::supports_avx512vlbw());
11852   match(Set result (StrEquals (Binary str1 str2) cnt));
11853   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11854 
11855   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11856   ins_encode %{
11857     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11858                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11859                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11860   %}
11861 
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11866                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11867   predicate(VM_Version::supports_avx512vlbw());
11868   match(Set result (StrEquals (Binary str1 str2) cnt));
11869   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11870 
11871   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11872   ins_encode %{
11873     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11874                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11875                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11876   %}
11877 
11878   ins_pipe( pipe_slow );
11879 %}
11880 
11881 
11882 // fast search of substring with known size.
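// When the constant needle is large enough to fill a 16-byte XMM register of
// comparison data (>= 16 elements for LL, >= 8 for UU/UL), string_indexofC8
// is used and the needle never has to be staged through the stack; shorter
// constants fall back to the general string_indexof path.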
11883 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11884                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11885   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11886   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11887   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11888 
11889   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11890   ins_encode %{
11891     int icnt2 = (int)$int_cnt2$$constant;
11892     if (icnt2 >= 16) {
11893       // IndexOf for constant substrings with size >= 16 elements
11894       // which don't need to be loaded through the stack.
11895       __ string_indexofC8($str1$$Register, $str2$$Register,
11896                           $cnt1$$Register, $cnt2$$Register,
11897                           icnt2, $result$$Register,
11898                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11899     } else {
11900       // Small strings are loaded through the stack if they cross a page boundary.
11901       __ string_indexof($str1$$Register, $str2$$Register,
11902                         $cnt1$$Register, $cnt2$$Register,
11903                         icnt2, $result$$Register,
11904                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11905     }
11906   %}
11907   ins_pipe( pipe_slow );
11908 %}
11909 
11910 // fast search of substring with known size.
11911 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11912                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11913   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11914   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11915   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11916 
11917   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11918   ins_encode %{
11919     int icnt2 = (int)$int_cnt2$$constant;
11920     if (icnt2 >= 8) {
11921       // IndexOf for constant substrings with size >= 8 elements
11922       // which don't need to be loaded through the stack.
11923       __ string_indexofC8($str1$$Register, $str2$$Register,
11924                           $cnt1$$Register, $cnt2$$Register,
11925                           icnt2, $result$$Register,
11926                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11927     } else {
11928       // Small strings are loaded through the stack if they cross a page boundary.
11929       __ string_indexof($str1$$Register, $str2$$Register,
11930                         $cnt1$$Register, $cnt2$$Register,
11931                         icnt2, $result$$Register,
11932                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11933     }
11934   %}
11935   ins_pipe( pipe_slow );
11936 %}
11937 
11938 // fast search of substring with known size.
11939 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11940                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11941   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11942   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11943   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11944 
11945   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11946   ins_encode %{
11947     int icnt2 = (int)$int_cnt2$$constant;
11948     if (icnt2 >= 8) {
11949       // IndexOf for constant substrings with size >= 8 elements
11950       // which don't need to be loaded through the stack.
11951       __ string_indexofC8($str1$$Register, $str2$$Register,
11952                           $cnt1$$Register, $cnt2$$Register,
11953                           icnt2, $result$$Register,
11954                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11955     } else {
11956       // Small strings are loaded through the stack if they cross a page boundary.
11957       __ string_indexof($str1$$Register, $str2$$Register,
11958                         $cnt1$$Register, $cnt2$$Register,
11959                         icnt2, $result$$Register,
11960                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11961     }
11962   %}
11963   ins_pipe( pipe_slow );
11964 %}
11965 
11966 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11967                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11968   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11969   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11970   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11971 
11972   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11973   ins_encode %{
11974     __ string_indexof($str1$$Register, $str2$$Register,
11975                       $cnt1$$Register, $cnt2$$Register,
11976                       (-1), $result$$Register,
11977                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11978   %}
11979   ins_pipe( pipe_slow );
11980 %}
11981 
11982 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11983                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11984   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11985   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11986   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11987 
11988   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11989   ins_encode %{
11990     __ string_indexof($str1$$Register, $str2$$Register,
11991                       $cnt1$$Register, $cnt2$$Register,
11992                       (-1), $result$$Register,
11993                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
11997 
11998 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11999                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12000   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12001   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12002   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12003 
12004   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12005   ins_encode %{
12006     __ string_indexof($str1$$Register, $str2$$Register,
12007                       $cnt1$$Register, $cnt2$$Register,
12008                       (-1), $result$$Register,
12009                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12010   %}
12011   ins_pipe( pipe_slow );
12012 %}
12013 
12014 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12015                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12016   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12017   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12018   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12019   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12020   ins_encode %{
12021     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12022                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12023   %}
12024   ins_pipe( pipe_slow );
12025 %}
12026 
12027 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12028                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12029   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12030   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12031   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12032   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12033   ins_encode %{
12034     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12035                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12036   %}
12037   ins_pipe( pipe_slow );
12038 %}
12039 
12040 
12041 // fast array equals
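// AryEq LL compares byte[] contents and UU compares char[]; the boolean
// passed to arrays_equals selects the element width, and the _evex forms take
// an AVX-512 opmask temporary.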
12042 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12043                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12044 %{
12045   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12046   match(Set result (AryEq ary1 ary2));
12047   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12048   //ins_cost(300);
12049 
12050   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12051   ins_encode %{
12052     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12053                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12054                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12055   %}
12056   ins_pipe( pipe_slow );
12057 %}
12058 
12059 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12060                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12061 %{
12062   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12063   match(Set result (AryEq ary1 ary2));
12064   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12065   //ins_cost(300);
12066 
12067   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12068   ins_encode %{
12069     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12070                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12071                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12072   %}
12073   ins_pipe( pipe_slow );
12074 %}
12075 
12076 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12077                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12078 %{
12079   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12080   match(Set result (AryEq ary1 ary2));
12081   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12082   //ins_cost(300);
12083 
12084   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12085   ins_encode %{
12086     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12087                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12088                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12089   %}
12090   ins_pipe( pipe_slow );
12091 %}
12092 
12093 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12094                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12095 %{
12096   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12097   match(Set result (AryEq ary1 ary2));
12098   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12099   //ins_cost(300);
12100 
12101   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12102   ins_encode %{
12103     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12104                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12105                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12106   %}
12107   ins_pipe( pipe_slow );
12108 %}
12109 
12110 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12111                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12112 %{
12113   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12114   match(Set result (HasNegatives ary1 len));
12115   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12116 
12117   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12118   ins_encode %{
12119     __ has_negatives($ary1$$Register, $len$$Register,
12120                      $result$$Register, $tmp3$$Register,
12121                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12122   %}
12123   ins_pipe( pipe_slow );
12124 %}
12125 
12126 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12127                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12128 %{
12129   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12130   match(Set result (HasNegatives ary1 len));
12131   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12132 
12133   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12134   ins_encode %{
12135     __ has_negatives($ary1$$Register, $len$$Register,
12136                      $result$$Register, $tmp3$$Register,
12137                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12138   %}
12139   ins_pipe( pipe_slow );
12140 %}
12141 
12142 
12143 // fast char[] to byte[] compression
12144 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12145                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12146   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12147   match(Set result (StrCompressedCopy src (Binary dst len)));
12148   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12149 
12150   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12151   ins_encode %{
12152     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12153                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12154                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12155                            knoreg, knoreg);
12156   %}
12157   ins_pipe( pipe_slow );
12158 %}
12159 
12160 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12161                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12162   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12163   match(Set result (StrCompressedCopy src (Binary dst len)));
12164   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12165 
12166   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12167   ins_encode %{
12168     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12169                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12170                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12171                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12172   %}
12173   ins_pipe( pipe_slow );
12174 %}
12175 
12176 // fast byte[] to char[] inflation
12177 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12178                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12179   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12180   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12181   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12182 
12183   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12184   ins_encode %{
12185     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12186                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12187   %}
12188   ins_pipe( pipe_slow );
12189 %}
12190 
12191 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12192                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12193   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12194   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12195   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12196 
12197   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12198   ins_encode %{
12199     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12200                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12201   %}
12202   ins_pipe( pipe_slow );
12203 %}
12204 
12205 // encode char[] to byte[] in ISO_8859_1
12206 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12207                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12208                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12209   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12210   match(Set result (EncodeISOArray src (Binary dst len)));
12211   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12212 
12213   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12214   ins_encode %{
12215     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12216                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12217                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12218   %}
12219   ins_pipe( pipe_slow );
12220 %}
12221 
12222 // encode char[] to byte[] in ASCII
12223 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12224                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12225                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12226   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12227   match(Set result (EncodeISOArray src (Binary dst len)));
12228   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12229 
12230   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12231   ins_encode %{
12232     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12233                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12234                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12235   %}
12236   ins_pipe( pipe_slow );
12237 %}
12238 
12239 //----------Control Flow Instructions------------------------------------------
12240 // Signed compare Instructions
12241 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12242   match(Set cr (CmpI op1 op2));
12243   effect( DEF cr, USE op1, USE op2 );
12244   format %{ "CMP    $op1,$op2" %}
12245   opcode(0x3B);  /* Opcode 3B /r */
12246   ins_encode( OpcP, RegReg( op1, op2) );
12247   ins_pipe( ialu_cr_reg_reg );
12248 %}
12249 
12250 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12251   match(Set cr (CmpI op1 op2));
12252   effect( DEF cr, USE op1 );
12253   format %{ "CMP    $op1,$op2" %}
12254   opcode(0x81,0x07);  /* Opcode 81 /7 */
12255   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12256   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12257   ins_pipe( ialu_cr_reg_imm );
12258 %}
12259 
12260 // Cisc-spilled version of cmpI_eReg
12261 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12262   match(Set cr (CmpI op1 (LoadI op2)));
12263 
12264   format %{ "CMP    $op1,$op2" %}
12265   ins_cost(500);
12266   opcode(0x3B);  /* Opcode 3B /r */
12267   ins_encode( OpcP, RegMem( op1, op2) );
12268   ins_pipe( ialu_cr_reg_mem );
12269 %}
12270 
12271 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12272   match(Set cr (CmpI src zero));
12273   effect( DEF cr, USE src );
12274 
12275   format %{ "TEST   $src,$src" %}
12276   opcode(0x85);
12277   ins_encode( OpcP, RegReg( src, src ) );
12278   ins_pipe( ialu_cr_reg_imm );
12279 %}
12280 
12281 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12282   match(Set cr (CmpI (AndI src con) zero));
12283 
12284   format %{ "TEST   $src,$con" %}
12285   opcode(0xF7,0x00);
12286   ins_encode( OpcP, RegOpc(src), Con32(con) );
12287   ins_pipe( ialu_cr_reg_imm );
12288 %}
12289 
12290 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12291   match(Set cr (CmpI (AndI src mem) zero));
12292 
12293   format %{ "TEST   $src,$mem" %}
12294   opcode(0x85);
12295   ins_encode( OpcP, RegMem( src, mem ) );
12296   ins_pipe( ialu_cr_reg_mem );
12297 %}
12298 
12299 // Unsigned compare Instructions; really, same as signed except they
12300 // produce an eFlagsRegU instead of eFlagsReg.
12301 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12302   match(Set cr (CmpU op1 op2));
12303 
12304   format %{ "CMPu   $op1,$op2" %}
12305   opcode(0x3B);  /* Opcode 3B /r */
12306   ins_encode( OpcP, RegReg( op1, op2) );
12307   ins_pipe( ialu_cr_reg_reg );
12308 %}
12309 
12310 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12311   match(Set cr (CmpU op1 op2));
12312 
12313   format %{ "CMPu   $op1,$op2" %}
12314   opcode(0x81,0x07);  /* Opcode 81 /7 */
12315   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12316   ins_pipe( ialu_cr_reg_imm );
12317 %}
12318 
12319 // Cisc-spilled version of cmpU_eReg
12320 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12321   match(Set cr (CmpU op1 (LoadI op2)));
12322 
12323   format %{ "CMPu   $op1,$op2" %}
12324   ins_cost(500);
12325   opcode(0x3B);  /* Opcode 3B /r */
12326   ins_encode( OpcP, RegMem( op1, op2) );
12327   ins_pipe( ialu_cr_reg_mem );
12328 %}
12329 
12330 // // Cisc-spilled version of cmpU_eReg
12331 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12332 //  match(Set cr (CmpU (LoadI op1) op2));
12333 //
12334 //  format %{ "CMPu   $op1,$op2" %}
12335 //  ins_cost(500);
12336 //  opcode(0x39);  /* Opcode 39 /r */
12337 //  ins_encode( OpcP, RegMem( op1, op2) );
12338 //%}
12339 
12340 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12341   match(Set cr (CmpU src zero));
12342 
12343   format %{ "TESTu  $src,$src" %}
12344   opcode(0x85);
12345   ins_encode( OpcP, RegReg( src, src ) );
12346   ins_pipe( ialu_cr_reg_imm );
12347 %}
12348 
12349 // Unsigned pointer compare Instructions
12350 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12351   match(Set cr (CmpP op1 op2));
12352 
12353   format %{ "CMPu   $op1,$op2" %}
12354   opcode(0x3B);  /* Opcode 3B /r */
12355   ins_encode( OpcP, RegReg( op1, op2) );
12356   ins_pipe( ialu_cr_reg_reg );
12357 %}
12358 
12359 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12360   match(Set cr (CmpP op1 op2));
12361 
12362   format %{ "CMPu   $op1,$op2" %}
12363   opcode(0x81,0x07);  /* Opcode 81 /7 */
12364   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12365   ins_pipe( ialu_cr_reg_imm );
12366 %}
12367 
12368 // Cisc-spilled version of cmpP_eReg
12369 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12370   match(Set cr (CmpP op1 (LoadP op2)));
12371 
12372   format %{ "CMPu   $op1,$op2" %}
12373   ins_cost(500);
12374   opcode(0x3B);  /* Opcode 3B /r */
12375   ins_encode( OpcP, RegMem( op1, op2) );
12376   ins_pipe( ialu_cr_reg_mem );
12377 %}
12378 
12379 // // Cisc-spilled version of cmpP_eReg
12380 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12381 //  match(Set cr (CmpP (LoadP op1) op2));
12382 //
12383 //  format %{ "CMPu   $op1,$op2" %}
12384 //  ins_cost(500);
12385 //  opcode(0x39);  /* Opcode 39 /r */
12386 //  ins_encode( OpcP, RegMem( op1, op2) );
12387 //%}
12388 
12389 // Compare raw pointer (used in out-of-heap check).
12390 // Only works because non-oop pointers must be raw pointers
12391 // and raw pointers have no anti-dependencies.
12392 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12393   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12394   match(Set cr (CmpP op1 (LoadP op2)));
12395 
12396   format %{ "CMPu   $op1,$op2" %}
12397   opcode(0x3B);  /* Opcode 3B /r */
12398   ins_encode( OpcP, RegMem( op1, op2) );
12399   ins_pipe( ialu_cr_reg_mem );
12400 %}
12401 
12402 //
12403 // This will generate a signed flags result. This should be ok
12404 // since any compare to a zero should be eq/neq.
12405 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12406   match(Set cr (CmpP src zero));
12407 
12408   format %{ "TEST   $src,$src" %}
12409   opcode(0x85);
12410   ins_encode( OpcP, RegReg( src, src ) );
12411   ins_pipe( ialu_cr_reg_imm );
12412 %}
12413 
12414 // Cisc-spilled version of testP_reg
12415 // This will generate a signed flags result. This should be ok
12416 // since any compare to a zero should be eq/neq.
12417 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12418   match(Set cr (CmpP (LoadP op) zero));
12419 
12420   format %{ "TEST   $op,0xFFFFFFFF" %}
12421   ins_cost(500);
12422   opcode(0xF7);               /* Opcode F7 /0 */
12423   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12424   ins_pipe( ialu_cr_reg_imm );
12425 %}
12426 
12427 // Yanked all unsigned pointer compare operations.
12428 // Pointer compares are done with CmpP which is already unsigned.
12429 
12430 //----------Max and Min--------------------------------------------------------
12431 // Min Instructions
12432 ////
12433 //   *** Min and Max using the conditional move are slower than the
12434 //   *** branch version on a Pentium III.
12435 // // Conditional move for min
12436 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12437 //  effect( USE_DEF op2, USE op1, USE cr );
12438 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12439 //  opcode(0x4C,0x0F);
12440 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12441 //  ins_pipe( pipe_cmov_reg );
12442 //%}
12443 //
12444 //// Min Register with Register (P6 version)
12445 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12446 //  predicate(VM_Version::supports_cmov() );
12447 //  match(Set op2 (MinI op1 op2));
12448 //  ins_cost(200);
12449 //  expand %{
12450 //    eFlagsReg cr;
12451 //    compI_eReg(cr,op1,op2);
12452 //    cmovI_reg_lt(op2,op1,cr);
12453 //  %}
12454 //%}
12455 
12456 // Min Register with Register (generic version)
12457 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12458   match(Set dst (MinI dst src));
12459   effect(KILL flags);
12460   ins_cost(300);
12461 
12462   format %{ "MIN    $dst,$src" %}
12463   opcode(0xCC);
12464   ins_encode( min_enc(dst,src) );
12465   ins_pipe( pipe_slow );
12466 %}
12467 
12468 // Max Register with Register
12469 //   *** Min and Max using the conditional move are slower than the
12470 //   *** branch version on a Pentium III.
12471 // // Conditional move for max
12472 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12473 //  effect( USE_DEF op2, USE op1, USE cr );
12474 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12475 //  opcode(0x4F,0x0F);
12476 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12477 //  ins_pipe( pipe_cmov_reg );
12478 //%}
12479 //
12480 // // Max Register with Register (P6 version)
12481 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12482 //  predicate(VM_Version::supports_cmov() );
12483 //  match(Set op2 (MaxI op1 op2));
12484 //  ins_cost(200);
12485 //  expand %{
12486 //    eFlagsReg cr;
12487 //    compI_eReg(cr,op1,op2);
12488 //    cmovI_reg_gt(op2,op1,cr);
12489 //  %}
12490 //%}
12491 
12492 // Max Register with Register (generic version)
12493 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12494   match(Set dst (MaxI dst src));
12495   effect(KILL flags);
12496   ins_cost(300);
12497 
12498   format %{ "MAX    $dst,$src" %}
12499   opcode(0xCC);
12500   ins_encode( max_enc(dst,src) );
12501   ins_pipe( pipe_slow );
12502 %}
12503 
12504 // ============================================================================
12505 // Counted Loop limit node which represents exact final iterator value.
12506 // Note: the resulting value should fit into the integer range since
12507 // counted loops have a limit check on overflow.
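      //
      // Illustrative example only (not part of the rule below): for init=0,
      // limit=10, stride=3 the expression
      //   $init + $stride*(($limit - $init + $stride - 1)/$stride)
      // gives 0 + 3*((10 - 0 + 3 - 1)/3) = 0 + 3*4 = 12, the exact value the
      // iterator holds when the loop exits (it visits 0, 3, 6, 9, then 12 >= 10).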
12508 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12509   match(Set limit (LoopLimit (Binary init limit) stride));
12510   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12511   ins_cost(300);
12512 
12513   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12514   ins_encode %{
12515     int strd = (int)$stride$$constant;
12516     assert(strd != 1 && strd != -1, "sanity");
12517     int m1 = (strd > 0) ? 1 : -1;
12518     // Convert limit to long (EAX:EDX)
12519     __ cdql();
12520     // Convert init to long (init:tmp)
12521     __ movl($tmp$$Register, $init$$Register);
12522     __ sarl($tmp$$Register, 31);
12523     // $limit - $init
12524     __ subl($limit$$Register, $init$$Register);
12525     __ sbbl($limit_hi$$Register, $tmp$$Register);
12526     // + ($stride - 1)
12527     if (strd > 0) {
12528       __ addl($limit$$Register, (strd - 1));
12529       __ adcl($limit_hi$$Register, 0);
12530       __ movl($tmp$$Register, strd);
12531     } else {
12532       __ addl($limit$$Register, (strd + 1));
12533       __ adcl($limit_hi$$Register, -1);
12534       __ lneg($limit_hi$$Register, $limit$$Register);
12535       __ movl($tmp$$Register, -strd);
12536     }
12537     // signed division: (EAX:EDX) / pos_stride
12538     __ idivl($tmp$$Register);
12539     if (strd < 0) {
12540       // restore sign
12541       __ negl($tmp$$Register);
12542     }
12543     // (EAX) * stride
12544     __ mull($tmp$$Register);
12545     // + init (ignore upper bits)
12546     __ addl($limit$$Register, $init$$Register);
12547   %}
12548   ins_pipe( pipe_slow );
12549 %}
12550 
12551 // ============================================================================
12552 // Branch Instructions
12553 // Jump Table
12554 instruct jumpXtnd(rRegI switch_val) %{
12555   match(Jump switch_val);
12556   ins_cost(350);
12557   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12558   ins_encode %{
12559     // Jump to Address(table_base + switch_reg)
12560     Address index(noreg, $switch_val$$Register, Address::times_1);
12561     __ jump(ArrayAddress($constantaddress, index));
12562   %}
12563   ins_pipe(pipe_jmp);
12564 %}
12565 
12566 // Jump Direct - Label defines a relative address from JMP+1
12567 instruct jmpDir(label labl) %{
12568   match(Goto);
12569   effect(USE labl);
12570 
12571   ins_cost(300);
12572   format %{ "JMP    $labl" %}
12573   size(5);
12574   ins_encode %{
12575     Label* L = $labl$$label;
12576     __ jmp(*L, false); // Always long jump
12577   %}
12578   ins_pipe( pipe_jmp );
12579 %}
12580 
12581 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12582 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12583   match(If cop cr);
12584   effect(USE labl);
12585 
12586   ins_cost(300);
12587   format %{ "J$cop    $labl" %}
12588   size(6);
12589   ins_encode %{
12590     Label* L = $labl$$label;
12591     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12592   %}
12593   ins_pipe( pipe_jcc );
12594 %}
12595 
12596 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12597 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12598   predicate(!n->has_vector_mask_set());
12599   match(CountedLoopEnd cop cr);
12600   effect(USE labl);
12601 
12602   ins_cost(300);
12603   format %{ "J$cop    $labl\t# Loop end" %}
12604   size(6);
12605   ins_encode %{
12606     Label* L = $labl$$label;
12607     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12608   %}
12609   ins_pipe( pipe_jcc );
12610 %}
12611 
12612 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12613 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12614   predicate(!n->has_vector_mask_set());
12615   match(CountedLoopEnd cop cmp);
12616   effect(USE labl);
12617 
12618   ins_cost(300);
12619   format %{ "J$cop,u  $labl\t# Loop end" %}
12620   size(6);
12621   ins_encode %{
12622     Label* L = $labl$$label;
12623     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12624   %}
12625   ins_pipe( pipe_jcc );
12626 %}
12627 
12628 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12629   predicate(!n->has_vector_mask_set());
12630   match(CountedLoopEnd cop cmp);
12631   effect(USE labl);
12632 
12633   ins_cost(200);
12634   format %{ "J$cop,u  $labl\t# Loop end" %}
12635   size(6);
12636   ins_encode %{
12637     Label* L = $labl$$label;
12638     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12639   %}
12640   ins_pipe( pipe_jcc );
12641 %}
12642 
12643 // mask version
12644 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12645 // The bounded mask operand used in the following pattern is needed for
12646 // post-loop multiversioning.
12647 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12648   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12649   match(CountedLoopEnd cop cr);
12650   effect(USE labl, TEMP ktmp);
12651 
12652   ins_cost(400);
12653   format %{ "J$cop    $labl\t# Loop end\n\t"
12654             "restorevectmask \t# vector mask restore for loops" %}
12655   size(10);
12656   ins_encode %{
12657     Label* L = $labl$$label;
12658     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12659     __ restorevectmask($ktmp$$KRegister);
12660   %}
12661   ins_pipe( pipe_jcc );
12662 %}
12663 
12664 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12665 // The bounded mask operand used in the following pattern is needed for
12666 // post-loop multiversioning.
12667 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12668   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12669   match(CountedLoopEnd cop cmp);
12670   effect(USE labl, TEMP ktmp);
12671 
12672   ins_cost(400);
12673   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12674             "restorevectmask \t# vector mask restore for loops" %}
12675   size(10);
12676   ins_encode %{
12677     Label* L = $labl$$label;
12678     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12679     __ restorevectmask($ktmp$$KRegister);
12680   %}
12681   ins_pipe( pipe_jcc );
12682 %}
12683 
12684 // The bounded mask operand used in the following pattern is needed for
12685 // post-loop multiversioning.
12686 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12687   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12688   match(CountedLoopEnd cop cmp);
12689   effect(USE labl, TEMP ktmp);
12690 
12691   ins_cost(300);
12692   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12693             "restorevectmask \t# vector mask restore for loops" %}
12694   size(10);
12695   ins_encode %{
12696     Label* L = $labl$$label;
12697     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12698     __ restorevectmask($ktmp$$KRegister);
12699   %}
12700   ins_pipe( pipe_jcc );
12701 %}
12702 
12703 // Jump Direct Conditional - using unsigned comparison
12704 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12705   match(If cop cmp);
12706   effect(USE labl);
12707 
12708   ins_cost(300);
12709   format %{ "J$cop,u  $labl" %}
12710   size(6);
12711   ins_encode %{
12712     Label* L = $labl$$label;
12713     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12714   %}
12715   ins_pipe(pipe_jcc);
12716 %}
12717 
12718 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12719   match(If cop cmp);
12720   effect(USE labl);
12721 
12722   ins_cost(200);
12723   format %{ "J$cop,u  $labl" %}
12724   size(6);
12725   ins_encode %{
12726     Label* L = $labl$$label;
12727     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12728   %}
12729   ins_pipe(pipe_jcc);
12730 %}
12731 
12732 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12733   match(If cop cmp);
12734   effect(USE labl);
12735 
12736   ins_cost(200);
12737   format %{ $$template
12738     if ($cop$$cmpcode == Assembler::notEqual) {
12739       $$emit$$"JP,u   $labl\n\t"
12740       $$emit$$"J$cop,u   $labl"
12741     } else {
12742       $$emit$$"JP,u   done\n\t"
12743       $$emit$$"J$cop,u   $labl\n\t"
12744       $$emit$$"done:"
12745     }
12746   %}
12747   ins_encode %{
12748     Label* l = $labl$$label;
12749     if ($cop$$cmpcode == Assembler::notEqual) {
12750       __ jcc(Assembler::parity, *l, false);
12751       __ jcc(Assembler::notEqual, *l, false);
12752     } else if ($cop$$cmpcode == Assembler::equal) {
12753       Label done;
12754       __ jccb(Assembler::parity, done);
12755       __ jcc(Assembler::equal, *l, false);
12756       __ bind(done);
12757     } else {
12758        ShouldNotReachHere();
12759     }
12760   %}
12761   ins_pipe(pipe_jcc);
12762 %}
12763 
12764 // ============================================================================
12765 // The second (slow) half of a subtype check.  Scan the subklass's secondary
12766 // superklass array for an instance of the superklass.  Set a hidden internal
12767 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12768 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
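      //
      // Roughly equivalent C-like sketch (illustrative only; the field accesses
      // are abbreviated and do not reflect the real Klass layout):
      //   for (int i = 0; i < sub->secondary_supers->length; i++) {
      //     if (sub->secondary_supers->data[i] == super) {
      //       sub->secondary_super_cache = super;  // remember the hit
      //       return 0;                            // zero => hit, flags Z
      //     }
      //   }
      //   return non_zero;                         // miss, flags NZ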
12769 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12770   match(Set result (PartialSubtypeCheck sub super));
12771   effect( KILL rcx, KILL cr );
12772 
12773   ins_cost(1100);  // slightly larger than the next version
12774   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12775             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12776             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12777             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12778             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12779             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12780             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12781      "miss:\t" %}
12782 
12783   opcode(0x1); // Force a XOR of EDI
12784   ins_encode( enc_PartialSubtypeCheck() );
12785   ins_pipe( pipe_slow );
12786 %}
12787 
12788 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12789   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12790   effect( KILL rcx, KILL result );
12791 
12792   ins_cost(1000);
12793   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12794             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12795             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12796             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12797             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12798             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12799      "miss:\t" %}
12800 
12801   opcode(0x0);  // No need to XOR EDI
12802   ins_encode( enc_PartialSubtypeCheck() );
12803   ins_pipe( pipe_slow );
12804 %}
12805 
12806 // ============================================================================
12807 // Branch Instructions -- short offset versions
12808 //
12809 // These instructions are used to replace jumps of a long offset (the default
12810 // match) with jumps of a shorter offset.  These instructions are all tagged
12811 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12812 // match rules in general matching.  Instead, the ADLC generates a conversion
12813 // method in the MachNode which can be used to do in-place replacement of the
12814 // long variant with the shorter variant.  The compiler decides whether the
12815 // short form can be used via the is_short_branch_offset() predicate in the
12816 // machine-specific code section of the file.
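      //
      // For example (encodings shown for orientation only): the long form of
      // jmpDir below emits a 5-byte JMP rel32 (E9 xx xx xx xx) while its
      // jmpDir_short twin emits a 2-byte JMP rel8 (EB xx); likewise a 6-byte
      // Jcc rel32 (0F 8x) shrinks to a 2-byte Jcc rel8 (7x).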
12817 
12818 // Jump Direct - Label defines a relative address from JMP+1
12819 instruct jmpDir_short(label labl) %{
12820   match(Goto);
12821   effect(USE labl);
12822 
12823   ins_cost(300);
12824   format %{ "JMP,s  $labl" %}
12825   size(2);
12826   ins_encode %{
12827     Label* L = $labl$$label;
12828     __ jmpb(*L);
12829   %}
12830   ins_pipe( pipe_jmp );
12831   ins_short_branch(1);
12832 %}
12833 
12834 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12835 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12836   match(If cop cr);
12837   effect(USE labl);
12838 
12839   ins_cost(300);
12840   format %{ "J$cop,s  $labl" %}
12841   size(2);
12842   ins_encode %{
12843     Label* L = $labl$$label;
12844     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12845   %}
12846   ins_pipe( pipe_jcc );
12847   ins_short_branch(1);
12848 %}
12849 
12850 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12851 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12852   match(CountedLoopEnd cop cr);
12853   effect(USE labl);
12854 
12855   ins_cost(300);
12856   format %{ "J$cop,s  $labl\t# Loop end" %}
12857   size(2);
12858   ins_encode %{
12859     Label* L = $labl$$label;
12860     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12861   %}
12862   ins_pipe( pipe_jcc );
12863   ins_short_branch(1);
12864 %}
12865 
12866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12867 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12868   match(CountedLoopEnd cop cmp);
12869   effect(USE labl);
12870 
12871   ins_cost(300);
12872   format %{ "J$cop,us $labl\t# Loop end" %}
12873   size(2);
12874   ins_encode %{
12875     Label* L = $labl$$label;
12876     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12877   %}
12878   ins_pipe( pipe_jcc );
12879   ins_short_branch(1);
12880 %}
12881 
12882 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12883   match(CountedLoopEnd cop cmp);
12884   effect(USE labl);
12885 
12886   ins_cost(300);
12887   format %{ "J$cop,us $labl\t# Loop end" %}
12888   size(2);
12889   ins_encode %{
12890     Label* L = $labl$$label;
12891     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12892   %}
12893   ins_pipe( pipe_jcc );
12894   ins_short_branch(1);
12895 %}
12896 
12897 // Jump Direct Conditional - using unsigned comparison
12898 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12899   match(If cop cmp);
12900   effect(USE labl);
12901 
12902   ins_cost(300);
12903   format %{ "J$cop,us $labl" %}
12904   size(2);
12905   ins_encode %{
12906     Label* L = $labl$$label;
12907     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12908   %}
12909   ins_pipe( pipe_jcc );
12910   ins_short_branch(1);
12911 %}
12912 
12913 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12914   match(If cop cmp);
12915   effect(USE labl);
12916 
12917   ins_cost(300);
12918   format %{ "J$cop,us $labl" %}
12919   size(2);
12920   ins_encode %{
12921     Label* L = $labl$$label;
12922     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12923   %}
12924   ins_pipe( pipe_jcc );
12925   ins_short_branch(1);
12926 %}
12927 
12928 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12929   match(If cop cmp);
12930   effect(USE labl);
12931 
12932   ins_cost(300);
12933   format %{ $$template
12934     if ($cop$$cmpcode == Assembler::notEqual) {
12935       $$emit$$"JP,u,s   $labl\n\t"
12936       $$emit$$"J$cop,u,s   $labl"
12937     } else {
12938       $$emit$$"JP,u,s   done\n\t"
12939       $$emit$$"J$cop,u,s  $labl\n\t"
12940       $$emit$$"done:"
12941     }
12942   %}
12943   size(4);
12944   ins_encode %{
12945     Label* l = $labl$$label;
12946     if ($cop$$cmpcode == Assembler::notEqual) {
12947       __ jccb(Assembler::parity, *l);
12948       __ jccb(Assembler::notEqual, *l);
12949     } else if ($cop$$cmpcode == Assembler::equal) {
12950       Label done;
12951       __ jccb(Assembler::parity, done);
12952       __ jccb(Assembler::equal, *l);
12953       __ bind(done);
12954     } else {
12955        ShouldNotReachHere();
12956     }
12957   %}
12958   ins_pipe(pipe_jcc);
12959   ins_short_branch(1);
12960 %}
12961 
12962 // ============================================================================
12963 // Long Compare
12964 //
12965 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12966 // is tricky.  The flavor of compare used depends on whether we are testing
12967 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12968 // The GE test is the negated LT test.  The LE test can be had by commuting
12969 // the operands (yielding a GE test) and then negating; negate again for the
12970 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12971 // NE test is negated from that.
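      //
      // Sketch of the flag-manifesting idea used below (illustrative pseudo-assembly
      // mirroring the format strings, with a hypothetical label "taken"):
      //   CMP  src1.lo,src2.lo     // borrow out of the low halves
      //   MOV  tmp,src1.hi
      //   SBB  tmp,src2.hi         // flags now reflect the full 64-bit src1-src2
      //   JLT  taken               // signed less-than; JGE for the negated test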
12972 
12973 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12974 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12975 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12976 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12977 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12978 // foo match ends up with the wrong leaf.  One fix is to not match both
12979 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12980 // both forms beat the trinary form of long-compare and both are very useful
12981 // on Intel which has so few registers.
12982 
12983 // Manifest a CmpL result in an integer register.  Very painful.
12984 // This is the test to avoid.
12985 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12986   match(Set dst (CmpL3 src1 src2));
12987   effect( KILL flags );
12988   ins_cost(1000);
12989   format %{ "XOR    $dst,$dst\n\t"
12990             "CMP    $src1.hi,$src2.hi\n\t"
12991             "JLT,s  m_one\n\t"
12992             "JGT,s  p_one\n\t"
12993             "CMP    $src1.lo,$src2.lo\n\t"
12994             "JB,s   m_one\n\t"
12995             "JEQ,s  done\n"
12996     "p_one:\tINC    $dst\n\t"
12997             "JMP,s  done\n"
12998     "m_one:\tDEC    $dst\n"
12999      "done:" %}
13000   ins_encode %{
13001     Label p_one, m_one, done;
13002     __ xorptr($dst$$Register, $dst$$Register);
13003     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13004     __ jccb(Assembler::less,    m_one);
13005     __ jccb(Assembler::greater, p_one);
13006     __ cmpl($src1$$Register, $src2$$Register);
13007     __ jccb(Assembler::below,   m_one);
13008     __ jccb(Assembler::equal,   done);
13009     __ bind(p_one);
13010     __ incrementl($dst$$Register);
13011     __ jmpb(done);
13012     __ bind(m_one);
13013     __ decrementl($dst$$Register);
13014     __ bind(done);
13015   %}
13016   ins_pipe( pipe_slow );
13017 %}
13018 
13019 //======
13020 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13021 // compares.  Can be used for LE or GT compares by reversing arguments.
13022 // NOT GOOD FOR EQ/NE tests.
13023 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13024   match( Set flags (CmpL src zero ));
13025   ins_cost(100);
13026   format %{ "TEST   $src.hi,$src.hi" %}
13027   opcode(0x85);
13028   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13029   ins_pipe( ialu_cr_reg_reg );
13030 %}
13031 
13032 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13033 // compares.  Can be used for LE or GT compares by reversing arguments.
13034 // NOT GOOD FOR EQ/NE tests.
13035 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13036   match( Set flags (CmpL src1 src2 ));
13037   effect( TEMP tmp );
13038   ins_cost(300);
13039   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13040             "MOV    $tmp,$src1.hi\n\t"
13041             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13042   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13043   ins_pipe( ialu_cr_reg_reg );
13044 %}
13045 
13046 // Long compares reg < zero/reg OR reg >= zero/reg.
13047 // Just a wrapper for a normal branch, plus the predicate test.
13048 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13049   match(If cmp flags);
13050   effect(USE labl);
13051   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13052   expand %{
13053     jmpCon(cmp,flags,labl);    // JLT or JGE...
13054   %}
13055 %}
13056 
13057 //======
13058 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13059 // compares.  Can be used for LE or GT compares by reversing arguments.
13060 // NOT GOOD FOR EQ/NE tests.
13061 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13062   match(Set flags (CmpUL src zero));
13063   ins_cost(100);
13064   format %{ "TEST   $src.hi,$src.hi" %}
13065   opcode(0x85);
13066   ins_encode(OpcP, RegReg_Hi2(src, src));
13067   ins_pipe(ialu_cr_reg_reg);
13068 %}
13069 
13070 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13071 // compares.  Can be used for LE or GT compares by reversing arguments.
13072 // NOT GOOD FOR EQ/NE tests.
13073 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13074   match(Set flags (CmpUL src1 src2));
13075   effect(TEMP tmp);
13076   ins_cost(300);
13077   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13078             "MOV    $tmp,$src1.hi\n\t"
13079             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13080   ins_encode(long_cmp_flags2(src1, src2, tmp));
13081   ins_pipe(ialu_cr_reg_reg);
13082 %}
13083 
13084 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13085 // Just a wrapper for a normal branch, plus the predicate test.
13086 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13087   match(If cmp flags);
13088   effect(USE labl);
13089   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13090   expand %{
13091     jmpCon(cmp, flags, labl);    // JLT or JGE...
13092   %}
13093 %}
13094 
13095 // Compare 2 longs and CMOVE longs.
13096 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13097   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13098   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13099   ins_cost(400);
13100   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13101             "CMOV$cmp $dst.hi,$src.hi" %}
13102   opcode(0x0F,0x40);
13103   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13104   ins_pipe( pipe_cmov_reg_long );
13105 %}
13106 
13107 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13108   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13109   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13110   ins_cost(500);
13111   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13112             "CMOV$cmp $dst.hi,$src.hi" %}
13113   opcode(0x0F,0x40);
13114   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13115   ins_pipe( pipe_cmov_reg_long );
13116 %}
13117 
13118 // Compare 2 longs and CMOVE ints.
13119 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13120   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13121   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13122   ins_cost(200);
13123   format %{ "CMOV$cmp $dst,$src" %}
13124   opcode(0x0F,0x40);
13125   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13126   ins_pipe( pipe_cmov_reg );
13127 %}
13128 
13129 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13130   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13131   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13132   ins_cost(250);
13133   format %{ "CMOV$cmp $dst,$src" %}
13134   opcode(0x0F,0x40);
13135   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13136   ins_pipe( pipe_cmov_mem );
13137 %}
13138 
13139 // Compare 2 longs and CMOVE ptrs.
13140 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13141   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13142   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13143   ins_cost(200);
13144   format %{ "CMOV$cmp $dst,$src" %}
13145   opcode(0x0F,0x40);
13146   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13147   ins_pipe( pipe_cmov_reg );
13148 %}
13149 
13150 // Compare 2 longs and CMOVE doubles
13151 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13152   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13153   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13154   ins_cost(200);
13155   expand %{
13156     fcmovDPR_regS(cmp,flags,dst,src);
13157   %}
13158 %}
13159 
13160 // Compare 2 longs and CMOVE doubles
13161 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13162   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13163   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13164   ins_cost(200);
13165   expand %{
13166     fcmovD_regS(cmp,flags,dst,src);
13167   %}
13168 %}
13169 
13170 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13171   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13172   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13173   ins_cost(200);
13174   expand %{
13175     fcmovFPR_regS(cmp,flags,dst,src);
13176   %}
13177 %}
13178 
13179 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13180   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13181   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13182   ins_cost(200);
13183   expand %{
13184     fcmovF_regS(cmp,flags,dst,src);
13185   %}
13186 %}
13187 
13188 //======
13189 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13190 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13191   match( Set flags (CmpL src zero ));
13192   effect(TEMP tmp);
13193   ins_cost(200);
13194   format %{ "MOV    $tmp,$src.lo\n\t"
13195             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13196   ins_encode( long_cmp_flags0( src, tmp ) );
13197   ins_pipe( ialu_reg_reg_long );
13198 %}
13199 
13200 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13201 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13202   match( Set flags (CmpL src1 src2 ));
13203   ins_cost(200+300);
13204   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13205             "JNE,s  skip\n\t"
13206             "CMP    $src1.hi,$src2.hi\n\t"
13207      "skip:\t" %}
13208   ins_encode( long_cmp_flags1( src1, src2 ) );
13209   ins_pipe( ialu_cr_reg_reg );
13210 %}
13211 
13212 // Long compare reg == zero/reg OR reg != zero/reg
13213 // Just a wrapper for a normal branch, plus the predicate test.
13214 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13215   match(If cmp flags);
13216   effect(USE labl);
13217   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13218   expand %{
13219     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13220   %}
13221 %}
13222 
13223 //======
13224 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13225 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13226   match(Set flags (CmpUL src zero));
13227   effect(TEMP tmp);
13228   ins_cost(200);
13229   format %{ "MOV    $tmp,$src.lo\n\t"
13230             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13231   ins_encode(long_cmp_flags0(src, tmp));
13232   ins_pipe(ialu_reg_reg_long);
13233 %}
13234 
13235 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13236 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13237   match(Set flags (CmpUL src1 src2));
13238   ins_cost(200+300);
13239   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13240             "JNE,s  skip\n\t"
13241             "CMP    $src1.hi,$src2.hi\n\t"
13242      "skip:\t" %}
13243   ins_encode(long_cmp_flags1(src1, src2));
13244   ins_pipe(ialu_cr_reg_reg);
13245 %}
13246 
13247 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13248 // Just a wrapper for a normal branch, plus the predicate test.
13249 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13250   match(If cmp flags);
13251   effect(USE labl);
13252   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13253   expand %{
13254     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13255   %}
13256 %}
13257 
13258 // Compare 2 longs and CMOVE longs.
13259 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13260   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13261   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13262   ins_cost(400);
13263   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13264             "CMOV$cmp $dst.hi,$src.hi" %}
13265   opcode(0x0F,0x40);
13266   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13267   ins_pipe( pipe_cmov_reg_long );
13268 %}
13269 
13270 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13271   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13272   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13273   ins_cost(500);
13274   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13275             "CMOV$cmp $dst.hi,$src.hi" %}
13276   opcode(0x0F,0x40);
13277   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13278   ins_pipe( pipe_cmov_reg_long );
13279 %}
13280 
13281 // Compare 2 longs and CMOVE ints.
13282 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13285   ins_cost(200);
13286   format %{ "CMOV$cmp $dst,$src" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13289   ins_pipe( pipe_cmov_reg );
13290 %}
13291 
13292 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13293   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13294   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13295   ins_cost(250);
13296   format %{ "CMOV$cmp $dst,$src" %}
13297   opcode(0x0F,0x40);
13298   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13299   ins_pipe( pipe_cmov_mem );
13300 %}
13301 
13302 // Compare 2 longs and CMOVE ptrs.
13303 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13304   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13305   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13306   ins_cost(200);
13307   format %{ "CMOV$cmp $dst,$src" %}
13308   opcode(0x0F,0x40);
13309   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13310   ins_pipe( pipe_cmov_reg );
13311 %}
13312 
13313 // Compare 2 longs and CMOVE doubles
13314 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13315   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13316   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13317   ins_cost(200);
13318   expand %{
13319     fcmovDPR_regS(cmp,flags,dst,src);
13320   %}
13321 %}
13322 
13323 // Compare 2 longs and CMOVE doubles
13324 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13325   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13326   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13327   ins_cost(200);
13328   expand %{
13329     fcmovD_regS(cmp,flags,dst,src);
13330   %}
13331 %}
13332 
13333 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13334   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13335   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13336   ins_cost(200);
13337   expand %{
13338     fcmovFPR_regS(cmp,flags,dst,src);
13339   %}
13340 %}
13341 
13342 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13343   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13344   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13345   ins_cost(200);
13346   expand %{
13347     fcmovF_regS(cmp,flags,dst,src);
13348   %}
13349 %}
13350 
13351 //======
13352 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13353 // Same as cmpL_reg_flags_LEGT except must negate src
13354 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13355   match( Set flags (CmpL src zero ));
13356   effect( TEMP tmp );
13357   ins_cost(300);
13358   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13359             "CMP    $tmp,$src.lo\n\t"
13360             "SBB    $tmp,$src.hi\n\t" %}
13361   ins_encode( long_cmp_flags3(src, tmp) );
13362   ins_pipe( ialu_reg_reg_long );
13363 %}
13364 
13365 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13366 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13367 // requires a commuted test to get the same result.
13368 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13369   match( Set flags (CmpL src1 src2 ));
13370   effect( TEMP tmp );
13371   ins_cost(300);
13372   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13373             "MOV    $tmp,$src2.hi\n\t"
13374             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13375   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13376   ins_pipe( ialu_cr_reg_reg );
13377 %}
13378 
13379 // Long compares reg < zero/reg OR reg >= zero/reg.
13380 // Just a wrapper for a normal branch, plus the predicate test
13381 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13382   match(If cmp flags);
13383   effect(USE labl);
13384   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13385   ins_cost(300);
13386   expand %{
13387     jmpCon(cmp,flags,labl);    // JGT or JLE...
13388   %}
13389 %}
13390 
13391 //======
13392 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13393 // Same as cmpUL_reg_flags_LEGT except must negate src
13394 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13395   match(Set flags (CmpUL src zero));
13396   effect(TEMP tmp);
13397   ins_cost(300);
13398   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13399             "CMP    $tmp,$src.lo\n\t"
13400             "SBB    $tmp,$src.hi\n\t" %}
13401   ins_encode(long_cmp_flags3(src, tmp));
13402   ins_pipe(ialu_reg_reg_long);
13403 %}
13404 
13405 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13406 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13407 // requires a commuted test to get the same result.
13408 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13409   match(Set flags (CmpUL src1 src2));
13410   effect(TEMP tmp);
13411   ins_cost(300);
13412   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13413             "MOV    $tmp,$src2.hi\n\t"
13414             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13415   ins_encode(long_cmp_flags2( src2, src1, tmp));
13416   ins_pipe(ialu_cr_reg_reg);
13417 %}
13418 
13419 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13420 // Just a wrapper for a normal branch, plus the predicate test
13421 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13422   match(If cmp flags);
13423   effect(USE labl);
13424   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13425   ins_cost(300);
13426   expand %{
13427     jmpCon(cmp, flags, labl);    // JGT or JLE...
13428   %}
13429 %}
13430 
13431 // Compare 2 longs and CMOVE longs.
13432 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13433   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13434   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13435   ins_cost(400);
13436   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13437             "CMOV$cmp $dst.hi,$src.hi" %}
13438   opcode(0x0F,0x40);
13439   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13440   ins_pipe( pipe_cmov_reg_long );
13441 %}
13442 
13443 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13444   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13445   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13446   ins_cost(500);
13447   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13448             "CMOV$cmp $dst.hi,$src.hi+4" %}
13449   opcode(0x0F,0x40);
13450   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13451   ins_pipe( pipe_cmov_reg_long );
13452 %}
13453 
13454 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13455   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13456   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13457   ins_cost(400);
13458   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13459             "CMOV$cmp $dst.hi,$src.hi" %}
13460   opcode(0x0F,0x40);
13461   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13462   ins_pipe( pipe_cmov_reg_long );
13463 %}
13464 
13465 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13466   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13467   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13468   ins_cost(500);
13469   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13470             "CMOV$cmp $dst.hi,$src.hi+4" %}
13471   opcode(0x0F,0x40);
13472   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13473   ins_pipe( pipe_cmov_reg_long );
13474 %}
13475 
13476 // Compare 2 longs and CMOVE ints.
13477 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13478   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13479   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13480   ins_cost(200);
13481   format %{ "CMOV$cmp $dst,$src" %}
13482   opcode(0x0F,0x40);
13483   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13484   ins_pipe( pipe_cmov_reg );
13485 %}
13486 
13487 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13488   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13489   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13490   ins_cost(250);
13491   format %{ "CMOV$cmp $dst,$src" %}
13492   opcode(0x0F,0x40);
13493   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13494   ins_pipe( pipe_cmov_mem );
13495 %}
13496 
13497 // Compare 2 longs and CMOVE ptrs.
13498 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13499   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13500   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13501   ins_cost(200);
13502   format %{ "CMOV$cmp $dst,$src" %}
13503   opcode(0x0F,0x40);
13504   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13505   ins_pipe( pipe_cmov_reg );
13506 %}
13507 
13508 // Compare 2 longs and CMOVE doubles
13509 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13510   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13511   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13512   ins_cost(200);
13513   expand %{
13514     fcmovDPR_regS(cmp,flags,dst,src);
13515   %}
13516 %}
13517 
13518 // Compare 2 longs and CMOVE doubles
13519 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13520   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13521   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13522   ins_cost(200);
13523   expand %{
13524     fcmovD_regS(cmp,flags,dst,src);
13525   %}
13526 %}
13527 
13528 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13529   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13530   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13531   ins_cost(200);
13532   expand %{
13533     fcmovFPR_regS(cmp,flags,dst,src);
13534   %}
13535 %}
13536 
13537 
13538 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13539   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13540   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13541   ins_cost(200);
13542   expand %{
13543     fcmovF_regS(cmp,flags,dst,src);
13544   %}
13545 %}
13546 
13547 
13548 // ============================================================================
13549 // Procedure Call/Return Instructions
13550 // Call Java Static Instruction
13551 // Note: If this code changes, the corresponding ret_addr_offset() and
13552 //       compute_padding() functions will have to be adjusted.
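//       (Assumed rationale: ret_addr_offset() reports where the return
//       address lands relative to the start of this sequence, and
//       compute_padding() together with ins_alignment(4) keeps the 32-bit
//       call displacement naturally aligned so it can be patched atomically.)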
13553 instruct CallStaticJavaDirect(method meth) %{
13554   match(CallStaticJava);
13555   effect(USE meth);
13556 
13557   ins_cost(300);
13558   format %{ "CALL,static " %}
13559   opcode(0xE8); /* E8 cd */
13560   ins_encode( pre_call_resets,
13561               Java_Static_Call( meth ),
13562               call_epilog,
13563               post_call_FPU );
13564   ins_pipe( pipe_slow );
13565   ins_alignment(4);
13566 %}
13567 
13568 // Call Java Dynamic Instruction
13569 // Note: If this code changes, the corresponding ret_addr_offset() and
13570 //       compute_padding() functions will have to be adjusted.
13571 instruct CallDynamicJavaDirect(method meth) %{
13572   match(CallDynamicJava);
13573   effect(USE meth);
13574 
13575   ins_cost(300);
13576   format %{ "MOV    EAX,(oop)-1\n\t"
13577             "CALL,dynamic" %}
13578   opcode(0xE8); /* E8 cd */
13579   ins_encode( pre_call_resets,
13580               Java_Dynamic_Call( meth ),
13581               call_epilog,
13582               post_call_FPU );
13583   ins_pipe( pipe_slow );
13584   ins_alignment(4);
13585 %}
13586 
13587 // Call Runtime Instruction
13588 instruct CallRuntimeDirect(method meth) %{
13589   match(CallRuntime);
13590   effect(USE meth);
13591 
13592   ins_cost(300);
13593   format %{ "CALL,runtime " %}
13594   opcode(0xE8); /* E8 cd */
13595   // Use FFREEs to clear entries in float stack
13596   ins_encode( pre_call_resets,
13597               FFree_Float_Stack_All,
13598               Java_To_Runtime( meth ),
13599               post_call_FPU );
13600   ins_pipe( pipe_slow );
13601 %}
13602 
13603 // Call runtime without safepoint
13604 instruct CallLeafDirect(method meth) %{
13605   match(CallLeaf);
13606   effect(USE meth);
13607 
13608   ins_cost(300);
13609   format %{ "CALL_LEAF,runtime " %}
13610   opcode(0xE8); /* E8 cd */
13611   ins_encode( pre_call_resets,
13612               FFree_Float_Stack_All,
13613               Java_To_Runtime( meth ),
13614               Verify_FPU_For_Leaf, post_call_FPU );
13615   ins_pipe( pipe_slow );
13616 %}
13617 
13618 instruct CallLeafNoFPDirect(method meth) %{
13619   match(CallLeafNoFP);
13620   effect(USE meth);
13621 
13622   ins_cost(300);
13623   format %{ "CALL_LEAF_NOFP,runtime " %}
13624   opcode(0xE8); /* E8 cd */
13625   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13626   ins_pipe( pipe_slow );
13627 %}
13628 
13629 
13630 // Return Instruction
13631 // Remove the return address & jump to it.
13632 instruct Ret() %{
13633   match(Return);
13634   format %{ "RET" %}
13635   opcode(0xC3);
13636   ins_encode(OpcP);
13637   ins_pipe( pipe_jmp );
13638 %}
13639 
13640 // Tail Call; Jump from runtime stub to Java code.
13641 // Also known as an 'interprocedural jump'.
13642 // Target of jump will eventually return to caller.
13643 // TailJump below removes the return address.
13644 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13645   match(TailCall jump_target method_ptr);
13646   ins_cost(300);
13647   format %{ "JMP    $jump_target \t# EBX holds method" %}
13648   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13649   ins_encode( OpcP, RegOpc(jump_target) );
13650   ins_pipe( pipe_jmp );
13651 %}
13652 
13653 
13654 // Tail Jump; remove the return address; jump to target.
13655 // TailCall above leaves the return address around.
13656 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13657   match( TailJump jump_target ex_oop );
13658   ins_cost(300);
13659   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13660             "JMP    $jump_target " %}
13661   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13662   ins_encode( enc_pop_rdx,
13663               OpcP, RegOpc(jump_target) );
13664   ins_pipe( pipe_jmp );
13665 %}
13666 
13667 // Create exception oop: created by stack-crawling runtime code.
13668 // Created exception is now available to this handler, and is set up
13669 // just prior to jumping to this handler.  No code emitted.
13670 instruct CreateException( eAXRegP ex_oop )
13671 %{
13672   match(Set ex_oop (CreateEx));
13673 
13674   size(0);
13675   // use the following format syntax
13676   format %{ "# exception oop is in EAX; no code emitted" %}
13677   ins_encode();
13678   ins_pipe( empty );
13679 %}
13680 
13681 
13682 // Rethrow exception:
13683 // The exception oop will come in the first argument position.
13684 // Then JUMP (not call) to the rethrow stub code.
13685 instruct RethrowException()
13686 %{
13687   match(Rethrow);
13688 
13689   // use the following format syntax
13690   format %{ "JMP    rethrow_stub" %}
13691   ins_encode(enc_rethrow);
13692   ins_pipe( pipe_jmp );
13693 %}
13694 
13695 // inlined locking and unlocking
13696 
13697 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13698   predicate(Compile::current()->use_rtm());
13699   match(Set cr (FastLock object box));
13700   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13701   ins_cost(300);
13702   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13703   ins_encode %{
13704     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13705                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13706                  _counters, _rtm_counters, _stack_rtm_counters,
13707                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13708                  true, ra_->C->profile_rtm());
13709   %}
13710   ins_pipe(pipe_slow);
13711 %}
13712 
13713 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13714   predicate(!Compile::current()->use_rtm());
13715   match(Set cr (FastLock object box));
13716   effect(TEMP tmp, TEMP scr, USE_KILL box);
13717   ins_cost(300);
13718   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13719   ins_encode %{
13720     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13721                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13722   %}
13723   ins_pipe(pipe_slow);
13724 %}
13725 
13726 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13727   match(Set cr (FastUnlock object box));
13728   effect(TEMP tmp, USE_KILL box);
13729   ins_cost(300);
13730   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13731   ins_encode %{
13732     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13733   %}
13734   ins_pipe(pipe_slow);
13735 %}
13736 
13737 
13738 
13739 // ============================================================================
13740 // Safepoint Instruction
13741 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13742   match(SafePoint poll);
13743   effect(KILL cr, USE poll);
13744 
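  // The $poll operand holds the thread-local polling page address.  Under the
  // usual HotSpot scheme (assumed here), the VM arms that page when a
  // safepoint or handshake is pending, so the read below faults and the
  // signal handler stops this thread; otherwise the TEST is effectively free.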
13745   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13746   ins_cost(125);
13747   // EBP would need size(3)
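  // (TEST EAX,[reg] is 0x85 /r = 2 bytes; an EBP base cannot use the
  // zero-displacement ModRM form, so it would require a zero disp8, hence 3.)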
13748   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13749   ins_encode %{
13750     __ relocate(relocInfo::poll_type);
13751     address pre_pc = __ pc();
13752     __ testl(rax, Address($poll$$Register, 0));
13753     address post_pc = __ pc();
13754     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13755   %}
13756   ins_pipe(ialu_reg_mem);
13757 %}
13758 
13759 
13760 // ============================================================================
13761 // This name is KNOWN by the ADLC and cannot be changed.
13762 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13763 // for this guy.
13764 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13765   match(Set dst (ThreadLocal));
13766   effect(DEF dst, KILL cr);
13767 
13768   format %{ "MOV    $dst, Thread::current()" %}
13769   ins_encode %{
13770     Register dstReg = as_Register($dst$$reg);
13771     __ get_thread(dstReg);
13772   %}
13773   ins_pipe( ialu_reg_fat );
13774 %}
13775 
13776 
13777 
13778 //----------PEEPHOLE RULES-----------------------------------------------------
13779 // These must follow all instruction definitions as they use the names
13780 // defined in the instructions definitions.
13781 //
13782 // peepmatch ( root_instr_name [preceding_instruction]* );
13783 //
13784 // peepconstraint %{
13785 // (instruction_number.operand_name relational_op instruction_number.operand_name
13786 //  [, ...] );
13787 // // instruction numbers are zero-based using left to right order in peepmatch
13788 //
13789 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13790 // // provide an instruction_number.operand_name for each operand that appears
13791 // // in the replacement instruction's match rule
13792 //
13793 // ---------VM FLAGS---------------------------------------------------------
13794 //
13795 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13796 //
13797 // Each peephole rule is given an identifying number starting with zero and
13798 // increasing by one in the order seen by the parser.  An individual peephole
13799 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13800 // on the command-line.
13801 //
13802 // ---------CURRENT LIMITATIONS----------------------------------------------
13803 //
13804 // Only match adjacent instructions in same basic block
13805 // Only equality constraints
13806 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13807 // Only one replacement instruction
13808 //
13809 // ---------EXAMPLE----------------------------------------------------------
13810 //
13811 // // pertinent parts of existing instructions in architecture description
13812 // instruct movI(rRegI dst, rRegI src) %{
13813 //   match(Set dst (CopyI src));
13814 // %}
13815 //
13816 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13817 //   match(Set dst (AddI dst src));
13818 //   effect(KILL cr);
13819 // %}
13820 //
13821 // // Change (inc mov) to lea
13822 // peephole %{
13823 //   // increment preceded by register-register move
13824 //   peepmatch ( incI_eReg movI );
13825 //   // require that the destination register of the increment
13826 //   // match the destination register of the move
13827 //   peepconstraint ( 0.dst == 1.dst );
13828 //   // construct a replacement instruction that sets
13829 //   // the destination to ( move's source register + one )
13830 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13831 // %}
13832 //
13833 // Implementation no longer uses movX instructions since
13834 // machine-independent system no longer uses CopyX nodes.
13835 //
13836 // peephole %{
13837 //   peepmatch ( incI_eReg movI );
13838 //   peepconstraint ( 0.dst == 1.dst );
13839 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13840 // %}
13841 //
13842 // peephole %{
13843 //   peepmatch ( decI_eReg movI );
13844 //   peepconstraint ( 0.dst == 1.dst );
13845 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13846 // %}
13847 //
13848 // peephole %{
13849 //   peepmatch ( addI_eReg_imm movI );
13850 //   peepconstraint ( 0.dst == 1.dst );
13851 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13852 // %}
13853 //
13854 // peephole %{
13855 //   peepmatch ( addP_eReg_imm movP );
13856 //   peepconstraint ( 0.dst == 1.dst );
13857 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13858 // %}
13859 
13860 // // Change load of spilled value to only a spill
13861 // instruct storeI(memory mem, rRegI src) %{
13862 //   match(Set mem (StoreI mem src));
13863 // %}
13864 //
13865 // instruct loadI(rRegI dst, memory mem) %{
13866 //   match(Set dst (LoadI mem));
13867 // %}
13868 //
13869 peephole %{
13870   peepmatch ( loadI storeI );
13871   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13872   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13873 %}
13874 
13875 //----------SMARTSPILL RULES---------------------------------------------------
13876 // These must follow all instruction definitions as they use the names
13877 // defined in the instructions definitions.