1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode. During the emission the FPU stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
// Short aliases for the Assembler's imm_operand and disp32_operand constants.
// NOTE(review): by their names these select the relocation format for a
// 32-bit immediate / 32-bit displacement - confirm against Assembler.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Assembler shorthand: "__ foo(...)" expands to "_masm.foo(...)", so every
// function using it needs an assembler object named _masm in scope.
#define __ _masm.

// How to find the high register of a Long pair, given the low register:
// the macro simply adds 2 to its argument.
#define   HIGH_FROM_LOW(x) ((x)+2)
  258 
// The sign masks defined further below (see fp_signmask_pool) provide 128-bit
// aligned bitmasks to the XMM instructions, to allow sign-masking or sign-bit
// flipping.  They allow fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
  265     // Post-loop multi-versioning expects mask to be present in K1 register, till the time
  266     // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
  267     // curruption of value held in K1 register.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
  275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  277   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  278   // of 128-bits operands for SSE instructions.
  279   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  280   // Store the value to a 128-bits operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
// Buffer for 128-bits masks used by SSE instructions.
// It holds four 128-bit masks plus one spare 128-bit slot: double_quadword()
// rounds each address DOWN to a 16-byte boundary, so the spare slot at the
// front absorbs the adjustment when the array itself is not 16-byte aligned.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer below refers to the aligned 128-bit slot that double_quadword()
// filled with the given pattern in both 64-bit halves:
//   *_signmask_pool : every bit set except the sign bit of each float/double lane
//   *_signflip_pool : only the sign bit of each float/double lane set
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
// Not supported here: any call is reported through ShouldNotCallThis().
int MachCallNativeNode::ret_addr_offset() {
  ShouldNotCallThis();
  return -1;  // placeholder value after the ShouldNotCallThis() report
}
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
// EMIT_OPCODE() w/ relocation information
// Records a relocation of type 'reloc' at (instruction mark + offset) and then
// emits the opcode byte via the plain emit_opcode() overload.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
// EMIT_D16()
// Appends 'd16' to the instruction section as a 16-bit value.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
  384 
// EMIT_D32()
// Appends 'd32' to the instruction section as a 32-bit value.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}
  389 
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at the current instruction mark with the given
// 'format' before the 32-bit value itself is appended.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}
  396 
// emit 32 bit value and construct relocation entry from RelocationHolder
// The relocation is recorded at the current instruction mark with the given
// 'format' before the 32-bit value itself is appended.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only sanity check: an oop relocation whose value is neither 0 nor
  // the non-oop marker word must refer to a valid oop.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  415     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  419     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
// Emits a flag fixup to run right after a comiss/ucomiss-style compare so
// that an unordered (NaN) result reads as 'less than'.  When the parity flag
// is clear the fixup is skipped entirely.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  // Parity clear: nothing to fix, jump over the fixup.
  __ jccb(Assembler::noParity, exit);
  // Save EFLAGS on the stack so they can be edited in memory.
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  // (The mask also clears the SF and AF bits, which the compare has
  // already zeroed according to the note above.)
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  // Reload the edited flags.
  __ popf();
  __ bind(exit);
}
  519 
// Emits code that turns the flags of a preceding floating-point compare into
// a three-way integer result in 'dst'.  dst is preloaded with -1 and kept at
// that value when the parity flag is set or the result is 'below'; otherwise
// dst is set from the notEqual condition and zero-extended.
// NOTE(review): this presumably yields 0 for equal and 1 for greater - confirm
// against the Assembler's setb semantics.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  // setb only writes a byte; widen it to the full register.
  __ movzbl(dst, dst);
  __ bind(done);
}
  529 
  530 
  531 //=============================================================================
// The constant base node's output register mask is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
// The constant table is addressed absolutely on this platform, so the table
// base offset is always 0.
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
  537 
// No post-allocation expansion is requested for the constant base node.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Must never run: requires_postalloc_expand() returns false, so reaching this
// function is reported through ShouldNotReachHere().
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
  542 
// The constant base node emits no code (consistent with size() returning 0).
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
  546 
// Code size of the constant base node: 0 bytes, matching the empty emit().
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
  550 
#ifndef PRODUCT
// Non-product builds only: prints a one-line placeholder for this node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
  556 
  557 
  558 //=============================================================================
#ifndef PRODUCT
// Non-product builds only: prints a textual description of the method prolog
// to 'st'.  The local 'framesize' is decremented step by step so that each
// printed offset/size matches the piece of the frame being described.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    // With a stack bang, EBP is pushed explicitly, which accounts for one
    // more word of the frame.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // Without a stack bang, the frame is created first and EBP is stored
    // into its top word.
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    // The cookie occupies the word below the saved EBP.
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  631 
  632   C->output()->set_frame_complete(cbuf.insts_size());
  633 
  634   if (C->has_mach_constant_base_node()) {
  635     // NOTE: We set the table base offset here because users might be
  636     // emitted before MachConstantBaseNode.
  637     ConstantTable& constant_table = C->output()->constant_table();
  638     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  639   }
  640 }
  641 
// The prolog's size depends on too many settings to compute directly, so it
// is delegated to the generic MachNode::size().
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}
  645 
// Relocation count reported for the prolog.
// NOTE(review): the inherited comment calls the value "a large enough number"
// although it is 0 - confirm that the prolog really needs no relocation entries.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
  649 
  650 //=============================================================================
#ifndef PRODUCT
// Non-product builds only: prints a textual description of the method epilog
// to 'st', in the same order in which emit() produces the instructions.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  // The return safepoint poll is only shown for method compilations that poll.
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL     rsp, poll_offset[thread]  \n\t"
              "JA       #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif
  679 
// Emits the method epilog: optional vzeroupper, optional restore of the
// standard FPU control word, frame teardown (ADD ESP / POP EBP), optional
// reserved-stack check and, for polling method compilations, the return
// safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Pick the 32-bit immediate form (opcode 0x81) for sizes of 128 or more and
  // the 8-bit immediate form (opcode 0x83) otherwise; nothing is emitted for
  // a zero-sized frame.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // Restore EBP (shown as "POPL EBP" by format()).
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    // NOTE(review): 'masm' is never referenced - the "__" macro expands to
    // "_masm.".  It is kept because constructing an assembler may have side
    // effects on the code buffer; confirm before removing.
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    // During scratch emission (size estimation) no stub is registered and the
    // poll branches to a throw-away label instead.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}
  732 
  733 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  734   return MachNode::size(ra_); // too many variables; just compute it
  735                               // the hard way
  736 }
  737 
  738 int MachEpilogNode::reloc() const {
  739   return 0; // a large enough number
  740 }
  741 
  742 const Pipeline * MachEpilogNode::pipeline() const {
  743   return MachNode::pipeline_class();
  744 }
  745 
  746 //=============================================================================
  747 
  748 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  749 static enum RC rc_class( OptoReg::Name reg ) {
  750 
  751   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  752   if (OptoReg::is_stack(reg)) return rc_stack;
  753 
  754   VMReg r = OptoReg::as_VMReg(reg);
  755   if (r->is_Register()) return rc_int;
  756   if (r->is_FloatRegister()) {
  757     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  758     return rc_float;
  759   }
  760   if (r->is_KRegister()) return rc_kreg;
  761   assert(r->is_XMMRegister(), "must be");
  762   return rc_xmm;
  763 }
  764 
  765 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  766                         int opcode, const char *op_str, int size, outputStream* st ) {
  767   if( cbuf ) {
  768     emit_opcode  (*cbuf, opcode );
  769     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  770 #ifndef PRODUCT
  771   } else if( !do_size ) {
  772     if( size != 0 ) st->print("\n\t");
  773     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  774       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  775       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  776     } else { // FLD, FST, PUSH, POP
  777       st->print("%s [ESP + #%d]",op_str,offset);
  778     }
  779 #endif
  780   }
  781   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  782   return size+3+offset_size;
  783 }
  784 
  785 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  786 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  787                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  788   int in_size_in_bits = Assembler::EVEX_32bit;
  789   int evex_encoding = 0;
  790   if (reg_lo+1 == reg_hi) {
  791     in_size_in_bits = Assembler::EVEX_64bit;
  792     evex_encoding = Assembler::VEX_W;
  793   }
  794   if (cbuf) {
  795     MacroAssembler _masm(cbuf);
  796     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  797     //                          it maps more cases to single byte displacement
  798     _masm.set_managed();
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) {
  801         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  802       } else {
  803         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  804       }
  805     } else {
  806       if (is_load) {
  807         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  808       } else {
  809         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  810       }
  811     }
  812 #ifndef PRODUCT
  813   } else if (!do_size) {
  814     if (size != 0) st->print("\n\t");
  815     if (reg_lo+1 == reg_hi) { // double move?
  816       if (is_load) st->print("%s %s,[ESP + #%d]",
  817                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  818                               Matcher::regName[reg_lo], offset);
  819       else         st->print("MOVSD  [ESP + #%d],%s",
  820                               offset, Matcher::regName[reg_lo]);
  821     } else {
  822       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  823                               Matcher::regName[reg_lo], offset);
  824       else         st->print("MOVSS  [ESP + #%d],%s",
  825                               offset, Matcher::regName[reg_lo]);
  826     }
  827 #endif
  828   }
  829   bool is_single_byte = false;
  830   if ((UseAVX > 2) && (offset != 0)) {
  831     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  832   }
  833   int offset_size = 0;
  834   if (UseAVX > 2 ) {
  835     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  836   } else {
  837     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  838   }
  839   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  840   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  841   return size+5+offset_size;
  842 }
  843 
  844 
  845 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  846                             int src_hi, int dst_hi, int size, outputStream* st ) {
  847   if (cbuf) {
  848     MacroAssembler _masm(cbuf);
  849     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  850     _masm.set_managed();
  851     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  852       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  854     } else {
  855       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  856                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  857     }
  858 #ifndef PRODUCT
  859   } else if (!do_size) {
  860     if (size != 0) st->print("\n\t");
  861     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  862       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  863         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  864       } else {
  865         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  866       }
  867     } else {
  868       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  869         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  870       } else {
  871         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  872       }
  873     }
  874 #endif
  875   }
  876   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  877   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  878   int sz = (UseAVX > 2) ? 6 : 4;
  879   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  880       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  881   return size + sz;
  882 }
  883 
  884 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  885                             int src_hi, int dst_hi, int size, outputStream* st ) {
  886   // 32-bit
  887   if (cbuf) {
  888     MacroAssembler _masm(cbuf);
  889     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  890     _masm.set_managed();
  891     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  892              as_Register(Matcher::_regEncode[src_lo]));
  893 #ifndef PRODUCT
  894   } else if (!do_size) {
  895     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  896 #endif
  897   }
  898   return (UseAVX> 2) ? 6 : 4;
  899 }
  900 
  901 
  902 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  903                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  904   // 32-bit
  905   if (cbuf) {
  906     MacroAssembler _masm(cbuf);
  907     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  908     _masm.set_managed();
  909     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  910              as_XMMRegister(Matcher::_regEncode[src_lo]));
  911 #ifndef PRODUCT
  912   } else if (!do_size) {
  913     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  914 #endif
  915   }
  916   return (UseAVX> 2) ? 6 : 4;
  917 }
  918 
  919 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  920   if( cbuf ) {
  921     emit_opcode(*cbuf, 0x8B );
  922     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  923 #ifndef PRODUCT
  924   } else if( !do_size ) {
  925     if( size != 0 ) st->print("\n\t");
  926     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  927 #endif
  928   }
  929   return size+2;
  930 }
  931 
  932 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  933                                  int offset, int size, outputStream* st ) {
  934   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  935     if( cbuf ) {
  936       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  937       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  938 #ifndef PRODUCT
  939     } else if( !do_size ) {
  940       if( size != 0 ) st->print("\n\t");
  941       st->print("FLD    %s",Matcher::regName[src_lo]);
  942 #endif
  943     }
  944     size += 2;
  945   }
  946 
  947   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  948   const char *op_str;
  949   int op;
  950   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  951     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  952     op = 0xDD;
  953   } else {                   // 32-bit store
  954     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  955     op = 0xD9;
  956     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  957   }
  958 
  959   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  960 }
  961 
// The next two helpers are shared by the 32- and 64-bit VMs; they are defined in x86.ad.
  963 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  964                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  965 
  966 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  967                             int stack_offset, int reg, uint ireg, outputStream* st);
  968 
  969 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  970                                      int dst_offset, uint ireg, outputStream* st) {
  971   if (cbuf) {
  972     MacroAssembler _masm(cbuf);
  973     switch (ireg) {
  974     case Op_VecS:
  975       __ pushl(Address(rsp, src_offset));
  976       __ popl (Address(rsp, dst_offset));
  977       break;
  978     case Op_VecD:
  979       __ pushl(Address(rsp, src_offset));
  980       __ popl (Address(rsp, dst_offset));
  981       __ pushl(Address(rsp, src_offset+4));
  982       __ popl (Address(rsp, dst_offset+4));
  983       break;
  984     case Op_VecX:
  985       __ movdqu(Address(rsp, -16), xmm0);
  986       __ movdqu(xmm0, Address(rsp, src_offset));
  987       __ movdqu(Address(rsp, dst_offset), xmm0);
  988       __ movdqu(xmm0, Address(rsp, -16));
  989       break;
  990     case Op_VecY:
  991       __ vmovdqu(Address(rsp, -32), xmm0);
  992       __ vmovdqu(xmm0, Address(rsp, src_offset));
  993       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  994       __ vmovdqu(xmm0, Address(rsp, -32));
  995       break;
  996     case Op_VecZ:
  997       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  998       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  999       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1000       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1001       break;
 1002     default:
 1003       ShouldNotReachHere();
 1004     }
 1005 #ifndef PRODUCT
 1006   } else {
 1007     switch (ireg) {
 1008     case Op_VecS:
 1009       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1010                 "popl    [rsp + #%d]",
 1011                 src_offset, dst_offset);
 1012       break;
 1013     case Op_VecD:
 1014       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1015                 "popq    [rsp + #%d]\n\t"
 1016                 "pushl   [rsp + #%d]\n\t"
 1017                 "popq    [rsp + #%d]",
 1018                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1019       break;
 1020      case Op_VecX:
 1021       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1022                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1023                 "movdqu  [rsp + #%d], xmm0\n\t"
 1024                 "movdqu  xmm0, [rsp - #16]",
 1025                 src_offset, dst_offset);
 1026       break;
 1027     case Op_VecY:
 1028       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1029                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1030                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1031                 "vmovdqu xmm0, [rsp - #32]",
 1032                 src_offset, dst_offset);
 1033       break;
 1034     case Op_VecZ:
 1035       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1036                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1037                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1038                 "vmovdqu xmm0, [rsp - #64]",
 1039                 src_offset, dst_offset);
 1040       break;
 1041     default:
 1042       ShouldNotReachHere();
 1043     }
 1044 #endif
 1045   }
 1046 }
 1047 
 1048 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1049   // Get registers to move
 1050   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1051   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1052   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1053   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1054 
 1055   enum RC src_second_rc = rc_class(src_second);
 1056   enum RC src_first_rc = rc_class(src_first);
 1057   enum RC dst_second_rc = rc_class(dst_second);
 1058   enum RC dst_first_rc = rc_class(dst_first);
 1059 
 1060   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1061 
 1062   // Generate spill code!
 1063   int size = 0;
 1064 
 1065   if( src_first == dst_first && src_second == dst_second )
 1066     return size;            // Self copy, no move
 1067 
 1068   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1069     uint ireg = ideal_reg();
 1070     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1071     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1072     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1073     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1074       // mem -> mem
 1075       int src_offset = ra_->reg2offset(src_first);
 1076       int dst_offset = ra_->reg2offset(dst_first);
 1077       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1078     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1079       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1080     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1081       int stack_offset = ra_->reg2offset(dst_first);
 1082       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1083     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1084       int stack_offset = ra_->reg2offset(src_first);
 1085       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1086     } else {
 1087       ShouldNotReachHere();
 1088     }
 1089     return 0;
 1090   }
 1091 
 1092   // --------------------------------------
 1093   // Check for mem-mem move.  push/pop to move.
 1094   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1095     if( src_second == dst_first ) { // overlapping stack copy ranges
 1096       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1097       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1098       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1099       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1100     }
 1101     // move low bits
 1102     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1103     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1104     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1105       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1106       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1107     }
 1108     return size;
 1109   }
 1110 
 1111   // --------------------------------------
 1112   // Check for integer reg-reg copy
 1113   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1114     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1115 
 1116   // Check for integer store
 1117   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1118     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1119 
 1120   // Check for integer load
 1121   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1122     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1123 
 1124   // Check for integer reg-xmm reg copy
 1125   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1126     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1127             "no 64 bit integer-float reg moves" );
 1128     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1129   }
 1130   // --------------------------------------
 1131   // Check for float reg-reg copy
 1132   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1133     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1134             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1135     if( cbuf ) {
 1136 
 1137       // Note the mucking with the register encode to compensate for the 0/1
 1138       // indexing issue mentioned in a comment in the reg_def sections
 1139       // for FPR registers many lines above here.
 1140 
 1141       if( src_first != FPR1L_num ) {
 1142         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1143         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1144         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1145         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1146      } else {
 1147         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1148         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1149      }
 1150 #ifndef PRODUCT
 1151     } else if( !do_size ) {
 1152       if( size != 0 ) st->print("\n\t");
 1153       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1154       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1155 #endif
 1156     }
 1157     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1158   }
 1159 
 1160   // Check for float store
 1161   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1162     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1163   }
 1164 
 1165   // Check for float load
 1166   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1167     int offset = ra_->reg2offset(src_first);
 1168     const char *op_str;
 1169     int op;
 1170     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1171       op_str = "FLD_D";
 1172       op = 0xDD;
 1173     } else {                   // 32-bit load
 1174       op_str = "FLD_S";
 1175       op = 0xD9;
 1176       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1177     }
 1178     if( cbuf ) {
 1179       emit_opcode  (*cbuf, op );
 1180       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1181       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1182       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1183 #ifndef PRODUCT
 1184     } else if( !do_size ) {
 1185       if( size != 0 ) st->print("\n\t");
 1186       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1187 #endif
 1188     }
 1189     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1190     return size + 3+offset_size+2;
 1191   }
 1192 
 1193   // Check for xmm reg-reg copy
 1194   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1195     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1196             (src_first+1 == src_second && dst_first+1 == dst_second),
 1197             "no non-adjacent float-moves" );
 1198     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1199   }
 1200 
 1201   // Check for xmm reg-integer reg copy
 1202   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1203     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1204             "no 64 bit float-integer reg moves" );
 1205     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1206   }
 1207 
 1208   // Check for xmm store
 1209   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1210     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1211   }
 1212 
 1213   // Check for float xmm load
 1214   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1215     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1216   }
 1217 
 1218   // Copy from float reg to xmm reg
 1219   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1220     // copy to the top of stack from floating point reg
 1221     // and use LEA to preserve flags
 1222     if( cbuf ) {
 1223       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1224       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1225       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1226       emit_d8(*cbuf,0xF8);
 1227 #ifndef PRODUCT
 1228     } else if( !do_size ) {
 1229       if( size != 0 ) st->print("\n\t");
 1230       st->print("LEA    ESP,[ESP-8]");
 1231 #endif
 1232     }
 1233     size += 4;
 1234 
 1235     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1236 
 1237     // Copy from the temp memory to the xmm reg.
 1238     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1239 
 1240     if( cbuf ) {
 1241       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1242       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1243       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1244       emit_d8(*cbuf,0x08);
 1245 #ifndef PRODUCT
 1246     } else if( !do_size ) {
 1247       if( size != 0 ) st->print("\n\t");
 1248       st->print("LEA    ESP,[ESP+8]");
 1249 #endif
 1250     }
 1251     size += 4;
 1252     return size;
 1253   }
 1254 
 1255   // AVX-512 opmask specific spilling.
 1256   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1257     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1258     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1259     MacroAssembler _masm(cbuf);
 1260     int offset = ra_->reg2offset(src_first);
 1261     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1262     return 0;
 1263   }
 1264 
 1265   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1266     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1267     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1268     MacroAssembler _masm(cbuf);
 1269     int offset = ra_->reg2offset(dst_first);
 1270     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1271     return 0;
 1272   }
 1273 
 1274   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1275     Unimplemented();
 1276     return 0;
 1277   }
 1278 
 1279   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1280     Unimplemented();
 1281     return 0;
 1282   }
 1283 
 1284   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1285     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1286     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1287     MacroAssembler _masm(cbuf);
 1288     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289     return 0;
 1290   }
 1291 
 1292   assert( size > 0, "missed a case" );
 1293 
 1294   // --------------------------------------------------------------------
 1295   // Check for second bits still needing moving.
 1296   if( src_second == dst_second )
 1297     return size;               // Self copy; no move
 1298   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1299 
 1300   // Check for second word int-int move
 1301   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1302     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1303 
 1304   // Check for second word integer store
 1305   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1306     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1307 
 1308   // Check for second word integer load
 1309   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1310     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1311 
 1312   Unimplemented();
 1313   return 0; // Mute compiler
 1314 }
 1315 
 1316 #ifndef PRODUCT
 1317 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1318   implementation( NULL, ra_, false, st );
 1319 }
 1320 #endif
 1321 
 1322 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1323   implementation( &cbuf, ra_, false, NULL );
 1324 }
 1325 
 1326 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1327   return MachNode::size(ra_);
 1328 }
 1329 
 1330 
 1331 //=============================================================================
 1332 #ifndef PRODUCT
 1333 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1335   int reg = ra_->get_reg_first(this);
 1336   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1337 }
 1338 #endif
 1339 
 1340 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1341   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1342   int reg = ra_->get_encode(this);
 1343   if( offset >= 128 ) {
 1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1345     emit_rm(cbuf, 0x2, reg, 0x04);
 1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1347     emit_d32(cbuf, offset);
 1348   }
 1349   else {
 1350     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1351     emit_rm(cbuf, 0x1, reg, 0x04);
 1352     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1353     emit_d8(cbuf, offset);
 1354   }
 1355 }
 1356 
 1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1359   if( offset >= 128 ) {
 1360     return 7;
 1361   }
 1362   else {
 1363     return 4;
 1364   }
 1365 }
 1366 
 1367 //=============================================================================
 1368 #ifndef PRODUCT
 1369 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1370   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1371   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1372   st->print_cr("\tNOP");
 1373   st->print_cr("\tNOP");
 1374   if( !OptoBreakpoint )
 1375     st->print_cr("\tNOP");
 1376 }
 1377 #endif
 1378 
 1379 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1380   MacroAssembler masm(&cbuf);
 1381 #ifdef ASSERT
 1382   uint insts_size = cbuf.insts_size();
 1383 #endif
 1384   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1385   masm.jump_cc(Assembler::notEqual,
 1386                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1387   /* WARNING these NOPs are critical so that verified entry point is properly
 1388      aligned for patching by NativeJump::patch_verified_entry() */
 1389   int nops_cnt = 2;
 1390   if( !OptoBreakpoint ) // Leave space for int3
 1391      nops_cnt += 1;
 1392   masm.nop(nops_cnt);
 1393 
 1394   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1395 }
 1396 
 1397 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1398   return OptoBreakpoint ? 11 : 12;
 1399 }
 1400 
 1401 
 1402 //=============================================================================
 1403 
 1404 // Vector calling convention not supported.
 1405 const bool Matcher::supports_vector_calling_convention() {
 1406   return false;
 1407 }
 1408 
 1409 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1410   Unimplemented();
 1411   return OptoRegPair(0, 0);
 1412 }
 1413 
 1414 // Is this branch offset short enough that a short branch can be used?
 1415 //
 1416 // NOTE: If the platform does not provide any short branch variants, then
 1417 //       this method should return false for offset 0.
 1418 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1419   // The passed offset is relative to address of the branch.
 1420   // On 86 a branch displacement is calculated relative to address
 1421   // of a next instruction.
 1422   offset -= br_size;
 1423 
 1424   // the short version of jmpConUCF2 contains multiple branches,
 1425   // making the reach slightly less
 1426   if (rule == jmpConUCF2_rule)
 1427     return (-126 <= offset && offset <= 125);
 1428   return (-128 <= offset && offset <= 127);
 1429 }
 1430 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1435 bool Matcher::can_be_java_arg( int reg ) {
 1436   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1437   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1438   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1439   return false;
 1440 }
 1441 
 1442 bool Matcher::is_spillable_arg( int reg ) {
 1443   return can_be_java_arg(reg);
 1444 }
 1445 
 1446 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1447   // Use hardware integer DIV instruction when
 1448   // it is faster than a code which use multiply.
 1449   // Only when constant divisor fits into 32 bit
 1450   // (min_jint is excluded to get only correct
 1451   // positive 32 bit values from negative).
 1452   return VM_Version::has_fast_idiv() &&
 1453          (divisor == (int)divisor && divisor != min_jint);
 1454 }
 1455 
 1456 // Register for DIVI projection of divmodI
 1457 RegMask Matcher::divI_proj_mask() {
 1458   return EAX_REG_mask();
 1459 }
 1460 
 1461 // Register for MODI projection of divmodI
 1462 RegMask Matcher::modI_proj_mask() {
 1463   return EDX_REG_mask();
 1464 }
 1465 
 1466 // Register for DIVL projection of divmodL
 1467 RegMask Matcher::divL_proj_mask() {
 1468   ShouldNotReachHere();
 1469   return RegMask();
 1470 }
 1471 
 1472 // Register for MODL projection of divmodL
 1473 RegMask Matcher::modL_proj_mask() {
 1474   ShouldNotReachHere();
 1475   return RegMask();
 1476 }
 1477 
 1478 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1479   return NO_REG_mask();
 1480 }
 1481 
 1482 // Returns true if the high 32 bits of the value is known to be zero.
 1483 bool is_operand_hi32_zero(Node* n) {
 1484   int opc = n->Opcode();
 1485   if (opc == Op_AndL) {
 1486     Node* o2 = n->in(2);
 1487     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1488       return true;
 1489     }
 1490   }
 1491   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1492     return true;
 1493   }
 1494   return false;
 1495 }
 1496 
 1497 %}
 1498 
 1499 //----------ENCODING BLOCK-----------------------------------------------------
 1500 // This block specifies the encoding classes used by the compiler to output
 1501 // byte streams.  Encoding classes generate functions which are called by
 1502 // Machine Instruction Nodes in order to generate the bit encoding of the
 1503 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1505 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1506 // operand to generate a function which returns its register number when
 1507 // queried.   CONST_INTER causes an operand to generate a function which
 1508 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1509 // operand to generate four functions which return the Base Register, the
 1510 // Index Register, the Scale Value, and the Offset Value of the operand when
 1511 // queried.  COND_INTER causes an operand to generate six functions which
 1512 // return the encoding code (ie - encoding bits for the instruction)
 1513 // associated with each basic boolean condition for a conditional instruction.
 1514 // Instructions specify two basic values for encoding.  They use the
 1515 // ins_encode keyword to specify their encoding class (which must be one of
 1516 // the class names specified in the encoding block), and they use the
 1517 // opcode keyword to specify, in order, their primary, secondary, and
 1518 // tertiary opcode.  Only the opcode sections which a particular instruction
 1519 // needs for encoding need to be specified.
 1520 encode %{
 1521   // Build emit functions for each basic byte or larger field in the intel
 1522   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1523   // code in the enc_class source block.  Emit functions will live in the
 1524   // main source block for now.  In future, we can generalize this by
 1525   // adding a syntax that specifies the sizes of fields in an order,
 1526   // so that the adlc can build the emit functions automagically
 1527 
 1528   // Emit primary opcode
 1529   enc_class OpcP %{
 1530     emit_opcode(cbuf, $primary);
 1531   %}
 1532 
 1533   // Emit secondary opcode
 1534   enc_class OpcS %{
 1535     emit_opcode(cbuf, $secondary);
 1536   %}
 1537 
 1538   // Emit opcode directly
 1539   enc_class Opcode(immI d8) %{
 1540     emit_opcode(cbuf, $d8$$constant);
 1541   %}
 1542 
 1543   enc_class SizePrefix %{
 1544     emit_opcode(cbuf,0x66);
 1545   %}
 1546 
 1547   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1548     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1549   %}
 1550 
 1551   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1552     emit_opcode(cbuf,$opcode$$constant);
 1553     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1554   %}
 1555 
 1556   enc_class mov_r32_imm0( rRegI dst ) %{
 1557     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1558     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1559   %}
 1560 
 1561   enc_class cdq_enc %{
 1562     // Full implementation of Java idiv and irem; checks for
 1563     // special case as described in JVM spec., p.243 & p.271.
 1564     //
 1565     //         normal case                           special case
 1566     //
 1567     // input : rax,: dividend                         min_int
 1568     //         reg: divisor                          -1
 1569     //
 1570     // output: rax,: quotient  (= rax, idiv reg)       min_int
 1571     //         rdx: remainder (= rax, irem reg)       0
 1572     //
 1573     //  Code sequnce:
 1574     //
 1575     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1576     //  0F 85 0B 00 00 00    jne         normal_case
 1577     //  33 D2                xor         rdx,edx
 1578     //  83 F9 FF             cmp         rcx,0FFh
 1579     //  0F 84 03 00 00 00    je          done
 1580     //                  normal_case:
 1581     //  99                   cdq
 1582     //  F7 F9                idiv        rax,ecx
 1583     //                  done:
 1584     //
 1585     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1586     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1587     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1588     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1589     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1590     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1591     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1592     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1593     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1594     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1595     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1596     // normal_case:
 1597     emit_opcode(cbuf,0x99);                                         // cdq
 1598     // idiv (note: must be emitted by the user of this rule)
 1599     // normal:
 1600   %}
 1601 
 1602   // Dense encoding for older common ops
 1603   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1604     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1605   %}
 1606 
 1607 
 1608   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 1609   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1610     // Check for 8-bit immediate, and set sign extend bit in opcode
 1611     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1612       emit_opcode(cbuf, $primary | 0x02);
 1613     }
 1614     else {                          // If 32-bit immediate
 1615       emit_opcode(cbuf, $primary);
 1616     }
 1617   %}
 1618 
 1619   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1620     // Emit primary opcode and set sign-extend bit
 1621     // Check for 8-bit immediate, and set sign extend bit in opcode
 1622     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1623       emit_opcode(cbuf, $primary | 0x02);    }
 1624     else {                          // If 32-bit immediate
 1625       emit_opcode(cbuf, $primary);
 1626     }
 1627     // Emit r/m byte with secondary opcode, after primary opcode.
 1628     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1629   %}
 1630 
 1631   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1632     // Check for 8-bit immediate, and set sign extend bit in opcode
 1633     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1634       $$$emit8$imm$$constant;
 1635     }
 1636     else {                          // If 32-bit immediate
 1637       // Output immediate
 1638       $$$emit32$imm$$constant;
 1639     }
 1640   %}
 1641 
 1642   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1643     // Emit primary opcode and set sign-extend bit
 1644     // Check for 8-bit immediate, and set sign extend bit in opcode
 1645     int con = (int)$imm$$constant; // Throw away top bits
 1646     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1647     // Emit r/m byte with secondary opcode, after primary opcode.
 1648     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1649     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1650     else                               emit_d32(cbuf,con);
 1651   %}
 1652 
 1653   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1654     // Emit primary opcode and set sign-extend bit
 1655     // Check for 8-bit immediate, and set sign extend bit in opcode
 1656     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1657     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1658     // Emit r/m byte with tertiary opcode, after primary opcode.
 1659     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1660     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1661     else                               emit_d32(cbuf,con);
 1662   %}
 1663 
 1664   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1665     emit_cc(cbuf, $secondary, $dst$$reg );
 1666   %}
 1667 
 1668   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1669     int destlo = $dst$$reg;
 1670     int desthi = HIGH_FROM_LOW(destlo);
 1671     // bswap lo
 1672     emit_opcode(cbuf, 0x0F);
 1673     emit_cc(cbuf, 0xC8, destlo);
 1674     // bswap hi
 1675     emit_opcode(cbuf, 0x0F);
 1676     emit_cc(cbuf, 0xC8, desthi);
 1677     // xchg lo and hi
 1678     emit_opcode(cbuf, 0x87);
 1679     emit_rm(cbuf, 0x3, destlo, desthi);
 1680   %}
 1681 
 1682   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1683     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1684   %}
 1685 
 1686   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1687     $$$emit8$primary;
 1688     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1689   %}
 1690 
 1691   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1692     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1693     emit_d8(cbuf, op >> 8 );
 1694     emit_d8(cbuf, op & 255);
 1695   %}
 1696 
 1697   // emulate a CMOV with a conditional branch around a MOV
 1698   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1699     // Invert sense of branch from sense of CMOV
 1700     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1701     emit_d8( cbuf, $brOffs$$constant );
 1702   %}
 1703 
 1704   enc_class enc_PartialSubtypeCheck( ) %{
 1705     Register Redi = as_Register(EDI_enc); // result register
 1706     Register Reax = as_Register(EAX_enc); // super class
 1707     Register Recx = as_Register(ECX_enc); // killed
 1708     Register Resi = as_Register(ESI_enc); // sub class
 1709     Label miss;
 1710 
 1711     MacroAssembler _masm(&cbuf);
 1712     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1713                                      NULL, &miss,
 1714                                      /*set_cond_codes:*/ true);
 1715     if ($primary) {
 1716       __ xorptr(Redi, Redi);
 1717     }
 1718     __ bind(miss);
 1719   %}
 1720 
 1721   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1722     MacroAssembler masm(&cbuf);
 1723     int start = masm.offset();
 1724     if (UseSSE >= 2) {
 1725       if (VerifyFPU) {
 1726         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1727       }
 1728     } else {
 1729       // External c_calling_convention expects the FPU stack to be 'clean'.
 1730       // Compiled code leaves it dirty.  Do cleanup now.
 1731       masm.empty_FPU_stack();
 1732     }
 1733     if (sizeof_FFree_Float_Stack_All == -1) {
 1734       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1735     } else {
 1736       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1737     }
 1738   %}
 1739 
 1740   enc_class Verify_FPU_For_Leaf %{
 1741     if( VerifyFPU ) {
 1742       MacroAssembler masm(&cbuf);
 1743       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1744     }
 1745   %}
 1746 
 1747   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1748     // This is the instruction starting address for relocation info.
 1749     cbuf.set_insts_mark();
 1750     $$$emit8$primary;
 1751     // CALL directly to the runtime
 1752     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1753                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1754 
 1755     if (UseSSE >= 2) {
 1756       MacroAssembler _masm(&cbuf);
 1757       BasicType rt = tf()->return_type();
 1758 
 1759       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1760         // A C runtime call where the return value is unused.  In SSE2+
 1761         // mode the result needs to be removed from the FPU stack.  It's
 1762         // likely that this function call could be removed by the
 1763         // optimizer if the C function is a pure function.
 1764         __ ffree(0);
 1765       } else if (rt == T_FLOAT) {
 1766         __ lea(rsp, Address(rsp, -4));
 1767         __ fstp_s(Address(rsp, 0));
 1768         __ movflt(xmm0, Address(rsp, 0));
 1769         __ lea(rsp, Address(rsp,  4));
 1770       } else if (rt == T_DOUBLE) {
 1771         __ lea(rsp, Address(rsp, -8));
 1772         __ fstp_d(Address(rsp, 0));
 1773         __ movdbl(xmm0, Address(rsp, 0));
 1774         __ lea(rsp, Address(rsp,  8));
 1775       }
 1776     }
 1777   %}
 1778 
 1779   enc_class pre_call_resets %{
 1780     // If method sets FPU control word restore it here
 1781     debug_only(int off0 = cbuf.insts_size());
 1782     if (ra_->C->in_24_bit_fp_mode()) {
 1783       MacroAssembler _masm(&cbuf);
 1784       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1785     }
 1786     // Clear upper bits of YMM registers when current compiled code uses
 1787     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1788     MacroAssembler _masm(&cbuf);
 1789     __ vzeroupper();
 1790     debug_only(int off1 = cbuf.insts_size());
 1791     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1792   %}
 1793 
 1794   enc_class post_call_FPU %{
 1795     // If method sets FPU control word do it here also
 1796     if (Compile::current()->in_24_bit_fp_mode()) {
 1797       MacroAssembler masm(&cbuf);
 1798       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1799     }
 1800   %}
 1801 
 1802   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1803     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1804     // who we intended to call.
 1805     cbuf.set_insts_mark();
 1806     $$$emit8$primary;
 1807 
 1808     if (!_method) {
 1809       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1810                      runtime_call_Relocation::spec(),
 1811                      RELOC_IMM32);
 1812     } else {
 1813       int method_index = resolved_method_index(cbuf);
 1814       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1815                                                   : static_call_Relocation::spec(method_index);
 1816       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1817                      rspec, RELOC_DISP32);
 1818       // Emit stubs for static call.
 1819       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1820       if (stub == NULL) {
 1821         ciEnv::current()->record_failure("CodeCache is full");
 1822         return;
 1823       }
 1824     }
 1825   %}
 1826 
 1827   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1828     MacroAssembler _masm(&cbuf);
 1829     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1830   %}
 1831 
 1832   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1833     int disp = in_bytes(Method::from_compiled_offset());
 1834     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1835 
 1836     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1837     cbuf.set_insts_mark();
 1838     $$$emit8$primary;
 1839     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1840     emit_d8(cbuf, disp);             // Displacement
 1841 
 1842   %}
 1843 
 1844 //   Following encoding is no longer used, but may be restored if calling
 1845 //   convention changes significantly.
 1846 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1847 //
 1848 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1849 //     // int ic_reg     = Matcher::inline_cache_reg();
 1850 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1851 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1852 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1853 //
 1854 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1855 //     // // so we load it immediately before the call
 1856 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1857 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1858 //
 1859 //     // xor rbp,ebp
 1860 //     emit_opcode(cbuf, 0x33);
 1861 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1862 //
 1863 //     // CALL to interpreter.
 1864 //     cbuf.set_insts_mark();
 1865 //     $$$emit8$primary;
 1866 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1867 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1868 //   %}
 1869 
 1870   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1871     $$$emit8$primary;
 1872     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1873     $$$emit8$shift$$constant;
 1874   %}
 1875 
 1876   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1877     // Load immediate does not have a zero or sign extended version
 1878     // for 8-bit immediates
 1879     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1880     $$$emit32$src$$constant;
 1881   %}
 1882 
 1883   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1884     // Load immediate does not have a zero or sign extended version
 1885     // for 8-bit immediates
 1886     emit_opcode(cbuf, $primary + $dst$$reg);
 1887     $$$emit32$src$$constant;
 1888   %}
 1889 
 1890   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1891     // Load immediate does not have a zero or sign extended version
 1892     // for 8-bit immediates
 1893     int dst_enc = $dst$$reg;
 1894     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1895     if (src_con == 0) {
 1896       // xor dst, dst
 1897       emit_opcode(cbuf, 0x33);
 1898       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1899     } else {
 1900       emit_opcode(cbuf, $primary + dst_enc);
 1901       emit_d32(cbuf, src_con);
 1902     }
 1903   %}
 1904 
 1905   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1906     // Load immediate does not have a zero or sign extended version
 1907     // for 8-bit immediates
 1908     int dst_enc = $dst$$reg + 2;
 1909     int src_con = ((julong)($src$$constant)) >> 32;
 1910     if (src_con == 0) {
 1911       // xor dst, dst
 1912       emit_opcode(cbuf, 0x33);
 1913       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1914     } else {
 1915       emit_opcode(cbuf, $primary + dst_enc);
 1916       emit_d32(cbuf, src_con);
 1917     }
 1918   %}
 1919 
 1920 
 1921   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1922   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1923     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1924   %}
 1925 
 1926   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1927     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1928   %}
 1929 
 1930   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1931     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1932   %}
 1933 
 1934   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1935     $$$emit8$primary;
 1936     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1937   %}
 1938 
 1939   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1940     $$$emit8$secondary;
 1941     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1942   %}
 1943 
 1944   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1945     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1946   %}
 1947 
 1948   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1949     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1950   %}
 1951 
 1952   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1953     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1954   %}
 1955 
 1956   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1957     // Output immediate
 1958     $$$emit32$src$$constant;
 1959   %}
 1960 
 1961   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1962     // Output Float immediate bits
 1963     jfloat jf = $src$$constant;
 1964     int    jf_as_bits = jint_cast( jf );
 1965     emit_d32(cbuf, jf_as_bits);
 1966   %}
 1967 
 1968   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1969     // Output Float immediate bits
 1970     jfloat jf = $src$$constant;
 1971     int    jf_as_bits = jint_cast( jf );
 1972     emit_d32(cbuf, jf_as_bits);
 1973   %}
 1974 
 1975   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1976     // Output immediate
 1977     $$$emit16$src$$constant;
 1978   %}
 1979 
 1980   enc_class Con_d32(immI src) %{
 1981     emit_d32(cbuf,$src$$constant);
 1982   %}
 1983 
 1984   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1985     // Output immediate memory reference
 1986     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1987     emit_d32(cbuf, 0x00);
 1988   %}
 1989 
 1990   enc_class lock_prefix( ) %{
 1991     emit_opcode(cbuf,0xF0);         // [Lock]
 1992   %}
 1993 
 1994   // Cmp-xchg long value.
 1995   // Note: we need to swap rbx, and rcx before and after the
 1996   //       cmpxchg8 instruction because the instruction uses
 1997   //       rcx as the high order word of the new value to store but
 1998   //       our register encoding uses rbx,.
 1999   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2000 
 2001     // XCHG  rbx,ecx
 2002     emit_opcode(cbuf,0x87);
 2003     emit_opcode(cbuf,0xD9);
 2004     // [Lock]
 2005     emit_opcode(cbuf,0xF0);
 2006     // CMPXCHG8 [Eptr]
 2007     emit_opcode(cbuf,0x0F);
 2008     emit_opcode(cbuf,0xC7);
 2009     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2010     // XCHG  rbx,ecx
 2011     emit_opcode(cbuf,0x87);
 2012     emit_opcode(cbuf,0xD9);
 2013   %}
 2014 
 2015   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2016     // [Lock]
 2017     emit_opcode(cbuf,0xF0);
 2018 
 2019     // CMPXCHG [Eptr]
 2020     emit_opcode(cbuf,0x0F);
 2021     emit_opcode(cbuf,0xB1);
 2022     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2023   %}
 2024 
 2025   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2026     // [Lock]
 2027     emit_opcode(cbuf,0xF0);
 2028 
 2029     // CMPXCHGB [Eptr]
 2030     emit_opcode(cbuf,0x0F);
 2031     emit_opcode(cbuf,0xB0);
 2032     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2033   %}
 2034 
 2035   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2036     // [Lock]
 2037     emit_opcode(cbuf,0xF0);
 2038 
 2039     // 16-bit mode
 2040     emit_opcode(cbuf, 0x66);
 2041 
 2042     // CMPXCHGW [Eptr]
 2043     emit_opcode(cbuf,0x0F);
 2044     emit_opcode(cbuf,0xB1);
 2045     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2046   %}
 2047 
 2048   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2049     int res_encoding = $res$$reg;
 2050 
 2051     // MOV  res,0
 2052     emit_opcode( cbuf, 0xB8 + res_encoding);
 2053     emit_d32( cbuf, 0 );
 2054     // JNE,s  fail
 2055     emit_opcode(cbuf,0x75);
 2056     emit_d8(cbuf, 5 );
 2057     // MOV  res,1
 2058     emit_opcode( cbuf, 0xB8 + res_encoding);
 2059     emit_d32( cbuf, 1 );
 2060     // fail:
 2061   %}
 2062 
 2063   enc_class set_instruction_start( ) %{
 2064     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2065   %}
 2066 
 2067   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2068     int reg_encoding = $ereg$$reg;
 2069     int base  = $mem$$base;
 2070     int index = $mem$$index;
 2071     int scale = $mem$$scale;
 2072     int displace = $mem$$disp;
 2073     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2074     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2075   %}
 2076 
 2077   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2078     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2079     int base  = $mem$$base;
 2080     int index = $mem$$index;
 2081     int scale = $mem$$scale;
 2082     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2083     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2084     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2085   %}
 2086 
 2087   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2088     int r1, r2;
 2089     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2090     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2091     emit_opcode(cbuf,0x0F);
 2092     emit_opcode(cbuf,$tertiary);
 2093     emit_rm(cbuf, 0x3, r1, r2);
 2094     emit_d8(cbuf,$cnt$$constant);
 2095     emit_d8(cbuf,$primary);
 2096     emit_rm(cbuf, 0x3, $secondary, r1);
 2097     emit_d8(cbuf,$cnt$$constant);
 2098   %}
 2099 
 2100   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2101     emit_opcode( cbuf, 0x8B ); // Move
 2102     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2103     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2104       emit_d8(cbuf,$primary);
 2105       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2106       emit_d8(cbuf,$cnt$$constant-32);
 2107     }
 2108     emit_d8(cbuf,$primary);
 2109     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2110     emit_d8(cbuf,31);
 2111   %}
 2112 
 2113   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2114     int r1, r2;
 2115     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2116     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2117 
 2118     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2119     emit_rm(cbuf, 0x3, r1, r2);
 2120     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2121       emit_opcode(cbuf,$primary);
 2122       emit_rm(cbuf, 0x3, $secondary, r1);
 2123       emit_d8(cbuf,$cnt$$constant-32);
 2124     }
 2125     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2126     emit_rm(cbuf, 0x3, r2, r2);
 2127   %}
 2128 
 2129   // Clone of RegMem but accepts an extra parameter to access each
 2130   // half of a double in memory; it never needs relocation info.
 2131   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2132     emit_opcode(cbuf,$opcode$$constant);
 2133     int reg_encoding = $rm_reg$$reg;
 2134     int base     = $mem$$base;
 2135     int index    = $mem$$index;
 2136     int scale    = $mem$$scale;
 2137     int displace = $mem$$disp + $disp_for_half$$constant;
 2138     relocInfo::relocType disp_reloc = relocInfo::none;
 2139     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2140   %}
 2141 
 2142   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2143   //
 2144   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2145   // and it never needs relocation information.
 2146   // Frequently used to move data between FPU's Stack Top and memory.
 2147   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2148     int rm_byte_opcode = $rm_opcode$$constant;
 2149     int base     = $mem$$base;
 2150     int index    = $mem$$index;
 2151     int scale    = $mem$$scale;
 2152     int displace = $mem$$disp;
 2153     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2154     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2155   %}
 2156 
 2157   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2158     int rm_byte_opcode = $rm_opcode$$constant;
 2159     int base     = $mem$$base;
 2160     int index    = $mem$$index;
 2161     int scale    = $mem$$scale;
 2162     int displace = $mem$$disp;
 2163     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2164     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2165   %}
 2166 
 2167   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2168     int reg_encoding = $dst$$reg;
 2169     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2170     int index        = 0x04;            // 0x04 indicates no index
 2171     int scale        = 0x00;            // 0x00 indicates no scale
 2172     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2173     relocInfo::relocType disp_reloc = relocInfo::none;
 2174     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2175   %}
 2176 
 2177   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2178     // Compare dst,src
 2179     emit_opcode(cbuf,0x3B);
 2180     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2181     // jmp dst < src around move
 2182     emit_opcode(cbuf,0x7C);
 2183     emit_d8(cbuf,2);
 2184     // move dst,src
 2185     emit_opcode(cbuf,0x8B);
 2186     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2187   %}
 2188 
 2189   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2190     // Compare dst,src
 2191     emit_opcode(cbuf,0x3B);
 2192     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2193     // jmp dst > src around move
 2194     emit_opcode(cbuf,0x7F);
 2195     emit_d8(cbuf,2);
 2196     // move dst,src
 2197     emit_opcode(cbuf,0x8B);
 2198     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2199   %}
 2200 
 2201   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2202     // A value already in FPR1 is stored with a plain FST.  Any other register is
 2203     // first pushed with FLD and then stored with FSTP, which pops it again.
 2204     const bool need_push = ($src$$reg != FPR1L_enc);
 2205     if (need_push) {
 2206       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2207       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2208     }
 2209     int store_ext = need_push ? 0x3 : 0x2;  // 0x3 = store & pop, 0x2 = just store
 2210     int mem_base  = $mem$$base;
 2211     int mem_index = $mem$$index;
 2212     int mem_scale = $mem$$scale;
 2213     int mem_disp  = $mem$$disp;
 2214     relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2215     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2216     emit_opcode(cbuf,$primary);
 2217     encode_RegMem(cbuf, store_ext, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
 2218   %}
 2219 
 2220   enc_class neg_reg(rRegI dst) %{
 2221     const int dst_enc = $dst$$reg;        // register that is negated in place
 2222     emit_opcode(cbuf,0xF7);               // NEG dst: opcode 0xF7 with opcode extension 3
 2223     emit_rm(cbuf, 0x3, 0x03, dst_enc );
 2224   %}
 2225 
 2226   enc_class setLT_reg(eCXRegI dst) %{
 2227     const int dst_enc = $dst$$reg;
 2228     emit_opcode(cbuf,0x0F);               // SETLT dst: two-byte opcode 0x0F 0x9C
 2229     emit_opcode(cbuf,0x9C);
 2230     emit_rm( cbuf, 0x3, 0x4, dst_enc );
 2231   %}
 2232 
 2233   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2234     const int p_enc = $p$$reg, q_enc = $q$$reg, y_enc = $y$$reg, mask_enc = $tmp$$reg;
 2235 
 2236     // SUB p,q -- leaves the borrow of the subtraction in the carry flag
 2237     emit_opcode(cbuf,0x2B);
 2238     emit_rm(cbuf, 0x3, p_enc, q_enc);
 2239     // SBB tmp,tmp -- tmp becomes all ones if the SUB borrowed, else zero
 2240     emit_opcode(cbuf,0x1B);
 2241     emit_rm(cbuf, 0x3, mask_enc, mask_enc);
 2242     // AND tmp,y -- tmp is y if the SUB borrowed, else zero
 2243     emit_opcode(cbuf,0x23);
 2244     emit_rm(cbuf, 0x3, mask_enc, y_enc);
 2245     // ADD p,tmp
 2246     emit_opcode(cbuf,0x03);
 2247     emit_rm(cbuf, 0x3, p_enc, mask_enc);
 2248   %}
 2249 
 2250   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{    // left shift of the dst lo/hi pair by the count in ECX
 2251     // TEST shift,32
 2252     emit_opcode(cbuf,0xF7);
 2253     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2254     emit_d32(cbuf,0x20);
 2255     // JEQ,s small
 2256     emit_opcode(cbuf, 0x74);
 2257     emit_d8(cbuf, 0x04);                   // skips the MOV and CLR below (two bytes each)
 2258     // MOV    $dst.hi,$dst.lo
 2259     emit_opcode( cbuf, 0x8B );
 2260     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2261     // CLR    $dst.lo
 2262     emit_opcode(cbuf, 0x33);
 2263     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2264 // small:
 2265     // SHLD   $dst.hi,$dst.lo,$shift
 2266     emit_opcode(cbuf,0x0F);
 2267     emit_opcode(cbuf,0xA5);
 2268     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2269     // SHL    $dst.lo,$shift
 2270     emit_opcode(cbuf,0xD3);
 2271     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2272   %}
 2273 
 2274   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{    // logical right shift of the dst lo/hi pair by the count in ECX
 2275     // TEST shift,32
 2276     emit_opcode(cbuf,0xF7);
 2277     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2278     emit_d32(cbuf,0x20);
 2279     // JEQ,s small
 2280     emit_opcode(cbuf, 0x74);
 2281     emit_d8(cbuf, 0x04);                   // skips the MOV and CLR below (two bytes each)
 2282     // MOV    $dst.lo,$dst.hi
 2283     emit_opcode( cbuf, 0x8B );
 2284     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2285     // CLR    $dst.hi
 2286     emit_opcode(cbuf, 0x33);
 2287     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2288 // small:
 2289     // SHRD   $dst.lo,$dst.hi,$shift
 2290     emit_opcode(cbuf,0x0F);
 2291     emit_opcode(cbuf,0xAD);
 2292     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2293     // SHR    $dst.hi,$shift
 2294     emit_opcode(cbuf,0xD3);
 2295     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2296   %}
 2297 
 2298   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{    // arithmetic right shift of the dst lo/hi pair by the count in ECX
 2299     // TEST shift,32
 2300     emit_opcode(cbuf,0xF7);
 2301     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2302     emit_d32(cbuf,0x20);
 2303     // JEQ,s small
 2304     emit_opcode(cbuf, 0x74);
 2305     emit_d8(cbuf, 0x05);                   // skips the MOV (two bytes) and the SAR-by-immediate (three bytes) below
 2306     // MOV    $dst.lo,$dst.hi
 2307     emit_opcode( cbuf, 0x8B );
 2308     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2309     // SAR    $dst.hi,31
 2310     emit_opcode(cbuf, 0xC1);
 2311     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2312     emit_d8(cbuf, 0x1F );
 2313 // small:
 2314     // SHRD   $dst.lo,$dst.hi,$shift
 2315     emit_opcode(cbuf,0x0F);
 2316     emit_opcode(cbuf,0xAD);
 2317     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2318     // SAR    $dst.hi,$shift
 2319     emit_opcode(cbuf,0xD3);
 2320     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2321   %}
 2322 
 2323 
 2324   // ----------------- Encodings for floating point unit -----------------
 2325   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2326   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV: primary opcode byte followed by a register-form second byte
 2327     $$$emit8$primary;                            // the instruction's primary opcode byte
 2328     emit_rm(cbuf, 0x3, $secondary, $src$$reg );  // second byte built from 0x3, the secondary opcode and the src register
 2329   %}
 2330 
 2331   // Pop argument in FPR0 with FSTP ST(0)
 2332   enc_class PopFPU() %{                  // emits the fixed two-byte sequence DD D8 (FSTP ST(0))
 2333     emit_opcode( cbuf, 0xDD );
 2334     emit_d8( cbuf, 0xD8 );
 2335   %}
 2336 
 2337   // !!!!! equivalent to Pop_Reg_FPR
 2338   enc_class Pop_Reg_DPR( regDPR dst ) %{   // store the FPU top-of-stack into dst and pop
 2339     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2340     emit_d8( cbuf, 0xD8+$dst$$reg );     // second byte selects the target register: 0xD8 + dst encoding
 2341   %}
 2342 
 2343   enc_class Push_Reg_DPR( regDPR dst ) %{  // push a copy of the register operand onto the FPU stack
 2344     emit_opcode( cbuf, 0xD9 );
 2345     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1): the register encoding is one higher than its stack index
 2346   %}
 2347 
 2348   enc_class strictfp_bias1( regDPR dst ) %{  // load the constant at addr_fpu_subnormal_bias1() and multiply it into dst
 2349     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2350     emit_opcode( cbuf, 0x2D );           // second byte of the FLD; the 32-bit address of the constant follows
 2351     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2352     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2353     emit_opcode( cbuf, 0xC8+$dst$$reg ); // second byte of the FMULP: 0xC8 + dst encoding
 2354   %}
 2355 
 2356   enc_class strictfp_bias2( regDPR dst ) %{  // load the constant at addr_fpu_subnormal_bias2() and multiply it into dst
 2357     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2358     emit_opcode( cbuf, 0x2D );           // second byte of the FLD; the 32-bit address of the constant follows
 2359     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2360     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2361     emit_opcode( cbuf, 0xC8+$dst$$reg ); // second byte of the FMULP: 0xC8 + dst encoding
 2362   %}
 2363 
 2364   // Special case for moving an integer register to a stack slot.
 2365   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS, with the primary opcode emitted as well
 2366     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );  // arguments: opcode, register field, stack-slot displacement
 2367   %}
 2368 
 2369   // Special case for moving a register to a stack slot.
 2370   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS: operand bytes addressing the stack slot dst via ESP
 2371     // Opcode already emitted
 2372     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2373     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2374     emit_d32(cbuf, $dst$$disp);   // Displacement
 2375   %}
 2376 
 2377   // Push the integer in stackSlot 'src' onto FP-stack
 2378   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2379     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );  // opcode and opcode extension come from the instruction's primary/secondary
 2380   %}
 2381 
 2382   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2383   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2384     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );  // opcode 0xD9 with opcode extension 3
 2385   %}
 2386 
 2387   // Same as Pop_Mem_FPR except for opcode
 2388   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2389   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2390     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );  // opcode 0xDD with opcode extension 3
 2391   %}
 2392 
 2393   enc_class Pop_Reg_FPR( regFPR dst ) %{   // store the FPU top-of-stack into dst and pop
 2394     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2395     emit_d8( cbuf, 0xD8+$dst$$reg );     // second byte selects the target register: 0xD8 + dst encoding
 2396   %}
 2397 
 2398   enc_class Push_Reg_FPR( regFPR dst ) %{  // push a copy of the register operand onto the FPU stack
 2399     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2400     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // the register encoding is one higher than its stack index
 2401   %}
 2402 
 2403   // Push FPU's float to a stack-slot, and pop FPU-stack
 2404   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2405     const int src_enc = $src$$reg;
 2406     const bool on_top = (src_enc == FPR1L_enc);   // src is FPR1 already: no FLD needed, plain store
 2407     if (!on_top) {
 2408       emit_opcode( cbuf, 0xD9 );                  // FLD    ST(i-1)
 2409       emit_d8( cbuf, 0xC0-1+src_enc );
 2410     }
 2411     store_to_stackslot( cbuf, 0xD9, on_top ? 0x02 : 0x03, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2412   %}
 2413 
 2414   // Push FPU's double to a stack-slot, and pop FPU-stack
 2415   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2416     const int src_enc = $src$$reg;
 2417     const bool on_top = (src_enc == FPR1L_enc);   // src is FPR1 already: no FLD needed, plain store
 2418     if (!on_top) {
 2419       emit_opcode( cbuf, 0xD9 );                  // FLD    ST(i-1)
 2420       emit_d8( cbuf, 0xC0-1+src_enc );
 2421     }
 2422     store_to_stackslot( cbuf, 0xDD, on_top ? 0x02 : 0x03, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2423   %}
 2424 
 2425   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2426   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2427     const int src_enc = $src$$reg;
 2428     const bool need_push = (src_enc != FPR1L_enc);   // anything other than FPR1 is pushed first
 2429     if (need_push) {
 2430       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2431       emit_d8( cbuf, 0xC0-1+src_enc );
 2432     }
 2433     emit_opcode( cbuf, 0xDD );           // FST<P> ST(i)
 2434     emit_d8( cbuf, (need_push ? 0xD8 : 0xD0 - 1) + $dst$$reg );  // -1 since we skip FLD
 2435   %}
 2436 
 2437 
 2438   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{   // push dst, then bring src into FPR1 if it is not already there
 2439     // load dst in FPR0
 2440     emit_opcode( cbuf, 0xD9 );
 2441     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2442     if ($src$$reg != FPR1L_enc) {
 2443       // fincstp
 2444       emit_opcode (cbuf, 0xD9);
 2445       emit_opcode (cbuf, 0xF7);
 2446       // swap src with FPR1:
 2447       // FXCH FPR1 with src
 2448       emit_opcode(cbuf, 0xD9);
 2449       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2450       // fdecstp
 2451       emit_opcode (cbuf, 0xD9);
 2452       emit_opcode (cbuf, 0xF6);
 2453     }
 2454   %}
 2455 
 2456   enc_class Push_ModD_encoding(regD src0, regD src1) %{    // push src1, then src0, onto the FPU stack via a stack temp
 2457     MacroAssembler _masm(&cbuf);
 2458     __ subptr(rsp, 8);                               // 8-byte temp; it is not released in this encoding
 2459     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2460     __ fld_d(Address(rsp, 0));
 2461     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);  // the same temp is reused for the second operand
 2462     __ fld_d(Address(rsp, 0));
 2463   %}
 2464 
 2465   enc_class Push_ModF_encoding(regF src0, regF src1) %{    // push src1, then src0, onto the FPU stack via a stack temp
 2466     MacroAssembler _masm(&cbuf);
 2467     __ subptr(rsp, 4);                               // 4-byte temp; it is not released in this encoding
 2468     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2469     __ fld_s(Address(rsp, 0));
 2470     __ movflt(Address(rsp, 0), $src0$$XMMRegister);  // the same temp is reused for the second operand
 2471     __ fld_s(Address(rsp, 0));
 2472   %}
 2473 
 2474   enc_class Push_ResultD(regD dst) %{    // pop the FPU top-of-stack into the XMM register dst via [rsp]
 2475     MacroAssembler _masm(&cbuf);
 2476     __ fstp_d(Address(rsp, 0));          // assumes an 8-byte temp is already allocated at [rsp]
 2477     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2478     __ addptr(rsp, 8);                   // release the 8-byte temp
 2479   %}
 2480 
 2481   enc_class Push_ResultF(regF dst, immI d8) %{    // pop the FPU top-of-stack into the XMM register dst via [rsp]
 2482     MacroAssembler _masm(&cbuf);
 2483     __ fstp_s(Address(rsp, 0));          // assumes a temp is already allocated at [rsp]
 2484     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2485     __ addptr(rsp, $d8$$constant);       // the number of bytes to release is supplied by the d8 operand
 2486   %}
 2487 
 2488   enc_class Push_SrcD(regD src) %{       // push the XMM register src onto the FPU stack via a stack temp
 2489     MacroAssembler _masm(&cbuf);
 2490     __ subptr(rsp, 8);                   // 8-byte temp; it is not released in this encoding
 2491     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2492     __ fld_d(Address(rsp, 0));
 2493   %}
 2494 
 2495   enc_class push_stack_temp_qword() %{   // allocate an 8-byte temp by lowering rsp
 2496     MacroAssembler _masm(&cbuf);
 2497     __ subptr(rsp, 8);
 2498   %}
 2499 
 2500   enc_class pop_stack_temp_qword() %{    // release an 8-byte temp by raising rsp
 2501     MacroAssembler _masm(&cbuf);
 2502     __ addptr(rsp, 8);
 2503   %}
 2504 
 2505   enc_class push_xmm_to_fpr1(regD src) %{  // push the XMM register src onto the FPU stack through [rsp]
 2506     MacroAssembler _masm(&cbuf);
 2507     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // rsp is not adjusted here; the 8 bytes at [rsp] are overwritten
 2508     __ fld_d(Address(rsp, 0));
 2509   %}
 2510 
 2511   enc_class Push_Result_Mod_DPR( regDPR src) %{    // exchange FPR1 with src below the FPU top-of-stack; emits nothing if src is FPR1
 2512     if ($src$$reg != FPR1L_enc) {
 2513       // fincstp
 2514       emit_opcode (cbuf, 0xD9);
 2515       emit_opcode (cbuf, 0xF7);
 2516       // FXCH FPR1 with src
 2517       emit_opcode(cbuf, 0xD9);
 2518       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2519       // fdecstp
 2520       emit_opcode (cbuf, 0xD9);
 2521       emit_opcode (cbuf, 0xF6);
 2522     }
 2523     // // following asm replaced with Pop_Reg_FPR or Pop_Mem_FPR
 2524     // // FSTP   FPR$dst$$reg
 2525     // emit_opcode( cbuf, 0xDD );
 2526     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2527   %}
 2528 
 2529   enc_class fnstsw_sahf_skip_parity() %{ // copy the FPU status flags into EFLAGS, then branch over what follows if parity is clear
 2530     // fnstsw ax
 2531     emit_opcode( cbuf, 0xDF );
 2532     emit_opcode( cbuf, 0xE0 );
 2533     // sahf
 2534     emit_opcode( cbuf, 0x9E );
 2535     // jnp  ::skip
 2536     emit_opcode( cbuf, 0x7B );
 2537     emit_opcode( cbuf, 0x05 );           // displacement: skips the next 5 bytes, which are emitted after this encoding
 2538   %}
 2539 
 2540   enc_class emitModDPR() %{              // FPREM loop: repeat while the parity flag is set after FNSTSW/SAHF
 2541     // fprem must be iterative
 2542     // :: loop
 2543     // fprem
 2544     emit_opcode( cbuf, 0xD9 );
 2545     emit_opcode( cbuf, 0xF8 );
 2546     // wait
 2547     emit_opcode( cbuf, 0x9b );
 2548     // fnstsw ax
 2549     emit_opcode( cbuf, 0xDF );
 2550     emit_opcode( cbuf, 0xE0 );
 2551     // sahf
 2552     emit_opcode( cbuf, 0x9E );
 2553     // jp  ::loop
 2554     emit_opcode( cbuf, 0x0F );
 2555     emit_opcode( cbuf, 0x8A );
 2556     emit_opcode( cbuf, 0xF4 );           // 32-bit displacement 0xFFFFFFF4 = -12, least significant byte first:
 2557     emit_opcode( cbuf, 0xFF );           // back over the 12 bytes of this loop (6 above plus the 6-byte jump)
 2558     emit_opcode( cbuf, 0xFF );
 2559     emit_opcode( cbuf, 0xFF );
 2560   %}
 2561 
 2562   enc_class fpu_flags() %{               // move the FPU compare result into EFLAGS, treating unordered as LT
 2563     // fnstsw_ax
 2564     emit_opcode( cbuf, 0xDF);
 2565     emit_opcode( cbuf, 0xE0);
 2566     // test ax,0x0400
 2567     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2568     emit_opcode( cbuf, 0xA9 );
 2569     emit_d16   ( cbuf, 0x0400 );
 2570     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2571     // // test rax,0x0400
 2572     // emit_opcode( cbuf, 0xA9 );
 2573     // emit_d32   ( cbuf, 0x00000400 );
 2574     //
 2575     // jz exit (no unordered comparison)
 2576     emit_opcode( cbuf, 0x74 );
 2577     emit_d8    ( cbuf, 0x02 );   // skips only the two-byte "mov ah,1" below; the sahf is always executed
 2578     // mov ah,1 - treat as LT case (set carry flag)
 2579     emit_opcode( cbuf, 0xB4 );
 2580     emit_d8    ( cbuf, 0x01 );
 2581     // sahf
 2582     emit_opcode( cbuf, 0x9E);
 2583   %}
 2584 
 2585   enc_class cmpF_P6_fixup() %{           // rewrite the integer flags to the LT case when the parity flag reports a NaN
 2586     // Fixup the integer flags in case comparison involved a NaN
 2587     //
 2588     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2589     emit_opcode( cbuf, 0x7B );
 2590     emit_d8    ( cbuf, 0x03 );   // skips the MOV (two bytes) and the SAHF (one byte), landing on the NOP
 2591     // MOV AH,1 - treat as LT case (set carry flag)
 2592     emit_opcode( cbuf, 0xB4 );
 2593     emit_d8    ( cbuf, 0x01 );
 2594     // SAHF
 2595     emit_opcode( cbuf, 0x9E);
 2596     // NOP     // target for branch to avoid branch to branch
 2597     emit_opcode( cbuf, 0x90);
 2598   %}
 2599 
 2600 //     fnstsw_ax();
 2601 //     sahf();
 2602 //     movl(dst, nan_result);
 2603 //     jcc(Assembler::parity, exit);
 2604 //     movl(dst, less_result);
 2605 //     jcc(Assembler::below, exit);
 2606 //     movl(dst, equal_result);
 2607 //     jcc(Assembler::equal, exit);
 2608 //     movl(dst, greater_result);
 2609 
 2610 // less_result     = -1;
 2611 // greater_result  =  1;
 2612 // equal_result    =  0;
 2613 // nan_result      = -1;
 2614 
 2615   enc_class CmpF_Result(rRegI dst) %{    // dst = -1 (NaN or less), 0 (equal) or 1 (greater) from the FPU compare flags
 2616     // fnstsw_ax();
 2617     emit_opcode( cbuf, 0xDF);
 2618     emit_opcode( cbuf, 0xE0);
 2619     // sahf
 2620     emit_opcode( cbuf, 0x9E);
 2621     // movl(dst, nan_result);
 2622     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2623     emit_d32( cbuf, -1 );
 2624     // jcc(Assembler::parity, exit);
 2625     emit_opcode( cbuf, 0x7A );
 2626     emit_d8    ( cbuf, 0x13 );   // 0x13 = 19 bytes to exit: three 5-byte moves plus two 2-byte jumps
 2627     // movl(dst, less_result);
 2628     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2629     emit_d32( cbuf, -1 );
 2630     // jcc(Assembler::below, exit);
 2631     emit_opcode( cbuf, 0x72 );
 2632     emit_d8    ( cbuf, 0x0C );   // 0x0C = 12 bytes to exit: two 5-byte moves plus one 2-byte jump
 2633     // movl(dst, equal_result);
 2634     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2635     emit_d32( cbuf, 0 );
 2636     // jcc(Assembler::equal, exit);
 2637     emit_opcode( cbuf, 0x74 );
 2638     emit_d8    ( cbuf, 0x05 );   // 5 bytes to exit: the final 5-byte move
 2639     // movl(dst, greater_result);
 2640     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2641     emit_d32( cbuf, 1 );
 2642   %}
 2643 
 2644 
 2645   // Compare the longs and set flags
 2646   // BROKEN!  Do Not use as-is
 2647   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{    // compare high halves; only if they are equal compare the low halves
 2648     // CMP    $src1.hi,$src2.hi
 2649     emit_opcode( cbuf, 0x3B );
 2650     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2651     // JNE,s  done
 2652     emit_opcode(cbuf,0x75);
 2653     emit_d8(cbuf, 2 );               // skips the two-byte CMP of the low halves below
 2654     // CMP    $src1.lo,$src2.lo
 2655     emit_opcode( cbuf, 0x3B );
 2656     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2657 // done:
 2658   %}
 2659 
 2660   enc_class convert_int_long( regL dst, rRegI src ) %{
 2661     // Copy src into both halves of dst, then shift the high half right arithmetically by 31.
 2662     int lo   = $dst$$reg;
 2663     int hi   = HIGH_FROM_LOW(lo);
 2664     int from = $src$$reg;
 2665     encode_Copy( cbuf, lo, from );   // mov dst.lo,src
 2666     encode_Copy( cbuf, hi, from );   // mov dst.hi,src
 2667     // sar dst.hi,31
 2668     emit_opcode( cbuf, 0xC1 );
 2669     emit_rm(cbuf, 0x3, 7, hi );
 2670     emit_d8(cbuf, 0x1F );
 2671   %}
 2672 
 2673   enc_class convert_long_double( eRegL src ) %{    // load the long in src onto the FPU stack via two pushes on the CPU stack
 2674     // push $src.hi
 2675     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2676     // push $src.lo
 2677     emit_opcode(cbuf, 0x50+$src$$reg  );
 2678     // fild 64-bits at [SP]
 2679     emit_opcode(cbuf,0xdf);
 2680     emit_d8(cbuf, 0x6C);
 2681     emit_d8(cbuf, 0x24);
 2682     emit_d8(cbuf, 0x00);
 2683     // pop stack
 2684     emit_opcode(cbuf, 0x83); // add  SP, #8
 2685     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2686     emit_d8(cbuf, 0x8);              // releases the two 4-byte pushes above
 2687   %}
 2688 
 2689   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2690     // IMUL   EDX:EAX,src1
 2691     emit_opcode( cbuf, 0xF7 );
 2692     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2693     // SAR    EDX,cnt-32 -- omitted entirely when nothing remains to shift beyond the first 32 bits
 2694     const int sar_bits = static_cast<int>($cnt$$constant) - 32;
 2695     if (0 < sar_bits) {
 2696       emit_opcode(cbuf, 0xC1);
 2697       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2698       emit_d8(cbuf, sar_bits);
 2699     }
 2700   %}
 2701 
 2702   // this version doesn't have add sp, 8
 2703   enc_class convert_long_double2( eRegL src ) %{   // as convert_long_double, but the 8 pushed bytes stay on the CPU stack
 2704     // push $src.hi
 2705     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2706     // push $src.lo
 2707     emit_opcode(cbuf, 0x50+$src$$reg  );
 2708     // fild 64-bits at [SP]
 2709     emit_opcode(cbuf,0xdf);
 2710     emit_d8(cbuf, 0x6C);
 2711     emit_d8(cbuf, 0x24);
 2712     emit_d8(cbuf, 0x00);
 2713   %}
 2714 
 2715   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{   // signed 32x32 -> 64 multiply
 2716     // Basic idea: long = (long)int * (long)int
 2717     // IMUL EDX:EAX, src
 2718     emit_opcode( cbuf, 0xF7 );
 2719     emit_rm( cbuf, 0x3, 0x5, $src$$reg);   // opcode extension 5 selects IMUL
 2720   %}
 2721 
 2722   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{  // unsigned 32x32 -> 64 multiply
 2723     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2724     // MUL EDX:EAX, src
 2725     emit_opcode( cbuf, 0xF7 );
 2726     emit_rm( cbuf, 0x3, 0x4, $src$$reg);   // opcode extension 4 selects MUL
 2727   %}
 2728 
 2729   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2730     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2731     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2732     const int dst_lo = $dst$$reg, dst_hi = HIGH_FROM_LOW(dst_lo);   // written as EAX / EDX in the comments below
 2733     const int src_lo = $src$$reg, src_hi = HIGH_FROM_LOW(src_lo);
 2734     const int cross  = $tmp$$reg;                                   // accumulates the two cross products
 2735     // MOV tmp,src.lo ; IMUL tmp,EDX
 2736     encode_Copy( cbuf, cross, src_lo );
 2737     emit_opcode( cbuf, 0x0F );
 2738     emit_opcode( cbuf, 0xAF );
 2739     emit_rm( cbuf, 0x3, cross, dst_hi );
 2740     // MOV EDX,src.hi ; IMUL EDX,EAX
 2741     encode_Copy( cbuf, dst_hi, src_hi );
 2742     emit_opcode( cbuf, 0x0F );
 2743     emit_opcode( cbuf, 0xAF );
 2744     emit_rm( cbuf, 0x3, dst_hi, dst_lo );
 2745     // ADD tmp,EDX
 2746     emit_opcode( cbuf, 0x03 );
 2747     emit_rm( cbuf, 0x3, cross, dst_hi );
 2748     // MUL EDX:EAX,src.lo
 2749     emit_opcode( cbuf, 0xF7 );
 2750     emit_rm( cbuf, 0x3, 0x4, src_lo );
 2751     emit_opcode( cbuf, 0x03 );                 // ADD EDX,tmp
 2752     emit_rm( cbuf, 0x3, dst_hi, cross );
 2753   %}
 2754 
 2755   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{   // multiply the long in EDX:EAX by a small constant
 2756     // Basic idea: lo(result) = lo(src * y_lo)
 2757     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2758     // IMUL   $tmp,EDX,$src
 2759     emit_opcode( cbuf, 0x6B );
 2760     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2761     emit_d8( cbuf, (int)$src$$constant );     // the constant is emitted as an 8-bit immediate here
 2762     // MOV    EDX,$src
 2763     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2764     emit_d32( cbuf, (int)$src$$constant );    // and as a 32-bit immediate here
 2765     // MUL   EDX:EAX,EDX
 2766     emit_opcode( cbuf, 0xF7 );
 2767     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2768     // ADD    EDX,$tmp
 2769     emit_opcode( cbuf, 0x03 );
 2770     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2771   %}
 2772 
 2773   enc_class long_div( eRegL src1, eRegL src2 ) %{    // long division: push both operands and call SharedRuntime::ldiv
 2774     // PUSH src1.hi -- select the high-half register first, then add it to the PUSH base opcode 0x50
 2775     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
 2776     // PUSH src1.lo
 2777     emit_opcode(cbuf, 0x50+$src1$$reg                );
 2778     // PUSH src2.hi
 2779     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
 2780     // PUSH src2.lo
 2781     emit_opcode(cbuf, 0x50+$src2$$reg                );
 2782     // CALL directly to the runtime
 2783     cbuf.set_insts_mark();
 2784     emit_opcode(cbuf,0xE8);       // Call into runtime
 2785     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2786     // Restore stack: release the four 32-bit words pushed above
 2787     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2788     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2789     emit_d8(cbuf, 4*4);
 2790   %}
 2791 
 2792   enc_class long_mod( eRegL src1, eRegL src2 ) %{    // long remainder: push both operands and call SharedRuntime::lrem
 2793     // PUSH src1.hi -- select the high-half register first, then add it to the PUSH base opcode 0x50
 2794     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
 2795     // PUSH src1.lo
 2796     emit_opcode(cbuf, 0x50+$src1$$reg                );
 2797     // PUSH src2.hi
 2798     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
 2799     // PUSH src2.lo
 2800     emit_opcode(cbuf, 0x50+$src2$$reg                );
 2801     // CALL directly to the runtime
 2802     cbuf.set_insts_mark();
 2803     emit_opcode(cbuf,0xE8);       // Call into runtime
 2804     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2805     // Restore stack: release the four 32-bit words pushed above
 2806     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2807     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2808     emit_d8(cbuf, 4*4);
 2809   %}
 2810 
 2811   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2812     const int tmp_enc = $tmp$$reg, src_lo = $src$$reg;
 2813     emit_opcode(cbuf, 0x8B);                             // MOV   tmp,src.lo
 2814     emit_rm(cbuf, 0x3, tmp_enc, src_lo);
 2815     // OR    tmp,src.hi -- the result is zero only when both halves of src are zero
 2816     emit_opcode(cbuf, 0x0B);
 2817     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW(src_lo));
 2818   %}
 2819 
 2820   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{   // compare low halves; only if they are equal compare the high halves
 2821     // CMP    $src1.lo,$src2.lo
 2822     emit_opcode( cbuf, 0x3B );
 2823     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2824     // JNE,s  skip
 2825     emit_cc(cbuf, 0x70, 0x5);
 2826     emit_d8(cbuf,2);                 // skips the two-byte CMP of the high halves below
 2827     // CMP    $src1.hi,$src2.hi
 2828     emit_opcode( cbuf, 0x3B );
 2829     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2830   %}
 2831 
 2832   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2833     const int lo1 = $src1$$reg, lo2 = $src2$$reg, scratch = $tmp$$reg;
 2834     emit_opcode( cbuf, 0x3B );      // CMP    src1.lo,src2.lo -- long compare; set flags for low bits
 2835     emit_rm(cbuf, 0x3, lo1, lo2 );
 2836     // MOV    tmp,src1.hi
 2837     emit_opcode( cbuf, 0x8B );
 2838     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW(lo1) );
 2839     // SBB    tmp,src2.hi -- compute flags for long compare
 2840     emit_opcode( cbuf, 0x1B );
 2841     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW(lo2) );
 2842   %}
 2843 
 2844   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2845     const int scratch = $tmp$$reg, src_lo = $src$$reg;
 2846     emit_opcode(cbuf,0x33);  // XOR    tmp,tmp
 2847     emit_rm(cbuf,0x3, scratch, scratch);
 2848     // CMP    tmp,src.lo
 2849     emit_opcode( cbuf, 0x3B );
 2850     emit_rm(cbuf, 0x3, scratch, src_lo );
 2851     // SBB    tmp,src.hi
 2852     emit_opcode( cbuf, 0x1B );
 2853     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW(src_lo) );
 2854   %}
 2855 
 2856  // Sniff, sniff... smells like Gnu Superoptimizer
 2857   enc_class neg_long( eRegL dst ) %{     // negate the dst lo/hi pair in place
 2858     emit_opcode(cbuf,0xF7);    // NEG hi
 2859     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2860     emit_opcode(cbuf,0xF7);    // NEG lo
 2861     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2862     emit_opcode(cbuf,0x83);    // SBB hi,0 -- subtracts the carry left by NEG lo from the high half
 2863     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2864     emit_d8    (cbuf,0 );
 2865   %}
 2866 
 2867   enc_class enc_pop_rdx() %{             // single-byte POP EDX
 2868     emit_opcode(cbuf,0x5A);
 2869   %}
 2870 
 2871   enc_class enc_rethrow() %{             // jump (not call) to the rethrow stub
 2872     cbuf.set_insts_mark();
 2873     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2874     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,   // target minus the address just past the 4-byte displacement
 2875                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2876   %}
 2877 
 2878 
 2879   // Convert a double to an int.  Java semantics require we do complex
 2880   // manglelations in the corner cases.  So we set the rounding mode to
 2881   // 'zero', store the darned double down as an int, and reset the
 2882   // rounding mode to 'nearest'.  If the stored result is 0x80000000 (the
 2883   // value stored in the corner cases), a runtime stub computes the correct value.
 2884   enc_class DPR2I_encoding( regDPR src ) %{    // fast path pops the converted int into EAX; 0x80000000 takes the slow call
 2885     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2886     // exceptions here, so that a NAN or other corner-case value will
 2887     // throw an exception (but normal values get converted at full speed).
 2888     // However, I2C adapters and other float-stack manglers leave pending
 2889     // invalid-op exceptions hanging.  We would have to clear them before
 2890     // enabling them and that is more expensive than just testing for the
 2891     // invalid value Intel stores down in the corner cases.
 2892     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2893     emit_opcode(cbuf,0x2D);
 2894     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2895     // Allocate a word
 2896     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2897     emit_opcode(cbuf,0xEC);
 2898     emit_d8(cbuf,0x04);
 2899     // Encoding assumes a double has been pushed into FPR0.
 2900     // Store down the double as an int, popping the FPU stack
 2901     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2902     emit_opcode(cbuf,0x1C);
 2903     emit_d8(cbuf,0x24);
 2904     // Restore the rounding mode; mask the exception
 2905     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2906     emit_opcode(cbuf,0x2D);
 2907     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2908         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2909         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2910 
 2911     // Load the converted int; adjust CPU stack
 2912     emit_opcode(cbuf,0x58);       // POP EAX
 2913     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2914     emit_d32   (cbuf,0x80000000); //         0x80000000
 2915     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2916     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD plus 5-byte CALL
 2917     // Push src onto stack slow-path
 2918     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2919     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2920     // CALL directly to the runtime
 2921     cbuf.set_insts_mark();
 2922     emit_opcode(cbuf,0xE8);       // Call into runtime
 2923     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2924     // Carry on here...
 2925   %}
 2926 
 2927   enc_class DPR2L_encoding( regDPR src ) %{    // fast path pops the converted long into EDX:EAX; 0x80000000:00000000 takes the slow call
 2928     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2929     emit_opcode(cbuf,0x2D);
 2930     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2931     // Allocate two words
 2932     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2933     emit_opcode(cbuf,0xEC);
 2934     emit_d8(cbuf,0x08);
 2935     // Encoding assumes a double has been pushed into FPR0.
 2936     // Store down the double as a long, popping the FPU stack
 2937     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2938     emit_opcode(cbuf,0x3C);
 2939     emit_d8(cbuf,0x24);
 2940     // Restore the rounding mode; mask the exception
 2941     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2942     emit_opcode(cbuf,0x2D);
 2943     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2944         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2945         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2946 
 2947     // Load the converted long; adjust CPU stack
 2948     emit_opcode(cbuf,0x58);       // POP EAX
 2949     emit_opcode(cbuf,0x5A);       // POP EDX
 2950     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2951     emit_d8    (cbuf,0xFA);       // rdx
 2952     emit_d32   (cbuf,0x80000000); //         0x80000000
 2953     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2954     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the 4 bytes of TEST/JNE below
 2955     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2956     emit_opcode(cbuf,0xC0);       // second byte of the TEST: EAX,EAX
 2957     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2958     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD plus 5-byte CALL
 2959     // Push src onto stack slow-path
 2960     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2961     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2962     // CALL directly to the runtime
 2963     cbuf.set_insts_mark();
 2964     emit_opcode(cbuf,0xE8);       // Call into runtime
 2965     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2966     // Carry on here...
 2967   %}
 2968 
 2969   enc_class FMul_ST_reg( eRegFPR src1 ) %{   // multiply the FPU stack top by src1, result stays in ST
 2970     // Operand was loaded from memory into fp ST (stack top)
 2971     // FMUL   ST,$src  /* D8 C8+i */
 2972     emit_opcode(cbuf, 0xD8);
 2973     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2974   %}
 2975 
 2976   enc_class FAdd_ST_reg( eRegFPR src2 ) %{   // add src2 to the FPU stack top, result stays in ST (no pop)
 2977     // FADD   ST,src2  /* D8 C0+i */
 2978     emit_opcode(cbuf, 0xD8);
 2979     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2980     //could use FADDP  src2,fpST  /* DE C0+i */
 2981   %}
 2982 
 2983   enc_class FAddP_reg_ST( eRegFPR src2 ) %{  // add the FPU stack top into src2 and pop
 2984     // FADDP  src2,ST  /* DE C0+i */
 2985     emit_opcode(cbuf, 0xDE);
 2986     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2987   %}
 2988 
 2989   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{   // ST = (ST - src1) / src2
 2990     // Operand has been loaded into fp ST (stack top)
 2991       // FSUB   ST,$src1  /* D8 E0+i */
 2992       emit_opcode(cbuf, 0xD8);
 2993       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 2994 
 2995       // FDIV   ST,$src2  /* D8 F0+i */
 2996       emit_opcode(cbuf, 0xD8);
 2997       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 2998   %}
 2999 
 3000   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{   // ST = (ST + src1) * src2
 3001     // Operand was loaded from memory into fp ST (stack top)
 3002     // FADD   ST,$src  /* D8 C0+i */
 3003     emit_opcode(cbuf, 0xD8);
 3004     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3005 
 3006     // FMUL  ST,src2  /* D8 C8+i */
 3007     emit_opcode(cbuf, 0xD8);
 3008     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3009   %}
 3010 
 3011 
 3012   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{   // add src1 to ST, then multiply into src2 and pop
 3013     // Operand was loaded from memory into fp ST (stack top)
 3014     // FADD   ST,$src  /* D8 C0+i */
 3015     emit_opcode(cbuf, 0xD8);
 3016     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3017 
 3018     // FMULP  src2,ST  /* DE C8+i */
 3019     emit_opcode(cbuf, 0xDE);
 3020     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3021   %}
 3022 
 3023   // Atomically load the volatile long: opcode DF /5 on $mem, then DF /7 into the $dst stack slot
 3024   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3025     emit_opcode(cbuf,0xDF);
 3026     int rm_byte_opcode = 0x05;      // opcode extension for the ModRM byte (presumably FILD m64 -- confirm against the x87 opcode map)
 3027     int base     = $mem$$base;
 3028     int index    = $mem$$index;
 3029     int scale    = $mem$$scale;
 3030     int displace = $mem$$disp;
 3031     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3032     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3033     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // DF /7 to the destination stack slot
 3034   %}
 3035 
 3036   // Volatile Store Long.  Must be atomic, so move it into
 3037   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3038   // target address before the store (for null-ptr checks)
 3039   // so the memory operand is used twice in the encoding.
 3040   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3041     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // DF /5 on the source stack slot (the move into the FP TOS)
 3042     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3043     emit_opcode(cbuf,0xDF);
 3044     int rm_byte_opcode = 0x07;        // opcode extension for the ModRM byte of the 64-bit FIST
 3045     int base     = $mem$$base;
 3046     int index    = $mem$$index;
 3047     int scale    = $mem$$scale;
 3048     int displace = $mem$$disp;
 3049     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3050     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3051   %}
 3052 
 3053 %}
 3054 
 3055 
 3056 //----------FRAME--------------------------------------------------------------
 3057 // Definition of frame structure and management information.
 3058 //
 3059 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3060 //                             |   (to get allocators register number
 3061 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3062 //  r   CALLER     |        |
 3063 //  o     |        +--------+      pad to even-align allocators stack-slot
 3064 //  w     V        |  pad0  |        numbers; owned by CALLER
 3065 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3066 //  h     ^        |   in   |  5
 3067 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3068 //  |     |        |        |  3
 3069 //  |     |        +--------+
 3070 //  V     |        | old out|      Empty on Intel, window on Sparc
 3071 //        |    old |preserve|      Must be even aligned.
 3072 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3073 //        |        |   in   |  3   area for Intel ret address
 3074 //     Owned by    |preserve|      Empty on Sparc.
 3075 //       SELF      +--------+
 3076 //        |        |  pad2  |  2   pad to align old SP
 3077 //        |        +--------+  1
 3078 //        |        | locks  |  0
 3079 //        |        +--------+----> OptoReg::stack0(), even aligned
 3080 //        |        |  pad1  | 11   pad to align new SP
 3081 //        |        +--------+
 3082 //        |        |        | 10
 3083 //        |        | spills |  9   spills
 3084 //        V        |        |  8   (pad0 slot for callee)
 3085 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3086 //        ^        |  out   |  7
 3087 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3088 //     Owned by    +--------+
 3089 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3090 //        |    new |preserve|      Must be even-aligned.
 3091 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3092 //        |        |        |
 3093 //
 3094 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3095 //         known from SELF's arguments and the Java calling convention.
 3096 //         Region 6-7 is determined per call site.
 3097 // Note 2: If the calling convention leaves holes in the incoming argument
 3098 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3099 //         are owned by the CALLEE.  Holes should not be necessary in the
 3100 //         incoming area, as the Java calling convention is completely under
 3101 //         the control of the AD file.  Doubles can be sorted and packed to
 3102 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3103 //         varargs C calling conventions.
 3104 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3105 //         even aligned with pad0 as needed.
 3106 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3107 //         region 6-11 is even aligned; it may be padded out more so that
 3108 //         the region from SP to FP meets the minimum stack alignment.
 3109 
 3110 frame %{
 3111   // This register defines part of the calling convention
 3112   // between compiled code and the interpreter.
 3113   inline_cache_reg(EAX);                // Inline Cache Register
 3114
 3115   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3116   cisc_spilling_operand_name(indOffset32);
 3117
 3118   // Number of stack slots consumed by locking an object
 3119   sync_stack_slots(1);
 3120
 3121   // Compiled code's Frame Pointer
 3122   frame_pointer(ESP);
 3123   // Interpreter stores its frame pointer in a register which is
 3124   // stored to the stack by I2CAdaptors.
 3125   // I2CAdaptors convert from interpreted java to compiled java.
 3126   interpreter_frame_pointer(EBP);
 3127
 3128   // Stack alignment requirement
 3129   // Alignment size in bytes (128-bit -> 16 bytes)
 3130   stack_alignment(StackAlignmentInBytes);
 3131
 3132   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3133   // for calls to C.  Supports the var-args backing area for register parms.
 3134   varargs_C_out_slots_killed(0);
 3135
 3136   // The after-PROLOG location of the return address.  Location of
 3137   // return address specifies a type (REG or STACK) and a number
 3138   // representing the register number (i.e. - use a register name) or
 3139   // stack slot.
 3140   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3141   // Otherwise, it is above the locks and verification slot and alignment word
 3142   return_addr(STACK - 1 +
 3143               align_up((Compile::current()->in_preserve_stack_slots() +
 3144                         Compile::current()->fixed_slots()),
 3145                        stack_alignment_in_slots()));
 3146
 3147   // Location of C & interpreter return values, returned as OptoRegPair(hi, lo) for the given ideal_reg
 3148   c_return_value %{
 3149     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3150     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };  // low half, indexed by ideal_reg
 3151     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };  // high half, indexed by ideal_reg
 3152
 3153     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3154     // that C functions return float and double results in XMM0.
 3155     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3156       return OptoRegPair(XMM0b_num,XMM0_num);
 3157     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3158       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3159
 3160     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // everything else comes from the tables
 3161   %}
 3162
 3163   // Location of return values; same tables as c_return_value, but a float goes to XMM0 already at UseSSE>=1
 3164   return_value %{
 3165     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3166     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };  // low half, indexed by ideal_reg
 3167     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };  // high half, indexed by ideal_reg
 3168     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3169       return OptoRegPair(XMM0b_num,XMM0_num);
 3170     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3171       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3172     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // everything else comes from the tables
 3173   %}
 3174
 3175 %}
 3176 
 3177 //----------ATTRIBUTES---------------------------------------------------------
 3178 //----------Operand Attributes-------------------------------------------------
 3179 op_attrib op_cost(0);        // Required cost attribute; 0 is the default for operands that do not set op_cost
 3180 
 3181 //----------Instruction Attributes---------------------------------------------
 3182 ins_attrib ins_cost(100);       // Required cost attribute (default value 100)
 3183 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3184 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3185                                 // non-matching short branch variant of some
 3186                                 // long branch?
 3187 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3188                                 // specifies the alignment that some part of the instruction (not
 3189                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3190                                 // function must be provided for the instruction
 3191 
 3192 //----------OPERANDS-----------------------------------------------------------
 3193 // Operand definitions must precede instruction definitions for correct parsing
 3194 // in the ADLC because operands constitute user defined types which are used in
 3195 // instruction definitions.
 3196 
 3197 //----------Simple Operands----------------------------------------------------
 3198 // Immediate Operands
 3199 // Integer Immediate: matches any ConI, with no predicate restricting the value
 3200 operand immI() %{
 3201   match(ConI);
 3202
 3203   op_cost(10);
 3204   format %{ %}
 3205   interface(CONST_INTER);
 3206 %}
 3207 
 3208 // Integer immediate equal to 0 (constant for test vs zero)
 3209 operand immI_0() %{
 3210   predicate(n->get_int() == 0);
 3211   match(ConI);
 3212
 3213   op_cost(0);
 3214   format %{ %}
 3215   interface(CONST_INTER);
 3216 %}
 3217 
 3218 // Integer immediate equal to 1 (constant for increment)
 3219 operand immI_1() %{
 3220   predicate(n->get_int() == 1);
 3221   match(ConI);
 3222
 3223   op_cost(0);
 3224   format %{ %}
 3225   interface(CONST_INTER);
 3226 %}
 3227 
 3228 // Integer immediate equal to -1 (constant for decrement)
 3229 operand immI_M1() %{
 3230   predicate(n->get_int() == -1);
 3231   match(ConI);
 3232
 3233   op_cost(0);
 3234   format %{ %}
 3235   interface(CONST_INTER);
 3236 %}
 3237 
 3238 // Valid scale values for addressing modes: integer immediate in the range 0..3
 3239 operand immI2() %{
 3240   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3241   match(ConI);
 3242
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 operand immI8() %{
 3248   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));  // value fits in a signed 8-bit immediate
 3249   match(ConI);
 3250
 3251   op_cost(5);
 3252   format %{ %}
 3253   interface(CONST_INTER);
 3254 %}
 3255 
 3256 operand immU8() %{
 3257   predicate((0 <= n->get_int()) && (n->get_int() <= 255));  // value fits in an unsigned 8-bit immediate
 3258   match(ConI);
 3259
 3260   op_cost(5);
 3261   format %{ %}
 3262   interface(CONST_INTER);
 3263 %}
 3264 
 3265 operand immI16() %{
 3266   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));  // value fits in a signed 16-bit immediate
 3267   match(ConI);
 3268
 3269   op_cost(10);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 // Int Immediate non-negative (0 .. max int, i.e. the sign bit is clear)
 3275 operand immU31()
 3276 %{
 3277   predicate(n->get_int() >= 0);
 3278   match(ConI);
 3279
 3280   op_cost(0);
 3281   format %{ %}
 3282   interface(CONST_INTER);
 3283 %}
 3284 
 3285 // Integer immediate equal to 32 (constant for long shifts)
 3286 operand immI_32() %{
 3287   predicate( n->get_int() == 32 );
 3288   match(ConI);
 3289
 3290   op_cost(0);
 3291   format %{ %}
 3292   interface(CONST_INTER);
 3293 %}
 3294 
 3295 operand immI_1_31() %{
 3296   predicate( n->get_int() >= 1 && n->get_int() <= 31 );  // integer immediate in the range 1..31
 3297   match(ConI);
 3298
 3299   op_cost(0);
 3300   format %{ %}
 3301   interface(CONST_INTER);
 3302 %}
 3303 
 3304 operand immI_32_63() %{
 3305   predicate( n->get_int() >= 32 && n->get_int() <= 63 );  // integer immediate in the range 32..63
 3306   match(ConI);
 3307   op_cost(0);
 3308
 3309   format %{ %}
 3310   interface(CONST_INTER);
 3311 %}
 3312 
 3313 operand immI_2() %{
 3314   predicate( n->get_int() == 2 );  // integer immediate equal to 2
 3315   match(ConI);
 3316
 3317   op_cost(0);
 3318   format %{ %}
 3319   interface(CONST_INTER);
 3320 %}
 3321 
 3322 operand immI_3() %{
 3323   predicate( n->get_int() == 3 );  // integer immediate equal to 3
 3324   match(ConI);
 3325
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_4()
 3332 %{
 3333   predicate(n->get_int() == 4);  // integer immediate equal to 4
 3334   match(ConI);
 3335
 3336   op_cost(0);
 3337   format %{ %}
 3338   interface(CONST_INTER);
 3339 %}
 3340 
 3341 operand immI_8()
 3342 %{
 3343   predicate(n->get_int() == 8);  // integer immediate equal to 8
 3344   match(ConI);
 3345
 3346   op_cost(0);
 3347   format %{ %}
 3348   interface(CONST_INTER);
 3349 %}
 3350 
 3351 // Pointer Immediate: matches any ConP, with no predicate restricting the value
 3352 operand immP() %{
 3353   match(ConP);
 3354
 3355   op_cost(10);
 3356   format %{ %}
 3357   interface(CONST_INTER);
 3358 %}
 3359 
 3360 // NULL Pointer Immediate: ConP whose pointer value is 0
 3361 operand immP0() %{
 3362   predicate( n->get_ptr() == 0 );
 3363   match(ConP);
 3364   op_cost(0);
 3365
 3366   format %{ %}
 3367   interface(CONST_INTER);
 3368 %}
 3369 
 3370 // Long Immediate: matches any ConL, with no predicate restricting the value
 3371 operand immL() %{
 3372   match(ConL);
 3373
 3374   op_cost(20);
 3375   format %{ %}
 3376   interface(CONST_INTER);
 3377 %}
 3378 
 3379 // Long Immediate zero: ConL whose value is 0
 3380 operand immL0() %{
 3381   predicate( n->get_long() == 0L );
 3382   match(ConL);
 3383   op_cost(0);
 3384
 3385   format %{ %}
 3386   interface(CONST_INTER);
 3387 %}
 3388 
 3389 // Long Immediate minus one: ConL whose value is -1
 3390 operand immL_M1() %{
 3391   predicate( n->get_long() == -1L );
 3392   match(ConL);
 3393   op_cost(0);
 3394
 3395   format %{ %}
 3396   interface(CONST_INTER);
 3397 %}
 3398 
 3399 // Long immediate from 0 to 127 (both bounds inclusive).
 3400 // Used for a shorter form of long mul by 10.
 3401 operand immL_127() %{
 3402   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3403   match(ConL);
 3404   op_cost(0);
 3405
 3406   format %{ %}
 3407   interface(CONST_INTER);
 3408 %}
 3409 
 3410 // Long Immediate: low 32-bit mask (exactly the value 0xFFFFFFFF)
 3411 operand immL_32bits() %{
 3412   predicate(n->get_long() == 0xFFFFFFFFL);
 3413   match(ConL);
 3414   op_cost(0);
 3415
 3416   format %{ %}
 3417   interface(CONST_INTER);
 3418 %}
 3419 
 3420 // Long Immediate that survives a round trip through int, i.e. fits in a signed 32-bit value
 3421 operand immL32() %{
 3422   predicate(n->get_long() == (int)(n->get_long()));
 3423   match(ConL);
 3424   op_cost(20);
 3425
 3426   format %{ %}
 3427   interface(CONST_INTER);
 3428 %}
 3429 
 3430 // Double Immediate zero, only available when UseSSE<=1
 3431 operand immDPR0() %{
 3432   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3433   // bug that generates code such that NaNs compare equal to 0.0
 3434   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3435   match(ConD);
 3436
 3437   op_cost(5);
 3438   format %{ %}
 3439   interface(CONST_INTER);
 3440 %}
 3441 
 3442 // Double Immediate one, only available when UseSSE<=1
 3443 operand immDPR1() %{
 3444   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3445   match(ConD);
 3446
 3447   op_cost(5);
 3448   format %{ %}
 3449   interface(CONST_INTER);
 3450 %}
 3451 
 3452 // Double Immediate (any value), only available when UseSSE<=1
 3453 operand immDPR() %{
 3454   predicate(UseSSE<=1);
 3455   match(ConD);
 3456
 3457   op_cost(5);
 3458   format %{ %}
 3459   interface(CONST_INTER);
 3460 %}
 3461 
 3462 operand immD() %{
 3463   predicate(UseSSE>=2);  // double immediate (any value), only available when UseSSE>=2
 3464   match(ConD);
 3465
 3466   op_cost(5);
 3467   format %{ %}
 3468   interface(CONST_INTER);
 3469 %}
 3470 
 3471 // Double Immediate zero, only available when UseSSE>=2
 3472 operand immD0() %{
 3473   // The value is compared via jlong_cast() rather than with a floating-point ==,
 3474   // presumably so that only the all-zero bit pattern (+0.0) matches and neither
 3475   // NaN nor -0.0 can slip through (NOTE(review): confirm jlong_cast is a bit cast).
 3476   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3477   match(ConD);
 3478
 3479   format %{ %}
 3480   interface(CONST_INTER);
 3481 %}
 3482 
 3483 // Float Immediate zero, only available when UseSSE == 0
 3484 operand immFPR0() %{
 3485   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3486   match(ConF);
 3487
 3488   op_cost(5);
 3489   format %{ %}
 3490   interface(CONST_INTER);
 3491 %}
 3492 
 3493 // Float Immediate one, only available when UseSSE == 0
 3494 operand immFPR1() %{
 3495   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3496   match(ConF);
 3497
 3498   op_cost(5);
 3499   format %{ %}
 3500   interface(CONST_INTER);
 3501 %}
 3502 
 3503 // Float Immediate (any value), only available when UseSSE == 0
 3504 operand immFPR() %{
 3505   predicate( UseSSE == 0 );
 3506   match(ConF);
 3507
 3508   op_cost(5);
 3509   format %{ %}
 3510   interface(CONST_INTER);
 3511 %}
 3512 
 3513 // Float Immediate (any value), only available when UseSSE >= 1
 3514 operand immF() %{
 3515   predicate(UseSSE >= 1);
 3516   match(ConF);
 3517
 3518   op_cost(5);
 3519   format %{ %}
 3520   interface(CONST_INTER);
 3521 %}
 3522 
 3523 // Float Immediate zero.  Zero and not -0.0 (tested via jint_cast); only available when UseSSE >= 1
 3524 operand immF0() %{
 3525   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3526   match(ConF);
 3527
 3528   op_cost(5);
 3529   format %{ %}
 3530   interface(CONST_INTER);
 3531 %}
 3532 
 3533 // Immediates for special shifts (sign extend)
 3534 
 3535 // Integer immediate equal to 16 (one of the special shift counts)
 3536 operand immI_16() %{
 3537   predicate( n->get_int() == 16 );
 3538   match(ConI);
 3539
 3540   format %{ %}
 3541   interface(CONST_INTER);
 3542 %}
 3543 
 3544 operand immI_24() %{
 3545   predicate( n->get_int() == 24 );  // integer immediate equal to 24
 3546   match(ConI);
 3547
 3548   format %{ %}
 3549   interface(CONST_INTER);
 3550 %}
 3551 
 3552 // Constant for byte-wide masking: integer immediate equal to 255
 3553 operand immI_255() %{
 3554   predicate( n->get_int() == 255 );
 3555   match(ConI);
 3556
 3557   format %{ %}
 3558   interface(CONST_INTER);
 3559 %}
 3560 
 3561 // Constant for short-wide masking: integer immediate equal to 65535
 3562 operand immI_65535() %{
 3563   predicate(n->get_int() == 65535);
 3564   match(ConI);
 3565
 3566   format %{ %}
 3567   interface(CONST_INTER);
 3568 %}
 3569 
 3570 operand kReg()
 3571 %{
 3572   constraint(ALLOC_IN_RC(vectmask_reg));  // any register of class vectmask_reg
 3573   match(RegVectMask);
 3574   format %{%}
 3575   interface(REG_INTER);
 3576 %}
 3577 
 3578 operand kReg_K1()
 3579 %{
 3580   constraint(ALLOC_IN_RC(vectmask_reg_K1));  // restricted to register class vectmask_reg_K1
 3581   match(RegVectMask);
 3582   format %{%}
 3583   interface(REG_INTER);
 3584 %}
 3585 
 3586 operand kReg_K2()
 3587 %{
 3588   constraint(ALLOC_IN_RC(vectmask_reg_K2));  // restricted to register class vectmask_reg_K2
 3589   match(RegVectMask);
 3590   format %{%}
 3591   interface(REG_INTER);
 3592 %}
 3593 
 3594 // Vector mask register operand restricted to register class vectmask_reg_K3
 3595 operand kReg_K3()
 3596 %{
 3597   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3598   match(RegVectMask);
 3599   format %{%}
 3600   interface(REG_INTER);
 3601 %}
 3602 
 3603 operand kReg_K4()
 3604 %{
 3605   constraint(ALLOC_IN_RC(vectmask_reg_K4));  // restricted to register class vectmask_reg_K4
 3606   match(RegVectMask);
 3607   format %{%}
 3608   interface(REG_INTER);
 3609 %}
 3610 
 3611 operand kReg_K5()
 3612 %{
 3613   constraint(ALLOC_IN_RC(vectmask_reg_K5));  // restricted to register class vectmask_reg_K5
 3614   match(RegVectMask);
 3615   format %{%}
 3616   interface(REG_INTER);
 3617 %}
 3618 
 3619 operand kReg_K6()
 3620 %{
 3621   constraint(ALLOC_IN_RC(vectmask_reg_K6));  // restricted to register class vectmask_reg_K6
 3622   match(RegVectMask);
 3623   format %{%}
 3624   interface(REG_INTER);
 3625 %}
 3626 
 3627 // Vector mask register operand restricted to register class vectmask_reg_K7
 3628 operand kReg_K7()
 3629 %{
 3630   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3631   match(RegVectMask);
 3632   format %{%}
 3633   interface(REG_INTER);
 3634 %}
 3635 
 3636 // Register Operands
 3637 // Integer Register: allocated from int_reg; also matches the narrower integer register operands listed below
 3638 operand rRegI() %{
 3639   constraint(ALLOC_IN_RC(int_reg));
 3640   match(RegI);
 3641   match(xRegI);
 3642   match(eAXRegI);
 3643   match(eBXRegI);
 3644   match(eCXRegI);
 3645   match(eDXRegI);
 3646   match(eDIRegI);
 3647   match(eSIRegI);
 3648
 3649   format %{ %}
 3650   interface(REG_INTER);
 3651 %}
 3652 
 3653 // Subset of Integer Register: allocated from int_x_reg; also matches the EAX/EBX/ECX/EDX operands
 3654 operand xRegI(rRegI reg) %{
 3655   constraint(ALLOC_IN_RC(int_x_reg));
 3656   match(reg);
 3657   match(eAXRegI);
 3658   match(eBXRegI);
 3659   match(eCXRegI);
 3660   match(eDXRegI);
 3661
 3662   format %{ %}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // Special Registers: integer operand allocated from register class eax_reg (printed as "EAX")
 3667 operand eAXRegI(xRegI reg) %{
 3668   constraint(ALLOC_IN_RC(eax_reg));
 3669   match(reg);
 3670   match(rRegI);
 3671
 3672   format %{ "EAX" %}
 3673   interface(REG_INTER);
 3674 %}
 3675 
 3676 // Special Registers: integer operand allocated from register class ebx_reg (printed as "EBX")
 3677 operand eBXRegI(xRegI reg) %{
 3678   constraint(ALLOC_IN_RC(ebx_reg));
 3679   match(reg);
 3680   match(rRegI);
 3681
 3682   format %{ "EBX" %}
 3683   interface(REG_INTER);
 3684 %}
 3685 
 3686 operand eCXRegI(xRegI reg) %{
 3687   constraint(ALLOC_IN_RC(ecx_reg));  // integer operand restricted to register class ecx_reg
 3688   match(reg);
 3689   match(rRegI);
 3690
 3691   format %{ "ECX" %}
 3692   interface(REG_INTER);
 3693 %}
 3694 
 3695 operand eDXRegI(xRegI reg) %{
 3696   constraint(ALLOC_IN_RC(edx_reg));  // integer operand restricted to register class edx_reg
 3697   match(reg);
 3698   match(rRegI);
 3699
 3700   format %{ "EDX" %}
 3701   interface(REG_INTER);
 3702 %}
 3703 
 3704 operand eDIRegI(xRegI reg) %{
 3705   constraint(ALLOC_IN_RC(edi_reg));  // integer operand restricted to register class edi_reg
 3706   match(reg);
 3707   match(rRegI);
 3708
 3709   format %{ "EDI" %}
 3710   interface(REG_INTER);
 3711 %}
 3712 
 3713 operand naxRegI() %{
 3714   constraint(ALLOC_IN_RC(nax_reg));  // integer operand from class nax_reg; the match list below has no EAX operand
 3715   match(RegI);
 3716   match(eCXRegI);
 3717   match(eDXRegI);
 3718   match(eSIRegI);
 3719   match(eDIRegI);
 3720
 3721   format %{ %}
 3722   interface(REG_INTER);
 3723 %}
 3724 
 3725 operand nadxRegI() %{
 3726   constraint(ALLOC_IN_RC(nadx_reg));  // integer operand from class nadx_reg; the match list below has no EAX or EDX operand
 3727   match(RegI);
 3728   match(eBXRegI);
 3729   match(eCXRegI);
 3730   match(eSIRegI);
 3731   match(eDIRegI);
 3732
 3733   format %{ %}
 3734   interface(REG_INTER);
 3735 %}
 3736 
 3737 operand ncxRegI() %{
 3738   constraint(ALLOC_IN_RC(ncx_reg));  // integer operand from class ncx_reg; the match list below has no ECX operand
 3739   match(RegI);
 3740   match(eAXRegI);
 3741   match(eDXRegI);
 3742   match(eSIRegI);
 3743   match(eDIRegI);
 3744
 3745   format %{ %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 // Integer operand allocated from register class esi_reg (printed as "ESI").
 3750 // Historical note: this operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3751 operand eSIRegI(xRegI reg) %{
 3752    constraint(ALLOC_IN_RC(esi_reg));
 3753    match(reg);
 3754    match(rRegI);
 3755
 3756    format %{ "ESI" %}
 3757    interface(REG_INTER);
 3758 %}
 3759 
 3760 // Pointer Register: allocated from any_reg; also matches eRegP and the listed single-register pointer operands
 3761 operand anyRegP() %{
 3762   constraint(ALLOC_IN_RC(any_reg));
 3763   match(RegP);
 3764   match(eAXRegP);
 3765   match(eBXRegP);
 3766   match(eCXRegP);
 3767   match(eDIRegP);
 3768   match(eRegP);
 3769
 3770   format %{ %}
 3771   interface(REG_INTER);
 3772 %}
 3773 
 3774 operand eRegP() %{
 3775   constraint(ALLOC_IN_RC(int_reg));  // pointer operand allocated from the int_reg class
 3776   match(RegP);
 3777   match(eAXRegP);
 3778   match(eBXRegP);
 3779   match(eCXRegP);
 3780   match(eDIRegP);
 3781
 3782   format %{ %}
 3783   interface(REG_INTER);
 3784 %}
 3785 
 3786 operand rRegP() %{
 3787   constraint(ALLOC_IN_RC(int_reg));  // same register class and match list as eRegP
 3788   match(RegP);
 3789   match(eAXRegP);
 3790   match(eBXRegP);
 3791   match(eCXRegP);
 3792   match(eDIRegP);
 3793
 3794   format %{ %}
 3795   interface(REG_INTER);
 3796 %}
 3797 
 3798 // On windows95, EBP is not safe to use for implicit null tests; this pointer operand allocates from int_reg_no_ebp.
 3799 operand eRegP_no_EBP() %{
 3800   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3801   match(RegP);
 3802   match(eAXRegP);
 3803   match(eBXRegP);
 3804   match(eCXRegP);
 3805   match(eDIRegP);
 3806
 3807   op_cost(100);  // far above the default op_cost(0)
 3808   format %{ %}
 3809   interface(REG_INTER);
 3810 %}
 3811 
 3812 operand naxRegP() %{
 3813   constraint(ALLOC_IN_RC(nax_reg));  // pointer operand from class nax_reg; the match list below has no EAX operand
 3814   match(RegP);
 3815   match(eBXRegP);
 3816   match(eDXRegP);
 3817   match(eCXRegP);
 3818   match(eSIRegP);
 3819   match(eDIRegP);
 3820
 3821   format %{ %}
 3822   interface(REG_INTER);
 3823 %}
 3824 
 3825 operand nabxRegP() %{
 3826   constraint(ALLOC_IN_RC(nabx_reg));  // pointer operand from class nabx_reg; the match list below has no EAX or EBX operand
 3827   match(RegP);
 3828   match(eCXRegP);
 3829   match(eDXRegP);
 3830   match(eSIRegP);
 3831   match(eDIRegP);
 3832
 3833   format %{ %}
 3834   interface(REG_INTER);
 3835 %}
 3836 
 3837 operand pRegP() %{
 3838   constraint(ALLOC_IN_RC(p_reg));  // pointer operand from class p_reg; matches the EBX/EDX/ESI/EDI pointer operands
 3839   match(RegP);
 3840   match(eBXRegP);
 3841   match(eDXRegP);
 3842   match(eSIRegP);
 3843   match(eDIRegP);
 3844
 3845   format %{ %}
 3846   interface(REG_INTER);
 3847 %}
 3848 
 3849 // Special Registers: pointer operand allocated from register class eax_reg (printed as "EAX")
 3850 // Return a pointer value
 3851 operand eAXRegP(eRegP reg) %{
 3852   constraint(ALLOC_IN_RC(eax_reg));
 3853   match(reg);
 3854   format %{ "EAX" %}
 3855   interface(REG_INTER);
 3856 %}
 3857 
 3858 // Used in AtomicAdd; pointer operand allocated from register class ebx_reg (printed as "EBX")
 3859 operand eBXRegP(eRegP reg) %{
 3860   constraint(ALLOC_IN_RC(ebx_reg));
 3861   match(reg);
 3862   format %{ "EBX" %}
 3863   interface(REG_INTER);
 3864 %}
 3865 
 3866 // Tail-call (interprocedural jump) to interpreter; pointer operand allocated from register class ecx_reg (printed as "ECX")
 3867 operand eCXRegP(eRegP reg) %{
 3868   constraint(ALLOC_IN_RC(ecx_reg));
 3869   match(reg);
 3870   format %{ "ECX" %}
 3871   interface(REG_INTER);
 3872 %}
 3873 
 3874 operand eDXRegP(eRegP reg) %{
 3875   constraint(ALLOC_IN_RC(edx_reg));  // pointer operand restricted to register class edx_reg
 3876   match(reg);
 3877   format %{ "EDX" %}
 3878   interface(REG_INTER);
 3879 %}
 3880 
 3881 operand eSIRegP(eRegP reg) %{
 3882   constraint(ALLOC_IN_RC(esi_reg));  // pointer operand restricted to register class esi_reg
 3883   match(reg);
 3884   format %{ "ESI" %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 // Used in rep stosw; pointer operand allocated from register class edi_reg (printed as "EDI")
 3889 operand eDIRegP(eRegP reg) %{
 3890   constraint(ALLOC_IN_RC(edi_reg));
 3891   match(reg);
 3892   format %{ "EDI" %}
 3893   interface(REG_INTER);
 3894 %}
 3895 
 3896 operand eRegL() %{
 3897   constraint(ALLOC_IN_RC(long_reg));  // long operand allocated from register class long_reg
 3898   match(RegL);
 3899   match(eADXRegL);
 3900
 3901   format %{ %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 operand eADXRegL( eRegL reg ) %{
 3906   constraint(ALLOC_IN_RC(eadx_reg));  // long operand restricted to register class eadx_reg (printed as "EDX:EAX")
 3907   match(reg);
 3908
 3909   format %{ "EDX:EAX" %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 operand eBCXRegL( eRegL reg ) %{
 3914   constraint(ALLOC_IN_RC(ebcx_reg));  // long operand restricted to register class ebcx_reg (printed as "EBX:ECX")
 3915   match(reg);
 3916
 3917   format %{ "EBX:ECX" %}
 3918   interface(REG_INTER);
 3919 %}
 3920 
 3921 // Special case for integer high multiply: same eadx_reg class as eADXRegL, but printed as "EAX" only
 3922 operand eADXRegL_low_only() %{
 3923   constraint(ALLOC_IN_RC(eadx_reg));
 3924   match(RegL);
 3925
 3926   format %{ "EAX" %}
 3927   interface(REG_INTER);
 3928 %}
 3929 
 3930 // Flags register, used as output of compare instructions (class int_flags, printed as "EFLAGS")
 3931 operand rFlagsReg() %{
 3932   constraint(ALLOC_IN_RC(int_flags));
 3933   match(RegFlags);
 3934
 3935   format %{ "EFLAGS" %}
 3936   interface(REG_INTER);
 3937 %}
 3938 
 3939 // Flags register, used as output of compare instructions; same class and format as rFlagsReg
 3940 operand eFlagsReg() %{
 3941   constraint(ALLOC_IN_RC(int_flags));
 3942   match(RegFlags);
 3943
 3944   format %{ "EFLAGS" %}
 3945   interface(REG_INTER);
 3946 %}
 3947 
 3948 // Flags register, used as output of FLOATING POINT compare instructions (printed as "EFLAGS_U")
 3949 operand eFlagsRegU() %{
 3950   constraint(ALLOC_IN_RC(int_flags));
 3951   match(RegFlags);
 3952
 3953   format %{ "EFLAGS_U" %}
 3954   interface(REG_INTER);
 3955 %}
 3956 
 3957 operand eFlagsRegUCF() %{
 3958   constraint(ALLOC_IN_RC(int_flags));
 3959   match(RegFlags);
 3960   predicate(false);  // constant-false predicate on this operand
 3961
 3962   format %{ "EFLAGS_U_CF" %}
 3963   interface(REG_INTER);
 3964 %}
 3965 
 3966 // Condition Code Register used by long compare (printed as "FLAGS_LTGE")
 3967 operand flagsReg_long_LTGE() %{
 3968   constraint(ALLOC_IN_RC(int_flags));
 3969   match(RegFlags);
 3970   format %{ "FLAGS_LTGE" %}
 3971   interface(REG_INTER);
 3972 %}
 3973 operand flagsReg_long_EQNE() %{
 3974   constraint(ALLOC_IN_RC(int_flags));  // flags operand in class int_flags, printed as "FLAGS_EQNE"
 3975   match(RegFlags);
 3976   format %{ "FLAGS_EQNE" %}
 3977   interface(REG_INTER);
 3978 %}
 3979 operand flagsReg_long_LEGT() %{
 3980   constraint(ALLOC_IN_RC(int_flags));  // flags operand in class int_flags, printed as "FLAGS_LEGT"
 3981   match(RegFlags);
 3982   format %{ "FLAGS_LEGT" %}
 3983   interface(REG_INTER);
 3984 %}
 3985 
 3986 // Condition Code Register used by unsigned long compare (printed as "FLAGS_U_LTGE")
 3987 operand flagsReg_ulong_LTGE() %{
 3988   constraint(ALLOC_IN_RC(int_flags));
 3989   match(RegFlags);
 3990   format %{ "FLAGS_U_LTGE" %}
 3991   interface(REG_INTER);
 3992 %}
 3993 operand flagsReg_ulong_EQNE() %{
 3994   constraint(ALLOC_IN_RC(int_flags));  // flags operand in class int_flags, printed as "FLAGS_U_EQNE"
 3995   match(RegFlags);
 3996   format %{ "FLAGS_U_EQNE" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 operand flagsReg_ulong_LEGT() %{
 4000   constraint(ALLOC_IN_RC(int_flags));  // flags operand in class int_flags, printed as "FLAGS_U_LEGT"
 4001   match(RegFlags);
 4002   format %{ "FLAGS_U_LEGT" %}
 4003   interface(REG_INTER);
 4004 %}
 4005 
 4006 // Double register operand in class fp_dbl_reg, only available when UseSSE < 2
 4007 operand regDPR() %{
 4008   predicate( UseSSE < 2 );
 4009   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4010   match(RegD);
 4011   match(regDPR1);
 4012   match(regDPR2);
 4013   format %{ %}
 4014   interface(REG_INTER);
 4015 %}
 4016 
 4017 operand regDPR1(regDPR reg) %{
 4018   predicate( UseSSE < 2 );
 4019   constraint(ALLOC_IN_RC(fp_dbl_reg0));  // double operand restricted to class fp_dbl_reg0 (printed as "FPR1")
 4020   match(reg);
 4021   format %{ "FPR1" %}
 4022   interface(REG_INTER);
 4023 %}
 4024 
 4025 operand regDPR2(regDPR reg) %{
 4026   predicate( UseSSE < 2 );
 4027   constraint(ALLOC_IN_RC(fp_dbl_reg1));  // double operand restricted to class fp_dbl_reg1 (printed as "FPR2")
 4028   match(reg);
 4029   format %{ "FPR2" %}
 4030   interface(REG_INTER);
 4031 %}
 4032 
 4033 operand regnotDPR1(regDPR reg) %{
 4034   predicate( UseSSE < 2 );
 4035   constraint(ALLOC_IN_RC(fp_dbl_notreg0));  // double operand restricted to class fp_dbl_notreg0
 4036   match(reg);
 4037   format %{ %}
 4038   interface(REG_INTER);
 4039 %}
 4040 
 4041 // Float register operand in class fp_flt_reg, only available when UseSSE < 2
 4042 operand regFPR() %{
 4043   predicate( UseSSE < 2 );
 4044   constraint(ALLOC_IN_RC(fp_flt_reg));
 4045   match(RegF);
 4046   match(regFPR1);
 4047   format %{ %}
 4048   interface(REG_INTER);
 4049 %}
 4050 
 4051 // Float register operand restricted to class fp_flt_reg0 (printed as "FPR1"), only available when UseSSE < 2
 4052 operand regFPR1(regFPR reg) %{
 4053   predicate( UseSSE < 2 );
 4054   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4055   match(reg);
 4056   format %{ "FPR1" %}
 4057   interface(REG_INTER);
 4058 %}
 4059 
 4060 // XMM Float register operands (class float_reg_legacy), only available when UseSSE>=1
 4061 operand regF() %{
 4062   predicate( UseSSE>=1 );
 4063   constraint(ALLOC_IN_RC(float_reg_legacy));
 4064   match(RegF);
 4065   format %{ %}
 4066   interface(REG_INTER);
 4067 %}
 4068 
 4069 operand legRegF() %{
 4070   predicate( UseSSE>=1 );
 4071   constraint(ALLOC_IN_RC(float_reg_legacy));  // same predicate and register class as regF
 4072   match(RegF);
 4073   format %{ %}
 4074   interface(REG_INTER);
 4075 %}
 4076 
 4077 // Float register operand in class float_reg_vl; unlike regF/legRegF it has no UseSSE predicate
 4078 operand vlRegF() %{
 4079    constraint(ALLOC_IN_RC(float_reg_vl));
 4080    match(RegF);
 4081
 4082    format %{ %}
 4083    interface(REG_INTER);
 4084 %}
 4085 
 4086 // XMM Double register operands (class double_reg_legacy), only available when UseSSE>=2
 4087 operand regD() %{
 4088   predicate( UseSSE>=2 );
 4089   constraint(ALLOC_IN_RC(double_reg_legacy));
 4090   match(RegD);
 4091   format %{ %}
 4092   interface(REG_INTER);
 4093 %}
 4094 
 4095 // Double register operand with the same predicate and register class (double_reg_legacy) as regD
 4096 operand legRegD() %{
 4097   predicate( UseSSE>=2 );
 4098   constraint(ALLOC_IN_RC(double_reg_legacy));
 4099   match(RegD);
 4100   format %{ %}
 4101   interface(REG_INTER);
 4102 %}
 4103 
 4104 operand vlRegD() %{
 4105    constraint(ALLOC_IN_RC(double_reg_vl));  // double operand in class double_reg_vl; no UseSSE predicate
 4106    match(RegD);
 4107
 4108    format %{ %}
 4109    interface(REG_INTER);
 4110 %}
 4111 
 4112 //----------Memory Operands----------------------------------------------------
 4113 // Direct Memory Operand: the pointer constant itself is the displacement
 4114 operand direct(immP addr) %{
 4115   match(addr);
 4116
 4117   format %{ "[$addr]" %}
 4118   interface(MEMORY_INTER) %{
 4119     base(0xFFFFFFFF);   // fixed marker value instead of a base register
 4120     index(0x4);         // the same fixed index value is used by every operand here that has no index register
 4121     scale(0x0);
 4122     disp($addr);
 4123   %}
 4124 %}
 4125 
 4126 // Indirect Memory Operand: address held in a pointer register, zero displacement
 4127 operand indirect(eRegP reg) %{
 4128   constraint(ALLOC_IN_RC(int_reg));
 4129   match(reg);
 4130
 4131   format %{ "[$reg]" %}
 4132   interface(MEMORY_INTER) %{
 4133     base($reg);
 4134     index(0x4);         // fixed index value; this operand has no index register
 4135     scale(0x0);
 4136     disp(0x0);
 4137   %}
 4138 %}
 4139 
 4140 // Indirect Memory Plus Short Offset Operand: matches (AddP reg off) where
      // off is an immI8 constant; the constant becomes the displacement.
 4141 operand indOffset8(eRegP reg, immI8 off) %{
 4142   match(AddP reg off);
 4143 
 4144   format %{ "[$reg + $off]" %}
 4145   interface(MEMORY_INTER) %{
 4146     base($reg);
 4147     index(0x4);  // 0x4 = no index
 4148     scale(0x0);
 4149     disp($off);
 4150   %}
 4151 %}
 4152 
 4153 // Indirect Memory Plus Long Offset Operand: matches (AddP reg off) where
      // off is an immI constant; the constant becomes the displacement.
 4154 operand indOffset32(eRegP reg, immI off) %{
 4155   match(AddP reg off);
 4156 
 4157   format %{ "[$reg + $off]" %}
 4158   interface(MEMORY_INTER) %{
 4159     base($reg);
 4160     index(0x4);  // 0x4 = no index
 4161     scale(0x0);
 4162     disp($off);
 4163   %}
 4164 %}
 4165 
 4166 // Integer Register Plus Pointer-Constant Offset Operand: matches
      // (AddP off reg), i.e. the immP constant is the first AddP input and the
      // integer register the second. The register is used as the base and the
      // pointer constant as the displacement.
 4167 operand indOffset32X(rRegI reg, immP off) %{
 4168   match(AddP off reg);
 4169 
 4170   format %{ "[$reg + $off]" %}
 4171   interface(MEMORY_INTER) %{
 4172     base($reg);
 4173     index(0x4);  // 0x4 = no index
 4174     scale(0x0);
 4175     disp($off);
 4176   %}
 4177 %}
 4178 
 4179 // Indirect Memory Plus Index Register Plus Offset Operand: matches
      // (AddP (AddP reg ireg) off); the index is used unscaled.
 4180 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4181   match(AddP (AddP reg ireg) off);
 4182 
 4183   op_cost(10);
 4184   format %{"[$reg + $off + $ireg]" %}
 4185   interface(MEMORY_INTER) %{
 4186     base($reg);
 4187     index($ireg);
 4188     scale(0x0);  // index is not shifted
 4189     disp($off);
 4190   %}
 4191 %}
 4192 
 4193 // Indirect Memory Plus Index Register Operand: matches (AddP reg ireg) with
      // an unscaled index and no displacement.
 4194 operand indIndex(eRegP reg, rRegI ireg) %{
 4195   match(AddP reg ireg);
 4196 
 4197   op_cost(10);
 4198   format %{"[$reg + $ireg]" %}
 4199   interface(MEMORY_INTER) %{
 4200     base($reg);
 4201     index($ireg);
 4202     scale(0x0);  // index is not shifted
 4203     disp(0x0);   // no displacement
 4204   %}
 4205 %}
 4206 
 4207 // // -------------------------------------------------------------------------
 4208 // // 486 architecture doesn't support "scale * index + offset" without a base
 4209 // // -------------------------------------------------------------------------
 4210 // // Scaled Memory Operands
 4211 // // Indirect Memory Times Scale Plus Offset Operand
 4212 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4213 //   match(AddP off (LShiftI ireg scale));
 4214 //
 4215 //   op_cost(10);
 4216 //   format %{"[$off + $ireg << $scale]" %}
 4217 //   interface(MEMORY_INTER) %{
 4218 //     base(0x4);
 4219 //     index($ireg);
 4220 //     scale($scale);
 4221 //     disp($off);
 4222 //   %}
 4223 // %}
 4224 
 4225 // Indirect Memory Plus Scaled Index Register Operand: matches
      // (AddP reg (LShiftI ireg scale)); the immI2 shift count becomes the scale.
 4226 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4227   match(AddP reg (LShiftI ireg scale));
 4228 
 4229   op_cost(10);
 4230   format %{"[$reg + $ireg << $scale]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base($reg);
 4233     index($ireg);
 4234     scale($scale);
 4235     disp(0x0);   // no displacement
 4236   %}
 4237 %}
 4238 
 4239 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: matches
      // (AddP (AddP reg (LShiftI ireg scale)) off).
 4240 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4241   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4242 
 4243   op_cost(10);
 4244   format %{"[$reg + $off + $ireg << $scale]" %}
 4245   interface(MEMORY_INTER) %{
 4246     base($reg);
 4247     index($ireg);
 4248     scale($scale);
 4249     disp($off);
 4250   %}
 4251 %}
 4252 
 4253 //----------Load Long Memory Operands------------------------------------------
 4254 // The load-long idiom will use its address expression again after loading
 4255 // the first word of the long.  If the load-long destination overlaps with
 4256 // registers used in the addressing expression, the 2nd half will be loaded
 4257 // from a clobbered address.  Fix this by requiring that load-long use
 4258 // address registers that do not overlap with the load-long target.
 4259 
 4260 // load-long support: pointer register operand restricted to the esi_reg
      // register class, so the address register of a load-long is pinned to
      // that class. Matches both the ideal RegP type and the eSIRegP operand.
 4261 operand load_long_RegP() %{
 4262   constraint(ALLOC_IN_RC(esi_reg));
 4263   match(RegP);
 4264   match(eSIRegP);
 4265   op_cost(100);
 4266   format %{  %}
 4267   interface(REG_INTER);
 4268 %}
 4269 
 4270 // Indirect Memory Operand Long: [reg] addressing where reg is a
      // load_long_RegP (allocated in esi_reg).
 4271 operand load_long_indirect(load_long_RegP reg) %{
 4272   constraint(ALLOC_IN_RC(esi_reg));
 4273   match(reg);
 4274 
 4275   format %{ "[$reg]" %}
 4276   interface(MEMORY_INTER) %{
 4277     base($reg);
 4278     index(0x4);  // 0x4 = no index
 4279     scale(0x0);
 4280     disp(0x0);   // no displacement
 4281   %}
 4282 %}
 4283 
 4284 // Indirect Memory Plus Long Offset Operand for load-long: matches
      // (AddP reg off) where reg is a load_long_RegP and off an immI constant.
 4285 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4286   match(AddP reg off);
 4287 
 4288   format %{ "[$reg + $off]" %}
 4289   interface(MEMORY_INTER) %{
 4290     base($reg);
 4291     index(0x4);  // 0x4 = no index
 4292     scale(0x0);
 4293     disp($off);
 4294   %}
 4295 %}
 4296 
 4297 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long addressing forms, both based on load_long_RegP
 4298 
 4299 
 4300 //----------Special Memory Operands--------------------------------------------
 4301 // Stack Slot Operand - This operand is used for loading and storing temporary
 4302 //                      values on the stack where a match requires a value to
 4303 //                      flow through memory.
 4304 operand stackSlotP(sRegP reg) %{
        // Memory view of a pointer stack slot (sRegP): ESP-based address whose
        // displacement is the slot's stack offset.
 4305   constraint(ALLOC_IN_RC(stack_slots));
 4306   // No match rule because this operand is only generated in matching
 4307   format %{ "[$reg]" %}
 4308   interface(MEMORY_INTER) %{
 4309     base(0x4);   // ESP
 4310     index(0x4);  // No Index
 4311     scale(0x0);  // No Scale
 4312     disp($reg);  // Stack Offset
 4313   %}
 4314 %}
 4315 
 4316 operand stackSlotI(sRegI reg) %{
        // Memory view of an int stack slot (sRegI): ESP-based address whose
        // displacement is the slot's stack offset.
 4317   constraint(ALLOC_IN_RC(stack_slots));
 4318   // No match rule because this operand is only generated in matching
 4319   format %{ "[$reg]" %}
 4320   interface(MEMORY_INTER) %{
 4321     base(0x4);   // ESP
 4322     index(0x4);  // No Index
 4323     scale(0x0);  // No Scale
 4324     disp($reg);  // Stack Offset
 4325   %}
 4326 %}
 4327 
 4328 operand stackSlotF(sRegF reg) %{
        // Memory view of a float stack slot (sRegF): ESP-based address whose
        // displacement is the slot's stack offset.
 4329   constraint(ALLOC_IN_RC(stack_slots));
 4330   // No match rule because this operand is only generated in matching
 4331   format %{ "[$reg]" %}
 4332   interface(MEMORY_INTER) %{
 4333     base(0x4);   // ESP
 4334     index(0x4);  // No Index
 4335     scale(0x0);  // No Scale
 4336     disp($reg);  // Stack Offset
 4337   %}
 4338 %}
 4339 
 4340 operand stackSlotD(sRegD reg) %{
        // Memory view of a double stack slot (sRegD): ESP-based address whose
        // displacement is the slot's stack offset.
 4341   constraint(ALLOC_IN_RC(stack_slots));
 4342   // No match rule because this operand is only generated in matching
 4343   format %{ "[$reg]" %}
 4344   interface(MEMORY_INTER) %{
 4345     base(0x4);   // ESP
 4346     index(0x4);  // No Index
 4347     scale(0x0);  // No Scale
 4348     disp($reg);  // Stack Offset
 4349   %}
 4350 %}
 4351 
 4352 operand stackSlotL(sRegL reg) %{
        // Memory view of a long stack slot (sRegL): ESP-based address whose
        // displacement is the slot's stack offset.
 4353   constraint(ALLOC_IN_RC(stack_slots));
 4354   // No match rule because this operand is only generated in matching
 4355   format %{ "[$reg]" %}
 4356   interface(MEMORY_INTER) %{
 4357     base(0x4);   // ESP
 4358     index(0x4);  // No Index
 4359     scale(0x0);  // No Scale
 4360     disp($reg);  // Stack Offset
 4361   %}
 4362 %}
 4363 
 4364 //----------Conditional Branch Operands----------------------------------------
 4365 // Comparison Op  - This is the operation of the comparison, and is limited to
 4366 //                  the following set of codes:
 4367 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4368 //
 4369 // Other attributes of the comparison, such as unsignedness, are specified
 4370 // by the comparison instruction that sets a condition code flags register.
 4371 // That result is represented by a flags operand whose subtype is appropriate
 4372 // to the unsignedness (etc.) of the comparison.
 4373 //
 4374 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4375 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4376 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4377 
 4378 // Comparison Code: matches any Bool and maps each test to the l/ge/le/g
      // family of condition mnemonics (plus e/ne and o/no).
      // NOTE(review): the first argument of each entry is presumably the
      // condition-code encoding and the second the mnemonic suffix - confirm.
 4379 operand cmpOp() %{
 4380   match(Bool);
 4381 
 4382   format %{ "" %}
 4383   interface(COND_INTER) %{
 4384     equal(0x4, "e");
 4385     not_equal(0x5, "ne");
 4386     less(0xC, "l");
 4387     greater_equal(0xD, "ge");
 4388     less_equal(0xE, "le");
 4389     greater(0xF, "g");
 4390     overflow(0x0, "o");
 4391     no_overflow(0x1, "no");
 4392   %}
 4393 %}
 4394 
 4395 // Comparison Code, unsigned compare.  Used by FP also, with
 4396 // C2 (unordered) turned into GT or LT already.  The other bits
 4397 // C0 and C3 are turned into Carry & Zero flags.
      // Ordering tests map to the b/nb/be/nbe mnemonics instead of the
      // l/ge/le/g mnemonics used by cmpOp; e/ne and o/no are the same.
 4398 operand cmpOpU() %{
 4399   match(Bool);
 4400 
 4401   format %{ "" %}
 4402   interface(COND_INTER) %{
 4403     equal(0x4, "e");
 4404     not_equal(0x5, "ne");
 4405     less(0x2, "b");
 4406     greater_equal(0x3, "nb");
 4407     less_equal(0x6, "be");
 4408     greater(0x7, "nbe");
 4409     overflow(0x0, "o");
 4410     no_overflow(0x1, "no");
 4411   %}
 4412 %}
 4413 
 4414 // Floating comparisons that don't require any fixup for the unordered case.
      // The predicate limits this operand to Bool nodes whose test is lt, ge, le
      // or gt; the condition table itself is the same as cmpOpU's.
 4415 operand cmpOpUCF() %{
 4416   match(Bool);
 4417   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4418             n->as_Bool()->_test._test == BoolTest::ge ||
 4419             n->as_Bool()->_test._test == BoolTest::le ||
 4420             n->as_Bool()->_test._test == BoolTest::gt);
 4421   format %{ "" %}
 4422   interface(COND_INTER) %{
 4423     equal(0x4, "e");
 4424     not_equal(0x5, "ne");
 4425     less(0x2, "b");
 4426     greater_equal(0x3, "nb");
 4427     less_equal(0x6, "be");
 4428     greater(0x7, "nbe");
 4429     overflow(0x0, "o");
 4430     no_overflow(0x1, "no");
 4431   %}
 4432 %}
 4433 
 4434 
 4435 // Floating comparisons that can be fixed up with extra conditional jumps.
      // The predicate limits this operand to Bool nodes whose test is ne or eq;
      // the condition table itself is the same as cmpOpU's.
 4436 operand cmpOpUCF2() %{
 4437   match(Bool);
 4438   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4439             n->as_Bool()->_test._test == BoolTest::eq);
 4440   format %{ "" %}
 4441   interface(COND_INTER) %{
 4442     equal(0x4, "e");
 4443     not_equal(0x5, "ne");
 4444     less(0x2, "b");
 4445     greater_equal(0x3, "nb");
 4446     less_equal(0x6, "be");
 4447     greater(0x7, "nbe");
 4448     overflow(0x0, "o");
 4449     no_overflow(0x1, "no");
 4450   %}
 4451 %}
 4452 
 4453 // Comparison Code for FP conditional move. The predicate rejects Bool nodes
      // testing overflow / no_overflow, so the two overflow entries below exist
      // only to complete the table. The six ordinary entries carry a numeric
      // code only, without a mnemonic string.
      // NOTE(review): the 0x0C0..0x1D0 values are presumably FCMOVcc opcode
      // bits - confirm against the encoding class that consumes them.
 4454 operand cmpOp_fcmov() %{
 4455   match(Bool);
 4456 
 4457   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4458             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4459   format %{ "" %}
 4460   interface(COND_INTER) %{
 4461     equal        (0x0C8);
 4462     not_equal    (0x1C8);
 4463     less         (0x0C0);
 4464     greater_equal(0x1C0);
 4465     less_equal   (0x0D0);
 4466     greater      (0x1D0);
 4467     overflow(0x0, "o"); // not really supported by the instruction
 4468     no_overflow(0x1, "no"); // not really supported by the instruction
 4469   %}
 4470 %}
 4471 
 4472 // Comparison Code used in long compares. The ordering tests are mirrored
      // relative to cmpOp (less -> "g", greater_equal -> "le", less_equal -> "ge",
      // greater -> "l"), as needed when the compared operands are swapped;
      // equal / not_equal and the overflow entries are unchanged.
 4473 operand cmpOp_commute() %{
 4474   match(Bool);
 4475 
 4476   format %{ "" %}
 4477   interface(COND_INTER) %{
 4478     equal(0x4, "e");
 4479     not_equal(0x5, "ne");
 4480     less(0xF, "g");
 4481     greater_equal(0xE, "le");
 4482     less_equal(0xD, "ge");
 4483     greater(0xC, "l");
 4484     overflow(0x0, "o");
 4485     no_overflow(0x1, "no");
 4486   %}
 4487 %}
 4488 
 4489 // Comparison Code used in unsigned long compares. The ordering tests are
      // mirrored relative to cmpOpU (less -> "nbe", greater_equal -> "be",
      // less_equal -> "nb", greater -> "b"), as needed when the compared
      // operands are swapped; equal / not_equal and overflow are unchanged.
 4490 operand cmpOpU_commute() %{
 4491   match(Bool);
 4492 
 4493   format %{ "" %}
 4494   interface(COND_INTER) %{
 4495     equal(0x4, "e");
 4496     not_equal(0x5, "ne");
 4497     less(0x7, "nbe");
 4498     greater_equal(0x6, "be");
 4499     less_equal(0x3, "nb");
 4500     greater(0x2, "b");
 4501     overflow(0x0, "o");
 4502     no_overflow(0x1, "no");
 4503   %}
 4504 %}
 4505 
 4506 //----------OPERAND CLASSES----------------------------------------------------
 4507 // Operand Classes are groups of operands that are used to simplify
 4508 // instruction definitions by not requiring the AD writer to specify separate
 4509 // instructions for every form of operand when the instruction accepts
 4510 // multiple operand types with the same basic encoding and format.  The classic
 4511 // case of this is memory operands.
 4512 
 4513 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,  // every general addressing form defined above
 4514                indIndex, indIndexScale, indIndexScaleOffset);
 4515 
 4516 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4517 // This means some kind of offset is always required and you cannot use
 4518 // an oop as the offset (done when working on static globals).
      // Accordingly this class lists the same operands as 'memory' except
      // indOffset32X, whose offset is a pointer constant (immP).
 4519 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4520                     indIndex, indIndexScale, indIndexScaleOffset);
 4521 
 4522 
 4523 //----------PIPELINE-----------------------------------------------------------
 4524 // Rules which define the behavior of the target architecture's pipeline.
 4525 pipeline %{
 4526 
 4527 //----------ATTRIBUTES---------------------------------------------------------
 4528 attributes %{
 4529   variable_size_instructions;        // Instructions vary in size
 4530   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4531   instruction_unit_size = 1;         // Instruction sizes are counted in units of 1 byte
 4532   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4533   instruction_fetch_units = 1;       // of 16 bytes
 4534 
 4535   // List of nop instructions
 4536   nops( MachNop );
 4537 %}
 4538 
 4539 //----------RESOURCES----------------------------------------------------------
 4540 // Resources are the functional units available to the machine
 4541 
 4542 // Generic P2/P3 pipeline
 4543 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4544 // 3 instructions decoded per cycle.
 4545 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4546 // 2 ALU op, only ALU0 handles mul/div instructions.
      // DECODE, MEM and ALU are group names: each stands for "any one of" the
      // individual units OR-ed together in its definition.
 4547 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4548            MS0, MS1, MEM = MS0 | MS1,
 4549            BR, FPU,
 4550            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4551 
 4552 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4553 // Pipeline Description specifies the stages in the machine's pipeline
 4554 
 4555 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the
      // pipe_class definitions below.
 4556 pipe_desc(S0, S1, S2, S3, S4, S5);
 4557 
 4558 //----------PIPELINE CLASSES---------------------------------------------------
 4559 // Pipeline Classes describe the stages in which input and output are
 4560 // referenced by the hardware pipeline.
 4561 
 4562 // Naming convention: ialu or fpu
 4563 // Then: _reg
 4564 // Then: _reg if there is a 2nd register
 4565 // Then: _long if it's a pair of instructions implementing a long
 4566 // Then: _fat if it requires the big decoder
 4567 //   Or: _mem if it requires the big decoder and a memory unit.
 4568 
 4569 // Integer ALU reg operation: dst is read in S3 and written in S4.
 4570 pipe_class ialu_reg(rRegI dst) %{
 4571     single_instruction;
 4572     dst    : S4(write);
 4573     dst    : S3(read);
 4574     DECODE : S0;        // any decoder
 4575     ALU    : S3;        // any alu
 4576 %}
 4577 
 4578 // Long ALU reg operation: a pair of instructions; dst is read in S3 and
      // written in S4.
 4579 pipe_class ialu_reg_long(eRegL dst) %{
 4580     instruction_count(2);
 4581     dst    : S4(write);
 4582     dst    : S3(read);
 4583     DECODE : S0(2);     // any 2 decoders
 4584     ALU    : S3(2);     // both alus
 4585 %}
 4586 
 4587 // Integer ALU reg operation using big decoder: same as ialu_reg except that
      // decoding is restricted to D0.
 4588 pipe_class ialu_reg_fat(rRegI dst) %{
 4589     single_instruction;
 4590     dst    : S4(write);
 4591     dst    : S3(read);
 4592     D0     : S0;        // big decoder only
 4593     ALU    : S3;        // any alu
 4594 %}
 4595 
 4596 // Long ALU reg operation using big decoder: same as ialu_reg_long except
      // that decoding is restricted to D0.
 4597 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4598     instruction_count(2);
 4599     dst    : S4(write);
 4600     dst    : S3(read);
 4601     D0     : S0(2);     // big decoder only; twice
 4602     ALU    : S3(2);     // any 2 alus
 4603 %}
 4604 
 4605 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
 4606 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4607     single_instruction;
 4608     dst    : S4(write);
 4609     src    : S3(read);
 4610     DECODE : S0;        // any decoder
 4611     ALU    : S3;        // any alu
 4612 %}
 4613 
 4614 // Long ALU reg-reg operation: a pair of instructions; src is read in S3,
      // dst is written in S4.
 4615 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4616     instruction_count(2);
 4617     dst    : S4(write);
 4618     src    : S3(read);
 4619     DECODE : S0(2);     // any 2 decoders
 4620     ALU    : S3(2);     // both alus
 4621 %}
 4622 
 4623 // Integer ALU operation using big decoder. Despite the reg_reg name, src is
      // declared as a 'memory' operand here; no MEM resource is reserved.
 4624 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4625     single_instruction;
 4626     dst    : S4(write);
 4627     src    : S3(read);
 4628     D0     : S0;        // big decoder only
 4629     ALU    : S3;        // any alu
 4630 %}
 4631 
 4632 // Long ALU reg-reg operation using big decoder: same as ialu_reg_reg_long
      // except that decoding is restricted to D0.
 4633 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4634     instruction_count(2);
 4635     dst    : S4(write);
 4636     src    : S3(read);
 4637     D0     : S0(2);     // big decoder only; twice
 4638     ALU    : S3(2);     // both alus
 4639 %}
 4640 
 4641 // Integer ALU reg-mem operation: the memory operand is read in S3, the ALU
      // is used one stage later (S4) and dst is written in S5.
 4642 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4643     single_instruction;
 4644     dst    : S5(write);
 4645     mem    : S3(read);
 4646     D0     : S0;        // big decoder only
 4647     ALU    : S4;        // any alu
 4648     MEM    : S3;        // any mem
 4649 %}
 4650 
 4651 // Long ALU reg-mem operation: a pair of instructions using a
      // load_long_memory address; mem is read in S3 and dst is written in S5.
 4652 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4653     instruction_count(2);
 4654     dst    : S5(write);
 4655     mem    : S3(read);
 4656     D0     : S0(2);     // big decoder only; twice
 4657     ALU    : S4(2);     // any 2 alus
 4658     MEM    : S3(2);     // both mems
 4659 %}
 4660 
 4661 // Integer mem operation (prefetch): only reads the memory operand; no
      // register is written and no ALU is reserved.
 4662 pipe_class ialu_mem(memory mem)
 4663 %{
 4664     single_instruction;
 4665     mem    : S3(read);
 4666     D0     : S0;        // big decoder only
 4667     MEM    : S3;        // any mem
 4668 %}
 4669 
 4670 // Integer Store to Memory from a register: the address is read in S3 and
      // the stored value (src) in S5.
 4671 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4672     single_instruction;
 4673     mem    : S3(read);
 4674     src    : S5(read);
 4675     D0     : S0;        // big decoder only
 4676     ALU    : S4;        // any alu
 4677     MEM    : S3;        // any mem
 4678 %}
 4679 
 4680 // Long Store to Memory: a pair of instructions; the address is read in S3
      // and the stored value (src) in S5.
 4681 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4682     instruction_count(2);
 4683     mem    : S3(read);
 4684     src    : S5(read);
 4685     D0     : S0(2);     // big decoder only; twice
 4686     ALU    : S4(2);     // any 2 alus
 4687     MEM    : S3(2);     // Both mems
 4688 %}
 4689 
 4690 // Integer Store of an immediate to Memory: only the address operand is
      // tracked, since there is no register source.
 4691 pipe_class ialu_mem_imm(memory mem) %{
 4692     single_instruction;
 4693     mem    : S3(read);
 4694     D0     : S0;        // big decoder only
 4695     ALU    : S4;        // any alu
 4696     MEM    : S3;        // any mem
 4697 %}
 4698 
 4699 // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to the
      // big decoder and to ALU0.
 4700 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4701     single_instruction;
 4702     dst    : S4(write);
 4703     src    : S3(read);
 4704     D0     : S0;        // Big decoder only
 4705     ALU0   : S3;        // only alu0
 4706 %}
 4707 
 4708 // Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to ALU0.
 4709 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4710     single_instruction;
 4711     dst    : S5(write);
 4712     mem    : S3(read);
 4713     D0     : S0;        // big decoder only
 4714     ALU0   : S4;        // ALU0 only
 4715     MEM    : S3;        // any mem
 4716 %}
 4717 
 4718 // Integer ALU reg-reg operation that sets the flags: both sources are read
      // in S3 and the flags register cr is written in S4.
 4719 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4720     single_instruction;
 4721     cr     : S4(write);
 4722     src1   : S3(read);
 4723     src2   : S3(read);
 4724     DECODE : S0;        // any decoder
 4725     ALU    : S3;        // any alu
 4726 %}
 4727 
 4728 // Integer ALU reg-imm operation that sets the flags: src1 is read in S3 and
      // the flags register cr is written in S4.
 4729 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4730     single_instruction;
 4731     cr     : S4(write);
 4732     src1   : S3(read);
 4733     DECODE : S0;        // any decoder
 4734     ALU    : S3;        // any alu
 4735 %}
 4736 
 4737 // Integer ALU reg-mem operation that sets the flags: src1 and the memory
      // operand src2 are read in S3 and the flags register cr is written in S4.
 4738 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4739     single_instruction;
 4740     cr     : S4(write);
 4741     src1   : S3(read);
 4742     src2   : S3(read);
 4743     D0     : S0;        // big decoder only
 4744     ALU    : S4;        // any alu
 4745     MEM    : S3;        // any mem
 4746 %}
 4747 
 4748 // Four-instruction sequence over three integer registers: p and q are read
      // in S3 and y in S4. No operand is declared as written and no ALU is
      // reserved.
      // NOTE(review): presumably used by the compare-less-than idioms - confirm
      // against the instructs that reference this class.
 4749 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4750     instruction_count(4);
 4751     y      : S4(read);
 4752     q      : S3(read);
 4753     p      : S3(read);
 4754     DECODE : S0(4);     // any decoder
 4755 %}
 4756 
 4757 // Conditional move reg-reg: src and the flags cr are read in S3, dst is
      // written in S4. Only a decoder is reserved.
 4758 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4759     single_instruction;
 4760     dst    : S4(write);
 4761     src    : S3(read);
 4762     cr     : S3(read);
 4763     DECODE : S0;        // any decoder
 4764 %}
 4765 
 4766 // Conditional move reg-mem: the memory source and the flags cr are read in
      // S3, dst is written in S4.
 4767 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4768     single_instruction;
 4769     dst    : S4(write);
 4770     src    : S3(read);
 4771     cr     : S3(read);
 4772     DECODE : S0;        // any decoder
 4773     MEM    : S3;        // any mem
 4774 %}
 4775 
 4776 // Conditional move reg-reg long: src and the flags cr are read in S3, dst
      // is written in S4.
      // NOTE(review): declared single_instruction although it reserves two
      // decoders; the other long classes use instruction_count(2) - confirm.
 4777 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4778     single_instruction;
 4779     dst    : S4(write);
 4780     src    : S3(read);
 4781     cr     : S3(read);
 4782     DECODE : S0(2);     // any 2 decoders
 4783 %}
 4784 
 4785 // Conditional move double reg-reg: destination is a regDPR1 operand and the
      // source a regDPR; src and cr are read in S3, dst is written in S4.
 4786 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4787     single_instruction;
 4788     dst    : S4(write);
 4789     src    : S3(read);
 4790     cr     : S3(read);
 4791     DECODE : S0;        // any decoder
 4792 %}
 4793 
 4794 // Float reg operation: two instructions; dst is only declared as read (S3).
 4795 pipe_class fpu_reg(regDPR dst) %{
 4796     instruction_count(2);
 4797     dst    : S3(read);
 4798     DECODE : S0(2);     // any 2 decoders
 4799     FPU    : S3;
 4800 %}
 4801 
 4802 // Float reg-reg operation: two instructions; src is read in S3 and dst is
      // written in S4.
 4803 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4804     instruction_count(2);
 4805     dst    : S4(write);
 4806     src    : S3(read);
 4807     DECODE : S0(2);     // any 2 decoders
 4808     FPU    : S3;
 4809 %}
 4810 
 4811 // Float reg-reg-reg operation: three instructions; both sources are read
      // in S3 and dst is written in S4.
 4812 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4813     instruction_count(3);
 4814     dst    : S4(write);
 4815     src1   : S3(read);
 4816     src2   : S3(read);
 4817     DECODE : S0(3);     // any 3 decoders
 4818     FPU    : S3(2);
 4819 %}
 4820 
 4821 // Float operation with three register sources: four instructions; all
      // sources are read in S3 and dst is written in S4.
 4822 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4823     instruction_count(4);
 4824     dst    : S4(write);
 4825     src1   : S3(read);
 4826     src2   : S3(read);
 4827     src3   : S3(read);
 4828     DECODE : S0(4);     // any decoder; count is 4, matching instruction_count(4)
 4829     FPU    : S3(2);
 4830 %}
 4831 
 4832 // Float operation with one memory source and two register sources: four
      // instructions; D0 is reserved in S0 and three more decodes in S1.
 4833 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4834     instruction_count(4);
 4835     dst    : S4(write);
 4836     src1   : S3(read);
 4837     src2   : S3(read);
 4838     src3   : S3(read);
 4839     DECODE : S1(3);     // any 3 decoders
 4840     D0     : S0;        // Big decoder only
 4841     FPU    : S3(2);
 4842     MEM    : S3;        // any mem
 4843 %}
 4844 
 4845 // Float reg-mem operation: two instructions; mem is read in S3, the FPU is
      // used in S4 and dst is written in S5.
 4846 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4847     instruction_count(2);
 4848     dst    : S5(write);
 4849     mem    : S3(read);
 4850     D0     : S0;        // big decoder only
 4851     DECODE : S1;        // any decoder for FPU POP
 4852     FPU    : S4;
 4853     MEM    : S3;        // any mem
 4854 %}
 4855 
 4856 // Float reg-reg-mem operation: three instructions; src1 and mem are read in
      // S3, the FPU is used in S4 and dst is written in S5.
 4857 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4858     instruction_count(3);
 4859     dst    : S5(write);
 4860     src1   : S3(read);
 4861     mem    : S3(read);
 4862     D0     : S0;        // big decoder only
 4863     DECODE : S1(2);     // any decoder for FPU POP
 4864     FPU    : S4;
 4865     MEM    : S3;        // any mem
 4866 %}
 4867 
 4868 // Float mem-reg operation (store): two instructions; the address is read in
      // S3 and the register source in S5. Nothing is declared as written.
 4869 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4870     instruction_count(2);
 4871     src    : S5(read);
 4872     mem    : S3(read);
 4873     DECODE : S0;        // any decoder for FPU PUSH
 4874     D0     : S1;        // big decoder only
 4875     FPU    : S4;
 4876     MEM    : S3;        // any mem
 4877 %}
 4878 
 4879 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
          // Float operation with a memory operand and two register sources:
          // three instructions; all three operands are read in S3.
 4880     instruction_count(3);
 4881     src1   : S3(read);
 4882     src2   : S3(read);
 4883     mem    : S3(read);
 4884     DECODE : S0(2);     // any decoder for FPU PUSH
 4885     D0     : S1;        // big decoder only
 4886     FPU    : S4;
 4887     MEM    : S3;        // any mem
 4888 %}
 4889 
 4890 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
          // Float operation with a register source and two memory operands:
          // three instructions; src1 and src2 are read in S3, mem in S4.
 4891     instruction_count(3);
 4892     src1   : S3(read);
 4893     src2   : S3(read);
 4894     mem    : S4(read);
 4895     DECODE : S0;        // any decoder for FPU PUSH
 4896     D0     : S0(2);     // big decoder only
 4897     FPU    : S4;
 4898     MEM    : S3(2);     // any mem
 4899 %}
 4900 
 4901 pipe_class fpu_mem_mem(memory dst, memory src1) %{
          // Memory-to-memory float operation: two instructions; src1 is read in
          // S3 and the dst address in S4. No FPU resource is reserved.
 4902     instruction_count(2);
 4903     src1   : S3(read);
 4904     dst    : S4(read);
 4905     D0     : S0(2);     // big decoder only
 4906     MEM    : S3(2);     // any mem
 4907 %}
 4908 
 4909 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
          // Float operation on three memory operands: three instructions; the
          // two sources are read in S3 and the dst address in S4.
 4910     instruction_count(3);
 4911     src1   : S3(read);
 4912     src2   : S3(read);
 4913     dst    : S4(read);
 4914     D0     : S0(3);     // big decoder only
 4915     FPU    : S4;
 4916     MEM    : S3(3);     // any mem
 4917 %}
 4918 
 4919 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
          // Float operation with a memory operand and one register source:
          // three instructions; both operands are read in S4.
          // NOTE(review): the _con suffix presumably refers to a constant
          // operand that is not tracked here - confirm against the users.
 4920     instruction_count(3);
 4921     src1   : S4(read);
 4922     mem    : S4(read);
 4923     DECODE : S0;        // any decoder for FPU PUSH
 4924     D0     : S0(2);     // big decoder only
 4925     FPU    : S4;
 4926     MEM    : S3(2);     // any mem
 4927 %}
 4928 
 4929 // Float load constant: two instructions; a memory unit is reserved in S3
      // for the load and dst is written in S5.
 4930 pipe_class fpu_reg_con(regDPR dst) %{
 4931     instruction_count(2);
 4932     dst    : S5(write);
 4933     D0     : S0;        // big decoder only for the load
 4934     DECODE : S1;        // any decoder for FPU POP
 4935     FPU    : S4;
 4936     MEM    : S3;        // any mem
 4937 %}
 4938 
 4939 // Float load constant combined with a register source: three instructions;
      // src is read in S3 and dst is written in S5.
 4940 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4941     instruction_count(3);
 4942     dst    : S5(write);
 4943     src    : S3(read);
 4944     D0     : S0;        // big decoder only for the load
 4945     DECODE : S1(2);     // any decoder for FPU POP
 4946     FPU    : S4;
 4947     MEM    : S3;        // any mem
 4948 %}
 4949 
 4950 // Unconditional branch: reserves only the branch unit (BR) in S3; no
      // decoder or operand usage is declared.
 4951 pipe_class pipe_jmp( label labl ) %{
 4952     single_instruction;
 4953     BR   : S3;
 4954 %}
 4955 
 4956 // Conditional branch: the flags register cr is read in S1 and the branch
      // unit (BR) is reserved in S3.
 4957 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4958     single_instruction;
 4959     cr    : S1(read);
 4960     BR    : S3;
 4961 %}
 4962 
 4963 // Allocation idiom: one serializing instruction with a fixed latency of 6.
      // heap_ptr is read in S3 and dst is written in S5; decoders, a memory
      // unit, ALUs and the branch unit are all reserved.
 4964 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4965     instruction_count(1); force_serialization;
 4966     fixed_latency(6);
 4967     heap_ptr : S3(read);
 4968     DECODE   : S0(3);
 4969     D0       : S2;
 4970     MEM      : S3;
 4971     ALU      : S3(2);
 4972     dst      : S5(write);
 4973     BR       : S5;
 4974 %}
 4975 
 4976 // Generic big/slow expanded idiom: modelled as 10 instructions spanning
      // multiple bundles, serializing, with a fixed latency of 100. It takes no
      // operands.
 4977 pipe_class pipe_slow(  ) %{
 4978     instruction_count(10); multiple_bundles; force_serialization;
 4979     fixed_latency(100);
 4980     D0  : S0(2);
 4981     MEM : S3(2);
 4982 %}
 4983 
 4984 // The real do-nothing guy: zero instructions, no operands and no resources.
 4985 pipe_class empty( ) %{
 4986     instruction_count(0);
 4987 %}
 4988 
 4989 // Define the class for the Nop node: MachNop (the node listed in nops() in
      // the attributes block) is bound to the 'empty' pipeline class.
 4990 define %{
 4991    MachNop = empty;
 4992 %}
 4993 
 4994 %}
 4995 
 4996 //----------INSTRUCTIONS-------------------------------------------------------
 4997 //
 4998 // match      -- States which machine-independent subtree may be replaced
 4999 //               by this instruction.
 5000 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5001 //               selection to identify a minimum cost tree of machine
 5002 //               instructions that matches a tree of machine-independent
 5003 //               instructions.
 5004 // format     -- A string providing the disassembly for this instruction.
 5005 //               The value of an instruction's operand may be inserted
 5006 //               by referring to it with a '$' prefix.
 5007 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5008 //               to within an encode class as $primary, $secondary, and $tertiary
 5009 //               respectively.  The primary opcode is commonly used to
 5010 //               indicate the type of machine instruction, while secondary
 5011 //               and tertiary are often used for prefix options or addressing
 5012 //               modes.
 5013 // ins_encode -- A list of encode classes with parameters. The encode class
 5014 //               name must have been defined in an 'enc_class' specification
 5015 //               in the encode section of the architecture description.
 5016 
 5017 //----------BSWAP-Instruction--------------------------------------------------
 5018 instruct bytes_reverse_int(rRegI dst) %{
        // Byte-reverses a 32-bit int in place: matches ReverseBytesI with the
        // same register as input and result, emitted as a single BSWAP.
 5019   match(Set dst (ReverseBytesI dst));
 5020 
 5021   format %{ "BSWAP  $dst" %}
        // Primary opcode 0x0F, secondary 0xC8.
        // NOTE(review): OpcSReg presumably merges the register number into the
        // secondary opcode byte - confirm against its enc_class.
 5022   opcode(0x0F, 0xC8);
 5023   ins_encode( OpcP, OpcSReg(dst) );
 5024   ins_pipe( ialu_reg );
 5025 %}
 5026 
 5027 instruct bytes_reverse_long(eRegL dst) %{
        // Byte-reverses a 64-bit long held in a register pair, in place. Per
        // the format string: BSWAP each 32-bit half, then exchange the halves.
 5028   match(Set dst (ReverseBytesL dst));
 5029 
 5030   format %{ "BSWAP  $dst.lo\n\t"
 5031             "BSWAP  $dst.hi\n\t"
 5032             "XCHG   $dst.lo $dst.hi" %}
 5033 
 5034   ins_cost(125);
 5035   ins_encode( bswap_long_bytes(dst) );
        // NOTE(review): ialu_reg_reg is declared with (rRegI dst, rRegI src)
        // while this instruct has a single eRegL operand - confirm intended.
 5036   ins_pipe( ialu_reg_reg);
 5037 %}
 5038 
 5039 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
        // Byte-reverses an unsigned 16-bit value in place (ReverseBytesUS).
        // The flags register is declared killed.
 5040   match(Set dst (ReverseBytesUS dst));
 5041   effect(KILL cr);
 5042 
 5043   format %{ "BSWAP  $dst\n\t"
 5044             "SHR    $dst,16\n\t" %}
 5045   ins_encode %{
          // Swap all four bytes, then shift right by 16 with SHR (logical
          // shift) so the swapped halfword ends up in the low 16 bits.
 5046     __ bswapl($dst$$Register);
 5047     __ shrl($dst$$Register, 16);
 5048   %}
 5049   ins_pipe( ialu_reg );
 5050 %}
 5051 
 5052 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
        // Byte-reverses a signed 16-bit value in place (ReverseBytesS).
        // The flags register is declared killed.
 5053   match(Set dst (ReverseBytesS dst));
 5054   effect(KILL cr);
 5055 
 5056   format %{ "BSWAP  $dst\n\t"
 5057             "SAR    $dst,16\n\t" %}
 5058   ins_encode %{
          // Swap all four bytes, then shift right by 16 with SAR (arithmetic
          // shift), which differs from the unsigned variant's SHR.
 5059     __ bswapl($dst$$Register);
 5060     __ sarl($dst$$Register, 16);
 5061   %}
 5062   ins_pipe( ialu_reg );
 5063 %}
 5064 
 5065 
 5066 //---------- Zeros Count Instructions ------------------------------------------
 5067 
 5068 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountLeadingZerosI as a single LZCNT; selected only when
        // UseCountLeadingZerosInstruction is set. Flags are declared killed.
 5069   predicate(UseCountLeadingZerosInstruction);
 5070   match(Set dst (CountLeadingZerosI src));
 5071   effect(KILL cr);
 5072 
 5073   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5074   ins_encode %{
 5075     __ lzcntl($dst$$Register, $src$$Register);
 5076   %}
 5077   ins_pipe(ialu_reg);
 5078 %}
 5079 
 5080 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountLeadingZerosI fallback built on BSR; selected only when
        // UseCountLeadingZerosInstruction is not set. Flags are declared killed.
 5081   predicate(!UseCountLeadingZerosInstruction);
 5082   match(Set dst (CountLeadingZerosI src));
 5083   effect(KILL cr);
 5084 
 5085   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5086             "JNZ    skip\n\t"
 5087             "MOV    $dst, -1\n"
 5088       "skip:\n\t"
 5089             "NEG    $dst\n\t"
 5090             "ADD    $dst, 31" %}
 5091   ins_encode %{
 5092     Register Rdst = $dst$$Register;
 5093     Register Rsrc = $src$$Register;
 5094     Label skip;
          // BSR yields the index of the highest set bit and sets ZF when the
          // source is zero (x86 BSR definition); in that case force Rdst to -1.
 5095     __ bsrl(Rdst, Rsrc);
 5096     __ jccb(Assembler::notZero, skip);
 5097     __ movl(Rdst, -1);
 5098     __ bind(skip);
          // result = (BitsPerInt - 1) - Rdst, so the forced -1 gives BitsPerInt.
 5099     __ negl(Rdst);
 5100     __ addl(Rdst, BitsPerInt - 1);
 5101   %}
 5102   ins_pipe(ialu_reg);
 5103 %}
 5104 
 5105 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5106   predicate(UseCountLeadingZerosInstruction);
 5107   match(Set dst (CountLeadingZerosL src));
 5108   effect(TEMP dst, KILL cr);
 5109 
 5110   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5111             "JNC    done\n\t"
 5112             "LZCNT  $dst, $src.lo\n\t"
 5113             "ADD    $dst, 32\n"
 5114       "done:" %}
 5115   ins_encode %{
 5116     Register Rdst = $dst$$Register;
 5117     Register Rsrc = $src$$Register;
 5118     Label done;
 5119     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5120     __ jccb(Assembler::carryClear, done);
 5121     __ lzcntl(Rdst, Rsrc);
 5122     __ addl(Rdst, BitsPerInt);
 5123     __ bind(done);
 5124   %}
 5125   ins_pipe(ialu_reg);
 5126 %}
 5127 
 5128 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5129   predicate(!UseCountLeadingZerosInstruction);
 5130   match(Set dst (CountLeadingZerosL src));
 5131   effect(TEMP dst, KILL cr);
 5132 
 5133   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5134             "JZ     msw_is_zero\n\t"
 5135             "ADD    $dst, 32\n\t"
 5136             "JMP    not_zero\n"
 5137       "msw_is_zero:\n\t"
 5138             "BSR    $dst, $src.lo\n\t"
 5139             "JNZ    not_zero\n\t"
 5140             "MOV    $dst, -1\n"
 5141       "not_zero:\n\t"
 5142             "NEG    $dst\n\t"
 5143             "ADD    $dst, 63\n" %}
 5144  ins_encode %{
 5145     Register Rdst = $dst$$Register;
 5146     Register Rsrc = $src$$Register;
 5147     Label msw_is_zero;
 5148     Label not_zero;
 5149     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5150     __ jccb(Assembler::zero, msw_is_zero);
 5151     __ addl(Rdst, BitsPerInt);
 5152     __ jmpb(not_zero);
 5153     __ bind(msw_is_zero);
 5154     __ bsrl(Rdst, Rsrc);
 5155     __ jccb(Assembler::notZero, not_zero);
 5156     __ movl(Rdst, -1);
 5157     __ bind(not_zero);
 5158     __ negl(Rdst);
 5159     __ addl(Rdst, BitsPerLong - 1);
 5160   %}
 5161   ins_pipe(ialu_reg);
 5162 %}
 5163 
 5164 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5165   predicate(UseCountTrailingZerosInstruction);
 5166   match(Set dst (CountTrailingZerosI src));
 5167   effect(KILL cr);
 5168 
 5169   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5170   ins_encode %{
 5171     __ tzcntl($dst$$Register, $src$$Register);
 5172   %}
 5173   ins_pipe(ialu_reg);
 5174 %}
 5175 
 5176 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5177   predicate(!UseCountTrailingZerosInstruction);
 5178   match(Set dst (CountTrailingZerosI src));
 5179   effect(KILL cr);
 5180 
 5181   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5182             "JNZ    done\n\t"
 5183             "MOV    $dst, 32\n"
 5184       "done:" %}
 5185   ins_encode %{
 5186     Register Rdst = $dst$$Register;
 5187     Label done;
 5188     __ bsfl(Rdst, $src$$Register);
 5189     __ jccb(Assembler::notZero, done);
 5190     __ movl(Rdst, BitsPerInt);
 5191     __ bind(done);
 5192   %}
 5193   ins_pipe(ialu_reg);
 5194 %}
 5195 
 5196 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5197   predicate(UseCountTrailingZerosInstruction);
 5198   match(Set dst (CountTrailingZerosL src));
 5199   effect(TEMP dst, KILL cr);
 5200 
 5201   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5202             "JNC    done\n\t"
 5203             "TZCNT  $dst, $src.hi\n\t"
 5204             "ADD    $dst, 32\n"
 5205             "done:" %}
 5206   ins_encode %{
 5207     Register Rdst = $dst$$Register;
 5208     Register Rsrc = $src$$Register;
 5209     Label done;
 5210     __ tzcntl(Rdst, Rsrc);
 5211     __ jccb(Assembler::carryClear, done);
 5212     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5213     __ addl(Rdst, BitsPerInt);
 5214     __ bind(done);
 5215   %}
 5216   ins_pipe(ialu_reg);
 5217 %}
 5218 
 5219 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5220   predicate(!UseCountTrailingZerosInstruction);
 5221   match(Set dst (CountTrailingZerosL src));
 5222   effect(TEMP dst, KILL cr);
 5223 
 5224   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5225             "JNZ    done\n\t"
 5226             "BSF    $dst, $src.hi\n\t"
 5227             "JNZ    msw_not_zero\n\t"
 5228             "MOV    $dst, 32\n"
 5229       "msw_not_zero:\n\t"
 5230             "ADD    $dst, 32\n"
 5231       "done:" %}
 5232   ins_encode %{
 5233     Register Rdst = $dst$$Register;
 5234     Register Rsrc = $src$$Register;
 5235     Label msw_not_zero;
 5236     Label done;
 5237     __ bsfl(Rdst, Rsrc);
 5238     __ jccb(Assembler::notZero, done);
 5239     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5240     __ jccb(Assembler::notZero, msw_not_zero);
 5241     __ movl(Rdst, BitsPerInt);
 5242     __ bind(msw_not_zero);
 5243     __ addl(Rdst, BitsPerInt);
 5244     __ bind(done);
 5245   %}
 5246   ins_pipe(ialu_reg);
 5247 %}
 5248 
 5249 
 5250 //---------- Population Count Instructions -------------------------------------
 5251 
 5252 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5253   predicate(UsePopCountInstruction);
 5254   match(Set dst (PopCountI src));
 5255   effect(KILL cr);
 5256 
 5257   format %{ "POPCNT $dst, $src" %}
 5258   ins_encode %{
 5259     __ popcntl($dst$$Register, $src$$Register);
 5260   %}
 5261   ins_pipe(ialu_reg);
 5262 %}
 5263 
 5264 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5265   predicate(UsePopCountInstruction);
 5266   match(Set dst (PopCountI (LoadI mem)));
 5267   effect(KILL cr);
 5268 
 5269   format %{ "POPCNT $dst, $mem" %}
 5270   ins_encode %{
 5271     __ popcntl($dst$$Register, $mem$$Address);
 5272   %}
 5273   ins_pipe(ialu_reg);
 5274 %}
 5275 
 5276 // Note: Long.bitCount(long) returns an int.
 5277 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5278   predicate(UsePopCountInstruction);
 5279   match(Set dst (PopCountL src));
 5280   effect(KILL cr, TEMP tmp, TEMP dst);
 5281 
 5282   format %{ "POPCNT $dst, $src.lo\n\t"
 5283             "POPCNT $tmp, $src.hi\n\t"
 5284             "ADD    $dst, $tmp" %}
 5285   ins_encode %{
 5286     __ popcntl($dst$$Register, $src$$Register);
 5287     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5288     __ addl($dst$$Register, $tmp$$Register);
 5289   %}
 5290   ins_pipe(ialu_reg);
 5291 %}
 5292 
 5293 // Note: Long.bitCount(long) returns an int.
 5294 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5295   predicate(UsePopCountInstruction);
 5296   match(Set dst (PopCountL (LoadL mem)));
 5297   effect(KILL cr, TEMP tmp, TEMP dst);
 5298 
 5299   format %{ "POPCNT $dst, $mem\n\t"
 5300             "POPCNT $tmp, $mem+4\n\t"
 5301             "ADD    $dst, $tmp" %}
 5302   ins_encode %{
 5303     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5304     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5305     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5306     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5307     __ addl($dst$$Register, $tmp$$Register);
 5308   %}
 5309   ins_pipe(ialu_reg);
 5310 %}
 5311 
 5312 
 5313 //----------Load/Store/Move Instructions---------------------------------------
 5314 //----------Load Instructions--------------------------------------------------
 5315 // Load Byte (8bit signed)
 5316 instruct loadB(xRegI dst, memory mem) %{
 5317   match(Set dst (LoadB mem));
 5318 
 5319   ins_cost(125);
 5320   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5321 
 5322   ins_encode %{
 5323     __ movsbl($dst$$Register, $mem$$Address);
 5324   %}
 5325 
 5326   ins_pipe(ialu_reg_mem);
 5327 %}
 5328 
 5329 // Load Byte (8bit signed) into Long Register
 5330 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5331   match(Set dst (ConvI2L (LoadB mem)));
 5332   effect(KILL cr);
 5333 
 5334   ins_cost(375);
 5335   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5336             "MOV    $dst.hi,$dst.lo\n\t"
 5337             "SAR    $dst.hi,7" %}
 5338 
 5339   ins_encode %{
 5340     __ movsbl($dst$$Register, $mem$$Address);
 5341     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5342     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
 5343   %}
 5344 
 5345   ins_pipe(ialu_reg_mem);
 5346 %}
 5347 
 5348 // Load Unsigned Byte (8bit UNsigned)
 5349 instruct loadUB(xRegI dst, memory mem) %{
 5350   match(Set dst (LoadUB mem));
 5351 
 5352   ins_cost(125);
 5353   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5354 
 5355   ins_encode %{
 5356     __ movzbl($dst$$Register, $mem$$Address);
 5357   %}
 5358 
 5359   ins_pipe(ialu_reg_mem);
 5360 %}
 5361 
 5362 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5363 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5364   match(Set dst (ConvI2L (LoadUB mem)));
 5365   effect(KILL cr);
 5366 
 5367   ins_cost(250);
 5368   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5369             "XOR    $dst.hi,$dst.hi" %}
 5370 
 5371   ins_encode %{
 5372     Register Rdst = $dst$$Register;
 5373     __ movzbl(Rdst, $mem$$Address);
 5374     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5375   %}
 5376 
 5377   ins_pipe(ialu_reg_mem);
 5378 %}
 5379 
 5380 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5381 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5382   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5383   effect(KILL cr);
 5384 
 5385   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5386             "XOR    $dst.hi,$dst.hi\n\t"
 5387             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5388   ins_encode %{
 5389     Register Rdst = $dst$$Register;
 5390     __ movzbl(Rdst, $mem$$Address);
 5391     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5392     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5393   %}
 5394   ins_pipe(ialu_reg_mem);
 5395 %}
 5396 
 5397 // Load Short (16bit signed)
 5398 instruct loadS(rRegI dst, memory mem) %{
 5399   match(Set dst (LoadS mem));
 5400 
 5401   ins_cost(125);
 5402   format %{ "MOVSX  $dst,$mem\t# short" %}
 5403 
 5404   ins_encode %{
 5405     __ movswl($dst$$Register, $mem$$Address);
 5406   %}
 5407 
 5408   ins_pipe(ialu_reg_mem);
 5409 %}
 5410 
 5411 // Load Short (16 bit signed) to Byte (8 bit signed)
 5412 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5413   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5414 
 5415   ins_cost(125);
 5416   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5417   ins_encode %{
 5418     __ movsbl($dst$$Register, $mem$$Address);
 5419   %}
 5420   ins_pipe(ialu_reg_mem);
 5421 %}
 5422 
 5423 // Load Short (16bit signed) into Long Register
 5424 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5425   match(Set dst (ConvI2L (LoadS mem)));
 5426   effect(KILL cr);
 5427 
 5428   ins_cost(375);
 5429   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5430             "MOV    $dst.hi,$dst.lo\n\t"
 5431             "SAR    $dst.hi,15" %}
 5432 
 5433   ins_encode %{
 5434     __ movswl($dst$$Register, $mem$$Address);
 5435     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5436     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
 5437   %}
 5438 
 5439   ins_pipe(ialu_reg_mem);
 5440 %}
 5441 
 5442 // Load Unsigned Short/Char (16bit unsigned)
 5443 instruct loadUS(rRegI dst, memory mem) %{
 5444   match(Set dst (LoadUS mem));
 5445 
 5446   ins_cost(125);
 5447   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5448 
 5449   ins_encode %{
 5450     __ movzwl($dst$$Register, $mem$$Address);
 5451   %}
 5452 
 5453   ins_pipe(ialu_reg_mem);
 5454 %}
 5455 
 5456 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5457 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5458   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5459 
 5460   ins_cost(125);
 5461   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5462   ins_encode %{
 5463     __ movsbl($dst$$Register, $mem$$Address);
 5464   %}
 5465   ins_pipe(ialu_reg_mem);
 5466 %}
 5467 
 5468 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5469 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5470   match(Set dst (ConvI2L (LoadUS mem)));
 5471   effect(KILL cr);
 5472 
 5473   ins_cost(250);
 5474   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5475             "XOR    $dst.hi,$dst.hi" %}
 5476 
 5477   ins_encode %{
 5478     __ movzwl($dst$$Register, $mem$$Address);
 5479     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5480   %}
 5481 
 5482   ins_pipe(ialu_reg_mem);
 5483 %}
 5484 
 5485 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5486 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5487   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5488   effect(KILL cr);
 5489 
 5490   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5491             "XOR    $dst.hi,$dst.hi" %}
 5492   ins_encode %{
 5493     Register Rdst = $dst$$Register;
 5494     __ movzbl(Rdst, $mem$$Address);
 5495     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5496   %}
 5497   ins_pipe(ialu_reg_mem);
 5498 %}
 5499 
 5500 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5501 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5502   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5503   effect(KILL cr);
 5504 
 5505   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5506             "XOR    $dst.hi,$dst.hi\n\t"
 5507             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5508   ins_encode %{
 5509     Register Rdst = $dst$$Register;
 5510     __ movzwl(Rdst, $mem$$Address);
 5511     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5512     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5513   %}
 5514   ins_pipe(ialu_reg_mem);
 5515 %}
 5516 
 5517 // Load Integer
 5518 instruct loadI(rRegI dst, memory mem) %{
 5519   match(Set dst (LoadI mem));
 5520 
 5521   ins_cost(125);
 5522   format %{ "MOV    $dst,$mem\t# int" %}
 5523 
 5524   ins_encode %{
 5525     __ movl($dst$$Register, $mem$$Address);
 5526   %}
 5527 
 5528   ins_pipe(ialu_reg_mem);
 5529 %}
 5530 
 5531 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5532 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5533   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5534 
 5535   ins_cost(125);
 5536   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5537   ins_encode %{
 5538     __ movsbl($dst$$Register, $mem$$Address);
 5539   %}
 5540   ins_pipe(ialu_reg_mem);
 5541 %}
 5542 
 5543 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5544 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5545   match(Set dst (AndI (LoadI mem) mask));
 5546 
 5547   ins_cost(125);
 5548   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5549   ins_encode %{
 5550     __ movzbl($dst$$Register, $mem$$Address);
 5551   %}
 5552   ins_pipe(ialu_reg_mem);
 5553 %}
 5554 
 5555 // Load Integer (32 bit signed) to Short (16 bit signed)
 5556 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5557   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5558 
 5559   ins_cost(125);
 5560   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5561   ins_encode %{
 5562     __ movswl($dst$$Register, $mem$$Address);
 5563   %}
 5564   ins_pipe(ialu_reg_mem);
 5565 %}
 5566 
 5567 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5568 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5569   match(Set dst (AndI (LoadI mem) mask));
 5570 
 5571   ins_cost(125);
 5572   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5573   ins_encode %{
 5574     __ movzwl($dst$$Register, $mem$$Address);
 5575   %}
 5576   ins_pipe(ialu_reg_mem);
 5577 %}
 5578 
 5579 // Load Integer into Long Register
 5580 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5581   match(Set dst (ConvI2L (LoadI mem)));
 5582   effect(KILL cr);
 5583 
 5584   ins_cost(375);
 5585   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5586             "MOV    $dst.hi,$dst.lo\n\t"
 5587             "SAR    $dst.hi,31" %}
 5588 
 5589   ins_encode %{
 5590     __ movl($dst$$Register, $mem$$Address);
 5591     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5592     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5593   %}
 5594 
 5595   ins_pipe(ialu_reg_mem);
 5596 %}
 5597 
 5598 // Load Integer with mask 0xFF into Long Register
 5599 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5600   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5601   effect(KILL cr);
 5602 
 5603   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5604             "XOR    $dst.hi,$dst.hi" %}
 5605   ins_encode %{
 5606     Register Rdst = $dst$$Register;
 5607     __ movzbl(Rdst, $mem$$Address);
 5608     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5609   %}
 5610   ins_pipe(ialu_reg_mem);
 5611 %}
 5612 
 5613 // Load Integer with mask 0xFFFF into Long Register
 5614 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5615   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5616   effect(KILL cr);
 5617 
 5618   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5619             "XOR    $dst.hi,$dst.hi" %}
 5620   ins_encode %{
 5621     Register Rdst = $dst$$Register;
 5622     __ movzwl(Rdst, $mem$$Address);
 5623     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5624   %}
 5625   ins_pipe(ialu_reg_mem);
 5626 %}
 5627 
 5628 // Load Integer with 31-bit mask into Long Register
 5629 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5630   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5631   effect(KILL cr);
 5632 
 5633   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5634             "XOR    $dst.hi,$dst.hi\n\t"
 5635             "AND    $dst.lo,$mask" %}
 5636   ins_encode %{
 5637     Register Rdst = $dst$$Register;
 5638     __ movl(Rdst, $mem$$Address);
 5639     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5640     __ andl(Rdst, $mask$$constant);
 5641   %}
 5642   ins_pipe(ialu_reg_mem);
 5643 %}
 5644 
 5645 // Load Unsigned Integer into Long Register
 5646 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5647   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5648   effect(KILL cr);
 5649 
 5650   ins_cost(250);
 5651   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5652             "XOR    $dst.hi,$dst.hi" %}
 5653 
 5654   ins_encode %{
 5655     __ movl($dst$$Register, $mem$$Address);
 5656     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5657   %}
 5658 
 5659   ins_pipe(ialu_reg_mem);
 5660 %}
 5661 
 5662 // Load Long.  Cannot clobber address while loading, so restrict address
 5663 // register to ESI
 5664 instruct loadL(eRegL dst, load_long_memory mem) %{
 5665   predicate(!((LoadLNode*)n)->require_atomic_access());
 5666   match(Set dst (LoadL mem));
 5667 
 5668   ins_cost(250);
 5669   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5670             "MOV    $dst.hi,$mem+4" %}
 5671 
 5672   ins_encode %{
 5673     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5674     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5675     __ movl($dst$$Register, Amemlo);
 5676     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5677   %}
 5678 
 5679   ins_pipe(ialu_reg_long_mem);
 5680 %}
 5681 
 5682 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5683 // then store it down to the stack and reload on the int
 5684 // side.
 5685 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5686   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5687   match(Set dst (LoadL mem));
 5688 
 5689   ins_cost(200);
 5690   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5691             "FISTp  $dst" %}
 5692   ins_encode(enc_loadL_volatile(mem,dst));
 5693   ins_pipe( fpu_reg_mem );
 5694 %}
 5695 
 5696 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5697   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5698   match(Set dst (LoadL mem));
 5699   effect(TEMP tmp);
 5700   ins_cost(180);
 5701   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5702             "MOVSD  $dst,$tmp" %}
 5703   ins_encode %{
 5704     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5705     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5706   %}
 5707   ins_pipe( pipe_slow );
 5708 %}
 5709 
 5710 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5711   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5712   match(Set dst (LoadL mem));
 5713   effect(TEMP tmp);
 5714   ins_cost(160);
 5715   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5716             "MOVD   $dst.lo,$tmp\n\t"
 5717             "PSRLQ  $tmp,32\n\t"
 5718             "MOVD   $dst.hi,$tmp" %}
 5719   ins_encode %{
 5720     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5721     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5722     __ psrlq($tmp$$XMMRegister, 32);
 5723     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5724   %}
 5725   ins_pipe( pipe_slow );
 5726 %}
 5727 
 5728 // Load Range
 5729 instruct loadRange(rRegI dst, memory mem) %{
 5730   match(Set dst (LoadRange mem));
 5731 
 5732   ins_cost(125);
 5733   format %{ "MOV    $dst,$mem" %}
 5734   opcode(0x8B);
 5735   ins_encode( OpcP, RegMem(dst,mem));
 5736   ins_pipe( ialu_reg_mem );
 5737 %}
 5738 
 5739 
 5740 // Load Pointer
 5741 instruct loadP(eRegP dst, memory mem) %{
 5742   match(Set dst (LoadP mem));
 5743 
 5744   ins_cost(125);
 5745   format %{ "MOV    $dst,$mem" %}
 5746   opcode(0x8B);
 5747   ins_encode( OpcP, RegMem(dst,mem));
 5748   ins_pipe( ialu_reg_mem );
 5749 %}
 5750 
 5751 // Load Klass Pointer
 5752 instruct loadKlass(eRegP dst, memory mem) %{
 5753   match(Set dst (LoadKlass mem));
 5754 
 5755   ins_cost(125);
 5756   format %{ "MOV    $dst,$mem" %}
 5757   opcode(0x8B);
 5758   ins_encode( OpcP, RegMem(dst,mem));
 5759   ins_pipe( ialu_reg_mem );
 5760 %}
 5761 
 5762 // Load Float
 5763 instruct MoveF2LEG(legRegF dst, regF src) %{
 5764   match(Set dst src);
 5765   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5766   ins_encode %{
 5767     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5768   %}
 5769   ins_pipe( fpu_reg_reg );
 5770 %}
 5771 
 5772 // Load Float
 5773 instruct MoveLEG2F(regF dst, legRegF src) %{
 5774   match(Set dst src);
 5775   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5776   ins_encode %{
 5777     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5778   %}
 5779   ins_pipe( fpu_reg_reg );
 5780 %}
 5781 
 5782 // Load Double
 5783 instruct MoveD2LEG(legRegD dst, regD src) %{
 5784   match(Set dst src);
 5785   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5786   ins_encode %{
 5787     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5788   %}
 5789   ins_pipe( fpu_reg_reg );
 5790 %}
 5791 
 5792 // Load Double
 5793 instruct MoveLEG2D(regD dst, legRegD src) %{
 5794   match(Set dst src);
 5795   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5796   ins_encode %{
 5797     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5798   %}
 5799   ins_pipe( fpu_reg_reg );
 5800 %}
 5801 
 5802 // Load Double
 5803 instruct loadDPR(regDPR dst, memory mem) %{
 5804   predicate(UseSSE<=1);
 5805   match(Set dst (LoadD mem));
 5806 
 5807   ins_cost(150);
 5808   format %{ "FLD_D  ST,$mem\n\t"
 5809             "FSTP   $dst" %}
 5810   opcode(0xDD);               /* DD /0 */
 5811   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5812               Pop_Reg_DPR(dst) );
 5813   ins_pipe( fpu_reg_mem );
 5814 %}
 5815 
 5816 // Load Double to XMM
 5817 instruct loadD(regD dst, memory mem) %{
 5818   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5819   match(Set dst (LoadD mem));
 5820   ins_cost(145);
 5821   format %{ "MOVSD  $dst,$mem" %}
 5822   ins_encode %{
 5823     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5824   %}
 5825   ins_pipe( pipe_slow );
 5826 %}
 5827 
 5828 instruct loadD_partial(regD dst, memory mem) %{
 5829   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5830   match(Set dst (LoadD mem));
 5831   ins_cost(145);
 5832   format %{ "MOVLPD $dst,$mem" %}
 5833   ins_encode %{
 5834     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5835   %}
 5836   ins_pipe( pipe_slow );
 5837 %}
 5838 
 5839 // Load to XMM register (single-precision floating point)
 5840 // MOVSS instruction
 5841 instruct loadF(regF dst, memory mem) %{
 5842   predicate(UseSSE>=1);
 5843   match(Set dst (LoadF mem));
 5844   ins_cost(145);
 5845   format %{ "MOVSS  $dst,$mem" %}
 5846   ins_encode %{
 5847     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5848   %}
 5849   ins_pipe( pipe_slow );
 5850 %}
 5851 
 5852 // Load Float
 5853 instruct loadFPR(regFPR dst, memory mem) %{
 5854   predicate(UseSSE==0);
 5855   match(Set dst (LoadF mem));
 5856 
 5857   ins_cost(150);
 5858   format %{ "FLD_S  ST,$mem\n\t"
 5859             "FSTP   $dst" %}
 5860   opcode(0xD9);               /* D9 /0 */
 5861   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5862               Pop_Reg_FPR(dst) );
 5863   ins_pipe( fpu_reg_mem );
 5864 %}
 5865 
 5866 // Load Effective Address
 5867 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5868   match(Set dst mem);
 5869 
 5870   ins_cost(110);
 5871   format %{ "LEA    $dst,$mem" %}
 5872   opcode(0x8D);
 5873   ins_encode( OpcP, RegMem(dst,mem));
 5874   ins_pipe( ialu_reg_reg_fat );
 5875 %}
 5876 
 5877 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5878   match(Set dst mem);
 5879 
 5880   ins_cost(110);
 5881   format %{ "LEA    $dst,$mem" %}
 5882   opcode(0x8D);
 5883   ins_encode( OpcP, RegMem(dst,mem));
 5884   ins_pipe( ialu_reg_reg_fat );
 5885 %}
 5886 
 5887 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5888   match(Set dst mem);
 5889 
 5890   ins_cost(110);
 5891   format %{ "LEA    $dst,$mem" %}
 5892   opcode(0x8D);
 5893   ins_encode( OpcP, RegMem(dst,mem));
 5894   ins_pipe( ialu_reg_reg_fat );
 5895 %}
 5896 
 5897 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5898   match(Set dst mem);
 5899 
 5900   ins_cost(110);
 5901   format %{ "LEA    $dst,$mem" %}
 5902   opcode(0x8D);
 5903   ins_encode( OpcP, RegMem(dst,mem));
 5904   ins_pipe( ialu_reg_reg_fat );
 5905 %}
 5906 
 5907 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5908   match(Set dst mem);
 5909 
 5910   ins_cost(110);
 5911   format %{ "LEA    $dst,$mem" %}
 5912   opcode(0x8D);
 5913   ins_encode( OpcP, RegMem(dst,mem));
 5914   ins_pipe( ialu_reg_reg_fat );
 5915 %}
 5916 
 5917 // Load Constant
 5918 instruct loadConI(rRegI dst, immI src) %{
 5919   match(Set dst src);
 5920 
 5921   format %{ "MOV    $dst,$src" %}
 5922   ins_encode( LdImmI(dst, src) );
 5923   ins_pipe( ialu_reg_fat );
 5924 %}
 5925 
 5926 // Load Constant zero
 5927 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5928   match(Set dst src);
 5929   effect(KILL cr);
 5930 
 5931   ins_cost(50);
 5932   format %{ "XOR    $dst,$dst" %}
 5933   opcode(0x33);  /* + rd */
 5934   ins_encode( OpcP, RegReg( dst, dst ) );
 5935   ins_pipe( ialu_reg );
 5936 %}
 5937 
 5938 instruct loadConP(eRegP dst, immP src) %{
 5939   match(Set dst src);
 5940 
 5941   format %{ "MOV    $dst,$src" %}
 5942   opcode(0xB8);  /* + rd */
 5943   ins_encode( LdImmP(dst, src) );
 5944   ins_pipe( ialu_reg_fat );
 5945 %}
 5946 
 5947 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5948   match(Set dst src);
 5949   effect(KILL cr);
 5950   ins_cost(200);
 5951   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5952             "MOV    $dst.hi,$src.hi" %}
 5953   opcode(0xB8);
 5954   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5955   ins_pipe( ialu_reg_long_fat );
 5956 %}
 5957 
 5958 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 5959   match(Set dst src);
 5960   effect(KILL cr);
 5961   ins_cost(150);
 5962   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5963             "XOR    $dst.hi,$dst.hi" %}
 5964   opcode(0x33,0x33);
 5965   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5966   ins_pipe( ialu_reg_long );
 5967 %}
 5968 
 5969 // The instruction usage is guarded by predicate in operand immFPR().
 5970 instruct loadConFPR(regFPR dst, immFPR con) %{
 5971   match(Set dst con);
 5972   ins_cost(125);
 5973   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5974             "FSTP   $dst" %}
 5975   ins_encode %{
 5976     __ fld_s($constantaddress($con));
 5977     __ fstp_d($dst$$reg);
 5978   %}
 5979   ins_pipe(fpu_reg_con);
 5980 %}
 5981 
 5982 // The instruction usage is guarded by predicate in operand immFPR0().
 5983 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5984   match(Set dst con);
 5985   ins_cost(125);
 5986   format %{ "FLDZ   ST\n\t"
 5987             "FSTP   $dst" %}
 5988   ins_encode %{
 5989     __ fldz();
 5990     __ fstp_d($dst$$reg);
 5991   %}
 5992   ins_pipe(fpu_reg_con);
 5993 %}
 5994 
 5995 // The instruction usage is guarded by predicate in operand immFPR1().
 5996 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5997   match(Set dst con);
 5998   ins_cost(125);
 5999   format %{ "FLD1   ST\n\t"
 6000             "FSTP   $dst" %}
 6001   ins_encode %{
 6002     __ fld1();
 6003     __ fstp_d($dst$$reg);
 6004   %}
 6005   ins_pipe(fpu_reg_con);
 6006 %}
 6007 
 6008 // The instruction usage is guarded by predicate in operand immF().
 6009 instruct loadConF(regF dst, immF con) %{
 6010   match(Set dst con);
 6011   ins_cost(125);
 6012   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6013   ins_encode %{
 6014     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6015   %}
 6016   ins_pipe(pipe_slow);
 6017 %}
 6018 
 6019 // The instruction usage is guarded by predicate in operand immF0().
 6020 instruct loadConF0(regF dst, immF0 src) %{
 6021   match(Set dst src);
 6022   ins_cost(100);
 6023   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6024   ins_encode %{
 6025     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6026   %}
 6027   ins_pipe(pipe_slow);
 6028 %}
 6029 
 6030 // The instruction usage is guarded by predicate in operand immDPR().
 6031 instruct loadConDPR(regDPR dst, immDPR con) %{
 6032   match(Set dst con);
 6033   ins_cost(125);
 6034 
 6035   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6036             "FSTP   $dst" %}
 6037   ins_encode %{
 6038     __ fld_d($constantaddress($con));
 6039     __ fstp_d($dst$$reg);
 6040   %}
 6041   ins_pipe(fpu_reg_con);
 6042 %}
 6043 
 6044 // The instruction usage is guarded by predicate in operand immDPR0().
 6045 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6046   match(Set dst con);
 6047   ins_cost(125);
 6048 
 6049   format %{ "FLDZ   ST\n\t"
 6050             "FSTP   $dst" %}
 6051   ins_encode %{
 6052     __ fldz();
 6053     __ fstp_d($dst$$reg);
 6054   %}
 6055   ins_pipe(fpu_reg_con);
 6056 %}
 6057 
 6058 // The instruction usage is guarded by predicate in operand immDPR1().
 6059 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6060   match(Set dst con);
 6061   ins_cost(125);
 6062 
 6063   format %{ "FLD1   ST\n\t"
 6064             "FSTP   $dst" %}
 6065   ins_encode %{
 6066     __ fld1();
 6067     __ fstp_d($dst$$reg);
 6068   %}
 6069   ins_pipe(fpu_reg_con);
 6070 %}
 6071 
 6072 // The instruction usage is guarded by predicate in operand immD().
 6073 instruct loadConD(regD dst, immD con) %{
 6074   match(Set dst con);
 6075   ins_cost(125);
 6076   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6077   ins_encode %{
 6078     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6079   %}
 6080   ins_pipe(pipe_slow);
 6081 %}
 6082 
 6083 // The instruction usage is guarded by predicate in operand immD0().
 6084 instruct loadConD0(regD dst, immD0 src) %{
 6085   match(Set dst src);
 6086   ins_cost(100);
 6087   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6088   ins_encode %{
 6089     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6090   %}
 6091   ins_pipe( pipe_slow );
 6092 %}
 6093 
// Load Stack Slot (int): reload a 32-bit integer from a stack slot into a
// general-purpose register with a single MOV.  The encoding is the primary
// opcode 0x8B (OpcP) followed by the reg/mem operand bytes (RegMem).
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}
 6104 
// Load Stack Slot (long): reload a 64-bit value from a stack slot into a
// long register with two 32-bit MOVs.  The low word is encoded with
// OpcP + RegMem and the high word ($dst+4 in the format) with
// OpcS + RegMem_Hi; both opcode slots are 0x8B.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}
 6115 
// Load Stack Slot (pointer): reload a pointer from a stack slot into a
// pointer register with a single MOV.  Same encoding as loadSSI
// (opcode 0x8B via OpcP, then RegMem); only the operand types differ.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}
 6126 
// Load Stack Slot (float, x87): per the format, FLD_S loads the 32-bit slot
// and FSTP then stores it into $dst.  The load is encoded as D9 /0
// (OpcP + RMopc_Mem_no_oop with /0) and the store by Pop_Reg_FPR.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
 6139 
// Load Stack Slot (double, x87): per the format, FLD_D loads the 64-bit slot
// and FSTP then stores it into $dst.  The load is encoded as DD /0
// (OpcP + RMopc_Mem_no_oop with /0) and the store by Pop_Reg_DPR.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
 6152 
 6153 // Prefetch instructions for allocation.
 6154 // Must be safe to execute with invalid address (cannot fault).
 6155 
// No-op allocation prefetch.  Selected when UseSSE==0 and
// AllocatePrefetchInstr!=3, i.e. when none of the prefetch variants below
// applies.  It matches PrefetchAllocation but emits no code (size(0), empty
// ins_encode) at zero cost.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6165 
 6166 instruct prefetchAlloc( memory mem ) %{
 6167   predicate(AllocatePrefetchInstr==3);
 6168   match( PrefetchAllocation mem );
 6169   ins_cost(100);
 6170 
 6171   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6172   ins_encode %{
 6173     __ prefetchw($mem$$Address);
 6174   %}
 6175   ins_pipe(ialu_mem);
 6176 %}
 6177 
 6178 instruct prefetchAllocNTA( memory mem ) %{
 6179   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6180   match(PrefetchAllocation mem);
 6181   ins_cost(100);
 6182 
 6183   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6184   ins_encode %{
 6185     __ prefetchnta($mem$$Address);
 6186   %}
 6187   ins_pipe(ialu_mem);
 6188 %}
 6189 
 6190 instruct prefetchAllocT0( memory mem ) %{
 6191   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6192   match(PrefetchAllocation mem);
 6193   ins_cost(100);
 6194 
 6195   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6196   ins_encode %{
 6197     __ prefetcht0($mem$$Address);
 6198   %}
 6199   ins_pipe(ialu_mem);
 6200 %}
 6201 
 6202 instruct prefetchAllocT2( memory mem ) %{
 6203   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6204   match(PrefetchAllocation mem);
 6205   ins_cost(100);
 6206 
 6207   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6208   ins_encode %{
 6209     __ prefetcht2($mem$$Address);
 6210   %}
 6211   ins_pipe(ialu_mem);
 6212 %}
 6213 
 6214 //----------Store Instructions-------------------------------------------------
 6215 
// Store Byte: 8-bit MOV of $src to memory (opcode 0x88 via OpcP, then the
// reg/mem operand bytes via RegMem).
// NOTE(review): src uses the xRegI class rather than rRegI, presumably to
// restrict it to registers that have a byte-addressable low part -- confirm
// against the operand definition.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
 6226 
// Store Char/Short: 16-bit MOV of $src to memory.
// The emit order in ins_encode is significant: the secondary opcode slot
// (0x66, via OpcS) is emitted BEFORE the primary opcode 0x89 (OpcP), so the
// 0x66 byte precedes the MOV -- presumably acting as the operand-size prefix
// (compare SizePrefix in storeImmI16).
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
 6237 
// Store Integer: 32-bit MOV of $src to memory (opcode 0x89 via OpcP, then
// the reg/mem operand bytes via RegMem).
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
 6248 
// Store Long (non-atomic): writes the 64-bit value as two separate 32-bit
// MOVs, low word first (OpcP + RegMem), then high word at $mem+4
// (OpcS + RegMem_Hi).  Because the two halves are written independently, the
// predicate restricts this rule to StoreL nodes that do NOT require atomic
// access; atomic stores are matched by the *_volatile rules instead.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}
 6261 
 6262 // Store Long to Integer
 6263 instruct storeL2I(memory mem, eRegL src) %{
 6264   match(Set mem (StoreI mem (ConvL2I src)));
 6265 
 6266   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6267   ins_encode %{
 6268     __ movl($mem$$Address, $src$$Register);
 6269   %}
 6270   ins_pipe(ialu_mem_reg);
 6271 %}
 6272 
// Volatile Store Long (x87 path, UseSSE<=1).  Must be atomic, so move it
// into the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The probe is a CMP (opcode 0x3B via OpcP + RegMem(EAX, mem)), which is why
// the flags register is declared KILLed; the FILD/FISTp pair is emitted by
// enc_storeL_volatile.  The source must already live in a stack slot.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}
 6289 
 6290 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6291   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6292   match(Set mem (StoreL mem src));
 6293   effect( TEMP tmp, KILL cr );
 6294   ins_cost(380);
 6295   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6296             "MOVSD  $tmp,$src\n\t"
 6297             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6298   ins_encode %{
 6299     __ cmpl(rax, $mem$$Address);
 6300     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6301     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6302   %}
 6303   ins_pipe( pipe_slow );
 6304 %}
 6305 
 6306 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6307   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6308   match(Set mem (StoreL mem src));
 6309   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6310   ins_cost(360);
 6311   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6312             "MOVD   $tmp,$src.lo\n\t"
 6313             "MOVD   $tmp2,$src.hi\n\t"
 6314             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6315             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6316   ins_encode %{
 6317     __ cmpl(rax, $mem$$Address);
 6318     __ movdl($tmp$$XMMRegister, $src$$Register);
 6319     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6320     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6321     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6322   %}
 6323   ins_pipe( pipe_slow );
 6324 %}
 6325 
// Store Pointer; for storing unknown oops and raw pointers.
// 32-bit MOV of $src to memory (opcode 0x89 via OpcP, then RegMem); the
// source may be any pointer register (anyRegP).
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
 6336 
// Store Integer Immediate: MOV of a 32-bit constant straight to memory.
// Encoded as C7 /0 (OpcP, then RMopc_Mem with /0) followed by the 32-bit
// immediate (Con32).
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}
 6347 
// Store Short/Char Immediate: MOV of a 16-bit constant straight to memory.
// Only enabled when UseStoreImmI16 is set.  Encoded like the 32-bit
// immediate store (C7 /0) but preceded by SizePrefix and followed by a
// 16-bit immediate (Con16).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
  ins_pipe( ialu_mem_imm );
%}
 6359 
// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Encoded exactly like storeImmI: C7 /0 (OpcP, RMopc_Mem with /0) followed
// by the 32-bit immediate (Con32).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}
 6371 
// Store Byte Immediate: MOV of an 8-bit constant straight to memory.
// Encoded as C6 /0 (OpcP, then RMopc_Mem with /0) followed by the immediate
// emitted through Con8or32.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
 6382 
// Store card-mark Immediate: matches the StoreCM ideal node and writes an
// 8-bit constant to the card address.  The encoding is identical to
// storeImmB (C6 /0 plus immediate); only the matched node differs.
// (The "CMS" wording is historical; the rule keys on StoreCM.)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
 6393 
// Store Double from the x87 unit (used when UseSSE<=1, i.e. doubles are not
// kept in XMM registers).  FST_D to memory, opcode DD /2, emitted by
// enc_FPR_store.
// NOTE(review): src is regDPR1, presumably the FPU top-of-stack register
// class -- confirm against the operand definition.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
 6405 
// Store double does rounding on x86, so a StoreD whose input is a
// RoundDouble node is matched as one unit: the explicit rounding is folded
// into the store.  Encoding and cost are the same as storeDPR (FST_D,
// DD /2 via enc_FPR_store).  Used when UseSSE<=1.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
 6417 
 6418 // Store XMM register to memory (double-precision floating points)
 6419 // MOVSD instruction
 6420 instruct storeD(memory mem, regD src) %{
 6421   predicate(UseSSE>=2);
 6422   match(Set mem (StoreD mem src));
 6423   ins_cost(95);
 6424   format %{ "MOVSD  $mem,$src" %}
 6425   ins_encode %{
 6426     __ movdbl($mem$$Address, $src$$XMMRegister);
 6427   %}
 6428   ins_pipe( pipe_slow );
 6429 %}
 6430 
 6431 // Load Double
 6432 instruct MoveD2VL(vlRegD dst, regD src) %{
 6433   match(Set dst src);
 6434   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6435   ins_encode %{
 6436     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6437   %}
 6438   ins_pipe( fpu_reg_reg );
 6439 %}
 6440 
 6441 // Load Double
 6442 instruct MoveVL2D(regD dst, vlRegD src) %{
 6443   match(Set dst src);
 6444   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6445   ins_encode %{
 6446     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6447   %}
 6448   ins_pipe( fpu_reg_reg );
 6449 %}
 6450 
 6451 // Store XMM register to memory (single-precision floating point)
 6452 // MOVSS instruction
 6453 instruct storeF(memory mem, regF src) %{
 6454   predicate(UseSSE>=1);
 6455   match(Set mem (StoreF mem src));
 6456   ins_cost(95);
 6457   format %{ "MOVSS  $mem,$src" %}
 6458   ins_encode %{
 6459     __ movflt($mem$$Address, $src$$XMMRegister);
 6460   %}
 6461   ins_pipe( pipe_slow );
 6462 %}
 6463 
 6464 // Load Float
 6465 instruct MoveF2VL(vlRegF dst, regF src) %{
 6466   match(Set dst src);
 6467   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6468   ins_encode %{
 6469     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6470   %}
 6471   ins_pipe( fpu_reg_reg );
 6472 %}
 6473 
 6474 // Load Float
 6475 instruct MoveVL2F(regF dst, vlRegF src) %{
 6476   match(Set dst src);
 6477   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6478   ins_encode %{
 6479     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6480   %}
 6481   ins_pipe( fpu_reg_reg );
 6482 %}
 6483 
// Store Float from the x87 unit (used only when UseSSE==0, i.e. floats are
// not kept in XMM registers).  FST_S to memory, opcode D9 /2, emitted by
// enc_FPR_store.
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
 6495 
// Store Float does rounding on x86, so a StoreF whose input is a RoundFloat
// node is matched as one unit: the explicit rounding is folded into the
// store.  Encoding and cost are the same as storeFPR (FST_S, D9 /2 via
// enc_FPR_store).  Used only when UseSSE==0.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
 6507 
// Store Float does rounding on x86, so a StoreF of (ConvD2F src) is matched
// as one unit: the double in the x87 register is stored with FST_S and the
// double->float conversion is folded into the store.  The source is a
// double register (regDPR1) and the rule applies when UseSSE<=1.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
 6519 
// Store immediate Float value (it is faster than store from FPU register).
// The constant's bit pattern is written with an integer MOV: C7 /0 followed
// by the 32 bits produced by Con32FPR_as_bits.  The low cost (50) makes this
// rule preferred over the register store when the value is a constant.
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
 6531 
// Store immediate Float value (it is faster than store from XMM register).
// The constant's bit pattern is written with an integer MOV: C7 /0 followed
// by the 32 bits produced by Con32F_as_bits.  The low cost (50) makes this
// rule preferred over the register store when the value is a constant.
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
 6543 
// Store Integer to stack slot: spill a 32-bit integer register to a stack
// slot with a single MOV (opcode 0x89, emitted by OpcPRegSS).
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}
 6554 
// Store Pointer to stack slot: spill a pointer register to a stack slot with
// a single MOV (opcode 0x89, emitted by OpcPRegSS).  Same encoding as
// storeSSI; only the operand types differ.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}
 6565 
// Store Long to stack slot: spill a 64-bit register pair with two 32-bit
// MOVs, low word first (OpcP + RegMem), then high word at $dst+4
// (OpcS + RegMem_Hi).  Both opcode slots are 0x89.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
 6577 
 6578 //----------MemBar Instructions-----------------------------------------------
 6579 // Memory barrier flavors
 6580 
// Acquire barrier: matches both MemBarAcquire and LoadFence.  It emits no
// code (size(0), empty ins_encode); the rule exists so the ideal nodes have
// a machine-level counterpart.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6591 
// Acquire barrier associated with a lock (MemBarAcquireLock).  Emits no code:
// as the format text notes, the CMPXCHG already performed by FastLock makes
// a separate barrier unnecessary.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6601 
// Release barrier: matches both MemBarRelease and StoreFence.  It emits no
// code (size(0), empty ins_encode); the rule exists so the ideal nodes have
// a machine-level counterpart.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6612 
// Release barrier associated with an unlock (MemBarReleaseLock).  Emits no
// code: as the format text notes, the FastUnlock that follows makes a
// separate barrier unnecessary.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6622 
// Volatile (StoreLoad) barrier: the only membar rule in this group that
// emits code.  The encoding asks the assembler for a StoreLoad barrier; the
// format shows it as a LOCK ADDL on the top of the stack.  The flags
// register is declared KILLed.  Cost 400; see unnecessary_membar_volatile
// for the zero-cost variant chosen when the barrier is redundant.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6637 
// Elided volatile barrier: matches MemBarVolatile only when
// Matcher::post_store_load_barrier(n) reports that the barrier is not
// needed.  Emits no code; cost 0 makes it win over membar_volatile (cost
// 400) whenever the predicate holds.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6648 
// StoreStore barrier: matches both MemBarStoreStore and StoreStoreFence and
// emits no code (size(0), empty ins_encode) at zero cost.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6659 
 6660 //----------Move Instructions--------------------------------------------------
// Reinterpret an int as a pointer (CastX2P).  Source and destination are
// both EAX-class operands (eAXRegI / eAXRegP) and the encoding is empty, so
// the cast itself generates no code and costs nothing.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}
 6668 
// Reinterpret a pointer as an int (CastP2X).  Unlike castX2P the operands
// may be in different registers, so a register-to-register MOV is emitted
// through enc_Copy.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
 6676 
 6677 //----------Conditional Move---------------------------------------------------
 6678 // Conditional move
 6679 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6680   predicate(!VM_Version::supports_cmov() );
 6681   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6682   ins_cost(200);
 6683   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6684             "MOV    $dst,$src\n"
 6685       "skip:" %}
 6686   ins_encode %{
 6687     Label Lskip;
 6688     // Invert sense of branch from sense of CMOV
 6689     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6690     __ movl($dst$$Register, $src$$Register);
 6691     __ bind(Lskip);
 6692   %}
 6693   ins_pipe( pipe_cmov_reg );
 6694 %}
 6695 
 6696 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6697   predicate(!VM_Version::supports_cmov() );
 6698   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6699   ins_cost(200);
 6700   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6701             "MOV    $dst,$src\n"
 6702       "skip:" %}
 6703   ins_encode %{
 6704     Label Lskip;
 6705     // Invert sense of branch from sense of CMOV
 6706     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6707     __ movl($dst$$Register, $src$$Register);
 6708     __ bind(Lskip);
 6709   %}
 6710   ins_pipe( pipe_cmov_reg );
 6711 %}
 6712 
// Conditional move of an int, register source, signed-compare flags.
// Requires hardware CMOV support.  Encoded as the two-byte opcode 0x0F 0x40
// emitted by enc_cmov(cop) -- presumably with the condition code merged into
// the second byte (confirm in the enc class) -- followed by the reg/reg
// operand byte.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
 6722 
// Conditional move of an int, register source, unsigned-compare flags
// (cmpOpU / eFlagsRegU).  Requires hardware CMOV support.  Same encoding as
// cmovI_reg: enc_cmov(cop) with opcode 0x0F 0x40, then the reg/reg operand
// byte.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
 6732 
// Conditional move of an int for the cmpOpUCF / eFlagsRegUCF flavour of
// unsigned flags.  It has no encoding of its own: the rule simply expands to
// cmovI_regU with the same operands.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 6741 
// Conditional move of an int with a memory source, signed-compare flags.
// Matches a CMoveI whose second input is a LoadI, folding the load into the
// CMOV (enc_cmov(cop) with opcode 0x0F 0x40, then the reg/mem operand
// bytes).  Requires hardware CMOV support; cost 250 versus 200 for the
// register form.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
 6752 
// Conditional move of an int with a memory source, unsigned-compare flags
// (cmpOpU / eFlagsRegU).  Matches a CMoveI whose second input is a LoadI,
// folding the load into the CMOV.  Same encoding as cmovI_mem.  Requires
// hardware CMOV support.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
 6763 
// Memory-source conditional move of an int for the cmpOpUCF / eFlagsRegUCF
// flavour of unsigned flags.  It has no encoding of its own: the rule simply
// expands to cmovI_memU with the same operands.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 6772 
// Conditional move of a pointer, register source, signed-compare flags.
// Requires hardware CMOV support.  Same encoding as the int form:
// enc_cmov(cop) with opcode 0x0F 0x40, then the reg/reg operand byte.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
 6783 
// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
// The rule deliberately carries no predicate, so it is always available as a
// fallback; its higher cost (300 versus 200) lets the real CMOV rules win
// when their predicate holds.  The emulation is a conditional branch
// (enc_cmov_branch) around a plain MOV (opcode 0x8b + RegReg).
// NOTE(review): the 0x2 argument is presumably the byte length of the MOV
// being skipped -- confirm in enc_cmov_branch.
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}
 6798 
// Conditional move of a pointer, register source, unsigned-compare flags
// (cmpOpU / eFlagsRegU).  Requires hardware CMOV support.  Same encoding as
// cmovP_reg.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
 6809 
// Conditional move of a pointer for the cmpOpUCF / eFlagsRegUCF flavour of
// unsigned flags.  It has no encoding of its own: the rule simply expands to
// cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 6818 
 6819 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6820 // correctly meets the two pointer arguments; one is an incoming
 6821 // register but the other is a memory operand.  ALSO appears to
 6822 // be buggy with implicit null checks.
 6823 //
 6824 //// Conditional move
 6825 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6826 //  predicate(VM_Version::supports_cmov() );
 6827 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6828 //  ins_cost(250);
 6829 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6830 //  opcode(0x0F,0x40);
 6831 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6832 //  ins_pipe( pipe_cmov_mem );
 6833 //%}
 6834 //
 6835 //// Conditional move
 6836 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6837 //  predicate(VM_Version::supports_cmov() );
 6838 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6839 //  ins_cost(250);
 6840 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6841 //  opcode(0x0F,0x40);
 6842 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6843 //  ins_pipe( pipe_cmov_mem );
 6844 //%}
 6845 
// Conditional move of a double held in the x87 unit, unsigned-compare flags
// (FCMOVcc).  Used when UseSSE<=1.  The destination is constrained to the
// regDPR1 class; the instruction is emitted by enc_cmov_dpr with base opcode
// 0xDA.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6856 
// Conditional move of a float held in the x87 unit, unsigned-compare flags
// (FCMOVcc).  Used only when UseSSE==0.  The destination is constrained to
// the regFPR1 class.  Note that the float rule reuses the double rule's
// encoder (enc_cmov_dpr) and pipeline class (pipe_cmovDPR_reg).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6867 
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed-compare flags the x87 double move is emulated: a conditional
// branch (enc_cmov_branch) skips over a push of $src (Push_Reg_DPR) followed
// by a store-and-pop into $dst (DD /3 via OpcP + RegOpc).  Used when
// UseSSE<=1.
// NOTE(review): the 0x4 argument is presumably the byte length of the
// skipped sequence -- confirm in enc_cmov_branch.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6880 
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed-compare flags the x87 float move is emulated: a conditional
// branch (enc_cmov_branch) skips over a push of $src (Push_Reg_FPR) followed
// by a store-and-pop into $dst (DD /3 via OpcP + RegOpc).  Used only when
// UseSSE==0.
// NOTE(review): the 0x4 argument is presumably the byte length of the
// skipped sequence -- confirm in enc_cmov_branch.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6893 
 6894 // No CMOVE with SSE/SSE2
 6895 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6896   predicate (UseSSE>=1);
 6897   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6898   ins_cost(200);
 6899   format %{ "Jn$cop   skip\n\t"
 6900             "MOVSS  $dst,$src\t# float\n"
 6901       "skip:" %}
 6902   ins_encode %{
 6903     Label skip;
 6904     // Invert sense of branch from sense of CMOV
 6905     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6906     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6907     __ bind(skip);
 6908   %}
 6909   ins_pipe( pipe_slow );
 6910 %}
 6911 
 6912 // No CMOVE with SSE/SSE2
 6913 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6914   predicate (UseSSE>=2);
 6915   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6916   ins_cost(200);
 6917   format %{ "Jn$cop   skip\n\t"
 6918             "MOVSD  $dst,$src\t# float\n"
 6919       "skip:" %}
 6920   ins_encode %{
 6921     Label skip;
 6922     // Invert sense of branch from sense of CMOV
 6923     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6924     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6925     __ bind(skip);
 6926   %}
 6927   ins_pipe( pipe_slow );
 6928 %}
 6929 
 6930 // unsigned version
 6931 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6932   predicate (UseSSE>=1);
 6933   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6934   ins_cost(200);
 6935   format %{ "Jn$cop   skip\n\t"
 6936             "MOVSS  $dst,$src\t# float\n"
 6937       "skip:" %}
 6938   ins_encode %{
 6939     Label skip;
 6940     // Invert sense of branch from sense of CMOV
 6941     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6942     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6943     __ bind(skip);
 6944   %}
 6945   ins_pipe( pipe_slow );
 6946 %}
 6947 
// Conditional move of a float in an XMM register for the cmpOpUCF /
// eFlagsRegUCF flavour of unsigned flags.  It has no encoding of its own:
// the rule simply expands to fcmovF_regU with the same operands.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}
 6956 
 6957 // unsigned version
 6958 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
        // CMoveD on XMM registers for the cmpOpU/eFlagsRegU operands (needs
        // UseSSE>=2). Encoded as a short jump on the inverted condition
        // around a movdbl. The format annotation reads "double" so the
        // listing matches the double-precision move that is emitted.
 6959   predicate (UseSSE>=2);
 6960   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6961   ins_cost(200);
 6962   format %{ "Jn$cop   skip\n\t"
 6963             "MOVSD  $dst,$src\t# double\n"
 6964       "skip:" %}
 6965   ins_encode %{
 6966     Label skip;
 6967     // Invert sense of branch from sense of CMOV
 6968     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6969     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6970     __ bind(skip);
 6971   %}
 6972   ins_pipe( pipe_slow );
 6973 %}
 6974 
 6975 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
        // Same CMoveD match for the cmpOpUCF/eFlagsRegUCF operands; it has no
        // encoding of its own and expands to fcmovD_regU.
 6976   predicate (UseSSE>=2);
 6977   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6978   ins_cost(200);
 6979   expand %{
 6980     fcmovD_regU(cop, cr, dst, src);
 6981   %}
 6982 %}
 6983 
 6984 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
        // Conditional move of a long register pair; only available when
        // VM_Version::supports_cmov(). The encoding issues enc_cmov twice:
        // once with the low halves (RegReg_Lo2), once with the high halves
        // (RegReg_Hi2).
 6985   predicate(VM_Version::supports_cmov() );
 6986   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6987   ins_cost(200);
 6988   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6989             "CMOV$cop $dst.hi,$src.hi" %}
 6990   opcode(0x0F,0x40);
 6991   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6992   ins_pipe( pipe_cmov_reg_long );
 6993 %}
 6994 
 6995 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
        // Long conditional move for the cmpOpU/eFlagsRegU operands: two CMOVs,
        // low halves first and then high halves. Requires
        // VM_Version::supports_cmov().
 6996   predicate(VM_Version::supports_cmov() );
 6997   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6998   ins_cost(200);
 6999   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7000             "CMOV$cop $dst.hi,$src.hi" %}
 7001   opcode(0x0F,0x40);
 7002   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7003   ins_pipe( pipe_cmov_reg_long );
 7004 %}
 7005 
 7006 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
        // Same CMoveL match for the cmpOpUCF/eFlagsRegUCF operands; expands to
        // cmovL_regU rather than carrying its own encoding.
 7007   predicate(VM_Version::supports_cmov() );
 7008   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7009   ins_cost(200);
 7010   expand %{
 7011     cmovL_regU(cop, cr, dst, src);
 7012   %}
 7013 %}
 7014 
 7015 //----------Arithmetic Instructions--------------------------------------------
 7016 //----------Addition Instructions----------------------------------------------
 7017 
 7018 // Integer Addition Instructions
 7019 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Int add, register += register. The flags register is declared
        // killed. size(2) fixes the encoding at two bytes: opcode 0x03 plus
        // the RegReg operand encoding.
 7020   match(Set dst (AddI dst src));
 7021   effect(KILL cr);
 7022 
 7023   size(2);
 7024   format %{ "ADD    $dst,$src" %}
 7025   opcode(0x03);
 7026   ins_encode( OpcP, RegReg( dst, src) );
 7027   ins_pipe( ialu_reg_reg );
 7028 %}
 7029 
 7030 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Int add of an immediate to a register; flags are killed.
        // NOTE(review): OpcSErm/Con8or32 presumably pick the 8-bit or 32-bit
        // immediate form depending on the constant - confirm against the
        // enc_class definitions.
 7031   match(Set dst (AddI dst src));
 7032   effect(KILL cr);
 7033 
 7034   format %{ "ADD    $dst,$src" %}
 7035   opcode(0x81, 0x00); /* /0 id */
 7036   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7037   ins_pipe( ialu_reg );
 7038 %}
 7039 
 7040 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // AddI with an immI_1 operand, printed as INC; only selectable when
        // UseIncDec is set. Declared as a one-byte instruction built by
        // Opc_plus from the primary opcode 0x40 and dst.
 7041   predicate(UseIncDec);
 7042   match(Set dst (AddI dst src));
 7043   effect(KILL cr);
 7044 
 7045   size(1);
 7046   format %{ "INC    $dst" %}
 7047   opcode(0x40); /*  */
 7048   ins_encode( Opc_plus( primary, dst ) );
 7049   ins_pipe( ialu_reg );
 7050 %}
 7051 
 7052 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-operand int add (dst = src0 + src1) done with LEA. Unlike the
        // ADD forms, no flags operand or KILL effect is declared here.
 7053   match(Set dst (AddI src0 src1));
 7054   ins_cost(110);
 7055 
 7056   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7057   opcode(0x8D); /* 0x8D /r */
 7058   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7059   ins_pipe( ialu_reg_reg );
 7060 %}
 7061 
 7062 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer form of the LEA add: dst = src0 + src1 for an AddP node,
        // with no flags effect declared.
 7063   match(Set dst (AddP src0 src1));
 7064   ins_cost(110);
 7065 
 7066   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7067   opcode(0x8D); /* 0x8D /r */
 7068   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7069   ins_pipe( ialu_reg_reg );
 7070 %}
 7071 
 7072 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // AddI with an immI_M1 operand, printed as DEC; only selectable when
        // UseIncDec is set. Declared as a one-byte instruction built by
        // Opc_plus from the primary opcode 0x48 and dst.
 7073   predicate(UseIncDec);
 7074   match(Set dst (AddI dst src));
 7075   effect(KILL cr);
 7076 
 7077   size(1);
 7078   format %{ "DEC    $dst" %}
 7079   opcode(0x48); /*  */
 7080   ins_encode( Opc_plus( primary, dst ) );
 7081   ins_pipe( ialu_reg );
 7082 %}
 7083 
 7084 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Pointer add of an int register (AddP dst src). Uses the same 0x03
        // opcode and two-byte size as the int register add; flags are killed.
 7085   match(Set dst (AddP dst src));
 7086   effect(KILL cr);
 7087 
 7088   size(2);
 7089   format %{ "ADD    $dst,$src" %}
 7090   opcode(0x03);
 7091   ins_encode( OpcP, RegReg( dst, src) );
 7092   ins_pipe( ialu_reg_reg );
 7093 %}
 7094 
 7095 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Pointer add of an int immediate (AddP dst src); flags are killed.
        // The commented-out RegImm encoding below is kept from the original;
        // the active encoding is OpcSErm followed by Con8or32.
 7096   match(Set dst (AddP dst src));
 7097   effect(KILL cr);
 7098 
 7099   format %{ "ADD    $dst,$src" %}
 7100   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7101   // ins_encode( RegImm( dst, src) );
 7102   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7103   ins_pipe( ialu_reg );
 7104 %}
 7105 
 7106 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int add whose second input is a LoadI from memory, so the load is
        // matched together with the add into one ADD reg,mem. Flags killed.
 7107   match(Set dst (AddI dst (LoadI src)));
 7108   effect(KILL cr);
 7109 
 7110   ins_cost(125);
 7111   format %{ "ADD    $dst,$src" %}
 7112   opcode(0x03);
 7113   ins_encode( OpcP, RegMem( dst, src) );
 7114   ins_pipe( ialu_reg_mem );
 7115 %}
 7116 
 7117 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Matches load + add + store on the same memory operand
        // (StoreI dst (AddI (LoadI dst) src)) and encodes it as a single
        // ADD mem,reg using opcode 0x01 with RegMem(src, dst). Flags killed.
 7118   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7119   effect(KILL cr);
 7120 
 7121   ins_cost(150);
 7122   format %{ "ADD    $dst,$src" %}
 7123   opcode(0x01);  /* Opcode 01 /r */
 7124   ins_encode( OpcP, RegMem( src, dst ) );
 7125   ins_pipe( ialu_mem_reg );
 7126 %}
 7127 
 7128 // Add Memory with Immediate
 7129 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Matches load + add-immediate + store on the same memory operand.
        // Encoded as OpcSE(src), then RMopc_Mem with sub-opcode 0x00 on dst,
        // then the immediate via Con8or32. Flags killed.
 7130   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7131   effect(KILL cr);
 7132 
 7133   ins_cost(125);
 7134   format %{ "ADD    $dst,$src" %}
 7135   opcode(0x81);               /* Opcode 81 /0 id */
 7136   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7137   ins_pipe( ialu_mem_imm );
 7138 %}
 7139 
 7140 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Memory read-modify-write add of an immI_1 operand, printed as INC
        // and encoded as opcode 0xFF with sub-opcode 0x00 on dst.
        // NOTE(review): unlike incI_eReg there is no UseIncDec predicate
        // here - confirm that is intended.
 7141   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7142   effect(KILL cr);
 7143 
 7144   ins_cost(125);
 7145   format %{ "INC    $dst" %}
 7146   opcode(0xFF);               /* Opcode FF /0 */
 7147   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7148   ins_pipe( ialu_mem_imm );
 7149 %}
 7150 
 7151 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Memory read-modify-write add of an immI_M1 operand, printed as DEC
        // and encoded as opcode 0xFF with sub-opcode 0x01 on dst.
        // NOTE(review): unlike decI_eReg there is no UseIncDec predicate
        // here - confirm that is intended.
 7152   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7153   effect(KILL cr);
 7154 
 7155   ins_cost(125);
 7156   format %{ "DEC    $dst" %}
 7157   opcode(0xFF);               /* Opcode FF /1 */
 7158   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7159   ins_pipe( ialu_mem_imm );
 7160 %}
 7161 
 7162 
 7163 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register: matched in place (dst is both
        // input and result) and emits no code - size(0), empty encoding.
 7164   match(Set dst (CheckCastPP dst));
 7165 
 7166   size(0);
 7167   format %{ "#checkcastPP of $dst" %}
 7168   ins_encode( /*empty encoding*/ );
 7169   ins_pipe( empty );
 7170 %}
 7171 
 7172 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register: matched in place with an empty
        // encoding, so no machine code is produced for it.
 7173   match(Set dst (CastPP dst));
 7174   format %{ "#castPP of $dst" %}
 7175   ins_encode( /*empty encoding*/ );
 7176   ins_pipe( empty );
 7177 %}
 7178 
 7179 instruct castII( rRegI dst ) %{
        // CastII on an int register: in-place match, empty encoding, cost 0.
 7180   match(Set dst (CastII dst));
 7181   format %{ "#castII of $dst" %}
 7182   ins_encode( /*empty encoding*/ );
 7183   ins_cost(0);
 7184   ins_pipe( empty );
 7185 %}
 7186 
 7187 instruct castLL( eRegL dst ) %{
        // CastLL on a long register: in-place match, empty encoding, cost 0.
 7188   match(Set dst (CastLL dst));
 7189   format %{ "#castLL of $dst" %}
 7190   ins_encode( /*empty encoding*/ );
 7191   ins_cost(0);
 7192   ins_pipe( empty );
 7193 %}
 7194 
 7195 instruct castFF( regF dst ) %{
        // CastFF for the regF operand class, selected when UseSSE >= 1.
        // In-place match, empty encoding, cost 0.
 7196   predicate(UseSSE >= 1);
 7197   match(Set dst (CastFF dst));
 7198   format %{ "#castFF of $dst" %}
 7199   ins_encode( /*empty encoding*/ );
 7200   ins_cost(0);
 7201   ins_pipe( empty );
 7202 %}
 7203 
 7204 instruct castDD( regD dst ) %{
        // CastDD for the regD operand class, selected when UseSSE >= 2.
        // In-place match, empty encoding, cost 0.
 7205   predicate(UseSSE >= 2);
 7206   match(Set dst (CastDD dst));
 7207   format %{ "#castDD of $dst" %}
 7208   ins_encode( /*empty encoding*/ );
 7209   ins_cost(0);
 7210   ins_pipe( empty );
 7211 %}
 7212 
 7213 instruct castFF_PR( regFPR dst ) %{
        // CastFF for the regFPR operand class, selected when UseSSE < 1
        // (the complement of castFF's predicate). Empty encoding, cost 0.
 7214   predicate(UseSSE < 1);
 7215   match(Set dst (CastFF dst));
 7216   format %{ "#castFF of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castDD_PR( regDPR dst ) %{
        // CastDD for the regDPR operand class, selected when UseSSE < 2
        // (the complement of castDD's predicate). Empty encoding, cost 0.
 7223   predicate(UseSSE < 2);
 7224   match(Set dst (CastDD dst));
 7225   format %{ "#castDD of $dst" %}
 7226   ins_encode( /*empty encoding*/ );
 7227   ins_cost(0);
 7228   ins_pipe( empty );
 7229 %}
 7230 
 7231 // Load-locked - same as a regular pointer load when used with compare-swap
 7232 instruct loadPLocked(eRegP dst, memory mem) %{
        // LoadPLocked is encoded as an ordinary MOV reg,mem (opcode 0x8B);
        // no lock prefix or flags effect is declared for it.
 7233   match(Set dst (LoadPLocked mem));
 7234 
 7235   ins_cost(125);
 7236   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7237   opcode(0x8B);
 7238   ins_encode( OpcP, RegMem(dst,mem));
 7239   ins_pipe( ialu_reg_mem );
 7240 %}
 7241 
 7242 // Conditional-store of the updated heap-top.
 7243 // Used during allocation of the shared heap.
 7244 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
 7245 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
        // The result of this rule is the flags register (Set cr ...).
        // Encoding: lock prefix, opcode bytes 0x0F 0xB1, then
        // RegMem(newval, heap_top_ptr).
        // NOTE(review): no KILL oldval effect is declared although the
        // comment below says EAX can be killed; storeIConditional does
        // declare one - confirm the difference is deliberate.
 7246   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7247   // EAX is killed if there is contention, but then it's also unused.
 7248   // In the common case of no contention, EAX holds the new oop address.
 7249   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7250   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7251   ins_pipe( pipe_cmpxchg );
 7252 %}
 7253 
 7254 // Conditional-store of an int value.
 7255 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7256 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
        // The result is the flags register (Set cr ...); oldval is pinned to
        // the eAXRegI class and declared killed. Encoding: lock prefix,
        // opcode bytes 0x0F 0xB1, then RegMem(newval, mem).
 7257   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7258   effect(KILL oldval);
 7259   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7260   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7261   ins_pipe( pipe_cmpxchg );
 7262 %}
 7263 
 7264 // Conditional-store of a long value.
 7265 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7266 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // The result is the flags register (Set cr ...); oldval is declared
        // killed. The encoding swaps EBX and ECX, issues a locked cmpxchg8
        // on mem, then swaps them back (see the note in the encoding).
 7267   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7268   effect(KILL oldval);
 7269   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7270             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7271             "XCHG   EBX,ECX"
 7272   %}
 7273   ins_encode %{
 7274     // Note: we need to swap rbx, and rcx before and after the
 7275     //       cmpxchg8 instruction because the instruction uses
 7276     //       rcx as the high order word of the new value to store but
 7277     //       our register encoding uses rbx.
 7278     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7279     __ lock();
 7280     __ cmpxchg8($mem$$Address);
 7281     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7282   %}
 7283   ins_pipe( pipe_cmpxchg );
 7284 %}
 7285 
 7286 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7287 
 7288 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // Long compare-and-swap producing an int result in res. Serves both
        // CompareAndSwapL and WeakCompareAndSwapL, and requires
        // VM_Version::supports_cx8(). Encoding: enc_cmpxchg8 on mem_ptr, then
        // enc_flags_ne_to_boolean(res); the format shows res = 0 on JNE,
        // otherwise 1. Flags and oldval are killed.
 7289   predicate(VM_Version::supports_cx8());
 7290   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7291   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7292   effect(KILL cr, KILL oldval);
 7293   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7294             "MOV    $res,0\n\t"
 7295             "JNE,s  fail\n\t"
 7296             "MOV    $res,1\n"
 7297           "fail:" %}
 7298   ins_encode( enc_cmpxchg8(mem_ptr),
 7299               enc_flags_ne_to_boolean(res) );
 7300   ins_pipe( pipe_cmpxchg );
 7301 %}
 7302 
 7303 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing an int result in res. Serves
        // both CompareAndSwapP and WeakCompareAndSwapP. Encoding: enc_cmpxchg
        // on mem_ptr, then enc_flags_ne_to_boolean(res). Flags and oldval are
        // killed.
 7304   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7305   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7306   effect(KILL cr, KILL oldval);
 7307   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7308             "MOV    $res,0\n\t"
 7309             "JNE,s  fail\n\t"
 7310             "MOV    $res,1\n"
 7311           "fail:" %}
 7312   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7313   ins_pipe( pipe_cmpxchg );
 7314 %}
 7315 
 7316 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing an int result in res. Serves both
        // CompareAndSwapB and WeakCompareAndSwapB. Encoding: enc_cmpxchgb on
        // mem_ptr, then enc_flags_ne_to_boolean(res). Flags and oldval are
        // killed.
 7317   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7318   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7319   effect(KILL cr, KILL oldval);
 7320   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7321             "MOV    $res,0\n\t"
 7322             "JNE,s  fail\n\t"
 7323             "MOV    $res,1\n"
 7324           "fail:" %}
 7325   ins_encode( enc_cmpxchgb(mem_ptr),
 7326               enc_flags_ne_to_boolean(res) );
 7327   ins_pipe( pipe_cmpxchg );
 7328 %}
 7329 
 7330 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short compare-and-swap producing an int result in res. Serves both
        // CompareAndSwapS and WeakCompareAndSwapS. Encoding: enc_cmpxchgw on
        // mem_ptr, then enc_flags_ne_to_boolean(res). Flags and oldval are
        // killed.
 7331   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7332   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7333   effect(KILL cr, KILL oldval);
 7334   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7335             "MOV    $res,0\n\t"
 7336             "JNE,s  fail\n\t"
 7337             "MOV    $res,1\n"
 7338           "fail:" %}
 7339   ins_encode( enc_cmpxchgw(mem_ptr),
 7340               enc_flags_ne_to_boolean(res) );
 7341   ins_pipe( pipe_cmpxchg );
 7342 %}
 7343 
 7344 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing an int result in res. Serves both
        // CompareAndSwapI and WeakCompareAndSwapI. Encoding: enc_cmpxchg on
        // mem_ptr, then enc_flags_ne_to_boolean(res). Flags and oldval are
        // killed.
 7345   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7346   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7347   effect(KILL cr, KILL oldval);
 7348   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7349             "MOV    $res,0\n\t"
 7350             "JNE,s  fail\n\t"
 7351             "MOV    $res,1\n"
 7352           "fail:" %}
 7353   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7354   ins_pipe( pipe_cmpxchg );
 7355 %}
 7356 
 7357 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // Long compare-and-exchange: the result is delivered in oldval
        // (Set oldval ...), so only the flags are declared killed and no
        // boolean conversion follows enc_cmpxchg8. Requires
        // VM_Version::supports_cx8().
 7358   predicate(VM_Version::supports_cx8());
 7359   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7360   effect(KILL cr);
 7361   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7362   ins_encode( enc_cmpxchg8(mem_ptr) );
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange: the result is delivered in oldval
        // (Set oldval ...); encoded by enc_cmpxchg alone, flags killed.
 7367   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7368   effect(KILL cr);
 7369   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7370   ins_encode( enc_cmpxchg(mem_ptr) );
 7371   ins_pipe( pipe_cmpxchg );
 7372 %}
 7373 
 7374 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange: the result is delivered in oldval
        // (Set oldval ...); encoded by enc_cmpxchgb alone, flags killed.
 7375   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7376   effect(KILL cr);
 7377   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7378   ins_encode( enc_cmpxchgb(mem_ptr) );
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
 7381 
 7382 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short compare-and-exchange: the result is delivered in oldval
        // (Set oldval ...); encoded by enc_cmpxchgw alone, flags killed.
 7383   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7384   effect(KILL cr);
 7385   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7386   ins_encode( enc_cmpxchgw(mem_ptr) );
 7387   ins_pipe( pipe_cmpxchg );
 7388 %}
 7389 
 7390 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange: the result is delivered in oldval
        // (Set oldval ...); encoded by enc_cmpxchg alone, flags killed.
 7391   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7392   effect(KILL cr);
 7393   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7394   ins_encode( enc_cmpxchg(mem_ptr) );
 7395   ins_pipe( pipe_cmpxchg );
 7396 %}
 7397 
 7398 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddB whose result is unused (result_not_used()): the value
        // is set into a Universe dummy and the add is emitted as a locked
        // addb of the constant to memory. Flags killed.
 7399   predicate(n->as_LoadStore()->result_not_used());
 7400   match(Set dummy (GetAndAddB mem add));
 7401   effect(KILL cr);
 7402   format %{ "ADDB  [$mem],$add" %}
 7403   ins_encode %{
 7404     __ lock();
 7405     __ addb($mem$$Address, $add$$constant);
 7406   %}
 7407   ins_pipe( pipe_cmpxchg );
 7408 %}
 7409 
 7410 // Important to match to xRegI: only 8-bit regs.
 7411 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
        // GetAndAddB with a used result: newval is both the addend and the
        // destination of the result, emitted as a locked xaddb. Flags killed.
 7412   match(Set newval (GetAndAddB mem newval));
 7413   effect(KILL cr);
 7414   format %{ "XADDB  [$mem],$newval" %}
 7415   ins_encode %{
 7416     __ lock();
 7417     __ xaddb($mem$$Address, $newval$$Register);
 7418   %}
 7419   ins_pipe( pipe_cmpxchg );
 7420 %}
 7421 
 7422 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddS whose result is unused (result_not_used()): the value
        // is set into a Universe dummy and the add is emitted as a locked
        // addw of the constant to memory. Flags killed.
        // The format prints ADDW so the listing names the instruction that
        // is emitted, matching the W suffix used by xchgS.
 7423   predicate(n->as_LoadStore()->result_not_used());
 7424   match(Set dummy (GetAndAddS mem add));
 7425   effect(KILL cr);
 7426   format %{ "ADDW  [$mem],$add" %}
 7427   ins_encode %{
 7428     __ lock();
 7429     __ addw($mem$$Address, $add$$constant);
 7430   %}
 7431   ins_pipe( pipe_cmpxchg );
 7432 %}
 7433 
 7434 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddS with a used result: newval is both the addend and the
        // destination of the result, emitted as a locked xaddw. Flags killed.
        // The format prints XADDW so the listing names the instruction that
        // is emitted, matching the W suffix used by xchgS.
 7435   match(Set newval (GetAndAddS mem newval));
 7436   effect(KILL cr);
 7437   format %{ "XADDW  [$mem],$newval" %}
 7438   ins_encode %{
 7439     __ lock();
 7440     __ xaddw($mem$$Address, $newval$$Register);
 7441   %}
 7442   ins_pipe( pipe_cmpxchg );
 7443 %}
 7444 
 7445 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddI whose result is unused (result_not_used()): the value
        // is set into a Universe dummy and the add is emitted as a locked
        // addl of the constant to memory. Flags killed.
 7446   predicate(n->as_LoadStore()->result_not_used());
 7447   match(Set dummy (GetAndAddI mem add));
 7448   effect(KILL cr);
 7449   format %{ "ADDL  [$mem],$add" %}
 7450   ins_encode %{
 7451     __ lock();
 7452     __ addl($mem$$Address, $add$$constant);
 7453   %}
 7454   ins_pipe( pipe_cmpxchg );
 7455 %}
 7456 
 7457 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddI with a used result: newval is both the addend and the
        // destination of the result, emitted as a locked xaddl. Flags killed.
 7458   match(Set newval (GetAndAddI mem newval));
 7459   effect(KILL cr);
 7460   format %{ "XADDL  [$mem],$newval" %}
 7461   ins_encode %{
 7462     __ lock();
 7463     __ xaddl($mem$$Address, $newval$$Register);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
 7467 
 7468 // Important to match to xRegI: only 8-bit regs.
 7469 instruct xchgB( memory mem, xRegI newval) %{
        // GetAndSetB: newval is exchanged with the byte at mem and receives
        // the result. No lock() call and no flags effect are declared.
        // NOTE(review): presumably XCHG with a memory operand is implicitly
        // locked on x86 - confirm against the ISA reference.
 7470   match(Set newval (GetAndSetB mem newval));
 7471   format %{ "XCHGB  $newval,[$mem]" %}
 7472   ins_encode %{
 7473     __ xchgb($newval$$Register, $mem$$Address);
 7474   %}
 7475   ins_pipe( pipe_cmpxchg );
 7476 %}
 7477 
 7478 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: newval is exchanged with the 16-bit value at mem via
        // xchgw and receives the result. No lock() call and no flags effect
        // are declared.
 7479   match(Set newval (GetAndSetS mem newval));
 7480   format %{ "XCHGW  $newval,[$mem]" %}
 7481   ins_encode %{
 7482     __ xchgw($newval$$Register, $mem$$Address);
 7483   %}
 7484   ins_pipe( pipe_cmpxchg );
 7485 %}
 7486 
 7487 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: newval is exchanged with the int at mem via xchgl and
        // receives the result. No lock() call and no flags effect are
        // declared.
 7488   match(Set newval (GetAndSetI mem newval));
 7489   format %{ "XCHGL  $newval,[$mem]" %}
 7490   ins_encode %{
 7491     __ xchgl($newval$$Register, $mem$$Address);
 7492   %}
 7493   ins_pipe( pipe_cmpxchg );
 7494 %}
 7495 
 7496 instruct xchgP( memory mem, pRegP newval) %{
        // GetAndSetP: pointer exchange, emitted with the same xchgl as the
        // int form. No lock() call and no flags effect are declared.
 7497   match(Set newval (GetAndSetP mem newval));
 7498   format %{ "XCHGL  $newval,[$mem]" %}
 7499   ins_encode %{
 7500     __ xchgl($newval$$Register, $mem$$Address);
 7501   %}
 7502   ins_pipe( pipe_cmpxchg );
 7503 %}
 7504 
 7505 //----------Subtraction Instructions-------------------------------------------
 7506 
 7507 // Integer Subtraction Instructions
 7508 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Int subtract, register -= register. Flags are killed. size(2)
        // fixes the encoding at two bytes: opcode 0x2B plus the RegReg
        // operand encoding.
 7509   match(Set dst (SubI dst src));
 7510   effect(KILL cr);
 7511 
 7512   size(2);
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x2B);
 7515   ins_encode( OpcP, RegReg( dst, src) );
 7516   ins_pipe( ialu_reg_reg );
 7517 %}
 7518 
 7519 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Int subtract of an immediate from a register (opcode 0x81 with
        // sub-opcode 0x05); flags are killed. The commented-out RegImm
        // encoding is kept from the original; the active one is OpcSErm
        // followed by Con8or32.
 7520   match(Set dst (SubI dst src));
 7521   effect(KILL cr);
 7522 
 7523   format %{ "SUB    $dst,$src" %}
 7524   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7525   // ins_encode( RegImm( dst, src) );
 7526   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7527   ins_pipe( ialu_reg );
 7528 %}
 7529 
 7530 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int subtract whose second input is a LoadI from memory, matched
        // together with the load into one SUB reg,mem. Flags killed.
 7531   match(Set dst (SubI dst (LoadI src)));
 7532   effect(KILL cr);
 7533 
 7534   ins_cost(125);
 7535   format %{ "SUB    $dst,$src" %}
 7536   opcode(0x2B);
 7537   ins_encode( OpcP, RegMem( dst, src) );
 7538   ins_pipe( ialu_reg_mem );
 7539 %}
 7540 
 7541 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Matches load + subtract + store on the same memory operand
        // (StoreI dst (SubI (LoadI dst) src)) and encodes it as a single
        // SUB mem,reg using opcode 0x29 with RegMem(src, dst). Flags killed.
 7542   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7543   effect(KILL cr);
 7544 
 7545   ins_cost(150);
 7546   format %{ "SUB    $dst,$src" %}
 7547   opcode(0x29);  /* Opcode 29 /r */
 7548   ins_encode( OpcP, RegMem( src, dst ) );
 7549   ins_pipe( ialu_mem_reg );
 7550 %}
 7551 
 7552 // Subtract from a pointer
 7553 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
        // Recognises a pointer plus a negated int, AddP dst (SubI zero src),
        // and emits it as SUB dst,src. The zero operand appears only in the
        // match rule; the encoding uses just dst and src. Flags killed.
 7554   match(Set dst (AddP dst (SubI zero src)));
 7555   effect(KILL cr);
 7556 
 7557   size(2);
 7558   format %{ "SUB    $dst,$src" %}
 7559   opcode(0x2B);
 7560   ins_encode( OpcP, RegReg( dst, src) );
 7561   ins_pipe( ialu_reg_reg );
 7562 %}
 7563 
 7564 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Int negate: matches SubI zero dst and emits NEG dst in place
        // (opcode 0xF7 with sub-opcode 0x03, two bytes). Flags killed.
 7565   match(Set dst (SubI zero dst));
 7566   effect(KILL cr);
 7567 
 7568   size(2);
 7569   format %{ "NEG    $dst" %}
 7570   opcode(0xF7,0x03);  // Opcode F7 /3
 7571   ins_encode( OpcP, RegOpc( dst ) );
 7572   ins_pipe( ialu_reg );
 7573 %}
 7574 
 7575 //----------Multiplication/Division Instructions-------------------------------
 7576 // Integer Multiplication Instructions
 7577 // Multiply Register
 7578 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Int multiply, register *= register. The encoding lists OpcS before
        // OpcP, so the secondary opcode 0x0F is emitted ahead of the primary
        // 0xAF, followed by RegReg; size(3) declares three bytes in total.
        // Flags killed.
 7579   match(Set dst (MulI dst src));
 7580   effect(KILL cr);
 7581 
 7582   size(3);
 7583   ins_cost(300);
 7584   format %{ "IMUL   $dst,$src" %}
 7585   opcode(0xAF, 0x0F);
 7586   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7587   ins_pipe( ialu_reg_reg_alu0 );
 7588 %}
 7589 
 7590 // Multiply 32-bit Immediate
 7591 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
        // Three-operand int multiply: dst = src * imm, so dst need not be
        // one of the inputs. Encoded as OpcSE(imm), RegReg(dst, src), then
        // the immediate via Con8or32. Flags killed.
 7592   match(Set dst (MulI src imm));
 7593   effect(KILL cr);
 7594 
 7595   ins_cost(300);
 7596   format %{ "IMUL   $dst,$src,$imm" %}
 7597   opcode(0x69);  /* 69 /r id */
 7598   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7599   ins_pipe( ialu_reg_reg_alu0 );
 7600 %}
 7601 
 7602 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Loads an immL32 constant into an eADXRegL_low_only operand; the
        // format shows only a MOV into EAX. The mulI_imm_high and
        // mulI_imm_RShift_high rules take this operand class as their src2.
 7603   match(Set dst src);
 7604   effect(KILL cr);
 7605 
 7606   // Note that this is artificially increased to make it more expensive than loadConL
 7607   ins_cost(250);
 7608   format %{ "MOV    EAX,$src\t// low word only" %}
 7609   opcode(0xB8);
 7610   ins_encode( LdImmL_Lo(dst, src) );
 7611   ins_pipe( ialu_reg_fat );
 7612 %}
 7613 
 7614 // Multiply by 32-bit Immediate, taking the shifted high order results
 7615 //  (special case for shift by 32)
 7616 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
        // Matches ConvL2I(RShiftL(MulL(ConvI2L src1, src2), cnt)) with an
        // immI_32 shift count, i.e. the high word of a widening multiply.
        // The predicate walks the match tree down to the MulL's second input
        // and requires it to be a ConL within [min_jint, max_jint].
 7617   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7618   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7619              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7620              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7621   effect(USE src1, KILL cr);
 7622 
 7623   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7624   ins_cost(0*100 + 1*400 - 150);
 7625   format %{ "IMUL   EDX:EAX,$src1" %}
 7626   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7627   ins_pipe( pipe_slow );
 7628 %}
 7629 
 7630 // Multiply by 32-bit Immediate, taking the shifted high order results
 7631 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
        // Same tree as mulI_imm_high but with an immI_32_63 shift count; the
        // format shows an extra SAR of EDX by cnt-32 after the multiply.
        // The predicate requires the MulL's second input to be a ConL within
        // [min_jint, max_jint]. The cost is 100 higher than mulI_imm_high.
 7632   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7633   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7634              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7635              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7636   effect(USE src1, KILL cr);
 7637 
 7638   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7639   ins_cost(1*100 + 1*400 - 150);
 7640   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7641             "SAR    EDX,$cnt-32" %}
 7642   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7643   ins_pipe( pipe_slow );
 7644 %}
 7645 
 7646 // Multiply Memory 32-bit Immediate
 7647 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
        // dst = (int loaded from src) * imm, with the LoadI matched into the
        // multiply. Encoded as OpcSE(imm), RegMem(dst, src), then the
        // immediate via Con8or32. Flags killed.
 7648   match(Set dst (MulI (LoadI src) imm));
 7649   effect(KILL cr);
 7650 
 7651   ins_cost(300);
 7652   format %{ "IMUL   $dst,$src,$imm" %}
 7653   opcode(0x69);  /* 69 /r id */
 7654   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7655   ins_pipe( ialu_reg_mem_alu0 );
 7656 %}
 7657 
 7658 // Multiply Memory
 7659 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
        // dst *= (int loaded from src), with the LoadI matched into the
        // multiply. Same two opcode bytes as mulI_eReg (secondary 0x0F, then
        // primary 0xAF) followed by RegMem. Flags killed.
 7660   match(Set dst (MulI dst (LoadI src)));
 7661   effect(KILL cr);
 7662 
 7663   ins_cost(350);
 7664   format %{ "IMUL   $dst,$src" %}
 7665   opcode(0xAF, 0x0F);
 7666   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7667   ins_pipe( ialu_reg_mem_alu0 );
 7668 %}
 7669 
 7670 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7671 %{
        // MulAddS2I computed as dst = dst*src1 + src2*src3. It has no
        // encoding of its own: it expands into two mulI_eReg and one
        // addI_eReg. src2 is overwritten by the second multiply, hence the
        // KILL src2.
 7672   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7673   effect(KILL cr, KILL src2);
 7674 
 7675   expand %{ mulI_eReg(dst, src1, cr);
 7676            mulI_eReg(src2, src3, cr);
 7677            addI_eReg(dst, src2, cr); %}
 7678 %}
 7679 
 7680 // Multiply Register Int to Long
 7681 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
        // Widening multiply of two ints that were each converted to long.
        // src is pinned to the eAXRegI class and the result to eADXRegL; the
        // encoding (long_int_multiply) is given only dst and src1.
 7682   // Basic Idea: long = (long)int * (long)int
 7683   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7684   effect(DEF dst, USE src, USE src1, KILL flags);
 7685 
 7686   ins_cost(300);
 7687   format %{ "IMUL   $dst,$src1" %}
 7688 
 7689   ins_encode( long_int_multiply( dst, src1 ) );
 7690   ins_pipe( ialu_reg_reg_alu0 );
 7691 %}
 7692 
 7693 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
        // Widening multiply where both int inputs are masked with the same
        // immL_32bits constant after conversion to long. Printed as MUL and
        // encoded by long_uint_multiply(dst, src1). Flags killed.
 7694   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7695   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7696   effect(KILL flags);
 7697 
 7698   ins_cost(300);
 7699   format %{ "MUL    $dst,$src1" %}
 7700 
 7701   ins_encode( long_uint_multiply(dst, src1) );
 7702   ins_pipe( ialu_reg_reg_alu0 );
 7703 %}
 7704 
 7705 // Multiply Register Long
 7706 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // General 64x64 -> 64-bit multiply with dst in the eADXRegL class.
        // tmp is a scratch register (TEMP) that accumulates the two cross
        // products, as the format shows; the encoding is long_multiply.
 7707   match(Set dst (MulL dst src));
 7708   effect(KILL cr, TEMP tmp);
 7709   ins_cost(4*100+3*400);
 7710 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7711 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7712   format %{ "MOV    $tmp,$src.lo\n\t"
 7713             "IMUL   $tmp,EDX\n\t"
 7714             "MOV    EDX,$src.hi\n\t"
 7715             "IMUL   EDX,EAX\n\t"
 7716             "ADD    $tmp,EDX\n\t"
 7717             "MUL    EDX:EAX,$src.lo\n\t"
 7718             "ADD    EDX,$tmp" %}
 7719   ins_encode( long_multiply( dst, src, tmp ) );
 7720   ins_pipe( pipe_slow );
 7721 %}
 7722 
 7723 // Multiply Register Long where the left operand's high 32 bits are zero
 7724 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply chosen when is_operand_hi32_zero(n->in(1))
        // holds for the left input, so only one cross product is needed.
 7725   predicate(is_operand_hi32_zero(n->in(1)));
 7726   match(Set dst (MulL dst src));
 7727   effect(KILL cr, TEMP tmp);
 7728   ins_cost(2*100+2*400);
 7729 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7730 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7731   format %{ "MOV    $tmp,$src.hi\n\t"
 7732             "IMUL   $tmp,EAX\n\t"
 7733             "MUL    EDX:EAX,$src.lo\n\t"
 7734             "ADD    EDX,$tmp" %}
 7735   ins_encode %{
 7736     // tmp = src.hi * EAX is the remaining cross term
 7737     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7737     __ imull($tmp$$Register, rax);
 7738     // full product of the low words, then fold the cross term into EDX
 7738     __ mull($src$$Register);
 7739     __ addl(rdx, $tmp$$Register);
 7740   %}
 7741   ins_pipe( pipe_slow );
 7742 %}
 7743 
 7744 // Multiply Register Long where the right operand's high 32 bits are zero
 7745 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply chosen when is_operand_hi32_zero(n->in(2))
        // holds for the right input, so only one cross product is needed.
 7746   predicate(is_operand_hi32_zero(n->in(2)));
 7747   match(Set dst (MulL dst src));
 7748   effect(KILL cr, TEMP tmp);
 7749   ins_cost(2*100+2*400);
 7750 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7751 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7752   format %{ "MOV    $tmp,$src.lo\n\t"
 7753             "IMUL   $tmp,EDX\n\t"
 7754             "MUL    EDX:EAX,$src.lo\n\t"
 7755             "ADD    EDX,$tmp" %}
 7756   ins_encode %{
 7757     // tmp = src.lo * EDX is the remaining cross term
 7757     __ movl($tmp$$Register, $src$$Register);
 7758     __ imull($tmp$$Register, rdx);
 7759     // full product of the low words, then fold the cross term into EDX
 7759     __ mull($src$$Register);
 7760     __ addl(rdx, $tmp$$Register);
 7761   %}
 7762   ins_pipe( pipe_slow );
 7763 %}
 7764 
 7765 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7766 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
        // Cheapest long multiply: chosen when is_operand_hi32_zero holds for
        // both inputs, so a single mull of the low words gives the whole
        // result and no temp register is needed.
 7767   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7768   match(Set dst (MulL dst src));
 7769   effect(KILL cr);
 7770   ins_cost(1*400);
 7771 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7772 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7773   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7774   ins_encode %{
 7775     __ mull($src$$Register);
 7776   %}
 7777   ins_pipe( pipe_slow );
 7778 %}
 7779 
 7780 // Multiply Register Long by small constant
 7781 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
        // Long multiply by a constant of operand class immL_127. tmp is a
        // scratch register for the high-word product. The encoding is
        // long_multiply_con and is declared to be 12 bytes (size(12)).
 7782   match(Set dst (MulL dst src));
 7783   effect(KILL cr, TEMP tmp);
 7784   ins_cost(2*100+2*400);
 7785   size(12);
 7786 // Basic idea: lo(result) = lo(src * EAX)
 7787 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7788   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7789             "MOV    EDX,$src\n\t"
 7790             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7791             "ADD    EDX,$tmp" %}
 7792   ins_encode( long_multiply_con( dst, src, tmp ) );
 7793   ins_pipe( pipe_slow );
 7794 %}
 7795 
 7796 // Integer DIV with Register
      // Dividend and quotient live in EAX, the divisor is pinned to ECX, and EDX
      // and the flags are clobbered. As the format shows, the sequence
      // special-cases EAX == 0x80000000 with ECX == -1 and skips the IDIV then.
      // The bytes are emitted by cdq_enc plus the F7 /7 opcode; cdq_enc is defined
      // outside this section.
 7797 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7798   match(Set rax (DivI rax div));
 7799   effect(KILL rdx, KILL cr);
 7800   size(26);
 7801   ins_cost(30*100+10*100);
 7802   format %{ "CMP    EAX,0x80000000\n\t"
 7803             "JNE,s  normal\n\t"
 7804             "XOR    EDX,EDX\n\t"
 7805             "CMP    ECX,-1\n\t"
 7806             "JE,s   done\n"
 7807     "normal: CDQ\n\t"
 7808             "IDIV   $div\n\t"
 7809     "done:"        %}
 7810   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7811   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7812   ins_pipe( ialu_reg_reg_alu0 );
 7813 %}
 7814 
 7815 // Divide Register Long
      // General 64-bit division is done by a runtime call (effect CALL): per the
      // format, both operands are pushed on the stack, SharedRuntime::ldiv is
      // called, and the 16 bytes of arguments are popped afterwards. The result is
      // produced in the EDX:EAX pair. The encoding is the long_div enc_class,
      // defined outside this section.
 7816 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7817   match(Set dst (DivL src1 src2));
 7818   effect(CALL);
 7819   ins_cost(10000);
 7820   format %{ "PUSH   $src1.hi\n\t"
 7821             "PUSH   $src1.lo\n\t"
 7822             "PUSH   $src2.hi\n\t"
 7823             "PUSH   $src2.lo\n\t"
 7824             "CALL   SharedRuntime::ldiv\n\t"
 7825             "ADD    ESP,16" %}
 7826   ins_encode( long_div(src1,src2) );
 7827   ins_pipe( pipe_slow );
 7828 %}
 7829 
 7830 // Integer DIVMOD with Register, both quotient and mod results
      // Matches a DivModI node, so EDX is not listed as killed here (unlike
      // divI_eReg); only the flags are clobbered. The encoding and size are the
      // same as divI_eReg, including the 0x80000000 / -1 special case shown in the
      // format.
 7831 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7832   match(DivModI rax div);
 7833   effect(KILL cr);
 7834   size(26);
 7835   ins_cost(30*100+10*100);
 7836   format %{ "CMP    EAX,0x80000000\n\t"
 7837             "JNE,s  normal\n\t"
 7838             "XOR    EDX,EDX\n\t"
 7839             "CMP    ECX,-1\n\t"
 7840             "JE,s   done\n"
 7841     "normal: CDQ\n\t"
 7842             "IDIV   $div\n\t"
 7843     "done:"        %}
 7844   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7845   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7846   ins_pipe( pipe_slow );
 7847 %}
 7848 
 7849 // Integer MOD with Register
      // The remainder is produced in EDX; EAX (the dividend) and the flags are
      // clobbered, and the divisor is pinned to ECX. The encoding and size(26) are
      // identical to divI_eReg, so the format lists the same full sequence,
      // including the 0x80000000 / -1 special case, rather than only CDQ/IDIV.
 7850 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7851   match(Set rdx (ModI rax div));
 7852   effect(KILL rax, KILL cr);
 7853
 7854   size(26);
 7855   ins_cost(300);
 7856   format %{ "CMP    EAX,0x80000000\n\t"
                  "JNE,s  normal\n\t"
                  "XOR    EDX,EDX\n\t"
                  "CMP    ECX,-1\n\t"
                  "JE,s   done\n"
          "normal: CDQ\n\t"
 7857             "IDIV   $div\n\t"
          "done:"        %}
 7858   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7859   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7860   ins_pipe( ialu_reg_reg_alu0 );
 7861 %}
 7862 
 7863 // Remainder Register Long
      // General 64-bit remainder is done by a runtime call (effect CALL): per the
      // format, both operands are pushed, SharedRuntime::lrem is called, and the
      // 16 bytes of arguments are popped afterwards. The result is produced in the
      // EDX:EAX pair. The encoding is the long_mod enc_class, defined outside this
      // section.
 7864 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7865   match(Set dst (ModL src1 src2));
 7866   effect(CALL);
 7867   ins_cost(10000);
 7868   format %{ "PUSH   $src1.hi\n\t"
 7869             "PUSH   $src1.lo\n\t"
 7870             "PUSH   $src2.hi\n\t"
 7871             "PUSH   $src2.lo\n\t"
 7872             "CALL   SharedRuntime::lrem\n\t"
 7873             "ADD    ESP,16" %}
 7874   ins_encode( long_mod(src1,src2) );
 7875   ins_pipe( pipe_slow );
 7876 %}
 7877 
 7878 // Divide Register Long (no special case since divisor != -1)
      // Divides the 64-bit value in EDX:EAX by a 32-bit constant using unsigned
      // 32-bit DIV instructions on the magnitudes, then fixes up the sign. The
      // assert below requires the constant to be none of 0, -1 and min_jint.
      // tmp holds abs(imm); tmp2 holds the high word of the quotient.
 7879 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7880   match(Set dst (DivL dst imm));
 7881   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7882   ins_cost(1000);
 7883   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7884             "XOR    $tmp2,$tmp2\n\t"
 7885             "CMP    $tmp,EDX\n\t"
 7886             "JA,s   fast\n\t"
 7887             "MOV    $tmp2,EAX\n\t"
 7888             "MOV    EAX,EDX\n\t"
 7889             "MOV    EDX,0\n\t"
 7890             "JLE,s  pos\n\t"
 7891             "LNEG   EAX : $tmp2\n\t"
 7892             "DIV    $tmp # unsigned division\n\t"
 7893             "XCHG   EAX,$tmp2\n\t"
 7894             "DIV    $tmp\n\t"
 7895             "LNEG   $tmp2 : EAX\n\t"
 7896             "JMP,s  done\n"
 7897     "pos:\n\t"
 7898             "DIV    $tmp\n\t"
 7899             "XCHG   EAX,$tmp2\n"
 7900     "fast:\n\t"
 7901             "DIV    $tmp\n"
 7902     "done:\n\t"
 7903             "MOV    EDX,$tmp2\n\t"
 7904             "NEG    EDX:EAX # if $imm < 0" %}
 7905   ins_encode %{
 7906     int con = (int)$imm$$constant;
 7907     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // pcon is the magnitude of the divisor; safe because min_jint is excluded
 7908     int pcon = (con > 0) ? con : -con;
 7909     Label Lfast, Lpos, Ldone;
 7910
 7911     __ movl($tmp$$Register, pcon);
          // tmp2 = 0: high word of the quotient when the fast path is taken
 7912     __ xorl($tmp2$$Register,$tmp2$$Register);
          // compare the divisor magnitude with the high word of the dividend
 7913     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7914     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7915
          // Slow path: divide in two steps, high word first.
 7916     __ movl($tmp2$$Register, $dst$$Register); // save
 7917     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7918     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
          // this branch still tests the flags set by the CMP above
 7919     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7920
 7921     // Negative dividend.
 7922     // convert value to positive to use unsigned division
 7923     __ lneg($dst$$Register, $tmp2$$Register);
 7924     __ divl($tmp$$Register);
 7925     __ xchgl($dst$$Register, $tmp2$$Register);
 7926     __ divl($tmp$$Register);
 7927     // revert result back to negative
 7928     __ lneg($tmp2$$Register, $dst$$Register);
 7929     __ jmpb(Ldone);
 7930
 7931     __ bind(Lpos);
 7932     __ divl($tmp$$Register); // Use unsigned division
 7933     __ xchgl($dst$$Register, $tmp2$$Register);
 7934     // Fall through for final divide, tmp2 has 32 bit hi result
 7935
 7936     __ bind(Lfast);
 7937     // fast path: src is positive
 7938     __ divl($tmp$$Register); // Use unsigned division
 7939
 7940     __ bind(Ldone);
          // reassemble the 64-bit quotient: the high word comes from tmp2
 7941     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
          // the division used abs(con); negate the quotient for a negative divisor
 7942     if (con < 0) {
 7943       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7944     }
 7945   %}
 7946   ins_pipe( pipe_slow );
 7947 %}
 7948 
 7949 // Remainder Register Long (remainder fits into 32 bits)
      // Computes EDX:EAX % imm for a 32-bit constant using unsigned 32-bit DIV
      // instructions on the magnitudes. The remainder ends up in EDX, is negated
      // for a negative dividend, and is finally sign-extended into EDX:EAX.
      // The assert below requires the constant to be none of 0, -1 and min_jint.
 7950 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7951   match(Set dst (ModL dst imm));
 7952   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7953   ins_cost(1000);
      // The last format line carries no trailing "\n\t", which would print an
      // empty line.
 7954   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7955             "CMP    $tmp,EDX\n\t"
 7956             "JA,s   fast\n\t"
 7957             "MOV    $tmp2,EAX\n\t"
 7958             "MOV    EAX,EDX\n\t"
 7959             "MOV    EDX,0\n\t"
 7960             "JLE,s  pos\n\t"
 7961             "LNEG   EAX : $tmp2\n\t"
 7962             "DIV    $tmp # unsigned division\n\t"
 7963             "MOV    EAX,$tmp2\n\t"
 7964             "DIV    $tmp\n\t"
 7965             "NEG    EDX\n\t"
 7966             "JMP,s  done\n"
 7967     "pos:\n\t"
 7968             "DIV    $tmp\n\t"
 7969             "MOV    EAX,$tmp2\n"
 7970     "fast:\n\t"
 7971             "DIV    $tmp\n"
 7972     "done:\n\t"
 7973             "MOV    EAX,EDX\n\t"
 7974             "SAR    EDX,31" %}
 7975   ins_encode %{
 7976     int con = (int)$imm$$constant;
 7977     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // pcon is the magnitude of the divisor; safe because min_jint is excluded
 7978     int pcon = (con > 0) ? con : -con;
 7979     Label  Lfast, Lpos, Ldone;
 7980
 7981     __ movl($tmp$$Register, pcon);
          // compare the divisor magnitude with the high word of the dividend
 7982     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7983     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7984
          // Slow path: divide in two steps, high word first.
 7985     __ movl($tmp2$$Register, $dst$$Register); // save
 7986     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7987     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
          // this branch still tests the flags set by the CMP above
 7988     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7989
 7990     // Negative dividend.
 7991     // convert value to positive to use unsigned division
 7992     __ lneg($dst$$Register, $tmp2$$Register);
 7993     __ divl($tmp$$Register);
 7994     __ movl($dst$$Register, $tmp2$$Register);
 7995     __ divl($tmp$$Register);
 7996     // revert remainder back to negative
 7997     __ negl(HIGH_FROM_LOW($dst$$Register));
 7998     __ jmpb(Ldone);
 7999
 8000     __ bind(Lpos);
 8001     __ divl($tmp$$Register);
 8002     __ movl($dst$$Register, $tmp2$$Register);
 8003
 8004     __ bind(Lfast);
 8005     // fast path: src is positive
 8006     __ divl($tmp$$Register);
 8007
 8008     __ bind(Ldone);
          // the 32-bit remainder is in the high register: move it to the low half
          // and sign-extend it into the high half
 8009     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8010     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8011
 8012   %}
 8013   ins_pipe( pipe_slow );
 8014 %}
 8015 
 8016 // Integer Shift Instructions
 8017 // Shift Left by one
      // Matches LShiftI of dst by an immI_1 operand using the 2-byte D1 /4 form.
      // The flags register is clobbered.
 8018 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8019   match(Set dst (LShiftI dst shift));
 8020   effect(KILL cr);
 8021
 8022   size(2);
 8023   format %{ "SHL    $dst,$shift" %}
 8024   opcode(0xD1, 0x4);  /* D1 /4 */
 8025   ins_encode( OpcP, RegOpc( dst ) );
 8026   ins_pipe( ialu_reg );
 8027 %}
 8028 
 8029 // Shift Left by 8-bit immediate
      // Matches LShiftI of dst by an immI8 operand using the 3-byte C1 /4 ib form.
      // The flags register is clobbered.
 8030 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8031   match(Set dst (LShiftI dst shift));
 8032   effect(KILL cr);
 8033
 8034   size(3);
 8035   format %{ "SHL    $dst,$shift" %}
 8036   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8037   ins_encode( RegOpcImm( dst, shift) );
 8038   ins_pipe( ialu_reg );
 8039 %}
 8040 
 8041 // Shift Left by variable
      // The shift count operand is pinned to ECX (eCXRegI); the D3 /4 encoding
      // names only dst. The flags register is clobbered.
 8042 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8043   match(Set dst (LShiftI dst shift));
 8044   effect(KILL cr);
 8045
 8046   size(2);
 8047   format %{ "SHL    $dst,$shift" %}
 8048   opcode(0xD3, 0x4);  /* D3 /4 */
 8049   ins_encode( OpcP, RegOpc( dst ) );
 8050   ins_pipe( ialu_reg_reg );
 8051 %}
 8052 
 8053 // Arithmetic shift right by one
      // Register form: matches RShiftI of dst by an immI_1 operand using the
      // 2-byte D1 /7 form. The flags register is clobbered.
 8054 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8055   match(Set dst (RShiftI dst shift));
 8056   effect(KILL cr);
 8057
 8058   size(2);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xD1, 0x7);  /* D1 /7 */
 8061   ins_encode( OpcP, RegOpc( dst ) );
 8062   ins_pipe( ialu_reg );
 8063 %}
 8064 
 8065 // Arithmetic shift right by one (memory destination)
      // Matches a StoreI to dst of (LoadI dst) shifted right by an immI_1 operand,
      // so the shift is applied to the memory operand itself (D1 /7 with a memory
      // ModRM). The flags register is clobbered.
 8066 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8067   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8068   effect(KILL cr);
 8069   format %{ "SAR    $dst,$shift" %}
 8070   opcode(0xD1, 0x7);  /* D1 /7 */
 8071   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8072   ins_pipe( ialu_mem_imm );
 8073 %}
 8074 
 8075 // Arithmetic Shift Right by 8-bit immediate
      // Register form: matches RShiftI of dst by an immI8 operand using the 3-byte
      // C1 /7 ib form. The flags register is clobbered. It uses the register pipe
      // class ialu_reg, as the sibling register-immediate shifts salI_eReg_imm and
      // shrI_eReg_imm do; the memory pipe class is for the sarI_mem_* forms.
 8076 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8077   match(Set dst (RShiftI dst shift));
 8078   effect(KILL cr);
 8079
 8080   size(3);
 8081   format %{ "SAR    $dst,$shift" %}
 8082   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8083   ins_encode( RegOpcImm( dst, shift ) );
 8084   ins_pipe( ialu_reg );
 8085 %}
 8086 
 8087 // Arithmetic Shift Right by 8-bit immediate (memory destination)
      // Matches a StoreI to dst of (LoadI dst) shifted right by an immI8 operand,
      // so the shift is applied to the memory operand itself (C1 /7 ib). No size()
      // is declared for this form. The flags register is clobbered.
 8088 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8089   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8090   effect(KILL cr);
 8091
 8092   format %{ "SAR    $dst,$shift" %}
 8093   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8094   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8095   ins_pipe( ialu_mem_imm );
 8096 %}
 8097 
 8098 // Arithmetic Shift Right by variable
      // The shift count operand is pinned to ECX (eCXRegI); the D3 /7 encoding
      // names only dst. The flags register is clobbered.
 8099 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8100   match(Set dst (RShiftI dst shift));
 8101   effect(KILL cr);
 8102
 8103   size(2);
 8104   format %{ "SAR    $dst,$shift" %}
 8105   opcode(0xD3, 0x7);  /* D3 /7 */
 8106   ins_encode( OpcP, RegOpc( dst ) );
 8107   ins_pipe( ialu_reg_reg );
 8108 %}
 8109 
 8110 // Logical shift right by one
      // Matches URShiftI of dst by an immI_1 operand using the 2-byte D1 /5 form.
      // The flags register is clobbered.
 8111 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8112   match(Set dst (URShiftI dst shift));
 8113   effect(KILL cr);
 8114
 8115   size(2);
 8116   format %{ "SHR    $dst,$shift" %}
 8117   opcode(0xD1, 0x5);  /* D1 /5 */
 8118   ins_encode( OpcP, RegOpc( dst ) );
 8119   ins_pipe( ialu_reg );
 8120 %}
 8121 
 8122 // Logical Shift Right by 8-bit immediate
      // Matches URShiftI of dst by an immI8 operand using the 3-byte C1 /5 ib form.
      // The flags register is clobbered.
 8123 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8124   match(Set dst (URShiftI dst shift));
 8125   effect(KILL cr);
 8126
 8127   size(3);
 8128   format %{ "SHR    $dst,$shift" %}
 8129   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8130   ins_encode( RegOpcImm( dst, shift) );
 8131   ins_pipe( ialu_reg );
 8132 %}
 8133 
 8134 
 8135 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8136 // This idiom is used by the compiler for the i2b bytecode.
      // The pair of shifts is replaced by a single sign-extending byte move
      // (movsbl). No flags effect is declared for this instruct.
      // NOTE(review): src is an xRegI, presumably limited to registers with a
      // byte-addressable low part; confirm against the operand definition.
 8137 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8138   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8139
 8140   size(3);
 8141   format %{ "MOVSX  $dst,$src :8" %}
 8142   ins_encode %{
 8143     __ movsbl($dst$$Register, $src$$Register);
 8144   %}
 8145   ins_pipe(ialu_reg_reg);
 8146 %}
 8147 
 8148 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8149 // This idiom is used by the compiler for the i2s bytecode.
      // The pair of shifts is replaced by a single sign-extending 16-bit move
      // (movswl). No flags effect is declared for this instruct.
 8150 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8151   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8152
 8153   size(3);
 8154   format %{ "MOVSX  $dst,$src :16" %}
 8155   ins_encode %{
 8156     __ movswl($dst$$Register, $src$$Register);
 8157   %}
 8158   ins_pipe(ialu_reg_reg);
 8159 %}
 8160 
 8161 
 8162 // Logical Shift Right by variable
      // The shift count operand is pinned to ECX (eCXRegI); the D3 /5 encoding
      // names only dst. The flags register is clobbered.
 8163 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8164   match(Set dst (URShiftI dst shift));
 8165   effect(KILL cr);
 8166
 8167   size(2);
 8168   format %{ "SHR    $dst,$shift" %}
 8169   opcode(0xD3, 0x5);  /* D3 /5 */
 8170   ins_encode( OpcP, RegOpc( dst ) );
 8171   ins_pipe( ialu_reg_reg );
 8172 %}
 8173 
 8174 
 8175 //----------Logical Instructions-----------------------------------------------
 8176 //----------Integer Logical Instructions---------------------------------------
 8177 // And Instructions
 8178 // And Register with Register
      // Two-operand form: dst = dst & src, encoded as opcode 0x23 plus a
      // register-register ModRM byte (2 bytes). The flags register is clobbered.
 8179 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8180   match(Set dst (AndI dst src));
 8181   effect(KILL cr);
 8182
 8183   size(2);
 8184   format %{ "AND    $dst,$src" %}
 8185   opcode(0x23);
 8186   ins_encode( OpcP, RegReg( dst, src) );
 8187   ins_pipe( ialu_reg_reg );
 8188 %}
 8189 
 8190 // And Register with Immediate
      // dst = dst & constant. No size() is declared; the Con8or32 encoder name
      // suggests the immediate is emitted as either 8 or 32 bits (its definition is
      // outside this section). The flags register is clobbered.
 8191 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8192   match(Set dst (AndI dst src));
 8193   effect(KILL cr);
 8194
 8195   format %{ "AND    $dst,$src" %}
 8196   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8197   // ins_encode( RegImm( dst, src) );
 8198   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8199   ins_pipe( ialu_reg );
 8200 %}
 8201 
 8202 // And Register with Memory
      // Folds the LoadI into the AND: dst = dst & [src]. Costed at 125.
      // The flags register is clobbered.
 8203 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8204   match(Set dst (AndI dst (LoadI src)));
 8205   effect(KILL cr);
 8206
 8207   ins_cost(125);
 8208   format %{ "AND    $dst,$src" %}
 8209   opcode(0x23);
 8210   ins_encode( OpcP, RegMem( dst, src) );
 8211   ins_pipe( ialu_reg_mem );
 8212 %}
 8213 
 8214 // And Memory with Register
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) & src.
      // RegMem receives (src, dst) because the register is the reg field and the
      // memory location is the r/m field of opcode 21 /r.
      // The flags register is clobbered.
 8215 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8216   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8217   effect(KILL cr);
 8218
 8219   ins_cost(150);
 8220   format %{ "AND    $dst,$src" %}
 8221   opcode(0x21);  /* Opcode 21 /r */
 8222   ins_encode( OpcP, RegMem( src, dst ) );
 8223   ins_pipe( ialu_mem_reg );
 8224 %}
 8225 
 8226 // And Memory with Immediate
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) & constant.
      // The flags register is clobbered.
 8227 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8228   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8229   effect(KILL cr);
 8230
 8231   ins_cost(125);
 8232   format %{ "AND    $dst,$src" %}
 8233   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8234   // ins_encode( MemImm( dst, src) );
 8235   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8236   ins_pipe( ialu_mem_imm );
 8237 %}
 8238 
 8239 // BMI1 instructions
      // ANDN, register form: matches (src1 ^ -1) & src2, i.e. ~src1 & src2, as one
      // three-operand instruction. Only selected when UseBMI1Instructions is set.
      // The flags register is clobbered.
 8240 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8241   match(Set dst (AndI (XorI src1 minus_1) src2));
 8242   predicate(UseBMI1Instructions);
 8243   effect(KILL cr);
 8244
 8245   format %{ "ANDNL  $dst, $src1, $src2" %}
 8246
 8247   ins_encode %{
 8248     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8249   %}
 8250   ins_pipe(ialu_reg);
 8251 %}
 8252 
 8253 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
        // ANDN, memory form: matches (src1 ^ -1) & LoadI(src2), folding the load
        // into the instruction. Only selected when UseBMI1Instructions is set.
        // The flags register is clobbered.
 8254   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8255   predicate(UseBMI1Instructions);
 8256   effect(KILL cr);
 8257
 8258   ins_cost(125);
 8259   format %{ "ANDNL  $dst, $src1, $src2" %}
 8260
 8261   ins_encode %{
 8262     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8263   %}
 8264   ins_pipe(ialu_reg_mem);
 8265 %}
 8266 
 8267 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
        // BLSI, register form: matches (0 - src) & src, the expression that
        // isolates the lowest set bit of src. Only selected when
        // UseBMI1Instructions is set. The flags register is clobbered.
 8268   match(Set dst (AndI (SubI imm_zero src) src));
 8269   predicate(UseBMI1Instructions);
 8270   effect(KILL cr);
 8271
 8272   format %{ "BLSIL  $dst, $src" %}
 8273
 8274   ins_encode %{
 8275     __ blsil($dst$$Register, $src$$Register);
 8276   %}
 8277   ins_pipe(ialu_reg);
 8278 %}
 8279 
 8280 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
        // BLSI, memory form: matches (0 - LoadI(src)) & LoadI(src), with both
        // loads naming the same memory operand. Only selected when
        // UseBMI1Instructions is set. The flags register is clobbered.
 8281   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8282   predicate(UseBMI1Instructions);
 8283   effect(KILL cr);
 8284
 8285   ins_cost(125);
 8286   format %{ "BLSIL  $dst, $src" %}
 8287
 8288   ins_encode %{
 8289     __ blsil($dst$$Register, $src$$Address);
 8290   %}
 8291   ins_pipe(ialu_reg_mem);
 8292 %}
 8293 
 8294 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8295 %{
        // BLSMSK, register form: matches (src + -1) ^ src, which yields a mask of
        // all bits up to and including the lowest set bit of src. Only selected
        // when UseBMI1Instructions is set. The flags register is clobbered.
 8296   match(Set dst (XorI (AddI src minus_1) src));
 8297   predicate(UseBMI1Instructions);
 8298   effect(KILL cr);
 8299
 8300   format %{ "BLSMSKL $dst, $src" %}
 8301
 8302   ins_encode %{
 8303     __ blsmskl($dst$$Register, $src$$Register);
 8304   %}
 8305
 8306   ins_pipe(ialu_reg);
 8307 %}
 8308 
 8309 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8310 %{
        // BLSMSK, memory form: matches (LoadI(src) + -1) ^ LoadI(src), with both
        // loads naming the same memory operand. Only selected when
        // UseBMI1Instructions is set. The flags register is clobbered.
 8311   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8312   predicate(UseBMI1Instructions);
 8313   effect(KILL cr);
 8314
 8315   ins_cost(125);
 8316   format %{ "BLSMSKL $dst, $src" %}
 8317
 8318   ins_encode %{
 8319     __ blsmskl($dst$$Register, $src$$Address);
 8320   %}
 8321
 8322   ins_pipe(ialu_reg_mem);
 8323 %}
 8324 
 8325 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8326 %{
        // BLSR, register form: matches (src + -1) & src, which clears the lowest
        // set bit of src. Only selected when UseBMI1Instructions is set.
        // The flags register is clobbered.
 8327   match(Set dst (AndI (AddI src minus_1) src) );
 8328   predicate(UseBMI1Instructions);
 8329   effect(KILL cr);
 8330
 8331   format %{ "BLSRL  $dst, $src" %}
 8332
 8333   ins_encode %{
 8334     __ blsrl($dst$$Register, $src$$Register);
 8335   %}
 8336
 8337   ins_pipe(ialu_reg);
 8338 %}
 8339 
 8340 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8341 %{
        // BLSR, memory form: matches (LoadI(src) + -1) & LoadI(src), with both
        // loads naming the same memory operand. Only selected when
        // UseBMI1Instructions is set. The flags register is clobbered.
 8342   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8343   predicate(UseBMI1Instructions);
 8344   effect(KILL cr);
 8345
 8346   ins_cost(125);
 8347   format %{ "BLSRL  $dst, $src" %}
 8348
 8349   ins_encode %{
 8350     __ blsrl($dst$$Register, $src$$Address);
 8351   %}
 8352
 8353   ins_pipe(ialu_reg_mem);
 8354 %}
 8355 
 8356 // Or Instructions
 8357 // Or Register with Register
      // Two-operand form: dst = dst | src, encoded as opcode 0x0B plus a
      // register-register ModRM byte (2 bytes). The flags register is clobbered.
 8358 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8359   match(Set dst (OrI dst src));
 8360   effect(KILL cr);
 8361
 8362   size(2);
 8363   format %{ "OR     $dst,$src" %}
 8364   opcode(0x0B);
 8365   ins_encode( OpcP, RegReg( dst, src) );
 8366   ins_pipe( ialu_reg_reg );
 8367 %}
 8368 
 8369 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
        // Or Register with a pointer register reinterpreted as an integer: the
        // CastP2X is absorbed into the match, so the emitted code is the same
        // 2-byte OR as orI_eReg. The flags register is clobbered.
 8370   match(Set dst (OrI dst (CastP2X src)));
 8371   effect(KILL cr);
 8372
 8373   size(2);
 8374   format %{ "OR     $dst,$src" %}
 8375   opcode(0x0B);
 8376   ins_encode( OpcP, RegReg( dst, src) );
 8377   ins_pipe( ialu_reg_reg );
 8378 %}
 8379 
 8380 
 8381 // Or Register with Immediate
      // dst = dst | constant, using opcode 81 /1 with the same OpcSErm + Con8or32
      // encoder pair as andI_eReg_imm. The flags register is clobbered.
 8382 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8383   match(Set dst (OrI dst src));
 8384   effect(KILL cr);
 8385
 8386   format %{ "OR     $dst,$src" %}
 8387   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8388   // ins_encode( RegImm( dst, src) );
 8389   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8390   ins_pipe( ialu_reg );
 8391 %}
 8392 
 8393 // Or Register with Memory
      // Folds the LoadI into the OR: dst = dst | [src]. Costed at 125.
      // The flags register is clobbered.
 8394 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8395   match(Set dst (OrI dst (LoadI src)));
 8396   effect(KILL cr);
 8397
 8398   ins_cost(125);
 8399   format %{ "OR     $dst,$src" %}
 8400   opcode(0x0B);
 8401   ins_encode( OpcP, RegMem( dst, src) );
 8402   ins_pipe( ialu_reg_mem );
 8403 %}
 8404 
 8405 // Or Memory with Register
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) | src.
      // RegMem receives (src, dst) because the register is the reg field and the
      // memory location is the r/m field of opcode 09 /r.
      // The flags register is clobbered.
 8406 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8407   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8408   effect(KILL cr);
 8409
 8410   ins_cost(150);
 8411   format %{ "OR     $dst,$src" %}
 8412   opcode(0x09);  /* Opcode 09 /r */
 8413   ins_encode( OpcP, RegMem( src, dst ) );
 8414   ins_pipe( ialu_mem_reg );
 8415 %}
 8416 
 8417 // Or Memory with Immediate
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) | constant.
      // The flags register is clobbered.
 8418 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8419   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8420   effect(KILL cr);
 8421
 8422   ins_cost(125);
 8423   format %{ "OR     $dst,$src" %}
 8424   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8425   // ins_encode( MemImm( dst, src) );
 8426   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8427   ins_pipe( ialu_mem_imm );
 8428 %}
 8429 
 8430 // ROL/ROR
 8431 // ROL expand
      // Helper with no match rule: it is only instantiated through the expand
      // block of rolI_eReg_i1. Rotates dst left by one (D1 /0); dst is both read
      // and written and the flags register is clobbered.
 8432 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8433   effect(USE_DEF dst, USE shift, KILL cr);
 8434
 8435   format %{ "ROL    $dst, $shift" %}
 8436   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8437   ins_encode( OpcP, RegOpc( dst ));
 8438   ins_pipe( ialu_reg );
 8439 %}
 8440 
 8441 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated through the expand
        // block of rolI_eReg_i8. Rotates dst left by an 8-bit immediate (C1 /0);
        // dst is both read and written and the flags register is clobbered.
 8442   effect(USE_DEF dst, USE shift, KILL cr);
 8443
 8444   format %{ "ROL    $dst, $shift" %}
 8445   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8446   ins_encode( RegOpcImm(dst, shift) );
 8447   ins_pipe(ialu_reg);
 8448 %}
 8449 
 8450 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated through the expand
        // blocks of rolI_eReg_Var_C0 and rolI_eReg_Var_C32. Rotates dst left by
        // the count in ECX (D3 /0); dst is an ncxRegI, shift is pinned to ECX,
        // and the flags register is clobbered.
 8451   effect(USE_DEF dst, USE shift, KILL cr);
 8452
 8453   format %{ "ROL    $dst, $shift" %}
 8454   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8455   ins_encode(OpcP, RegOpc(dst));
 8456   ins_pipe( ialu_reg_reg );
 8457 %}
 8458 // end of ROL expand
 8459 
 8460 // ROL 32bit by one once
      // Recognizes (dst << 1) | (dst >>> -1) and expands it to a single ROL by
      // one. The right-shift count is matched as the constant -1 (immI_M1).
 8461 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8462   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8463
 8464   expand %{
 8465     rolI_eReg_imm1(dst, lshift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROL 32bit var by imm8 once
      // Recognizes (dst << lshift) | (dst >>> rshift) for constant shift counts.
      // The predicate accepts the pattern only when the two constants sum to 0
      // modulo 32, which is what makes the expression a rotate. It expands to a
      // ROL by lshift.
 8470 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8471   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8472   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8473
 8474   expand %{
 8475     rolI_eReg_imm8(dst, lshift, cr);
 8476   %}
 8477 %}
 8478 
 8479 // ROL 32bit var by var once
      // Recognizes (dst << shift) | (dst >>> (0 - shift)) with the count in ECX
      // and expands it to a single ROL by CL.
 8480 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8481   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8482
 8483   expand %{
 8484     rolI_eReg_CL(dst, shift, cr);
 8485   %}
 8486 %}
 8487 
 8488 // ROL 32bit var by var once
      // Same as rolI_eReg_Var_C0 but for the (32 - shift) spelling of the
      // complementary count: (dst << shift) | (dst >>> (32 - shift)).
 8489 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8490   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8491
 8492   expand %{
 8493     rolI_eReg_CL(dst, shift, cr);
 8494   %}
 8495 %}
 8496 
 8497 // ROR expand
      // Helper with no match rule: it is only instantiated through the expand
      // block of rorI_eReg_i1. Rotates dst right by one (D1 /1); dst is both read
      // and written and the flags register is clobbered.
 8498 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8499   effect(USE_DEF dst, USE shift, KILL cr);
 8500
 8501   format %{ "ROR    $dst, $shift" %}
 8502   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8503   ins_encode( OpcP, RegOpc( dst ) );
 8504   ins_pipe( ialu_reg );
 8505 %}
 8506 
 8507 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated through the expand
        // block of rorI_eReg_i8. Rotates dst right by an 8-bit immediate (C1 /1);
        // dst is both read and written and the flags register is clobbered.
 8508   effect (USE_DEF dst, USE shift, KILL cr);
 8509
 8510   format %{ "ROR    $dst, $shift" %}
 8511   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8512   ins_encode( RegOpcImm(dst, shift) );
 8513   ins_pipe( ialu_reg );
 8514 %}
 8515 
 8516 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
        // Helper with no match rule: it is only instantiated through the expand
        // blocks of rorI_eReg_Var_C0 and rorI_eReg_Var_C32. Rotates dst right by
        // the count in ECX (D3 /1); dst is an ncxRegI, shift is pinned to ECX,
        // and the flags register is clobbered.
 8517   effect(USE_DEF dst, USE shift, KILL cr);
 8518
 8519   format %{ "ROR    $dst, $shift" %}
 8520   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8521   ins_encode(OpcP, RegOpc(dst));
 8522   ins_pipe( ialu_reg_reg );
 8523 %}
 8524 // end of ROR expand
 8525 
 8526 // ROR right once
      // Recognizes (dst >>> 1) | (dst << -1) and expands it to a single ROR by
      // one. The left-shift count is matched as the constant -1 (immI_M1).
 8527 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8528   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8529
 8530   expand %{
 8531     rorI_eReg_imm1(dst, rshift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // ROR 32bit by immI8 once
      // Recognizes (dst >>> rshift) | (dst << lshift) for constant shift counts.
      // The predicate accepts the pattern only when the two constants sum to 0
      // modulo 32, which is what makes the expression a rotate. It expands to a
      // ROR by rshift.
 8536 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8537   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8538   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8539
 8540   expand %{
 8541     rorI_eReg_imm8(dst, rshift, cr);
 8542   %}
 8543 %}
 8544 
 8545 // ROR 32bit var by var once
      // Recognizes (dst >>> shift) | (dst << (0 - shift)) with the count in ECX
      // and expands it to a single ROR by CL.
 8546 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8547   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8548
 8549   expand %{
 8550     rorI_eReg_CL(dst, shift, cr);
 8551   %}
 8552 %}
 8553 
 8554 // ROR 32bit var by var once
      // Same as rorI_eReg_Var_C0 but for the (32 - shift) spelling of the
      // complementary count: (dst >>> shift) | (dst << (32 - shift)).
 8555 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8556   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8557
 8558   expand %{
 8559     rorI_eReg_CL(dst, shift, cr);
 8560   %}
 8561 %}
 8562 
 8563 // Xor Instructions
 8564 // Xor Register with Register
      // Two-operand form: dst = dst ^ src, encoded as opcode 0x33 plus a
      // register-register ModRM byte (2 bytes). The flags register is clobbered.
 8565 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8566   match(Set dst (XorI dst src));
 8567   effect(KILL cr);
 8568
 8569   size(2);
 8570   format %{ "XOR    $dst,$src" %}
 8571   opcode(0x33);
 8572   ins_encode( OpcP, RegReg( dst, src) );
 8573   ins_pipe( ialu_reg_reg );
 8574 %}
 8575 
 8576 // Xor Register with Immediate -1
      // XOR with all ones is a bitwise complement, so this emits NOT instead.
      // Unlike the other XOR forms, this instruct takes no flags operand and
      // declares no KILL effect.
 8577 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8578   match(Set dst (XorI dst imm));
 8579
 8580   size(2);
 8581   format %{ "NOT    $dst" %}
 8582   ins_encode %{
 8583      __ notl($dst$$Register);
 8584   %}
 8585   ins_pipe( ialu_reg );
 8586 %}
 8587 
 8588 // Xor Register with Immediate
      // dst = dst ^ constant, using opcode 81 /6 with the same OpcSErm + Con8or32
      // encoder pair as the AND/OR immediate forms. The flags register is
      // clobbered.
 8589 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8590   match(Set dst (XorI dst src));
 8591   effect(KILL cr);
 8592
 8593   format %{ "XOR    $dst,$src" %}
 8594   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8595   // ins_encode( RegImm( dst, src) );
 8596   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8597   ins_pipe( ialu_reg );
 8598 %}
 8599 
 8600 // Xor Register with Memory
      // Folds the LoadI into the XOR: dst = dst ^ [src]. Costed at 125.
      // The flags register is clobbered.
 8601 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8602   match(Set dst (XorI dst (LoadI src)));
 8603   effect(KILL cr);
 8604
 8605   ins_cost(125);
 8606   format %{ "XOR    $dst,$src" %}
 8607   opcode(0x33);
 8608   ins_encode( OpcP, RegMem(dst, src) );
 8609   ins_pipe( ialu_reg_mem );
 8610 %}
 8611 
 8612 // Xor Memory with Register
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) ^ src.
      // RegMem receives (src, dst) because the register is the reg field and the
      // memory location is the r/m field of opcode 31 /r.
      // The flags register is clobbered.
 8613 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8614   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8615   effect(KILL cr);
 8616
 8617   ins_cost(150);
 8618   format %{ "XOR    $dst,$src" %}
 8619   opcode(0x31);  /* Opcode 31 /r */
 8620   ins_encode( OpcP, RegMem( src, dst ) );
 8621   ins_pipe( ialu_mem_reg );
 8622 %}
 8623 
 8624 // Xor Memory with Immediate
      // Read-modify-write on memory: matches a StoreI to dst of (LoadI dst) ^ constant.
      // The flags register is clobbered.
 8625 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8626   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8627   effect(KILL cr);
 8628
 8629   ins_cost(125);
 8630   format %{ "XOR    $dst,$src" %}
 8631   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8632   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8633   ins_pipe( ialu_mem_imm );
 8634 %}
 8635 
 8636 //----------Convert Int to Boolean---------------------------------------------
 8637 
 8638 instruct movI_nocopy(rRegI dst, rRegI src) %{
        // Helper with no match rule: a plain register-to-register move that is
        // only instantiated as the first step of the convI2B expansion.
 8639   effect( DEF dst, USE src );
 8640   format %{ "MOV    $dst,$src" %}
 8641   ins_encode( enc_Copy( dst, src) );
 8642   ins_pipe( ialu_reg_reg );
 8643 %}
 8644 
 8645 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Helper with no match rule: second step of the convI2B expansion, entered
        // with dst already holding a copy of src. NEG sets the carry flag exactly
        // when dst is non-zero and leaves dst = -src; ADC then computes
        // -src + src + carry, so dst ends up 1 for a non-zero input and 0
        // otherwise. The flags register is clobbered.
 8646   effect( USE_DEF dst, USE src, KILL cr );
 8647
 8648   size(4);
 8649   format %{ "NEG    $dst\n\t"
 8650             "ADC    $dst,$src" %}
 8651   ins_encode( neg_reg(dst),
 8652               OpcRegReg(0x13,dst,src) );
 8653   ins_pipe( ialu_reg_reg_long );
 8654 %}
 8655 
 8656 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Conv2B of an integer register: expands to a register copy followed by
        // the NEG/ADC pair in ci2b.
 8657   match(Set dst (Conv2B src));
 8658
 8659   expand %{
 8660     movI_nocopy(dst,src);
 8661     ci2b(dst,src,cr);
 8662   %}
 8663 %}
 8664 
 8665 instruct movP_nocopy(rRegI dst, eRegP src) %{
        // Helper with no match rule: copies a pointer register into an integer
        // register; only instantiated as the first step of the convP2B expansion.
 8666   effect( DEF dst, USE src );
 8667   format %{ "MOV    $dst,$src" %}
 8668   ins_encode( enc_Copy( dst, src) );
 8669   ins_pipe( ialu_reg_reg );
 8670 %}
 8671 
 8672 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Helper with no match rule: pointer flavour of ci2b, used as the second
        // step of the convP2B expansion. The same NEG/ADC pair leaves 1 in dst for
        // a non-zero (non-null) input and 0 otherwise. Unlike ci2b, no size() is
        // declared here. The flags register is clobbered.
 8673   effect( USE_DEF dst, USE src, KILL cr );
 8674   format %{ "NEG    $dst\n\t"
 8675             "ADC    $dst,$src" %}
 8676   ins_encode( neg_reg(dst),
 8677               OpcRegReg(0x13,dst,src) );
 8678   ins_pipe( ialu_reg_reg_long );
 8679 %}
 8680 
 8681 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Conv2B of a pointer register: expands to a register copy followed by the
        // NEG/ADC pair in cp2b.
 8682   match(Set dst (Conv2B src));
 8683
 8684   expand %{
 8685     movP_nocopy(dst,src);
 8686     cp2b(dst,src,cr);
 8687   %}
 8688 %}
 8689 
 8690 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
        // CmpLTMask: dst = (p < q) ? -1 : 0, using a signed compare.
        // dst is pinned to ECX while p and q are ncxRegI operands; dst is zeroed
        // before the compare reads p and q. The flags register is clobbered.
 8691   match(Set dst (CmpLTMask p q));
 8692   effect(KILL cr);
 8693   ins_cost(400);
 8694
 8695   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8696   format %{ "XOR    $dst,$dst\n\t"
 8697             "CMP    $p,$q\n\t"
 8698             "SETlt  $dst\n\t"
 8699             "NEG    $dst" %}
 8700   ins_encode %{
 8701     Register Rp = $p$$Register;
 8702     Register Rq = $q$$Register;
 8703     Register Rd = $dst$$Register;
          // clear the whole register first: SETcc writes only the low byte
 8705     __ xorl(Rd, Rd);
 8706     __ cmpl(Rp, Rq);
          // Rd = 1 if p < q (signed), else 0
 8707     __ setb(Assembler::less, Rd);
          // turn the 0/1 result into a 0/-1 mask
 8708     __ negl(Rd);
 8709   %}
 8710
 8711   ins_pipe(pipe_slow);
 8712 %}
 8713 
 8714 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // CmpLTMask against zero: an arithmetic shift right by 31 replicates the
        // sign bit, giving -1 when dst < 0 and 0 otherwise, in place.
        // The flags register is clobbered.
 8715   match(Set dst (CmpLTMask dst zero));
 8716   effect(DEF dst, KILL cr);
 8717   ins_cost(100);
 8718
 8719   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8720   ins_encode %{
 8721   __ sarl($dst$$Register, 31);
 8722   %}
 8723   ins_pipe(ialu_reg);
 8724 %}
 8725 
 8726 /* better to save a register than avoid a branch */
      // Conditional add: matches ((p < q) ? y : 0) + (p - q) and computes it in
      // place in p with a subtract, a short branch and an add, instead of building
      // the mask in a register. The flags register is clobbered.
 8727 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8728   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8729   effect(KILL cr);
 8730   ins_cost(400);
 8731   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8732             "JGE    done\n\t"
 8733             "ADD    $p,$y\n"
 8734             "done:  " %}
 8735   ins_encode %{
 8739     Label Lskip_add;
          // p -= q; the flags from the subtract decide whether y is added
 8740     __ subl($p$$Register, $q$$Register);
 8741     __ jccb(Assembler::greaterEqual, Lskip_add);
 8742     __ addl($p$$Register, $y$$Register);
 8743     __ bind(Lskip_add);
 8744   %}
 8745
 8746   ins_pipe(pipe_cmplt);
 8747 %}
 8748 
 8749 /* Branching form: spends a short jump rather than a register for the mask. */
 8750 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8751   match(Set y (AndI (CmpLTMask p q) y));
 8752   effect(KILL cr);
 8753 
 8754   ins_cost(300);
 8755 
 8756   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8757             "JLT      done\n\t"
 8758             "XORL     $y, $y\n"
 8759             "done:  " %}
 8760   ins_encode %{
 8761     // y is kept when p < q (mask of all ones) and zeroed otherwise
 8762     // (mask of zero), so no mask value is ever materialized.
 8763     Label keep_y;
 8764     Register masked = $y$$Register;
 8765     __ cmpl($p$$Register, $q$$Register);
 8766     __ jccb(Assembler::less, keep_y);
 8767     __ xorl(masked, masked);
 8768     __ bind(keep_y);
 8769   %}
 8770 
 8771   ins_pipe(pipe_cmplt);
 8772 %}
 8773 
 8774 /* If I enable this, I encourage spilling in the inner loop of compress.
 8775 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8776   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8777 */
 8778 //----------Overflow Math Instructions-----------------------------------------
 8779 
 8780 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8781 %{
        // OverflowAddI, register + register: performs the actual ADD so that
        // cr receives the flags of the addition. op1 is overwritten by the
        // sum, hence USE_KILL.
 8782   match(Set cr (OverflowAddI op1 op2));
 8783   effect(DEF cr, USE_KILL op1, USE op2);
 8784 
 8785   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8786 
 8787   ins_encode %{
 8788     __ addl($op1$$Register, $op2$$Register);
 8789   %}
 8790   ins_pipe(ialu_reg_reg);
 8791 %}
 8792 
 8793 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8794 %{
        // OverflowAddI, register + immediate: same as the register form but
        // the second addend is the constant $op2. op1 is overwritten.
 8795   match(Set cr (OverflowAddI op1 op2));
 8796   effect(DEF cr, USE_KILL op1, USE op2);
 8797 
 8798   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8799 
 8800   ins_encode %{
 8801     __ addl($op1$$Register, $op2$$constant);
 8802   %}
 8803   ins_pipe(ialu_reg_reg);
 8804 %}
 8805 
 8806 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8807 %{
        // OverflowSubI, register - register: CMP sets the flags of op1 - op2
        // without writing a result, so neither operand is killed and no
        // effect() clause is declared.
 8808   match(Set cr (OverflowSubI op1 op2));
 8809 
 8810   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8811   ins_encode %{
 8812     __ cmpl($op1$$Register, $op2$$Register);
 8813   %}
 8814   ins_pipe(ialu_reg_reg);
 8815 %}
 8816 
 8817 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8818 %{
        // OverflowSubI, register - immediate: non-destructive CMP against the
        // constant $op2.
 8819   match(Set cr (OverflowSubI op1 op2));
 8820 
 8821   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8822   ins_encode %{
 8823     __ cmpl($op1$$Register, $op2$$constant);
 8824   %}
 8825   ins_pipe(ialu_reg_reg);
 8826 %}
 8827 
 8828 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8829 %{
        // Overflow check for integer negation, matched as OverflowSubI with a
        // zero left operand (0 - op2). NEG overwrites op2, hence USE_KILL.
 8830   match(Set cr (OverflowSubI zero op2));
 8831   effect(DEF cr, USE_KILL op2);
 8832 
 8833   format %{ "NEG    $op2\t# overflow check int" %}
 8834   ins_encode %{
 8835     __ negl($op2$$Register);
 8836   %}
 8837   ins_pipe(ialu_reg_reg);
 8838 %}
 8839 
 8840 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8841 %{
        // OverflowMulI, register * register: two-operand IMUL leaves the
        // product in op1 (USE_KILL) and the resulting flags in cr.
 8842   match(Set cr (OverflowMulI op1 op2));
 8843   effect(DEF cr, USE_KILL op1, USE op2);
 8844 
 8845   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8846   ins_encode %{
 8847     __ imull($op1$$Register, $op2$$Register);
 8848   %}
 8849   ins_pipe(ialu_reg_reg_alu0);
 8850 %}
 8851 
 8852 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8853 %{
        // OverflowMulI, register * immediate: three-operand IMUL writes the
        // product to the scratch register tmp, so op1 is only read (USE) and
        // survives the check.
 8854   match(Set cr (OverflowMulI op1 op2));
 8855   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8856 
 8857   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8858   ins_encode %{
 8859     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8860   %}
 8861   ins_pipe(ialu_reg_reg_alu0);
 8862 %}
 8863 
 8864 // Integer Absolute Instructions
      // absI_rReg computes AbsI without a branch: tmp = src >> 31 (0 or -1),
      // then dst = (src ^ tmp) - tmp. Both dst and tmp are declared TEMP and
      // the flags are killed.
 8865 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8866 %{
 8867   match(Set dst (AbsI src));
 8868   effect(TEMP dst, TEMP tmp, KILL cr);
 8869   format %{ "movl $tmp, $src\n\t"
 8870             "sarl $tmp, 31\n\t"
 8871             "movl $dst, $src\n\t"
 8872             "xorl $dst, $tmp\n\t"
 8873             "subl $dst, $tmp\n"
 8874           %}
 8875   ins_encode %{
 8876     __ movl($tmp$$Register, $src$$Register);
 8877     __ sarl($tmp$$Register, 31);              // tmp = sign mask of src
 8878     __ movl($dst$$Register, $src$$Register);
 8879     __ xorl($dst$$Register, $tmp$$Register);  // bitwise NOT when src is negative
 8880     __ subl($dst$$Register, $tmp$$Register);  // +1 when src is negative
 8881   %}
 8882 
 8883   ins_pipe(ialu_reg_reg);
 8884 %}
 8885 
 8886 //----------Long Instructions------------------------------------------------
 8887 // Add Long Register with Register
      // 64-bit add on a register pair: ADD on the low halves, then ADC on the
      // high halves to propagate the carry. Opcode bytes 0x03/0x13 are handed
      // to the RegReg_Lo/RegReg_Hi enc classes (defined outside this chunk).
 8888 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8889   match(Set dst (AddL dst src));
 8890   effect(KILL cr);
 8891   ins_cost(200);
 8892   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8893             "ADC    $dst.hi,$src.hi" %}
 8894   opcode(0x03, 0x13);
 8895   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8896   ins_pipe( ialu_reg_reg_long );
 8897 %}
 8898 
 8899 // Add Long Register with Immediate
      // 64-bit add of a long constant: ADD on the low word, ADC on the high
      // word (opcode 81 /0 and 81 /2). Encoding is delegated to the
      // Long_OpcSErm_Lo/Long_OpcSErm_Hi enc classes defined outside this chunk.
 8900 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8901   match(Set dst (AddL dst src));
 8902   effect(KILL cr);
 8903   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8904             "ADC    $dst.hi,$src.hi" %}
 8905   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8906   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8907   ins_pipe( ialu_reg_long );
 8908 %}
 8909 
 8910 // Add Long Register with Memory
      // Folds the LoadL into the add: ADD dst.lo with the word at $mem, then
      // ADC dst.hi with the word at $mem+4. OpcP/OpcS emit the primary (0x03)
      // and secondary (0x13) opcode bytes ahead of RegMem/RegMem_Hi.
 8911 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8912   match(Set dst (AddL dst (LoadL mem)));
 8913   effect(KILL cr);
 8914   ins_cost(125);
 8915   format %{ "ADD    $dst.lo,$mem\n\t"
 8916             "ADC    $dst.hi,$mem+4" %}
 8917   opcode(0x03, 0x13);
 8918   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8919   ins_pipe( ialu_reg_long_mem );
 8920 %}
 8921 
 8922 // Subtract Long Register with Register.
      // 64-bit subtract on a register pair: SUB on the low halves, then SBB on
      // the high halves to propagate the borrow (opcode bytes 0x2B/0x1B).
 8923 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8924   match(Set dst (SubL dst src));
 8925   effect(KILL cr);
 8926   ins_cost(200);
 8927   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8928             "SBB    $dst.hi,$src.hi" %}
 8929   opcode(0x2B, 0x1B);
 8930   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8931   ins_pipe( ialu_reg_reg_long );
 8932 %}
 8933 
 8934 // Subtract Long Register with Immediate
      // 64-bit subtract of a long constant: SUB on the low word, SBB on the
      // high word (opcode 81 /5 and 81 /3).
 8935 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8936   match(Set dst (SubL dst src));
 8937   effect(KILL cr);
 8938   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8939             "SBB    $dst.hi,$src.hi" %}
 8940   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8941   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8942   ins_pipe( ialu_reg_long );
 8943 %}
 8944 
 8945 // Subtract Long Register with Memory
      // Folds the LoadL into the subtract: SUB dst.lo with the word at $mem,
      // then SBB dst.hi with the word at $mem+4.
 8946 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8947   match(Set dst (SubL dst (LoadL mem)));
 8948   effect(KILL cr);
 8949   ins_cost(125);
 8950   format %{ "SUB    $dst.lo,$mem\n\t"
 8951             "SBB    $dst.hi,$mem+4" %}
 8952   opcode(0x2B, 0x1B);
 8953   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8954   ins_pipe( ialu_reg_long_mem );
 8955 %}
 8956 
 8957 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // 64-bit negate in place, matched as (0 - dst). Per the format string
        // the sequence is NEG hi; NEG lo; SBB hi,0 -- the SBB subtracts the
        // borrow produced by negating a non-zero low word. The bytes come
        // from the neg_long enc class (defined outside this chunk).
 8958   match(Set dst (SubL zero dst));
 8959   effect(KILL cr);
 8960   ins_cost(300);
 8961   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8962   ins_encode( neg_long(dst) );
 8963   ins_pipe( ialu_reg_reg_long );
 8964 %}
 8965 
 8966 // And Long Register with Register
      // 64-bit AND done as two independent 32-bit ANDs (low halves, then high
      // halves); the same opcode byte 0x23 is used for both.
 8967 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8968   match(Set dst (AndL dst src));
 8969   effect(KILL cr);
 8970   format %{ "AND    $dst.lo,$src.lo\n\t"
 8971             "AND    $dst.hi,$src.hi" %}
 8972   opcode(0x23,0x23);
 8973   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8974   ins_pipe( ialu_reg_reg_long );
 8975 %}
 8976 
 8977 // And Long Register with Immediate
      // 64-bit AND with a long constant: 81 /4 applied to the low word and
      // again to the high word.
 8978 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8979   match(Set dst (AndL dst src));
 8980   effect(KILL cr);
 8981   format %{ "AND    $dst.lo,$src.lo\n\t"
 8982             "AND    $dst.hi,$src.hi" %}
 8983   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8984   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8985   ins_pipe( ialu_reg_long );
 8986 %}
 8987 
 8988 // And Long Register with Memory
      // Folds the LoadL into the AND: dst.lo is ANDed with the word at $mem
      // and dst.hi with the word at $mem+4.
 8989 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8990   match(Set dst (AndL dst (LoadL mem)));
 8991   effect(KILL cr);
 8992   ins_cost(125);
 8993   format %{ "AND    $dst.lo,$mem\n\t"
 8994             "AND    $dst.hi,$mem+4" %}
 8995   opcode(0x23, 0x23);
 8996   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8997   ins_pipe( ialu_reg_long_mem );
 8998 %}
 8999 
 9000 // BMI1 instructions
 9001 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 9002   match(Set dst (AndL (XorL src1 minus_1) src2));
 9003   predicate(UseBMI1Instructions);
 9004   effect(KILL cr, TEMP dst);
 9005 
 9006   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9007             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9008          %}
 9009 
 9010   ins_encode %{
          // (~src1) & src2 on a 64-bit pair: one ANDN per 32-bit half.
 9011     Register out_lo = $dst$$Register;
 9012     Register inv_lo = $src1$$Register;
 9013     Register and_lo = $src2$$Register;
 9014     __ andnl(out_lo, inv_lo, and_lo);
 9015     __ andnl(HIGH_FROM_LOW(out_lo), HIGH_FROM_LOW(inv_lo), HIGH_FROM_LOW(and_lo));
 9016   %}
 9017   ins_pipe(ialu_reg_reg_long);
 9018 %}
 9019 
 9020 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9021   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9022   predicate(UseBMI1Instructions);
 9023   effect(KILL cr, TEMP dst);
 9024 
 9025   ins_cost(125);
 9026   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9027             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9028          %}
 9029 
 9030   ins_encode %{
 9031     // The high word of the memory operand sits 4 bytes past the low word.
 9032     Address mem_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9033     Register out_lo = $dst$$Register;
 9034     Register inv_lo = $src1$$Register;
 9035     __ andnl(out_lo, inv_lo, $src2$$Address);
 9036     __ andnl(HIGH_FROM_LOW(out_lo), HIGH_FROM_LOW(inv_lo), mem_hi);
 9037   %}
 9038   ins_pipe(ialu_reg_mem);
 9039 %}
 9040 
 9041 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9042   match(Set dst (AndL (SubL imm_zero src) src));
 9043   predicate(UseBMI1Instructions);
 9044   effect(KILL cr, TEMP dst);
 9045 
 9046   format %{ "MOVL   $dst.hi, 0\n\t"
 9047             "BLSIL  $dst.lo, $src.lo\n\t"
 9048             "JNZ    done\n\t"
 9049             "BLSIL  $dst.hi, $src.hi\n"
 9050             "done:"
 9051          %}
 9052 
 9053   ins_encode %{
 9054     // Lowest set bit of a 64-bit value: try the low word first and only
 9055     // look at the high word when the low result is zero.
 9056     Register out_lo = $dst$$Register;
 9057     Register out_hi = HIGH_FROM_LOW(out_lo);
 9058     Label found_in_low;
 9059     __ movl(out_hi, 0);
 9060     __ blsil(out_lo, $src$$Register);
 9061     __ jccb(Assembler::notZero, found_in_low);
          __ blsil(out_hi, HIGH_FROM_LOW($src$$Register));
          __ bind(found_in_low);
 9062   %}
 9063   ins_pipe(ialu_reg);
 9064 %}
 9065 
 9066 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9067   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9068   predicate(UseBMI1Instructions);
 9069   effect(KILL cr, TEMP dst);
 9070 
 9071   ins_cost(125);
 9072   format %{ "MOVL   $dst.hi, 0\n\t"
 9073             "BLSIL  $dst.lo, $src\n\t"
 9074             "JNZ    done\n\t"
 9075             "BLSIL  $dst.hi, $src+4\n"
 9076             "done:"
 9077          %}
 9078 
 9079   ins_encode %{
 9080     // Memory form of the 64-bit lowest-set-bit extraction; the high word
 9081     // of the operand is addressed at displacement + 4.
 9082     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9083     Register out_lo = $dst$$Register;
 9084     Label found_in_low;
 9085     __ movl(HIGH_FROM_LOW(out_lo), 0);
 9086     __ blsil(out_lo, $src$$Address);
 9087     __ jccb(Assembler::notZero, found_in_low);
 9088     __ blsil(HIGH_FROM_LOW(out_lo), mem_hi);
          __ bind(found_in_low);
 9089   %}
 9090   ins_pipe(ialu_reg_mem);
 9091 %}
 9092 
 9093 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9094 %{
 9095   match(Set dst (XorL (AddL src minus_1) src));
 9096   predicate(UseBMI1Instructions);
 9097   effect(KILL cr, TEMP dst);
 9098 
 9099   format %{ "MOVL    $dst.hi, 0\n\t"
 9100             "BLSMSKL $dst.lo, $src.lo\n\t"
 9101             "JNC     done\n\t"
 9102             "BLSMSKL $dst.hi, $src.hi\n"
 9103             "done:"
 9104          %}
 9105 
 9106   ins_encode %{
 9107     // 64-bit src ^ (src - 1): the high word only takes part when the
 9108     // low-word BLSMSK reports a carry.
 9109     Register out_lo = $dst$$Register;
 9110     Register out_hi = HIGH_FROM_LOW(out_lo);
 9111     Label low_only;
 9112     __ movl(out_hi, 0);
 9113     __ blsmskl(out_lo, $src$$Register);
 9114     __ jccb(Assembler::carryClear, low_only);
          __ blsmskl(out_hi, HIGH_FROM_LOW($src$$Register));
          __ bind(low_only);
 9115   %}
 9116 
 9117   ins_pipe(ialu_reg);
 9118 %}
 9119 
 9120 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9121 %{
 9122   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9123   predicate(UseBMI1Instructions);
 9124   effect(KILL cr, TEMP dst);
 9125 
 9126   ins_cost(125);
 9127   format %{ "MOVL    $dst.hi, 0\n\t"
 9128             "BLSMSKL $dst.lo, $src\n\t"
 9129             "JNC     done\n\t"
 9130             "BLSMSKL $dst.hi, $src+4\n"
 9131             "done:"
 9132          %}
 9133 
 9134   ins_encode %{
 9135     // Memory form of the 64-bit src ^ (src - 1); the high word of the
 9136     // operand is addressed at displacement + 4.
 9137     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9138     Register out_lo = $dst$$Register;
 9139     Label low_only;
 9140     __ movl(HIGH_FROM_LOW(out_lo), 0);
 9141     __ blsmskl(out_lo, $src$$Address);
 9142     __ jccb(Assembler::carryClear, low_only);
 9143     __ blsmskl(HIGH_FROM_LOW(out_lo), mem_hi);
          __ bind(low_only);
 9144   %}
 9145 
 9146   ins_pipe(ialu_reg_mem);
 9147 %}
 9148 
 9149 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9150 %{
 9151   match(Set dst (AndL (AddL src minus_1) src) );
 9152   predicate(UseBMI1Instructions);
 9153   effect(KILL cr, TEMP dst);
 9154 
 9155   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9156             "BLSRL  $dst.lo, $src.lo\n\t"
 9157             "JNC    done\n\t"
 9158             "BLSRL  $dst.hi, $src.hi\n"
 9159             "done:"
 9160   %}
 9161 
 9162   ins_encode %{
 9163     // 64-bit src & (src - 1): the high word is copied through unchanged
 9164     // unless the low-word BLSR reports a carry.
 9165     Register out_lo = $dst$$Register;
 9166     Register in_hi  = HIGH_FROM_LOW($src$$Register);
 9167     Label high_untouched;
 9168     __ movl(HIGH_FROM_LOW(out_lo), in_hi);
 9169     __ blsrl(out_lo, $src$$Register);
 9170     __ jccb(Assembler::carryClear, high_untouched);
          __ blsrl(HIGH_FROM_LOW(out_lo), in_hi);
          __ bind(high_untouched);
 9171   %}
 9172 
 9173   ins_pipe(ialu_reg);
 9174 %}
 9175 
 9176 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9177 %{
 9178   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9179   predicate(UseBMI1Instructions);
 9180   effect(KILL cr, TEMP dst);
 9181 
 9182   ins_cost(125);
 9183   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9184             "BLSRL  $dst.lo, $src\n\t"
 9185             "JNC    done\n\t"
 9186             "BLSRL  $dst.hi, $src+4\n"
 9187             "done:"
 9188   %}
 9189 
 9190   ins_encode %{
 9191     // Memory form of the 64-bit src & (src - 1); the high word of the
 9192     // operand is addressed at displacement + 4.
 9193     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9194     Register out_lo = $dst$$Register;
 9195     Label high_untouched;
 9196     __ movl(HIGH_FROM_LOW(out_lo), mem_hi);
 9197     __ blsrl(out_lo, $src$$Address);
 9198     __ jccb(Assembler::carryClear, high_untouched);
          __ blsrl(HIGH_FROM_LOW(out_lo), mem_hi);
          __ bind(high_untouched);
 9199   %}
 9200 
 9201   ins_pipe(ialu_reg_mem);
 9202 %}
 9203 
 9204 // Or Long Register with Register
      // 64-bit OR done as two independent 32-bit ORs (low halves, then high
      // halves); opcode byte 0x0B is used for both.
 9205 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9206   match(Set dst (OrL dst src));
 9207   effect(KILL cr);
 9208   format %{ "OR     $dst.lo,$src.lo\n\t"
 9209             "OR     $dst.hi,$src.hi" %}
 9210   opcode(0x0B,0x0B);
 9211   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9212   ins_pipe( ialu_reg_reg_long );
 9213 %}
 9214 
 9215 // Or Long Register with Immediate
      // 64-bit OR with a long constant: 81 /1 applied to the low word and
      // again to the high word.
 9216 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9217   match(Set dst (OrL dst src));
 9218   effect(KILL cr);
 9219   format %{ "OR     $dst.lo,$src.lo\n\t"
 9220             "OR     $dst.hi,$src.hi" %}
 9221   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9222   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9223   ins_pipe( ialu_reg_long );
 9224 %}
 9225 
 9226 // Or Long Register with Memory
      // Folds the LoadL into the OR: dst.lo is ORed with the word at $mem and
      // dst.hi with the word at $mem+4.
 9227 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9228   match(Set dst (OrL dst (LoadL mem)));
 9229   effect(KILL cr);
 9230   ins_cost(125);
 9231   format %{ "OR     $dst.lo,$mem\n\t"
 9232             "OR     $dst.hi,$mem+4" %}
 9233   opcode(0x0B,0x0B);
 9234   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9235   ins_pipe( ialu_reg_long_mem );
 9236 %}
 9237 
 9238 // Xor Long Register with Register
      // 64-bit XOR done as two independent 32-bit XORs (low halves, then high
      // halves); opcode byte 0x33 is used for both.
 9239 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9240   match(Set dst (XorL dst src));
 9241   effect(KILL cr);
 9242   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9243             "XOR    $dst.hi,$src.hi" %}
 9244   opcode(0x33,0x33);
 9245   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9246   ins_pipe( ialu_reg_reg_long );
 9247 %}
 9248 
 9249 // Xor Long Register with Immediate -1
      // XOR with all ones is a bitwise complement, so each half is simply
      // NOT-ed. Unlike the other long logical rules here, this one declares
      // no flags operand and no KILL effect.
 9250 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9251   match(Set dst (XorL dst imm));
 9252   format %{ "NOT    $dst.lo\n\t"
 9253             "NOT    $dst.hi" %}
 9254   ins_encode %{
 9255      __ notl($dst$$Register);
 9256      __ notl(HIGH_FROM_LOW($dst$$Register));
 9257   %}
 9258   ins_pipe( ialu_reg_long );
 9259 %}
 9260 
 9261 // Xor Long Register with Immediate
      // 64-bit XOR with a long constant: 81 /6 applied to the low word and
      // again to the high word.
 9262 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9263   match(Set dst (XorL dst src));
 9264   effect(KILL cr);
 9265   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9266             "XOR    $dst.hi,$src.hi" %}
 9267   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9268   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9269   ins_pipe( ialu_reg_long );
 9270 %}
 9271 
 9272 // Xor Long Register with Memory
      // Folds the LoadL into the XOR: dst.lo is XORed with the word at $mem
      // and dst.hi with the word at $mem+4.
 9273 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9274   match(Set dst (XorL dst (LoadL mem)));
 9275   effect(KILL cr);
 9276   ins_cost(125);
 9277   format %{ "XOR    $dst.lo,$mem\n\t"
 9278             "XOR    $dst.hi,$mem+4" %}
 9279   opcode(0x33,0x33);
 9280   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9281   ins_pipe( ialu_reg_long_mem );
 9282 %}
 9283 
 9284 // Shift Left Long by 1
      // Only selected when UseNewLongLShift is set. Doubles the 64-bit pair
      // with ADD lo,lo / ADC hi,hi: the bit shifted out of the low word
      // becomes the carry that ADC feeds into the high word.
 9285 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9286   predicate(UseNewLongLShift);
 9287   match(Set dst (LShiftL dst cnt));
 9288   effect(KILL cr);
 9289   ins_cost(100);
 9290   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9291             "ADC    $dst.hi,$dst.hi" %}
 9292   ins_encode %{
 9293     __ addl($dst$$Register,$dst$$Register);
 9294     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9295   %}
 9296   ins_pipe( ialu_reg_long );
 9297 %}
 9298 
 9299 // Shift Left Long by 2 (UseNewLongLShift only): two ADD/ADC doubling steps
 9300 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9301   predicate(UseNewLongLShift);
 9302   match(Set dst (LShiftL dst cnt));
 9303   effect(KILL cr);
 9304   ins_cost(100);
 9305   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9306             "ADC    $dst.hi,$dst.hi\n\t"
 9307             "ADD    $dst.lo,$dst.lo\n\t"
 9308             "ADC    $dst.hi,$dst.hi" %}
 9309   ins_encode %{
 9310     Register lo_word = $dst$$Register;
 9311     // Emit the ADD lo,lo / ADC hi,hi pair once per bit of shift.
 9312     for (int pass = 0; pass < 2; pass++) {
 9313       __ addl(lo_word, lo_word);
            __ adcl(HIGH_FROM_LOW(lo_word), HIGH_FROM_LOW(lo_word));
          }
 9314   %}
 9315   ins_pipe( ialu_reg_long );
 9316 %}
 9317 
 9318 // Shift Left Long by 3 (UseNewLongLShift only): three ADD/ADC doubling steps
 9319 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9320   predicate(UseNewLongLShift);
 9321   match(Set dst (LShiftL dst cnt));
 9322   effect(KILL cr);
 9323   ins_cost(100);
 9324   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9325             "ADC    $dst.hi,$dst.hi\n\t"
 9326             "ADD    $dst.lo,$dst.lo\n\t"
 9327             "ADC    $dst.hi,$dst.hi\n\t"
 9328             "ADD    $dst.lo,$dst.lo\n\t"
 9329             "ADC    $dst.hi,$dst.hi" %}
 9330   ins_encode %{
 9331     Register lo_word = $dst$$Register;
 9332     // Emit the ADD lo,lo / ADC hi,hi pair once per bit of shift.
 9333     for (int pass = 0; pass < 3; pass++) {
 9334       __ addl(lo_word, lo_word);
 9335       __ adcl(HIGH_FROM_LOW(lo_word), HIGH_FROM_LOW(lo_word));
 9336     }
 9337   %}
 9338   ins_pipe( ialu_reg_long );
 9339 %}
 9340 
 9341 // Shift Left Long by 1-31
      // Constant shift smaller than a word: SHLD moves the top $cnt bits of
      // the low word into the high word, then SHL shifts the low word. The
      // bytes come from the move_long_small_shift enc class (outside this
      // chunk) using the opcode triple below.
 9342 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9343   match(Set dst (LShiftL dst cnt));
 9344   effect(KILL cr);
 9345   ins_cost(200);
 9346   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9347             "SHL    $dst.lo,$cnt" %}
 9348   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9349   ins_encode( move_long_small_shift(dst,cnt) );
 9350   ins_pipe( ialu_reg_long );
 9351 %}
 9352 
 9353 // Shift Left Long by 32-63
      // Constant shift of a word or more: per the format string the low word
      // is moved into the high word, shifted by the remaining ($cnt - 32)
      // bits, and the low word is cleared. Encoded by move_long_big_shift_clr
      // (defined outside this chunk).
 9354 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9355   match(Set dst (LShiftL dst cnt));
 9356   effect(KILL cr);
 9357   ins_cost(300);
 9358   format %{ "MOV    $dst.hi,$dst.lo\n"
 9359           "\tSHL    $dst.hi,$cnt-32\n"
 9360           "\tXOR    $dst.lo,$dst.lo" %}
 9361   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9362   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9363   ins_pipe( ialu_reg_long );
 9364 %}
 9365 
 9366 // Shift Left Long by variable
      // The shift count lives in the eCXRegI operand. Per the format string,
      // bit 5 of the count (TEST shift,32) decides whether the low word is
      // first moved into the high word and cleared; SHLD/SHL then apply the
      // count. size(17) declares a fixed encoding length, so the
      // shift_left_long enc class (outside this chunk) must emit exactly
      // that many bytes.
 9367 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9368   match(Set dst (LShiftL dst shift));
 9369   effect(KILL cr);
 9370   ins_cost(500+200);
 9371   size(17);
 9372   format %{ "TEST   $shift,32\n\t"
 9373             "JEQ,s  small\n\t"
 9374             "MOV    $dst.hi,$dst.lo\n\t"
 9375             "XOR    $dst.lo,$dst.lo\n"
 9376     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9377             "SHL    $dst.lo,$shift" %}
 9378   ins_encode( shift_left_long( dst, shift ) );
 9379   ins_pipe( pipe_slow );
 9380 %}
 9381 
 9382 // Shift Right Long by 1-31
      // Logical (unsigned, URShiftL) constant shift smaller than a word:
      // SHRD moves the low $cnt bits of the high word into the low word,
      // then SHR shifts the high word.
 9383 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9384   match(Set dst (URShiftL dst cnt));
 9385   effect(KILL cr);
 9386   ins_cost(200);
 9387   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9388             "SHR    $dst.hi,$cnt" %}
 9389   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9390   ins_encode( move_long_small_shift(dst,cnt) );
 9391   ins_pipe( ialu_reg_long );
 9392 %}
 9393 
 9394 // Shift Right Long by 32-63
      // Logical (URShiftL) constant shift of a word or more: per the format
      // string the high word is moved into the low word, shifted by the
      // remaining ($cnt - 32) bits, and the high word is cleared.
 9395 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9396   match(Set dst (URShiftL dst cnt));
 9397   effect(KILL cr);
 9398   ins_cost(300);
 9399   format %{ "MOV    $dst.lo,$dst.hi\n"
 9400           "\tSHR    $dst.lo,$cnt-32\n"
 9401           "\tXOR    $dst.hi,$dst.hi" %}
 9402   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9403   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9404   ins_pipe( ialu_reg_long );
 9405 %}
 9406 
 9407 // Shift Right Long by variable
      // Logical (URShiftL) shift with the count in the eCXRegI operand. Per
      // the format string, bit 5 of the count (TEST shift,32) decides
      // whether the high word is first moved into the low word and cleared;
      // SHRD/SHR then apply the count. size(17) declares a fixed encoding
      // length for the shift_right_long enc class (outside this chunk).
 9408 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9409   match(Set dst (URShiftL dst shift));
 9410   effect(KILL cr);
 9411   ins_cost(600);
 9412   size(17);
 9413   format %{ "TEST   $shift,32\n\t"
 9414             "JEQ,s  small\n\t"
 9415             "MOV    $dst.lo,$dst.hi\n\t"
 9416             "XOR    $dst.hi,$dst.hi\n"
 9417     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9418             "SHR    $dst.hi,$shift" %}
 9419   ins_encode( shift_right_long( dst, shift ) );
 9420   ins_pipe( pipe_slow );
 9421 %}
 9422 
 9423 // Shift Right arithmetic Long by 1-31
      // Signed (RShiftL) constant shift smaller than a word: SHRD moves the
      // low $cnt bits of the high word into the low word, then SAR shifts the
      // high word and keeps its sign.
 9424 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9425   match(Set dst (RShiftL dst cnt));
 9426   effect(KILL cr);
 9427   ins_cost(200);
 9428   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9429             "SAR    $dst.hi,$cnt" %}
 9430   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9431   ins_encode( move_long_small_shift(dst,cnt) );
 9432   ins_pipe( ialu_reg_long );
 9433 %}
 9434 
 9435 // Shift Right arithmetic Long by 32-63
      // Signed (RShiftL) constant shift of a word or more: per the format
      // string the high word is moved into the low word and shifted by the
      // remaining ($cnt - 32) bits, then the high word is filled with the
      // sign via SAR 31. Encoded by move_long_big_shift_sign (outside this
      // chunk).
 9436 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9437   match(Set dst (RShiftL dst cnt));
 9438   effect(KILL cr);
 9439   ins_cost(300);
 9440   format %{ "MOV    $dst.lo,$dst.hi\n"
 9441           "\tSAR    $dst.lo,$cnt-32\n"
 9442           "\tSAR    $dst.hi,31" %}
 9443   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9444   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9445   ins_pipe( ialu_reg_long );
 9446 %}
 9447 
 9448 // Shift Right arithmetic Long by variable
      // Signed (RShiftL) shift with the count in the eCXRegI operand. Per the
      // format string, bit 5 of the count (TEST shift,32) decides whether the
      // high word is first moved into the low word and replaced by its sign
      // (SAR 31); SHRD/SAR then apply the count. size(18) declares a fixed
      // encoding length, one byte more than the logical variant.
 9449 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9450   match(Set dst (RShiftL dst shift));
 9451   effect(KILL cr);
 9452   ins_cost(600);
 9453   size(18);
 9454   format %{ "TEST   $shift,32\n\t"
 9455             "JEQ,s  small\n\t"
 9456             "MOV    $dst.lo,$dst.hi\n\t"
 9457             "SAR    $dst.hi,31\n"
 9458     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9459             "SAR    $dst.hi,$shift" %}
 9460   ins_encode( shift_right_arith_long( dst, shift ) );
 9461   ins_pipe( pipe_slow );
 9462 %}
 9463 
 9464 
 9465 //----------Double Instructions------------------------------------------------
 9466 // Double Math
 9467 
 9468 // Compare & branch
 9469 
 9470 // P6 version of double compare (x87), sets condition codes in EFLAGS
      // Selected when CMOV is supported and UseSSE <= 1. Per the format
      // string, src1 is pushed and compared against src2 with FUCOMIP; when
      // the parity flag signals a NaN, AH is loaded and SAHF forces CF, which
      // is why EAX is killed. The fixup bytes come from the cmpF_P6_fixup enc
      // class (defined outside this chunk).
 9471 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9472   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9473   match(Set cr (CmpD src1 src2));
 9474   effect(KILL rax);
 9475   ins_cost(150);
 9476   format %{ "FLD    $src1\n\t"
 9477             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9478             "JNP    exit\n\t"
 9479             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9480             "SAHF\n"
 9481      "exit:\tNOP               // avoid branch to branch" %}
 9482   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9483   ins_encode( Push_Reg_DPR(src1),
 9484               OpcP, RegOpc(src2),
 9485               cmpF_P6_fixup );
 9486   ins_pipe( pipe_slow );
 9487 %}
 9488 
 9489 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // Same FLD/FUCOMIP compare as cmpDPR_cc_P6, but it produces an
        // eFlagsRegUCF result and emits no NaN fixup, so EAX is not killed.
        // NOTE(review): presumably eFlagsRegUCF consumers handle the
        // unordered case themselves -- confirm against the operand
        // definition.
 9490   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9491   match(Set cr (CmpD src1 src2));
 9492   ins_cost(150);
 9493   format %{ "FLD    $src1\n\t"
 9494             "FUCOMIP ST,$src2  // P6 instruction" %}
 9495   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9496   ins_encode( Push_Reg_DPR(src1),
 9497               OpcP, RegOpc(src2));
 9498   ins_pipe( pipe_slow );
 9499 %}
 9500 
 9501 // Compare & branch
      // x87 double compare (UseSSE <= 1) that does not require CMOV support;
      // costlier (200) than the P6 rules (150). Per the format string the FPU
      // status word is stored to AX, an unordered result is rewritten to look
      // like "less than", and SAHF copies AH into EFLAGS -- hence EAX is
      // killed. The flag transfer comes from the fpu_flags enc class (outside
      // this chunk).
 9502 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9503   predicate(UseSSE<=1);
 9504   match(Set cr (CmpD src1 src2));
 9505   effect(KILL rax);
 9506   ins_cost(200);
 9507   format %{ "FLD    $src1\n\t"
 9508             "FCOMp  $src2\n\t"
 9509             "FNSTSW AX\n\t"
 9510             "TEST   AX,0x400\n\t"
 9511             "JZ,s   flags\n\t"
 9512             "MOV    AH,1\t# unordered treat as LT\n"
 9513     "flags:\tSAHF" %}
 9514   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9515   ins_encode( Push_Reg_DPR(src1),
 9516               OpcP, RegOpc(src2),
 9517               fpu_flags);
 9518   ins_pipe( pipe_slow );
 9519 %}
 9520 
 9521 // Compare vs zero into -1,0,1
      // Three-way compare (CmpD3) of an x87 double against the constant zero.
      // src1 is pushed, then OpcS followed by OpcP emit the opcode pair in
      // the order D9 E4 (shown as FTSTD in the format), the FPU stack is
      // popped, and CmpF_Result (outside this chunk) produces the integer in
      // dst. EAX and the flags are killed.
 9522 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9523   predicate(UseSSE<=1);
 9524   match(Set dst (CmpD3 src1 zero));
 9525   effect(KILL cr, KILL rax);
 9526   ins_cost(280);
 9527   format %{ "FTSTD  $dst,$src1" %}
 9528   opcode(0xE4, 0xD9);
 9529   ins_encode( Push_Reg_DPR(src1),
 9530               OpcS, OpcP, PopFPU,
 9531               CmpF_Result(dst));
 9532   ins_pipe( pipe_slow );
 9533 %}
 9534 
 9535 // Compare into -1,0,1
      // Three-way compare (CmpD3) of two x87 doubles: src1 is pushed and
      // compared against src2 with the same D8 /3 opcode as cmpDPR_cc, then
      // CmpF_Result (outside this chunk) produces the integer in dst. EAX and
      // the flags are killed.
 9536 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9537   predicate(UseSSE<=1);
 9538   match(Set dst (CmpD3 src1 src2));
 9539   effect(KILL cr, KILL rax);
 9540   ins_cost(300);
 9541   format %{ "FCMPD  $dst,$src1,$src2" %}
 9542   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9543   ins_encode( Push_Reg_DPR(src1),
 9544               OpcP, RegOpc(src2),
 9545               CmpF_Result(dst));
 9546   ins_pipe( pipe_slow );
 9547 %}
 9548 
 9549 // double compare and set condition codes in EFLAGS by XMM regs
      // SSE2 path (UseSSE >= 2): UCOMISD followed by emit_cmpfp_fixup, a
      // helper defined outside this chunk. Per the format string the fixup
      // only runs when the parity flag signals a NaN and rewrites the saved
      // flags on the stack.
 9550 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9551   predicate(UseSSE>=2);
 9552   match(Set cr (CmpD src1 src2));
 9553   ins_cost(145);
 9554   format %{ "UCOMISD $src1,$src2\n\t"
 9555             "JNP,s   exit\n\t"
 9556             "PUSHF\t# saw NaN, set CF\n\t"
 9557             "AND     [rsp], #0xffffff2b\n\t"
 9558             "POPF\n"
 9559     "exit:" %}
 9560   ins_encode %{
 9561     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9562     emit_cmpfp_fixup(_masm);
 9563   %}
 9564   ins_pipe( pipe_slow );
 9565 %}
 9566 
 9567 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // SSE2 double compare producing an eFlagsRegUCF result: a bare
        // UCOMISD with no NaN fixup, and therefore cheaper (cost 100) than
        // cmpD_cc (cost 145).
 9568   predicate(UseSSE>=2);
 9569   match(Set cr (CmpD src1 src2));
 9570   ins_cost(100);
 9571   format %{ "UCOMISD $src1,$src2" %}
 9572   ins_encode %{
 9573     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9574   %}
 9575   ins_pipe( pipe_slow );
 9576 %}
 9577 
 9578 // double compare and set condition codes in EFLAGS by XMM regs
      // Memory form of cmpD_cc: the LoadD of the second operand is folded
      // into UCOMISD, followed by the same emit_cmpfp_fixup NaN handling.
 9579 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9580   predicate(UseSSE>=2);
 9581   match(Set cr (CmpD src1 (LoadD src2)));
 9582   ins_cost(145);
 9583   format %{ "UCOMISD $src1,$src2\n\t"
 9584             "JNP,s   exit\n\t"
 9585             "PUSHF\t# saw NaN, set CF\n\t"
 9586             "AND     [rsp], #0xffffff2b\n\t"
 9587             "POPF\n"
 9588     "exit:" %}
 9589   ins_encode %{
 9590     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9591     emit_cmpfp_fixup(_masm);
 9592   %}
 9593   ins_pipe( pipe_slow );
 9594 %}
 9595 
 9596 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // Memory form of cmpD_ccCF: a bare UCOMISD against the folded LoadD
        // operand, with no NaN fixup.
 9597   predicate(UseSSE>=2);
 9598   match(Set cr (CmpD src1 (LoadD src2)));
 9599   ins_cost(100);
 9600   format %{ "UCOMISD $src1,$src2" %}
 9601   ins_encode %{
 9602     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9603   %}
 9604   ins_pipe( pipe_slow );
 9605 %}
 9606 
 9607 // Compare into -1,0,1 in XMM
      // Three-way compare (CmpD3) on SSE2: UCOMISD, then emit_cmpfp3 (a
      // helper defined outside this chunk) materializes the result in dst.
      // Per the format string dst is preset to -1 and kept for the
      // unordered and below cases; otherwise SETNE/MOVZB yield 0 or 1.
      // NOTE(review): xRegI is presumably limited to byte-addressable
      // registers because of the SETNE -- confirm.
 9608 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9609   predicate(UseSSE>=2);
 9610   match(Set dst (CmpD3 src1 src2));
 9611   effect(KILL cr);
 9612   ins_cost(255);
 9613   format %{ "UCOMISD $src1, $src2\n\t"
 9614             "MOV     $dst, #-1\n\t"
 9615             "JP,s    done\n\t"
 9616             "JB,s    done\n\t"
 9617             "SETNE   $dst\n\t"
 9618             "MOVZB   $dst, $dst\n"
 9619     "done:" %}
 9620   ins_encode %{
 9621     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9622     emit_cmpfp3(_masm, $dst$$Register);
 9623   %}
 9624   ins_pipe( pipe_slow );
 9625 %}
 9626 
 9627 // Compare into -1,0,1 in XMM and memory
      // Memory form of cmpD_reg: the LoadD of the second operand is folded
      // into UCOMISD, then emit_cmpfp3 materializes -1/0/1 in dst.
 9628 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9629   predicate(UseSSE>=2);
 9630   match(Set dst (CmpD3 src1 (LoadD src2)));
 9631   effect(KILL cr);
 9632   ins_cost(275);
 9633   format %{ "UCOMISD $src1, $src2\n\t"
 9634             "MOV     $dst, #-1\n\t"
 9635             "JP,s    done\n\t"
 9636             "JB,s    done\n\t"
 9637             "SETNE   $dst\n\t"
 9638             "MOVZB   $dst, $dst\n"
 9639     "done:" %}
 9640   ins_encode %{
 9641     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9642     emit_cmpfp3(_masm, $dst$$Register);
 9643   %}
 9644   ins_pipe( pipe_slow );
 9645 %}
 9646 
 9647 
// x87 double subtract, register-register: dst = dst - src (SubD dst src).
// Selected only when UseSSE<=1.  The encoding pushes $src (Push_Reg_DPR) and
// then emits primary opcode 0xDE followed by a register byte built from
// secondary 0x5 and $dst -- the popping subtract shown in the format.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9660 
// x87 double subtract with explicit rounding: matches
// RoundDouble(SubD src1 src2) when UseSSE<=1 and delivers the result in the
// stack slot dst.  $src2 is pushed, combined with $src1 using opcode D8 with
// secondary 0x5, and the result is stored and popped to $dst (Pop_Mem_DPR) --
// the store is what the format labels "D-round".
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
 9674 
 9675 
// x87 double subtract with a memory operand: dst = dst - LoadD(src), for
// UseSSE<=1.  The double is loaded with the tertiary opcode 0xDD (/0), then
// the same 0xDE / secondary 0x5 popping subtract as subDPR_reg is applied
// to $dst.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9688 
// x87 double absolute value (AbsD) for UseSSE<=1.  Both dst and src are
// constrained to the regDPR1 operand class.  The encoding emits the secondary
// byte then the primary byte (OpcS, OpcP), i.e. 0xD9 0xE1 -- FABS per the
// format.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
 9698 
// x87 double negate (NegD) for UseSSE<=1.  Both dst and src are constrained
// to the regDPR1 operand class.  The encoding emits the secondary byte then
// the primary byte (OpcS, OpcP), i.e. 0xD9 0xE0 -- FCHS per the format.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
 9708 
// x87 double add, register-register: dst = dst + src (AddD dst src), for
// UseSSE<=1.  $src is pushed and then added into $dst with opcode 0xDE and
// secondary 0x0.  The instruction declares a fixed encoded size of 4 bytes.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9721 
 9722 
// x87 double add with explicit rounding: matches
// RoundDouble(AddD src1 src2) when UseSSE<=1 and delivers the result in the
// stack slot dst.  $src2 is pushed, $src1 is added with opcode D8 / secondary
// 0x0, and the sum is stored and popped to $dst (the "D-round" in the format).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
 9736 
 9737 
// x87 double add with a memory operand: dst = dst + LoadD(src), for
// UseSSE<=1.  The double is loaded with the tertiary opcode 0xDD (/0) and
// then added into $dst with the popping form 0xDE / secondary 0x0.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9750 
// add-to-memory
// Read-modify-write double add for UseSSE<=1: matches a StoreD back to dst of
// RoundDouble(AddD (LoadD dst) src).  The encoding spells its opcode bytes out
// explicitly: DD /0 loads $dst, D8 with a register byte for $src adds, and
// DD /3 stores back to $dst.  set_instruction_start is placed before the
// store because the memory operand is encoded a second time.
// NOTE(review): the format prints "FST_D" while the encoded store is DD /3;
// confirm which mnemonic the disassembly text should show.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
 9767 
// x87 double add of an immDPR1 constant: dst = dst + con, for UseSSE<=1.
// The constant is materialized with FLD1 rather than a constant-table load
// (immDPR1 is presumably the operand class for the value 1.0 -- confirm
// against its operand definition), then added into $dst with a popping FADDP.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}
 9780 
// x87 double add of a general double constant: dst = dst + con, for
// UseSSE<=1.  The predicate inspects the constant leaf (_kids[1]) and rejects
// the values 0.0 and 1.0, so this rule only applies to other constants.  The
// constant is loaded from the constant table and added with a popping FADDP.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}
 9793 
// x87 double add of a constant with explicit rounding: matches
// RoundDouble(AddD src con) for UseSSE<=1 and stores the result to the stack
// slot dst.  The predicate looks through the RoundDouble node
// (_kids[0]->_kids[1]) at the constant and rejects 0.0 and 1.0.  The constant
// is loaded from the constant table, $src is added, and the sum is stored and
// popped to [rsp + dst displacement].
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
 9808 
// x87 double multiply, register-register: dst = dst * src (MulD dst src), for
// UseSSE<=1.  $src is pushed and multiplied into $dst with the popping form
// 0xDE / secondary 0x1.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9820 
// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// Applies when UseSSE<=1 and the current compilation has a method.  dst is
// restricted to regDPR1 and src to regnotDPR1.  The encoding brackets the
// ordinary push + DE /1 multiply with strictfp_bias1 / strictfp_bias2, which
// per the format multiply $dst by the _fpu_subnormal_bias1 / _bias2 stub
// constants.
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all FP double multiplies

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9847 
// x87 double multiply by a constant: dst = dst * con, for UseSSE<=1.  The
// predicate inspects the constant leaf (_kids[1]) and rejects 0.0 and 1.0.
// The constant is loaded from the constant table and multiplied into $dst
// with a popping FMULP.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}
 9860 
 9861 
// x87 double multiply with a memory operand: dst = dst * LoadD(src), for
// UseSSE<=1.  The double is loaded with the tertiary opcode 0xDD (/0) and
// multiplied into $dst with the popping form 0xDE / secondary 0x1.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9873 
 9874 //
 9875 // Cisc-alternate to reg-reg multiply
 9876 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9877   predicate( UseSSE<=1 );
 9878   match(Set dst (MulD src (LoadD mem)));
 9879   ins_cost(250);
 9880   format %{ "FLD_D  $mem\n\t"
 9881             "DMUL   ST,$src\n\t"
 9882             "FSTP_D $dst" %}
 9883   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9884   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9885               OpcReg_FPR(src),
 9886               Pop_Reg_DPR(dst) );
 9887   ins_pipe( fpu_reg_reg_mem );
 9888 %}
 9889 
 9890 
// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
//
// Matches src2 = (src0 * src1) + src2 for UseSSE<=1: $src0 is pushed,
// multiplied by $src1 on the stack top, and the product is added into $src2
// with a popping add.
// NOTE(review): the declared opcode(0xDD) does not appear to be consumed --
// ins_encode contains no OpcP/OpcS/Opcode(primary) entry.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
 9908 
 9909 
// MACRO3 -- subDPR a mulDPR
// Two-address fused pattern for UseSSE<=1: src2 = (src0 * src1) - src2.
// $src0 is pushed and multiplied by $src1 on the stack top; the final step is
// emitted as raw bytes 0xDE, 0xE0+reg(src2), the reversed popping subtract
// that the format prints as DSUBRp.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
 9923 
 9924 
// x87 double divide, register-register: dst = dst / src (DivD dst src), for
// UseSSE<=1.  $src is pushed and divided into $dst with the popping form
// 0xDE / secondary 0x7.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9937 
 9938 // Strict FP instruction biases argument before division then
 9939 // biases result, to avoid double rounding of subnormals.
 9940 //
 9941 // scale dividend by multiplying dividend by 2^(-15360)
 9942 // load divisor
 9943 // divide scaled dividend by divisor
 9944 // rescale quotient by 2^(15360)
 9945 //
 9946 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9947   predicate (UseSSE<=1);
 9948   match(Set dst (DivD dst src));
 9949   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9950   ins_cost(01);
 9951 
 9952   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9953             "DMULp  $dst,ST\n\t"
 9954             "FLD    $src\n\t"
 9955             "FDIVp  $dst,ST\n\t"
 9956             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9957             "DMULp  $dst,ST\n\t" %}
 9958   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9959   ins_encode( strictfp_bias1(dst),
 9960               Push_Reg_DPR(src),
 9961               OpcP, RegOpc(dst),
 9962               strictfp_bias2(dst) );
 9963   ins_pipe( fpu_reg_reg );
 9964 %}
 9965 
// x87 double remainder: dst = dst mod src (ModD dst src), for UseSSE<=1.
// Both operands are pushed (Push_Reg_Mod_DPR), emitModDPR() computes the
// remainder, and the result is moved back into $dst.  EAX and EFLAGS are
// declared killed because emitModDPR() uses them.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
 9979 
// Double remainder for XMM operands (UseSSE>=2): dst = src0 mod src1.
// As the format shows, both XMM inputs are spilled through an 8-byte stack
// temporary onto the x87 stack, the FPREM loop from emitModDPR() runs there,
// and the result is moved back to $dst through the same temporary before the
// remaining x87 stack entry is popped.  EAX and EFLAGS are killed.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10004 
// x87 arctangent (AtanD dst src) for UseSSE<=1.  $src is pushed, then the
// two opcode bytes 0xD9 0xF3 are emitted (OpcP, OpcS), followed by a
// RegOpc(dst) byte.
// NOTE(review): the format mnemonic reads "DATA"; presumably "DATAN" was
// intended -- this only affects the printed disassembly text.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10014 
// Arctangent (AtanD dst src) for XMM operands, UseSSE>=2.  $src is moved to
// the x87 stack (Push_SrcD), the opcode bytes 0xD9 0xF3 are emitted, and the
// result is moved back to $dst (Push_ResultD).  EFLAGS is killed because the
// push/pop helpers adjust ESP with SUB/ADD.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
10025 
// x87 double square root: dst = sqrt(src) (SqrtD src), for UseSSE<=1.
// $src is pushed, the bytes 0xD9 0xFA are emitted (OpcS then OpcP), and the
// result is popped into $dst.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
10035 
10036 //-------------Float Instructions-------------------------------
10037 // Float Math
10038 
10039 // Code for float compare:
10040 //     fcompp();
10041 //     fwait(); fnstsw_ax();
10042 //     sahf();
10043 //     movl(dst, unordered_result);
10044 //     jcc(Assembler::parity, exit);
10045 //     movl(dst, less_result);
10046 //     jcc(Assembler::below, exit);
10047 //     movl(dst, equal_result);
10048 //     jcc(Assembler::equal, exit);
10049 //     movl(dst, greater_result);
10050 //   exit:
10051 
// P6 version of float compare, sets condition codes in EFLAGS
// Used when the CPU supports CMOV and UseSSE == 0.  $src1 is pushed and
// compared against $src2 with the DF /5 compare-and-pop form, which writes
// EFLAGS directly; cmpF_P6_fixup then handles the unordered case (per the
// format: on NaN, AH is set to 1 and SAHF forces CF so the result reads as
// "less than").  EAX is killed by that fix-up.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
10070 
// P6 float compare producing an eFlagsRegUCF result (CMOV supported,
// UseSSE == 0).  Same push + DF /5 compare as cmpFPR_cc_P6 but without the
// NaN fix-up, so EAX is not touched and the cost is lower.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10082 
10083 
// Compare & branch
// Generic x87 float compare into EFLAGS for UseSSE == 0 (no CMOV
// requirement, higher cost than the P6 variants).  $src1 is pushed and
// compared with D8 /3; fpu_flags then transfers the FPU status word to EFLAGS
// through AX, treating an unordered result as "less than" (see format).
// EAX is killed.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
10103 
// Compare vs zero into -1,0,1
// Three-way x87 float compare against the immFPR0 constant (CmpF3 src1 zero)
// for UseSSE == 0.  $src1 is pushed, the bytes 0xD9 0xE4 are emitted (OpcS
// then OpcP), the pushed copy is popped, and CmpF_Result converts the outcome
// into $dst.  EAX and EFLAGS are killed.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10117 
// Compare into -1,0,1
// Three-way x87 float compare of two registers (CmpF3 src1 src2) for
// UseSSE == 0.  $src1 is pushed and compared against $src2 with D8 /3, then
// CmpF_Result converts the outcome into $dst.  EAX and EFLAGS are killed.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10131 
// float compare and set condition codes in EFLAGS by XMM regs
// Matches CmpF of two XMM registers when UseSSE>=1.  UCOMISS is followed by
// emit_cmpfp_fixup(); per the format text this is a short sequence that
// rewrites the flags only when the compare saw a NaN.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    // NaN fix-up sequence (the JNP/PUSHF/AND/POPF part of the format above).
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
10149 
// Float compare of two XMM registers producing an eFlagsRegUCF result
// (UseSSE>=1).  Same match tree as cmpF_cc but only the UCOMISS is emitted,
// with no NaN fix-up sequence.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10160 
// float compare and set condition codes in EFLAGS by XMM regs
// Register-vs-memory form: matches CmpF of src1 against a float loaded from
// src2 when UseSSE>=1.  UCOMISS with an address operand is followed by the
// same emit_cmpfp_fixup() NaN handling as cmpF_cc.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    // NaN fix-up sequence (the JNP/PUSHF/AND/POPF part of the format above).
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
10178 
// Float compare, XMM register vs. memory, producing an eFlagsRegUCF result
// (UseSSE>=1).  Same match tree as cmpF_ccmem but only the UCOMISS is
// emitted, with no NaN fix-up sequence.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10189 
// Compare into -1,0,1 in XMM
// Three-way float compare (CmpF3) of two XMM registers into the integer
// register dst, for UseSSE>=1.  EFLAGS is clobbered (KILL cr).  Per the
// format, dst is preset to -1 and kept on unordered (JP) or below (JB);
// otherwise SETNE/MOVZB produce 0 or 1.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    // Materializes the -1/0/1 result from the flags (sequence in the format).
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10209 
// Compare into -1,0,1 in XMM and memory
// Three-way float compare (CmpF3) of an XMM register against a float loaded
// from memory, for UseSSE>=1.  Identical to cmpF_reg except that the second
// UCOMISS operand is an address; EFLAGS is clobbered (KILL cr).
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    // Materializes the -1/0/1 result from the flags (sequence in the format).
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10229 
// Spill to obtain 24-bit precision
// x87 float subtract, dst = src1 - src2, chosen when UseSSE==0 and the
// compilation selects 24-bit instructions.  $src1 is pushed, $src2 is
// subtracted with D8 /4 leaving the result on the stack top, and the result
// is stored and popped to the float stack slot dst (Pop_Mem_FPR).
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10242 //
// This instruction does not round to 24-bits
// x87 float subtract, register-register: dst = dst - src, chosen when
// UseSSE==0 and 24-bit instruction selection is off.  $src is pushed and
// subtracted from $dst with the popping form 0xDE / secondary 0x5.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10254 
// Spill to obtain 24-bit precision
// x87 float add, dst = src1 + src2, chosen when UseSSE==0 and the
// compilation selects 24-bit instructions.  $src2 is pushed, $src1 is added
// with D8 /0, and the sum is stored and popped to the float stack slot dst.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10267 //
// This instruction does not round to 24-bits
// x87 float add, register-register: dst = dst + src, chosen when UseSSE==0
// and 24-bit instruction selection is off.  $src is pushed and added into
// $dst with the popping form 0xDE / secondary 0x0.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10280 
// x87 float absolute value (AbsF) for UseSSE==0.  Both dst and src are
// constrained to the regFPR1 operand class.  The encoding emits the secondary
// byte then the primary byte (OpcS, OpcP), i.e. 0xD9 0xE1 -- FABS per the
// format.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10290 
// x87 float negate (NegF) for UseSSE==0.  Both dst and src are constrained
// to the regFPR1 operand class.  The encoding emits the secondary byte then
// the primary byte (OpcS, OpcP), i.e. 0xD9 0xE0 -- FCHS per the format.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10300 
// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
// x87 float add with a memory operand, dst = src1 + LoadF(src2), for
// UseSSE==0 with 24-bit instruction selection.  The float is loaded with the
// tertiary opcode 0xD9 (/0), $src1 is added with D8 /0, and the sum is stored
// and popped to the float stack slot dst.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10316 //
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
// x87 float add with a memory operand, dst = dst + LoadF(src), for UseSSE==0
// without 24-bit instruction selection.  The float is loaded with the
// tertiary opcode 0xD9 (/0) and added into $dst with the popping form
// 0xDE / secondary 0x0.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10329 
// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// x87 float add where the first AddF input is matched directly as a memory
// operand (no explicit LoadF in the match tree): dst = src1 + src2, for
// UseSSE==0 with 24-bit instruction selection.  $src1 is loaded with D9 /0,
// $src2 is added with D8 /0, and the sum is stored and popped to dst.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10343 
// Cisc-spill variant
// Spill to obtain 24-bit precision
// x87 float add of two memory operands, dst = src1 + LoadF(src2), for
// UseSSE==0 with 24-bit instruction selection.  $src2 is loaded with D9 /0,
// then $src1 is added straight from memory with D8 /0 (primary opcode plus a
// memory ModRM using the secondary field).  set_instruction_start precedes
// the second memory-operand instruction.  The sum is stored and popped to dst.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10358 
// Spill to obtain 24-bit precision
// x87 float add of two memory operands matched directly (no LoadF in the
// match tree): dst = src1 + src2, for UseSSE==0 with 24-bit instruction
// selection.  The encoding is the same as addFPR24_mem_cisc: load $src2 with
// D9 /0, add $src1 from memory with D8 /0, store and pop to dst.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10372 
10373 
// Spill to obtain 24-bit precision
// x87 float add of a constant, dst = src + con, for UseSSE==0 with 24-bit
// instruction selection.  $src is pushed, the constant is added directly from
// the constant table with a single-precision memory add, and the sum is
// stored and popped as a float to [rsp + dst displacement].
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10388 //
// This instruction does not round to 24-bits
// x87 float add of a constant, dst = src + con, for UseSSE==0 without 24-bit
// instruction selection.  $src is pushed, the constant is added from the
// constant table with a single-precision memory add, and the sum is popped
// into the register $dst (register-form store, so no memory round trip).
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10403 
// Spill to obtain 24-bit precision
// x87 float multiply, dst = src1 * src2, for UseSSE==0 with 24-bit
// instruction selection.  $src1 is pushed, multiplied by $src2 with D8 /1
// leaving the result on the stack top, and the product is stored and popped
// to the float stack slot dst.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10418 //
// This instruction does not round to 24-bits
// x87 float multiply, three-address register form: dst = src1 * src2, for
// UseSSE==0 without 24-bit instruction selection.  The encoding pushes $src2,
// multiplies by $src1 with D8 /1, and pops the product into register $dst.
// NOTE(review): the format lists the operands in the opposite order
// (FLD $src1 / FMUL $src2) and prints FSTP_S although the result is popped to
// a register; only the disassembly text is affected.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10433 
10434 
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// x87 float multiply with a memory operand, dst = src1 * LoadF(src2), for
// UseSSE==0 with 24-bit instruction selection.  The float is loaded with the
// tertiary opcode 0xD9 (/0), multiplied by $src1 with D8 /1, and the product
// is stored and popped to the float stack slot dst.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10450 //
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
// x87 float multiply with a memory operand, dst = src1 * LoadF(src2), for
// UseSSE==0 without 24-bit instruction selection.  The float is loaded with
// the tertiary opcode 0xD9 (/0), multiplied by $src1 with D8 /1, and the
// product is popped into register $dst.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10464 
// Spill to obtain 24-bit precision
// x87 float multiply of two memory operands matched directly (no LoadF in
// the match tree): dst = src1 * src2, for UseSSE==0 with 24-bit instruction
// selection.  $src2 is loaded with D9 /0, multiplied by $src1 straight from
// memory with D8 /1, and the product is stored and popped to dst.
// set_instruction_start precedes the second memory-operand instruction.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10478 
// Spill to obtain 24-bit precision
// x87 float multiply by a constant, dst = src * con, for UseSSE==0 with
// 24-bit instruction selection.  $src is pushed, multiplied by the constant
// directly from the constant table with a single-precision memory multiply,
// and the product is stored and popped as a float to [rsp + dst displacement].
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10494 //
// This instruction does not round to 24-bits
// x87 float multiply by a constant, dst = src * con, for UseSSE==0 without
// 24-bit instruction selection.  $src is pushed, multiplied by the constant
// from the constant table with a single-precision memory multiply, and the
// product is popped into the register $dst (register-form store).
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10510 
10511 
10512 //
10513 // MACRO1 -- subsume unshared load into mulFPR
10514 // This instruction does not round to 24-bits
      // Matches MulF whose left input is a LoadF from memory and whose right input
      // is an FPU register, so the load is folded into this single rule.
      // Selected when UseSSE==0 and 24-bit instruction selection is off.
10515 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10516   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10517   match(Set dst (MulF (LoadF mem1) src));
10518
10519   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10520             "FMUL   ST,$src\n\t"
10521             "FSTP   $dst" %}
10522   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
        // The tertiary opcode (D9 /0) is applied to mem1 first; the multiply by
        // src and the pop into dst follow.
10523   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10524               OpcReg_FPR(src),
10525               Pop_Reg_FPR(dst) );
10526   ins_pipe( fpu_reg_reg_mem );
10527 %}
10528 //
10529 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10530 // This instruction does not round to 24-bits
      // Matches AddF(MulF(LoadF mem1, src1), src2) as one rule: load, multiply
      // and add are emitted back to back (see the format string).  Selected when
      // UseSSE==0 and 24-bit instruction selection is off; cost is set to 95.
10531 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10532   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10534   ins_cost(95);
10535
10536   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10537             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10538             "FADD   ST,$src2\n\t"
10539             "FSTP   $dst" %}
10540   opcode(0xD9); /* LoadF D9 /0 */
10541   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10542               FMul_ST_reg(src1),
10543               FAdd_ST_reg(src2),
10544               Pop_Reg_FPR(dst) );
10545   ins_pipe( fpu_reg_mem_reg_reg );
10546 %}
10547 
10548 // MACRO3 -- addFPR a mulFPR
10549 // This instruction does not round to 24-bits.  It is a '2-address'
10550 // instruction in that the result goes back to src2.  This eliminates
10551 // a move from the macro; possibly the register allocator will have
10552 // to add it back (and maybe not).
      // Matches Set src2 (AddF (MulF src0 src1) src2); all three operands are
      // FPU registers.  Selected when UseSSE==0 and 24-bit selection is off.
10553 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10554   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10555   match(Set src2 (AddF (MulF src0 src1) src2));
10556
10557   format %{ "FLD    $src0     ===MACRO3===\n\t"
10558             "FMUL   ST,$src1\n\t"
10559             "FADDP  $src2,ST" %}
10560   opcode(0xD9); /* LoadF D9 /0 */
10561   ins_encode( Push_Reg_FPR(src0),
10562               FMul_ST_reg(src1),
10563               FAddP_reg_ST(src2) );
10564   ins_pipe( fpu_reg_reg_reg );
10565 %}
10566 
10567 // MACRO4 -- divFPR subFPR
10568 // This instruction does not round to 24-bits
      // Matches DivF(SubF(src2, src1), src3), i.e. (src2 - src1) / src3, with all
      // operands in FPU registers.  Selected when UseSSE==0 and 24-bit
      // instruction selection is off.
10569 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10570   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10571   match(Set dst (DivF (SubF src2 src1) src3));
10572
10573   format %{ "FLD    $src2   ===MACRO4===\n\t"
10574             "FSUB   ST,$src1\n\t"
10575             "FDIV   ST,$src3\n\t"
10576             "FSTP  $dst" %}
10577   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10578   ins_encode( Push_Reg_FPR(src2),
10579               subFPR_divFPR_encode(src1,src3),
10580               Pop_Reg_FPR(dst) );
10581   ins_pipe( fpu_reg_reg_reg_reg );
10582 %}
10583 
10584 // Spill to obtain 24-bit precision
      // DivF of two FPU registers, selected when UseSSE==0 and 24-bit instruction
      // selection is on.  The quotient is written to a float stack slot.
10585 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10586   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10587   match(Set dst (DivF src1 src2));
10588
10589   format %{ "FDIV   $dst,$src1,$src2" %}
10590   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10591   ins_encode( Push_Reg_FPR(src1),
10592               OpcReg_FPR(src2),
10593               Pop_Mem_FPR(dst) );
10594   ins_pipe( fpu_mem_reg_reg );
10595 %}
10596 //
10597 // This instruction does not round to 24-bits
      // Two-address DivF: dst is both the dividend and the result
      // (Set dst (DivF dst src)).  Selected when UseSSE==0 and 24-bit instruction
      // selection is off.
10598 instruct divFPR_reg(regFPR dst, regFPR src) %{
10599   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10600   match(Set dst (DivF dst src));
10601
10602   format %{ "FDIV   $dst,$src" %}
10603   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10604   ins_encode( Push_Reg_FPR(src),
10605               OpcP, RegOpc(dst) );
10606   ins_pipe( fpu_reg_reg );
10607 %}
10608 
10609 
10610 // Spill to obtain 24-bit precision
      // ModF of two FPU registers, selected when UseSSE==0 and 24-bit instruction
      // selection is on.  The remainder is written to a float stack slot.
      // EAX and EFLAGS are declared killed.  The encoding reuses the *_DPR
      // (double) encoding classes for the float operands.
10611 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10612   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10613   match(Set dst (ModF src1 src2));
10614   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10615
10616   format %{ "FMOD   $dst,$src1,$src2" %}
10617   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10618               emitModDPR(),
10619               Push_Result_Mod_DPR(src2),
10620               Pop_Mem_FPR(dst));
10621   ins_pipe( pipe_slow );
10622 %}
10623 //
10624 // This instruction does not round to 24-bits
      // Two-address ModF: dst is both the dividend and the result
      // (Set dst (ModF dst src)).  Selected when UseSSE==0 and 24-bit instruction
      // selection is off.  EAX and EFLAGS are declared killed.
10625 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10626   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10627   match(Set dst (ModF dst src));
10628   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10629
10630   format %{ "FMOD   $dst,$src" %}
10631   ins_encode(Push_Reg_Mod_DPR(dst, src),
10632               emitModDPR(),
10633               Push_Result_Mod_DPR(src),
10634               Pop_Reg_FPR(dst));
10635   ins_pipe( pipe_slow );
10636 %}
10637 
10638 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
        // ModF for floats held in XMM registers (UseSSE>=1).  Per the format
        // string, both operands are moved through a 4-byte temp on the CPU stack
        // onto the x87 stack, FPREM is looped until the JP test falls through, and
        // the result is moved back to $dst.  EAX and EFLAGS are declared killed
        // (the sequence uses FNSTSW AX / SAHF).
10639   predicate(UseSSE>=1);
10640   match(Set dst (ModF src0 src1));
10641   effect(KILL rax, KILL cr);
10642   format %{ "SUB    ESP,4\t # FMOD\n"
10643           "\tMOVSS  [ESP+0],$src1\n"
10644           "\tFLD_S  [ESP+0]\n"
10645           "\tMOVSS  [ESP+0],$src0\n"
10646           "\tFLD_S  [ESP+0]\n"
10647      "loop:\tFPREM\n"
10648           "\tFWAIT\n"
10649           "\tFNSTSW AX\n"
10650           "\tSAHF\n"
10651           "\tJP     loop\n"
10652           "\tFSTP_S [ESP+0]\n"
10653           "\tMOVSS  $dst,[ESP+0]\n"
10654           "\tADD    ESP,4\n"
10655           "\tFSTP   ST0\t # Restore FPU Stack"
10656     %}
10657   ins_cost(250);
10658   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10659   ins_pipe( pipe_slow );
10660 %}
10661 
10662 
10663 //----------Arithmetic Conversion Instructions---------------------------------
10664 // The conversions operations are all Alpha sorted.  Please keep it that way!
10665 
10666 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // RoundFloat on the x87 path (UseSSE==0): the FPU register src is stored
        // to the float stack slot dst (shown as FST_S in the format string).
        // Also used as the expansion target of convDPR2FPR_reg.
10667   predicate(UseSSE==0);
10668   match(Set dst (RoundFloat src));
10669   ins_cost(125);
10670   format %{ "FST_S  $dst,$src\t# F-round" %}
10671   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10672   ins_pipe( fpu_mem_reg );
10673 %}
10674 
10675 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // RoundDouble while doubles live on the x87 stack (UseSSE<=1): the FPU
        // register src is stored to the double stack slot dst (shown as FST_D in
        // the format string).  Also used as the expansion target of convFPR2D_reg.
10676   predicate(UseSSE<=1);
10677   match(Set dst (RoundDouble src));
10678   ins_cost(125);
10679   format %{ "FST_D  $dst,$src\t# D-round" %}
10680   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10681   ins_pipe( fpu_mem_reg );
10682 %}
10683 
10684 // Force rounding to 24-bit precision and 8-bit exponent
      // ConvD2F on the x87 path (UseSSE==0).  There is no encoding of its own:
      // the rule expands to roundFloat_mem_reg(dst,src), which stores the value
      // to a float stack slot.
10685 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10686   predicate(UseSSE==0);
10687   match(Set dst (ConvD2F src));
10688   format %{ "FST_S  $dst,$src\t# F-round" %}
10689   expand %{
10690     roundFloat_mem_reg(dst,src);
10691   %}
10692 %}
10693 
10694 // Force rounding to 24-bit precision and 8-bit exponent
      // ConvD2F for UseSSE==1: the double is in an x87 register and the float
      // result goes to an XMM register, so the value is passed through a 4-byte
      // temp on the CPU stack.  EFLAGS is declared killed (presumably because of
      // the SUB/ADD on ESP).
10695 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10696   predicate(UseSSE==1);
10697   match(Set dst (ConvD2F src));
10698   effect( KILL cr );
10699   format %{ "SUB    ESP,4\n\t"
10700             "FST_S  [ESP],$src\t# F-round\n\t"
10701             "MOVSS  $dst,[ESP]\n\t"
10702             "ADD ESP,4" %}
10703   ins_encode %{
10704     __ subptr(rsp, 4);
          // If src is not FPR1L (presumably the top of the FPU stack), push a
          // copy and store-and-pop it; otherwise store without popping.
10705     if ($src$$reg != FPR1L_enc) {
10706       __ fld_s($src$$reg-1);
10707       __ fstp_s(Address(rsp, 0));
10708     } else {
10709       __ fst_s(Address(rsp, 0));
10710     }
          // Reload the stored 32-bit float into the XMM destination and release
          // the temp slot.
10711     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10712     __ addptr(rsp, 4);
10713   %}
10714   ins_pipe( pipe_slow );
10715 %}
10716 
10717 // Force rounding double precision to single precision
      // ConvD2F when both values live in XMM registers (UseSSE>=2): a single
      // CVTSD2SS from src to dst.
10718 instruct convD2F_reg(regF dst, regD src) %{
10719   predicate(UseSSE>=2);
10720   match(Set dst (ConvD2F src));
10721   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10722   ins_encode %{
10723     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10724   %}
10725   ins_pipe( pipe_slow );
10726 %}
10727 
10728 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
        // ConvF2D on the x87 path (UseSSE==0): both source and destination are
        // FPU registers, encoded as a register-to-register move.
10729   predicate(UseSSE==0);
10730   match(Set dst (ConvF2D src));
10731   format %{ "FST_S  $dst,$src\t# D-round" %}
10732   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10733   ins_pipe( fpu_reg_reg );
10734 %}
10735 
10736 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
        // ConvF2D for UseSSE==1 with the float in an x87 register.  There is no
        // encoding of its own: the rule expands to roundDouble_mem_reg(dst,src),
        // which stores the value to a double stack slot.
10737   predicate(UseSSE==1);
10738   match(Set dst (ConvF2D src));
10739   format %{ "FST_D  $dst,$src\t# D-round" %}
10740   expand %{
10741     roundDouble_mem_reg(dst,src);
10742   %}
10743 %}
10744 
10745 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
        // ConvF2D for UseSSE==1: the float is in an XMM register and the double
        // result goes to an x87 register, so the value is passed through a 4-byte
        // temp on the CPU stack.  EFLAGS is declared killed (presumably because of
        // the SUB/ADD on ESP).
10746   predicate(UseSSE==1);
10747   match(Set dst (ConvF2D src));
10748   effect( KILL cr );
10749   format %{ "SUB    ESP,4\n\t"
10750             "MOVSS  [ESP],$src\n\t"
10751             "FLD_S  [ESP]\n\t"
10752             "ADD    ESP,4\n\t"
10753             "FSTP   $dst\t# D-round" %}
10754   ins_encode %{
          // Spill the XMM float to the temp slot, load it onto the x87 stack,
          // release the slot, then pop the value into dst's FPU register.
10755     __ subptr(rsp, 4);
10756     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10757     __ fld_s(Address(rsp, 0));
10758     __ addptr(rsp, 4);
10759     __ fstp_d($dst$$reg);
10760   %}
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 instruct convF2D_reg(regD dst, regF src) %{
        // ConvF2D when both values live in XMM registers (UseSSE>=2): a single
        // CVTSS2SD from src to dst.
10765   predicate(UseSSE>=2);
10766   match(Set dst (ConvF2D src));
10767   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10768   ins_encode %{
10769     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10770   %}
10771   ins_pipe( pipe_slow );
10772 %}
10773 
10774 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // x87 path (UseSSE<=1).  The result is fixed in EAX; EDX (tmp) and EFLAGS
      // are declared killed.  Per the format string, the value is stored with
      // FISTP in truncating mode and d2i_wrapper is called only when the stored
      // result equals 0x80000000.
10775 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10776   predicate(UseSSE<=1);
10777   match(Set dst (ConvD2I src));
10778   effect( KILL tmp, KILL cr );
10779   format %{ "FLD    $src\t# Convert double to int \n\t"
10780             "FLDCW  trunc mode\n\t"
10781             "SUB    ESP,4\n\t"
10782             "FISTp  [ESP + #0]\n\t"
10783             "FLDCW  std/24-bit mode\n\t"
10784             "POP    EAX\n\t"
10785             "CMP    EAX,0x80000000\n\t"
10786             "JNE,s  fast\n\t"
10787             "FLD_D  $src\n\t"
10788             "CALL   d2i_wrapper\n"
10789       "fast:" %}
10790   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10791   ins_pipe( pipe_slow );
10792 %}
10793 
10794 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // SSE2 path (UseSSE>=2) with the double in an XMM register.  The result is
      // fixed in EAX; EDX (tmp) and EFLAGS are declared killed.
10795 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10796   predicate(UseSSE>=2);
10797   match(Set dst (ConvD2I src));
10798   effect( KILL tmp, KILL cr );
10799   format %{ "CVTTSD2SI $dst, $src\n\t"
10800             "CMP    $dst,0x80000000\n\t"
10801             "JNE,s  fast\n\t"
10802             "SUB    ESP, 8\n\t"
10803             "MOVSD  [ESP], $src\n\t"
10804             "FLD_D  [ESP]\n\t"
10805             "ADD    ESP, 8\n\t"
10806             "CALL   d2i_wrapper\n"
10807       "fast:" %}
10808   ins_encode %{
10809     Label fast;
          // Fast path: truncating convert.  Any result other than 0x80000000 is
          // final and skips the slow path.
10810     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10811     __ cmpl($dst$$Register, 0x80000000);
10812     __ jccb(Assembler::notEqual, fast);
          // Slow path: pass the double to the stub on the x87 stack, going
          // through an 8-byte temp on the CPU stack.
10813     __ subptr(rsp, 8);
10814     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10815     __ fld_d(Address(rsp, 0));
10816     __ addptr(rsp, 8);
10817     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10818     __ bind(fast);
10819   %}
10820   ins_pipe( pipe_slow );
10821 %}
10822 
10823 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
        // ConvD2L on the x87 path (UseSSE<=1).  The long result is fixed in the
        // eADXRegL operand (the format string pops EAX then EDX); EFLAGS is
        // declared killed.  Per the format string, d2l_wrapper is called only
        // when the stored result is EDX==0x80000000 and EAX==0.
10824   predicate(UseSSE<=1);
10825   match(Set dst (ConvD2L src));
10826   effect( KILL cr );
10827   format %{ "FLD    $src\t# Convert double to long\n\t"
10828             "FLDCW  trunc mode\n\t"
10829             "SUB    ESP,8\n\t"
10830             "FISTp  [ESP + #0]\n\t"
10831             "FLDCW  std/24-bit mode\n\t"
10832             "POP    EAX\n\t"
10833             "POP    EDX\n\t"
10834             "CMP    EDX,0x80000000\n\t"
10835             "JNE,s  fast\n\t"
10836             "TEST   EAX,EAX\n\t"
10837             "JNE,s  fast\n\t"
10838             "FLD    $src\n\t"
10839             "CALL   d2l_wrapper\n"
10840       "fast:" %}
10841   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10842   ins_pipe( pipe_slow );
10843 %}
10844 
10845 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // ConvD2L for UseSSE>=2 with the double in an XMM register.  The long
      // result is fixed in the eADXRegL operand (popped into EAX then EDX);
      // EFLAGS is declared killed.
10846 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10847   predicate (UseSSE>=2);
10848   match(Set dst (ConvD2L src));
10849   effect( KILL cr );
10850   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10851             "MOVSD  [ESP],$src\n\t"
10852             "FLD_D  [ESP]\n\t"
10853             "FLDCW  trunc mode\n\t"
10854             "FISTp  [ESP + #0]\n\t"
10855             "FLDCW  std/24-bit mode\n\t"
10856             "POP    EAX\n\t"
10857             "POP    EDX\n\t"
10858             "CMP    EDX,0x80000000\n\t"
10859             "JNE,s  fast\n\t"
10860             "TEST   EAX,EAX\n\t"
10861             "JNE,s  fast\n\t"
10862             "SUB    ESP,8\n\t"
10863             "MOVSD  [ESP],$src\n\t"
10864             "FLD_D  [ESP]\n\t"
10865             "ADD    ESP,8\n\t"
10866             "CALL   d2l_wrapper\n"
10867       "fast:" %}
10868   ins_encode %{
10869     Label fast;
          // Move the double from XMM to the x87 stack through an 8-byte temp;
          // the same slot then receives the 64-bit integer result.
10870     __ subptr(rsp, 8);
10871     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10872     __ fld_d(Address(rsp, 0));
          // Switch the FPU control word to the truncating setting for the store.
10873     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10874     __ fistp_d(Address(rsp, 0));
10875     // Restore the rounding mode, mask the exception
10876     if (Compile::current()->in_24_bit_fp_mode()) {
10877       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10878     } else {
10879       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10880     }
10881     // Load the converted long, adjust CPU stack
10882     __ pop(rax);
10883     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (EDX:EAX) takes the slow path.
10884     __ cmpl(rdx, 0x80000000);
10885     __ jccb(Assembler::notEqual, fast);
10886     __ testl(rax, rax);
10887     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original double onto the x87 stack and call
          // the d2l_wrapper stub.
10888     __ subptr(rsp, 8);
10889     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10890     __ fld_d(Address(rsp, 0));
10891     __ addptr(rsp, 8);
10892     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10893     __ bind(fast);
10894   %}
10895   ins_pipe( pipe_slow );
10896 %}
10897 
10898 // Convert a float to an int.  Java semantics require we do complex
10899 // manglations in the corner cases.  So we set the rounding mode to
10900 // 'zero', store the darned value down as an int, and reset the
10901 // rounding mode to 'nearest'.  The hardware stores a flag value down
10902 // if we would overflow or converted a NAN; we check for this and
10903 // go the slow path if needed.
      // x87 path (UseSSE==0).  The result is fixed in EAX; EDX (tmp) and EFLAGS
      // are declared killed.
10904 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10905   predicate(UseSSE==0);
10906   match(Set dst (ConvF2I src));
10907   effect( KILL tmp, KILL cr );
10908   format %{ "FLD    $src\t# Convert float to int \n\t"
10909             "FLDCW  trunc mode\n\t"
10910             "SUB    ESP,4\n\t"
10911             "FISTp  [ESP + #0]\n\t"
10912             "FLDCW  std/24-bit mode\n\t"
10913             "POP    EAX\n\t"
10914             "CMP    EAX,0x80000000\n\t"
10915             "JNE,s  fast\n\t"
10916             "FLD    $src\n\t"
10917             "CALL   d2i_wrapper\n"
10918       "fast:" %}
10919   // DPR2I_encoding works for FPR2I
10920   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10921   ins_pipe( pipe_slow );
10922 %}
10923 
10924 // Convert a float in xmm to an int reg.
      // SSE path (UseSSE>=1).  The result is fixed in EAX; EDX (tmp) and EFLAGS
      // are declared killed.
10925 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10926   predicate(UseSSE>=1);
10927   match(Set dst (ConvF2I src));
10928   effect( KILL tmp, KILL cr );
10929   format %{ "CVTTSS2SI $dst, $src\n\t"
10930             "CMP    $dst,0x80000000\n\t"
10931             "JNE,s  fast\n\t"
10932             "SUB    ESP, 4\n\t"
10933             "MOVSS  [ESP], $src\n\t"
10934             "FLD    [ESP]\n\t"
10935             "ADD    ESP, 4\n\t"
10936             "CALL   d2i_wrapper\n"
10937       "fast:" %}
10938   ins_encode %{
10939     Label fast;
          // Fast path: truncating convert.  Any result other than 0x80000000 is
          // final and skips the slow path.
10940     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10941     __ cmpl($dst$$Register, 0x80000000);
10942     __ jccb(Assembler::notEqual, fast);
          // Slow path: pass the float to the stub on the x87 stack, going
          // through a 4-byte temp on the CPU stack.  The same d2i_wrapper stub is
          // used as for doubles.
10943     __ subptr(rsp, 4);
10944     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10945     __ fld_s(Address(rsp, 0));
10946     __ addptr(rsp, 4);
10947     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10948     __ bind(fast);
10949   %}
10950   ins_pipe( pipe_slow );
10951 %}
10952 
10953 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
        // ConvF2L on the x87 path (UseSSE==0).  The long result is fixed in the
        // eADXRegL operand (the format string pops EAX then EDX); EFLAGS is
        // declared killed.  Per the format string, d2l_wrapper is called only
        // when the stored result is EDX==0x80000000 and EAX==0.
10954   predicate(UseSSE==0);
10955   match(Set dst (ConvF2L src));
10956   effect( KILL cr );
10957   format %{ "FLD    $src\t# Convert float to long\n\t"
10958             "FLDCW  trunc mode\n\t"
10959             "SUB    ESP,8\n\t"
10960             "FISTp  [ESP + #0]\n\t"
10961             "FLDCW  std/24-bit mode\n\t"
10962             "POP    EAX\n\t"
10963             "POP    EDX\n\t"
10964             "CMP    EDX,0x80000000\n\t"
10965             "JNE,s  fast\n\t"
10966             "TEST   EAX,EAX\n\t"
10967             "JNE,s  fast\n\t"
10968             "FLD    $src\n\t"
10969             "CALL   d2l_wrapper\n"
10970       "fast:" %}
10971   // DPR2L_encoding works for FPR2L
10972   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10973   ins_pipe( pipe_slow );
10974 %}
10975 
10976 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // ConvF2L for UseSSE>=1 with the float in an XMM register.  The long
      // result is fixed in the eADXRegL operand (popped into EAX then EDX);
      // EFLAGS is declared killed.
10977 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10978   predicate (UseSSE>=1);
10979   match(Set dst (ConvF2L src));
10980   effect( KILL cr );
10981   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10982             "MOVSS  [ESP],$src\n\t"
10983             "FLD_S  [ESP]\n\t"
10984             "FLDCW  trunc mode\n\t"
10985             "FISTp  [ESP + #0]\n\t"
10986             "FLDCW  std/24-bit mode\n\t"
10987             "POP    EAX\n\t"
10988             "POP    EDX\n\t"
10989             "CMP    EDX,0x80000000\n\t"
10990             "JNE,s  fast\n\t"
10991             "TEST   EAX,EAX\n\t"
10992             "JNE,s  fast\n\t"
10993             "SUB    ESP,4\t# Convert float to long\n\t"
10994             "MOVSS  [ESP],$src\n\t"
10995             "FLD_S  [ESP]\n\t"
10996             "ADD    ESP,4\n\t"
10997             "CALL   d2l_wrapper\n"
10998       "fast:" %}
10999   ins_encode %{
11000     Label fast;
          // Reserve 8 bytes: the float is spilled into the slot and loaded onto
          // the x87 stack; the same slot then receives the 64-bit integer result.
11001     __ subptr(rsp, 8);
11002     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11003     __ fld_s(Address(rsp, 0));
          // Switch the FPU control word to the truncating setting for the store.
11004     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11005     __ fistp_d(Address(rsp, 0));
11006     // Restore the rounding mode, mask the exception
11007     if (Compile::current()->in_24_bit_fp_mode()) {
11008       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11009     } else {
11010       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11011     }
11012     // Load the converted long, adjust CPU stack
11013     __ pop(rax);
11014     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (EDX:EAX) takes the slow path.
11015     __ cmpl(rdx, 0x80000000);
11016     __ jccb(Assembler::notEqual, fast);
11017     __ testl(rax, rax);
11018     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original float onto the x87 stack through a
          // 4-byte temp and call the d2l_wrapper stub.
11019     __ subptr(rsp, 4);
11020     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11021     __ fld_s(Address(rsp, 0));
11022     __ addptr(rsp, 4);
11023     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11024     __ bind(fast);
11025   %}
11026   ins_pipe( pipe_slow );
11027 %}
11028 
11029 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // ConvI2D while doubles live on the x87 stack (UseSSE<=1): the int is
        // read from a stack slot (FILD in the format string) and popped into
        // dst's FPU register.
11030   predicate( UseSSE<=1 );
11031   match(Set dst (ConvI2D src));
11032   format %{ "FILD   $src\n\t"
11033             "FSTP   $dst" %}
11034   opcode(0xDB, 0x0);  /* DB /0 */
11035   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11036   ins_pipe( fpu_reg_mem );
11037 %}
11038 
11039 instruct convI2D_reg(regD dst, rRegI src) %{
        // ConvI2D from a general register to an XMM register using CVTSI2SD.
        // Selected when UseSSE>=2 and UseXmmI2D is off; convXI2D_reg handles the
        // UseXmmI2D case.
11040   predicate( UseSSE>=2 && !UseXmmI2D );
11041   match(Set dst (ConvI2D src));
11042   format %{ "CVTSI2SD $dst,$src" %}
11043   ins_encode %{
11044     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11045   %}
11046   ins_pipe( pipe_slow );
11047 %}
11048 
11049 instruct convI2D_mem(regD dst, memory mem) %{
        // ConvI2D of an int loaded from memory (UseSSE>=2): the LoadI is folded
        // into CVTSI2SD with a memory source operand.
11050   predicate( UseSSE>=2 );
11051   match(Set dst (ConvI2D (LoadI mem)));
11052   format %{ "CVTSI2SD $dst,$mem" %}
11053   ins_encode %{
11054     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11055   %}
11056   ins_pipe( pipe_slow );
11057 %}
11058 
11059 instruct convXI2D_reg(regD dst, rRegI src)
11060 %{
        // ConvI2D variant selected when UseSSE>=2 and UseXmmI2D is on: the int is
        // first moved into the XMM register with MOVD and then converted in
        // place with CVTDQ2PD.
11061   predicate( UseSSE>=2 && UseXmmI2D );
11062   match(Set dst (ConvI2D src));
11063
11064   format %{ "MOVD  $dst,$src\n\t"
11065             "CVTDQ2PD $dst,$dst\t# i2d" %}
11066   ins_encode %{
11067     __ movdl($dst$$XMMRegister, $src$$Register);
11068     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11069   %}
11070   ins_pipe(pipe_slow); // XXX
11071 %}
11072 
11073 instruct convI2DPR_mem(regDPR dst, memory mem) %{
        // ConvI2D of an int loaded from memory on the x87 path: the LoadI is
        // folded into the integer load (DB /0 applied to mem).  Selected when
        // UseSSE<=1 and 24-bit instruction selection is off.
11074   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11075   match(Set dst (ConvI2D (LoadI mem)));
11076   format %{ "FILD   $mem\n\t"
11077             "FSTP   $dst" %}
11078   opcode(0xDB);      /* DB /0 */
11079   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11080               Pop_Reg_DPR(dst));
11081   ins_pipe( fpu_reg_mem );
11082 %}
11083 
11084 // Convert a byte to a float; no rounding step needed.
      // x87 path (UseSSE==0).  The predicate restricts the rule to a ConvI2F
      // whose input node is AndI with the constant 255, i.e. a value already
      // masked to 8 bits.
11085 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11086   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11087   match(Set dst (ConvI2F src));
11088   format %{ "FILD   $src\n\t"
11089             "FSTP   $dst" %}
11090
11091   opcode(0xDB, 0x0);  /* DB /0 */
11092   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11093   ins_pipe( fpu_reg_mem );
11094 %}
11095 
11096 // In 24-bit mode, force exponent rounding by storing back out
      // ConvI2F from an int stack slot to a float stack slot, selected when
      // UseSSE==0 and 24-bit instruction selection is on.  Cost is set to 200.
11097 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11098   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11099   match(Set dst (ConvI2F src));
11100   ins_cost(200);
11101   format %{ "FILD   $src\n\t"
11102             "FSTP_S $dst" %}
11103   opcode(0xDB, 0x0);  /* DB /0 */
11104   ins_encode( Push_Mem_I(src),
11105               Pop_Mem_FPR(dst));
11106   ins_pipe( fpu_mem_mem );
11107 %}
11108 
11109 // In 24-bit mode, force exponent rounding by storing back out
      // ConvI2F of an int loaded from memory (the LoadI is folded in), with the
      // result in a float stack slot.  Selected when UseSSE==0 and 24-bit
      // instruction selection is on.  Cost is set to 200.
11110 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11111   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11112   match(Set dst (ConvI2F (LoadI mem)));
11113   ins_cost(200);
11114   format %{ "FILD   $mem\n\t"
11115             "FSTP_S $dst" %}
11116   opcode(0xDB);  /* DB /0 */
11117   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11118               Pop_Mem_FPR(dst));
11119   ins_pipe( fpu_mem_mem );
11120 %}
11121 
11122 // This instruction does not round to 24-bits
      // ConvI2F from an int stack slot into an FPU register, selected when
      // UseSSE==0 and 24-bit instruction selection is off.
11123 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11124   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11125   match(Set dst (ConvI2F src));
11126   format %{ "FILD   $src\n\t"
11127             "FSTP   $dst" %}
11128   opcode(0xDB, 0x0);  /* DB /0 */
11129   ins_encode( Push_Mem_I(src),
11130               Pop_Reg_FPR(dst));
11131   ins_pipe( fpu_reg_mem );
11132 %}
11133 
11134 // This instruction does not round to 24-bits
      // ConvI2F of an int loaded from memory (the LoadI is folded in) into an
      // FPU register, selected when UseSSE==0 and 24-bit instruction selection
      // is off.
11135 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11136   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11137   match(Set dst (ConvI2F (LoadI mem)));
11138   format %{ "FILD   $mem\n\t"
11139             "FSTP   $dst" %}
11140   opcode(0xDB);      /* DB /0 */
11141   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11142               Pop_Reg_FPR(dst));
11143   ins_pipe( fpu_reg_mem );
11144 %}
11145 
11146 // Convert an int to a float in xmm; no rounding step needed.
      // Uses CVTSI2SS from a general register.  In the predicate && binds
      // tighter than ||, so the rule applies when UseSSE==1, or when UseSSE>=2
      // and UseXmmI2F is off; convXI2F_reg handles the UseXmmI2F case.
11147 instruct convI2F_reg(regF dst, rRegI src) %{
11148   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11149   match(Set dst (ConvI2F src));
11150   format %{ "CVTSI2SS $dst, $src" %}
11151   ins_encode %{
11152     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11153   %}
11154   ins_pipe( pipe_slow );
11155 %}
11156 
11157  instruct convXI2F_reg(regF dst, rRegI src)
11158 %{
        // ConvI2F variant selected when UseSSE>=2 and UseXmmI2F is on: the int is
        // first moved into the XMM register with MOVD and then converted in
        // place with CVTDQ2PS.
11159   predicate( UseSSE>=2 && UseXmmI2F );
11160   match(Set dst (ConvI2F src));
11161
11162   format %{ "MOVD  $dst,$src\n\t"
11163             "CVTDQ2PS $dst,$dst\t# i2f" %}
11164   ins_encode %{
11165     __ movdl($dst$$XMMRegister, $src$$Register);
11166     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11167   %}
11168   ins_pipe(pipe_slow); // XXX
11169 %}
11170 
11171 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
        // Sign-extending ConvI2L into a long register pair.  Per the format
        // string, src is copied to both halves and the high half is then shifted
        // right arithmetically by 31.  EFLAGS is declared killed.  Cost 375.
11172   match(Set dst (ConvI2L src));
11173   effect(KILL cr);
11174   ins_cost(375);
11175   format %{ "MOV    $dst.lo,$src\n\t"
11176             "MOV    $dst.hi,$src\n\t"
11177             "SAR    $dst.hi,31" %}
11178   ins_encode(convert_int_long(dst,src));
11179   ins_pipe( ialu_reg_reg_long );
11180 %}
11181 
11182 // Zero-extend convert int to long
      // Matches AndL(ConvI2L src, mask) where mask is an immL_32bits operand, so
      // the conversion and the mask become a copy to the low half plus an XOR
      // that clears the high half.  EFLAGS is declared killed.  Cost 250.
11183 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11184   match(Set dst (AndL (ConvI2L src) mask) );
11185   effect( KILL flags );
11186   ins_cost(250);
11187   format %{ "MOV    $dst.lo,$src\n\t"
11188             "XOR    $dst.hi,$dst.hi" %}
11189   opcode(0x33); // XOR
11190   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11191   ins_pipe( ialu_reg_reg_long );
11192 %}
11193 
11194 // Zero-extend long
      // Matches AndL(src, mask) where mask is an immL_32bits operand: the low
      // half of src is copied to dst and the high half of dst is cleared with
      // XOR.  EFLAGS is declared killed.  Cost 250.
11195 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11196   match(Set dst (AndL src mask) );
11197   effect( KILL flags );
11198   ins_cost(250);
11199   format %{ "MOV    $dst.lo,$src.lo\n\t"
11200             "XOR    $dst.hi,$dst.hi" %}
11201   opcode(0x33); // XOR
11202   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11203   ins_pipe( ialu_reg_reg_long );
11204 %}
11205 
11206 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D while doubles live on the x87 stack (UseSSE<=1).  Per the
        // format string, both halves of the long are pushed on the CPU stack,
        // loaded with FILD, and the result is stored to a double stack slot.
        // EFLAGS is declared killed.
11207   predicate (UseSSE<=1);
11208   match(Set dst (ConvL2D src));
11209   effect( KILL cr );
11210   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11211             "PUSH   $src.lo\n\t"
11212             "FILD   ST,[ESP + #0]\n\t"
11213             "ADD    ESP,8\n\t"
11214             "FSTP_D $dst\t# D-round" %}
11215   opcode(0xDF, 0x5);  /* DF /5 */
11216   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11217   ins_pipe( pipe_slow );
11218 %}
11219 
11220 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D with the result in an XMM register (UseSSE>=2).  Per the format
        // string, the long is pushed on the CPU stack, converted on the x87 stack
        // (FILD_D / FSTP_D) and then loaded into $dst with MOVSD.  EFLAGS is
        // declared killed.
11221   predicate (UseSSE>=2);
11222   match(Set dst (ConvL2D src));
11223   effect( KILL cr );
11224   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11225             "PUSH   $src.lo\n\t"
11226             "FILD_D [ESP]\n\t"
11227             "FSTP_D [ESP]\n\t"
11228             "MOVSD  $dst,[ESP]\n\t"
11229             "ADD    ESP,8" %}
11230   opcode(0xDF, 0x5);  /* DF /5 */
11231   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11232   ins_pipe( pipe_slow );
11233 %}
11234 
11235 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F with the result in an XMM register (UseSSE>=1).  Per the format
        // string, the long is pushed on the CPU stack, converted on the x87 stack
        // (FILD_D / FSTP_S) and then loaded into $dst with MOVSS.  EFLAGS is
        // declared killed.
11236   predicate (UseSSE>=1);
11237   match(Set dst (ConvL2F src));
11238   effect( KILL cr );
11239   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11240             "PUSH   $src.lo\n\t"
11241             "FILD_D [ESP]\n\t"
11242             "FSTP_S [ESP]\n\t"
11243             "MOVSS  $dst,[ESP]\n\t"
11244             "ADD    ESP,8" %}
11245   opcode(0xDF, 0x5);  /* DF /5 */
11246   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11247   ins_pipe( pipe_slow );
11248 %}
11249 
11250 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F with the result in a float stack slot.  Per the format string,
        // the long is pushed on the CPU stack, loaded with FILD and stored with
        // FSTP_S.  EFLAGS is declared killed.
        // NOTE(review): this rule has no UseSSE predicate, while the convL2F_reg
        // rule for the same ConvL2F node requires UseSSE>=1 -- confirm that
        // leaving this rule unrestricted is intentional.
11251   match(Set dst (ConvL2F src));
11252   effect( KILL cr );
11253   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11254             "PUSH   $src.lo\n\t"
11255             "FILD   ST,[ESP + #0]\n\t"
11256             "ADD    ESP,8\n\t"
11257             "FSTP_S $dst\t# F-round" %}
11258   opcode(0xDF, 0x5);  /* DF /5 */
11259   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11260   ins_pipe( pipe_slow );
11261 %}
11262 
11263 instruct convL2I_reg( rRegI dst, eRegL src ) %{
        // ConvL2I: the int result is the low half of the long register pair
        // (MOV $dst,$src.lo in the format string).
11264   match(Set dst (ConvL2I src));
11265   effect( DEF dst, USE src );
11266   format %{ "MOV    $dst,$src.lo" %}
11267   ins_encode(enc_CopyL_Lo(dst,src));
11268   ins_pipe( ialu_reg_reg );
11269 %}
11270 
11271 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot to a general register: a plain 32-bit
        // load from src's displacement off rsp.  No predicate.  Cost 100.
11272   match(Set dst (MoveF2I src));
11273   effect( DEF dst, USE src );
11274   ins_cost(100);
11275   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11276   ins_encode %{
11277     __ movl($dst$$Register, Address(rsp, $src$$disp));
11278   %}
11279   ins_pipe( ialu_reg_mem );
11280 %}
11281 
11282 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
        // MoveF2I on the x87 path (UseSSE==0): the FPU register is stored to an
        // int stack slot (shown as FST_S in the format string).  Cost 125.
11283   predicate(UseSSE==0);
11284   match(Set dst (MoveF2I src));
11285   effect( DEF dst, USE src );
11286
11287   ins_cost(125);
11288   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11289   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11290   ins_pipe( fpu_mem_reg );
11291 %}
11292 
11293 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
        // MoveF2I with the float in an XMM register (UseSSE>=1): the register is
        // stored to an int stack slot at dst's displacement off rsp.  Cost 95.
11294   predicate(UseSSE>=1);
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297
11298   ins_cost(95);
11299   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11300   ins_encode %{
11301     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11302   %}
11303   ins_pipe( pipe_slow );
11304 %}
11305 
11306 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to a general register with MOVD
        // (UseSSE>=2), with no stack slot involved.  Cost 85, the lowest of the
        // MoveF2I rules in this section.
11307   predicate(UseSSE>=2);
11308   match(Set dst (MoveF2I src));
11309   effect( DEF dst, USE src );
11310   ins_cost(85);
11311   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11312   ins_encode %{
11313     __ movdl($dst$$Register, $src$$XMMRegister);
11314   %}
11315   ins_pipe( pipe_slow );
11316 %}
11317 
11318 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from a general register to a float stack slot: a plain 32-bit
        // store to dst's displacement off rsp.  No predicate.  Cost 100.
11319   match(Set dst (MoveI2F src));
11320   effect( DEF dst, USE src );
11321
11322   ins_cost(100);
11323   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11324   ins_encode %{
11325     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11326   %}
11327   ins_pipe( ialu_mem_reg );
11328 %}
11329 
11330 
11331 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
        // MoveI2F on the x87 path (UseSSE==0): the int stack slot is loaded as a
        // 32-bit real (D9 /0) and popped into dst's FPU register.  Cost 125.
11332   predicate(UseSSE==0);
11333   match(Set dst (MoveI2F src));
11334   effect(DEF dst, USE src);
11335
11336   ins_cost(125);
11337   format %{ "FLD_S  $src\n\t"
11338             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11339   opcode(0xD9);               /* D9 /0, FLD m32real */
11340   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11341               Pop_Reg_FPR(dst) );
11342   ins_pipe( fpu_reg_mem );
11343 %}
11344 
11345 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
        // MoveI2F with the float result in an XMM register (UseSSE>=1): the int
        // stack slot at src's displacement off rsp is loaded into the XMM
        // register.  Cost 95.
11346   predicate(UseSSE>=1);
11347   match(Set dst (MoveI2F src));
11348   effect( DEF dst, USE src );
11349
11350   ins_cost(95);
11351   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11352   ins_encode %{
11353     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11354   %}
11355   ins_pipe( pipe_slow );
11356 %}
11357 
// MoveI2F: general-purpose int register -> XMM float register.
// Requires UseSSE >= 2. Encoded as one movdl from the integer register into
// the XMM register, so no stack slot is involved.
11358 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11359   predicate(UseSSE>=2);
11360   match(Set dst (MoveI2F src));
11361   effect( DEF dst, USE src );
11362 
11363   ins_cost(85);
11364   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11365   ins_encode %{
11366     __ movdl($dst$$XMMRegister, $src$$Register);
11367   %}
11368   ins_pipe( pipe_slow );
11369 %}
11370 
// MoveD2L: double stack slot -> long register pair (eRegL).
// Has no predicate. Per the format, two MOV loads (opcode 0x8B, used as both
// the primary and secondary opcode) fill $dst.lo from $src and $dst.hi from
// $src+4. RegMem / RegMem_Hi are encoding classes defined outside this
// section.
11371 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11372   match(Set dst (MoveD2L src));
11373   effect(DEF dst, USE src);
11374 
11375   ins_cost(250);
11376   format %{ "MOV    $dst.lo,$src\n\t"
11377             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11378   opcode(0x8B, 0x8B);
11379   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11380   ins_pipe( ialu_mem_long_reg );
11381 %}
11382 
// MoveD2L: x87 double register (regDPR) -> long stack slot.
// Selected only when UseSSE <= 1. Per the format this is an FST_D store; the
// bytes are produced by the Pop_Mem_Reg_DPR encoding class, which is defined
// outside this section.
11383 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11384   predicate(UseSSE<=1);
11385   match(Set dst (MoveD2L src));
11386   effect(DEF dst, USE src);
11387 
11388   ins_cost(125);
11389   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11390   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11391   ins_pipe( fpu_mem_reg );
11392 %}
11393 
// MoveD2L: XMM double register -> long stack slot.
// Requires UseSSE >= 2. Encoded as one movdbl that stores $src at the
// rsp-relative address given by $dst's displacement.
11394 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11395   predicate(UseSSE>=2);
11396   match(Set dst (MoveD2L src));
11397   effect(DEF dst, USE src);
11398   ins_cost(95);
11399   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11400   ins_encode %{
11401     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11402   %}
11403   ins_pipe( pipe_slow );
11404 %}
11405 
// MoveD2L: XMM double register -> long register pair, without going through
// the stack. Requires UseSSE >= 2 and one extra XMM temp.
// Sequence:
//   1. movdl copies the low 32 bits of $src into the low half of $dst.
//   2. pshuflw with immediate 0x4E writes a shuffled copy of $src into $tmp;
//      0x4E selects words {2,3,0,1}, i.e. it swaps the two low doublewords,
//      so the upper 32 bits of the double end up in the low dword of $tmp.
//   3. movdl copies that dword into the high half of $dst
//      (HIGH_FROM_LOW maps the low register of the pair to its high partner).
// $src itself is only read.
11406 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11407   predicate(UseSSE>=2);
11408   match(Set dst (MoveD2L src));
11409   effect(DEF dst, USE src, TEMP tmp);
11410   ins_cost(85);
11411   format %{ "MOVD   $dst.lo,$src\n\t"
11412             "PSHUFLW $tmp,$src,0x4E\n\t"
11413             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11414   ins_encode %{
11415     __ movdl($dst$$Register, $src$$XMMRegister);
11416     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11417     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11418   %}
11419   ins_pipe( pipe_slow );
11420 %}
11421 
// MoveL2D: long register pair (eRegL) -> double stack slot.
// Has no predicate. Per the format, two MOV stores (opcode 0x89, used as both
// the primary and secondary opcode) write $src.lo to $dst and $src.hi to
// $dst+4. RegMem / RegMem_Hi are encoding classes defined outside this
// section.
11422 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11423   match(Set dst (MoveL2D src));
11424   effect(DEF dst, USE src);
11425 
11426   ins_cost(200);
11427   format %{ "MOV    $dst,$src.lo\n\t"
11428             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11429   opcode(0x89, 0x89);
11430   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11431   ins_pipe( ialu_mem_long_reg );
11432 %}
11433 
11434 
// MoveL2D: long stack slot -> x87 double register (regDPR).
// Selected only when UseSSE <= 1. Per the format, the slot is loaded with
// FLD_D (opcode DD /0) and then popped into $dst with FSTP.
// The encoding is composed from the OpcP, RMopc_Mem_no_oop and Pop_Reg_DPR
// encoding classes, which are defined outside this section.
11435 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11436   predicate(UseSSE<=1);
11437   match(Set dst (MoveL2D src));
11438   effect(DEF dst, USE src);
11439   ins_cost(125);
11440 
11441   format %{ "FLD_D  $src\n\t"
11442             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11443   opcode(0xDD);               /* DD /0, FLD m64real */
11444   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11445               Pop_Reg_DPR(dst) );
11446   ins_pipe( fpu_reg_mem );
11447 %}
11448 
11449 
// MoveL2D: long stack slot -> XMM double register, for the case where
// UseSSE >= 2 and UseXmmLoadAndClearUpper is set.
// Encoded as one movdbl load from the rsp-relative address of $src; the
// format prints it as MOVSD.
// The complementary rule for !UseXmmLoadAndClearUpper is
// MoveL2D_stack_reg_sse_partial, which issues the same movdbl call.
11450 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11451   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11452   match(Set dst (MoveL2D src));
11453   effect(DEF dst, USE src);
11454 
11455   ins_cost(95);
11456   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11457   ins_encode %{
11458     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11459   %}
11460   ins_pipe( pipe_slow );
11461 %}
11462 
// MoveL2D: long stack slot -> XMM double register, for the case where
// UseSSE >= 2 and UseXmmLoadAndClearUpper is NOT set.
// The encoding is the same movdbl call as in MoveL2D_stack_reg_sse; only the
// predicate and the printed mnemonic (MOVLPD) differ.
// NOTE(review): presumably movdbl itself chooses the instruction from
// UseXmmLoadAndClearUpper — confirm in the macro assembler.
// NOTE(review): the format tag reads "MoveL2D_stack_reg_sse", without the
// "_partial" suffix of this rule's name.
11463 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11464   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11465   match(Set dst (MoveL2D src));
11466   effect(DEF dst, USE src);
11467 
11468   ins_cost(95);
11469   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11470   ins_encode %{
11471     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11472   %}
11473   ins_pipe( pipe_slow );
11474 %}
11475 
// MoveL2D: long register pair -> XMM double register, without going through
// the stack. Requires UseSSE >= 2 and one extra XMM temp.
// Sequence:
//   1. movdl puts the low half of $src into $dst.
//   2. movdl puts the high half of $src (HIGH_FROM_LOW of the pair's low
//      register) into $tmp.
//   3. punpckldq interleaves the low dwords of $dst and $tmp, leaving
//      lo:hi as the low 64 bits of $dst.
// NOTE(review): $dst is declared TEMP rather than DEF in the effect list,
// presumably so that it is never assigned the same register as $tmp —
// confirm.
11476 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11477   predicate(UseSSE>=2);
11478   match(Set dst (MoveL2D src));
11479   effect(TEMP dst, USE src, TEMP tmp);
11480   ins_cost(85);
11481   format %{ "MOVD   $dst,$src.lo\n\t"
11482             "MOVD   $tmp,$src.hi\n\t"
11483             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11484   ins_encode %{
11485     __ movdl($dst$$XMMRegister, $src$$Register);
11486     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11487     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11488   %}
11489   ins_pipe( pipe_slow );
11490 %}
11491 
11492 
11493 // =======================================================================
11494 // fast clearing of an array
11495 // Small ClearArray non-AVX512.
// Matches ClearArray nodes for which is_large() is false, when UseAVX <= 2.
// Operands use fixed register classes: cnt is eCXRegI, base is eDIRegP and
// zero is eAXRegI (printed as ECX / EDI / EAX in the format). cnt and base
// are consumed (USE_KILL), zero and the flags are killed, and one XMM
// register is reserved as a temp. The rule's result is the Universe dummy.
// All code comes from the single clear_mem() call in ins_encode, invoked with
// `false` as its fifth argument and knoreg (no opmask register). The format
// template only prints a listing, which varies with UseFastStosb and
// UseXMMForObjInit.
// NOTE(review): the fifth argument mirrors is_large() in the predicate and
// is presumably the "is large" flag — confirm against clear_mem's signature.
// NOTE(review): the UseXMMForObjInit listing names 64-bit registers
// (RAX/RCX/RDI) although this rule's operands are the 32-bit eAX/eCX/eDI
// classes.
11496 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11497   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11498   match(Set dummy (ClearArray cnt base));
11499   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11500 
11501   format %{ $$template
11502     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11503     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11504     $$emit$$"JG     LARGE\n\t"
11505     $$emit$$"SHL    ECX, 1\n\t"
11506     $$emit$$"DEC    ECX\n\t"
11507     $$emit$$"JS     DONE\t# Zero length\n\t"
11508     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11509     $$emit$$"DEC    ECX\n\t"
11510     $$emit$$"JGE    LOOP\n\t"
11511     $$emit$$"JMP    DONE\n\t"
11512     $$emit$$"# LARGE:\n\t"
11513     if (UseFastStosb) {
11514        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11515        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11516     } else if (UseXMMForObjInit) {
11517        $$emit$$"MOV     RDI,RAX\n\t"
11518        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11519        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11520        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11521        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11522        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11523        $$emit$$"ADD     0x40,RAX\n\t"
11524        $$emit$$"# L_zero_64_bytes:\n\t"
11525        $$emit$$"SUB     0x8,RCX\n\t"
11526        $$emit$$"JGE     L_loop\n\t"
11527        $$emit$$"ADD     0x4,RCX\n\t"
11528        $$emit$$"JL      L_tail\n\t"
11529        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11530        $$emit$$"ADD     0x20,RAX\n\t"
11531        $$emit$$"SUB     0x4,RCX\n\t"
11532        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11533        $$emit$$"ADD     0x4,RCX\n\t"
11534        $$emit$$"JLE     L_end\n\t"
11535        $$emit$$"DEC     RCX\n\t"
11536        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11537        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11538        $$emit$$"ADD     0x8,RAX\n\t"
11539        $$emit$$"DEC     RCX\n\t"
11540        $$emit$$"JGE     L_sloop\n\t"
11541        $$emit$$"# L_end:\n\t"
11542     } else {
11543        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11544        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11545     }
11546     $$emit$$"# DONE"
11547   %}
11548   ins_encode %{
11549     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11550                  $tmp$$XMMRegister, false, knoreg);
11551   %}
11552   ins_pipe( pipe_slow );
11553 %}
11554 
11555 // Small ClearArray AVX512 non-constant length.
// Matches ClearArray nodes for which is_large() is false, when UseAVX > 2,
// with the count held in a register.
// Differences from rep_stos that are visible here: the XMM temp is a
// legRegD, an opmask register (kReg ktmp) is reserved as an extra TEMP and
// passed to clear_mem() in place of knoreg, and ins_cost is set to 125.
// cnt (eCXRegI) and base (eDIRegP) are consumed (USE_KILL); zero (eAXRegI)
// and the flags are killed. The format listing is identical text to the
// non-EVEX rule and depends on UseFastStosb / UseXMMForObjInit.
// NOTE(review): the fifth clear_mem argument (`false`) mirrors is_large() in
// the predicate — presumably the "is large" flag; confirm.
11556 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11557   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11558   match(Set dummy (ClearArray cnt base));
11559   ins_cost(125);
11560   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11561 
11562   format %{ $$template
11563     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11564     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11565     $$emit$$"JG     LARGE\n\t"
11566     $$emit$$"SHL    ECX, 1\n\t"
11567     $$emit$$"DEC    ECX\n\t"
11568     $$emit$$"JS     DONE\t# Zero length\n\t"
11569     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11570     $$emit$$"DEC    ECX\n\t"
11571     $$emit$$"JGE    LOOP\n\t"
11572     $$emit$$"JMP    DONE\n\t"
11573     $$emit$$"# LARGE:\n\t"
11574     if (UseFastStosb) {
11575        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11576        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11577     } else if (UseXMMForObjInit) {
11578        $$emit$$"MOV     RDI,RAX\n\t"
11579        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11580        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11581        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11582        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11583        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11584        $$emit$$"ADD     0x40,RAX\n\t"
11585        $$emit$$"# L_zero_64_bytes:\n\t"
11586        $$emit$$"SUB     0x8,RCX\n\t"
11587        $$emit$$"JGE     L_loop\n\t"
11588        $$emit$$"ADD     0x4,RCX\n\t"
11589        $$emit$$"JL      L_tail\n\t"
11590        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11591        $$emit$$"ADD     0x20,RAX\n\t"
11592        $$emit$$"SUB     0x4,RCX\n\t"
11593        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11594        $$emit$$"ADD     0x4,RCX\n\t"
11595        $$emit$$"JLE     L_end\n\t"
11596        $$emit$$"DEC     RCX\n\t"
11597        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11598        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11599        $$emit$$"ADD     0x8,RAX\n\t"
11600        $$emit$$"DEC     RCX\n\t"
11601        $$emit$$"JGE     L_sloop\n\t"
11602        $$emit$$"# L_end:\n\t"
11603     } else {
11604        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11605        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11606     }
11607     $$emit$$"# DONE"
11608   %}
11609   ins_encode %{
11610     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11611                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11612   %}
11613   ins_pipe( pipe_slow );
11614 %}
11615 
11616 // Large ClearArray non-AVX512.
// Matches ClearArray nodes for which is_large() is true, when UseAVX <= 2.
// Same operand classes and effects as rep_stos: cnt (eCXRegI) and base
// (eDIRegP) are USE_KILL, zero (eAXRegI) and the flags are killed, one XMM
// temp is reserved. clear_mem() is called with `true` as its fifth argument
// and knoreg (no opmask register).
// Unlike the small-array rules, the format listing has no leading
// short-length loop: it prints only the UseFastStosb, UseXMMForObjInit or
// plain REP STOS sequence.
// NOTE(review): the fifth argument mirrors is_large() in the predicate —
// presumably the "is large" flag; confirm against clear_mem's signature.
11617 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11618   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11619   match(Set dummy (ClearArray cnt base));
11620   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11621   format %{ $$template
11622     if (UseFastStosb) {
11623        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11624        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11625        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11626     } else if (UseXMMForObjInit) {
11627        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11628        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11629        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11630        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11631        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11632        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11633        $$emit$$"ADD     0x40,RAX\n\t"
11634        $$emit$$"# L_zero_64_bytes:\n\t"
11635        $$emit$$"SUB     0x8,RCX\n\t"
11636        $$emit$$"JGE     L_loop\n\t"
11637        $$emit$$"ADD     0x4,RCX\n\t"
11638        $$emit$$"JL      L_tail\n\t"
11639        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11640        $$emit$$"ADD     0x20,RAX\n\t"
11641        $$emit$$"SUB     0x4,RCX\n\t"
11642        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11643        $$emit$$"ADD     0x4,RCX\n\t"
11644        $$emit$$"JLE     L_end\n\t"
11645        $$emit$$"DEC     RCX\n\t"
11646        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11647        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11648        $$emit$$"ADD     0x8,RAX\n\t"
11649        $$emit$$"DEC     RCX\n\t"
11650        $$emit$$"JGE     L_sloop\n\t"
11651        $$emit$$"# L_end:\n\t"
11652     } else {
11653        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11654        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11655        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11656     }
11657     $$emit$$"# DONE"
11658   %}
11659   ins_encode %{
11660     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11661                  $tmp$$XMMRegister, true, knoreg);
11662   %}
11663   ins_pipe( pipe_slow );
11664 %}
11665 
11666 // Large ClearArray AVX512.
// Matches ClearArray nodes for which is_large() is true, when UseAVX > 2.
// Differences from rep_stos_large that are visible here: the XMM temp is a
// legRegD, and an opmask register (kReg ktmp) is reserved as an extra TEMP
// and passed to clear_mem() in place of knoreg.
// cnt (eCXRegI) and base (eDIRegP) are consumed (USE_KILL); zero (eAXRegI)
// and the flags are killed. The format listing is identical text to
// rep_stos_large.
// NOTE(review): the fifth clear_mem argument (`true`) mirrors is_large() in
// the predicate — presumably the "is large" flag; confirm.
11667 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11668   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11669   match(Set dummy (ClearArray cnt base));
11670   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11671   format %{ $$template
11672     if (UseFastStosb) {
11673        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11674        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11675        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11676     } else if (UseXMMForObjInit) {
11677        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11678        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11679        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11680        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11681        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11682        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11683        $$emit$$"ADD     0x40,RAX\n\t"
11684        $$emit$$"# L_zero_64_bytes:\n\t"
11685        $$emit$$"SUB     0x8,RCX\n\t"
11686        $$emit$$"JGE     L_loop\n\t"
11687        $$emit$$"ADD     0x4,RCX\n\t"
11688        $$emit$$"JL      L_tail\n\t"
11689        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11690        $$emit$$"ADD     0x20,RAX\n\t"
11691        $$emit$$"SUB     0x4,RCX\n\t"
11692        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11693        $$emit$$"ADD     0x4,RCX\n\t"
11694        $$emit$$"JLE     L_end\n\t"
11695        $$emit$$"DEC     RCX\n\t"
11696        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11697        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11698        $$emit$$"ADD     0x8,RAX\n\t"
11699        $$emit$$"DEC     RCX\n\t"
11700        $$emit$$"JGE     L_sloop\n\t"
11701        $$emit$$"# L_end:\n\t"
11702     } else {
11703        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11704        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11705        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11706     }
11707     $$emit$$"# DONE"
11708   %}
11709   ins_encode %{
11710     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11711                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11712   %}
11713   ins_pipe( pipe_slow );
11714 %}
11715 
11716 // Small ClearArray AVX512 constant length.
// Matches ClearArray nodes for which is_large() is false and whose count is
// an immediate (immI), when UseAVX > 2 and VM_Version::supports_avx512vlbw().
// Unlike the register-count rules, base is any eRegP and zero is any rRegI
// (no fixed ECX/EDI/EAX classes), and neither cnt nor base is killed: the
// effect list only reserves tmp, zero and ktmp as TEMPs and kills the flags.
// ins_cost(100) is lower than the 125 of rep_stos_evex.
// The encoding calls the clear_mem() overload that takes the count as a
// constant ($cnt$$constant) together with the opmask temp.
11717 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11718 %{
11719   predicate(!((ClearArrayNode*)n)->is_large() &&
11720                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11721   match(Set dummy (ClearArray cnt base));
11722   ins_cost(100);
11723   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11724   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11725   ins_encode %{
11726    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11727   %}
11728   ins_pipe(pipe_slow);
11729 %}
11730 
// StrComp with encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is false.
// Inputs use fixed register classes — str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI — and all four are consumed (USE_KILL); the
// result is an eAXRegI. One XMM temp is reserved and the flags are killed.
// The encoding is a single string_compare() call with knoreg, i.e. without
// an opmask register.
11731 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11732                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11733   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11734   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11735   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11736 
11737   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11738   ins_encode %{
11739     __ string_compare($str1$$Register, $str2$$Register,
11740                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11741                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11742   %}
11743   ins_pipe( pipe_slow );
11744 %}
11745 
// StrComp with encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is true.
// Same fixed input classes as string_compareL (str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI, all USE_KILL; result eAXRegI), plus an opmask
// temp (kReg ktmp) that is passed to string_compare() in place of knoreg.
11746 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11747                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11748   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11749   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11750   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11751 
11752   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11753   ins_encode %{
11754     __ string_compare($str1$$Register, $str2$$Register,
11755                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11756                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11757   %}
11758   ins_pipe( pipe_slow );
11759 %}
11760 
// StrComp with encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is false.
// Inputs use fixed register classes — str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI — and all four are consumed (USE_KILL); the
// result is an eAXRegI. One XMM temp is reserved and the flags are killed.
// The encoding is a single string_compare() call with knoreg, i.e. without
// an opmask register.
11761 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11762                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11763   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11764   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11765   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11766 
11767   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11768   ins_encode %{
11769     __ string_compare($str1$$Register, $str2$$Register,
11770                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11771                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11772   %}
11773   ins_pipe( pipe_slow );
11774 %}
11775 
// StrComp with encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is true.
// Same fixed input classes as string_compareU (str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI, all USE_KILL; result eAXRegI), plus an opmask
// temp (kReg ktmp) that is passed to string_compare() in place of knoreg.
11776 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11777                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11778   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11779   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11780   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11781 
11782   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11783   ins_encode %{
11784     __ string_compare($str1$$Register, $str2$$Register,
11785                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11786                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11787   %}
11788   ins_pipe( pipe_slow );
11789 %}
11790 
// StrComp with encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is false.
// Inputs use fixed register classes — str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI — and all four are consumed (USE_KILL); the
// result is an eAXRegI. One XMM temp is reserved and the flags are killed.
// Operands are handed to string_compare() in declaration order (str1 before
// str2), with knoreg, i.e. without an opmask register.
11791 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11792                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11793   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11794   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11795   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11796 
11797   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11798   ins_encode %{
11799     __ string_compare($str1$$Register, $str2$$Register,
11800                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11801                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11802   %}
11803   ins_pipe( pipe_slow );
11804 %}
11805 
// StrComp with encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is true.
// Same fixed input classes as string_compareLU (str1 eDIRegP, cnt1 eCXRegI,
// str2 eSIRegP, cnt2 eDXRegI, all USE_KILL; result eAXRegI), plus an opmask
// temp (kReg ktmp) that is passed to string_compare() in place of knoreg.
11806 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11807                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11808   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11809   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11810   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11811 
11812   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11813   ins_encode %{
11814     __ string_compare($str1$$Register, $str2$$Register,
11815                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11816                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11817   %}
11818   ins_pipe( pipe_slow );
11819 %}
11820 
// StrComp with encoding StrIntrinsicNode::UL, used when
// VM_Version::supports_avx512vlbw() is false.
// Compared with the LL/UU/LU rules, both the register classes and the call
// arguments are swapped: str1/cnt1 are eSIRegP/eDXRegI, str2/cnt2 are
// eDIRegP/eCXRegI, and string_compare() receives (str2, str1, cnt2, cnt1).
// As a result the helper still sees eDI/eCX as its first string/count and
// eSI/eDX as its second, as in the other variants.
// All four inputs are USE_KILL; the result is an eAXRegI; knoreg is passed
// (no opmask register).
11821 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11822                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11823   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11824   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11825   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11826 
11827   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11828   ins_encode %{
11829     __ string_compare($str2$$Register, $str1$$Register,
11830                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11831                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11832   %}
11833   ins_pipe( pipe_slow );
11834 %}
11835 
// StrComp with encoding StrIntrinsicNode::UL, used when
// VM_Version::supports_avx512vlbw() is true.
// As in string_compareUL, the register classes and the call arguments are
// swapped: str1/cnt1 are eSIRegP/eDXRegI, str2/cnt2 are eDIRegP/eCXRegI, and
// string_compare() receives (str2, str1, cnt2, cnt1).
// An opmask temp (kReg ktmp) is reserved and passed in place of knoreg.
11836 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11837                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11838   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11839   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11840   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11841 
11842   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11843   ins_encode %{
11844     __ string_compare($str2$$Register, $str1$$Register,
11845                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11846                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11847   %}
11848   ins_pipe( pipe_slow );
11849 %}
11850 
11851 // fast string equals
// StrEquals, used when VM_Version::supports_avx512vlbw() is false.
// Inputs use fixed register classes — str1 eDIRegP, str2 eSIRegP,
// cnt eCXRegI — and are consumed (USE_KILL); the result is an eAXRegI.
// Two XMM temps are reserved, and an eBXRegI scratch (tmp3) and the flags
// are killed.
// The work is delegated to arrays_equals() with knoreg (no opmask register).
// NOTE(review): the leading `false` argument presumably selects the
// string (not array) flavour of arrays_equals, and the second `false` is
// annotated "char" in the call — confirm both against the helper's signature.
11852 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11853                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11854   predicate(!VM_Version::supports_avx512vlbw());
11855   match(Set result (StrEquals (Binary str1 str2) cnt));
11856   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11857 
11858   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11859   ins_encode %{
11860     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11861                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11862                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11863   %}
11864 
11865   ins_pipe( pipe_slow );
11866 %}
11867 
// StrEquals, used when VM_Version::supports_avx512vlbw() is true.
// Same operands and effects as string_equals (str1 eDIRegP, str2 eSIRegP,
// cnt eCXRegI all USE_KILL; result eAXRegI; two XMM temps; eBXRegI scratch
// and flags killed), plus an opmask temp (kReg ktmp) that is passed to
// arrays_equals() in place of knoreg.
// NOTE(review): the leading `false` argument presumably selects the
// string (not array) flavour of arrays_equals — confirm against the helper.
11868 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11869                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11870   predicate(VM_Version::supports_avx512vlbw());
11871   match(Set result (StrEquals (Binary str1 str2) cnt));
11872   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11873 
11874   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11875   ins_encode %{
11876     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11877                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11878                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11879   %}
11880 
11881   ins_pipe( pipe_slow );
11882 %}
11883 
11884 
11885 // fast search of substring with known size.
// StrIndexOf with encoding StrIntrinsicNode::LL where the second count is an
// immediate (immI int_cnt2). Requires UseSSE42Intrinsics.
// str1 (eDIRegP), str2 (eSIRegP) and cnt1 (eDXRegI) are consumed (USE_KILL);
// the result is an eBXRegI. cnt2 (eAXRegI) and tmp (eCXRegI) are not inputs
// of the match rule: they are only KILLed and handed to the helper as
// registers.
// The encoding chooses the helper from the constant: 16 or more elements
// uses string_indexofC8(), anything smaller uses string_indexof(); both
// receive the constant as icnt2.
11886 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11887                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11888   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11889   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11890   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11891 
11892   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11893   ins_encode %{
11894     int icnt2 = (int)$int_cnt2$$constant;
11895     if (icnt2 >= 16) {
11896       // IndexOf for constant substrings with size >= 16 elements
11897       // which don't need to be loaded through stack.
11898       __ string_indexofC8($str1$$Register, $str2$$Register,
11899                           $cnt1$$Register, $cnt2$$Register,
11900                           icnt2, $result$$Register,
11901                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11902     } else {
11903       // Small strings are loaded through stack if they cross page boundary.
11904       __ string_indexof($str1$$Register, $str2$$Register,
11905                         $cnt1$$Register, $cnt2$$Register,
11906                         icnt2, $result$$Register,
11907                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11908     }
11909   %}
11910   ins_pipe( pipe_slow );
11911 %}
11912 
11913 // fast search of substring with known size.
// StrIndexOf with encoding StrIntrinsicNode::UU where the second count is an
// immediate (immI int_cnt2). Requires UseSSE42Intrinsics.
// str1 (eDIRegP), str2 (eSIRegP) and cnt1 (eDXRegI) are consumed (USE_KILL);
// the result is an eBXRegI. cnt2 (eAXRegI) and tmp (eCXRegI) are not inputs
// of the match rule: they are only KILLed and handed to the helper as
// registers.
// The encoding chooses the helper from the constant: 8 or more elements uses
// string_indexofC8(), anything smaller uses string_indexof(). The threshold
// is 8 here versus 16 in the LL rule.
11914 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11915                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11916   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11917   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11918   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11919 
11920   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11921   ins_encode %{
11922     int icnt2 = (int)$int_cnt2$$constant;
11923     if (icnt2 >= 8) {
11924       // IndexOf for constant substrings with size >= 8 elements
11925       // which don't need to be loaded through stack.
11926       __ string_indexofC8($str1$$Register, $str2$$Register,
11927                           $cnt1$$Register, $cnt2$$Register,
11928                           icnt2, $result$$Register,
11929                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11930     } else {
11931       // Small strings are loaded through stack if they cross page boundary.
11932       __ string_indexof($str1$$Register, $str2$$Register,
11933                         $cnt1$$Register, $cnt2$$Register,
11934                         icnt2, $result$$Register,
11935                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11936     }
11937   %}
11938   ins_pipe( pipe_slow );
11939 %}
11940 
11941 // fast search of substring with known size.
// StrIndexOf with encoding StrIntrinsicNode::UL where the second count is an
// immediate (immI int_cnt2). Requires UseSSE42Intrinsics.
// str1 (eDIRegP), str2 (eSIRegP) and cnt1 (eDXRegI) are consumed (USE_KILL);
// the result is an eBXRegI. cnt2 (eAXRegI) and tmp (eCXRegI) are not inputs
// of the match rule: they are only KILLed and handed to the helper as
// registers.
// The encoding chooses the helper from the constant with the same threshold
// as the UU rule: 8 or more elements uses string_indexofC8(), anything
// smaller uses string_indexof().
11942 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11943                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11944   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11945   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11946   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11947 
11948   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11949   ins_encode %{
11950     int icnt2 = (int)$int_cnt2$$constant;
11951     if (icnt2 >= 8) {
11952       // IndexOf for constant substrings with size >= 8 elements
11953       // which don't need to be loaded through stack.
11954       __ string_indexofC8($str1$$Register, $str2$$Register,
11955                           $cnt1$$Register, $cnt2$$Register,
11956                           icnt2, $result$$Register,
11957                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11958     } else {
11959       // Small strings are loaded through stack if they cross page boundary.
11960       __ string_indexof($str1$$Register, $str2$$Register,
11961                         $cnt1$$Register, $cnt2$$Register,
11962                         icnt2, $result$$Register,
11963                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11964     }
11965   %}
11966   ins_pipe( pipe_slow );
11967 %}
11968 
// StrIndexOf with encoding StrIntrinsicNode::LL where both counts are in
// registers. Requires UseSSE42Intrinsics.
// str1 (eDIRegP), cnt1 (eDXRegI), str2 (eSIRegP) and cnt2 (eAXRegI) are all
// consumed (USE_KILL); tmp (eCXRegI) and the flags are killed; the result is
// an eBXRegI.
// string_indexof() is called with (-1) in the constant-count position.
// NOTE(review): -1 presumably tells the helper that the substring length is
// not a compile-time constant — confirm against string_indexof's contract.
11969 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11970                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11971   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11972   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11973   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11974 
11975   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11976   ins_encode %{
11977     __ string_indexof($str1$$Register, $str2$$Register,
11978                       $cnt1$$Register, $cnt2$$Register,
11979                       (-1), $result$$Register,
11980                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11981   %}
11982   ins_pipe( pipe_slow );
11983 %}
11984 
// StrIndexOf with encoding StrIntrinsicNode::UU where both counts are in
// registers. Requires UseSSE42Intrinsics.
// str1 (eDIRegP), cnt1 (eDXRegI), str2 (eSIRegP) and cnt2 (eAXRegI) are all
// consumed (USE_KILL); tmp (eCXRegI) and the flags are killed; the result is
// an eBXRegI.
// string_indexof() is called with (-1) in the constant-count position.
// NOTE(review): -1 presumably tells the helper that the substring length is
// not a compile-time constant — confirm against string_indexof's contract.
11985 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11986                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11987   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11988   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11989   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11990 
11991   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11992   ins_encode %{
11993     __ string_indexof($str1$$Register, $str2$$Register,
11994                       $cnt1$$Register, $cnt2$$Register,
11995                       (-1), $result$$Register,
11996                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11997   %}
11998   ins_pipe( pipe_slow );
11999 %}
12000 
// StrIndexOf with encoding StrIntrinsicNode::UL where both counts are in
// registers. Requires UseSSE42Intrinsics.
// str1 (eDIRegP), cnt1 (eDXRegI), str2 (eSIRegP) and cnt2 (eAXRegI) are all
// consumed (USE_KILL); tmp (eCXRegI) and the flags are killed; the result is
// an eBXRegI.
// string_indexof() is called with (-1) in the constant-count position.
// NOTE(review): -1 presumably tells the helper that the substring length is
// not a compile-time constant — confirm against string_indexof's contract.
12001 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12002                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12003   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12004   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12005   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12006 
12007   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12008   ins_encode %{
12009     __ string_indexof($str1$$Register, $str2$$Register,
12010                       $cnt1$$Register, $cnt2$$Register,
12011                       (-1), $result$$Register,
12012                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12013   %}
12014   ins_pipe( pipe_slow );
12015 %}
12016 
// StrIndexOfChar with encoding StrIntrinsicNode::U. Requires
// UseSSE42Intrinsics.
// str1 (eDIRegP), cnt1 (eDXRegI) and ch (eAXRegI) are consumed (USE_KILL);
// the result is an eBXRegI. Three XMM temps and an eCXRegI temp are
// reserved, and the flags are killed. Note that tmp is declared TEMP here,
// not KILL as in the StrIndexOf rules.
// The work is delegated to string_indexof_char().
12017 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12018                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12019   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12020   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12021   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12022   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12023   ins_encode %{
12024     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12025                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12026   %}
12027   ins_pipe( pipe_slow );
12028 %}
12029 
12030 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12031                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar rule for nodes whose encoding is StrIntrinsicNode::L
        // (the format string labels this the StringLatin1 variant). Only selected
        // when UseSSE42Intrinsics is enabled.
12032   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12033   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
        // Same operand effects as the U variant: inputs are USE_KILL, the XMM
        // registers and tmp are TEMPs, flags are killed.
12034   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12035   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12036   ins_encode %{
          // Delegates to the Latin1-specific macro assembler routine.
12037     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12038                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12039   %}
12040   ins_pipe( pipe_slow );
12041 %}
12042 
12043 
12044 // fast array equals
12045 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12046                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12047 %{
        // AryEq rule for the LL encoding (byte[] per the format string) on CPUs
        // without AVX512VL+BW; the _evex sibling covers the complementary case.
12048   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12049   match(Set result (AryEq ary1 ary2));
        // Both array pointers are USE_KILL; tmp3, tmp4 and the flags are killed;
        // tmp1/tmp2 are scratch XMM registers.
12050   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12051   //ins_cost(300);
12052 
12053   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12054   ins_encode %{
          // "false /* char */" selects the non-char element path; knoreg is passed
          // because no opmask register is allocated for this variant.
12055     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12056                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12057                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12058   %}
12059   ins_pipe( pipe_slow );
12060 %}
12061 
12062 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12063                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12064 %{
        // AryEq rule for the LL encoding (byte[] per the format string) when the
        // CPU reports AVX512VL+BW support. Differs from array_equalsB only in
        // reserving an opmask TEMP (ktmp) and passing it to the assembler routine.
12065   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12066   match(Set result (AryEq ary1 ary2));
12067   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12068   //ins_cost(300);
12069 
        // Note: the format string does not mention $ktmp although it is a TEMP.
12070   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12071   ins_encode %{
12072     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12073                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12074                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12075   %}
12076   ins_pipe( pipe_slow );
12077 %}
12078 
12079 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12080                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12081 %{
        // AryEq rule for the UU encoding (char[] per the format string) on CPUs
        // without AVX512VL+BW; the _evex sibling covers the complementary case.
12082   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12083   match(Set result (AryEq ary1 ary2));
        // Both array pointers are USE_KILL; tmp3, tmp4 and the flags are killed;
        // tmp1/tmp2 are scratch XMM registers.
12084   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12085   //ins_cost(300);
12086 
12087   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12088   ins_encode %{
          // "true /* char */" selects the char element path; knoreg is passed
          // because no opmask register is allocated for this variant.
12089     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12090                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12091                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12092   %}
12093   ins_pipe( pipe_slow );
12094 %}
12095 
12096 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12097                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12098 %{
        // AryEq rule for the UU encoding (char[] per the format string) when the
        // CPU reports AVX512VL+BW support. Differs from array_equalsC only in
        // reserving an opmask TEMP (ktmp) and passing it to the assembler routine.
12099   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12100   match(Set result (AryEq ary1 ary2));
12101   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12102   //ins_cost(300);
12103 
        // Note: the format string does not mention $ktmp although it is a TEMP.
12104   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12105   ins_encode %{
12106     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12107                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12108                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12109   %}
12110   ins_pipe( pipe_slow );
12111 %}
12112 
12113 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12114                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12115 %{
        // HasNegatives rule used when the CPU lacks AVX512VL+BW or lacks BMI2;
        // has_negatives_evex handles the case where both are available.
12116   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12117   match(Set result (HasNegatives ary1 len));
        // The array pointer and length are USE_KILL; tmp3 and the flags are
        // killed; tmp1/tmp2 are scratch XMM registers.
12118   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12119 
12120   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12121   ins_encode %{
          // Both opmask arguments are knoreg in this non-EVEX variant.
12122     __ has_negatives($ary1$$Register, $len$$Register,
12123                      $result$$Register, $tmp3$$Register,
12124                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12125   %}
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12130                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12131 %{
        // HasNegatives rule used when the CPU reports both AVX512VL+BW and BMI2.
        // Identical to has_negatives except that two opmask TEMPs are reserved
        // and handed to the assembler routine.
12132   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12133   match(Set result (HasNegatives ary1 len));
12134   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12135 
        // Note: the format string does not mention $ktmp1/$ktmp2.
12136   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12137   ins_encode %{
12138     __ has_negatives($ary1$$Register, $len$$Register,
12139                      $result$$Register, $tmp3$$Register,
12140                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12141   %}
12142   ins_pipe( pipe_slow );
12143 %}
12144 
12145 
12146 // fast char[] to byte[] compression
12147 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12148                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy rule (char[] to byte[] compression) used when the CPU
        // lacks AVX512VL+BW or lacks BMI2; string_compress_evex handles the case
        // where both are available.
12149   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12150   match(Set result (StrCompressedCopy src (Binary dst len)));
        // src, dst and len are USE_KILL; tmp5 and the flags are killed; tmp1..tmp4
        // are scratch XMM registers.
12151   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12152 
        // The kill list is spelled with operand references so that it reports the
        // registers actually clobbered on this 32-bit port (the previous text
        // named RAX, RCX, RDX and listed the result register as killed).
12153   format %{ "String Compress $src,$dst,$len -> $result    // KILL $src, $dst, $len, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12154   ins_encode %{
          // Both opmask arguments are knoreg in this non-EVEX variant.
12155     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12156                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12157                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12158                            knoreg, knoreg);
12159   %}
12160   ins_pipe( pipe_slow );
12161 %}
12162 
12163 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12164                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy rule (char[] to byte[] compression) used when the CPU
        // reports both AVX512VL+BW and BMI2. Identical to string_compress except
        // that two opmask TEMPs are reserved and handed to the assembler routine.
12165   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12166   match(Set result (StrCompressedCopy src (Binary dst len)));
12167   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12168 
        // The kill list is spelled with operand references so that it reports the
        // registers actually clobbered on this 32-bit port (the previous text
        // named RAX, RCX, RDX and listed the result register as killed).
12169   format %{ "String Compress $src,$dst,$len -> $result    // KILL $src, $dst, $len, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $ktmp1, $ktmp2" %}
12170   ins_encode %{
12171     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12172                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12173                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12174                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12175   %}
12176   ins_pipe( pipe_slow );
12177 %}
12178 
12179 // fast byte[] to char[] inflation
12180 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12181                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy rule (byte[] to char[] inflation) used when the CPU lacks
        // AVX512VL+BW or lacks BMI2. The match target is a Universe "dummy"
        // operand, i.e. no register result is produced.
12182   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12183   match(Set dummy (StrInflatedCopy src (Binary dst len)));
        // src, dst and len are USE_KILL; tmp1 (XMM) and tmp2 are TEMPs; flags are
        // killed.
12184   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12185 
12186   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12187   ins_encode %{
          // knoreg is passed for the opmask argument in this non-EVEX variant.
12188     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12189                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12190   %}
12191   ins_pipe( pipe_slow );
12192 %}
12193 
12194 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12195                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy rule (byte[] to char[] inflation) used when the CPU
        // reports both AVX512VL+BW and BMI2. Identical to string_inflate except
        // that an opmask TEMP is reserved and handed to the assembler routine.
12196   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12197   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12198   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12199 
        // Note: the format string does not mention $ktmp.
12200   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12201   ins_encode %{
12202     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12203                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12204   %}
12205   ins_pipe( pipe_slow );
12206 %}
12207 
12208 // encode char[] to byte[] in ISO_8859_1
12209 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12210                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12211                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray rule for nodes that are NOT flagged is_ascii(); the ASCII
        // case is handled by encode_ascii_array.
12212   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12213   match(Set result (EncodeISOArray src (Binary dst len)));
        // src, dst and len are USE_KILL; tmp5 and the flags are killed; tmp1..tmp4
        // are scratch XMM registers.
12214   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12215 
12216   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12217   ins_encode %{
          // The trailing "false" mirrors the !is_ascii() predicate above.
12218     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12219                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12220                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12221   %}
12222   ins_pipe( pipe_slow );
12223 %}
12224 
12225 // encode char[] to byte[] in ASCII
12226 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12227                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12228                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray rule for nodes flagged is_ascii(). Shares the assembler
        // routine with encode_iso_array and differs only in the final flag.
12229   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12230   match(Set result (EncodeISOArray src (Binary dst len)));
        // src, dst and len are USE_KILL; tmp5 and the flags are killed; tmp1..tmp4
        // are scratch XMM registers.
12231   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12232 
12233   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12234   ins_encode %{
          // The trailing "true" mirrors the is_ascii() predicate above.
12235     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12236                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12237                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12238   %}
12239   ins_pipe( pipe_slow );
12240 %}
12241 
12242 //----------Control Flow Instructions------------------------------------------
12243 // Signed compare Instructions
12244 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
        // Int compare of two registers: matches CmpI and defines the signed
        // flags operand cr. Emitted as opcode 3B /r with op1 and op2.
12245   match(Set cr (CmpI op1 op2));
12246   effect( DEF cr, USE op1, USE op2 );
12247   format %{ "CMP    $op1,$op2" %}
12248   opcode(0x3B);  /* Opcode 3B /r */
12249   ins_encode( OpcP, RegReg( op1, op2) );
12250   ins_pipe( ialu_cr_reg_reg );
12251 %}
12252 
12253 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
        // Int compare of a register against an immediate: matches CmpI and
        // defines the signed flags operand cr (opcode 81 /7).
12254   match(Set cr (CmpI op1 op2));
12255   effect( DEF cr, USE op1 );
12256   format %{ "CMP    $op1,$op2" %}
12257   opcode(0x81,0x07);  /* Opcode 81 /7 */
12258   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
        // NOTE(review): OpcSErm/Con8or32 presumably pick the sign-extended 8-bit
        // form when the constant fits and the 32-bit form otherwise -- confirm
        // against the enc_class definitions.
12259   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12260   ins_pipe( ialu_cr_reg_imm );
12261 %}
12262 
12263 // Cisc-spilled version of compI_eReg
12264 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
        // Int compare of a register against an int loaded from memory: the LoadI
        // is folded into the compare as a memory operand (opcode 3B /r).
12265   match(Set cr (CmpI op1 (LoadI op2)));
12266 
12267   format %{ "CMP    $op1,$op2" %}
        // Costed well above the register/register form (which declares no
        // ins_cost of its own).
12268   ins_cost(500);
12269   opcode(0x3B);  /* Opcode 3B /r */
12270   ins_encode( OpcP, RegMem( op1, op2) );
12271   ins_pipe( ialu_cr_reg_mem );
12272 %}
12273 
12274 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
        // Int compare against the constant zero, emitted as TEST src,src
        // (opcode 85) instead of a CMP with an immediate.
12275   match(Set cr (CmpI src zero));
12276   effect( DEF cr, USE src );
12277 
12278   format %{ "TEST   $src,$src" %}
12279   opcode(0x85);
        // src is passed as both operands of the register/register encoding.
12280   ins_encode( OpcP, RegReg( src, src ) );
12281   ins_pipe( ialu_cr_reg_imm );
12282 %}
12283 
12284 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
        // Folds "(src & con) compared with zero" into a single TEST src,con
        // (opcode F7 /0 with a 32-bit immediate), so no AndI result register is
        // needed.
12285   match(Set cr (CmpI (AndI src con) zero));
12286 
12287   format %{ "TEST   $src,$con" %}
12288   opcode(0xF7,0x00);
12289   ins_encode( OpcP, RegOpc(src), Con32(con) );
12290   ins_pipe( ialu_cr_reg_imm );
12291 %}
12292 
12293 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
        // Folds "(src & *mem) compared with zero" into a single TEST src,[mem]
        // (opcode 85 with a register/memory encoding).
        //
        // The memory operand must sit under a LoadI: AndI takes int inputs, so a
        // bare address operand in that position can never match and the rule
        // would be dead. The other memory-form compares in this section
        // (e.g. compI_eReg_mem) wrap their memory operand the same way.
12294   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12295 
12296   format %{ "TEST   $src,$mem" %}
12297   opcode(0x85);
12298   ins_encode( OpcP, RegMem( src, mem ) );
12299   ins_pipe( ialu_cr_reg_mem );
12300 %}
12301 
12302 // Unsigned compare Instructions; really, same as signed except they
12303 // produce an eFlagsRegU instead of eFlagsReg.
12304 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
        // Unsigned int compare of two registers: same 3B /r encoding as
        // compI_eReg, but the result is typed as the unsigned flags operand.
12305   match(Set cr (CmpU op1 op2));
12306 
12307   format %{ "CMPu   $op1,$op2" %}
12308   opcode(0x3B);  /* Opcode 3B /r */
12309   ins_encode( OpcP, RegReg( op1, op2) );
12310   ins_pipe( ialu_cr_reg_reg );
12311 %}
12312 
12313 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
        // Unsigned int compare of a register against an immediate: same 81 /7
        // encoding as compI_eReg_imm, producing the unsigned flags operand.
12314   match(Set cr (CmpU op1 op2));
12315 
12316   format %{ "CMPu   $op1,$op2" %}
12317   opcode(0x81,0x07);  /* Opcode 81 /7 */
12318   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12319   ins_pipe( ialu_cr_reg_imm );
12320 %}
12321 
12322 // Cisc-spilled version of compU_eReg
12323 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
        // Unsigned int compare of a register against an int loaded from memory;
        // the LoadI is folded into the compare as a memory operand (3B /r).
12324   match(Set cr (CmpU op1 (LoadI op2)));
12325 
12326   format %{ "CMPu   $op1,$op2" %}
        // Costed well above the register/register form (which declares no
        // ins_cost of its own).
12327   ins_cost(500);
12328   opcode(0x3B);  /* Opcode 3B /r */
12329   ins_encode( OpcP, RegMem( op1, op2) );
12330   ins_pipe( ialu_cr_reg_mem );
12331 %}
12332 
12333 // // Cisc-spilled version of cmpU_eReg
12334 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12335 //  match(Set cr (CmpU (LoadI op1) op2));
12336 //
12337 //  format %{ "CMPu   $op1,$op2" %}
12338 //  ins_cost(500);
12339 //  opcode(0x39);  /* Opcode 39 /r */
12340 //  ins_encode( OpcP, RegMem( op1, op2) );
12341 //%}
12342 
12343 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
        // Unsigned int compare against the constant zero, emitted as TEST src,src
        // (opcode 85) and producing the unsigned flags operand.
12344   match(Set cr (CmpU src zero));
12345 
12346   format %{ "TESTu  $src,$src" %}
12347   opcode(0x85);
        // src is passed as both operands of the register/register encoding.
12348   ins_encode( OpcP, RegReg( src, src ) );
12349   ins_pipe( ialu_cr_reg_imm );
12350 %}
12351 
12352 // Unsigned pointer compare Instructions
12353 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
        // Pointer compare of two registers: matches CmpP and produces the unsigned
        // flags operand, using the same 3B /r encoding as the int compares.
12354   match(Set cr (CmpP op1 op2));
12355 
12356   format %{ "CMPu   $op1,$op2" %}
12357   opcode(0x3B);  /* Opcode 3B /r */
12358   ins_encode( OpcP, RegReg( op1, op2) );
12359   ins_pipe( ialu_cr_reg_reg );
12360 %}
12361 
12362 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register against a pointer constant: matches CmpP
        // and produces the unsigned flags operand (opcode 81 /7).
12363   match(Set cr (CmpP op1 op2));
12364 
12365   format %{ "CMPu   $op1,$op2" %}
12366   opcode(0x81,0x07);  /* Opcode 81 /7 */
12367   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12368   ins_pipe( ialu_cr_reg_imm );
12369 %}
12370 
12371 // Cisc-spilled version of compP_eReg
12372 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
        // Pointer compare of a register against a pointer loaded from memory; the
        // LoadP is folded into the compare as a memory operand (3B /r).
        // compP_mem_eReg below has the same match rule but adds a predicate and
        // declares no ins_cost.
12373   match(Set cr (CmpP op1 (LoadP op2)));
12374 
12375   format %{ "CMPu   $op1,$op2" %}
12376   ins_cost(500);
12377   opcode(0x3B);  /* Opcode 3B /r */
12378   ins_encode( OpcP, RegMem( op1, op2) );
12379   ins_pipe( ialu_cr_reg_mem );
12380 %}
12381 
12382 // // Cisc-spilled version of cmpP_eReg
12383 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12384 //  match(Set cr (CmpP (LoadP op1) op2));
12385 //
12386 //  format %{ "CMPu   $op1,$op2" %}
12387 //  ins_cost(500);
12388 //  opcode(0x39);  /* Opcode 39 /r */
12389 //  ins_encode( OpcP, RegMem( op1, op2) );
12390 //%}
12391 
12392 // Compare raw pointer (used in out-of-heap check).
12393 // Only works because non-oop pointers must be raw pointers
12394 // and raw pointers have no anti-dependencies.
12395 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
        // Pointer compare against a pointer loaded from memory, restricted to the
        // case where the type of the load's address input (n->in(2)->in(2)) has
        // reloc() == relocInfo::none. Unlike compP_eReg_mem, which has the same
        // match rule, no ins_cost is declared here.
12396   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12397   match(Set cr (CmpP op1 (LoadP op2)));
12398 
12399   format %{ "CMPu   $op1,$op2" %}
12400   opcode(0x3B);  /* Opcode 3B /r */
12401   ins_encode( OpcP, RegMem( op1, op2) );
12402   ins_pipe( ialu_cr_reg_mem );
12403 %}
12404 
12405 //
12406 // This will generate a signed flags result. This should be ok
12407 // since any compare to a zero should be eq/neq.
12408 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
        // Pointer compare against the null constant, emitted as TEST src,src
        // (opcode 85). Note that the result is typed as the signed eFlagsReg even
        // though other CmpP rules produce eFlagsRegU.
12409   match(Set cr (CmpP src zero));
12410 
12411   format %{ "TEST   $src,$src" %}
12412   opcode(0x85);
        // src is passed as both operands of the register/register encoding.
12413   ins_encode( OpcP, RegReg( src, src ) );
12414   ins_pipe( ialu_cr_reg_imm );
12415 %}
12416 
12417 // Cisc-spilled version of testP_reg
12418 // This will generate a signed flags result. This should be ok
12419 // since any compare to a zero should be eq/neq.
12420 instruct testP_Reg_mem( eFlagsReg cr, memory op, immP0 zero ) %{
        // Null test of a pointer held in memory: TEST [op],0xFFFFFFFF sets the
        // flags without first loading the pointer into a register
        // (opcode F7 /0 with a 32-bit immediate).
        //
        // The zero operand is immP0, the same operand type testP_reg uses: the
        // second input of a CmpP is a pointer constant, so an int-zero operand in
        // this position can never match and the rule would be dead.
12421   match(Set cr (CmpP (LoadP op) zero));
12422 
12423   format %{ "TEST   $op,0xFFFFFFFF" %}
12424   ins_cost(500);
12425   opcode(0xF7);               /* Opcode F7 /0 */
12426   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12427   ins_pipe( ialu_cr_reg_imm );
12428 %}
12429 
12430 // Yanked all unsigned pointer compare operations.
12431 // Pointer compares are done with CmpP which is already unsigned.
12432 
12433 //----------Max and Min--------------------------------------------------------
12434 // Min Instructions
12435 ////
12436 //   *** Min and Max using the conditional move are slower than the
12437 //   *** branch version on a Pentium III.
12438 // // Conditional move for min
12439 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12440 //  effect( USE_DEF op2, USE op1, USE cr );
12441 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12442 //  opcode(0x4C,0x0F);
12443 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12444 //  ins_pipe( pipe_cmov_reg );
12445 //%}
12446 //
12447 //// Min Register with Register (P6 version)
12448 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12449 //  predicate(VM_Version::supports_cmov() );
12450 //  match(Set op2 (MinI op1 op2));
12451 //  ins_cost(200);
12452 //  expand %{
12453 //    eFlagsReg cr;
12454 //    compI_eReg(cr,op1,op2);
12455 //    cmovI_reg_lt(op2,op1,cr);
12456 //  %}
12457 //%}
12458 
12459 // Min Register with Register (generic version)
12460 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // Two-address int minimum: dst = MinI(dst, src). The flags are killed by
        // the emitted sequence.
12461   match(Set dst (MinI dst src));
12462   effect(KILL flags);
12463   ins_cost(300);
12464 
12465   format %{ "MIN    $dst,$src" %}
        // NOTE(review): the actual code comes from the min_enc encoding class
        // defined elsewhere in the file; how (or whether) it uses this opcode
        // value is not visible here -- confirm.
12466   opcode(0xCC);
12467   ins_encode( min_enc(dst,src) );
12468   ins_pipe( pipe_slow );
12469 %}
12470 
12471 // Max Register with Register
12472 //   *** Min and Max using the conditional move are slower than the
12473 //   *** branch version on a Pentium III.
12474 // // Conditional move for max
12475 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12476 //  effect( USE_DEF op2, USE op1, USE cr );
12477 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12478 //  opcode(0x4F,0x0F);
12479 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12480 //  ins_pipe( pipe_cmov_reg );
12481 //%}
12482 //
12483 // // Max Register with Register (P6 version)
12484 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12485 //  predicate(VM_Version::supports_cmov() );
12486 //  match(Set op2 (MaxI op1 op2));
12487 //  ins_cost(200);
12488 //  expand %{
12489 //    eFlagsReg cr;
12490 //    compI_eReg(cr,op1,op2);
12491 //    cmovI_reg_gt(op2,op1,cr);
12492 //  %}
12493 //%}
12494 
12495 // Max Register with Register (generic version)
12496 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // Two-address int maximum: dst = MaxI(dst, src). The flags are killed by
        // the emitted sequence.
12497   match(Set dst (MaxI dst src));
12498   effect(KILL flags);
12499   ins_cost(300);
12500 
12501   format %{ "MAX    $dst,$src" %}
        // NOTE(review): the actual code comes from the max_enc encoding class
        // defined elsewhere in the file; how (or whether) it uses this opcode
        // value is not visible here -- confirm.
12502   opcode(0xCC);
12503   ins_encode( max_enc(dst,src) );
12504   ins_pipe( pipe_slow );
12505 %}
12506 
12507 // ============================================================================
12508 // Counted Loop limit node which represents exact final iterator value.
12509 // Note: the resulting value should fit into integer range since
12510 // counted loops have limit check on overflow.
12511 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
        // Computes the exact final iterator value of a counted loop, as stated in
        // the format string:
        //   limit = init + stride * ((limit - init + stride - 1) / stride)
        // limit is both input and result; limit_hi and tmp are scratch registers
        // and the flags are killed. The intermediate difference is kept as a
        // 64-bit value in the limit_hi:limit pair.
12512   match(Set limit (LoopLimit (Binary init limit) stride));
12513   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12514   ins_cost(300);
12515 
12516   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12517   ins_encode %{
12518     int strd = (int)$stride$$constant;
          // Strides of +1/-1 are not expected to reach this rule.
12519     assert(strd != 1 && strd != -1, "sanity");
12521     // Convert limit to long (EAX:EDX)
12522     __ cdql();
12523     // Convert init to long (init:tmp)
12524     __ movl($tmp$$Register, $init$$Register);
12525     __ sarl($tmp$$Register, 31);
12526     // $limit - $init
12527     __ subl($limit$$Register, $init$$Register);
12528     __ sbbl($limit_hi$$Register, $tmp$$Register);
12529     // + ($stride - 1)
12530     if (strd > 0) {
12531       __ addl($limit$$Register, (strd - 1));
12532       __ adcl($limit_hi$$Register, 0);
12533       __ movl($tmp$$Register, strd);
12534     } else {
            // Negative stride: add (stride + 1), then negate the 64-bit value so
            // the division below is done by the positive magnitude -strd.
12535       __ addl($limit$$Register, (strd + 1));
12536       __ adcl($limit_hi$$Register, -1);
12537       __ lneg($limit_hi$$Register, $limit$$Register);
12538       __ movl($tmp$$Register, -strd);
12539     }
12540     // signed division: (EAX:EDX) / pos_stride
12541     __ idivl($tmp$$Register);
12542     if (strd < 0) {
12543       // restore sign
12544       __ negl($tmp$$Register);
12545     }
12546     // (EAX) * stride
12547     __ mull($tmp$$Register);
12548     // + init (ignore upper bits)
12549     __ addl($limit$$Register, $init$$Register);
12550   %}
12551   ins_pipe( pipe_slow );
12552 %}
12553 
12554 // ============================================================================
12555 // Branch Instructions
12556 // Jump Table
12557 instruct jumpXtnd(rRegI switch_val) %{
        // Jump-table dispatch: matches the ideal Jump node and emits an indirect
        // JMP through the table located at $constantaddress, indexed by
        // switch_val with a scale of 1.
12558   match(Jump switch_val);
12559   ins_cost(350);
12560   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12561   ins_encode %{
12562     // Jump to Address(table_base + switch_reg)
12563     Address index(noreg, $switch_val$$Register, Address::times_1);
12564     __ jump(ArrayAddress($constantaddress, index));
12565   %}
12566   ins_pipe(pipe_jmp);
12567 %}
12568 
12569 // Jump Direct - Label defines a relative address from JMP+1
12570 instruct jmpDir(label labl) %{
        // Unconditional direct jump for the ideal Goto node. The rule declares a
        // fixed size of 5 bytes.
12571   match(Goto);
12572   effect(USE labl);
12573 
12574   ins_cost(300);
12575   format %{ "JMP    $labl" %}
12576   size(5);
12577   ins_encode %{
12578     Label* L = $labl$$label;
          // The "false" argument requests the long form, in line with size(5).
12579     __ jmp(*L, false); // Always long jump
12580   %}
12581   ins_pipe( pipe_jmp );
12582 %}
12583 
12584 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12585 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional direct jump for an If node on signed flags. The rule
        // declares a fixed size of 6 bytes.
12586   match(If cop cr);
12587   effect(USE labl);
12588 
12589   ins_cost(300);
12590   format %{ "J$cop    $labl" %}
12591   size(6);
12592   ins_encode %{
12593     Label* L = $labl$$label;
          // The operand's cmpcode is used directly as the assembler condition.
12594     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12595   %}
12596   ins_pipe( pipe_jcc );
12597 %}
12598 
12599 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12600 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
        // Back-branch of a counted loop (CountedLoopEnd) on signed flags. Only
        // used when the node does not have its vector-mask flag set; the
        // *_and_restoreMask rules cover that case.
12601   predicate(!n->has_vector_mask_set());
12602   match(CountedLoopEnd cop cr);
12603   effect(USE labl);
12604 
12605   ins_cost(300);
12606   format %{ "J$cop    $labl\t# Loop end" %}
12607   size(6);
12608   ins_encode %{
12609     Label* L = $labl$$label;
12610     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12611   %}
12612   ins_pipe( pipe_jcc );
12613 %}
12614 
12615 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12616 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Back-branch of a counted loop (CountedLoopEnd) on unsigned flags. Only
        // used when the node does not have its vector-mask flag set.
12617   predicate(!n->has_vector_mask_set());
12618   match(CountedLoopEnd cop cmp);
12619   effect(USE labl);
12620 
12621   ins_cost(300);
12622   format %{ "J$cop,u  $labl\t# Loop end" %}
12623   size(6);
12624   ins_encode %{
12625     Label* L = $labl$$label;
12626     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12627   %}
12628   ins_pipe( pipe_jcc );
12629 %}
12630 
12631 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Back-branch of a counted loop (CountedLoopEnd) on the eFlagsRegUCF flags
        // operand. Only used when the node does not have its vector-mask flag
        // set. Costed at 200, below the 300 of jmpLoopEnd/jmpLoopEndU.
12632   predicate(!n->has_vector_mask_set());
12633   match(CountedLoopEnd cop cmp);
12634   effect(USE labl);
12635 
12636   ins_cost(200);
12637   format %{ "J$cop,u  $labl\t# Loop end" %}
12638   size(6);
12639   ins_encode %{
12640     Label* L = $labl$$label;
12641     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12642   %}
12643   ins_pipe( pipe_jcc );
12644 %}
12645 
12646 // mask version
12647 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12648 // Bounded mask operand used in the following pattern is needed for
12649 // post-loop multiversioning.
12650 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
        // Counted-loop back-branch on signed flags for nodes that have their
        // vector-mask flag set, used only with PostLoopMultiversioning. After the
        // conditional jump it emits restorevectmask on the ktmp opmask TEMP.
12651   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12652   match(CountedLoopEnd cop cr);
12653   effect(USE labl, TEMP ktmp);
12654 
12655   ins_cost(400);
12656   format %{ "J$cop    $labl\t# Loop end\n\t"
12657             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size covers the jump plus the mask restore.
12658   size(10);
12659   ins_encode %{
12660     Label* L = $labl$$label;
12661     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12662     __ restorevectmask($ktmp$$KRegister);
12663   %}
12664   ins_pipe( pipe_jcc );
12665 %}
12666 
12667 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12668 // Bounded mask operand used in the following pattern is needed for
12669 // post-loop multiversioning.
12670 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
        // Counted-loop back-branch on unsigned flags for nodes that have their
        // vector-mask flag set, used only with PostLoopMultiversioning. After the
        // conditional jump it emits restorevectmask on the ktmp opmask TEMP.
12671   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12672   match(CountedLoopEnd cop cmp);
12673   effect(USE labl, TEMP ktmp);
12674 
12675   ins_cost(400);
12676   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12677             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size covers the jump plus the mask restore.
12678   size(10);
12679   ins_encode %{
12680     Label* L = $labl$$label;
12681     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12682     __ restorevectmask($ktmp$$KRegister);
12683   %}
12684   ins_pipe( pipe_jcc );
12685 %}
12686 
12687 // Bounded mask operand used in the following pattern is needed for
12688 // post-loop multiversioning.
12689 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
        // Counted-loop back-branch on the eFlagsRegUCF flags operand for nodes
        // that have their vector-mask flag set, used only with
        // PostLoopMultiversioning. After the conditional jump it emits
        // restorevectmask on the ktmp opmask TEMP. Costed at 300, below the 400
        // of the signed/unsigned siblings.
12690   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12691   match(CountedLoopEnd cop cmp);
12692   effect(USE labl, TEMP ktmp);
12693 
12694   ins_cost(300);
12695   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12696             "restorevectmask \t# vector mask restore for loops" %}
        // Declared size covers the jump plus the mask restore.
12697   size(10);
12698   ins_encode %{
12699     Label* L = $labl$$label;
12700     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12701     __ restorevectmask($ktmp$$KRegister);
12702   %}
12703   ins_pipe( pipe_jcc );
12704 %}
12705 
12706 // Jump Direct Conditional - using unsigned comparison
12707 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Conditional direct jump for an If node on unsigned flags. The rule
        // declares a fixed size of 6 bytes.
12708   match(If cop cmp);
12709   effect(USE labl);
12710 
12711   ins_cost(300);
12712   format %{ "J$cop,u  $labl" %}
12713   size(6);
12714   ins_encode %{
12715     Label* L = $labl$$label;
12716     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12717   %}
12718   ins_pipe(pipe_jcc);
12719 %}
12720 
12721 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional direct jump for an If node on the eFlagsRegUCF flags
        // operand with a cmpOpUCF condition. Costed at 200, below the 300 of
        // jmpCon/jmpConU.
12722   match(If cop cmp);
12723   effect(USE labl);
12724 
12725   ins_cost(200);
12726   format %{ "J$cop,u  $labl" %}
12727   size(6);
12728   ins_encode %{
12729     Label* L = $labl$$label;
12730     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12731   %}
12732   ins_pipe(pipe_jcc);
12733 %}
12734 
12735 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional jump for an If node with a cmpOpUCF2 condition. The encoder
        // accepts only equal and notEqual, and emits a two-instruction sequence
        // that also examines the parity flag:
        //   notEqual: jump to labl on parity OR on notEqual.
        //   equal:    skip over the jump on parity, otherwise jump on equal.
        // No size() is declared for this rule, unlike the single-jcc rules.
12736   match(If cop cmp);
12737   effect(USE labl);
12738 
12739   ins_cost(200);
12740   format %{ $$template
12741     if ($cop$$cmpcode == Assembler::notEqual) {
12742       $$emit$$"JP,u   $labl\n\t"
12743       $$emit$$"J$cop,u   $labl"
12744     } else {
12745       $$emit$$"JP,u   done\n\t"
12746       $$emit$$"J$cop,u   $labl\n\t"
12747       $$emit$$"done:"
12748     }
12749   %}
12750   ins_encode %{
12751     Label* l = $labl$$label;
12752     if ($cop$$cmpcode == Assembler::notEqual) {
            // Parity set also takes the branch in the notEqual case.
12753       __ jcc(Assembler::parity, *l, false);
12754       __ jcc(Assembler::notEqual, *l, false);
12755     } else if ($cop$$cmpcode == Assembler::equal) {
            // Parity set must not take the branch: hop over the jcc with a short
            // jump to a local label.
12756       Label done;
12757       __ jccb(Assembler::parity, done);
12758       __ jcc(Assembler::equal, *l, false);
12759       __ bind(done);
12760     } else {
            // Any other condition code is treated as a fatal error.
12761        ShouldNotReachHere();
12762     }
12763   %}
12764   ins_pipe(pipe_jcc);
12765 %}
12766 
12767 // ============================================================================
12768 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12769 // array for an instance of the superklass.  Set a hidden internal cache on a
12770 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12771 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12772 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Slow-path half of a subtype check producing a register result: per the
        // format listing, it scans sub's secondary-supers array for super with
        // REPNE SCASD, updates the secondary-super cache on a hit, and leaves
        // result zero on a hit / non-zero on a miss. rcx and the flags are
        // killed.
12773   match(Set result (PartialSubtypeCheck sub super));
12774   effect( KILL rcx, KILL cr );
12775 
12776   ins_cost(1100);  // slightly larger than the next version
        // The XOR line carries the same "#" comment marker as the other
        // annotated lines of the listing.
12777   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12778             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12779             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12780             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12781             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12782             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12783             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12784      "miss:\t" %}
12785 
        // Primary opcode 0x1 tells the shared encoding class to emit the final
        // XOR that zeroes the result on a hit.
12786   opcode(0x1); // Force a XOR of EDI
12787   ins_encode( enc_PartialSubtypeCheck() );
12788   ins_pipe( pipe_slow );
12789 %}
12790 
// Flags-producing form: matches (CmpP (PartialSubtypeCheck sub super) zero)
// directly into the flags register $cr, so only the Z/NZ outcome is consumed.
// rcx and $result are declared KILLed (the scan still uses them as scratch).
// Uses the same enc_PartialSubtypeCheck encoding, but with opcode(0x0) so the
// trailing XOR of EDI is omitted; the format listing accordingly stops after
// the cache update.  Cost 1000 is lower than the value-producing variant.
12791 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12792   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12793   effect( KILL rcx, KILL result );
12794 
12795   ins_cost(1000);
12796   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12797             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12798             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12799             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12800             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12801             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12802      "miss:\t" %}
12803 
12804   opcode(0x0);  // No need to XOR EDI
12805   ins_encode( enc_PartialSubtypeCheck() );
12806   ins_pipe( pipe_slow );
12807 %}
12808 
12809 // ============================================================================
12810 // Branch Instructions -- short offset versions
12811 //
12812 // These instructions are used to replace jumps of a long offset (the default
12813 // match) with jumps of a shorter offset.  These instructions are all tagged
12814 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12815 // match rules in general matching.  Instead, the ADLC generates a conversion
12816 // method in the MachNode which can be used to do in-place replacement of the
12817 // long variant with the shorter variant.  The compiler will determine if a
12818 // branch can be taken by the is_short_branch_offset() predicate in the machine
12819 // specific code section of the file.
12820 
12821 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset unconditional jump for a Goto node.
// Emitted with jmpb and declared as exactly 2 bytes via size(2).
// ins_short_branch(1) tags this as a short-branch variant (see the section
// comment above: such rules are substituted in place for the long form).
12822 instruct jmpDir_short(label labl) %{
12823   match(Goto);
12824   effect(USE labl);
12825 
12826   ins_cost(300);
12827   format %{ "JMP,s  $labl" %}
12828   size(2);
12829   ins_encode %{
12830     Label* L = $labl$$label;
12831     __ jmpb(*L);
12832   %}
12833   ins_pipe( pipe_jmp );
12834   ins_short_branch(1);
12835 %}
12836 
12837 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset conditional jump for an If node using cmpOp on eFlagsReg.
// The condition code comes from $cop$$cmpcode and is emitted with jccb;
// size(2) declares the 2-byte encoding and ins_short_branch(1) marks this
// rule as a short-branch variant.
12838 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12839   match(If cop cr);
12840   effect(USE labl);
12841 
12842   ins_cost(300);
12843   format %{ "J$cop,s  $labl" %}
12844   size(2);
12845   ins_encode %{
12846     Label* L = $labl$$label;
12847     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12848   %}
12849   ins_pipe( pipe_jcc );
12850   ins_short_branch(1);
12851 %}
12852 
12853 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset conditional jump closing a counted loop (CountedLoopEnd node)
// using cmpOp on eFlagsReg.  Emitted with jccb as a 2-byte instruction and
// tagged as a short-branch variant via ins_short_branch(1).
12854 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12855   match(CountedLoopEnd cop cr);
12856   effect(USE labl);
12857 
12858   ins_cost(300);
12859   format %{ "J$cop,s  $labl\t# Loop end" %}
12860   size(2);
12861   ins_encode %{
12862     Label* L = $labl$$label;
12863     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12864   %}
12865   ins_pipe( pipe_jcc );
12866   ins_short_branch(1);
12867 %}
12868 
12869 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset CountedLoopEnd branch for the cmpOpU / eFlagsRegU operand
// pair.  Same 2-byte jccb encoding as the cmpOp variant; only the operand
// classes and the format suffix (",us") differ.
12870 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12871   match(CountedLoopEnd cop cmp);
12872   effect(USE labl);
12873 
12874   ins_cost(300);
12875   format %{ "J$cop,us $labl\t# Loop end" %}
12876   size(2);
12877   ins_encode %{
12878     Label* L = $labl$$label;
12879     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12880   %}
12881   ins_pipe( pipe_jcc );
12882   ins_short_branch(1);
12883 %}
12884 
// Short-offset CountedLoopEnd branch for the cmpOpUCF / eFlagsRegUCF operand
// pair.  A single jccb on $cop$$cmpcode, declared as 2 bytes, tagged as a
// short-branch variant.
12885 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12886   match(CountedLoopEnd cop cmp);
12887   effect(USE labl);
12888 
12889   ins_cost(300);
12890   format %{ "J$cop,us $labl\t# Loop end" %}
12891   size(2);
12892   ins_encode %{
12893     Label* L = $labl$$label;
12894     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12895   %}
12896   ins_pipe( pipe_jcc );
12897   ins_short_branch(1);
12898 %}
12899 
12900 // Jump Direct Conditional - using unsigned comparison
// Short-offset If branch for the cmpOpU / eFlagsRegU operand pair.
// A single jccb on $cop$$cmpcode, declared as 2 bytes, tagged as a
// short-branch variant via ins_short_branch(1).
12901 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12902   match(If cop cmp);
12903   effect(USE labl);
12904 
12905   ins_cost(300);
12906   format %{ "J$cop,us $labl" %}
12907   size(2);
12908   ins_encode %{
12909     Label* L = $labl$$label;
12910     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12911   %}
12912   ins_pipe( pipe_jcc );
12913   ins_short_branch(1);
12914 %}
12915 
// Short-offset If branch for the cmpOpUCF / eFlagsRegUCF operand pair.
// A single jccb on $cop$$cmpcode, declared as 2 bytes, tagged as a
// short-branch variant via ins_short_branch(1).
12916 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12917   match(If cop cmp);
12918   effect(USE labl);
12919 
12920   ins_cost(300);
12921   format %{ "J$cop,us $labl" %}
12922   size(2);
12923   ins_encode %{
12924     Label* L = $labl$$label;
12925     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12926   %}
12927   ins_pipe( pipe_jcc );
12928   ins_short_branch(1);
12929 %}
12930 
// Short-offset variant of the two-jump cmpOpUCF2 branch.
// Both jumps are emitted with jccb, so size(4) declares 2 x 2 bytes.
//   notEqual: jump to $labl on parity, and also jump to $labl on notEqual.
//   equal:    a parity jump skips to the local 'done' label, so the equal
//             jump to $labl is only executed when parity is clear.
// Any other condition code reaches ShouldNotReachHere().
// ins_short_branch(1) tags this rule as a short-branch variant.
12931 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12932   match(If cop cmp);
12933   effect(USE labl);
12934 
12935   ins_cost(300);
12936   format %{ $$template
12937     if ($cop$$cmpcode == Assembler::notEqual) {
12938       $$emit$$"JP,u,s   $labl\n\t"
12939       $$emit$$"J$cop,u,s   $labl"
12940     } else {
12941       $$emit$$"JP,u,s   done\n\t"
12942       $$emit$$"J$cop,u,s  $labl\n\t"
12943       $$emit$$"done:"
12944     }
12945   %}
12946   size(4);
12947   ins_encode %{
12948     Label* l = $labl$$label;
12949     if ($cop$$cmpcode == Assembler::notEqual) {
12950       __ jccb(Assembler::parity, *l);
12951       __ jccb(Assembler::notEqual, *l);
12952     } else if ($cop$$cmpcode == Assembler::equal) {
12953       Label done;
12954       __ jccb(Assembler::parity, done);
12955       __ jccb(Assembler::equal, *l);
12956       __ bind(done);
12957     } else {
12958        ShouldNotReachHere();
12959     }
12960   %}
12961   ins_pipe(pipe_jcc);
12962   ins_short_branch(1);
12963 %}
12964 
12965 // ============================================================================
12966 // Long Compare
12967 //
12968 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12969 // is tricky.  The flavor of compare used depends on whether we are testing
12970 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12971 // The GE test is the negated LT test.  The LE test can be had by commuting
12972 // the operands (yielding a GE test) and then negating; negate again for the
12973 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12974 // NE test is negated from that.
12975 
12976 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12977 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12978 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12979 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12980 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12981 // foo match ends up with the wrong leaf.  One fix is to not match both
12982 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12983 // both forms beat the trinary form of long-compare and both are very useful
12984 // on Intel which has so few registers.
12985 
12986 // Manifest a CmpL result in an integer register.  Very painful.
12987 // This is the test to avoid.
// Three-way long compare (CmpL3) materialized in the integer register $dst:
//   $dst =  0 when src1 == src2
//   $dst = -1 when src1 <  src2
//   $dst = +1 when src1 >  src2
// $dst is zeroed first.  The high halves are compared with the signed
// conditions (less / greater); only if they are equal are the low halves
// compared, using the unsigned 'below' condition.  The flags are KILLed.
12988 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12989   match(Set dst (CmpL3 src1 src2));
12990   effect( KILL flags );
12991   ins_cost(1000);
12992   format %{ "XOR    $dst,$dst\n\t"
12993             "CMP    $src1.hi,$src2.hi\n\t"
12994             "JLT,s  m_one\n\t"
12995             "JGT,s  p_one\n\t"
12996             "CMP    $src1.lo,$src2.lo\n\t"
12997             "JB,s   m_one\n\t"
12998             "JEQ,s  done\n"
12999     "p_one:\tINC    $dst\n\t"
13000             "JMP,s  done\n"
13001     "m_one:\tDEC    $dst\n"
13002      "done:" %}
13003   ins_encode %{
13004     Label p_one, m_one, done;
13005     __ xorptr($dst$$Register, $dst$$Register);
13006     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13007     __ jccb(Assembler::less,    m_one);
13008     __ jccb(Assembler::greater, p_one);
13009     __ cmpl($src1$$Register, $src2$$Register);
13010     __ jccb(Assembler::below,   m_one);
13011     __ jccb(Assembler::equal,   done);
13012     __ bind(p_one);
13013     __ incrementl($dst$$Register);
13014     __ jmpb(done);
13015     __ bind(m_one);
13016     __ decrementl($dst$$Register);
13017     __ bind(done);
13018   %}
13019   ins_pipe( pipe_slow );
13020 %}
13021 
13022 //======
13023 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13024 // compares.  Can be used for LE or GT compares by reversing arguments.
13025 // NOT GOOD FOR EQ/NE tests.
// Long-vs-zero compare producing flagsReg_long_LTGE.
// Only the high half is examined: the format shows TEST $src.hi,$src.hi,
// encoded as opcode 0x85 followed by RegReg_Hi2(src, src).
13026 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13027   match( Set flags (CmpL src zero ));
13028   ins_cost(100);
13029   format %{ "TEST   $src.hi,$src.hi" %}
13030   opcode(0x85);
13031   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13032   ins_pipe( ialu_cr_reg_reg );
13033 %}
13034 
13035 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13036 // compares.  Can be used for LE or GT compares by reversing arguments.
13037 // NOT GOOD FOR EQ/NE tests.
// Long-vs-long compare producing flagsReg_long_LTGE.
// Per the format listing: CMP on the low halves, then the high half of src1
// is copied into the TEMP register $tmp and SBB'ed with src2's high half, so
// the source registers themselves are not modified.
// Encoding is delegated to long_cmp_flags2(src1, src2, tmp).
13038 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13039   match( Set flags (CmpL src1 src2 ));
13040   effect( TEMP tmp );
13041   ins_cost(300);
13042   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13043             "MOV    $tmp,$src1.hi\n\t"
13044             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13045   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13046   ins_pipe( ialu_cr_reg_reg );
13047 %}
13048 
13049 // Long compares reg < zero/reg OR reg >= zero/reg.
13050 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on flagsReg_long_LTGE flags.  The predicate admits this rule only
// when the Bool test under the If is BoolTest::lt or BoolTest::ge; the rule
// then expands to the ordinary jmpCon instruct.
13051 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13052   match(If cmp flags);
13053   effect(USE labl);
13054   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13055   expand %{
13056     jmpCon(cmp,flags,labl);    // JLT or JGE...
13057   %}
13058 %}
13059 
13060 //======
13061 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13062 // compares.  Can be used for LE or GT compares by reversing arguments.
13063 // NOT GOOD FOR EQ/NE tests.
// Unsigned-long-vs-zero compare (CmpUL) producing flagsReg_ulong_LTGE.
// Same encoding as the signed variant: TEST $src.hi,$src.hi via opcode 0x85
// and RegReg_Hi2(src, src).
13064 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13065   match(Set flags (CmpUL src zero));
13066   ins_cost(100);
13067   format %{ "TEST   $src.hi,$src.hi" %}
13068   opcode(0x85);
13069   ins_encode(OpcP, RegReg_Hi2(src, src));
13070   ins_pipe(ialu_cr_reg_reg);
13071 %}
13072 
13073 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13074 // compares.  Can be used for LE or GT compares by reversing arguments.
13075 // NOT GOOD FOR EQ/NE tests.
// Unsigned-long-vs-unsigned-long compare (CmpUL) producing
// flagsReg_ulong_LTGE.  Per the format listing: CMP on the low halves, then
// MOV/SBB on the high halves through the TEMP register $tmp.
// Encoding is delegated to long_cmp_flags2(src1, src2, tmp).
13076 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13077   match(Set flags (CmpUL src1 src2));
13078   effect(TEMP tmp);
13079   ins_cost(300);
13080   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13081             "MOV    $tmp,$src1.hi\n\t"
13082             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13083   ins_encode(long_cmp_flags2(src1, src2, tmp));
13084   ins_pipe(ialu_cr_reg_reg);
13085 %}
13086 
13087 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13088 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on flagsReg_ulong_LTGE flags with an unsigned condition operand
// (cmpOpU).  Admitted only when the Bool test under the If is BoolTest::lt
// or BoolTest::ge; expands to jmpCon.
13089 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13090   match(If cmp flags);
13091   effect(USE labl);
13092   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13093   expand %{
13094     jmpCon(cmp, flags, labl);    // JLT or JGE...
13095   %}
13096 %}
13097 
13098 // Compare 2 longs and CMOVE longs.
// Conditional move of a long (register source) driven by
// flagsReg_long_LTGE flags.  Requires CMOV support and a Bool test of
// BoolTest::lt or BoolTest::ge.  Two CMOVs are emitted, one for the low
// halves (RegReg_Lo2) and one for the high halves (RegReg_Hi2), each
// preceded by enc_cmov(cmp).
13099 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13100   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13101   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13102   ins_cost(400);
13103   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13104             "CMOV$cmp $dst.hi,$src.hi" %}
13105   opcode(0x0F,0x40);
13106   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13107   ins_pipe( pipe_cmov_reg_long );
13108 %}
13109 
// Conditional move of a long loaded from memory (LoadL src) driven by
// flagsReg_long_LTGE flags.  Requires CMOV support and a Bool test of
// BoolTest::lt or BoolTest::ge.  Two CMOVs are emitted: RegMem for the low
// half and RegMem_Hi for the high half, each preceded by enc_cmov(cmp).
13110 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13111   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13112   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13113   ins_cost(500);
13114   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13115             "CMOV$cmp $dst.hi,$src.hi" %}
13116   opcode(0x0F,0x40);
13117   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13118   ins_pipe( pipe_cmov_reg_long );
13119 %}
13120 
13121 // Compare 2 longs and CMOVE ints.
// Conditional move of an int (register source) whose condition comes from a
// long compare (flagsReg_long_LTGE).  Requires CMOV support and a Bool test
// of BoolTest::lt or BoolTest::ge.  Single CMOV: enc_cmov(cmp) + RegReg.
13122 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13123   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13124   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13125   ins_cost(200);
13126   format %{ "CMOV$cmp $dst,$src" %}
13127   opcode(0x0F,0x40);
13128   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13129   ins_pipe( pipe_cmov_reg );
13130 %}
13131 
// Conditional move of an int loaded from memory (LoadI src) whose condition
// comes from a long compare (flagsReg_long_LTGE).  Requires CMOV support and
// a Bool test of BoolTest::lt or BoolTest::ge.  Single CMOV: enc_cmov(cmp)
// + RegMem.
13132 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13133   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13134   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13135   ins_cost(250);
13136   format %{ "CMOV$cmp $dst,$src" %}
13137   opcode(0x0F,0x40);
13138   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13139   ins_pipe( pipe_cmov_mem );
13140 %}
13141 
13142 // Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer (CMoveP, register source) whose condition
// comes from a long compare (flagsReg_long_LTGE).  Requires CMOV support and
// a Bool test of BoolTest::lt or BoolTest::ge.  Single CMOV: enc_cmov(cmp)
// + RegReg.
13143 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13144   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13145   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13146   ins_cost(200);
13147   format %{ "CMOV$cmp $dst,$src" %}
13148   opcode(0x0F,0x40);
13149   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13150   ins_pipe( pipe_cmov_reg );
13151 %}
13152 
13153 // Compare 2 longs and CMOVE doubles
// Conditional move of a double in a regDPR register, with the condition
// coming from a long compare (flagsReg_long_LTGE).  Expands to
// fcmovDPR_regS.
// The predicate requires UseSSE<=1 AND a Bool test of lt or ge.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ge' case would bypass the UseSSE guard.
13154 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13155   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13156   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13157   ins_cost(200);
13158   expand %{
13159     fcmovDPR_regS(cmp,flags,dst,src);
13160   %}
13161 %}
13162 
13163 // Compare 2 longs and CMOVE doubles
// Conditional move of a double in a regD register, with the condition
// coming from a long compare (flagsReg_long_LTGE).  Expands to fcmovD_regS.
// The predicate requires UseSSE>=2 AND a Bool test of lt or ge.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ge' case would bypass the UseSSE guard.
13164 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13165   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13166   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13167   ins_cost(200);
13168   expand %{
13169     fcmovD_regS(cmp,flags,dst,src);
13170   %}
13171 %}
13172 
// Conditional move of a float in a regFPR register, with the condition
// coming from a long compare (flagsReg_long_LTGE).  Expands to
// fcmovFPR_regS.
// The predicate requires UseSSE==0 AND a Bool test of lt or ge.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ge' case would bypass the UseSSE guard.
13173 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13174   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13175   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13176   ins_cost(200);
13177   expand %{
13178     fcmovFPR_regS(cmp,flags,dst,src);
13179   %}
13180 %}
13181 
// Conditional move of a float in a regF register, with the condition
// coming from a long compare (flagsReg_long_LTGE).  Expands to fcmovF_regS.
// The predicate requires UseSSE>=1 AND a Bool test of lt or ge.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ge' case would bypass the UseSSE guard.
13182 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13183   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13184   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13185   ins_cost(200);
13186   expand %{
13187     fcmovF_regS(cmp,flags,dst,src);
13188   %}
13189 %}
13190 
13191 //======
13192 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Long-vs-zero compare producing flagsReg_long_EQNE.
// Per the format listing: the low half is copied into the TEMP register
// $tmp and ORed with the high half, so the flags reflect whether the whole
// 64-bit value is zero.  Encoding is delegated to long_cmp_flags0(src, tmp).
13193 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13194   match( Set flags (CmpL src zero ));
13195   effect(TEMP tmp);
13196   ins_cost(200);
13197   format %{ "MOV    $tmp,$src.lo\n\t"
13198             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13199   ins_encode( long_cmp_flags0( src, tmp ) );
13200   ins_pipe( ialu_reg_reg_long );
13201 %}
13202 
13203 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Long-vs-long compare producing flagsReg_long_EQNE.
// Per the format listing: the low halves are compared first; if they differ
// the high-half compare is skipped, otherwise the high halves are compared.
// No temporary is needed.  Encoding is delegated to long_cmp_flags1.
13204 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13205   match( Set flags (CmpL src1 src2 ));
13206   ins_cost(200+300);
13207   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13208             "JNE,s  skip\n\t"
13209             "CMP    $src1.hi,$src2.hi\n\t"
13210      "skip:\t" %}
13211   ins_encode( long_cmp_flags1( src1, src2 ) );
13212   ins_pipe( ialu_cr_reg_reg );
13213 %}
13214 
13215 // Long compare reg == zero/reg OR reg != zero/reg
13216 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on flagsReg_long_EQNE flags.  Admitted only when the Bool test
// under the If is BoolTest::eq or BoolTest::ne; expands to jmpCon.
13217 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13218   match(If cmp flags);
13219   effect(USE labl);
13220   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13221   expand %{
13222     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13223   %}
13224 %}
13225 
13226 //======
13227 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Unsigned-long-vs-zero compare (CmpUL) producing flagsReg_ulong_EQNE.
// Per the format listing: MOV low half into the TEMP register $tmp, then OR
// with the high half.  Encoding is delegated to long_cmp_flags0(src, tmp).
13228 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13229   match(Set flags (CmpUL src zero));
13230   effect(TEMP tmp);
13231   ins_cost(200);
13232   format %{ "MOV    $tmp,$src.lo\n\t"
13233             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13234   ins_encode(long_cmp_flags0(src, tmp));
13235   ins_pipe(ialu_reg_reg_long);
13236 %}
13237 
13238 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Unsigned-long-vs-unsigned-long compare (CmpUL) producing
// flagsReg_ulong_EQNE.  Per the format listing: compare the low halves, and
// compare the high halves only when the low halves are equal.
// Encoding is delegated to long_cmp_flags1(src1, src2).
13239 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13240   match(Set flags (CmpUL src1 src2));
13241   ins_cost(200+300);
13242   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13243             "JNE,s  skip\n\t"
13244             "CMP    $src1.hi,$src2.hi\n\t"
13245      "skip:\t" %}
13246   ins_encode(long_cmp_flags1(src1, src2));
13247   ins_pipe(ialu_cr_reg_reg);
13248 %}
13249 
13250 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13251 // Just a wrapper for a normal branch, plus the predicate test.
// Branch on flagsReg_ulong_EQNE flags with an unsigned condition operand
// (cmpOpU).  Admitted only when the Bool test under the If is BoolTest::eq
// or BoolTest::ne; expands to jmpCon.
13252 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13253   match(If cmp flags);
13254   effect(USE labl);
13255   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13256   expand %{
13257     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13258   %}
13259 %}
13260 
13261 // Compare 2 longs and CMOVE longs.
// Conditional move of a long (register source) driven by
// flagsReg_long_EQNE flags.  Requires CMOV support and a Bool test of
// BoolTest::eq or BoolTest::ne.  Two CMOVs are emitted, one for the low
// halves (RegReg_Lo2) and one for the high halves (RegReg_Hi2).
13262 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13263   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13264   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13265   ins_cost(400);
13266   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13267             "CMOV$cmp $dst.hi,$src.hi" %}
13268   opcode(0x0F,0x40);
13269   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13270   ins_pipe( pipe_cmov_reg_long );
13271 %}
13272 
// Conditional move of a long loaded from memory (LoadL src) driven by
// flagsReg_long_EQNE flags.  Requires CMOV support and a Bool test of
// BoolTest::eq or BoolTest::ne.  Two CMOVs are emitted: RegMem for the low
// half and RegMem_Hi for the high half.
13273 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13274   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13275   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13276   ins_cost(500);
13277   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13278             "CMOV$cmp $dst.hi,$src.hi" %}
13279   opcode(0x0F,0x40);
13280   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13281   ins_pipe( pipe_cmov_reg_long );
13282 %}
13283 
13284 // Compare 2 longs and CMOVE ints.
// Conditional move of an int (register source) whose condition comes from a
// long compare (flagsReg_long_EQNE).  Requires CMOV support and a Bool test
// of BoolTest::eq or BoolTest::ne.  Single CMOV: enc_cmov(cmp) + RegReg.
13285 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13286   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13287   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13288   ins_cost(200);
13289   format %{ "CMOV$cmp $dst,$src" %}
13290   opcode(0x0F,0x40);
13291   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13292   ins_pipe( pipe_cmov_reg );
13293 %}
13294 
// Conditional move of an int loaded from memory (LoadI src) whose condition
// comes from a long compare (flagsReg_long_EQNE).  Requires CMOV support and
// a Bool test of BoolTest::eq or BoolTest::ne.  Single CMOV: enc_cmov(cmp)
// + RegMem.
13295 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13296   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13297   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13298   ins_cost(250);
13299   format %{ "CMOV$cmp $dst,$src" %}
13300   opcode(0x0F,0x40);
13301   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13302   ins_pipe( pipe_cmov_mem );
13303 %}
13304 
13305 // Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer (CMoveP, register source) whose condition
// comes from a long compare (flagsReg_long_EQNE).  Requires CMOV support and
// a Bool test of BoolTest::eq or BoolTest::ne.  Single CMOV: enc_cmov(cmp)
// + RegReg.
13306 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13307   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13308   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13309   ins_cost(200);
13310   format %{ "CMOV$cmp $dst,$src" %}
13311   opcode(0x0F,0x40);
13312   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13313   ins_pipe( pipe_cmov_reg );
13314 %}
13315 
13316 // Compare 2 longs and CMOVE doubles
// Conditional move of a double in a regDPR register, with the condition
// coming from a long compare (flagsReg_long_EQNE).  Expands to
// fcmovDPR_regS.
// The predicate requires UseSSE<=1 AND a Bool test of eq or ne.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ne' case would bypass the UseSSE guard.
13317 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13318   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13319   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13320   ins_cost(200);
13321   expand %{
13322     fcmovDPR_regS(cmp,flags,dst,src);
13323   %}
13324 %}
13325 
13326 // Compare 2 longs and CMOVE doubles
// Conditional move of a double in a regD register, with the condition
// coming from a long compare (flagsReg_long_EQNE).  Expands to fcmovD_regS.
// The predicate requires UseSSE>=2 AND a Bool test of eq or ne.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ne' case would bypass the UseSSE guard.
13327 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13328   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13329   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13330   ins_cost(200);
13331   expand %{
13332     fcmovD_regS(cmp,flags,dst,src);
13333   %}
13334 %}
13335 
// Conditional move of a float in a regFPR register, with the condition
// coming from a long compare (flagsReg_long_EQNE).  Expands to
// fcmovFPR_regS.
// The predicate requires UseSSE==0 AND a Bool test of eq or ne.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ne' case would bypass the UseSSE guard.
13336 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13337   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13338   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13339   ins_cost(200);
13340   expand %{
13341     fcmovFPR_regS(cmp,flags,dst,src);
13342   %}
13343 %}
13344 
// Conditional move of a float in a regF register, with the condition
// coming from a long compare (flagsReg_long_EQNE).  Expands to fcmovF_regS.
// The predicate requires UseSSE>=1 AND a Bool test of eq or ne.  The
// disjunction is parenthesized explicitly: '&&' binds tighter than '||', so
// without the parentheses the 'ne' case would bypass the UseSSE guard.
13345 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13346   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13347   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13348   ins_cost(200);
13349   expand %{
13350     fcmovF_regS(cmp,flags,dst,src);
13351   %}
13352 %}
13353 
13354 //======
13355 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13356 // Same as cmpL_reg_flags_LEGT except must negate src
// Long-vs-zero compare producing flagsReg_long_LEGT.
// Per the format listing: the TEMP register $tmp is zeroed, compared against
// the low half of $src, then SBB'ed with the high half -- i.e. the flags are
// those of (0 - $src), which is why consumers must use the commuted test.
// Encoding is delegated to long_cmp_flags3(src, tmp).
13357 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13358   match( Set flags (CmpL src zero ));
13359   effect( TEMP tmp );
13360   ins_cost(300);
13361   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13362             "CMP    $tmp,$src.lo\n\t"
13363             "SBB    $tmp,$src.hi\n\t" %}
13364   ins_encode( long_cmp_flags3(src, tmp) );
13365   ins_pipe( ialu_reg_reg_long );
13366 %}
13367 
13368 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13369 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13370 // requires a commuted test to get the same result.
// Long-vs-long compare producing flagsReg_long_LEGT.
// Reuses long_cmp_flags2 but passes the operands swapped (src2, src1), so
// the flags describe src2 compared with src1; consumers pair this with a
// commuted condition operand.  $tmp is a TEMP holding src2's high half.
13371 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13372   match( Set flags (CmpL src1 src2 ));
13373   effect( TEMP tmp );
13374   ins_cost(300);
13375   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13376             "MOV    $tmp,$src2.hi\n\t"
13377             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13378   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13379   ins_pipe( ialu_cr_reg_reg );
13380 %}
13381 
13382 // Long compares reg <= zero/reg OR reg > zero/reg.
13383 // Just a wrapper for a normal branch, plus the predicate test
// Branch on flagsReg_long_LEGT flags using the commuted condition operand
// (cmpOp_commute).  Admitted only when the Bool test under the If is
// BoolTest::gt or BoolTest::le; expands to jmpCon.
13384 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13385   match(If cmp flags);
13386   effect(USE labl);
13387   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13388   ins_cost(300);
13389   expand %{
13390     jmpCon(cmp,flags,labl);    // JGT or JLE...
13391   %}
13392 %}
13393 
13394 //======
13395 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13396 // Same as cmpUL_reg_flags_LEGT except must negate src
// Unsigned-long-vs-zero compare (CmpUL) producing flagsReg_ulong_LEGT.
// Per the format listing: the TEMP register $tmp is zeroed, compared against
// the low half of $src, then SBB'ed with the high half; consumers must use
// the commuted test.  Encoding is delegated to long_cmp_flags3(src, tmp).
13397 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13398   match(Set flags (CmpUL src zero));
13399   effect(TEMP tmp);
13400   ins_cost(300);
13401   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13402             "CMP    $tmp,$src.lo\n\t"
13403             "SBB    $tmp,$src.hi\n\t" %}
13404   ins_encode(long_cmp_flags3(src, tmp));
13405   ins_pipe(ialu_reg_reg_long);
13406 %}
13407 
13408 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13409 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13410 // requires a commuted test to get the same result.
// Unsigned-long-vs-unsigned-long compare (CmpUL) producing
// flagsReg_ulong_LEGT.  Reuses long_cmp_flags2 with the operands swapped
// (src2, src1); consumers pair this with a commuted condition operand.
// $tmp is a TEMP holding src2's high half.
13411 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13412   match(Set flags (CmpUL src1 src2));
13413   effect(TEMP tmp);
13414   ins_cost(300);
13415   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13416             "MOV    $tmp,$src2.hi\n\t"
13417             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13418   ins_encode(long_cmp_flags2( src2, src1, tmp));
13419   ins_pipe(ialu_cr_reg_reg);
13420 %}
13421 
13422 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13423 // Just a wrapper for a normal branch, plus the predicate test
// Branch on flagsReg_ulong_LEGT flags using the commuted unsigned condition
// operand (cmpOpU_commute).  Admitted only when the Bool test under the If
// is BoolTest::gt or BoolTest::le; expands to jmpCon.
13424 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13425   match(If cmp flags);
13426   effect(USE labl);
13427   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13428   ins_cost(300);
13429   expand %{
13430     jmpCon(cmp, flags, labl);    // JGT or JLE...
13431   %}
13432 %}
13433 
13434 // Compare 2 longs and CMOVE longs.
// Conditional move of a long (register source) driven by
// flagsReg_long_LEGT flags with a commuted condition operand.  Requires CMOV
// support and a Bool test of BoolTest::le or BoolTest::gt.  Two CMOVs are
// emitted, one for the low halves (RegReg_Lo2) and one for the high halves
// (RegReg_Hi2).
13435 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13436   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13437   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13438   ins_cost(400);
13439   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13440             "CMOV$cmp $dst.hi,$src.hi" %}
13441   opcode(0x0F,0x40);
13442   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13443   ins_pipe( pipe_cmov_reg_long );
13444 %}
13445 
// Conditional move of a long loaded from memory (LoadL src) driven by
// flagsReg_long_LEGT flags with a commuted condition operand.  Requires CMOV
// support and a Bool test of BoolTest::le or BoolTest::gt.  Two CMOVs are
// emitted: RegMem for the low half and RegMem_Hi for the high half.
13446 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13447   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13448   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13449   ins_cost(500);
13450   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13451             "CMOV$cmp $dst.hi,$src.hi+4" %}
13452   opcode(0x0F,0x40);
13453   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13454   ins_pipe( pipe_cmov_reg_long );
13455 %}
13456 
// Unsigned counterpart of the long register CMOV for LE/GT: the condition
// operand is cmpOpU_commute and the flags come from flagsReg_ulong_LEGT.
// Requires CMOV support and a Bool test of BoolTest::le or BoolTest::gt.
// Two CMOVs are emitted (RegReg_Lo2, then RegReg_Hi2).
13457 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13458   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13459   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13460   ins_cost(400);
13461   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13462             "CMOV$cmp $dst.hi,$src.hi" %}
13463   opcode(0x0F,0x40);
13464   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13465   ins_pipe( pipe_cmov_reg_long );
13466 %}
13467 
// Unsigned counterpart of the long memory CMOV for LE/GT: the condition
// operand is cmpOpU_commute and the flags come from flagsReg_ulong_LEGT.
// Requires CMOV support and a Bool test of BoolTest::le or BoolTest::gt.
// Two CMOVs are emitted (RegMem for the low half, RegMem_Hi for the high).
13468 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13469   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13470   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13471   ins_cost(500);
13472   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13473             "CMOV$cmp $dst.hi,$src.hi+4" %}
13474   opcode(0x0F,0x40);
13475   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13476   ins_pipe( pipe_cmov_reg_long );
13477 %}
13478 
13479 // Compare 2 longs and CMOVE ints.
13480 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
        // Register-to-register conditional move of an int whose condition
        // flags are of type flagsReg_long_LEGT.
        // Requires CMOV support, and the Bool node under the CMoveI must test
        // 'le' or 'gt'.
13481   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13482   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13483   ins_cost(200);
13484   format %{ "CMOV$cmp $dst,$src" %}
13485   opcode(0x0F,0x40);
        // A single CMOV: enc_cmov(cmp) followed by the RegReg operand encoding.
13486   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13487   ins_pipe( pipe_cmov_reg );
13488 %}
13489 
13490 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory ($src) into $dst; the
        // LoadI is folded into the instruction by the match rule below.
        // Requires CMOV support, and the Bool node under the CMoveI must test
        // 'le' or 'gt'.
13491   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13492   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13493   ins_cost(250);
13494   format %{ "CMOV$cmp $dst,$src" %}
13495   opcode(0x0F,0x40);
        // A single CMOV: enc_cmov(cmp) followed by the RegMem operand encoding.
13496   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13497   ins_pipe( pipe_cmov_mem );
13498 %}
13499 
13500 // Compare 2 longs and CMOVE ptrs.
13501 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
        // Register-to-register conditional move of a pointer whose condition
        // flags are of type flagsReg_long_LEGT.
        // Requires CMOV support, and the Bool node under the CMoveP must test
        // 'le' or 'gt'.
13502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13503   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13504   ins_cost(200);
13505   format %{ "CMOV$cmp $dst,$src" %}
13506   opcode(0x0F,0x40);
        // A single CMOV: enc_cmov(cmp) followed by the RegReg operand encoding.
13507   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13508   ins_pipe( pipe_cmov_reg );
13509 %}
13510 
13511 // Compare 2 longs and CMOVE doubles
13512 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
        // Conditional move between two regDPR registers whose condition flags
        // are of type flagsReg_long_LEGT. Emits nothing itself; it expands to
        // fcmovDPR_regS.
        // Applies only when UseSSE<=1 and the Bool node under the CMoveD tests
        // 'le' or 'gt'. The le/gt pair is parenthesised on purpose: '&&' binds
        // tighter than '||', so without the parentheses a 'gt' test satisfies
        // the predicate regardless of UseSSE.
13513   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13514   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
13516   expand %{
13517     fcmovDPR_regS(cmp,flags,dst,src);
13518   %}
13519 %}
13520 
13521 // Compare 2 longs and CMOVE doubles
13522 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
        // Conditional move between two regD registers whose condition flags
        // are of type flagsReg_long_LEGT. Emits nothing itself; it expands to
        // fcmovD_regS.
        // Applies only when UseSSE>=2 and the Bool node under the CMoveD tests
        // 'le' or 'gt'. The le/gt pair is parenthesised on purpose: '&&' binds
        // tighter than '||', so without the parentheses a 'gt' test satisfies
        // the predicate regardless of UseSSE.
13523   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13524   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13525   ins_cost(200);
13526   expand %{
13527     fcmovD_regS(cmp,flags,dst,src);
13528   %}
13529 %}
13530 
13531 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
        // Conditional move between two regFPR registers whose condition flags
        // are of type flagsReg_long_LEGT. Emits nothing itself; it expands to
        // fcmovFPR_regS.
        // Applies only when UseSSE==0 and the Bool node under the CMoveF tests
        // 'le' or 'gt'. The le/gt pair is parenthesised on purpose: '&&' binds
        // tighter than '||', so without the parentheses a 'gt' test satisfies
        // the predicate regardless of UseSSE.
13532   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13533   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13534   ins_cost(200);
13535   expand %{
13536     fcmovFPR_regS(cmp,flags,dst,src);
13537   %}
13538 %}
13539 
13540 
13541 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
        // Conditional move between two regF registers whose condition flags
        // are of type flagsReg_long_LEGT. Emits nothing itself; it expands to
        // fcmovF_regS.
        // Applies only when UseSSE>=1 and the Bool node under the CMoveF tests
        // 'le' or 'gt'. The le/gt pair is parenthesised on purpose: '&&' binds
        // tighter than '||', so without the parentheses a 'gt' test satisfies
        // the predicate regardless of UseSSE.
13542   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13543   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13544   ins_cost(200);
13545   expand %{
13546     fcmovF_regS(cmp,flags,dst,src);
13547   %}
13548 %}
13549 
13550 
13551 // ============================================================================
13552 // Procedure Call/Return Instructions
13553 // Call Java Static Instruction
13554 // Note: If this code changes, the corresponding ret_addr_offset() and
13555 //       compute_padding() functions will have to be adjusted.
13556 instruct CallStaticJavaDirect(method meth) %{
        // Machine rule for the CallStaticJava ideal node; $meth is only read.
13557   match(CallStaticJava);
13558   effect(USE meth);
13559 
13560   ins_cost(300);
13561   format %{ "CALL,static " %}
13562   opcode(0xE8); /* E8 cd */
        // Encoding classes are emitted in the order listed: pre_call_resets,
        // the static call itself, call_epilog, then post_call_FPU.
13563   ins_encode( pre_call_resets,
13564               Java_Static_Call( meth ),
13565               call_epilog,
13566               post_call_FPU );
13567   ins_pipe( pipe_slow );
        // Requests 4-byte alignment for this instruction.
        // NOTE(review): presumably this is what compute_padding() enforces --
        // confirm before changing the encoding.
13568   ins_alignment(4);
13569 %}
13570 
13571 // Call Java Dynamic Instruction
13572 // Note: If this code changes, the corresponding ret_addr_offset() and
13573 //       compute_padding() functions will have to be adjusted.
13574 instruct CallDynamicJavaDirect(method meth) %{
        // Machine rule for the CallDynamicJava ideal node; $meth is only read.
13575   match(CallDynamicJava);
13576   effect(USE meth);
13577 
13578   ins_cost(300);
        // The format shows a MOV of (oop)-1 into EAX ahead of the CALL.
        // NOTE(review): that MOV is presumably emitted inside
        // Java_Dynamic_Call -- confirm in the encoding class.
13579   format %{ "MOV    EAX,(oop)-1\n\t"
13580             "CALL,dynamic" %}
13581   opcode(0xE8); /* E8 cd */
        // Encoding classes are emitted in the order listed: pre_call_resets,
        // the dynamic call itself, call_epilog, then post_call_FPU.
13582   ins_encode( pre_call_resets,
13583               Java_Dynamic_Call( meth ),
13584               call_epilog,
13585               post_call_FPU );
13586   ins_pipe( pipe_slow );
        // Requests 4-byte alignment for this instruction.
13587   ins_alignment(4);
13588 %}
13589 
13590 // Call Runtime Instruction
13591 instruct CallRuntimeDirect(method meth) %{
        // Machine rule for the CallRuntime ideal node; $meth is only read.
        // Unlike the Java call rules in this file, the float stack is freed
        // before the call, no call_epilog is emitted, and no ins_alignment is
        // requested.
13592   match(CallRuntime );
13593   effect(USE meth);
13594 
13595   ins_cost(300);
13596   format %{ "CALL,runtime " %}
13597   opcode(0xE8); /* E8 cd */
13598   // Use FFREEs to clear entries in float stack
13599   ins_encode( pre_call_resets,
13600               FFree_Float_Stack_All,
13601               Java_To_Runtime( meth ),
13602               post_call_FPU );
13603   ins_pipe( pipe_slow );
13604 %}
13605 
13606 // Call runtime without safepoint
13607 instruct CallLeafDirect(method meth) %{
        // Machine rule for the CallLeaf ideal node; $meth is only read.
13608   match(CallLeaf);
13609   effect(USE meth);
13610 
13611   ins_cost(300);
13612   format %{ "CALL_LEAF,runtime " %}
13613   opcode(0xE8); /* E8 cd */
        // Same sequence as CallRuntimeDirect, with Verify_FPU_For_Leaf added
        // between the call and post_call_FPU.
13614   ins_encode( pre_call_resets,
13615               FFree_Float_Stack_All,
13616               Java_To_Runtime( meth ),
13617               Verify_FPU_For_Leaf, post_call_FPU );
13618   ins_pipe( pipe_slow );
13619 %}
13620 
13621 instruct CallLeafNoFPDirect(method meth) %{
        // Machine rule for the CallLeafNoFP ideal node; $meth is only read.
13622   match(CallLeafNoFP);
13623   effect(USE meth);
13624 
13625   ins_cost(300);
13626   format %{ "CALL_LEAF_NOFP,runtime " %}
13627   opcode(0xE8); /* E8 cd */
        // Only pre_call_resets and the call are emitted: no float-stack
        // clearing and no post_call_FPU, unlike CallLeafDirect.
13628   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13629   ins_pipe( pipe_slow );
13630 %}
13631 
13632 
13633 // Return Instruction
13634 // Remove the return address & jump to it.
13635 instruct Ret() %{
        // Machine rule for the Return ideal node: a single RET.
13636   match(Return);
13637   format %{ "RET" %}
13638   opcode(0xC3);
        // OpcP emits the primary opcode declared above (0xC3).
13639   ins_encode(OpcP);
13640   ins_pipe( pipe_jmp );
13641 %}
13642 
13643 // Tail Call; Jump from runtime stub to Java code.
13644 // Also known as an 'interprocedural jump'.
13645 // Target of jump will eventually return to caller.
13646 // TailJump below removes the return address.
13647 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
        // Machine rule for the TailCall ideal node: an indirect JMP through
        // $jump_target. $method_ptr is pinned to EBX by its operand type and
        // is not referenced by the encoding.
13648   match(TailCall jump_target method_ptr);
13649   ins_cost(300);
13650   format %{ "JMP    $jump_target \t# EBX holds method" %}
13651   opcode(0xFF, 0x4);  /* Opcode FF /4 */
        // Primary opcode byte, then the register operand combined with the
        // secondary opcode (/4).
13652   ins_encode( OpcP, RegOpc(jump_target) );
13653   ins_pipe( pipe_jmp );
13654 %}
13655 
13656 
13657 // Tail Jump; remove the return address; jump to target.
13658 // TailCall above leaves the return address around.
13659 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
        // Machine rule for the TailJump ideal node: discard the return address,
        // then jump indirectly through $jump_target. $ex_oop is pinned to EAX
        // by its operand type and is not referenced by the encoding.
13660   match( TailJump jump_target ex_oop );
13661   ins_cost(300);
13662   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13663             "JMP    $jump_target " %}
13664   opcode(0xFF, 0x4);  /* Opcode FF /4 */
        // enc_pop_rdx is emitted first (the POP EDX of the format), followed by
        // the FF /4 indirect jump.
13665   ins_encode( enc_pop_rdx,
13666               OpcP, RegOpc(jump_target) );
13667   ins_pipe( pipe_jmp );
13668 %}
13669 
13670 // Create exception oop: created by stack-crawling runtime code.
13671 // Created exception is now available to this handler, and is setup
13672 // just prior to jumping to this handler.  No code emitted.
13673 instruct CreateException( eAXRegP ex_oop )
13674 %{
        // Machine rule for the CreateEx ideal node. It only tells the register
        // allocator that the result lives in EAX (eAXRegP); the encoding is
        // empty and the declared size is 0 bytes.
13675   match(Set ex_oop (CreateEx));
13676 
13677   size(0);
13678   // use the following format syntax
13679   format %{ "# exception oop is in EAX; no code emitted" %}
13680   ins_encode();
13681   ins_pipe( empty );
13682 %}
13683 
13684 
13685 // Rethrow exception:
13686 // The exception oop will come in the first argument position.
13687 // Then JUMP (not call) to the rethrow stub code.
13688 instruct RethrowException()
13689 %{
        // Machine rule for the Rethrow ideal node. It takes no operands; all
        // code comes from the enc_rethrow encoding class, shown by the format
        // as a JMP to the rethrow stub.
13690   match(Rethrow);
13691 
13692   // use the following format syntax
13693   format %{ "JMP    rethrow_stub" %}
13694   ins_encode(enc_rethrow);
13695   ins_pipe( pipe_jmp );
13696 %}
13697 
13698 // inlined locking and unlocking
13699 
13700 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
        // FastLock rule used when the current compilation has RTM enabled.
        // The result is delivered in the flags register $cr.
13701   predicate(Compile::current()->use_rtm());
13702   match(Set cr (FastLock object box));
        // $box is consumed and clobbered; every other extra operand is scratch.
        // $thread is also a TEMP even though the format's "kills" list omits it.
13703   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13704   ins_cost(300);
13705   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13706   ins_encode %{
          // Load the current thread into $thread before fast_lock uses it.
13707     __ get_thread($thread$$Register);
          // RTM variant of the call: both extra scratch registers, the RTM
          // counters and the compiled method's method_data() are passed, along
          // with 'true' and the compilation's profile_rtm() setting.
13708     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13709                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13710                  _counters, _rtm_counters, _stack_rtm_counters,
13711                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13712                  true, ra_->C->profile_rtm());
13713   %}
13714   ins_pipe(pipe_slow);
13715 %}
13716 
13717 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
        // FastLock rule used when LockingMode is not LM_LIGHTWEIGHT and the
        // current compilation does not use RTM. The result is delivered in the
        // flags register $cr.
13718   predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
13719   match(Set cr (FastLock object box));
        // $box is consumed and clobbered; $tmp, $scr and $thread are scratch.
13720   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13721   ins_cost(300);
13722   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13723   ins_encode %{
          // Load the current thread into $thread before fast_lock uses it.
13724     __ get_thread($thread$$Register);
          // Non-RTM variant of the call: the argument positions that
          // cmpFastLockRTM fills with cx1/cx2, the RTM counters and the method
          // data receive noreg / NULL here, and both trailing flags are false.
13725     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13726                  $scr$$Register, noreg, noreg, $thread$$Register, _counters, NULL, NULL, NULL, false, false);
13727   %}
13728   ins_pipe(pipe_slow);
13729 %}
13730 
13731 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
        // FastUnlock rule used when LockingMode is not LM_LIGHTWEIGHT. It covers
        // both RTM and non-RTM compilations; the RTM setting is passed to
        // fast_unlock as its last argument. The result is delivered in $cr.
13732   predicate(LockingMode != LM_LIGHTWEIGHT);
13733   match(Set cr (FastUnlock object box));
        // $box (pinned to EAX by eAXRegP) is consumed and clobbered; $tmp is
        // scratch.
13734   effect(TEMP tmp, USE_KILL box);
13735   ins_cost(300);
13736   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13737   ins_encode %{
13738     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13739   %}
13740   ins_pipe(pipe_slow);
13741 %}
13742 
13743 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
        // FastLock rule used when LockingMode is LM_LIGHTWEIGHT. The result is
        // delivered in the flags register $cr.
13744   predicate(LockingMode == LM_LIGHTWEIGHT);
13745   match(Set cr (FastLock object box));
        // $box is consumed and clobbered; $eax_reg, $tmp and $thread are
        // scratch. $thread is a TEMP even though the format's "kills" list
        // omits it.
13746   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13747   ins_cost(300);
13748   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13749   ins_encode %{
          // Load the current thread into $thread before the lock helper uses it.
13750     __ get_thread($thread$$Register);
13751     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13752   %}
13753   ins_pipe(pipe_slow);
13754 %}
13755 
13756 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
        // FastUnlock rule used when LockingMode is LM_LIGHTWEIGHT. The second
        // FastUnlock input is bound to $eax_reg (pinned to EAX by eAXRegP).
        // The result is delivered in the flags register $cr.
13757   predicate(LockingMode == LM_LIGHTWEIGHT);
13758   match(Set cr (FastUnlock object eax_reg));
        // $eax_reg is consumed and clobbered; $tmp and $thread are scratch.
        // $thread is a TEMP even though the format's "kills" list omits it.
13759   effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
13760   ins_cost(300);
13761   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13762   ins_encode %{
          // Load the current thread into $thread before the unlock helper uses it.
13763     __ get_thread($thread$$Register);
13764     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13765   %}
13766   ins_pipe(pipe_slow);
13767 %}
13768 
13769 // ============================================================================
13770 // Safepoint Instruction
13771 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
        // Safepoint poll: a TEST of EAX against the word addressed by $poll.
        // $poll is only read; the flags register is clobbered.
13772   match(SafePoint poll);
13773   effect(KILL cr, USE poll);
13774 
13775   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13776   ins_cost(125);
13777   // EBP would need size(3)
13778   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13779   ins_encode %{
          // Tag the instruction with a poll_type relocation.
13780     __ relocate(relocInfo::poll_type);
          // Remember where the instruction starts so its opcode byte can be
          // checked once it has been emitted.
13781     address pre_pc = __ pc();
13782     __ testl(rax, Address($poll$$Register, 0));
          // The first emitted byte must be 0x85, as the message below states.
13784     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13785   %}
13786   ins_pipe(ialu_reg_mem);
13787 %}
13788 
13789 
13790 // ============================================================================
13791 // This name is KNOWN by the ADLC and cannot be changed.
13792 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13793 // for this guy.
13794 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
        // Machine rule for the ThreadLocal ideal node: loads the current
        // thread pointer into $dst.
13795   match(Set dst (ThreadLocal));
        // $dst is defined here; the flags register is clobbered.
13796   effect(DEF dst, KILL cr);
13797 
13798   format %{ "MOV    $dst, Thread::current()" %}
13799   ins_encode %{
          // Turn the allocated register encoding into a Register and let the
          // macro assembler's get_thread() fill it.
13800     Register dstReg = as_Register($dst$$reg);
13801     __ get_thread(dstReg);
13802   %}
13803   ins_pipe( ialu_reg_fat );
13804 %}
13805 
13806 
13807 
13808 //----------PEEPHOLE RULES-----------------------------------------------------
13809 // These must follow all instruction definitions as they use the names
13810 // defined in the instructions definitions.
13811 //
13812 // peepmatch ( root_instr_name [preceding_instruction]* );
13813 //
13814 // peepconstraint %{
13815 // (instruction_number.operand_name relational_op instruction_number.operand_name
13816 //  [, ...] );
13817 // // instruction numbers are zero-based using left to right order in peepmatch
13818 //
13819 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13820 // // provide an instruction_number.operand_name for each operand that appears
13821 // // in the replacement instruction's match rule
13822 //
13823 // ---------VM FLAGS---------------------------------------------------------
13824 //
13825 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13826 //
13827 // Each peephole rule is given an identifying number starting with zero and
13828 // increasing by one in the order seen by the parser.  An individual peephole
13829 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13830 // on the command-line.
13831 //
13832 // ---------CURRENT LIMITATIONS----------------------------------------------
13833 //
13834 // Only match adjacent instructions in same basic block
13835 // Only equality constraints
13836 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13837 // Only one replacement instruction
13838 //
13839 // ---------EXAMPLE----------------------------------------------------------
13840 //
13841 // // pertinent parts of existing instructions in architecture description
13842 // instruct movI(rRegI dst, rRegI src) %{
13843 //   match(Set dst (CopyI src));
13844 // %}
13845 //
13846 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13847 //   match(Set dst (AddI dst src));
13848 //   effect(KILL cr);
13849 // %}
13850 //
13851 // // Change (inc mov) to lea
13852 // peephole %{
13853 //   // increment preceded by register-register move
13854 //   peepmatch ( incI_eReg movI );
13855 //   // require that the destination register of the increment
13856 //   // match the destination register of the move
13857 //   peepconstraint ( 0.dst == 1.dst );
13858 //   // construct a replacement instruction that sets
13859 //   // the destination to ( move's source register + one )
13860 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13861 // %}
13862 //
13863 // Implementation no longer uses movX instructions since
13864 // machine-independent system no longer uses CopyX nodes.
13865 //
13866 // peephole %{
13867 //   peepmatch ( incI_eReg movI );
13868 //   peepconstraint ( 0.dst == 1.dst );
13869 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13870 // %}
13871 //
13872 // peephole %{
13873 //   peepmatch ( decI_eReg movI );
13874 //   peepconstraint ( 0.dst == 1.dst );
13875 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13876 // %}
13877 //
13878 // peephole %{
13879 //   peepmatch ( addI_eReg_imm movI );
13880 //   peepconstraint ( 0.dst == 1.dst );
13881 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13882 // %}
13883 //
13884 // peephole %{
13885 //   peepmatch ( addP_eReg_imm movP );
13886 //   peepconstraint ( 0.dst == 1.dst );
13887 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13888 // %}
13889 
13890 // // Change load of spilled value to only a spill
13891 // instruct storeI(memory mem, rRegI src) %{
13892 //   match(Set mem (StoreI mem src));
13893 // %}
13894 //
13895 // instruct loadI(rRegI dst, memory mem) %{
13896 //   match(Set dst (LoadI mem));
13897 // %}
13898 //
13899 peephole %{
        // Drops a load that re-reads a value just stored from the same register.
        // Instruction 0 is the root (loadI) and instruction 1 is the
        // instruction preceding it (storeI).
13900   peepmatch ( loadI storeI );
        // The store's source register must be the load's destination register,
        // and both must use the same memory operand.
13901   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
        // Replace the pair with a single storeI built from the store's operands.
13902   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13903 %}
13904 
13905 //----------SMARTSPILL RULES---------------------------------------------------
13906 // These must follow all instruction definitions as they use the names
13907 // defined in the instructions definitions.