1 //
    2 // Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// the assembly for a MachNode. During that emission the FPU stack is pushed,
// which temporarily makes FPR1 == st(1). At any safepoint, however, the stack
// will not have this extra element, so FPR1 == st(0) from the oopMap
// viewpoint. This numbering quirk forces the instruction encodings to play
// games with the register encoding to correct for the 0/1 offset. See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding the EBP,EDI pair)
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
  258 
// The sign masks built further below (see fp_signmask_pool) provide 128-bit
// aligned bitmasks to the XMM instructions, to allow sign-masking or
// sign-bit flipping.  They allow fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
  265     // Post-loop multi-versioning expects mask to be present in K1 register, till the time
  266     // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
  267     // curruption of value held in K1 register.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
  275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  277   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  278   // of 128-bits operands for SSE instructions.
  279   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  280   // Store the value to a 128-bits operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
  286 // Buffer for 128-bits masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
  308 // !!!!! Special hack to get all type of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  415     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  419     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  631 
  632   C->output()->set_frame_complete(cbuf.insts_size());
  633 
  634   if (C->has_mach_constant_base_node()) {
  635     // NOTE: We set the table base offset here because users might be
  636     // emitted before MachConstantBaseNode.
  637     ConstantTable& constant_table = C->output()->constant_table();
  638     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  639   }
  640 }
  641 
  642 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  643   return MachNode::size(ra_); // too many variables; just compute it the hard way
  644 }
  645 
  646 int MachPrologNode::reloc() const {
  647   return 0; // a large enough number
  648 }
  649 
  650 //=============================================================================
  651 #ifndef PRODUCT
  652 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  653   Compile *C = ra_->C;
  654   int framesize = C->output()->frame_size_in_bytes();
  655   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  656   // Remove two words for return addr and rbp,
  657   framesize -= 2*wordSize;
  658 
  659   if (C->max_vector_size() > 16) {
  660     st->print("VZEROUPPER");
  661     st->cr(); st->print("\t");
  662   }
  663   if (C->in_24_bit_fp_mode()) {
  664     st->print("FLDCW  standard control word");
  665     st->cr(); st->print("\t");
  666   }
  667   if (framesize) {
  668     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  669     st->cr(); st->print("\t");
  670   }
  671   st->print_cr("POPL   EBP"); st->print("\t");
  672   if (do_polling() && C->is_method_compilation()) {
  673     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  674               "JA       #safepoint_stub\t"
  675               "# Safepoint: poll for GC");
  676   }
  677 }
  678 #endif
  679 
  680 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  681   Compile *C = ra_->C;
  682   MacroAssembler _masm(&cbuf);
  683 
  684   if (C->max_vector_size() > 16) {
  685     // Clear upper bits of YMM registers when current compiled code uses
  686     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  687     _masm.vzeroupper();
  688   }
  689   // If method set FPU control word, restore to standard control word
  690   if (C->in_24_bit_fp_mode()) {
  691     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  692   }
  693 
  694   int framesize = C->output()->frame_size_in_bytes();
  695   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  696   // Remove two words for return addr and rbp,
  697   framesize -= 2*wordSize;
  698 
  699   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  700 
  701   if (framesize >= 128) {
  702     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  703     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  704     emit_d32(cbuf, framesize);
  705   } else if (framesize) {
  706     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  707     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  708     emit_d8(cbuf, framesize);
  709   }
  710 
  711   emit_opcode(cbuf, 0x58 | EBP_enc);
  712 
  713   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  714     __ reserved_stack_check();
  715   }
  716 
  717   if (do_polling() && C->is_method_compilation()) {
  718     Register thread = as_Register(EBX_enc);
  719     MacroAssembler masm(&cbuf);
  720     __ get_thread(thread);
  721     Label dummy_label;
  722     Label* code_stub = &dummy_label;
  723     if (!C->output()->in_scratch_emit_size()) {
  724       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  725     }
  726     __ relocate(relocInfo::poll_return_type);
  727     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  728   }
  729 }
  730 
  731 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  732   return MachNode::size(ra_); // too many variables; just compute it
  733                               // the hard way
  734 }
  735 
  736 int MachEpilogNode::reloc() const {
  737   return 0; // a large enough number
  738 }
  739 
  740 const Pipeline * MachEpilogNode::pipeline() const {
  741   return MachNode::pipeline_class();
  742 }
  743 
  744 //=============================================================================
  745 
  746 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  747 static enum RC rc_class( OptoReg::Name reg ) {
  748 
  749   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  750   if (OptoReg::is_stack(reg)) return rc_stack;
  751 
  752   VMReg r = OptoReg::as_VMReg(reg);
  753   if (r->is_Register()) return rc_int;
  754   if (r->is_FloatRegister()) {
  755     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  756     return rc_float;
  757   }
  758   assert(r->is_XMMRegister(), "must be");
  759   return rc_xmm;
  760 }
  761 
  762 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  763                         int opcode, const char *op_str, int size, outputStream* st ) {
  764   if( cbuf ) {
  765     emit_opcode  (*cbuf, opcode );
  766     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  767 #ifndef PRODUCT
  768   } else if( !do_size ) {
  769     if( size != 0 ) st->print("\n\t");
  770     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  771       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  772       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  773     } else { // FLD, FST, PUSH, POP
  774       st->print("%s [ESP + #%d]",op_str,offset);
  775     }
  776 #endif
  777   }
  778   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  779   return size+3+offset_size;
  780 }
  781 
  782 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  783 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  784                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  785   int in_size_in_bits = Assembler::EVEX_32bit;
  786   int evex_encoding = 0;
  787   if (reg_lo+1 == reg_hi) {
  788     in_size_in_bits = Assembler::EVEX_64bit;
  789     evex_encoding = Assembler::VEX_W;
  790   }
  791   if (cbuf) {
  792     MacroAssembler _masm(cbuf);
  793     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  794     //                          it maps more cases to single byte displacement
  795     _masm.set_managed();
  796     if (reg_lo+1 == reg_hi) { // double move?
  797       if (is_load) {
  798         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  799       } else {
  800         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  801       }
  802     } else {
  803       if (is_load) {
  804         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  805       } else {
  806         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  807       }
  808     }
  809 #ifndef PRODUCT
  810   } else if (!do_size) {
  811     if (size != 0) st->print("\n\t");
  812     if (reg_lo+1 == reg_hi) { // double move?
  813       if (is_load) st->print("%s %s,[ESP + #%d]",
  814                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  815                               Matcher::regName[reg_lo], offset);
  816       else         st->print("MOVSD  [ESP + #%d],%s",
  817                               offset, Matcher::regName[reg_lo]);
  818     } else {
  819       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  820                               Matcher::regName[reg_lo], offset);
  821       else         st->print("MOVSS  [ESP + #%d],%s",
  822                               offset, Matcher::regName[reg_lo]);
  823     }
  824 #endif
  825   }
  826   bool is_single_byte = false;
  827   if ((UseAVX > 2) && (offset != 0)) {
  828     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  829   }
  830   int offset_size = 0;
  831   if (UseAVX > 2 ) {
  832     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  833   } else {
  834     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  835   }
  836   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  837   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  838   return size+5+offset_size;
  839 }
  840 
  841 
  842 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  843                             int src_hi, int dst_hi, int size, outputStream* st ) {
  844   if (cbuf) {
  845     MacroAssembler _masm(cbuf);
  846     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  847     _masm.set_managed();
  848     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  849       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  851     } else {
  852       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  854     }
  855 #ifndef PRODUCT
  856   } else if (!do_size) {
  857     if (size != 0) st->print("\n\t");
  858     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  859       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  860         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  861       } else {
  862         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  863       }
  864     } else {
  865       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  866         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  867       } else {
  868         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  869       }
  870     }
  871 #endif
  872   }
  873   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  874   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  875   int sz = (UseAVX > 2) ? 6 : 4;
  876   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  877       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  878   return size + sz;
  879 }
  880 
  881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  882                             int src_hi, int dst_hi, int size, outputStream* st ) {
  883   // 32-bit
  884   if (cbuf) {
  885     MacroAssembler _masm(cbuf);
  886     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  887     _masm.set_managed();
  888     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  889              as_Register(Matcher::_regEncode[src_lo]));
  890 #ifndef PRODUCT
  891   } else if (!do_size) {
  892     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  893 #endif
  894   }
  895   return (UseAVX> 2) ? 6 : 4;
  896 }
  897 
  898 
  899 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  900                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  901   // 32-bit
  902   if (cbuf) {
  903     MacroAssembler _masm(cbuf);
  904     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  905     _masm.set_managed();
  906     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  907              as_XMMRegister(Matcher::_regEncode[src_lo]));
  908 #ifndef PRODUCT
  909   } else if (!do_size) {
  910     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  911 #endif
  912   }
  913   return (UseAVX> 2) ? 6 : 4;
  914 }
  915 
  916 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  917   if( cbuf ) {
  918     emit_opcode(*cbuf, 0x8B );
  919     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  920 #ifndef PRODUCT
  921   } else if( !do_size ) {
  922     if( size != 0 ) st->print("\n\t");
  923     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  924 #endif
  925   }
  926   return size+2;
  927 }
  928 
  929 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  930                                  int offset, int size, outputStream* st ) {
  931   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  932     if( cbuf ) {
  933       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  934       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  935 #ifndef PRODUCT
  936     } else if( !do_size ) {
  937       if( size != 0 ) st->print("\n\t");
  938       st->print("FLD    %s",Matcher::regName[src_lo]);
  939 #endif
  940     }
  941     size += 2;
  942   }
  943 
  944   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  945   const char *op_str;
  946   int op;
  947   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  948     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  949     op = 0xDD;
  950   } else {                   // 32-bit store
  951     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  952     op = 0xD9;
  953     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  954   }
  955 
  956   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  957 }
  958 
  959 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  960 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  961                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  962 
  963 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  964                             int stack_offset, int reg, uint ireg, outputStream* st);
  965 
  966 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  967                                      int dst_offset, uint ireg, outputStream* st) {
  968   if (cbuf) {
  969     MacroAssembler _masm(cbuf);
  970     switch (ireg) {
  971     case Op_VecS:
  972       __ pushl(Address(rsp, src_offset));
  973       __ popl (Address(rsp, dst_offset));
  974       break;
  975     case Op_VecD:
  976       __ pushl(Address(rsp, src_offset));
  977       __ popl (Address(rsp, dst_offset));
  978       __ pushl(Address(rsp, src_offset+4));
  979       __ popl (Address(rsp, dst_offset+4));
  980       break;
  981     case Op_VecX:
  982       __ movdqu(Address(rsp, -16), xmm0);
  983       __ movdqu(xmm0, Address(rsp, src_offset));
  984       __ movdqu(Address(rsp, dst_offset), xmm0);
  985       __ movdqu(xmm0, Address(rsp, -16));
  986       break;
  987     case Op_VecY:
  988       __ vmovdqu(Address(rsp, -32), xmm0);
  989       __ vmovdqu(xmm0, Address(rsp, src_offset));
  990       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  991       __ vmovdqu(xmm0, Address(rsp, -32));
  992       break;
  993     case Op_VecZ:
  994       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  995       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  996       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  997       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  998       break;
  999     default:
 1000       ShouldNotReachHere();
 1001     }
 1002 #ifndef PRODUCT
 1003   } else {
 1004     switch (ireg) {
 1005     case Op_VecS:
 1006       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1007                 "popl    [rsp + #%d]",
 1008                 src_offset, dst_offset);
 1009       break;
 1010     case Op_VecD:
 1011       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1012                 "popq    [rsp + #%d]\n\t"
 1013                 "pushl   [rsp + #%d]\n\t"
 1014                 "popq    [rsp + #%d]",
 1015                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1016       break;
 1017      case Op_VecX:
 1018       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1019                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1020                 "movdqu  [rsp + #%d], xmm0\n\t"
 1021                 "movdqu  xmm0, [rsp - #16]",
 1022                 src_offset, dst_offset);
 1023       break;
 1024     case Op_VecY:
 1025       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1026                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1027                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1028                 "vmovdqu xmm0, [rsp - #32]",
 1029                 src_offset, dst_offset);
 1030       break;
 1031     case Op_VecZ:
 1032       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1033                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1034                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1035                 "vmovdqu xmm0, [rsp - #64]",
 1036                 src_offset, dst_offset);
 1037       break;
 1038     default:
 1039       ShouldNotReachHere();
 1040     }
 1041 #endif
 1042   }
 1043 }
 1044 
 1045 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1046   // Get registers to move
 1047   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1048   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1049   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1050   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1051 
 1052   enum RC src_second_rc = rc_class(src_second);
 1053   enum RC src_first_rc = rc_class(src_first);
 1054   enum RC dst_second_rc = rc_class(dst_second);
 1055   enum RC dst_first_rc = rc_class(dst_first);
 1056 
 1057   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1058 
 1059   // Generate spill code!
 1060   int size = 0;
 1061 
 1062   if( src_first == dst_first && src_second == dst_second )
 1063     return size;            // Self copy, no move
 1064 
 1065   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1066     uint ireg = ideal_reg();
 1067     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1068     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1069     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1070     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1071       // mem -> mem
 1072       int src_offset = ra_->reg2offset(src_first);
 1073       int dst_offset = ra_->reg2offset(dst_first);
 1074       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1075     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1076       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1077     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1078       int stack_offset = ra_->reg2offset(dst_first);
 1079       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1080     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1081       int stack_offset = ra_->reg2offset(src_first);
 1082       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1083     } else {
 1084       ShouldNotReachHere();
 1085     }
 1086     return 0;
 1087   }
 1088 
 1089   // --------------------------------------
 1090   // Check for mem-mem move.  push/pop to move.
 1091   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1092     if( src_second == dst_first ) { // overlapping stack copy ranges
 1093       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1094       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1095       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1096       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1097     }
 1098     // move low bits
 1099     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1100     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1101     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1102       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1103       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1104     }
 1105     return size;
 1106   }
 1107 
 1108   // --------------------------------------
 1109   // Check for integer reg-reg copy
 1110   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1111     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1112 
 1113   // Check for integer store
 1114   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1115     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1116 
 1117   // Check for integer load
 1118   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1119     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1120 
 1121   // Check for integer reg-xmm reg copy
 1122   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1123     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1124             "no 64 bit integer-float reg moves" );
 1125     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1126   }
 1127   // --------------------------------------
 1128   // Check for float reg-reg copy
 1129   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1130     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1131             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1132     if( cbuf ) {
 1133 
 1134       // Note the mucking with the register encode to compensate for the 0/1
 1135       // indexing issue mentioned in a comment in the reg_def sections
 1136       // for FPR registers many lines above here.
 1137 
 1138       if( src_first != FPR1L_num ) {
 1139         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1140         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1141         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1142         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1143      } else {
 1144         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1145         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1146      }
 1147 #ifndef PRODUCT
 1148     } else if( !do_size ) {
 1149       if( size != 0 ) st->print("\n\t");
 1150       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1151       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1152 #endif
 1153     }
 1154     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1155   }
 1156 
 1157   // Check for float store
 1158   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1159     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1160   }
 1161 
 1162   // Check for float load
 1163   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1164     int offset = ra_->reg2offset(src_first);
 1165     const char *op_str;
 1166     int op;
 1167     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1168       op_str = "FLD_D";
 1169       op = 0xDD;
 1170     } else {                   // 32-bit load
 1171       op_str = "FLD_S";
 1172       op = 0xD9;
 1173       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1174     }
 1175     if( cbuf ) {
 1176       emit_opcode  (*cbuf, op );
 1177       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1178       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1179       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1180 #ifndef PRODUCT
 1181     } else if( !do_size ) {
 1182       if( size != 0 ) st->print("\n\t");
 1183       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1184 #endif
 1185     }
 1186     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1187     return size + 3+offset_size+2;
 1188   }
 1189 
 1190   // Check for xmm reg-reg copy
 1191   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1192     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1193             (src_first+1 == src_second && dst_first+1 == dst_second),
 1194             "no non-adjacent float-moves" );
 1195     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1196   }
 1197 
 1198   // Check for xmm reg-integer reg copy
 1199   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1200     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1201             "no 64 bit float-integer reg moves" );
 1202     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1203   }
 1204 
 1205   // Check for xmm store
 1206   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1207     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1208   }
 1209 
 1210   // Check for float xmm load
 1211   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1212     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1213   }
 1214 
 1215   // Copy from float reg to xmm reg
 1216   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1217     // copy to the top of stack from floating point reg
 1218     // and use LEA to preserve flags
 1219     if( cbuf ) {
 1220       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1221       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1222       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1223       emit_d8(*cbuf,0xF8);
 1224 #ifndef PRODUCT
 1225     } else if( !do_size ) {
 1226       if( size != 0 ) st->print("\n\t");
 1227       st->print("LEA    ESP,[ESP-8]");
 1228 #endif
 1229     }
 1230     size += 4;
 1231 
 1232     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1233 
 1234     // Copy from the temp memory to the xmm reg.
 1235     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1236 
 1237     if( cbuf ) {
 1238       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1239       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1240       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1241       emit_d8(*cbuf,0x08);
 1242 #ifndef PRODUCT
 1243     } else if( !do_size ) {
 1244       if( size != 0 ) st->print("\n\t");
 1245       st->print("LEA    ESP,[ESP+8]");
 1246 #endif
 1247     }
 1248     size += 4;
 1249     return size;
 1250   }
 1251 
 1252   assert( size > 0, "missed a case" );
 1253 
 1254   // --------------------------------------------------------------------
 1255   // Check for second bits still needing moving.
 1256   if( src_second == dst_second )
 1257     return size;               // Self copy; no move
 1258   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1259 
 1260   // Check for second word int-int move
 1261   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1262     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1263 
 1264   // Check for second word integer store
 1265   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1266     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1267 
 1268   // Check for second word integer load
 1269   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1270     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1271 
 1272   // AVX-512 opmask specific spilling.
 1273   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1274     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1275     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1276     MacroAssembler _masm(cbuf);
 1277     int offset = ra_->reg2offset(src_first);
 1278     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1279     return 0;
 1280   }
 1281 
 1282   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1283     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1284     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1285     MacroAssembler _masm(cbuf);
 1286     int offset = ra_->reg2offset(dst_first);
 1287     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1288     return 0;
 1289   }
 1290 
 1291   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1292     Unimplemented();
 1293     return 0;
 1294   }
 1295 
 1296   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1297     Unimplemented();
 1298     return 0;
 1299   }
 1300 
 1301   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1302     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1303     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1304     MacroAssembler _masm(cbuf);
 1305     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1306     return 0;
 1307   }
 1308 
 1309   Unimplemented();
 1310   return 0; // Mute compiler
 1311 }
 1312 
 1313 #ifndef PRODUCT
 1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1315   implementation( NULL, ra_, false, st );
 1316 }
 1317 #endif
 1318 
 1319 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1320   implementation( &cbuf, ra_, false, NULL );
 1321 }
 1322 
 1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1324   return MachNode::size(ra_);
 1325 }
 1326 
 1327 
 1328 //=============================================================================
 1329 #ifndef PRODUCT
 1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1331   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1332   int reg = ra_->get_reg_first(this);
 1333   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1334 }
 1335 #endif
 1336 
 1337 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1338   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1339   int reg = ra_->get_encode(this);
 1340   if( offset >= 128 ) {
 1341     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1342     emit_rm(cbuf, 0x2, reg, 0x04);
 1343     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1344     emit_d32(cbuf, offset);
 1345   }
 1346   else {
 1347     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1348     emit_rm(cbuf, 0x1, reg, 0x04);
 1349     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1350     emit_d8(cbuf, offset);
 1351   }
 1352 }
 1353 
 1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1355   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1356   if( offset >= 128 ) {
 1357     return 7;
 1358   }
 1359   else {
 1360     return 4;
 1361   }
 1362 }
 1363 
 1364 //=============================================================================
 1365 #ifndef PRODUCT
 1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1367   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1368   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1369   st->print_cr("\tNOP");
 1370   st->print_cr("\tNOP");
 1371   if( !OptoBreakpoint )
 1372     st->print_cr("\tNOP");
 1373 }
 1374 #endif
 1375 
 1376 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1377   MacroAssembler masm(&cbuf);
 1378 #ifdef ASSERT
 1379   uint insts_size = cbuf.insts_size();
 1380 #endif
 1381   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1382   masm.jump_cc(Assembler::notEqual,
 1383                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1384   /* WARNING these NOPs are critical so that verified entry point is properly
 1385      aligned for patching by NativeJump::patch_verified_entry() */
 1386   int nops_cnt = 2;
 1387   if( !OptoBreakpoint ) // Leave space for int3
 1388      nops_cnt += 1;
 1389   masm.nop(nops_cnt);
 1390 
 1391   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1392 }
 1393 
 1394 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1395   return OptoBreakpoint ? 11 : 12;
 1396 }
 1397 
 1398 
 1399 //=============================================================================
 1400 
 1401 // Vector calling convention not supported.
 1402 const bool Matcher::supports_vector_calling_convention() {
 1403   return false;
 1404 }
 1405 
 1406 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1407   Unimplemented();
 1408   return OptoRegPair(0, 0);
 1409 }
 1410 
 1411 // Is this branch offset short enough that a short branch can be used?
 1412 //
 1413 // NOTE: If the platform does not provide any short branch variants, then
 1414 //       this method should return false for offset 0.
 1415 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1416   // The passed offset is relative to address of the branch.
 1417   // On 86 a branch displacement is calculated relative to address
 1418   // of a next instruction.
 1419   offset -= br_size;
 1420 
 1421   // the short version of jmpConUCF2 contains multiple branches,
 1422   // making the reach slightly less
 1423   if (rule == jmpConUCF2_rule)
 1424     return (-126 <= offset && offset <= 125);
 1425   return (-128 <= offset && offset <= 127);
 1426 }
 1427 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1432 bool Matcher::can_be_java_arg( int reg ) {
 1433   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1434   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1435   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1436   return false;
 1437 }
 1438 
 1439 bool Matcher::is_spillable_arg( int reg ) {
 1440   return can_be_java_arg(reg);
 1441 }
 1442 
 1443 uint Matcher::int_pressure_limit()
 1444 {
 1445   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1446 }
 1447 
 1448 uint Matcher::float_pressure_limit()
 1449 {
 1450   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1451 }
 1452 
 1453 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1454   // Use hardware integer DIV instruction when
 1455   // it is faster than a code which use multiply.
 1456   // Only when constant divisor fits into 32 bit
 1457   // (min_jint is excluded to get only correct
 1458   // positive 32 bit values from negative).
 1459   return VM_Version::has_fast_idiv() &&
 1460          (divisor == (int)divisor && divisor != min_jint);
 1461 }
 1462 
 1463 // Register for DIVI projection of divmodI
 1464 RegMask Matcher::divI_proj_mask() {
 1465   return EAX_REG_mask();
 1466 }
 1467 
 1468 // Register for MODI projection of divmodI
 1469 RegMask Matcher::modI_proj_mask() {
 1470   return EDX_REG_mask();
 1471 }
 1472 
 1473 // Register for DIVL projection of divmodL
 1474 RegMask Matcher::divL_proj_mask() {
 1475   ShouldNotReachHere();
 1476   return RegMask();
 1477 }
 1478 
 1479 // Register for MODL projection of divmodL
 1480 RegMask Matcher::modL_proj_mask() {
 1481   ShouldNotReachHere();
 1482   return RegMask();
 1483 }
 1484 
 1485 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1486   return NO_REG_mask();
 1487 }
 1488 
 1489 // Returns true if the high 32 bits of the value is known to be zero.
 1490 bool is_operand_hi32_zero(Node* n) {
 1491   int opc = n->Opcode();
 1492   if (opc == Op_AndL) {
 1493     Node* o2 = n->in(2);
 1494     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1495       return true;
 1496     }
 1497   }
 1498   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1499     return true;
 1500   }
 1501   return false;
 1502 }
 1503 
 1504 %}
 1505 
 1506 //----------ENCODING BLOCK-----------------------------------------------------
 1507 // This block specifies the encoding classes used by the compiler to output
 1508 // byte streams.  Encoding classes generate functions which are called by
 1509 // Machine Instruction Nodes in order to generate the bit encoding of the
 1510 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1513 // operand to generate a function which returns its register number when
 1514 // queried.   CONST_INTER causes an operand to generate a function which
 1515 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1516 // operand to generate four functions which return the Base Register, the
 1517 // Index Register, the Scale Value, and the Offset Value of the operand when
 1518 // queried.  COND_INTER causes an operand to generate six functions which
 1519 // return the encoding code (ie - encoding bits for the instruction)
 1520 // associated with each basic boolean condition for a conditional instruction.
 1521 // Instructions specify two basic values for encoding.  They use the
 1522 // ins_encode keyword to specify their encoding class (which must be one of
 1523 // the class names specified in the encoding block), and they use the
 1524 // opcode keyword to specify, in order, their primary, secondary, and
 1525 // tertiary opcode.  Only the opcode sections which a particular instruction
 1526 // needs for encoding need to be specified.
 1527 encode %{
 1528   // Build emit functions for each basic byte or larger field in the intel
 1529   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1530   // code in the enc_class source block.  Emit functions will live in the
 1531   // main source block for now.  In future, we can generalize this by
 1532   // adding a syntax that specifies the sizes of fields in an order,
 1533   // so that the adlc can build the emit functions automagically
 1534 
 1535   // Emit primary opcode
 1536   enc_class OpcP %{
 1537     emit_opcode(cbuf, $primary);
 1538   %}
 1539 
 1540   // Emit secondary opcode
 1541   enc_class OpcS %{
 1542     emit_opcode(cbuf, $secondary);
 1543   %}
 1544 
 1545   // Emit opcode directly
 1546   enc_class Opcode(immI d8) %{
 1547     emit_opcode(cbuf, $d8$$constant);
 1548   %}
 1549 
 1550   enc_class SizePrefix %{
 1551     emit_opcode(cbuf,0x66);
 1552   %}
 1553 
 1554   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1555     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1556   %}
 1557 
 1558   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1559     emit_opcode(cbuf,$opcode$$constant);
 1560     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1561   %}
 1562 
 1563   enc_class mov_r32_imm0( rRegI dst ) %{
 1564     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1565     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1566   %}
 1567 
 1568   enc_class cdq_enc %{
 1569     // Full implementation of Java idiv and irem; checks for
 1570     // special case as described in JVM spec., p.243 & p.271.
 1571     //
 1572     //         normal case                           special case
 1573     //
 1574     // input : rax,: dividend                         min_int
 1575     //         reg: divisor                          -1
 1576     //
 1577     // output: rax,: quotient  (= rax, idiv reg)       min_int
 1578     //         rdx: remainder (= rax, irem reg)       0
 1579     //
 1580     //  Code sequnce:
 1581     //
 1582     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1583     //  0F 85 0B 00 00 00    jne         normal_case
 1584     //  33 D2                xor         rdx,edx
 1585     //  83 F9 FF             cmp         rcx,0FFh
 1586     //  0F 84 03 00 00 00    je          done
 1587     //                  normal_case:
 1588     //  99                   cdq
 1589     //  F7 F9                idiv        rax,ecx
 1590     //                  done:
 1591     //
 1592     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1594     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1595     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1596     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1597     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1598     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1599     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1600     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1601     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1603     // normal_case:
 1604     emit_opcode(cbuf,0x99);                                         // cdq
 1605     // idiv (note: must be emitted by the user of this rule)
 1606     // normal:
 1607   %}
 1608 
 1609   // Dense encoding for older common ops
 1610   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1611     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1612   %}
 1613 
 1614 
 1615   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 1616   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1617     // Check for 8-bit immediate, and set sign extend bit in opcode
 1618     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1619       emit_opcode(cbuf, $primary | 0x02);
 1620     }
 1621     else {                          // If 32-bit immediate
 1622       emit_opcode(cbuf, $primary);
 1623     }
 1624   %}
 1625 
 1626   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1627     // Emit primary opcode and set sign-extend bit
 1628     // Check for 8-bit immediate, and set sign extend bit in opcode
 1629     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1630       emit_opcode(cbuf, $primary | 0x02);    }
 1631     else {                          // If 32-bit immediate
 1632       emit_opcode(cbuf, $primary);
 1633     }
 1634     // Emit r/m byte with secondary opcode, after primary opcode.
 1635     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1636   %}
 1637 
 1638   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1639     // Check for 8-bit immediate, and set sign extend bit in opcode
 1640     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1641       $$$emit8$imm$$constant;
 1642     }
 1643     else {                          // If 32-bit immediate
 1644       // Output immediate
 1645       $$$emit32$imm$$constant;
 1646     }
 1647   %}
 1648 
 1649   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1650     // Emit primary opcode and set sign-extend bit
 1651     // Check for 8-bit immediate, and set sign extend bit in opcode
 1652     int con = (int)$imm$$constant; // Throw away top bits
 1653     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1654     // Emit r/m byte with secondary opcode, after primary opcode.
 1655     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1656     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1657     else                               emit_d32(cbuf,con);
 1658   %}
 1659 
 1660   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1661     // Emit primary opcode and set sign-extend bit
 1662     // Check for 8-bit immediate, and set sign extend bit in opcode
 1663     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1664     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1665     // Emit r/m byte with tertiary opcode, after primary opcode.
 1666     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1667     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1668     else                               emit_d32(cbuf,con);
 1669   %}
 1670 
 1671   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1672     emit_cc(cbuf, $secondary, $dst$$reg );
 1673   %}
 1674 
 1675   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1676     int destlo = $dst$$reg;
 1677     int desthi = HIGH_FROM_LOW(destlo);
 1678     // bswap lo
 1679     emit_opcode(cbuf, 0x0F);
 1680     emit_cc(cbuf, 0xC8, destlo);
 1681     // bswap hi
 1682     emit_opcode(cbuf, 0x0F);
 1683     emit_cc(cbuf, 0xC8, desthi);
 1684     // xchg lo and hi
 1685     emit_opcode(cbuf, 0x87);
 1686     emit_rm(cbuf, 0x3, destlo, desthi);
 1687   %}
 1688 
 1689   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1690     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1691   %}
 1692 
 1693   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1694     $$$emit8$primary;
 1695     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1696   %}
 1697 
 1698   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1699     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1700     emit_d8(cbuf, op >> 8 );
 1701     emit_d8(cbuf, op & 255);
 1702   %}
 1703 
 1704   // emulate a CMOV with a conditional branch around a MOV
 1705   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1706     // Invert sense of branch from sense of CMOV
 1707     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1708     emit_d8( cbuf, $brOffs$$constant );
 1709   %}
 1710 
 1711   enc_class enc_PartialSubtypeCheck( ) %{
 1712     Register Redi = as_Register(EDI_enc); // result register
 1713     Register Reax = as_Register(EAX_enc); // super class
 1714     Register Recx = as_Register(ECX_enc); // killed
 1715     Register Resi = as_Register(ESI_enc); // sub class
 1716     Label miss;
 1717 
 1718     MacroAssembler _masm(&cbuf);
 1719     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1720                                      NULL, &miss,
 1721                                      /*set_cond_codes:*/ true);
 1722     if ($primary) {
 1723       __ xorptr(Redi, Redi);
 1724     }
 1725     __ bind(miss);
 1726   %}
 1727 
 1728   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1729     MacroAssembler masm(&cbuf);
 1730     int start = masm.offset();
 1731     if (UseSSE >= 2) {
 1732       if (VerifyFPU) {
 1733         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1734       }
 1735     } else {
 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       masm.empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {
 1741       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1742     } else {
 1743       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
 1747   enc_class Verify_FPU_For_Leaf %{
 1748     if( VerifyFPU ) {
 1749       MacroAssembler masm(&cbuf);
 1750       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1751     }
 1752   %}
 1753 
 1754   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1755     // This is the instruction starting address for relocation info.
 1756     cbuf.set_insts_mark();
 1757     $$$emit8$primary;
 1758     // CALL directly to the runtime
 1759     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1760                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1761 
 1762     if (UseSSE >= 2) {
 1763       MacroAssembler _masm(&cbuf);
 1764       BasicType rt = tf()->return_type();
 1765 
 1766       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1767         // A C runtime call where the return value is unused.  In SSE2+
 1768         // mode the result needs to be removed from the FPU stack.  It's
 1769         // likely that this function call could be removed by the
 1770         // optimizer if the C function is a pure function.
 1771         __ ffree(0);
 1772       } else if (rt == T_FLOAT) {
 1773         __ lea(rsp, Address(rsp, -4));
 1774         __ fstp_s(Address(rsp, 0));
 1775         __ movflt(xmm0, Address(rsp, 0));
 1776         __ lea(rsp, Address(rsp,  4));
 1777       } else if (rt == T_DOUBLE) {
 1778         __ lea(rsp, Address(rsp, -8));
 1779         __ fstp_d(Address(rsp, 0));
 1780         __ movdbl(xmm0, Address(rsp, 0));
 1781         __ lea(rsp, Address(rsp,  8));
 1782       }
 1783     }
 1784   %}
 1785 
 1786   enc_class pre_call_resets %{
 1787     // If method sets FPU control word restore it here
 1788     debug_only(int off0 = cbuf.insts_size());
 1789     if (ra_->C->in_24_bit_fp_mode()) {
 1790       MacroAssembler _masm(&cbuf);
 1791       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1792     }
 1793     // Clear upper bits of YMM registers when current compiled code uses
 1794     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1795     MacroAssembler _masm(&cbuf);
 1796     __ vzeroupper();
 1797     debug_only(int off1 = cbuf.insts_size());
 1798     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1799   %}
 1800 
 1801   enc_class post_call_FPU %{
 1802     // If method sets FPU control word do it here also
 1803     if (Compile::current()->in_24_bit_fp_mode()) {
 1804       MacroAssembler masm(&cbuf);
 1805       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1806     }
 1807   %}
 1808 
 1809   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1810     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1811     // who we intended to call.
 1812     cbuf.set_insts_mark();
 1813     $$$emit8$primary;
 1814 
 1815     if (!_method) {
 1816       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1817                      runtime_call_Relocation::spec(),
 1818                      RELOC_IMM32);
 1819     } else {
 1820       int method_index = resolved_method_index(cbuf);
 1821       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1822                                                   : static_call_Relocation::spec(method_index);
 1823       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1824                      rspec, RELOC_DISP32);
 1825       // Emit stubs for static call.
 1826       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1827       if (stub == NULL) {
 1828         ciEnv::current()->record_failure("CodeCache is full");
 1829         return;
 1830       }
 1831     }
 1832   %}
 1833 
 1834   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1835     MacroAssembler _masm(&cbuf);
 1836     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1837   %}
 1838 
 1839   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1840     int disp = in_bytes(Method::from_compiled_offset());
 1841     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1842 
 1843     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1844     cbuf.set_insts_mark();
 1845     $$$emit8$primary;
 1846     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1847     emit_d8(cbuf, disp);             // Displacement
 1848 
 1849   %}
 1850 
 1851 //   Following encoding is no longer used, but may be restored if calling
 1852 //   convention changes significantly.
 1853 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1854 //
 1855 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1856 //     // int ic_reg     = Matcher::inline_cache_reg();
 1857 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1858 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1859 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1860 //
 1861 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1862 //     // // so we load it immediately before the call
 1863 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1864 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1865 //
 1866 //     // xor rbp,ebp
 1867 //     emit_opcode(cbuf, 0x33);
 1868 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1869 //
 1870 //     // CALL to interpreter.
 1871 //     cbuf.set_insts_mark();
 1872 //     $$$emit8$primary;
 1873 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1874 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1875 //   %}
 1876 
 1877   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1878     $$$emit8$primary;
 1879     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1880     $$$emit8$shift$$constant;
 1881   %}
 1882 
 1883   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1884     // Load immediate does not have a zero or sign extended version
 1885     // for 8-bit immediates
 1886     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1887     $$$emit32$src$$constant;
 1888   %}
 1889 
 1890   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1891     // Load immediate does not have a zero or sign extended version
 1892     // for 8-bit immediates
 1893     emit_opcode(cbuf, $primary + $dst$$reg);
 1894     $$$emit32$src$$constant;
 1895   %}
 1896 
 1897   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1898     // Load immediate does not have a zero or sign extended version
 1899     // for 8-bit immediates
 1900     int dst_enc = $dst$$reg;
 1901     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1902     if (src_con == 0) {
 1903       // xor dst, dst
 1904       emit_opcode(cbuf, 0x33);
 1905       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1906     } else {
 1907       emit_opcode(cbuf, $primary + dst_enc);
 1908       emit_d32(cbuf, src_con);
 1909     }
 1910   %}
 1911 
 1912   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1913     // Load immediate does not have a zero or sign extended version
 1914     // for 8-bit immediates
 1915     int dst_enc = $dst$$reg + 2;
 1916     int src_con = ((julong)($src$$constant)) >> 32;
 1917     if (src_con == 0) {
 1918       // xor dst, dst
 1919       emit_opcode(cbuf, 0x33);
 1920       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1921     } else {
 1922       emit_opcode(cbuf, $primary + dst_enc);
 1923       emit_d32(cbuf, src_con);
 1924     }
 1925   %}
 1926 
 1927 
 1928   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1929   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1930     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1931   %}
 1932 
 1933   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1934     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1935   %}
 1936 
 1937   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1938     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1939   %}
 1940 
 1941   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1942     $$$emit8$primary;
 1943     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1944   %}
 1945 
 1946   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1947     $$$emit8$secondary;
 1948     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1949   %}
 1950 
 1951   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1952     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1953   %}
 1954 
 1955   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1956     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1957   %}
 1958 
 1959   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1960     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1961   %}
 1962 
 1963   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1964     // Output immediate
 1965     $$$emit32$src$$constant;
 1966   %}
 1967 
 1968   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1969     // Output Float immediate bits
 1970     jfloat jf = $src$$constant;
 1971     int    jf_as_bits = jint_cast( jf );
 1972     emit_d32(cbuf, jf_as_bits);
 1973   %}
 1974 
 1975   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1976     // Output Float immediate bits
 1977     jfloat jf = $src$$constant;
 1978     int    jf_as_bits = jint_cast( jf );
 1979     emit_d32(cbuf, jf_as_bits);
 1980   %}
 1981 
 1982   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1983     // Output immediate
 1984     $$$emit16$src$$constant;
 1985   %}
 1986 
 1987   enc_class Con_d32(immI src) %{
 1988     emit_d32(cbuf,$src$$constant);
 1989   %}
 1990 
 1991   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1992     // Output immediate memory reference
 1993     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1994     emit_d32(cbuf, 0x00);
 1995   %}
 1996 
 1997   enc_class lock_prefix( ) %{
 1998     emit_opcode(cbuf,0xF0);         // [Lock]
 1999   %}
 2000 
 2001   // Cmp-xchg long value.
 2002   // Note: we need to swap rbx, and rcx before and after the
 2003   //       cmpxchg8 instruction because the instruction uses
 2004   //       rcx as the high order word of the new value to store but
 2005   //       our register encoding uses rbx,.
 2006   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2007 
 2008     // XCHG  rbx,ecx
 2009     emit_opcode(cbuf,0x87);
 2010     emit_opcode(cbuf,0xD9);
 2011     // [Lock]
 2012     emit_opcode(cbuf,0xF0);
 2013     // CMPXCHG8 [Eptr]
 2014     emit_opcode(cbuf,0x0F);
 2015     emit_opcode(cbuf,0xC7);
 2016     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2017     // XCHG  rbx,ecx
 2018     emit_opcode(cbuf,0x87);
 2019     emit_opcode(cbuf,0xD9);
 2020   %}
 2021 
 2022   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2023     // [Lock]
 2024     emit_opcode(cbuf,0xF0);
 2025 
 2026     // CMPXCHG [Eptr]
 2027     emit_opcode(cbuf,0x0F);
 2028     emit_opcode(cbuf,0xB1);
 2029     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2030   %}
 2031 
 2032   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035 
 2036     // CMPXCHGB [Eptr]
 2037     emit_opcode(cbuf,0x0F);
 2038     emit_opcode(cbuf,0xB0);
 2039     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2040   %}
 2041 
 2042   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2043     // [Lock]
 2044     emit_opcode(cbuf,0xF0);
 2045 
 2046     // 16-bit mode
 2047     emit_opcode(cbuf, 0x66);
 2048 
 2049     // CMPXCHGW [Eptr]
 2050     emit_opcode(cbuf,0x0F);
 2051     emit_opcode(cbuf,0xB1);
 2052     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2053   %}
 2054 
 2055   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2056     int res_encoding = $res$$reg;
 2057 
 2058     // MOV  res,0
 2059     emit_opcode( cbuf, 0xB8 + res_encoding);
 2060     emit_d32( cbuf, 0 );
 2061     // JNE,s  fail
 2062     emit_opcode(cbuf,0x75);
 2063     emit_d8(cbuf, 5 );
 2064     // MOV  res,1
 2065     emit_opcode( cbuf, 0xB8 + res_encoding);
 2066     emit_d32( cbuf, 1 );
 2067     // fail:
 2068   %}
 2069 
 2070   enc_class set_instruction_start( ) %{
 2071     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2072   %}
 2073 
 2074   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2075     int reg_encoding = $ereg$$reg;
 2076     int base  = $mem$$base;
 2077     int index = $mem$$index;
 2078     int scale = $mem$$scale;
 2079     int displace = $mem$$disp;
 2080     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2081     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2082   %}
 2083 
 2084   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2085     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2086     int base  = $mem$$base;
 2087     int index = $mem$$index;
 2088     int scale = $mem$$scale;
 2089     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2090     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2091     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2092   %}
 2093 
 2094   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2095     int r1, r2;
 2096     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2097     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2098     emit_opcode(cbuf,0x0F);
 2099     emit_opcode(cbuf,$tertiary);
 2100     emit_rm(cbuf, 0x3, r1, r2);
 2101     emit_d8(cbuf,$cnt$$constant);
 2102     emit_d8(cbuf,$primary);
 2103     emit_rm(cbuf, 0x3, $secondary, r1);
 2104     emit_d8(cbuf,$cnt$$constant);
 2105   %}
 2106 
 2107   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2108     emit_opcode( cbuf, 0x8B ); // Move
 2109     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2110     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2111       emit_d8(cbuf,$primary);
 2112       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2113       emit_d8(cbuf,$cnt$$constant-32);
 2114     }
 2115     emit_d8(cbuf,$primary);
 2116     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2117     emit_d8(cbuf,31);
 2118   %}
 2119 
 2120   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2121     int r1, r2;
 2122     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2123     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2124 
 2125     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2126     emit_rm(cbuf, 0x3, r1, r2);
 2127     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2128       emit_opcode(cbuf,$primary);
 2129       emit_rm(cbuf, 0x3, $secondary, r1);
 2130       emit_d8(cbuf,$cnt$$constant-32);
 2131     }
 2132     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2133     emit_rm(cbuf, 0x3, r2, r2);
 2134   %}
 2135 
 2136   // Clone of RegMem but accepts an extra parameter to access each
 2137   // half of a double in memory; it never needs relocation info.
 2138   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2139     emit_opcode(cbuf,$opcode$$constant);
 2140     int reg_encoding = $rm_reg$$reg;
 2141     int base     = $mem$$base;
 2142     int index    = $mem$$index;
 2143     int scale    = $mem$$scale;
 2144     int displace = $mem$$disp + $disp_for_half$$constant;
 2145     relocInfo::relocType disp_reloc = relocInfo::none;
 2146     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2147   %}
 2148 
 2149   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2150   //
 2151   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2152   // and it never needs relocation information.
 2153   // Frequently used to move data between FPU's Stack Top and memory.
 2154   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2155     int rm_byte_opcode = $rm_opcode$$constant;
 2156     int base     = $mem$$base;
 2157     int index    = $mem$$index;
 2158     int scale    = $mem$$scale;
 2159     int displace = $mem$$disp;
 2160     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2161     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2162   %}
 2163 
 2164   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2165     int rm_byte_opcode = $rm_opcode$$constant;
 2166     int base     = $mem$$base;
 2167     int index    = $mem$$index;
 2168     int scale    = $mem$$scale;
 2169     int displace = $mem$$disp;
 2170     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2171     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2172   %}
 2173 
 2174   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2175     int reg_encoding = $dst$$reg;
 2176     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2177     int index        = 0x04;            // 0x04 indicates no index
 2178     int scale        = 0x00;            // 0x00 indicates no scale
 2179     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2180     relocInfo::relocType disp_reloc = relocInfo::none;
 2181     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2182   %}
 2183 
 2184   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2185     // Compare dst,src
 2186     emit_opcode(cbuf,0x3B);
 2187     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2188     // jmp dst < src around move
 2189     emit_opcode(cbuf,0x7C);
 2190     emit_d8(cbuf,2);
 2191     // move dst,src
 2192     emit_opcode(cbuf,0x8B);
 2193     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2194   %}
 2195 
 2196   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2197     // Compare dst,src
 2198     emit_opcode(cbuf,0x3B);
 2199     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2200     // jmp dst > src around move
 2201     emit_opcode(cbuf,0x7F);
 2202     emit_d8(cbuf,2);
 2203     // move dst,src
 2204     emit_opcode(cbuf,0x8B);
 2205     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2206   %}
 2207 
 2208   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2209     // Store $src to $mem using the store opcode supplied as $primary.
 2210     // A value already in FPR1 is stored in place (reg field 0x2).  Any other
 2211     // register is first pushed with FLD, then stored and popped (reg field 0x3).
 2212     const int  src_enc   = $src$$reg;
 2213     const bool need_push = (src_enc != FPR1L_enc);
 2214     if (need_push) {
 2215       emit_opcode( cbuf, 0xD9 );          // FLD (i.e., push it)
 2216       emit_d8( cbuf, 0xC0-1+src_enc );
 2217     }
 2218     const int opc_ext = need_push ? 0x3   // Store & pop
 2219                                   : 0x2;  // Just store
 2220     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2221     emit_opcode(cbuf,$primary);
 2222     // disp_reloc(): disp-as-oop when working with static globals
 2223     encode_RegMem(cbuf, opc_ext, $mem$$base, $mem$$index, $mem$$scale,
 2224                   $mem$$disp, $mem->disp_reloc());
 2225   %}
 2226 
 2227   enc_class neg_reg(rRegI dst) %{
 2228     const int dst_enc = $dst$$reg;
 2229     emit_opcode(cbuf, 0xF7);             // NEG $dst: opcode byte 0xF7 ...
 2230     emit_rm(cbuf, 0x3, 0x03, dst_enc);   // ... with 0x03 in the ModRM reg field
 2231   %}
 2232 
 2233   enc_class setLT_reg(eCXRegI dst) %{
 2234     const int dst_enc = $dst$$reg;
 2235     emit_opcode(cbuf, 0x0F);             // SETLT $dst: two opcode bytes,
 2236     emit_opcode(cbuf, 0x9C);             //   0F 9C ...
 2237     emit_rm(cbuf, 0x3, 0x4, dst_enc);    //   ... then a register-direct ModRM
 2238   %}
 2239 
 2240   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2241     // Branch-free conditional add: $p = ($p - $q) + (borrow ? $y : 0).
 2242     // SBB of $tmp with itself turns the borrow of the SUB into a mask,
 2243     // and the AND then passes $y through or clears it.
 2244     const int p_enc = $p$$reg;
 2245     const int t_enc = $tmp$$reg;
 2246
 2247     emit_opcode(cbuf, 0x2B);                 // SUB $p,$q
 2248     emit_rm(cbuf, 0x3, p_enc, $q$$reg);
 2249     emit_opcode(cbuf, 0x1B);                 // SBB $tmp,$tmp
 2250     emit_rm(cbuf, 0x3, t_enc, t_enc);
 2251     emit_opcode(cbuf, 0x23);                 // AND $tmp,$y
 2252     emit_rm(cbuf, 0x3, t_enc, $y$$reg);
 2253     emit_opcode(cbuf, 0x03);                 // ADD $p,$tmp
 2254     emit_rm(cbuf, 0x3, p_enc, t_enc);
 2255   %}
 2256 
 2257   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
          // Left-shifts the register pair $dst (lo = $dst$$reg, hi =
          // HIGH_FROM_LOW($dst$$reg)) by the count in ECX.  ECX_enc is hard-coded
          // in the TEST; $shift itself is never read by this encoding.
          // When bit 5 of the count is set, lo is first moved into hi and cleared.
          // NOTE(review): the SHLD/SHL that follow presumably rely on the hardware
          // using only the low 5 bits of the count -- confirm.
 2258     // TEST shift,32
 2259     emit_opcode(cbuf,0xF7);
 2260     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2261     emit_d32(cbuf,0x20);
 2262     // JEQ,s small -- rel8 of 4 steps over the MOV and CLR emitted next
 2263     emit_opcode(cbuf, 0x74);
 2264     emit_d8(cbuf, 0x04);
 2265     // MOV    $dst.hi,$dst.lo
 2266     emit_opcode( cbuf, 0x8B );
 2267     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2268     // CLR    $dst.lo
 2269     emit_opcode(cbuf, 0x33);
 2270     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2271 // small:
 2272     // SHLD   $dst.hi,$dst.lo,$shift
 2273     emit_opcode(cbuf,0x0F);
 2274     emit_opcode(cbuf,0xA5);
 2275     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2276     // SHL    $dst.lo,$shift
 2277     emit_opcode(cbuf,0xD3);
 2278     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2279   %}
 2280 
 2281   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
          // Logical right shift of the register pair $dst (lo = $dst$$reg, hi =
          // HIGH_FROM_LOW($dst$$reg)) by the count in ECX.  ECX_enc is hard-coded
          // in the TEST; $shift itself is never read by this encoding.
          // When bit 5 of the count is set, hi is first moved into lo and cleared.
          // NOTE(review): the SHRD/SHR that follow presumably rely on the hardware
          // using only the low 5 bits of the count -- confirm.
 2282     // TEST shift,32
 2283     emit_opcode(cbuf,0xF7);
 2284     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2285     emit_d32(cbuf,0x20);
 2286     // JEQ,s small -- rel8 of 4 steps over the MOV and CLR emitted next
 2287     emit_opcode(cbuf, 0x74);
 2288     emit_d8(cbuf, 0x04);
 2289     // MOV    $dst.lo,$dst.hi
 2290     emit_opcode( cbuf, 0x8B );
 2291     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2292     // CLR    $dst.hi
 2293     emit_opcode(cbuf, 0x33);
 2294     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2295 // small:
 2296     // SHRD   $dst.lo,$dst.hi,$shift
 2297     emit_opcode(cbuf,0x0F);
 2298     emit_opcode(cbuf,0xAD);
 2299     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2300     // SHR    $dst.hi,$shift
 2301     emit_opcode(cbuf,0xD3);
 2302     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2303   %}
 2304 
 2305   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
          // Arithmetic right shift of the register pair $dst (lo = $dst$$reg, hi =
          // HIGH_FROM_LOW($dst$$reg)) by the count in ECX.  ECX_enc is hard-coded
          // in the TEST; $shift itself is never read by this encoding.
          // When bit 5 of the count is set, hi is first moved into lo and then
          // replaced by its own sign (SAR by 31).
          // NOTE(review): the SHRD/SAR that follow presumably rely on the hardware
          // using only the low 5 bits of the count -- confirm.
 2306     // TEST shift,32
 2307     emit_opcode(cbuf,0xF7);
 2308     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2309     emit_d32(cbuf,0x20);
 2310     // JEQ,s small -- rel8 of 5 steps over the MOV and the SAR-by-immediate emitted next
 2311     emit_opcode(cbuf, 0x74);
 2312     emit_d8(cbuf, 0x05);
 2313     // MOV    $dst.lo,$dst.hi
 2314     emit_opcode( cbuf, 0x8B );
 2315     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2316     // SAR    $dst.hi,31
 2317     emit_opcode(cbuf, 0xC1);
 2318     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2319     emit_d8(cbuf, 0x1F );
 2320 // small:
 2321     // SHRD   $dst.lo,$dst.hi,$shift
 2322     emit_opcode(cbuf,0x0F);
 2323     emit_opcode(cbuf,0xAD);
 2324     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2325     // SAR    $dst.hi,$shift
 2326     emit_opcode(cbuf,0xD3);
 2327     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2328   %}
 2329 
 2330 
 2331   // ----------------- Encodings for floating point unit -----------------
 2332   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2333   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
          // Emits the instruction's $primary opcode byte, then a register-direct
          // ModRM byte whose reg field is $secondary and whose r/m field is the
          // encoding of $src.
 2334     $$$emit8$primary;
 2335     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2336   %}
 2337 
 2338   // Pop argument in FPR0 with FSTP ST(0)
 2339   enc_class PopFPU() %{
          // Emits the fixed byte pair DD D8; no operand is consulted.
 2340     emit_opcode( cbuf, 0xDD );
 2341     emit_d8( cbuf, 0xD8 );
 2342   %}
 2343 
 2344   // !!!!! equivalent to Pop_Reg_FPR (both emit DD, D8+reg)
 2345   enc_class Pop_Reg_DPR( regDPR dst ) %{
          // Store the FPU stack top into $dst and pop.  The second byte is
          // 0xD8 plus the register encoding of $dst.
 2346     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2347     emit_d8( cbuf, 0xD8+$dst$$reg );
 2348   %}
 2349 
 2350   enc_class Push_Reg_DPR( regDPR dst ) %{
          // Push a copy of $dst onto the FPU stack.  The register encoding is
          // reduced by one to form the ST(i) index, hence the 0xC0-1 base.
 2351     emit_opcode( cbuf, 0xD9 );
 2352     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2353   %}
 2354 
 2355   enc_class strictfp_bias1( regDPR dst ) %{
          // Loads the 80-bit constant found at
          // StubRoutines::x86::addr_fpu_subnormal_bias1() and multiplies it into
          // $dst, popping the loaded constant again.  The constant's address is
          // embedded in the instruction as a 32-bit value.
 2356     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2357     emit_opcode( cbuf, 0x2D );
 2358     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2359     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2360     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2361   %}
 2362 
 2363   enc_class strictfp_bias2( regDPR dst ) %{
          // Loads the 80-bit constant found at
          // StubRoutines::x86::addr_fpu_subnormal_bias2() and multiplies it into
          // $dst, popping the loaded constant again.  The constant's address is
          // embedded in the instruction as a 32-bit value.
 2364     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2365     emit_opcode( cbuf, 0x2D );
 2366     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2367     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2368     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2369   %}
 2370 
 2371   // Special case for moving an integer register to a stack slot.
 2372   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
          // Delegates to store_to_stackslot with the instruction's $primary
          // opcode, the encoding of $src and the displacement of slot $dst.
 2373     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2374   %}
 2375 
 2376   // Special case for moving a register to a stack slot.
 2377   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
          // Emits only the operand bytes for [ESP + disp32] with $src in the reg
          // field; the opcode byte must come from an earlier encoding.
 2378     // Opcode already emitted
 2379     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2380     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2381     emit_d32(cbuf, $dst$$disp);   // Displacement
 2382   %}
 2383 
 2384   // Push the integer in stackSlot 'src' onto FP-stack
 2385   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
          // Delegates to store_to_stackslot with $primary as the opcode and
          // $secondary as the reg-field value; only the displacement of $src is used.
 2386     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2387   %}
 2388 
 2389   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2390   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
          // Opcode 0xD9 with reg field 0x03, addressed at the displacement of $dst.
 2391     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2392   %}
 2393 
 2394   // Same as Pop_Mem_FPR except for opcode
 2395   // Store FPU's TOS double to a stack-slot, and pop FPU-stack
 2396   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
          // Opcode 0xDD with reg field 0x03, addressed at the displacement of $dst.
 2397     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2398   %}
 2399 
 2400   enc_class Pop_Reg_FPR( regFPR dst ) %{
          // Store the FPU stack top into $dst and pop.  The second byte is
          // 0xD8 plus the register encoding of $dst.
 2401     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2402     emit_d8( cbuf, 0xD8+$dst$$reg );
 2403   %}
 2404 
 2405   enc_class Push_Reg_FPR( regFPR dst ) %{
          // Push a copy of $dst onto the FPU stack.  The register encoding is
          // reduced by one to form the ST(i) index, hence the 0xC0-1 base.
 2406     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2407     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2408   %}
 2409 
 2410   // Push FPU's float to a stack-slot, and pop FPU-stack
 2411   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2412     // FPR1 is stored in place (reg field 0x02); anything else is pushed, then stored and popped (0x03).
 2413     const bool pushed = ($src$$reg != FPR1L_enc);
 2414     if (pushed) {
 2415       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2416       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2417     }
 2418     store_to_stackslot( cbuf, 0xD9, pushed ? 0x03 : 0x02, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2419   %}
 2420 
 2421   // Push FPU's double to a stack-slot, and pop FPU-stack
 2422   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2423     // FPR1 is stored in place (reg field 0x02); anything else is pushed, then stored and popped (0x03).
 2424     const bool pushed = ($src$$reg != FPR1L_enc);
 2425     if (pushed) {
 2426       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2427       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2428     }
 2429     store_to_stackslot( cbuf, 0xDD, pushed ? 0x03 : 0x02, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2430   %}
 2431 
 2432   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2433   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2434     // FPR1 is stored directly into $dst; any other source is pushed first and the store then pops.
 2435     const bool pushed = ($src$$reg != FPR1L_enc);
 2436     if (pushed) {
 2437       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2438       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2439     }
 2440     emit_opcode( cbuf, 0xDD );           // FST<P> ST(i)
 2441     emit_d8( cbuf, (pushed ? 0xD8 : 0xD0 - 1) + $dst$$reg );  // -1 since we skip FLD
 2442   %}
 2443 
 2444 
 2445   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
          // Pushes $dst onto the FPU stack and, unless $src already is FPR1,
          // exchanges $src with FPR1.  The exchange is bracketed by
          // fincstp/fdecstp so that the freshly pushed value is not the one
          // that gets swapped.
 2446     // load dst in FPR0
 2447     emit_opcode( cbuf, 0xD9 );
 2448     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2449     if ($src$$reg != FPR1L_enc) {
 2450       // fincstp
 2451       emit_opcode (cbuf, 0xD9);
 2452       emit_opcode (cbuf, 0xF7);
 2453       // swap src with FPR1:
 2454       // FXCH FPR1 with src
 2455       emit_opcode(cbuf, 0xD9);
 2456       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2457       // fdecstp
 2458       emit_opcode (cbuf, 0xD9);
 2459       emit_opcode (cbuf, 0xF6);
 2460     }
 2461   %}
 2462 
 2463   enc_class Push_ModD_encoding(regD src0, regD src1) %{
          // Moves two XMM doubles onto the x87 stack through an 8-byte scratch
          // slot carved out at [rsp].  $src1 is loaded first and $src0 second,
          // so $src0 ends up on top.  The 8 bytes are NOT released here.
 2464     MacroAssembler _masm(&cbuf);
 2465     __ subptr(rsp, 8);
 2466     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2467     __ fld_d(Address(rsp, 0));
 2468     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2469     __ fld_d(Address(rsp, 0));
 2470   %}
 2471 
 2472   enc_class Push_ModF_encoding(regF src0, regF src1) %{
          // Moves two XMM floats onto the x87 stack through a 4-byte scratch
          // slot carved out at [rsp].  $src1 is loaded first and $src0 second,
          // so $src0 ends up on top.  The 4 bytes are NOT released here.
 2473     MacroAssembler _masm(&cbuf);
 2474     __ subptr(rsp, 4);
 2475     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2476     __ fld_s(Address(rsp, 0));
 2477     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2478     __ fld_s(Address(rsp, 0));
 2479   %}
 2480 
 2481   enc_class Push_ResultD(regD dst) %{
          // Pops the x87 stack top as a double into the slot at [rsp], reloads
          // it into $dst, then releases 8 bytes of stack.
          // NOTE(review): assumes an earlier encoding reserved those 8 bytes -- confirm at call sites.
 2482     MacroAssembler _masm(&cbuf);
 2483     __ fstp_d(Address(rsp, 0));
 2484     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2485     __ addptr(rsp, 8);
 2486   %}
 2487 
 2488   enc_class Push_ResultF(regF dst, immI d8) %{
          // Pops the x87 stack top as a float into the slot at [rsp], reloads it
          // into $dst, then releases $d8 bytes of stack (the amount is chosen by
          // the instruction that uses this encoding).
 2489     MacroAssembler _masm(&cbuf);
 2490     __ fstp_s(Address(rsp, 0));
 2491     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2492     __ addptr(rsp, $d8$$constant);
 2493   %}
 2494 
 2495   enc_class Push_SrcD(regD src) %{
          // Reserves 8 bytes at [rsp], spills the XMM double $src there and
          // loads it onto the x87 stack.  The 8 bytes are NOT released here.
 2496     MacroAssembler _masm(&cbuf);
 2497     __ subptr(rsp, 8);
 2498     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2499     __ fld_d(Address(rsp, 0));
 2500   %}
 2501 
 2502   enc_class push_stack_temp_qword() %{
          // Reserves an 8-byte temporary by lowering rsp.
 2503     MacroAssembler _masm(&cbuf);
 2504     __ subptr(rsp, 8);
 2505   %}
 2506 
 2507   enc_class pop_stack_temp_qword() %{
          // Releases an 8-byte temporary by raising rsp.
 2508     MacroAssembler _masm(&cbuf);
 2509     __ addptr(rsp, 8);
 2510   %}
 2511 
 2512   enc_class push_xmm_to_fpr1(regD src) %{
          // Spills the XMM double $src to [rsp] and loads it onto the x87 stack.
          // No stack space is reserved here.
          // NOTE(review): presumably paired with push_stack_temp_qword /
          // pop_stack_temp_qword by the using instruction -- confirm.
 2513     MacroAssembler _masm(&cbuf);
 2514     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2515     __ fld_d(Address(rsp, 0));
 2516   %}
 2517 
 2518   enc_class Push_Result_Mod_DPR( regDPR src) %{
          // Unless $src already is FPR1, exchanges $src with FPR1 underneath the
          // current stack top (fincstp / FXCH / fdecstp).  Emits nothing when
          // $src is FPR1.  The final store is left to a separate encoding.
 2519     if ($src$$reg != FPR1L_enc) {
 2520       // fincstp
 2521       emit_opcode (cbuf, 0xD9);
 2522       emit_opcode (cbuf, 0xF7);
 2523       // FXCH FPR1 with src
 2524       emit_opcode(cbuf, 0xD9);
 2525       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2526       // fdecstp
 2527       emit_opcode (cbuf, 0xD9);
 2528       emit_opcode (cbuf, 0xF6);
 2529     }
 2530     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2531     // // FSTP   FPR$dst$$reg
 2532     // emit_opcode( cbuf, 0xDD );
 2533     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2534   %}
 2535 
 2536   enc_class fnstsw_sahf_skip_parity() %{
          // Copies the FPU status word into the integer flags (via AX) and emits
          // a short branch taken when parity is clear.  The branch skips five
          // bytes that are NOT emitted here; they must come from the encoding
          // that follows this one.
 2537     // fnstsw ax
 2538     emit_opcode( cbuf, 0xDF );
 2539     emit_opcode( cbuf, 0xE0 );
 2540     // sahf
 2541     emit_opcode( cbuf, 0x9E );
 2542     // jnp  ::skip
 2543     emit_opcode( cbuf, 0x7B );
 2544     emit_opcode( cbuf, 0x05 );
 2545   %}
 2546 
 2547   enc_class emitModDPR() %{
          // Emits a 12-byte loop that repeats fprem until the parity flag,
          // copied from the FPU status word, is clear.
 2548     // fprem must be iterative
 2549     // :: loop
 2550     // fprem
 2551     emit_opcode( cbuf, 0xD9 );
 2552     emit_opcode( cbuf, 0xF8 );
 2553     // wait
 2554     emit_opcode( cbuf, 0x9b );
 2555     // fnstsw ax
 2556     emit_opcode( cbuf, 0xDF );
 2557     emit_opcode( cbuf, 0xE0 );
 2558     // sahf
 2559     emit_opcode( cbuf, 0x9E );
 2560     // jp  ::loop
          // Near form (0F 8A) with a little-endian rel32 of 0xFFFFFFF4 = -12,
          // i.e. back over all twelve bytes emitted by this encoding.
 2561     emit_opcode( cbuf, 0x0F );
 2562     emit_opcode( cbuf, 0x8A );
 2563     emit_opcode( cbuf, 0xF4 );
 2564     emit_opcode( cbuf, 0xFF );
 2565     emit_opcode( cbuf, 0xFF );
 2566     emit_opcode( cbuf, 0xFF );
 2567   %}
 2568 
 2569   enc_class fpu_flags() %{
          // Transfers the FPU status word to the integer flags.  If bit 0x0400
          // of the status word is set (unordered compare), AH is forced to 1
          // first so that the result reads as "less than".
 2570     // fnstsw_ax
 2571     emit_opcode( cbuf, 0xDF);
 2572     emit_opcode( cbuf, 0xE0);
 2573     // test ax,0x0400
 2574     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2575     emit_opcode( cbuf, 0xA9 );
 2576     emit_d16   ( cbuf, 0x0400 );
 2577     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2578     // // test rax,0x0400
 2579     // emit_opcode( cbuf, 0xA9 );
 2580     // emit_d32   ( cbuf, 0x00000400 );
 2581     //
 2582     // jz exit (no unordered comparison)
          // rel8 of 2 steps over the "mov ah,1" only; the sahf is always executed
 2583     emit_opcode( cbuf, 0x74 );
 2584     emit_d8    ( cbuf, 0x02 );
 2585     // mov ah,1 - treat as LT case (set carry flag)
 2586     emit_opcode( cbuf, 0xB4 );
 2587     emit_d8    ( cbuf, 0x01 );
 2588     // sahf
 2589     emit_opcode( cbuf, 0x9E);
 2590   %}
 2591 
 2592   enc_class cmpF_P6_fixup() %{
 2593     // Fixup the integer flags in case comparison involved a NaN
 2594     //
 2595     // JNP exit (no unordered comparison, P-flag is set by NaN)
          // rel8 of 3 steps over MOV AH,1 and SAHF, landing on the NOP
 2596     emit_opcode( cbuf, 0x7B );
 2597     emit_d8    ( cbuf, 0x03 );
 2598     // MOV AH,1 - treat as LT case (set carry flag)
 2599     emit_opcode( cbuf, 0xB4 );
 2600     emit_d8    ( cbuf, 0x01 );
 2601     // SAHF
 2602     emit_opcode( cbuf, 0x9E);
 2603     // NOP     // target for branch to avoid branch to branch
 2604     emit_opcode( cbuf, 0x90);
 2605   %}
 2606 
 2607 //     fnstsw_ax();
 2608 //     sahf();
 2609 //     movl(dst, nan_result);
 2610 //     jcc(Assembler::parity, exit);
 2611 //     movl(dst, less_result);
 2612 //     jcc(Assembler::below, exit);
 2613 //     movl(dst, equal_result);
 2614 //     jcc(Assembler::equal, exit);
 2615 //     movl(dst, greater_result);
 2616 
 2617 // less_result     = -1;
 2618 // greater_result  =  1;
 2619 // equal_result    =  0;
 2620 // nan_result      = -1;
 2621 
 2622   enc_class CmpF_Result(rRegI dst) %{
          // Materializes a three-way compare result in $dst from the FPU status
          // word: -1 for unordered, -1 for less, 0 for equal, 1 for greater.
          // Each candidate value is loaded first and a short branch then exits
          // if it was the right one; every rel8 equals the number of bytes that
          // remain to be emitted after it.
 2623     // fnstsw_ax();
 2624     emit_opcode( cbuf, 0xDF);
 2625     emit_opcode( cbuf, 0xE0);
 2626     // sahf
 2627     emit_opcode( cbuf, 0x9E);
 2628     // movl(dst, nan_result);
 2629     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2630     emit_d32( cbuf, -1 );
 2631     // jcc(Assembler::parity, exit);
 2632     emit_opcode( cbuf, 0x7A );
 2633     emit_d8    ( cbuf, 0x13 );
 2634     // movl(dst, less_result);
 2635     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2636     emit_d32( cbuf, -1 );
 2637     // jcc(Assembler::below, exit);
 2638     emit_opcode( cbuf, 0x72 );
 2639     emit_d8    ( cbuf, 0x0C );
 2640     // movl(dst, equal_result);
 2641     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2642     emit_d32( cbuf, 0 );
 2643     // jcc(Assembler::equal, exit);
 2644     emit_opcode( cbuf, 0x74 );
 2645     emit_d8    ( cbuf, 0x05 );
 2646     // movl(dst, greater_result);
 2647     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2648     emit_d32( cbuf, 1 );
 2649   %}
 2650 
 2651 
 2652   // Compare the longs and set flags
 2653   // BROKEN!  Do Not use as-is
 2654   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
          // Compares the high words and, only if they are equal, the low words,
          // leaving the flags of whichever compare ran last.
 2655     // CMP    $src1.hi,$src2.hi
 2656     emit_opcode( cbuf, 0x3B );
 2657     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2658     // JNE,s  done -- rel8 of 2 steps over the low-word CMP
 2659     emit_opcode(cbuf,0x75);
 2660     emit_d8(cbuf, 2 );
 2661     // CMP    $src1.lo,$src2.lo
 2662     emit_opcode( cbuf, 0x3B );
 2663     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2664 // done:
 2665   %}
 2666 
 2667   enc_class convert_int_long( regL dst, rRegI src ) %{
 2668     // Sign-extend $src into the pair $dst: both halves receive $src via
 2669     // encode_Copy, then the high half is arithmetically shifted right by 31.
 2670     const int lo  = $dst$$reg;
 2671     const int hi  = HIGH_FROM_LOW(lo);
 2672     const int val = $src$$reg;
 2673     encode_Copy( cbuf, lo, val );        // mov $dst.lo,$src
 2674     encode_Copy( cbuf, hi, val );        // mov $dst.hi,$src
 2675     emit_opcode( cbuf, 0xC1 );           // sar $dst.hi,31
 2676     emit_rm(cbuf, 0x3, 7, hi );
 2677     emit_d8(cbuf, 0x1F );
 2678   %}
 2679 
 2680   enc_class convert_long_double( eRegL src ) %{
          // Pushes the long $src on the CPU stack (high word first, so the low
          // word sits at the lower address), loads it onto the FPU stack as a
          // 64-bit integer and then releases the 8 bytes again.
 2681     // push $src.hi
 2682     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2683     // push $src.lo
 2684     emit_opcode(cbuf, 0x50+$src$$reg  );
 2685     // fild 64-bits at [SP]
 2686     emit_opcode(cbuf,0xdf);
 2687     emit_d8(cbuf, 0x6C);
 2688     emit_d8(cbuf, 0x24);
 2689     emit_d8(cbuf, 0x00);
 2690     // pop stack
 2691     emit_opcode(cbuf, 0x83); // add  SP, #8
 2692     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2693     emit_d8(cbuf, 0x8);
 2694   %}
 2695 
 2696   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2697     // Multiply into EDX:EAX, then shift $dst right by however much of
 2698     // $cnt exceeds 32.  $src2 and $cr are not referenced by the encoding.
 2699     emit_opcode( cbuf, 0xF7 );                   // IMUL   EDX:EAX,$src1
 2700     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2701     const int extra = ((int)$cnt$$constant) - 32;
 2702     if (extra > 0) {                             // nothing more to emit when $cnt is 32
 2703       emit_opcode(cbuf, 0xC1);                   // SAR    EDX,$cnt-32
 2704       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2705       emit_d8(cbuf, extra);
 2706     }
 2707   %}
 2708 
 2709   // this version doesn't have add sp, 8
 2710   enc_class convert_long_double2( eRegL src ) %{
          // Pushes the long $src on the CPU stack (high word first) and loads it
          // onto the FPU stack as a 64-bit integer.  The 8 pushed bytes stay on
          // the CPU stack; releasing them is left to a later encoding.
 2711     // push $src.hi
 2712     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2713     // push $src.lo
 2714     emit_opcode(cbuf, 0x50+$src$$reg  );
 2715     // fild 64-bits at [SP]
 2716     emit_opcode(cbuf,0xdf);
 2717     emit_d8(cbuf, 0x6C);
 2718     emit_d8(cbuf, 0x24);
 2719     emit_d8(cbuf, 0x00);
 2720   %}
 2721 
 2722   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2723     // Basic idea: long = (long)int * (long)int; $dst is implicit in the encoding.
 2724     const int multiplier = $src$$reg;
 2725     emit_opcode( cbuf, 0xF7 );                // IMUL EDX:EAX, src
 2726     emit_rm( cbuf, 0x3, 0x5, multiplier );    //   (reg field 0x5)
 2727   %}
 2728 
 2729   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2730     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL); $dst is implicit.
 2731     const int multiplier = $src$$reg;
 2732     emit_opcode( cbuf, 0xF7 );                // MUL EDX:EAX, src
 2733     emit_rm( cbuf, 0x3, 0x4, multiplier );    //   (reg field 0x4)
 2734   %}
 2735 
 2736   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2737     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2738     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
          // $tmp accumulates the two cross products; the widening MUL then
          // produces the low word and the high part of x_lo * y_lo, to which
          // $tmp is finally added.
 2739     // MOV    $tmp,$src.lo
 2740     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2741     // IMUL   $tmp,EDX
 2742     emit_opcode( cbuf, 0x0F );
 2743     emit_opcode( cbuf, 0xAF );
 2744     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2745     // MOV    EDX,$src.hi
 2746     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2747     // IMUL   EDX,EAX
 2748     emit_opcode( cbuf, 0x0F );
 2749     emit_opcode( cbuf, 0xAF );
 2750     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2751     // ADD    $tmp,EDX
 2752     emit_opcode( cbuf, 0x03 );
 2753     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2754     // MUL   EDX:EAX,$src.lo
 2755     emit_opcode( cbuf, 0xF7 );
 2756     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2757     // ADD    EDX,$tmp
 2758     emit_opcode( cbuf, 0x03 );
 2759     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2760   %}
 2761 
 2762   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2763     // Basic idea: lo(result) = lo(src * y_lo)
 2764     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
          // The constant is emitted twice: as an 8-bit immediate of the IMUL and
          // as a 32-bit immediate of the MOV into EDX.
 2765     // IMUL   $tmp,EDX,$src
 2766     emit_opcode( cbuf, 0x6B );
 2767     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2768     emit_d8( cbuf, (int)$src$$constant );
 2769     // MOV    EDX,$src
 2770     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2771     emit_d32( cbuf, (int)$src$$constant );
 2772     // MUL   EDX:EAX,EDX
 2773     emit_opcode( cbuf, 0xF7 );
 2774     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2775     // ADD    EDX,$tmp
 2776     emit_opcode( cbuf, 0x03 );
 2777     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2778   %}
 2779 
 2780   enc_class long_div( eRegL src1, eRegL src2 ) %{
          // Pushes $src1 and then $src2 (high word first each), calls
          // SharedRuntime::ldiv directly and pops the 16 bytes of arguments.
          // NOTE(review): HIGH_FROM_LOW wraps the complete PUSH opcode byte here
          // rather than just the register number; that is only equivalent if
          // the macro is a plain additive offset -- confirm.
 2781     // PUSH src1.hi
 2782     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2783     // PUSH src1.lo
 2784     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2785     // PUSH src2.hi
 2786     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2787     // PUSH src2.lo
 2788     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2789     // CALL directly to the runtime
 2790     cbuf.set_insts_mark();
 2791     emit_opcode(cbuf,0xE8);       // Call into runtime
          // displacement = target - (current end of code + 4 bytes of displacement)
 2792     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2793     // Restore stack
 2794     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2795     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2796     emit_d8(cbuf, 4*4);
 2797   %}
 2798 
 2799   enc_class long_mod( eRegL src1, eRegL src2 ) %{
          // Pushes $src1 and then $src2 (high word first each), calls
          // SharedRuntime::lrem directly and pops the 16 bytes of arguments.
          // NOTE(review): HIGH_FROM_LOW wraps the complete PUSH opcode byte here
          // rather than just the register number; that is only equivalent if
          // the macro is a plain additive offset -- confirm.
 2800     // PUSH src1.hi
 2801     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2802     // PUSH src1.lo
 2803     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2804     // PUSH src2.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2806     // PUSH src2.lo
 2807     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2808     // CALL directly to the runtime
 2809     cbuf.set_insts_mark();
 2810     emit_opcode(cbuf,0xE8);       // Call into runtime
          // displacement = target - (current end of code + 4 bytes of displacement)
 2811     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2812     // Restore stack
 2813     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2814     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2815     emit_d8(cbuf, 4*4);
 2816   %}
 2817 
 2818   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2819     const int scratch = $tmp$$reg;       // ends up holding $src.lo | $src.hi
 2820     const int lo      = $src$$reg;
 2821     emit_opcode(cbuf, 0x8B);             // MOV   $tmp,$src.lo
 2822     emit_rm(cbuf, 0x3, scratch, lo);
 2823     emit_opcode(cbuf, 0x0B);             // OR    $tmp,$src.hi
 2824     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW(lo));
 2825   %}
 2826 
 2827   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
          // Compares the low words and, only if they are equal, the high words,
          // leaving the flags of whichever compare ran last.
 2828     // CMP    $src1.lo,$src2.lo
 2829     emit_opcode( cbuf, 0x3B );
 2830     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2831     // JNE,s  skip -- rel8 of 2 steps over the high-word CMP
 2832     emit_cc(cbuf, 0x70, 0x5);
 2833     emit_d8(cbuf,2);
 2834     // CMP    $src1.hi,$src2.hi
 2835     emit_opcode( cbuf, 0x3B );
 2836     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2837   %}
 2838 
 2839   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2840     // Long compare by subtract-with-borrow: CMP sets flags for the low
 2841     // words, then the high words are SBB'ed in $tmp to compute the final flags.
 2842     const int scratch = $tmp$$reg;
 2843     emit_opcode( cbuf, 0x3B );           // CMP    $src1.lo,$src2.lo
 2844     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2845     emit_opcode( cbuf, 0x8B );           // MOV    $tmp,$src1.hi
 2846     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW($src1$$reg) );
 2847     emit_opcode( cbuf, 0x1B );           // SBB    $tmp,$src2.hi
 2848     emit_rm(cbuf, 0x3, scratch, HIGH_FROM_LOW($src2$$reg) );
 2849   %}
 2850 
 2851   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2852     // Flags for a long compare of zero against $src: $tmp is cleared, compared
 2853     // with the low word, then SBB'ed with the high word.
 2854     const int zero = $tmp$$reg;
 2855     emit_opcode(cbuf, 0x33);             // XOR    $tmp,$tmp
 2856     emit_rm(cbuf, 0x3, zero, zero);
 2857     emit_opcode( cbuf, 0x3B );           // CMP    $tmp,$src.lo
 2858     emit_rm(cbuf, 0x3, zero, $src$$reg );
 2859     emit_opcode( cbuf, 0x1B );           // SBB    $tmp,$src.hi
 2860     emit_rm(cbuf, 0x3, zero, HIGH_FROM_LOW($src$$reg) );
 2861   %}
 2862 
 2863  // Sniff, sniff... smells like Gnu Superoptimizer
 2864   enc_class neg_long( eRegL dst ) %{
          // Negates the 64-bit pair $dst in three instructions: negate both
          // halves, then subtract the borrow of the low-half NEG from the high half.
 2865     emit_opcode(cbuf,0xF7);    // NEG hi
 2866     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2867     emit_opcode(cbuf,0xF7);    // NEG lo
 2868     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2869     emit_opcode(cbuf,0x83);    // SBB hi,0
 2870     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2871     emit_d8    (cbuf,0 );
 2872   %}
 2873 
 2874   enc_class enc_pop_rdx() %{
          // Single-byte encoding 0x5A (a POP into the register named in the
          // enc_class name).
 2875     emit_opcode(cbuf,0x5A);
 2876   %}
 2877 
 2878   enc_class enc_rethrow() %{
          // Emits a relative jump to OptoRuntime::rethrow_stub() and records a
          // runtime-call relocation for its 32-bit displacement.
 2879     cbuf.set_insts_mark();
 2880     emit_opcode(cbuf, 0xE9);        // jmp    entry
          // displacement = target - (current end of code + 4 bytes of displacement)
 2881     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2882                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2883   %}
 2884 
 2885 
 2886   // Convert a double to an int.  Java semantics require we do complex
 2887   // manglelations in the corner cases.  So we set the rounding mode to
 2888   // 'zero', store the darned double down as an int, and reset the
 2889   // rounding mode to 'nearest'.  The hardware throws an exception which
 2890   // patches up the correct value directly to the stack.
 2891   enc_class DPR2I_encoding( regDPR src ) %{
          // Fast path: switch the FPU control word to truncation, store the
          // value on the FPU stack top as a 32-bit integer into a freshly
          // allocated stack word, restore the control word and pop the result
          // into EAX.  Slow path: if the result is 0x80000000, $src is pushed
          // again and StubRoutines::x86::d2i_wrapper() is called.
 2892     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2893     // exceptions here, so that a NAN or other corner-case value will
 2894     // thrown an exception (but normal values get converted at full speed).
 2895     // However, I2C adapters and other float-stack manglers leave pending
 2896     // invalid-op exceptions hanging.  We would have to clear them before
 2897     // enabling them and that is more expensive than just testing for the
 2898     // invalid value Intel stores down in the corner cases.
 2899     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2900     emit_opcode(cbuf,0x2D);
 2901     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2902     // Allocate a word
 2903     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2904     emit_opcode(cbuf,0xEC);
 2905     emit_d8(cbuf,0x04);
 2906     // Encoding assumes a double has been pushed into FPR0.
 2907     // Store down the double as an int, popping the FPU stack
 2908     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2909     emit_opcode(cbuf,0x1C);
 2910     emit_d8(cbuf,0x24);
 2911     // Restore the rounding mode; mask the exception
          // The control word restored is chosen when the code is emitted, from
          // the current compilation's 24-bit-FP-mode setting.
 2912     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2913     emit_opcode(cbuf,0x2D);
 2914     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2915         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2916         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2917
 2918     // Load the converted int; adjust CPU stack
 2919     emit_opcode(cbuf,0x58);       // POP EAX
 2920     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2921     emit_d32   (cbuf,0x80000000); //         0x80000000
 2922     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2923     emit_d8    (cbuf,0x07);       // Size of slow_call
 2924     // Push src onto stack slow-path
 2925     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2926     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2927     // CALL directly to the runtime
 2928     cbuf.set_insts_mark();
 2929     emit_opcode(cbuf,0xE8);       // Call into runtime
 2930     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2931     // Carry on here...
 2932   %}
 2933 
 2934   enc_class DPR2L_encoding( regDPR src ) %{
          // Fast path: switch the FPU control word to truncation, store the
          // value on the FPU stack top as a 64-bit integer into 8 freshly
          // allocated stack bytes, restore the control word and pop the result
          // into EDX:EAX.  Slow path: if the result is 0x80000000:00000000,
          // $src is pushed again and StubRoutines::x86::d2l_wrapper() is called.
 2935     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2936     emit_opcode(cbuf,0x2D);
 2937     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2938     // Allocate two words
 2939     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2940     emit_opcode(cbuf,0xEC);
 2941     emit_d8(cbuf,0x08);
 2942     // Encoding assumes a double has been pushed into FPR0.
 2943     // Store down the double as a long, popping the FPU stack
 2944     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2945     emit_opcode(cbuf,0x3C);
 2946     emit_d8(cbuf,0x24);
 2947     // Restore the rounding mode; mask the exception
          // The control word restored is chosen when the code is emitted, from
          // the current compilation's 24-bit-FP-mode setting.
 2948     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2949     emit_opcode(cbuf,0x2D);
 2950     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2951         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2952         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2953
 2954     // Load the converted long; adjust CPU stack
 2955     emit_opcode(cbuf,0x58);       // POP EAX
 2956     emit_opcode(cbuf,0x5A);       // POP EDX
 2957     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2958     emit_d8    (cbuf,0xFA);       // rdx
 2959     emit_d32   (cbuf,0x80000000); //         0x80000000
 2960     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2961     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST/JNE pair below
 2962     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2963     emit_opcode(cbuf,0xC0);       // operand byte: EAX,EAX
 2964     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2965     emit_d8    (cbuf,0x07);       // Size of slow_call
 2966     // Push src onto stack slow-path
 2967     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2968     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2969     // CALL directly to the runtime
 2970     cbuf.set_insts_mark();
 2971     emit_opcode(cbuf,0xE8);       // Call into runtime
 2972     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2973     // Carry on here...
 2974   %}
 2975 
 2976   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2977     // Multiplies the operand previously loaded onto the fp stack top by $src1.
 2978     const int fmul_st_i = 0xC8 + $src1$$reg;   // second byte selects ST(i)
 2979     emit_opcode(cbuf, 0xD8);                   // FMUL   ST,$src  /* D8 C8+i */
 2980     emit_opcode(cbuf, fmul_st_i);
 2981   %}
 2982 
 2983   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2984     // FADD   ST,src2  /* D8 C0+i */ -- the non-popping form
 2985     const int fadd_st_i = 0xC0 + $src2$$reg;
 2986     emit_opcode(cbuf, 0xD8);
 2987     emit_opcode(cbuf, fadd_st_i);      // could use FADDP  src2,fpST  /* DE C0+i */
 2988   %}
 2989 
 2990   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2991     const int faddp_i = 0xC0 + $src2$$reg;
 2992     emit_opcode(cbuf, 0xDE);           // FADDP  src2,ST  /* DE C0+i */
 2993     emit_opcode(cbuf, faddp_i);
 2994   %}
 2995 
 2996   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2997     // Operand has been loaded into fp ST (stack top): subtract $src1 from
 2998     // it, then divide by $src2.  Both instructions start with opcode byte D8.
 2999     const int fsub_i = 0xE0 + $src1$$reg;
 3000     const int fdiv_i = 0xF0 + $src2$$reg;
 3001     emit_opcode(cbuf, 0xD8);             // FSUB   ST,$src1
 3002     emit_opcode(cbuf, fsub_i);
 3003     emit_opcode(cbuf, 0xD8);             // FDIV
 3004     emit_opcode(cbuf, fdiv_i);
 3005   %}
 3006 
 3007   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3008     // Operand was loaded from memory into fp ST (stack top): add $src1 to
 3009     // it, then multiply by $src2.  Both instructions start with opcode byte D8.
 3010     const int fadd_i = 0xC0 + $src1$$reg;
 3011     const int fmul_i = 0xC8 + $src2$$reg;
 3012     emit_opcode(cbuf, 0xD8);             // FADD   ST,$src  /* D8 C0+i */
 3013     emit_opcode(cbuf, fadd_i);
 3014     emit_opcode(cbuf, 0xD8);             // FMUL  ST,src2  /* D8 C8+i */
 3015     emit_opcode(cbuf, fmul_i);
 3016   %}
 3017 
 3018 
 3019   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3020     // Operand was loaded from memory into fp ST (stack top): add $src1 to
 3021     // it, then multiply into $src2 with the popping form of the multiply.
 3022     const int fadd_i  = 0xC0 + $src1$$reg;
 3023     const int fmulp_i = 0xC8 + $src2$$reg;
 3024     emit_opcode(cbuf, 0xD8);             // FADD   ST,$src  /* D8 C0+i */
 3025     emit_opcode(cbuf, fadd_i);
 3026     emit_opcode(cbuf, 0xDE);             // FMULP  src2,ST  /* DE C8+i */
 3027     emit_opcode(cbuf, fmulp_i);
 3028   %}
 3029 
 3030   // Atomically load the volatile long via the FP unit, then spill it to $dst.
 3031   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3032     // Opcode DF with ModRM extension /5 addresses $mem; its displacement may
 3033     // carry a relocation (disp-as-oop when working with static globals).
 3034     const relocInfo::relocType mem_reloc = $mem->disp_reloc();
 3035     emit_opcode(cbuf, 0xDF);
 3036     encode_RegMem(cbuf, 0x05,
 3037                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
 3038                   mem_reloc);
 3039     // Opcode DF with extension /7 is then applied to the destination stack slot.
 3040     store_to_stackslot(cbuf, 0x0DF, 0x07, $dst$$disp);
 3041   %}
 3042 
 3043   // Atomic store of a volatile long.  The value is first moved from its
 3044   // stack slot into the FP TOS and then written out with one 64-bit FIST.
 3045   // The target address has to be probed before the store (for null-ptr
 3046   // checks), so the memory operand is used twice in the encoding.
 3047   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3048     // Opcode DF with ModRM extension /5, applied to the source stack slot.
 3049     store_to_stackslot(cbuf, 0x0DF, 0x05, $src$$disp);
 3050 
 3051     // Mark start of FIST in case $mem has an oop; the displacement may be
 3052     // relocated (disp-as-oop when working with static globals).
 3053     cbuf.set_insts_mark();
 3054     emit_opcode(cbuf, 0xDF);
 3055     encode_RegMem(cbuf, 0x07,
 3056                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
 3057                   $mem->disp_reloc());
 3058   %}
 3059 
 3060 %}
 3061 
 3062 
 3063 //----------FRAME--------------------------------------------------------------
 3064 // Definition of frame structure and management information.
 3065 //
 3066 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3067 //                             |   (to get allocators register number
 3068 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3069 //  r   CALLER     |        |
 3070 //  o     |        +--------+      pad to even-align allocators stack-slot
 3071 //  w     V        |  pad0  |        numbers; owned by CALLER
 3072 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3073 //  h     ^        |   in   |  5
 3074 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3075 //  |     |        |        |  3
 3076 //  |     |        +--------+
 3077 //  V     |        | old out|      Empty on Intel, window on Sparc
 3078 //        |    old |preserve|      Must be even aligned.
 3079 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3080 //        |        |   in   |  3   area for Intel ret address
 3081 //     Owned by    |preserve|      Empty on Sparc.
 3082 //       SELF      +--------+
 3083 //        |        |  pad2  |  2   pad to align old SP
 3084 //        |        +--------+  1
 3085 //        |        | locks  |  0
 3086 //        |        +--------+----> OptoReg::stack0(), even aligned
 3087 //        |        |  pad1  | 11   pad to align new SP
 3088 //        |        +--------+
 3089 //        |        |        | 10
 3090 //        |        | spills |  9   spills
 3091 //        V        |        |  8   (pad0 slot for callee)
 3092 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3093 //        ^        |  out   |  7
 3094 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3095 //     Owned by    +--------+
 3096 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3097 //        |    new |preserve|      Must be even-aligned.
 3098 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3099 //        |        |        |
 3100 //
 3101 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3102 //         known from SELF's arguments and the Java calling convention.
 3103 //         Region 6-7 is determined per call site.
 3104 // Note 2: If the calling convention leaves holes in the incoming argument
 3105 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3106 //         are owned by the CALLEE.  Holes should not be necessary in the
 3107 //         incoming area, as the Java calling convention is completely under
 3108 //         the control of the AD file.  Doubles can be sorted and packed to
 3109 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3110 //         varargs C calling conventions.
 3111 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3112 //         even aligned with pad0 as needed.
 3113 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3114 //         region 6-11 is even aligned; it may be padded out more so that
 3115 //         the region from SP to FP meets the minimum stack alignment.
 3116 
 3117 frame %{
 3118   // The inline cache register is part of the calling convention
 3119   // between compiled code and the interpreter.
 3120   inline_cache_reg(EAX);                // Inline Cache Register
 3121 
 3122   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3123   cisc_spilling_operand_name(indOffset32);
 3124 
 3125   // Number of stack slots consumed by locking an object
 3126   sync_stack_slots(1);
 3127 
 3128   // Compiled code's Frame Pointer
 3129   frame_pointer(ESP);
 3130   // Interpreter stores its frame pointer in a register which is
 3131   // stored to the stack by I2CAdaptors.
 3132   // I2CAdaptors convert from interpreted java to compiled java.
 3133   interpreter_frame_pointer(EBP);
 3134 
 3135   // Stack alignment requirement
 3136   // Alignment size in bytes (128-bit -> 16 bytes)
 3137   stack_alignment(StackAlignmentInBytes);
 3138 
 3139   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3140   // for calls to C.  Supports the var-args backing area for register parms.
 3141   varargs_C_out_slots_killed(0);
 3142 
 3143   // The after-PROLOG location of the return address.  Location of
 3144   // return address specifies a type (REG or STACK) and a number
 3145   // representing the register number (i.e. - use a register name) or
 3146   // stack slot.
 3147   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3148   // Otherwise, it is above the locks and verification slot and alignment word
 3149   return_addr(STACK - 1 +
 3150               align_up((Compile::current()->in_preserve_stack_slots() +
 3151                         Compile::current()->fixed_slots()),
 3152                        stack_alignment_in_slots()));
 3153 
 3154   // Location of C & interpreter return values, as an OptoRegPair(hi, lo) selected by ideal_reg
 3155   c_return_value %{
 3156     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3157     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3158     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3159 
 3160     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3161     // that C functions return float and double results in XMM0.
 3162     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3163       return OptoRegPair(XMM0b_num,XMM0_num);
 3164     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3165       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3166 
 3167     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // table lookup for all remaining cases
 3168   %}
 3169 
 3170   // Location of return values, as an OptoRegPair(hi, lo) selected by ideal_reg
 3171   return_value %{
 3172     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3173     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3174     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3175     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3176       return OptoRegPair(XMM0b_num,XMM0_num);
 3177     if( ideal_reg == Op_RegF && UseSSE>=1 )  // floats use XMM0 from UseSSE>=1 here; c_return_value above requires >=2
 3178       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3179     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // table lookup for all remaining cases
 3180   %}
 3181 
 3182 %}
 3183 
 3184 //----------ATTRIBUTES---------------------------------------------------------
 3185 //----------Operand Attributes-------------------------------------------------
 3186 op_attrib op_cost(0);        // Required cost attribute
 3187 
 3188 //----------Instruction Attributes---------------------------------------------
 3189 ins_attrib ins_cost(100);       // Required cost attribute
 3190 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3191 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3192                                 // non-matching short branch variant of some
 3193                                 // long branch?
 3194 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2);
 3195                                 // specifies the alignment that some part of the instruction (not
 3196                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3197                                 // function must be provided for the instruction.
 3198 
 3199 //----------OPERANDS-----------------------------------------------------------
 3200 // Operand definitions must precede instruction definitions for correct parsing
 3201 // in the ADLC because operands constitute user defined types which are used in
 3202 // instruction definitions.
 3203 
 3204 //----------Simple Operands----------------------------------------------------
 3205 // Immediate Operands
 3206 // Integer Immediate: any int constant (ConI); no predicate restricts the value
 3207 operand immI() %{
 3208   match(ConI);
 3209 
 3210   op_cost(10);
 3211   format %{ %}
 3212   interface(CONST_INTER);
 3213 %}
 3214 
 3215 // Constant for test vs zero: int constant equal to 0
 3216 operand immI_0() %{
 3217   predicate(n->get_int() == 0);
 3218   match(ConI);
 3219 
 3220   op_cost(0);
 3221   format %{ %}
 3222   interface(CONST_INTER);
 3223 %}
 3224 
 3225 // Constant for increment: int constant equal to 1
 3226 operand immI_1() %{
 3227   predicate(n->get_int() == 1);
 3228   match(ConI);
 3229 
 3230   op_cost(0);
 3231   format %{ %}
 3232   interface(CONST_INTER);
 3233 %}
 3234 
 3235 // Constant for decrement: int constant equal to -1
 3236 operand immI_M1() %{
 3237   predicate(n->get_int() == -1);
 3238   match(ConI);
 3239 
 3240   op_cost(0);
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 // Valid scale values for addressing modes: int constant in [0, 3]
 3246 operand immI2() %{
 3247   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3248   match(ConI);
 3249 
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 operand immI8() %{  // int constant in the signed-byte range [-128, 127]
 3255   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3256   match(ConI);
 3257 
 3258   op_cost(5);
 3259   format %{ %}
 3260   interface(CONST_INTER);
 3261 %}
 3262 
 3263 operand immU8() %{  // int constant in the unsigned-byte range [0, 255]
 3264   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3265   match(ConI);
 3266 
 3267   op_cost(5);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 operand immI16() %{  // int constant in the signed 16-bit range [-32768, 32767]
 3273   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3274   match(ConI);
 3275 
 3276   op_cost(10);
 3277   format %{ %}
 3278   interface(CONST_INTER);
 3279 %}
 3280 
 3281 // Int Immediate non-negative: int constant >= 0 (so the sign bit is clear)
 3282 operand immU31()
 3283 %{
 3284   predicate(n->get_int() >= 0);
 3285   match(ConI);
 3286 
 3287   op_cost(0);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 // Constant for long shifts: int constant equal to 32
 3293 operand immI_32() %{
 3294   predicate( n->get_int() == 32 );
 3295   match(ConI);
 3296 
 3297   op_cost(0);
 3298   format %{ %}
 3299   interface(CONST_INTER);
 3300 %}
 3301 
 3302 operand immI_1_31() %{  // int constant in [1, 31]
 3303   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3304   match(ConI);
 3305 
 3306   op_cost(0);
 3307   format %{ %}
 3308   interface(CONST_INTER);
 3309 %}
 3310 
 3311 operand immI_32_63() %{  // int constant in [32, 63]
 3312   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3313   match(ConI);
 3314   op_cost(0);
 3315 
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 operand immI_2() %{  // int constant equal to 2
 3321   predicate( n->get_int() == 2 );
 3322   match(ConI);
 3323 
 3324   op_cost(0);
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 operand immI_3() %{  // int constant equal to 3
 3330   predicate( n->get_int() == 3 );
 3331   match(ConI);
 3332 
 3333   op_cost(0);
 3334   format %{ %}
 3335   interface(CONST_INTER);
 3336 %}
 3337 
 3338 operand immI_4()
 3339 %{  // int constant equal to 4
 3340   predicate(n->get_int() == 4);
 3341   match(ConI);
 3342 
 3343   op_cost(0);
 3344   format %{ %}
 3345   interface(CONST_INTER);
 3346 %}
 3347 
 3348 operand immI_8()
 3349 %{  // int constant equal to 8
 3350   predicate(n->get_int() == 8);
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 // Pointer Immediate: any pointer constant (ConP); no predicate restricts the value
 3359 operand immP() %{
 3360   match(ConP);
 3361 
 3362   op_cost(10);
 3363   format %{ %}
 3364   interface(CONST_INTER);
 3365 %}
 3366 
 3367 // NULL Pointer Immediate: pointer constant whose get_ptr() value is 0
 3368 operand immP0() %{
 3369   predicate( n->get_ptr() == 0 );
 3370   match(ConP);
 3371   op_cost(0);
 3372 
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 // Long Immediate: any long constant (ConL); no predicate restricts the value
 3378 operand immL() %{
 3379   match(ConL);
 3380 
 3381   op_cost(20);
 3382   format %{ %}
 3383   interface(CONST_INTER);
 3384 %}
 3385 
 3386 // Long Immediate zero: long constant equal to 0
 3387 operand immL0() %{
 3388   predicate( n->get_long() == 0L );
 3389   match(ConL);
 3390   op_cost(0);
 3391 
 3392   format %{ %}
 3393   interface(CONST_INTER);
 3394 %}
 3395 
 3396 // Long Immediate minus one: long constant equal to -1
 3397 operand immL_M1() %{
 3398   predicate( n->get_long() == -1L );
 3399   match(ConL);
 3400   op_cost(0);
 3401 
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long immediate from 0 to 127 (inclusive).
 3407 // Used for a shorter form of long mul by 10.
 3408 operand immL_127() %{
 3409   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3410   match(ConL);
 3411   op_cost(0);
 3412 
 3413   format %{ %}
 3414   interface(CONST_INTER);
 3415 %}
 3416 
 3417 // Long Immediate: low 32-bit mask, i.e. exactly the value 0xFFFFFFFF
 3418 operand immL_32bits() %{
 3419   predicate(n->get_long() == 0xFFFFFFFFL);
 3420   match(ConL);
 3421   op_cost(0);
 3422 
 3423   format %{ %}
 3424   interface(CONST_INTER);
 3425 %}
 3426 
 3427 // Long Immediate that fits in 32 bits: the value is unchanged by a round trip through (int)
 3428 operand immL32() %{
 3429   predicate(n->get_long() == (int)(n->get_long()));
 3430   match(ConL);
 3431   op_cost(20);
 3432 
 3433   format %{ %}
 3434   interface(CONST_INTER);
 3435 %}
 3436 
 3437 // Double Immediate zero (only when UseSSE<=1); the == 0.0 test also accepts -0.0
 3438 operand immDPR0() %{
 3439   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3440   // bug that generates code such that NaNs compare equal to 0.0
 3441   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3442   match(ConD);
 3443 
 3444   op_cost(5);
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 // Double Immediate one (only when UseSSE<=1)
 3450 operand immDPR1() %{
 3451   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3452   match(ConD);
 3453 
 3454   op_cost(5);
 3455   format %{ %}
 3456   interface(CONST_INTER);
 3457 %}
 3458 
 3459 // Double Immediate: any double constant (only when UseSSE<=1)
 3460 operand immDPR() %{
 3461   predicate(UseSSE<=1);
 3462   match(ConD);
 3463 
 3464   op_cost(5);
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
 3469 operand immD() %{  // any double constant (only when UseSSE>=2)
 3470   predicate(UseSSE>=2);
 3471   match(ConD);
 3472 
 3473   op_cost(5);
 3474   format %{ %}
 3475   interface(CONST_INTER);
 3476 %}
 3477 
 3478 // Double Immediate zero (only when UseSSE>=2)
 3479 operand immD0() %{
 3480   // Compare via jlong_cast (presumably the raw bit pattern) instead of the
 3481   // floating-point value: this sidesteps the VC++ bug where NaNs compare equal
 3482   // to 0.0, and it does not match -0.0 either.
 3483   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3484   match(ConD);
 3485 
 3486   format %{ %}
 3487   interface(CONST_INTER);
 3488 %}
 3489 
 3490 // Float Immediate zero (only when UseSSE == 0); the == 0.0F test also accepts -0.0
 3491 operand immFPR0() %{
 3492   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3493   match(ConF);
 3494 
 3495   op_cost(5);
 3496   format %{ %}
 3497   interface(CONST_INTER);
 3498 %}
 3499 
 3500 // Float Immediate one (only when UseSSE == 0)
 3501 operand immFPR1() %{
 3502   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3503   match(ConF);
 3504 
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Float Immediate: any float constant (only when UseSSE == 0)
 3511 operand immFPR() %{
 3512   predicate( UseSSE == 0 );
 3513   match(ConF);
 3514 
 3515   op_cost(5);
 3516   format %{ %}
 3517   interface(CONST_INTER);
 3518 %}
 3519 
 3520 // Float Immediate: any float constant (only when UseSSE >= 1)
 3521 operand immF() %{
 3522   predicate(UseSSE >= 1);
 3523   match(ConF);
 3524 
 3525   op_cost(5);
 3526   format %{ %}
 3527   interface(CONST_INTER);
 3528 %}
 3529 
 3530 // Float Immediate zero.  Zero and not -0.0 (compared via jint_cast; only when UseSSE >= 1)
 3531 operand immF0() %{
 3532   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3533   match(ConF);
 3534 
 3535   op_cost(5);
 3536   format %{ %}
 3537   interface(CONST_INTER);
 3538 %}
 3539 
 3540 // Immediates for special shifts (sign extend)
 3541 
 3542 // Int constant equal to 16 (one of the shift counts for the special sign-extend shifts)
 3543 operand immI_16() %{
 3544   predicate( n->get_int() == 16 );
 3545   match(ConI);
 3546 
 3547   format %{ %}
 3548   interface(CONST_INTER);
 3549 %}
 3550 
 3551 operand immI_24() %{  // int constant equal to 24
 3552   predicate( n->get_int() == 24 );
 3553   match(ConI);
 3554 
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Constant for byte-wide masking: int constant equal to 255 (0xFF)
 3560 operand immI_255() %{
 3561   predicate( n->get_int() == 255 );
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 // Constant for short-wide masking: int constant equal to 65535 (0xFFFF)
 3569 operand immI_65535() %{
 3570   predicate(n->get_int() == 65535);
 3571   match(ConI);
 3572 
 3573   format %{ %}
 3574   interface(CONST_INTER);
 3575 %}
 3576 
 3577 operand kReg()  // RegVectMask value in any register of class vectmask_reg
 3578 %{
 3579   constraint(ALLOC_IN_RC(vectmask_reg));
 3580   match(RegVectMask);
 3581   format %{%}
 3582   interface(REG_INTER);
 3583 %}
 3584 
 3585 operand kReg_K1()  // RegVectMask value restricted to class vectmask_reg_K1
 3586 %{
 3587   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3588   match(RegVectMask);
 3589   format %{%}
 3590   interface(REG_INTER);
 3591 %}
 3592 
 3593 operand kReg_K2()  // RegVectMask value restricted to class vectmask_reg_K2
 3594 %{
 3595   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3596   match(RegVectMask);
 3597   format %{%}
 3598   interface(REG_INTER);
 3599 %}
 3600 
 3601 // RegVectMask value restricted to class vectmask_reg_K3
 3602 operand kReg_K3()
 3603 %{
 3604   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3605   match(RegVectMask);
 3606   format %{%}
 3607   interface(REG_INTER);
 3608 %}
 3609 
 3610 operand kReg_K4()  // RegVectMask value restricted to class vectmask_reg_K4
 3611 %{
 3612   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3613   match(RegVectMask);
 3614   format %{%}
 3615   interface(REG_INTER);
 3616 %}
 3617 
 3618 operand kReg_K5()  // RegVectMask value restricted to class vectmask_reg_K5
 3619 %{
 3620   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3621   match(RegVectMask);
 3622   format %{%}
 3623   interface(REG_INTER);
 3624 %}
 3625 
 3626 operand kReg_K6()  // RegVectMask value restricted to class vectmask_reg_K6
 3627 %{
 3628   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3629   match(RegVectMask);
 3630   format %{%}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 // RegVectMask value restricted to class vectmask_reg_K7
 3635 operand kReg_K7()
 3636 %{
 3637   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3638   match(RegVectMask);
 3639   format %{%}
 3640   interface(REG_INTER);
 3641 %}
 3642 
 3643 // Register Operands
 3644 // Integer Register: int value in class int_reg; also accepts the narrower int operands listed below
 3645 operand rRegI() %{
 3646   constraint(ALLOC_IN_RC(int_reg));
 3647   match(RegI);
 3648   match(xRegI);
 3649   match(eAXRegI);
 3650   match(eBXRegI);
 3651   match(eCXRegI);
 3652   match(eDXRegI);
 3653   match(eDIRegI);
 3654   match(eSIRegI);
 3655 
 3656   format %{ %}
 3657   interface(REG_INTER);
 3658 %}
 3659 
 3660 // Subset of Integer Register: class int_x_reg; accepts the EAX/EBX/ECX/EDX operands
 3661 operand xRegI(rRegI reg) %{
 3662   constraint(ALLOC_IN_RC(int_x_reg));
 3663   match(reg);
 3664   match(eAXRegI);
 3665   match(eBXRegI);
 3666   match(eCXRegI);
 3667   match(eDXRegI);
 3668 
 3669   format %{ %}
 3670   interface(REG_INTER);
 3671 %}
 3672 
 3673 // Special Registers: int value pinned to EAX (class eax_reg)
 3674 operand eAXRegI(xRegI reg) %{
 3675   constraint(ALLOC_IN_RC(eax_reg));
 3676   match(reg);
 3677   match(rRegI);
 3678 
 3679   format %{ "EAX" %}
 3680   interface(REG_INTER);
 3681 %}
 3682 
 3683 // Int value pinned to EBX (class ebx_reg)
 3684 operand eBXRegI(xRegI reg) %{
 3685   constraint(ALLOC_IN_RC(ebx_reg));
 3686   match(reg);
 3687   match(rRegI);
 3688 
 3689   format %{ "EBX" %}
 3690   interface(REG_INTER);
 3691 %}
 3692 
 3693 operand eCXRegI(xRegI reg) %{  // int value pinned to ECX (class ecx_reg)
 3694   constraint(ALLOC_IN_RC(ecx_reg));
 3695   match(reg);
 3696   match(rRegI);
 3697 
 3698   format %{ "ECX" %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand eDXRegI(xRegI reg) %{  // int value pinned to EDX (class edx_reg)
 3703   constraint(ALLOC_IN_RC(edx_reg));
 3704   match(reg);
 3705   match(rRegI);
 3706 
 3707   format %{ "EDX" %}
 3708   interface(REG_INTER);
 3709 %}
 3710 
 3711 operand eDIRegI(xRegI reg) %{  // int value pinned to EDI (class edi_reg)
 3712   constraint(ALLOC_IN_RC(edi_reg));
 3713   match(reg);
 3714   match(rRegI);
 3715 
 3716   format %{ "EDI" %}
 3717   interface(REG_INTER);
 3718 %}
 3719 
 3720 operand naxRegI() %{  // int value in class nax_reg (presumably "not EAX"); accepts ECX/EDX/ESI/EDI operands
 3721   constraint(ALLOC_IN_RC(nax_reg));
 3722   match(RegI);
 3723   match(eCXRegI);
 3724   match(eDXRegI);
 3725   match(eSIRegI);
 3726   match(eDIRegI);
 3727 
 3728   format %{ %}
 3729   interface(REG_INTER);
 3730 %}
 3731 
 3732 operand nadxRegI() %{  // int value in class nadx_reg (presumably "not EAX/EDX"); accepts EBX/ECX/ESI/EDI operands
 3733   constraint(ALLOC_IN_RC(nadx_reg));
 3734   match(RegI);
 3735   match(eBXRegI);
 3736   match(eCXRegI);
 3737   match(eSIRegI);
 3738   match(eDIRegI);
 3739 
 3740   format %{ %}
 3741   interface(REG_INTER);
 3742 %}
 3743 
 3744 operand ncxRegI() %{  // int value in class ncx_reg (presumably "not ECX"); accepts EAX/EDX/ESI/EDI operands
 3745   constraint(ALLOC_IN_RC(ncx_reg));
 3746   match(RegI);
 3747   match(eAXRegI);
 3748   match(eDXRegI);
 3749   match(eSIRegI);
 3750   match(eDIRegI);
 3751 
 3752   format %{ %}
 3753   interface(REG_INTER);
 3754 %}
 3755 
 3756 // Int value pinned to ESI (class esi_reg).
 3757 // Historical note: this operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3758 operand eSIRegI(xRegI reg) %{
 3759    constraint(ALLOC_IN_RC(esi_reg));
 3760    match(reg);
 3761    match(rRegI);
 3762 
 3763    format %{ "ESI" %}
 3764    interface(REG_INTER);
 3765 %}
 3766 
 3767 // Pointer Register: pointer value in class any_reg; also accepts eRegP and the pinned pointer operands below
 3768 operand anyRegP() %{
 3769   constraint(ALLOC_IN_RC(any_reg));
 3770   match(RegP);
 3771   match(eAXRegP);
 3772   match(eBXRegP);
 3773   match(eCXRegP);
 3774   match(eDIRegP);
 3775   match(eRegP);
 3776 
 3777   format %{ %}
 3778   interface(REG_INTER);
 3779 %}
 3780 
 3781 operand eRegP() %{  // pointer value in class int_reg; accepts the EAX/EBX/ECX/EDI pointer operands
 3782   constraint(ALLOC_IN_RC(int_reg));
 3783   match(RegP);
 3784   match(eAXRegP);
 3785   match(eBXRegP);
 3786   match(eCXRegP);
 3787   match(eDIRegP);
 3788 
 3789   format %{ %}
 3790   interface(REG_INTER);
 3791 %}
 3792 
 3793 operand rRegP() %{  // pointer value in class int_reg; same constraint and match rules as eRegP
 3794   constraint(ALLOC_IN_RC(int_reg));
 3795   match(RegP);
 3796   match(eAXRegP);
 3797   match(eBXRegP);
 3798   match(eCXRegP);
 3799   match(eDIRegP);
 3800 
 3801   format %{ %}
 3802   interface(REG_INTER);
 3803 %}
 3804 
 3805 // On windows95, EBP is not safe to use for implicit null tests, so this pointer operand uses class int_reg_no_ebp.
 3806 operand eRegP_no_EBP() %{
 3807   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3808   match(RegP);
 3809   match(eAXRegP);
 3810   match(eBXRegP);
 3811   match(eCXRegP);
 3812   match(eDIRegP);
 3813 
 3814   op_cost(100);
 3815   format %{ %}
 3816   interface(REG_INTER);
 3817 %}
 3818 
 3819 operand naxRegP() %{  // pointer value in class nax_reg (presumably "not EAX"); accepts EBX/EDX/ECX/ESI/EDI pointer operands
 3820   constraint(ALLOC_IN_RC(nax_reg));
 3821   match(RegP);
 3822   match(eBXRegP);
 3823   match(eDXRegP);
 3824   match(eCXRegP);
 3825   match(eSIRegP);
 3826   match(eDIRegP);
 3827 
 3828   format %{ %}
 3829   interface(REG_INTER);
 3830 %}
 3831 
 3832 operand nabxRegP() %{  // pointer value in class nabx_reg (presumably "not EAX/EBX"); accepts ECX/EDX/ESI/EDI pointer operands
 3833   constraint(ALLOC_IN_RC(nabx_reg));
 3834   match(RegP);
 3835   match(eCXRegP);
 3836   match(eDXRegP);
 3837   match(eSIRegP);
 3838   match(eDIRegP);
 3839 
 3840   format %{ %}
 3841   interface(REG_INTER);
 3842 %}
 3843 
 3844 operand pRegP() %{  // pointer value in class p_reg; accepts EBX/EDX/ESI/EDI pointer operands
 3845   constraint(ALLOC_IN_RC(p_reg));
 3846   match(RegP);
 3847   match(eBXRegP);
 3848   match(eDXRegP);
 3849   match(eSIRegP);
 3850   match(eDIRegP);
 3851 
 3852   format %{ %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 // Special Registers: pointer operands pinned to a single register class
 3857 // EAX (class eax_reg): return a pointer value
 3858 operand eAXRegP(eRegP reg) %{
 3859   constraint(ALLOC_IN_RC(eax_reg));
 3860   match(reg);
 3861   format %{ "EAX" %}
 3862   interface(REG_INTER);
 3863 %}
 3864 
 3865 // Pointer value pinned to EBX (class ebx_reg).  Used in AtomicAdd
 3866 operand eBXRegP(eRegP reg) %{
 3867   constraint(ALLOC_IN_RC(ebx_reg));
 3868   match(reg);
 3869   format %{ "EBX" %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 // Pointer value pinned to ECX (class ecx_reg).  Tail-call (interprocedural jump) to interpreter
 3874 operand eCXRegP(eRegP reg) %{
 3875   constraint(ALLOC_IN_RC(ecx_reg));
 3876   match(reg);
 3877   format %{ "ECX" %}
 3878   interface(REG_INTER);
 3879 %}
 3880 
 3881 operand eDXRegP(eRegP reg) %{  // pointer value pinned to EDX (class edx_reg)
 3882   constraint(ALLOC_IN_RC(edx_reg));
 3883   match(reg);
 3884   format %{ "EDX" %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 operand eSIRegP(eRegP reg) %{  // pointer value pinned to ESI (class esi_reg)
 3889   constraint(ALLOC_IN_RC(esi_reg));
 3890   match(reg);
 3891   format %{ "ESI" %}
 3892   interface(REG_INTER);
 3893 %}
 3894 
 3895 // Pointer value pinned to EDI (class edi_reg).  Used in rep stosw
 3896 operand eDIRegP(eRegP reg) %{
 3897   constraint(ALLOC_IN_RC(edi_reg));
 3898   match(reg);
 3899   format %{ "EDI" %}
 3900   interface(REG_INTER);
 3901 %}
 3902 
 3903 operand eRegL() %{  // long value in class long_reg; also accepts eADXRegL
 3904   constraint(ALLOC_IN_RC(long_reg));
 3905   match(RegL);
 3906   match(eADXRegL);
 3907 
 3908   format %{ %}
 3909   interface(REG_INTER);
 3910 %}
 3911 
 3912 operand eADXRegL( eRegL reg ) %{  // long value in class eadx_reg, printed as the EDX:EAX pair
 3913   constraint(ALLOC_IN_RC(eadx_reg));
 3914   match(reg);
 3915 
 3916   format %{ "EDX:EAX" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eBCXRegL( eRegL reg ) %{  // long value in class ebcx_reg, printed as the EBX:ECX pair
 3921   constraint(ALLOC_IN_RC(ebcx_reg));
 3922   match(reg);
 3923 
 3924   format %{ "EBX:ECX" %}
 3925   interface(REG_INTER);
 3926 %}
 3927 
 3928 // Special case for integer high multiply: same eadx_reg class as eADXRegL, but the format prints only EAX
 3929 operand eADXRegL_low_only() %{
 3930   constraint(ALLOC_IN_RC(eadx_reg));
 3931   match(RegL);
 3932 
 3933   format %{ "EAX" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 // Flags register, used as output of compare instructions (class int_flags; same definition as eFlagsReg)
 3938 operand rFlagsReg() %{
 3939   constraint(ALLOC_IN_RC(int_flags));
 3940   match(RegFlags);
 3941 
 3942   format %{ "EFLAGS" %}
 3943   interface(REG_INTER);
 3944 %}
 3945 
 3946 // Flags register, used as output of compare instructions (class int_flags; same definition as rFlagsReg)
 3947 operand eFlagsReg() %{
 3948   constraint(ALLOC_IN_RC(int_flags));
 3949   match(RegFlags);
 3950 
 3951   format %{ "EFLAGS" %}
 3952   interface(REG_INTER);
 3953 %}
 3954 
 3955 // Flags register, used as output of FLOATING POINT compare instructions (same int_flags class, distinct operand type)
 3956 operand eFlagsRegU() %{
 3957   constraint(ALLOC_IN_RC(int_flags));
 3958   match(RegFlags);
 3959 
 3960   format %{ "EFLAGS_U" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 operand eFlagsRegUCF() %{  // flags operand in class int_flags, printed as EFLAGS_U_CF
 3965   constraint(ALLOC_IN_RC(int_flags));
 3966   match(RegFlags);
 3967   predicate(false);  // predicate is always false; presumably only named explicitly by instruct rules -- TODO confirm
 3968 
 3969   format %{ "EFLAGS_U_CF" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Condition Code Register used by long compare (LTGE variant; class int_flags)
 3974 operand flagsReg_long_LTGE() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977   format %{ "FLAGS_LTGE" %}
 3978   interface(REG_INTER);
 3979 %}
 3980 operand flagsReg_long_EQNE() %{  // long-compare flags, EQNE variant (class int_flags)
 3981   constraint(ALLOC_IN_RC(int_flags));
 3982   match(RegFlags);
 3983   format %{ "FLAGS_EQNE" %}
 3984   interface(REG_INTER);
 3985 %}
 3986 operand flagsReg_long_LEGT() %{  // long-compare flags, LEGT variant (class int_flags)
 3987   constraint(ALLOC_IN_RC(int_flags));
 3988   match(RegFlags);
 3989   format %{ "FLAGS_LEGT" %}
 3990   interface(REG_INTER);
 3991 %}
 3992 
 3993 // Condition Code Register used by unsigned long compare (LTGE variant; class int_flags)
 3994 operand flagsReg_ulong_LTGE() %{
 3995   constraint(ALLOC_IN_RC(int_flags));
 3996   match(RegFlags);
 3997   format %{ "FLAGS_U_LTGE" %}
 3998   interface(REG_INTER);
 3999 %}
 4000 operand flagsReg_ulong_EQNE() %{  // unsigned-long-compare flags, EQNE variant (class int_flags)
 4001   constraint(ALLOC_IN_RC(int_flags));
 4002   match(RegFlags);
 4003   format %{ "FLAGS_U_EQNE" %}
 4004   interface(REG_INTER);
 4005 %}
 4006 operand flagsReg_ulong_LEGT() %{  // unsigned-long-compare flags, LEGT variant (class int_flags)
 4007   constraint(ALLOC_IN_RC(int_flags));
 4008   match(RegFlags);
 4009   format %{ "FLAGS_U_LEGT" %}
 4010   interface(REG_INTER);
 4011 %}
 4012 
 4013 // Floating-point register operands: double value in class fp_dbl_reg (only when UseSSE < 2)
 4014 operand regDPR() %{
 4015   predicate( UseSSE < 2 );
 4016   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4017   match(RegD);
 4018   match(regDPR1);
 4019   match(regDPR2);
 4020   format %{ %}
 4021   interface(REG_INTER);
 4022 %}
 4023 
 4024 operand regDPR1(regDPR reg) %{  // double value in class fp_dbl_reg0, printed as FPR1
 4025   predicate( UseSSE < 2 );
 4026   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4027   match(reg);
 4028   format %{ "FPR1" %}
 4029   interface(REG_INTER);
 4030 %}
 4031 
 4032 operand regDPR2(regDPR reg) %{  // double value in class fp_dbl_reg1, printed as FPR2
 4033   predicate( UseSSE < 2 );
 4034   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4035   match(reg);
 4036   format %{ "FPR2" %}
 4037   interface(REG_INTER);
 4038 %}
 4039 
 4040 operand regnotDPR1(regDPR reg) %{  // double value in class fp_dbl_notreg0 (only when UseSSE < 2)
 4041   predicate( UseSSE < 2 );
 4042   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4043   match(reg);
 4044   format %{ %}
 4045   interface(REG_INTER);
 4046 %}
 4047 
 4048 // Float register operands: float value in class fp_flt_reg (only when UseSSE < 2)
 4049 operand regFPR() %{
 4050   predicate( UseSSE < 2 );
 4051   constraint(ALLOC_IN_RC(fp_flt_reg));
 4052   match(RegF);
 4053   match(regFPR1);
 4054   format %{ %}
 4055   interface(REG_INTER);
 4056 %}
 4057 
 4058 // Float value in class fp_flt_reg0, printed as FPR1 (only when UseSSE < 2)
 4059 operand regFPR1(regFPR reg) %{
 4060   predicate( UseSSE < 2 );
 4061   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4062   match(reg);
 4063   format %{ "FPR1" %}
 4064   interface(REG_INTER);
 4065 %}
 4066 
 4067 // XMM Float register operands: float value in class float_reg_legacy (only when UseSSE>=1)
 4068 operand regF() %{
 4069   predicate( UseSSE>=1 );
 4070   constraint(ALLOC_IN_RC(float_reg_legacy));
 4071   match(RegF);
 4072   format %{ %}
 4073   interface(REG_INTER);
 4074 %}
 4075 
 4076 operand legRegF() %{  // float value in class float_reg_legacy; same predicate and class as regF
 4077   predicate( UseSSE>=1 );
 4078   constraint(ALLOC_IN_RC(float_reg_legacy));
 4079   match(RegF);
 4080   format %{ %}
 4081   interface(REG_INTER);
 4082 %}
 4083 
 4084 // Float register operand in class float_reg_vl (no UseSSE predicate on this one)
 4085 operand vlRegF() %{
 4086    constraint(ALLOC_IN_RC(float_reg_vl));
 4087    match(RegF);
 4088 
 4089    format %{ %}
 4090    interface(REG_INTER);
 4091 %}
 4092 
 4093 // XMM Double register operands: double value in class double_reg_legacy (only when UseSSE>=2)
 4094 operand regD() %{
 4095   predicate( UseSSE>=2 );
 4096   constraint(ALLOC_IN_RC(double_reg_legacy));
 4097   match(RegD);
 4098   format %{ %}
 4099   interface(REG_INTER);
 4100 %}
 4101 
 4102 // Double register operand in class double_reg_legacy; same predicate and class as regD
 4103 operand legRegD() %{
 4104   predicate( UseSSE>=2 );
 4105   constraint(ALLOC_IN_RC(double_reg_legacy));
 4106   match(RegD);
 4107   format %{ %}
 4108   interface(REG_INTER);
 4109 %}
 4110 
 4111 operand vlRegD() %{  // double value in class double_reg_vl (no UseSSE predicate on this one)
 4112    constraint(ALLOC_IN_RC(double_reg_vl));
 4113    match(RegD);
 4114 
 4115    format %{ %}
 4116    interface(REG_INTER);
 4117 %}
 4118 
 4119 //----------Memory Operands----------------------------------------------------
 4120 // Direct Memory Operand: the address is the pointer constant itself, used as the displacement
 4121 operand direct(immP addr) %{
 4122   match(addr);
 4123 
 4124   format %{ "[$addr]" %}
 4125   interface(MEMORY_INTER) %{
 4126     base(0xFFFFFFFF);  // presumably the "no base register" marker -- confirm against encode_RegMem
 4127     index(0x4);        // presumably the "no index register" marker -- confirm against encode_RegMem
 4128     scale(0x0);
 4129     disp($addr);
 4130   %}
 4131 %}
 4132 
 4133 // Indirect Memory Operand: address is the pointer register alone, zero displacement
 4134 operand indirect(eRegP reg) %{
 4135   constraint(ALLOC_IN_RC(int_reg));
 4136   match(reg);
 4137 
 4138   format %{ "[$reg]" %}
 4139   interface(MEMORY_INTER) %{
 4140     base($reg);
 4141     index(0x4);  // presumably the "no index register" marker -- confirm against encode_RegMem
 4142     scale(0x0);
 4143     disp(0x0);
 4144   %}
 4145 %}
 4146 
 4147 // Indirect Memory Plus Short Offset Operand: [reg + off] where off is an immI8.
 4148 operand indOffset8(eRegP reg, immI8 off) %{
 4149   match(AddP reg off);
 4150 
 4151   format %{ "[$reg + $off]" %}
 4152   interface(MEMORY_INTER) %{
 4153     base($reg);
 4154     index(0x4);  // No Index
 4155     scale(0x0);  // No Scale
 4156     disp($off);
 4157   %}
 4158 %}
 4159 
 4160 // Indirect Memory Plus Long Offset Operand: [reg + off] where off is an immI.
 4161 operand indOffset32(eRegP reg, immI off) %{
 4162   match(AddP reg off);
 4163 
 4164   format %{ "[$reg + $off]" %}
 4165   interface(MEMORY_INTER) %{
 4166     base($reg);
 4167     index(0x4);  // No Index
 4168     scale(0x0);  // No Scale
 4169     disp($off);
 4170   %}
 4171 %}
 4172 
 4173 // Indirect Memory Plus Pointer-Constant Offset Operand: the offset is an immP and the base is an int register (rRegI).
 4174 operand indOffset32X(rRegI reg, immP off) %{
 4175   match(AddP off reg);  // note operand order: the immediate pointer is the AddP's first input
 4176 
 4177   format %{ "[$reg + $off]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index(0x4);  // No Index
 4181     scale(0x0);  // No Scale
 4182     disp($off);  // the immediate pointer is the displacement
 4183   %}
 4184 %}
 4185 
 4186 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + ireg + off], unscaled index.
 4187 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4188   match(AddP (AddP reg ireg) off);
 4189 
 4190   op_cost(10);
 4191   format %{"[$reg + $off + $ireg]" %}
 4192   interface(MEMORY_INTER) %{
 4193     base($reg);
 4194     index($ireg);
 4195     scale(0x0);  // No Scale
 4196     disp($off);
 4197   %}
 4198 %}
 4199 
 4200 // Indirect Memory Plus Index Register Operand: [reg + ireg], unscaled index and no offset.
 4201 operand indIndex(eRegP reg, rRegI ireg) %{
 4202   match(AddP reg ireg);
 4203 
 4204   op_cost(10);
 4205   format %{"[$reg + $ireg]" %}
 4206   interface(MEMORY_INTER) %{
 4207     base($reg);
 4208     index($ireg);
 4209     scale(0x0);  // No Scale
 4210     disp(0x0);   // No displacement
 4211   %}
 4212 %}
 4213 
 4214 // // -------------------------------------------------------------------------
 4215 // // 486 architecture doesn't support "scale * index + offset" without a base
 4216 // // -------------------------------------------------------------------------
 4217 // // Scaled Memory Operands
 4218 // // Indirect Memory Times Scale Plus Offset Operand
 4219 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4220 //   match(AddP off (LShiftI ireg scale));
 4221 //
 4222 //   op_cost(10);
 4223 //   format %{"[$off + $ireg << $scale]" %}
 4224 //   interface(MEMORY_INTER) %{
 4225 //     base(0x4);
 4226 //     index($ireg);
 4227 //     scale($scale);
 4228 //     disp($off);
 4229 //   %}
 4230 // %}
 4231 
 4232 // Indirect Memory Plus Scaled Index Register Operand: [reg + (ireg << scale)], scale is an immI2.
 4233 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4234   match(AddP reg (LShiftI ireg scale));
 4235 
 4236   op_cost(10);
 4237   format %{"[$reg + $ireg << $scale]" %}
 4238   interface(MEMORY_INTER) %{
 4239     base($reg);
 4240     index($ireg);
 4241     scale($scale);
 4242     disp(0x0);   // No displacement
 4243   %}
 4244 %}
 4245 
 4246 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + (ireg << scale) + off].
 4247 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4248   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4249 
 4250   op_cost(10);
 4251   format %{"[$reg + $off + $ireg << $scale]" %}
 4252   interface(MEMORY_INTER) %{
 4253     base($reg);
 4254     index($ireg);
 4255     scale($scale);
 4256     disp($off);
 4257   %}
 4258 %}
 4259 
 4260 //----------Load Long Memory Operands------------------------------------------
 4261 // The load-long idiom will use its address expression again after loading
 4262 // the first word of the long.  If the load-long destination overlaps with
 4263 // registers used in the addressing expression, the 2nd half will be loaded
 4264 // from a clobbered address.  Fix this by requiring that load-long use
 4265 // address registers that do not overlap with the load-long target.
 4266 
 4267 // load-long support: pointer register operand restricted to the esi_reg class, so the address register cannot overlap the load-long target.
 4268 operand load_long_RegP() %{
 4269   constraint(ALLOC_IN_RC(esi_reg));
 4270   match(RegP);
 4271   match(eSIRegP);
 4272   op_cost(100);  // high operand cost relative to the op_cost(10) used by the indexed memory operands
 4273   format %{  %}
 4274   interface(REG_INTER);
 4275 %}
 4276 
 4277 // Indirect Memory Operand for load-long: [reg] with reg a load_long_RegP (esi_reg class).
 4278 operand load_long_indirect(load_long_RegP reg) %{
 4279   constraint(ALLOC_IN_RC(esi_reg));
 4280   match(reg);
 4281 
 4282   format %{ "[$reg]" %}
 4283   interface(MEMORY_INTER) %{
 4284     base($reg);
 4285     index(0x4);  // No Index
 4286     scale(0x0);  // No Scale
 4287     disp(0x0);   // No displacement
 4288   %}
 4289 %}
 4290 
 4291 // Indirect Memory Plus Long Offset Operand for load-long: [reg + off] with reg a load_long_RegP.
 4292 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4293   match(AddP reg off);
 4294 
 4295   format %{ "[$reg + $off]" %}
 4296   interface(MEMORY_INTER) %{
 4297     base($reg);
 4298     index(0x4);  // No Index
 4299     scale(0x0);  // No Scale
 4300     disp($off);
 4301   %}
 4302 %}
 4303 
 4304 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long addressing forms defined above
 4305 
 4306 
 4307 //----------Special Memory Operands--------------------------------------------
 4308 // Stack Slot Operand - This operand is used for loading and storing temporary
 4309 //                      values on the stack where a match requires a value to
 4310 //                      flow through memory.
 4311 operand stackSlotP(sRegP reg) %{  // stack slot holding a pointer (sRegP), addressed as [ESP + slot offset]
 4312   constraint(ALLOC_IN_RC(stack_slots));
 4313   // No match rule because this operand is only generated in matching
 4314   format %{ "[$reg]" %}
 4315   interface(MEMORY_INTER) %{
 4316     base(0x4);   // ESP
 4317     index(0x4);  // No Index
 4318     scale(0x0);  // No Scale
 4319     disp($reg);  // Stack Offset
 4320   %}
 4321 %}
 4322 
 4323 operand stackSlotI(sRegI reg) %{  // stack slot holding an int (sRegI), addressed as [ESP + slot offset]
 4324   constraint(ALLOC_IN_RC(stack_slots));
 4325   // No match rule because this operand is only generated in matching
 4326   format %{ "[$reg]" %}
 4327   interface(MEMORY_INTER) %{
 4328     base(0x4);   // ESP
 4329     index(0x4);  // No Index
 4330     scale(0x0);  // No Scale
 4331     disp($reg);  // Stack Offset
 4332   %}
 4333 %}
 4334 
 4335 operand stackSlotF(sRegF reg) %{  // stack slot holding a float (sRegF), addressed as [ESP + slot offset]
 4336   constraint(ALLOC_IN_RC(stack_slots));
 4337   // No match rule because this operand is only generated in matching
 4338   format %{ "[$reg]" %}
 4339   interface(MEMORY_INTER) %{
 4340     base(0x4);   // ESP
 4341     index(0x4);  // No Index
 4342     scale(0x0);  // No Scale
 4343     disp($reg);  // Stack Offset
 4344   %}
 4345 %}
 4346 
 4347 operand stackSlotD(sRegD reg) %{  // stack slot holding a double (sRegD), addressed as [ESP + slot offset]
 4348   constraint(ALLOC_IN_RC(stack_slots));
 4349   // No match rule because this operand is only generated in matching
 4350   format %{ "[$reg]" %}
 4351   interface(MEMORY_INTER) %{
 4352     base(0x4);   // ESP
 4353     index(0x4);  // No Index
 4354     scale(0x0);  // No Scale
 4355     disp($reg);  // Stack Offset
 4356   %}
 4357 %}
 4358 
 4359 operand stackSlotL(sRegL reg) %{  // stack slot holding a long (sRegL), addressed as [ESP + slot offset]
 4360   constraint(ALLOC_IN_RC(stack_slots));
 4361   // No match rule because this operand is only generated in matching
 4362   format %{ "[$reg]" %}
 4363   interface(MEMORY_INTER) %{
 4364     base(0x4);   // ESP
 4365     index(0x4);  // No Index
 4366     scale(0x0);  // No Scale
 4367     disp($reg);  // Stack Offset
 4368   %}
 4369 %}
 4370 
 4371 //----------Conditional Branch Operands----------------------------------------
 4372 // Comparison Op  - This is the operation of the comparison, and is limited to
 4373 //                  the following set of codes:
 4374 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4375 //
 4376 // Other attributes of the comparison, such as unsignedness, are specified
 4377 // by the comparison instruction that sets a condition code flags register.
 4378 // That result is represented by a flags operand whose subtype is appropriate
 4379 // to the unsignedness (etc.) of the comparison.
 4380 //
 4381 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4382 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4383 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4384 
 4385 // Comparison Code, signed compare: each Bool test is paired with an encoding value and a condition mnemonic (l/ge/le/g).
 4386 operand cmpOp() %{
 4387   match(Bool);
 4388 
 4389   format %{ "" %}
 4390   interface(COND_INTER) %{
 4391     equal(0x4, "e");
 4392     not_equal(0x5, "ne");
 4393     less(0xC, "l");
 4394     greater_equal(0xD, "ge");
 4395     less_equal(0xE, "le");
 4396     greater(0xF, "g");
 4397     overflow(0x0, "o");
 4398     no_overflow(0x1, "no");
 4399   %}
 4400 %}
 4401 
 4402 // Comparison Code, unsigned compare.  Used by FP also, with
 4403 // C2 (unordered) turned into GT or LT already.  The other bits
 4404 // C0 and C3 are turned into Carry & Zero flags.
 4405 operand cmpOpU() %{
 4406   match(Bool);
 4407 
 4408   format %{ "" %}
 4409   interface(COND_INTER) %{
 4410     equal(0x4, "e");
 4411     not_equal(0x5, "ne");
 4412     less(0x2, "b");            // ordering tests use the b/nb/be/nbe conditions instead of cmpOp's l/ge/le/g
 4413     greater_equal(0x3, "nb");
 4414     less_equal(0x6, "be");
 4415     greater(0x7, "nbe");
 4416     overflow(0x0, "o");
 4417     no_overflow(0x1, "no");
 4418   %}
 4419 %}
 4420 
 4421 // Floating comparisons that don't require any fixup for the unordered case.
 4422 operand cmpOpUCF() %{
 4423   match(Bool);
 4424   predicate(n->as_Bool()->_test._test == BoolTest::lt ||  // accepted only for lt/ge/le/gt tests;
 4425             n->as_Bool()->_test._test == BoolTest::ge ||  // eq/ne are handled by cmpOpUCF2
 4426             n->as_Bool()->_test._test == BoolTest::le ||
 4427             n->as_Bool()->_test._test == BoolTest::gt);
 4428   format %{ "" %}
 4429   interface(COND_INTER) %{  // same encoding table as cmpOpU
 4430     equal(0x4, "e");
 4431     not_equal(0x5, "ne");
 4432     less(0x2, "b");
 4433     greater_equal(0x3, "nb");
 4434     less_equal(0x6, "be");
 4435     greater(0x7, "nbe");
 4436     overflow(0x0, "o");
 4437     no_overflow(0x1, "no");
 4438   %}
 4439 %}
 4440 
 4441 
 4442 // Floating comparisons that can be fixed up with extra conditional jumps.
 4443 operand cmpOpUCF2() %{
 4444   match(Bool);
 4445   predicate(n->as_Bool()->_test._test == BoolTest::ne ||  // accepted only for ne/eq tests
 4446             n->as_Bool()->_test._test == BoolTest::eq);
 4447   format %{ "" %}
 4448   interface(COND_INTER) %{  // same encoding table as cmpOpU
 4449     equal(0x4, "e");
 4450     not_equal(0x5, "ne");
 4451     less(0x2, "b");
 4452     greater_equal(0x3, "nb");
 4453     less_equal(0x6, "be");
 4454     greater(0x7, "nbe");
 4455     overflow(0x0, "o");
 4456     no_overflow(0x1, "no");
 4457   %}
 4458 %}
 4459 
 4460 // Comparison Code for FP conditional move; excludes overflow/no_overflow tests via the predicate.
 4461 operand cmpOp_fcmov() %{
 4462   match(Bool);
 4463 
 4464   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4465             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4466   format %{ "" %}
 4467   interface(COND_INTER) %{
 4468     equal        (0x0C8);  // these six entries give only an encoding value, no mnemonic string;
 4469     not_equal    (0x1C8);  // presumably FCMOVcc opcode bits -- TODO confirm against the encoder
 4470     less         (0x0C0);
 4471     greater_equal(0x1C0);
 4472     less_equal   (0x0D0);
 4473     greater      (0x1D0);
 4474     overflow(0x0, "o"); // not really supported by the instruction
 4475     no_overflow(0x1, "no"); // not really supported by the instruction
 4476   %}
 4477 %}
 4478 
 4479 // Comparison Code used in long compares: cmpOp's table with the operand order commuted (less<->greater, less_equal<->greater_equal).
 4480 operand cmpOp_commute() %{
 4481   match(Bool);
 4482 
 4483   format %{ "" %}
 4484   interface(COND_INTER) %{
 4485     equal(0x4, "e");
 4486     not_equal(0x5, "ne");
 4487     less(0xF, "g");            // cmpOp maps less to (0xC, "l")
 4488     greater_equal(0xE, "le");  // cmpOp maps greater_equal to (0xD, "ge")
 4489     less_equal(0xD, "ge");     // cmpOp maps less_equal to (0xE, "le")
 4490     greater(0xC, "l");         // cmpOp maps greater to (0xF, "g")
 4491     overflow(0x0, "o");
 4492     no_overflow(0x1, "no");
 4493   %}
 4494 %}
 4495 
 4496 // Comparison Code used in unsigned long compares: cmpOpU's table with the operand order commuted.
 4497 operand cmpOpU_commute() %{
 4498   match(Bool);
 4499 
 4500   format %{ "" %}
 4501   interface(COND_INTER) %{
 4502     equal(0x4, "e");
 4503     not_equal(0x5, "ne");
 4504     less(0x7, "nbe");          // cmpOpU maps less to (0x2, "b")
 4505     greater_equal(0x6, "be");  // cmpOpU maps greater_equal to (0x3, "nb")
 4506     less_equal(0x3, "nb");     // cmpOpU maps less_equal to (0x6, "be")
 4507     greater(0x2, "b");         // cmpOpU maps greater to (0x7, "nbe")
 4508     overflow(0x0, "o");
 4509     no_overflow(0x1, "no");
 4510   %}
 4511 %}
 4512 
 4513 //----------OPERAND CLASSES----------------------------------------------------
 4514 // Operand Classes are groups of operands that are used to simplify
 4515 // instruction definitions by not requiring the AD writer to specify separate
 4516 // instructions for every form of operand when the instruction accepts
 4517 // multiple operand types with the same basic encoding and format.  The classic
 4518 // case of this is memory operands.
 4519 
 4520 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4521                indIndex, indIndexScale, indIndexScaleOffset);  // all general addressing-mode operands defined above
 4522 
 4523 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4524 // This means some kind of offset is always required and you cannot use
 4525 // an oop as the offset (done when working on static globals).
 4526 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4527                     indIndex, indIndexScale, indIndexScaleOffset);  // same list as 'memory' minus indOffset32X (pointer-constant offset)
 4528 
 4529 
 4530 //----------PIPELINE-----------------------------------------------------------
 4531 // Rules which define the behavior of the target architecture's pipeline.
 4532 pipeline %{
 4533 
 4534 //----------ATTRIBUTES---------------------------------------------------------
 4535 attributes %{
 4536   variable_size_instructions;        // Instructions are variable size
 4537   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4538   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 4539   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4540   instruction_fetch_units = 1;       // of 16 bytes
 4541 
 4542   // List of nop instructions
 4543   nops( MachNop );                   // MachNop is bound to a pipe_class in this pipeline's define block
 4544 %}
 4545 
 4546 //----------RESOURCES----------------------------------------------------------
 4547 // Resources are the functional units available to the machine
 4548 
 4549 // Generic P2/P3 pipeline
 4550 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4551 // 3 instructions decoded per cycle.
 4552 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4553 // 2 ALU op, only ALU0 handles mul/div instructions.
 4554 resources( D0, D1, D2, DECODE = D0 | D1 | D2,   // DECODE = any of the three decoders
 4555            MS0, MS1, MEM = MS0 | MS1,           // MEM = either memory unit
 4556            BR, FPU,
 4557            ALU0, ALU1, ALU = ALU0 | ALU1 );     // ALU = either ALU
 4558 
 4559 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4560 // Pipeline Description specifies the stages in the machine's pipeline
 4561 
 4562 // Generic P2/P3 pipeline: six stages, S0..S5, referenced by the pipe_class entries below
 4563 pipe_desc(S0, S1, S2, S3, S4, S5);
 4564 
 4565 //----------PIPELINE CLASSES---------------------------------------------------
 4566 // Pipeline Classes describe the stages in which input and output are
 4567 // referenced by the hardware pipeline.
 4568 
 4569 // Naming convention: ialu or fpu
 4570 // Then: _reg
 4571 // Then: _reg if there is a 2nd register
 4572 // Then: _long if it's a pair of instructions implementing a long
 4573 // Then: _fat if it requires the big decoder
 4574 //   Or: _mem if it requires the big decoder and a memory unit.
 4575 
 4576 // Integer ALU reg operation: dst is read in S3 and written in S4.
 4577 pipe_class ialu_reg(rRegI dst) %{
 4578     single_instruction;
 4579     dst    : S4(write);
 4580     dst    : S3(read);
 4581     DECODE : S0;        // any decoder
 4582     ALU    : S3;        // any alu
 4583 %}
 4584 
 4585 // Long ALU reg operation: two instructions operating on an eRegL pair.
 4586 pipe_class ialu_reg_long(eRegL dst) %{
 4587     instruction_count(2);
 4588     dst    : S4(write);
 4589     dst    : S3(read);
 4590     DECODE : S0(2);     // any 2 decoders
 4591     ALU    : S3(2);     // both alus
 4592 %}
 4593 
 4594 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit).
 4595 pipe_class ialu_reg_fat(rRegI dst) %{
 4596     single_instruction;
 4597     dst    : S4(write);
 4598     dst    : S3(read);
 4599     D0     : S0;        // big decoder only
 4600     ALU    : S3;        // any alu
 4601 %}
 4602 
 4603 // Long ALU reg operation using big decoder: two instructions, each needing D0.
 4604 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4605     instruction_count(2);
 4606     dst    : S4(write);
 4607     dst    : S3(read);
 4608     D0     : S0(2);     // big decoder only; twice
 4609     ALU    : S3(2);     // any 2 alus
 4610 %}
 4611 
 4612 // Integer ALU reg-reg operation: src read in S3, dst written in S4.
 4613 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4614     single_instruction;
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0;        // any decoder
 4618     ALU    : S3;        // any alu
 4619 %}
 4620 
 4621 // Long ALU reg-reg operation: two instructions on eRegL pairs.
 4622 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4623     instruction_count(2);
 4624     dst    : S4(write);
 4625     src    : S3(read);
 4626     DECODE : S0(2);     // any 2 decoders
 4627     ALU    : S3(2);     // both alus
 4628 %}
 4629 
 4630 // Integer ALU reg-reg operation using big decoder. NOTE(review): src is declared as 'memory' although no MEM unit is reserved -- confirm intended.
 4631 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4632     single_instruction;
 4633     dst    : S4(write);
 4634     src    : S3(read);
 4635     D0     : S0;        // big decoder only
 4636     ALU    : S3;        // any alu
 4637 %}
 4638 
 4639 // Long ALU reg-reg operation using big decoder: two instructions, each needing D0.
 4640 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4641     instruction_count(2);
 4642     dst    : S4(write);
 4643     src    : S3(read);
 4644     D0     : S0(2);     // big decoder only; twice
 4645     ALU    : S3(2);     // both alus
 4646 %}
 4647 
 4648 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5.
 4649 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4650     single_instruction;
 4651     dst    : S5(write);
 4652     mem    : S3(read);
 4653     D0     : S0;        // big decoder only
 4654     ALU    : S4;        // any alu
 4655     MEM    : S3;        // any mem
 4656 %}
 4657 
 4658 // Long ALU reg-mem operation: two instructions; mem is a load_long_memory operand.
 4659 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4660     instruction_count(2);
 4661     dst    : S5(write);
 4662     mem    : S3(read);
 4663     D0     : S0(2);     // big decoder only; twice
 4664     ALU    : S4(2);     // any 2 alus
 4665     MEM    : S3(2);     // both mems
 4666 %}
 4667 
 4668 // Integer mem operation (prefetch): reads the memory operand only; no ALU and no register result.
 4669 pipe_class ialu_mem(memory mem)
 4670 %{
 4671     single_instruction;
 4672     mem    : S3(read);
 4673     D0     : S0;        // big decoder only
 4674     MEM    : S3;        // any mem
 4675 %}
 4676 
 4677 // Integer Store to Memory: address operand read in S3, source register read in S5.
 4678 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4679     single_instruction;
 4680     mem    : S3(read);
 4681     src    : S5(read);
 4682     D0     : S0;        // big decoder only
 4683     ALU    : S4;        // any alu
 4684     MEM    : S3;        // any mem
 4685 %}
 4686 
 4687 // Long Store to Memory: two instructions storing an eRegL pair.
 4688 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4689     instruction_count(2);
 4690     mem    : S3(read);
 4691     src    : S5(read);
 4692     D0     : S0(2);     // big decoder only; twice
 4693     ALU    : S4(2);     // any 2 alus
 4694     MEM    : S3(2);     // Both mems
 4695 %}
 4696 
 4697 // Integer Store of an immediate to Memory: only the memory operand is an input.
 4698 pipe_class ialu_mem_imm(memory mem) %{
 4699     single_instruction;
 4700     mem    : S3(read);
 4701     D0     : S0;        // big decoder only
 4702     ALU    : S4;        // any alu
 4703     MEM    : S3;        // any mem
 4704 %}
 4705 
 4706 // Integer ALU0 reg-reg operation: must issue on ALU0 (the resources note says only ALU0 handles mul/div).
 4707 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4708     single_instruction;
 4709     dst    : S4(write);
 4710     src    : S3(read);
 4711     D0     : S0;        // Big decoder only
 4712     ALU0   : S3;        // only alu0
 4713 %}
 4714 
 4715 // Integer ALU0 reg-mem operation: memory read in S3, ALU0 in S4, dst written in S5.
 4716 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4717     single_instruction;
 4718     dst    : S5(write);
 4719     mem    : S3(read);
 4720     D0     : S0;        // big decoder only
 4721     ALU0   : S4;        // ALU0 only
 4722     MEM    : S3;        // any mem
 4723 %}
 4724 
 4725 // Integer ALU reg-reg operation that writes the flags register (cr) from two register inputs.
 4726 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4727     single_instruction;
 4728     cr     : S4(write);
 4729     src1   : S3(read);
 4730     src2   : S3(read);
 4731     DECODE : S0;        // any decoder
 4732     ALU    : S3;        // any alu
 4733 %}
 4734 
 4735 // Integer ALU reg-imm operation that writes the flags register (cr) from one register input.
 4736 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4737     single_instruction;
 4738     cr     : S4(write);
 4739     src1   : S3(read);
 4740     DECODE : S0;        // any decoder
 4741     ALU    : S3;        // any alu
 4742 %}
 4743 
 4744 // Integer ALU reg-mem operation that writes the flags register (cr) from a register and a memory input.
 4745 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4746     single_instruction;
 4747     cr     : S4(write);
 4748     src1   : S3(read);
 4749     src2   : S3(read);
 4750     D0     : S0;        // big decoder only
 4751     ALU    : S4;        // any alu
 4752     MEM    : S3;        // any mem
 4753 %}
 4754 
 4755 // Four-instruction compare idiom over p, q and y (presumably the CmpLTMask family -- TODO confirm against users).
 4756 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4757     instruction_count(4);
 4758     y      : S4(read);
 4759     q      : S3(read);
 4760     p      : S3(read);
 4761     DECODE : S0(4);     // any decoder
 4762 %}
 4763 
 4764 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4.
 4765 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4766     single_instruction;
 4767     dst    : S4(write);
 4768     src    : S3(read);
 4769     cr     : S3(read);
 4770     DECODE : S0;        // any decoder
 4771 %}
 4772 
 4773 // Conditional move reg-mem: like pipe_cmov_reg but the source is a memory operand (reserves a MEM unit).
 4774 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4775     single_instruction;
 4776     dst    : S4(write);
 4777     src    : S3(read);
 4778     cr     : S3(read);
 4779     DECODE : S0;        // any decoder
 4780     MEM    : S3;        // any mem
 4781 %}
 4782 
 4783 // Conditional move reg-reg long. NOTE(review): declared single_instruction while reserving 2 decoders -- confirm intended.
 4784 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4785     single_instruction;
 4786     dst    : S4(write);
 4787     src    : S3(read);
 4788     cr     : S3(read);
 4789     DECODE : S0(2);     // any 2 decoders
 4790 %}
 4791 
 4792 // Conditional move double reg-reg: dst is a regDPR1, src a regDPR.
 4793 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4794     single_instruction;
 4795     dst    : S4(write);
 4796     src    : S3(read);
 4797     cr     : S3(read);
 4798     DECODE : S0;        // any decoder
 4799 %}
 4800 
 4801 // Float single-register operation: two instructions; dst is only declared as read (S3).
 4802 pipe_class fpu_reg(regDPR dst) %{
 4803     instruction_count(2);
 4804     dst    : S3(read);
 4805     DECODE : S0(2);     // any 2 decoders
 4806     FPU    : S3;
 4807 %}
 4808 
 4809 // Float reg-reg operation: two instructions; src read in S3, dst written in S4.
 4810 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4811     instruction_count(2);
 4812     dst    : S4(write);
 4813     src    : S3(read);
 4814     DECODE : S0(2);     // any 2 decoders
 4815     FPU    : S3;
 4816 %}
 4817 
 4818 // Float reg-reg-reg operation: three instructions with two register sources.
 4819 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4820     instruction_count(3);
 4821     dst    : S4(write);
 4822     src1   : S3(read);
 4823     src2   : S3(read);
 4824     DECODE : S0(3);     // any 3 decoders
 4825     FPU    : S3(2);
 4826 %}
 4827 
 4828 // Float reg-reg-reg-reg operation: four instructions with three register sources.
 4829 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4830     instruction_count(4);
 4831     dst    : S4(write);
 4832     src1   : S3(read);
 4833     src2   : S3(read);
 4834     src3   : S3(read);
 4835     DECODE : S0(4);     // 4 decoder uses, one per instruction
 4836     FPU    : S3(2);
 4837 %}
 4838 
 4839 // Float reg-mem-reg-reg operation: four instructions; src1 is a memory operand, src2/src3 are registers.
 4840 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4841     instruction_count(4);
 4842     dst    : S4(write);
 4843     src1   : S3(read);
 4844     src2   : S3(read);
 4845     src3   : S3(read);
 4846     DECODE : S1(3);     // any 3 decoders
 4847     D0     : S0;        // Big decoder only
 4848     FPU    : S3(2);
 4849     MEM    : S3;        // any mem
 4850 %}
 4851 
 4852 // Float reg-mem operation: two instructions (memory load on D0, then an FPU pop on any decoder).
 4853 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4854     instruction_count(2);
 4855     dst    : S5(write);
 4856     mem    : S3(read);
 4857     D0     : S0;        // big decoder only
 4858     DECODE : S1;        // any decoder for FPU POP
 4859     FPU    : S4;
 4860     MEM    : S3;        // any mem
 4861 %}
 4862 
 4863 // Float reg-reg-mem operation: three instructions with one register source and one memory source.
 4864 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4865     instruction_count(3);
 4866     dst    : S5(write);
 4867     src1   : S3(read);
 4868     mem    : S3(read);
 4869     D0     : S0;        // big decoder only
 4870     DECODE : S1(2);     // any decoder for FPU POP
 4871     FPU    : S4;
 4872     MEM    : S3;        // any mem
 4873 %}
 4874 
 4875 // Float mem-reg operation (store): two instructions (FPU push on any decoder, then the memory op on D0).
 4876 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4877     instruction_count(2);
 4878     src    : S5(read);
 4879     mem    : S3(read);
 4880     DECODE : S0;        // any decoder for FPU PUSH
 4881     D0     : S1;        // big decoder only
 4882     FPU    : S4;
 4883     MEM    : S3;        // any mem
 4884 %}
 4885 
 4886 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{  // Float mem-reg-reg operation: three instructions, two register sources
 4887     instruction_count(3);
 4888     src1   : S3(read);
 4889     src2   : S3(read);
 4890     mem    : S3(read);
 4891     DECODE : S0(2);     // any decoder for FPU PUSH
 4892     D0     : S1;        // big decoder only
 4893     FPU    : S4;
 4894     MEM    : S3;        // any mem
 4895 %}
 4896 
 4897 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{  // Float mem-reg-mem operation: three instructions, register and memory sources
 4898     instruction_count(3);
 4899     src1   : S3(read);
 4900     src2   : S3(read);
 4901     mem    : S4(read);
 4902     DECODE : S0;        // any decoder for FPU PUSH
 4903     D0     : S0(2);     // big decoder only
 4904     FPU    : S4;
 4905     MEM    : S3(2);     // any mem
 4906 %}
 4907 
 4908 pipe_class fpu_mem_mem(memory dst, memory src1) %{  // Float mem-mem operation: two instructions; no FPU unit is reserved
 4909     instruction_count(2);
 4910     src1   : S3(read);
 4911     dst    : S4(read);  // dst is a memory operand, so its address is what is read here
 4912     D0     : S0(2);     // big decoder only
 4913     MEM    : S3(2);     // any mem
 4914 %}
 4915 
 4916 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{  // Float mem-mem-mem operation: three instructions, all operands in memory
 4917     instruction_count(3);
 4918     src1   : S3(read);
 4919     src2   : S3(read);
 4920     dst    : S4(read);
 4921     D0     : S0(3);     // big decoder only
 4922     FPU    : S4;
 4923     MEM    : S3(3);     // any mem
 4924 %}
 4925 
 4926 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{  // Float mem-reg operation involving a constant: three instructions
 4927     instruction_count(3);
 4928     src1   : S4(read);
 4929     mem    : S4(read);
 4930     DECODE : S0;        // any decoder for FPU PUSH
 4931     D0     : S0(2);     // big decoder only
 4932     FPU    : S4;
 4933     MEM    : S3(2);     // any mem
 4934 %}
 4935 
 4936 // Float load constant: two instructions (load on D0, FPU pop on any decoder); reserves a MEM unit.
 4937 pipe_class fpu_reg_con(regDPR dst) %{
 4938     instruction_count(2);
 4939     dst    : S5(write);
 4940     D0     : S0;        // big decoder only for the load
 4941     DECODE : S1;        // any decoder for FPU POP
 4942     FPU    : S4;
 4943     MEM    : S3;        // any mem
 4944 %}
 4945 
 4946 // Float load constant combined with a register source: three instructions.
 4947 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4948     instruction_count(3);
 4949     dst    : S5(write);
 4950     src    : S3(read);
 4951     D0     : S0;        // big decoder only for the load
 4952     DECODE : S1(2);     // any decoder for FPU POP
 4953     FPU    : S4;
 4954     MEM    : S3;        // any mem
 4955 %}
 4956 
 4957 // Unconditional branch: uses only the branch unit (BR) in S3.
 4958 pipe_class pipe_jmp( label labl ) %{
 4959     single_instruction;
 4960     BR   : S3;
 4961 %}
 4962 
 4963 // Conditional branch: reads the flags register in S1 and uses the branch unit in S3.
 4964 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4965     single_instruction;
 4966     cr    : S1(read);
 4967     BR    : S3;
 4968 %}
 4969 
 4970 // Allocation idiom (compare-and-exchange): serializing, with a fixed latency of 6.
 4971 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4972     instruction_count(1); force_serialization;
 4973     fixed_latency(6);
 4974     heap_ptr : S3(read);
 4975     DECODE   : S0(3);
 4976     D0       : S2;
 4977     MEM      : S3;
 4978     ALU      : S3(2);
 4979     dst      : S5(write);
 4980     BR       : S5;
 4981 %}
 4982 
 4983 // Generic big/slow expanded idiom: modeled as 10 instructions over multiple bundles, serializing, fixed latency 100.
 4984 pipe_class pipe_slow(  ) %{
 4985     instruction_count(10); multiple_bundles; force_serialization;
 4986     fixed_latency(100);
 4987     D0  : S0(2);
 4988     MEM : S3(2);
 4989 %}
 4990 
 4991 // The real do-nothing guy: zero instructions and no resources; bound to MachNop in the define block.
 4992 pipe_class empty( ) %{
 4993     instruction_count(0);
 4994 %}
 4995 
 4996 // Define the class for the Nop node: MachNop (listed in attributes' nops) uses the 'empty' pipe_class.
 4997 define %{
 4998    MachNop = empty;
 4999 %}
 5000 
 5001 %}
 5002 
 5003 //----------INSTRUCTIONS-------------------------------------------------------
 5004 //
 5005 // match      -- States which machine-independent subtree may be replaced
 5006 //               by this instruction.
 5007 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5008 //               selection to identify a minimum cost tree of machine
 5009 //               instructions that matches a tree of machine-independent
 5010 //               instructions.
 5011 // format     -- A string providing the disassembly for this instruction.
 5012 //               The value of an instruction's operand may be inserted
 5013 //               by referring to it with a '$' prefix.
 5014 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5015 //               to within an encode class as $primary, $secondary, and $tertiary
 5016 //               respectively.  The primary opcode is commonly used to
 5017 //               indicate the type of machine instruction, while secondary
 5018 //               and tertiary are often used for prefix options or addressing
 5019 //               modes.
 5020 // ins_encode -- A list of encode classes with parameters. The encode class
 5021 //               name must have been defined in an 'enc_class' specification
 5022 //               in the encode section of the architecture description.
 5023 
 5024 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5025 // Placeholder float move regF -> legRegF; the encoding is never expected to run (ShouldNotReachHere).
 5026 instruct MoveF2LEG(legRegF dst, regF src) %{
 5027   match(Set dst src);
 5028   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5029   ins_encode %{
 5030     ShouldNotReachHere();
 5031   %}
 5032   ins_pipe( fpu_reg_reg );
 5033 %}
 5034 
 5035 // Placeholder float move legRegF -> regF; the encoding is never expected to run (ShouldNotReachHere).
 5036 instruct MoveLEG2F(regF dst, legRegF src) %{
 5037   match(Set dst src);
 5038   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5039   ins_encode %{
 5040     ShouldNotReachHere();
 5041   %}
 5042   ins_pipe( fpu_reg_reg );
 5043 %}
 5044 
 5045 // Placeholder float move regF -> vlRegF; the encoding is never expected to run (ShouldNotReachHere).
 5046 instruct MoveF2VL(vlRegF dst, regF src) %{
 5047   match(Set dst src);
 5048   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5049   ins_encode %{
 5050     ShouldNotReachHere();
 5051   %}
 5052   ins_pipe( fpu_reg_reg );
 5053 %}
 5054 
 5055 // Placeholder float move vlRegF -> regF; the encoding is never expected to run (ShouldNotReachHere).
 5056 instruct MoveVL2F(regF dst, vlRegF src) %{
 5057   match(Set dst src);
 5058   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5059   ins_encode %{
 5060     ShouldNotReachHere();
 5061   %}
 5062   ins_pipe( fpu_reg_reg );
 5063 %}
 5064 
 5065 
 5066 
 5067 // Placeholder double move regD -> legRegD; the encoding is never expected to run (ShouldNotReachHere).
 5068 instruct MoveD2LEG(legRegD dst, regD src) %{
 5069   match(Set dst src);
 5070   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5071   ins_encode %{
 5072     ShouldNotReachHere();
 5073   %}
 5074   ins_pipe( fpu_reg_reg );
 5075 %}
 5076 
 5077 // Placeholder double move legRegD -> regD; the encoding is never expected to run (ShouldNotReachHere).
 5078 instruct MoveLEG2D(regD dst, legRegD src) %{
 5079   match(Set dst src);
 5080   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5081   ins_encode %{
 5082     ShouldNotReachHere();
 5083   %}
 5084   ins_pipe( fpu_reg_reg );
 5085 %}
 5086 
 5087 // Placeholder double move regD -> vlRegD; the encoding is never expected to run (ShouldNotReachHere).
 5088 instruct MoveD2VL(vlRegD dst, regD src) %{
 5089   match(Set dst src);
 5090   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5091   ins_encode %{
 5092     ShouldNotReachHere();
 5093   %}
 5094   ins_pipe( fpu_reg_reg );
 5095 %}
 5096 
 5097 // Dummy double move from a vlRegD operand into a regD operand.
 5098 instruct MoveVL2D(regD dst, vlRegD src) %{
 5099   match(Set dst src);
 5100   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5101   ins_encode %{
 5102     ShouldNotReachHere();  // placeholder only: this encoding must never be reached
 5103   %}
 5104   ins_pipe( fpu_reg_reg );
 5105 %}
 5106 
 5107 //----------BSWAP-Instruction--------------------------------------------------
 5108 instruct bytes_reverse_int(rRegI dst) %{
 5109   match(Set dst (ReverseBytesI dst));
 5110   // In-place byte reversal of a 32-bit int: dst is both the input and the result.
 5111   format %{ "BSWAP  $dst" %}
 5112   opcode(0x0F, 0xC8);  // 0F C8+rd is the x86 encoding of BSWAP r32
 5113   ins_encode( OpcP, OpcSReg(dst) );
 5114   ins_pipe( ialu_reg );
 5115 %}
 5116 
 5117 instruct bytes_reverse_long(eRegL dst) %{
 5118   match(Set dst (ReverseBytesL dst));
 5119   // In-place 64-bit byte reversal on a register pair; per the format text each half is byte-swapped and the halves are exchanged.
 5120   format %{ "BSWAP  $dst.lo\n\t"
 5121             "BSWAP  $dst.hi\n\t"
 5122             "XCHG   $dst.lo $dst.hi" %}
 5123 
 5124   ins_cost(125);
 5125   ins_encode( bswap_long_bytes(dst) );  // encoding class is defined outside this section
 5126   ins_pipe( ialu_reg_reg);
 5127 %}
 5128 
 5129 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5130   match(Set dst (ReverseBytesUS dst));  // in-place byte swap of an unsigned 16-bit value
 5131   effect(KILL cr);
 5132   format %{ "BSWAP  $dst\n\t"
 5133             "SHR    $dst,16\n\t" %}
 5134   ins_encode %{
 5135     Register value = $dst$$Register;
 5136     __ bswapl(value);     // the reversed halfword lands in bits 31..16
 5137     __ shrl(value, 16);   // logical shift brings it down zero-extended
 5138   %}
 5139   ins_pipe( ialu_reg );
 5140 %}
 5141 
 5142 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5143   match(Set dst (ReverseBytesS dst));  // in-place byte swap of a signed 16-bit value
 5144   effect(KILL cr);
 5145   format %{ "BSWAP  $dst\n\t"
 5146             "SAR    $dst,16\n\t" %}
 5147   ins_encode %{
 5148     Register value = $dst$$Register;
 5149     __ bswapl(value);     // the reversed halfword lands in bits 31..16
 5150     __ sarl(value, 16);   // arithmetic shift brings it down sign-extended
 5151   %}
 5152   ins_pipe( ialu_reg );
 5153 %}
 5154 
 5155 
 5156 //---------- Zeros Count Instructions ------------------------------------------
 5157 
 5158 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5159   predicate(UseCountLeadingZerosInstruction);  // LZCNT variant, selected when this flag is set
 5160   match(Set dst (CountLeadingZerosI src));
 5161   effect(KILL cr);
 5162   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5163   ins_encode %{
 5164     Register input = $src$$Register;
 5165     __ lzcntl($dst$$Register, input);  // a single LZCNT computes the whole result
 5166   %}
 5167   ins_pipe(ialu_reg);
 5168 %}
 5169 
 5170 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5171   predicate(!UseCountLeadingZerosInstruction);
 5172   match(Set dst (CountLeadingZerosI src));
 5173   effect(KILL cr);
 5174   // BSR fallback: result = 31 - (index of the highest set bit), using index -1 for a zero input.
 5175   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5176             "JNZ    skip\n\t"
 5177             "MOV    $dst, -1\n"
 5178       "skip:\n\t"
 5179             "NEG    $dst\n\t"
 5180             "ADD    $dst, 31" %}
 5181   ins_encode %{
 5182     Register result = $dst$$Register;
 5183     Register input  = $src$$Register;
 5184     Label have_index;
 5185     __ bsrl(result, input);                  // index of highest set bit; ZF is set when input is zero
 5186     __ jccb(Assembler::notZero, have_index);
 5187     __ movl(result, -1);                     // zero input: substitute index -1
 5188     __ bind(have_index);
 5189     __ negl(result);
 5190     __ addl(result, BitsPerInt - 1);         // (BitsPerInt - 1) - index
 5191   %}
 5192   ins_pipe(ialu_reg);
 5193 %}
 5194 
 5195 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5196   predicate(UseCountLeadingZerosInstruction);
 5197   match(Set dst (CountLeadingZerosL src));
 5198   effect(TEMP dst, KILL cr);
 5199   // LZCNT on the high word first; the low word is counted (plus 32) only when the high word is zero.
 5200   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5201             "JNC    done\n\t"
 5202             "LZCNT  $dst, $src.lo\n\t"
 5203             "ADD    $dst, 32\n"
 5204       "done:" %}
 5205   ins_encode %{
 5206     Register count  = $dst$$Register;
 5207     Register src_lo = $src$$Register;
 5208     Label finished;
 5209     __ lzcntl(count, HIGH_FROM_LOW(src_lo));
 5210     __ jccb(Assembler::carryClear, finished);  // CF clear: high word was non-zero, count is final
 5211     __ lzcntl(count, src_lo);
 5212     __ addl(count, BitsPerInt);                // account for the all-zero high word
 5213     __ bind(finished);
 5214   %}
 5215   ins_pipe(ialu_reg);
 5216 %}
 5217 
 5218 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5219   predicate(!UseCountLeadingZerosInstruction);
 5220   match(Set dst (CountLeadingZerosL src));
 5221   effect(TEMP dst, KILL cr);
 5222   // BSR fallback for longs: find the 64-bit index of the highest set bit (-1 if none), then return 63 - index.
 5223   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5224             "JZ     msw_is_zero\n\t"
 5225             "ADD    $dst, 32\n\t"
 5226             "JMP    not_zero\n"
 5227       "msw_is_zero:\n\t"
 5228             "BSR    $dst, $src.lo\n\t"
 5229             "JNZ    not_zero\n\t"
 5230             "MOV    $dst, -1\n"
 5231       "not_zero:\n\t"
 5232             "NEG    $dst\n\t"
 5233             "ADD    $dst, 63\n" %}
 5234   ins_encode %{
 5235     Register bit_index = $dst$$Register;
 5236     Register src_lo    = $src$$Register;
 5237     Label high_word_empty;
 5238     Label have_index;
 5239     __ bsrl(bit_index, HIGH_FROM_LOW(src_lo));
 5240     __ jccb(Assembler::zero, high_word_empty);
 5241     __ addl(bit_index, BitsPerInt);            // bit found in the high word: bias its index by 32
 5242     __ jmpb(have_index);
 5243     __ bind(high_word_empty);
 5244     __ bsrl(bit_index, src_lo);
 5245     __ jccb(Assembler::notZero, have_index);
 5246     __ movl(bit_index, -1);                    // the whole long is zero
 5247     __ bind(have_index);
 5248     __ negl(bit_index);
 5249     __ addl(bit_index, BitsPerLong - 1);
 5250   %}
 5251   ins_pipe(ialu_reg);
 5252 %}
 5253 
 5254 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5255   predicate(UseCountTrailingZerosInstruction);  // TZCNT variant, selected when this flag is set
 5256   match(Set dst (CountTrailingZerosI src));
 5257   effect(KILL cr);
 5258   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5259   ins_encode %{
 5260     Register input = $src$$Register;
 5261     __ tzcntl($dst$$Register, input);  // a single TZCNT computes the whole result
 5262   %}
 5263   ins_pipe(ialu_reg);
 5264 %}
 5265 
 5266 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5267   predicate(!UseCountTrailingZerosInstruction);  // BSF fallback variant
 5268   match(Set dst (CountTrailingZerosI src));
 5269   effect(KILL cr);
 5270   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5271             "JNZ    done\n\t"
 5272             "MOV    $dst, 32\n"
 5273       "done:" %}
 5274   ins_encode %{
 5275     Register result = $dst$$Register;
 5276     Register input  = $src$$Register;
 5277     Label found_bit;
 5278     __ bsfl(result, input);                  // index of lowest set bit; ZF is set when input is zero
 5279     __ jccb(Assembler::notZero, found_bit);
 5280     __ movl(result, BitsPerInt);             // zero input: every bit is a trailing zero
 5281     __ bind(found_bit);
 5282   %}
 5283   ins_pipe(ialu_reg);
 5284 %}
 5285 
 5286 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5287   predicate(UseCountTrailingZerosInstruction);
 5288   match(Set dst (CountTrailingZerosL src));
 5289   effect(TEMP dst, KILL cr);
 5290   // TZCNT on the low word first; the high word is counted (plus 32) only when the low word is zero.
 5291   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5292             "JNC    done\n\t"
 5293             "TZCNT  $dst, $src.hi\n\t"
 5294             "ADD    $dst, 32\n"
 5295             "done:" %}
 5296   ins_encode %{
 5297     Register count  = $dst$$Register;
 5298     Register src_lo = $src$$Register;
 5299     Label finished;
 5300     __ tzcntl(count, src_lo);
 5301     __ jccb(Assembler::carryClear, finished);  // CF clear: low word was non-zero, count is final
 5302     __ tzcntl(count, HIGH_FROM_LOW(src_lo));
 5303     __ addl(count, BitsPerInt);                // skip over the all-zero low word
 5304     __ bind(finished);
 5305   %}
 5306   ins_pipe(ialu_reg);
 5307 %}
 5308 
 5309 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5310   predicate(!UseCountTrailingZerosInstruction);
 5311   match(Set dst (CountTrailingZerosL src));
 5312   effect(TEMP dst, KILL cr);
 5313   // BSF fallback for longs: scan the low word, then the high word; an all-zero long yields 32 + 32.
 5314   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5315             "JNZ    done\n\t"
 5316             "BSF    $dst, $src.hi\n\t"
 5317             "JNZ    msw_not_zero\n\t"
 5318             "MOV    $dst, 32\n"
 5319       "msw_not_zero:\n\t"
 5320             "ADD    $dst, 32\n"
 5321       "done:" %}
 5322   ins_encode %{
 5323     Register count  = $dst$$Register;
 5324     Register src_lo = $src$$Register;
 5325     Label high_has_bit;
 5326     Label finished;
 5327     __ bsfl(count, src_lo);
 5328     __ jccb(Assembler::notZero, finished);     // lowest set bit is in the low word
 5329     __ bsfl(count, HIGH_FROM_LOW(src_lo));
 5330     __ jccb(Assembler::notZero, high_has_bit);
 5331     __ movl(count, BitsPerInt);                // no bit set anywhere
 5332     __ bind(high_has_bit);
 5333     __ addl(count, BitsPerInt);                // offset for the empty low word
 5334     __ bind(finished);
 5335   %}
 5336   ins_pipe(ialu_reg);
 5337 %}
 5338 
 5339 
 5340 //---------- Population Count Instructions -------------------------------------
 5341 
 5342 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5343   predicate(UsePopCountInstruction);  // only selected when this flag is set
 5344   match(Set dst (PopCountI src));
 5345   effect(KILL cr);
 5346   format %{ "POPCNT $dst, $src" %}
 5347   ins_encode %{
 5348     Register input = $src$$Register;
 5349     __ popcntl($dst$$Register, input);  // number of set bits in the 32-bit source
 5350   %}
 5351   ins_pipe(ialu_reg);
 5352 %}
 5353 
 5354 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5355   predicate(UsePopCountInstruction);
 5356   match(Set dst (PopCountI (LoadI mem)));  // folds the int load into the POPCNT memory operand
 5357   effect(KILL cr);
 5358   format %{ "POPCNT $dst, $mem" %}
 5359   ins_encode %{
 5360     Address operand = $mem$$Address;
 5361     __ popcntl($dst$$Register, operand);
 5362   %}
 5363   ins_pipe(ialu_reg);
 5364 %}
 5365 
 5366 // Bit count of a long: POPCNT each 32-bit half and add. Long.bitCount(long) returns an int, so dst is an int register.
 5367 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5368   predicate(UsePopCountInstruction);
 5369   match(Set dst (PopCountL src));
 5370   effect(KILL cr, TEMP tmp, TEMP dst);
 5371   format %{ "POPCNT $dst, $src.lo\n\t"
 5372             "POPCNT $tmp, $src.hi\n\t"
 5373             "ADD    $dst, $tmp" %}
 5374   ins_encode %{
 5375     Register src_lo = $src$$Register;
 5376     __ popcntl($dst$$Register, src_lo);
 5377     __ popcntl($tmp$$Register, HIGH_FROM_LOW(src_lo));
 5378     __ addl($dst$$Register, $tmp$$Register);
 5379   %}
 5380   ins_pipe(ialu_reg);
 5381 %}
 5382 
 5383 // Bit count of a long in memory. Long.bitCount(long) returns an int, so dst is an int register.
 5384 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5385   predicate(UsePopCountInstruction);
 5386   match(Set dst (PopCountL (LoadL mem)));
 5387   effect(KILL cr, TEMP tmp, TEMP dst);
 5388   // The two 32-bit words at mem and mem+4 are counted separately and the counts are added.
 5389   format %{ "POPCNT $dst, $mem\n\t"
 5390             "POPCNT $tmp, $mem+4\n\t"
 5391             "ADD    $dst, $tmp" %}
 5392   ins_encode %{
 5393     Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5394     Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5395     __ popcntl($dst$$Register, lo_word);
 5396     __ popcntl($tmp$$Register, hi_word);
 5397     __ addl($dst$$Register, $tmp$$Register);
 5398   %}
 5399   ins_pipe(ialu_reg);
 5400 %}
 5401 
 5402 
 5403 //----------Load/Store/Move Instructions---------------------------------------
 5404 //----------Load Instructions--------------------------------------------------
 5405 // Load a signed 8-bit value from memory, sign-extended into a 32-bit register.
 5406 instruct loadB(xRegI dst, memory mem) %{
 5407   match(Set dst (LoadB mem));
 5408 
 5409   ins_cost(125);
 5410   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5411   ins_encode %{
 5412     Register target = $dst$$Register;
 5413     Address  source = $mem$$Address;
 5414     __ movsbl(target, source);
 5415   %}
 5416   ins_pipe(ialu_reg_mem);
 5417 %}
 5418 
 5419 // Load a signed byte and widen it to a long held in a lo/hi register pair.
 5420 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5421   match(Set dst (ConvI2L (LoadB mem)));
 5422   effect(KILL cr);
 5423 
 5424   ins_cost(375);
 5425   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5426             "MOV    $dst.hi,$dst.lo\n\t"
 5427             "SAR    $dst.hi,7" %}
 5428   ins_encode %{
 5429     Register dst_lo = $dst$$Register;
 5430     Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register from dst_lo
 5431     __ movsbl(dst_lo, $mem$$Address);
 5432     __ movl(dst_hi, dst_lo);
 5433     __ sarl(dst_hi, 7);  // top 25 bits are already sign copies, so shifting by 7 fills the word
 5434   %}
 5435   ins_pipe(ialu_reg_mem);
 5436 %}
 5437 
 5438 // Load an unsigned 8-bit value from memory, zero-extended into a 32-bit register.
 5439 instruct loadUB(xRegI dst, memory mem) %{
 5440   match(Set dst (LoadUB mem));
 5441 
 5442   ins_cost(125);
 5443   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5444   ins_encode %{
 5445     Register target = $dst$$Register;
 5446     Address  source = $mem$$Address;
 5447     __ movzbl(target, source);
 5448   %}
 5449   ins_pipe(ialu_reg_mem);
 5450 %}
 5451 
 5452 // Load an unsigned byte and zero-extend it to a long held in a lo/hi register pair.
 5453 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5454   match(Set dst (ConvI2L (LoadUB mem)));
 5455   effect(KILL cr);
 5456 
 5457   ins_cost(250);
 5458   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5459             "XOR    $dst.hi,$dst.hi" %}
 5460   ins_encode %{
 5461     Register dst_lo = $dst$$Register;
 5462     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5463     __ movzbl(dst_lo, $mem$$Address);
 5464     __ xorl(dst_hi, dst_hi);  // a zero-extended value has an all-zero high word
 5465   %}
 5466 
 5467   ins_pipe(ialu_reg_mem);
 5468 %}
 5469 
 5470 // Load an unsigned byte, AND it with an int mask, and zero-extend the result to a long.
 5471 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5472   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5473   effect(KILL cr);
 5474   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5475             "XOR    $dst.hi,$dst.hi\n\t"
 5476             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5477   ins_encode %{
 5478     Register dst_lo = $dst$$Register;
 5479     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5480     __ movzbl(dst_lo, $mem$$Address);
 5481     __ xorl(dst_hi, dst_hi);
 5482     __ andl(dst_lo, $mask$$constant & right_n_bits(8));  // only the low 8 mask bits can matter after the byte load
 5483   %}
 5484   ins_pipe(ialu_reg_mem);
 5485 %}
 5486 
 5487 // Load a signed 16-bit value from memory, sign-extended into a 32-bit register.
 5488 instruct loadS(rRegI dst, memory mem) %{
 5489   match(Set dst (LoadS mem));
 5490 
 5491   ins_cost(125);
 5492   format %{ "MOVSX  $dst,$mem\t# short" %}
 5493   ins_encode %{
 5494     Register target = $dst$$Register;
 5495     Address  source = $mem$$Address;
 5496     __ movswl(target, source);
 5497   %}
 5498   ins_pipe(ialu_reg_mem);
 5499 %}
 5500 
 5501 // Short load narrowed to a byte: the (LoadS << 24) >> 24 pattern is emitted as one sign-extending byte load.
 5502 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5503   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5504   ins_cost(125);
 5505   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5506   ins_encode %{
 5507     Register target = $dst$$Register;
 5508     __ movsbl(target, $mem$$Address);
 5509   %}
 5510   ins_pipe(ialu_reg_mem);
 5511 %}
 5512 
 5513 // Load a signed short and widen it to a long held in a lo/hi register pair.
 5514 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5515   match(Set dst (ConvI2L (LoadS mem)));
 5516   effect(KILL cr);
 5517 
 5518   ins_cost(375);
 5519   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5520             "MOV    $dst.hi,$dst.lo\n\t"
 5521             "SAR    $dst.hi,15" %}
 5522   ins_encode %{
 5523     Register dst_lo = $dst$$Register;
 5524     Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register from dst_lo
 5525     __ movswl(dst_lo, $mem$$Address);
 5526     __ movl(dst_hi, dst_lo);
 5527     __ sarl(dst_hi, 15);  // top 17 bits are already sign copies, so shifting by 15 fills the word
 5528   %}
 5529   ins_pipe(ialu_reg_mem);
 5530 %}
 5531 
 5532 // Load an unsigned 16-bit value (ushort/char) from memory, zero-extended into a 32-bit register.
 5533 instruct loadUS(rRegI dst, memory mem) %{
 5534   match(Set dst (LoadUS mem));
 5535 
 5536   ins_cost(125);
 5537   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5538   ins_encode %{
 5539     Register target = $dst$$Register;
 5540     Address  source = $mem$$Address;
 5541     __ movzwl(target, source);
 5542   %}
 5543   ins_pipe(ialu_reg_mem);
 5544 %}
 5545 
 5546 // Unsigned short load narrowed to a byte: the (LoadUS << 24) >> 24 pattern is emitted as one sign-extending byte load.
 5547 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5548   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5549   ins_cost(125);
 5550   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5551   ins_encode %{
 5552     Register target = $dst$$Register;
 5553     __ movsbl(target, $mem$$Address);
 5554   %}
 5555   ins_pipe(ialu_reg_mem);
 5556 %}
 5557 
 5558 // Load an unsigned short/char and zero-extend it to a long held in a lo/hi register pair.
 5559 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5560   match(Set dst (ConvI2L (LoadUS mem)));
 5561   effect(KILL cr);
 5562 
 5563   ins_cost(250);
 5564   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5565             "XOR    $dst.hi,$dst.hi" %}
 5566   ins_encode %{
 5567     Register dst_lo = $dst$$Register;
 5568     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5569     __ movzwl(dst_lo, $mem$$Address);
 5570     __ xorl(dst_hi, dst_hi);  // a zero-extended value has an all-zero high word
 5571   %}
 5572   ins_pipe(ialu_reg_mem);
 5573 %}
 5574 
 5575 // Unsigned short/char masked with 0xFF and widened to a long: emitted as a zero-extending byte load.
 5576 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5577   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5578   effect(KILL cr);
 5579   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5580             "XOR    $dst.hi,$dst.hi" %}
 5581   ins_encode %{
 5582     Register dst_lo = $dst$$Register;
 5583     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5584     __ movzbl(dst_lo, $mem$$Address);
 5585     __ xorl(dst_hi, dst_hi);
 5586   %}
 5587   ins_pipe(ialu_reg_mem);
 5588 %}
 5589 
 5590 // Load an unsigned short/char, AND it with an int mask, and zero-extend the result to a long.
 5591 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5592   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5593   effect(KILL cr);
 5594   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5595             "XOR    $dst.hi,$dst.hi\n\t"
 5596             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5597   ins_encode %{
 5598     Register dst_lo = $dst$$Register;
 5599     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5600     __ movzwl(dst_lo, $mem$$Address);
 5601     __ xorl(dst_hi, dst_hi);
 5602     __ andl(dst_lo, $mask$$constant & right_n_bits(16));  // only the low 16 mask bits can matter after the 16-bit load
 5603   %}
 5604   ins_pipe(ialu_reg_mem);
 5605 %}
 5606 
 5607 // Load a 32-bit int from memory into a register.
 5608 instruct loadI(rRegI dst, memory mem) %{
 5609   match(Set dst (LoadI mem));
 5610 
 5611   ins_cost(125);
 5612   format %{ "MOV    $dst,$mem\t# int" %}
 5613   ins_encode %{
 5614     Register target = $dst$$Register;
 5615     Address  source = $mem$$Address;
 5616     __ movl(target, source);
 5617   %}
 5618   ins_pipe(ialu_reg_mem);
 5619 %}
 5620 
 5621 // Int load narrowed to a byte: the (LoadI << 24) >> 24 pattern is emitted as one sign-extending byte load.
 5622 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5623   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5624   ins_cost(125);
 5625   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5626   ins_encode %{
 5627     Register target = $dst$$Register;
 5628     __ movsbl(target, $mem$$Address);
 5629   %}
 5630   ins_pipe(ialu_reg_mem);
 5631 %}
 5632 
 5633 // Int load masked with 0xFF: emitted as one zero-extending byte load.
 5634 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5635   match(Set dst (AndI (LoadI mem) mask));
 5636   ins_cost(125);
 5637   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5638   ins_encode %{
 5639     Register target = $dst$$Register;
 5640     __ movzbl(target, $mem$$Address);
 5641   %}
 5642   ins_pipe(ialu_reg_mem);
 5643 %}
 5644 
 5645 // Int load narrowed to a short: the (LoadI << 16) >> 16 pattern is emitted as one sign-extending 16-bit load.
 5646 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5647   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5648   ins_cost(125);
 5649   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5650   ins_encode %{
 5651     Register target = $dst$$Register;
 5652     __ movswl(target, $mem$$Address);
 5653   %}
 5654   ins_pipe(ialu_reg_mem);
 5655 %}
 5656 
 5657 // Int load masked with 0xFFFF: emitted as one zero-extending 16-bit load.
 5658 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5659   match(Set dst (AndI (LoadI mem) mask));
 5660   ins_cost(125);
 5661   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5662   ins_encode %{
 5663     Register target = $dst$$Register;
 5664     __ movzwl(target, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_mem);
 5667 %}
 5668 
 5669 // Load an int and sign-extend it to a long held in a lo/hi register pair.
 5670 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5671   match(Set dst (ConvI2L (LoadI mem)));
 5672   effect(KILL cr);
 5673 
 5674   ins_cost(375);
 5675   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5676             "MOV    $dst.hi,$dst.lo\n\t"
 5677             "SAR    $dst.hi,31" %}
 5678   ins_encode %{
 5679     Register dst_lo = $dst$$Register;
 5680     Register dst_hi = HIGH_FROM_LOW(dst_lo);  // always a different register from dst_lo
 5681     __ movl(dst_lo, $mem$$Address);
 5682     __ movl(dst_hi, dst_lo);
 5683     __ sarl(dst_hi, 31);  // replicate the sign bit across the whole high word
 5684   %}
 5685   ins_pipe(ialu_reg_mem);
 5686 %}
 5687 
 5688 // Int load masked with 0xFF and widened to a long: emitted as a zero-extending byte load.
 5689 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5690   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5691   effect(KILL cr);
 5692   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5693             "XOR    $dst.hi,$dst.hi" %}
 5694   ins_encode %{
 5695     Register dst_lo = $dst$$Register;
 5696     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5697     __ movzbl(dst_lo, $mem$$Address);
 5698     __ xorl(dst_hi, dst_hi);
 5699   %}
 5700   ins_pipe(ialu_reg_mem);
 5701 %}
 5702 
 5703 // Int load masked with 0xFFFF and widened to a long: emitted as a zero-extending 16-bit load.
 5704 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5705   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5706   effect(KILL cr);
 5707   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5708             "XOR    $dst.hi,$dst.hi" %}
 5709   ins_encode %{
 5710     Register dst_lo = $dst$$Register;
 5711     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5712     __ movzwl(dst_lo, $mem$$Address);
 5713     __ xorl(dst_hi, dst_hi);
 5714   %}
 5715   ins_pipe(ialu_reg_mem);
 5716 %}
 5717 
 5718 // Int load ANDed with a 31-bit mask and widened to a long; the high word is simply zeroed.
 5719 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5720   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5721   effect(KILL cr);
 5722   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5723             "XOR    $dst.hi,$dst.hi\n\t"
 5724             "AND    $dst.lo,$mask" %}
 5725   ins_encode %{
 5726     Register dst_lo = $dst$$Register;
 5727     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5728     __ movl(dst_lo, $mem$$Address);
 5729     __ xorl(dst_hi, dst_hi);
 5730     __ andl(dst_lo, $mask$$constant);
 5731   %}
 5732   ins_pipe(ialu_reg_mem);
 5733 %}
 5734 
 5735 // Load an int as an unsigned value into a long: low word from memory, high word zeroed.
 5736 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5737   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5738   effect(KILL cr);
 5739 
 5740   ins_cost(250);
 5741   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5742             "XOR    $dst.hi,$dst.hi" %}
 5743   ins_encode %{
 5744     Register dst_lo = $dst$$Register;
 5745     Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5746     __ movl(dst_lo, $mem$$Address);
 5747     __ xorl(dst_hi, dst_hi);
 5748   %}
 5749   ins_pipe(ialu_reg_mem);
 5750 %}
 5751 
 5752 // Non-atomic long load as two 32-bit MOVs. The address must not be clobbered between them,
 5753 // so the load_long_memory operand restricts the address register (to ESI).
 5754 instruct loadL(eRegL dst, load_long_memory mem) %{
 5755   predicate(!((LoadLNode*)n)->require_atomic_access());
 5756   match(Set dst (LoadL mem));
 5757 
 5758   ins_cost(250);
 5759   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5760             "MOV    $dst.hi,$mem+4" %}
 5761   ins_encode %{
 5762     Register dst_lo = $dst$$Register;
 5763     Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5764     Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5765     __ movl(dst_lo, lo_word);
 5766     __ movl(HIGH_FROM_LOW(dst_lo), hi_word);
 5767   %}
 5768 
 5769   ins_pipe(ialu_reg_long_mem);
 5770 %}
 5771 
 5772 // Volatile Load Long.  The load must be atomic, so it is done as a
 5773 // 64-bit FILD followed by a FISTP into the destination stack slot;
 5774 // the value is then reloaded from there on the integer side.
 5775 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5776   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());  // x87 variant: UseSSE at most 1 and atomic access required
 5777   match(Set dst (LoadL mem));
 5778 
 5779   ins_cost(200);
 5780   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5781             "FISTp  $dst" %}
 5782   ins_encode(enc_loadL_volatile(mem,dst));  // encoding class is defined outside this section
 5783   ins_pipe( fpu_reg_mem );
 5784 %}
 5785 
 5786 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5787   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());  // SSE2 variant of the atomic long load
 5788   match(Set dst (LoadL mem));
 5789   effect(TEMP tmp);
 5790   ins_cost(180);
 5791   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5792             "MOVSD  $dst,$tmp" %}
 5793   ins_encode %{
 5794     __ movdbl($tmp$$XMMRegister, $mem$$Address);             // one 64-bit load into the XMM temp
 5795     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);  // spill it to the destination stack slot (rsp-relative)
 5796   %}
 5797   ins_pipe( pipe_slow );
 5798 %}
 5799 
 5800 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5801   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());  // SSE2 variant that delivers the long in a register pair
 5802   match(Set dst (LoadL mem));
 5803   effect(TEMP tmp);
 5804   ins_cost(160);
 5805   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5806             "MOVD   $dst.lo,$tmp\n\t"
 5807             "PSRLQ  $tmp,32\n\t"
 5808             "MOVD   $dst.hi,$tmp" %}
 5809   ins_encode %{
 5810     __ movdbl($tmp$$XMMRegister, $mem$$Address);                 // one 64-bit load into the XMM temp
 5811     __ movdl($dst$$Register, $tmp$$XMMRegister);                 // low 32 bits -> dst.lo
 5812     __ psrlq($tmp$$XMMRegister, 32);                             // shift the high half down
 5813     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);  // former high 32 bits -> dst.hi
 5814   %}
 5815   ins_pipe( pipe_slow );
 5816 %}
 5817 
 5818 // Load Range: a LoadRange node is emitted as a plain 32-bit MOV from memory.
 5819 instruct loadRange(rRegI dst, memory mem) %{
 5820   match(Set dst (LoadRange mem));
 5821 
 5822   ins_cost(125);
 5823   format %{ "MOV    $dst,$mem" %}
 5824   opcode(0x8B);  // MOV r32, r/m32
 5825   ins_encode( OpcP, RegMem(dst,mem));
 5826   ins_pipe( ialu_reg_mem );
 5827 %}
 5828 
 5829 
 5830 // Load Pointer: a LoadP node is emitted as a plain 32-bit MOV from memory.
 5831 instruct loadP(eRegP dst, memory mem) %{
 5832   match(Set dst (LoadP mem));
 5833 
 5834   ins_cost(125);
 5835   format %{ "MOV    $dst,$mem" %}
 5836   opcode(0x8B);  // MOV r32, r/m32
 5837   ins_encode( OpcP, RegMem(dst,mem));
 5838   ins_pipe( ialu_reg_mem );
 5839 %}
 5840 
 5841 // Load Klass Pointer: a LoadKlass node is emitted as a plain 32-bit MOV from memory.
 5842 instruct loadKlass(eRegP dst, memory mem) %{
 5843   match(Set dst (LoadKlass mem));
 5844 
 5845   ins_cost(125);
 5846   format %{ "MOV    $dst,$mem" %}
 5847   opcode(0x8B);  // MOV r32, r/m32
 5848   ins_encode( OpcP, RegMem(dst,mem));
 5849   ins_pipe( ialu_reg_mem );
 5850 %}
 5851 
 5852 // Load Double on the x87 stack (used when UseSSE is at most 1): push from memory, then pop into dst.
 5853 instruct loadDPR(regDPR dst, memory mem) %{
 5854   predicate(UseSSE<=1);
 5855   match(Set dst (LoadD mem));
 5856 
 5857   ins_cost(150);
 5858   format %{ "FLD_D  ST,$mem\n\t"
 5859             "FSTP   $dst" %}
 5860   opcode(0xDD);               /* DD /0 : FLD m64fp */
 5861   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5862               Pop_Reg_DPR(dst) );
 5863   ins_pipe( fpu_reg_mem );
 5864 %}
 5865 
 5866 // Load Double to XMM; variant selected when UseSSE is at least 2 and UseXmmLoadAndClearUpper is on.
 5867 instruct loadD(regD dst, memory mem) %{
 5868   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5869   match(Set dst (LoadD mem));
 5870   ins_cost(145);
 5871   format %{ "MOVSD  $dst,$mem" %}
 5872   ins_encode %{
 5873     __ movdbl ($dst$$XMMRegister, $mem$$Address);  // macro-assembler double load; the format text shows MOVSD for this variant
 5874   %}
 5875   ins_pipe( pipe_slow );
 5876 %}
 5877 
 5878 instruct loadD_partial(regD dst, memory mem) %{
 5879   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);  // counterpart of loadD for when UseXmmLoadAndClearUpper is off
 5880   match(Set dst (LoadD mem));
 5881   ins_cost(145);
 5882   format %{ "MOVLPD $dst,$mem" %}
 5883   ins_encode %{
 5884     __ movdbl ($dst$$XMMRegister, $mem$$Address);  // same assembler call as loadD; the format text shows MOVLPD for this variant
 5885   %}
 5886   ins_pipe( pipe_slow );
 5887 %}
 5888 
 5889 // Load a single-precision float from memory into an XMM register (MOVSS); needs UseSSE of at least 1.
 5890 instruct loadF(regF dst, memory mem) %{
 5891   predicate(UseSSE>=1);
 5892   match(Set dst (LoadF mem));
 5893   ins_cost(145);
 5894   format %{ "MOVSS  $dst,$mem" %}
 5895   ins_encode %{
 5896     XMMRegister target = $dst$$XMMRegister;
 5897     __ movflt(target, $mem$$Address);
 5898   %}
 5899   ins_pipe( pipe_slow );
 5900 %}
 5901 
 5902 // Load Float on the x87 stack (used when UseSSE is 0): push from memory, then pop into dst.
 5903 instruct loadFPR(regFPR dst, memory mem) %{
 5904   predicate(UseSSE==0);
 5905   match(Set dst (LoadF mem));
 5906 
 5907   ins_cost(150);
 5908   format %{ "FLD_S  ST,$mem\n\t"
 5909             "FSTP   $dst" %}
 5910   opcode(0xD9);               /* D9 /0 : FLD m32fp */
 5911   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5912               Pop_Reg_FPR(dst) );
 5913   ins_pipe( fpu_reg_mem );
 5914 %}
 5915 
 5916 // Load Effective Address for a base + 8-bit-offset address operand (indOffset8).
 5917 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5918   match(Set dst mem);  // matches the address expression itself, not a load through it
 5919 
 5920   ins_cost(110);
 5921   format %{ "LEA    $dst,$mem" %}
 5922   opcode(0x8D);  // LEA r32, m
 5923   ins_encode( OpcP, RegMem(dst,mem));
 5924   ins_pipe( ialu_reg_reg_fat );
 5925 %}
 5926 
 5927 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5928   match(Set dst mem);  // LEA variant for the indOffset32 address operand; matches the address, not a load
 5929 
 5930   ins_cost(110);
 5931   format %{ "LEA    $dst,$mem" %}
 5932   opcode(0x8D);  // LEA r32, m
 5933   ins_encode( OpcP, RegMem(dst,mem));
 5934   ins_pipe( ialu_reg_reg_fat );
 5935 %}
 5936 
 5937 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5938   match(Set dst mem);  // LEA variant for the indIndexOffset address operand; matches the address, not a load
 5939 
 5940   ins_cost(110);
 5941   format %{ "LEA    $dst,$mem" %}
 5942   opcode(0x8D);  // LEA r32, m
 5943   ins_encode( OpcP, RegMem(dst,mem));
 5944   ins_pipe( ialu_reg_reg_fat );
 5945 %}
 5946 
 5947 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5948   match(Set dst mem);  // LEA variant for the indIndexScale address operand; matches the address, not a load
 5949 
 5950   ins_cost(110);
 5951   format %{ "LEA    $dst,$mem" %}
 5952   opcode(0x8D);  // LEA r32, m
 5953   ins_encode( OpcP, RegMem(dst,mem));
 5954   ins_pipe( ialu_reg_reg_fat );
 5955 %}
 5956 
 5957 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5958   match(Set dst mem);  // LEA variant for the indIndexScaleOffset address operand; matches the address, not a load
 5959 
 5960   ins_cost(110);
 5961   format %{ "LEA    $dst,$mem" %}
 5962   opcode(0x8D);  // LEA r32, m
 5963   ins_encode( OpcP, RegMem(dst,mem));
 5964   ins_pipe( ialu_reg_reg_fat );
 5965 %}
 5966 
 5967 // Load an arbitrary int constant into a register with a MOV immediate.
 5968 instruct loadConI(rRegI dst, immI src) %{
 5969   match(Set dst src);
 5970   // No ins_cost is given here; the zero-constant variant loadConI0 declares an explicit cost of 50.
 5971   format %{ "MOV    $dst,$src" %}
 5972   ins_encode( LdImmI(dst, src) );  // encoding class is defined outside this section
 5973   ins_pipe( ialu_reg_fat );
 5974 %}
 5975 
 5976 // Load the int constant zero by XOR-ing the destination with itself.
 5977 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5978   match(Set dst src);
 5979   effect(KILL cr);  // the flags register is declared clobbered for this XOR form
 5980 
 5981   ins_cost(50);
 5982   format %{ "XOR    $dst,$dst" %}
 5983   opcode(0x33);  /* + rd */
 5984   ins_encode( OpcP, RegReg( dst, dst ) );
 5985   ins_pipe( ialu_reg );
 5986 %}
 5987 
 5988 instruct loadConP(eRegP dst, immP src) %{
 5989   match(Set dst src);  // materialize a pointer constant in a register
 5990 
 5991   format %{ "MOV    $dst,$src" %}
 5992   opcode(0xB8);  /* + rd */
 5993   ins_encode( LdImmP(dst, src) );  // encoding class is defined outside this section
 5994   ins_pipe( ialu_reg_fat );
 5995 %}
 5996 
 5997 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5998   match(Set dst src);  // materialize a long constant in a lo/hi register pair
 5999   effect(KILL cr);
 6000   ins_cost(200);
 6001   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6002             "MOV    $dst.hi,$src.hi" %}
 6003   opcode(0xB8);
 6004   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );  // one encoding per half; both are defined outside this section
 6005   ins_pipe( ialu_reg_long_fat );
 6006 %}
 6007 
 6008 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6009   match(Set dst src);  // long constant zero: both halves are cleared with XOR
 6010   effect(KILL cr);
 6011   ins_cost(150);
 6012   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6013             "XOR    $dst.hi,$dst.hi" %}
 6014   opcode(0x33,0x33);
 6015   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );  // one encoding per half; both are defined outside this section
 6016   ins_pipe( ialu_reg_long );
 6017 %}
 6018 
 6019 // Load a float constant from the constant table into an x87 register. Usage is guarded by the predicate in operand immFPR().
 6020 instruct loadConFPR(regFPR dst, immFPR con) %{
 6021   match(Set dst con);
 6022   ins_cost(125);
 6023   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6024             "FSTP   $dst" %}
 6025   ins_encode %{
 6026     __ fld_s($constantaddress($con));  // push the constant-table entry onto the x87 stack
 6027     __ fstp_d($dst$$reg);              // pop it into the destination register
 6028   %}
 6029   ins_pipe(fpu_reg_con);
 6030 %}
 6031 
 6032 // Load the float constant matched by immFPR0 via FLDZ; no constant-table access. Usage is guarded by the predicate in operand immFPR0().
 6033 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6034   match(Set dst con);
 6035   ins_cost(125);
 6036   format %{ "FLDZ   ST\n\t"
 6037             "FSTP   $dst" %}
 6038   ins_encode %{
 6039     __ fldz();               // push the x87 zero constant
 6040     __ fstp_d($dst$$reg);    // pop it into the destination register
 6041   %}
 6042   ins_pipe(fpu_reg_con);
 6043 %}
 6044 
 6045 // The instruction usage is guarded by predicate in operand immFPR1().
 6046 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
        // Load the immFPR1 float constant without touching memory:
        // fld1 pushes one, fstp_d pops it into dst.
 6047   match(Set dst con);
 6048   ins_cost(125);
 6049   format %{ "FLD1   ST\n\t"
 6050             "FSTP   $dst" %}
 6051   ins_encode %{
 6052     __ fld1();
 6053     __ fstp_d($dst$$reg);
 6054   %}
 6055   ins_pipe(fpu_reg_con);
 6056 %}
 6057 
 6058 // The instruction usage is guarded by predicate in operand immF().
 6059 instruct loadConF(regF dst, immF con) %{
        // Load a float constant from the constant table into an XMM
        // register with a single movflt (listed as MOVSS).
 6060   match(Set dst con);
 6061   ins_cost(125);
 6062   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6063   ins_encode %{
 6064     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6065   %}
 6066   ins_pipe(pipe_slow);
 6067 %}
 6068 
 6069 // The instruction usage is guarded by predicate in operand immF0().
 6070 instruct loadConF0(regF dst, immF0 src) %{
        // Materialize float 0.0 in an XMM register by xorps of dst with
        // itself; costed at 100, below the 125 of the constant-table load.
 6071   match(Set dst src);
 6072   ins_cost(100);
 6073   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6074   ins_encode %{
 6075     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6076   %}
 6077   ins_pipe(pipe_slow);
 6078 %}
 6079 
 6080 // The instruction usage is guarded by predicate in operand immDPR().
 6081 instruct loadConDPR(regDPR dst, immDPR con) %{
        // Load a double constant into an x87 double register: fld_d pushes
        // the value from the constant table, then fstp_d pops it into dst.
 6082   match(Set dst con);
 6083   ins_cost(125);
 6084 
 6085   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6086             "FSTP   $dst" %}
 6087   ins_encode %{
 6088     __ fld_d($constantaddress($con));
 6089     __ fstp_d($dst$$reg);
 6090   %}
 6091   ins_pipe(fpu_reg_con);
 6092 %}
 6093 
 6094 // The instruction usage is guarded by predicate in operand immDPR0().
 6095 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
        // Load the immDPR0 double constant without touching memory:
        // fldz pushes zero, fstp_d pops it into dst.
 6096   match(Set dst con);
 6097   ins_cost(125);
 6098 
 6099   format %{ "FLDZ   ST\n\t"
 6100             "FSTP   $dst" %}
 6101   ins_encode %{
 6102     __ fldz();
 6103     __ fstp_d($dst$$reg);
 6104   %}
 6105   ins_pipe(fpu_reg_con);
 6106 %}
 6107 
 6108 // The instruction usage is guarded by predicate in operand immDPR1().
 6109 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
        // Load the immDPR1 double constant without touching memory:
        // fld1 pushes one, fstp_d pops it into dst.
 6110   match(Set dst con);
 6111   ins_cost(125);
 6112 
 6113   format %{ "FLD1   ST\n\t"
 6114             "FSTP   $dst" %}
 6115   ins_encode %{
 6116     __ fld1();
 6117     __ fstp_d($dst$$reg);
 6118   %}
 6119   ins_pipe(fpu_reg_con);
 6120 %}
 6121 
 6122 // The instruction usage is guarded by predicate in operand immD().
 6123 instruct loadConD(regD dst, immD con) %{
        // Load a double constant from the constant table into an XMM
        // register with a single movdbl (listed as MOVSD).
 6124   match(Set dst con);
 6125   ins_cost(125);
 6126   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6127   ins_encode %{
 6128     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6129   %}
 6130   ins_pipe(pipe_slow);
 6131 %}
 6132 
 6133 // The instruction usage is guarded by predicate in operand immD0().
 6134 instruct loadConD0(regD dst, immD0 src) %{
        // Materialize double 0.0 in an XMM register by xorpd of dst with
        // itself; costed at 100, below the 125 of the constant-table load.
 6135   match(Set dst src);
 6136   ins_cost(100);
 6137   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6138   ins_encode %{
 6139     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6140   %}
 6141   ins_pipe( pipe_slow );
 6142 %}
 6143 
 6144 // Load Stack Slot
 6145 instruct loadSSI(rRegI dst, stackSlotI src) %{
        // Load an int from a stack slot into an int register: a MOV with
        // primary opcode 0x8B, register operand dst and memory operand src.
 6146   match(Set dst src);
 6147   ins_cost(125);
 6148 
 6149   format %{ "MOV    $dst,$src" %}
 6150   opcode(0x8B);
 6151   ins_encode( OpcP, RegMem(dst,src));
 6152   ins_pipe( ialu_reg_mem );
 6153 %}
 6154 
 6155 instruct loadSSL(eRegL dst, stackSlotL src) %{
        // Load a long from a stack slot into a long register using two
        // 32-bit MOVs (opcode 0x8B for both): RegMem for the first word and
        // RegMem_Hi for the second (presumably the high word at offset +4).
        // The listing text names the register halves on the destination side
        // and the +4 offset on the stack-slot side, since dst is the register
        // and src is the memory operand here.
 6156   match(Set dst src);
 6157 
 6158   ins_cost(200);
 6159   format %{ "MOV    $dst.lo,$src\n\t"
 6160             "MOV    $dst.hi,$src+4" %}
 6161   opcode(0x8B, 0x8B);
 6162   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6163   ins_pipe( ialu_mem_long_reg );
 6164 %}
 6165 
 6166 // Load Stack Slot
 6167 instruct loadSSP(eRegP dst, stackSlotP src) %{
        // Load a pointer from a stack slot into a pointer register; same
        // MOV encoding (opcode 0x8B, RegMem) as the int stack-slot load.
 6168   match(Set dst src);
 6169   ins_cost(125);
 6170 
 6171   format %{ "MOV    $dst,$src" %}
 6172   opcode(0x8B);
 6173   ins_encode( OpcP, RegMem(dst,src));
 6174   ins_pipe( ialu_reg_mem );
 6175 %}
 6176 
 6177 // Load Stack Slot
 6178 instruct loadSSF(regFPR dst, stackSlotF src) %{
        // Load a float from a stack slot into an x87 float register:
        // FLD m32real (D9 /0) from the slot, then Pop_Reg_FPR stores the
        // top of stack into dst (listed as FSTP).
 6179   match(Set dst src);
 6180   ins_cost(125);
 6181 
 6182   format %{ "FLD_S  $src\n\t"
 6183             "FSTP   $dst" %}
 6184   opcode(0xD9);               /* D9 /0, FLD m32real */
 6185   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6186               Pop_Reg_FPR(dst) );
 6187   ins_pipe( fpu_reg_mem );
 6188 %}
 6189 
 6190 // Load Stack Slot
 6191 instruct loadSSD(regDPR dst, stackSlotD src) %{
        // Load a double from a stack slot into an x87 double register:
        // FLD m64real (DD /0) from the slot, then Pop_Reg_DPR stores the
        // top of stack into dst (listed as FSTP).
 6192   match(Set dst src);
 6193   ins_cost(125);
 6194 
 6195   format %{ "FLD_D  $src\n\t"
 6196             "FSTP   $dst" %}
 6197   opcode(0xDD);               /* DD /0, FLD m64real */
 6198   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6199               Pop_Reg_DPR(dst) );
 6200   ins_pipe( fpu_reg_mem );
 6201 %}
 6202 
 6203 // Prefetch instructions for allocation.
 6204 // Must be safe to execute with invalid address (cannot fault).
 6205 
 6206 instruct prefetchAlloc0( memory mem ) %{
        // PrefetchAllocation as a no-op: selected when UseSSE is 0 and
        // AllocatePrefetchInstr is not 3. Emits nothing (size 0, empty
        // encoding) at zero cost.
 6207   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6208   match(PrefetchAllocation mem);
 6209   ins_cost(0);
 6210   size(0);
 6211   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6212   ins_encode();
 6213   ins_pipe(empty);
 6214 %}
 6215 
 6216 instruct prefetchAlloc( memory mem ) %{
        // PrefetchAllocation via prefetchw; selected when
        // AllocatePrefetchInstr is 3, with no UseSSE condition.
 6217   predicate(AllocatePrefetchInstr==3);
 6218   match( PrefetchAllocation mem );
 6219   ins_cost(100);
 6220 
 6221   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6222   ins_encode %{
 6223     __ prefetchw($mem$$Address);
 6224   %}
 6225   ins_pipe(ialu_mem);
 6226 %}
 6227 
 6228 instruct prefetchAllocNTA( memory mem ) %{
        // PrefetchAllocation via prefetchnta; selected when UseSSE is at
        // least 1 and AllocatePrefetchInstr is 0.
 6229   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6230   match(PrefetchAllocation mem);
 6231   ins_cost(100);
 6232 
 6233   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6234   ins_encode %{
 6235     __ prefetchnta($mem$$Address);
 6236   %}
 6237   ins_pipe(ialu_mem);
 6238 %}
 6239 
 6240 instruct prefetchAllocT0( memory mem ) %{
        // PrefetchAllocation via prefetcht0; selected when UseSSE is at
        // least 1 and AllocatePrefetchInstr is 1.
 6241   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6242   match(PrefetchAllocation mem);
 6243   ins_cost(100);
 6244 
 6245   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6246   ins_encode %{
 6247     __ prefetcht0($mem$$Address);
 6248   %}
 6249   ins_pipe(ialu_mem);
 6250 %}
 6251 
 6252 instruct prefetchAllocT2( memory mem ) %{
        // PrefetchAllocation via prefetcht2; selected when UseSSE is at
        // least 1 and AllocatePrefetchInstr is 2.
 6253   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6254   match(PrefetchAllocation mem);
 6255   ins_cost(100);
 6256 
 6257   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6258   ins_encode %{
 6259     __ prefetcht2($mem$$Address);
 6260   %}
 6261   ins_pipe(ialu_mem);
 6262 %}
 6263 
 6264 //----------Store Instructions-------------------------------------------------
 6265 
 6266 // Store Byte
 6267 instruct storeB(memory mem, xRegI src) %{
        // Store a byte from a register to memory: opcode 0x88 with
        // register operand src and memory operand mem.
        // NOTE(review): src uses xRegI rather than rRegI, presumably to
        // limit it to byte-addressable registers; confirm against the
        // operand definition.
 6268   match(Set mem (StoreB mem src));
 6269 
 6270   ins_cost(125);
 6271   format %{ "MOV8   $mem,$src" %}
 6272   opcode(0x88);
 6273   ins_encode( OpcP, RegMem( src, mem ) );
 6274   ins_pipe( ialu_mem_reg );
 6275 %}
 6276 
 6277 // Store Char/Short
 6278 instruct storeC(memory mem, rRegI src) %{
        // Store a 16-bit char/short from a register to memory. The encoding
        // emits OpcS (0x66, the second opcode argument) ahead of OpcP (0x89),
        // so 0x66 acts as a prefix byte on the 32-bit store opcode.
 6279   match(Set mem (StoreC mem src));
 6280 
 6281   ins_cost(125);
 6282   format %{ "MOV16  $mem,$src" %}
 6283   opcode(0x89, 0x66);
 6284   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6285   ins_pipe( ialu_mem_reg );
 6286 %}
 6287 
 6288 // Store Integer
 6289 instruct storeI(memory mem, rRegI src) %{
        // Store a 32-bit int from a register to memory: opcode 0x89 with
        // register operand src and memory operand mem.
 6290   match(Set mem (StoreI mem src));
 6291 
 6292   ins_cost(125);
 6293   format %{ "MOV    $mem,$src" %}
 6294   opcode(0x89);
 6295   ins_encode( OpcP, RegMem( src, mem ) );
 6296   ins_pipe( ialu_mem_reg );
 6297 %}
 6298 
 6299 // Store Long
 6300 instruct storeL(long_memory mem, eRegL src) %{
        // Store a long as two separate 32-bit MOVs (low word, then the word
        // at mem+4). Only selected when the StoreL node does not require
        // atomic access; the atomic cases are handled by the volatile forms.
 6301   predicate(!((StoreLNode*)n)->require_atomic_access());
 6302   match(Set mem (StoreL mem src));
 6303 
 6304   ins_cost(200);
 6305   format %{ "MOV    $mem,$src.lo\n\t"
 6306             "MOV    $mem+4,$src.hi" %}
 6307   opcode(0x89, 0x89);
 6308   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6309   ins_pipe( ialu_mem_long_reg );
 6310 %}
 6311 
 6312 // Store Long to Integer
 6313 instruct storeL2I(memory mem, eRegL src) %{
        // Fold a long-to-int conversion into the store: matches StoreI of
        // (ConvL2I src) and emits one movl from the register that src
        // resolves to (labelled src.lo in the listing), with no separate
        // conversion instruction.
 6314   match(Set mem (StoreI mem (ConvL2I src)));
 6315 
 6316   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6317   ins_encode %{
 6318     __ movl($mem$$Address, $src$$Register);
 6319   %}
 6320   ins_pipe(ialu_mem_reg);
 6321 %}
 6322 
 6323 // Volatile Store Long.  Must be atomic, so move it into
 6324 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6325 // target address before the store (for null-ptr checks)
 6326 // so the memory operand is used twice in the encoding.
 6327 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
        // Atomic long store for UseSSE<=1 when the StoreL node requires
        // atomic access; the source is a long stack slot. The encoding first
        // emits opcode 0x3B with EAX and mem (the CMP probe in the listing),
        // then enc_storeL_volatile for the FILD/FISTp pair. cr is killed.
 6328   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6329   match(Set mem (StoreL mem src));
 6330   effect( KILL cr );
 6331   ins_cost(400);
 6332   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6333             "FILD   $src\n\t"
 6334             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6335   opcode(0x3B);
 6336   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6337   ins_pipe( fpu_reg_mem );
 6338 %}
 6339 
 6340 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
        // Atomic long store for UseSSE>=2 with the source in a stack slot.
        // Sequence: cmpl probe of mem against rax, movdbl of the 64-bit slot
        // (addressed off rsp by the slot displacement) into the XMM temp,
        // then a single movdbl of the temp to mem. tmp is a TEMP; cr is killed.
 6341   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6342   match(Set mem (StoreL mem src));
 6343   effect( TEMP tmp, KILL cr );
 6344   ins_cost(380);
 6345   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6346             "MOVSD  $tmp,$src\n\t"
 6347             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6348   ins_encode %{
 6349     __ cmpl(rax, $mem$$Address);
 6350     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6351     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6352   %}
 6353   ins_pipe( pipe_slow );
 6354 %}
 6355 
 6356 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
        // Atomic long store for UseSSE>=2 with the source in a register pair.
        // After the cmpl probe, each 32-bit half is moved into its own XMM
        // temp (movdl), punpckldq interleaves them into tmp, and one movdbl
        // writes the 64-bit value to mem. Costed at 360, below the 380 of the
        // stack-slot form. Both temps are TEMP; cr is killed.
 6357   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6358   match(Set mem (StoreL mem src));
 6359   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6360   ins_cost(360);
 6361   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6362             "MOVD   $tmp,$src.lo\n\t"
 6363             "MOVD   $tmp2,$src.hi\n\t"
 6364             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6365             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6366   ins_encode %{
 6367     __ cmpl(rax, $mem$$Address);
 6368     __ movdl($tmp$$XMMRegister, $src$$Register);
 6369     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6370     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6371     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6372   %}
 6373   ins_pipe( pipe_slow );
 6374 %}
 6375 
 6376 // Store Pointer; for storing unknown oops and raw pointers
 6377 instruct storeP(memory mem, anyRegP src) %{
        // Store a pointer from a register to memory; same encoding as the
        // int store (opcode 0x89, RegMem), with src drawn from anyRegP.
 6378   match(Set mem (StoreP mem src));
 6379 
 6380   ins_cost(125);
 6381   format %{ "MOV    $mem,$src" %}
 6382   opcode(0x89);
 6383   ins_encode( OpcP, RegMem( src, mem ) );
 6384   ins_pipe( ialu_mem_reg );
 6385 %}
 6386 
 6387 // Store Integer Immediate
 6388 instruct storeImmI(memory mem, immI src) %{
        // Store a 32-bit int immediate to memory: opcode C7 /0 followed by
        // the memory operand and a 32-bit constant (Con32).
 6389   match(Set mem (StoreI mem src));
 6390 
 6391   ins_cost(150);
 6392   format %{ "MOV    $mem,$src" %}
 6393   opcode(0xC7);               /* C7 /0 */
 6394   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6395   ins_pipe( ialu_mem_imm );
 6396 %}
 6397 
 6398 // Store Short/Char Immediate
 6399 instruct storeImmI16(memory mem, immI16 src) %{
        // Store a 16-bit immediate for a StoreC node. Only enabled when
        // UseStoreImmI16 is set. Encoded as SizePrefix, then C7 /0 with the
        // memory operand, then a 16-bit constant (Con16).
 6400   predicate(UseStoreImmI16);
 6401   match(Set mem (StoreC mem src));
 6402 
 6403   ins_cost(150);
 6404   format %{ "MOV16  $mem,$src" %}
 6405   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6406   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6407   ins_pipe( ialu_mem_imm );
 6408 %}
 6409 
 6410 // Store Pointer Immediate; null pointers or constant oops that do not
 6411 // need card-mark barriers.
 6412 instruct storeImmP(memory mem, immP src) %{
        // Store a pointer immediate to memory; encoded exactly like the
        // 32-bit int immediate store (C7 /0, memory operand, Con32).
 6413   match(Set mem (StoreP mem src));
 6414 
 6415   ins_cost(150);
 6416   format %{ "MOV    $mem,$src" %}
 6417   opcode(0xC7);               /* C7 /0 */
 6418   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6419   ins_pipe( ialu_mem_imm );
 6420 %}
 6421 
 6422 // Store Byte Immediate
 6423 instruct storeImmB(memory mem, immI8 src) %{
        // Store an 8-bit immediate for a StoreB node: opcode C6 /0 with the
        // memory operand, constant emitted by Con8or32.
 6424   match(Set mem (StoreB mem src));
 6425 
 6426   ins_cost(150);
 6427   format %{ "MOV8   $mem,$src" %}
 6428   opcode(0xC6);               /* C6 /0 */
 6429   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6430   ins_pipe( ialu_mem_imm );
 6431 %}
 6432 
 6433 // Store CMS card-mark Immediate
 6434 instruct storeImmCM(memory mem, immI8 src) %{
        // Card-mark store of an 8-bit immediate: matches StoreCM and uses
        // the same C6 /0 encoding as the byte immediate store.
 6435   match(Set mem (StoreCM mem src));
 6436 
 6437   ins_cost(150);
 6438   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6439   opcode(0xC6);               /* C6 /0 */
 6440   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6441   ins_pipe( ialu_mem_imm );
 6442 %}
 6443 
 6444 // Store Double
 6445 instruct storeDPR( memory mem, regDPR1 src) %{
        // Store a double from the x87 register class regDPR1 to memory when
        // UseSSE<=1. Bytes come from enc_FPR_store with opcode DD (the /2
        // form per the opcode note).
 6446   predicate(UseSSE<=1);
 6447   match(Set mem (StoreD mem src));
 6448 
 6449   ins_cost(100);
 6450   format %{ "FST_D  $mem,$src" %}
 6451   opcode(0xDD);       /* DD /2 */
 6452   ins_encode( enc_FPR_store(mem,src) );
 6453   ins_pipe( fpu_mem_reg );
 6454 %}
 6455 
 6456 // Store double does rounding on x86
 6457 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
        // Absorb a RoundDouble feeding a StoreD (UseSSE<=1): the encoding is
        // identical to the plain x87 double store, so no separate rounding
        // instruction is emitted.
 6458   predicate(UseSSE<=1);
 6459   match(Set mem (StoreD mem (RoundDouble src)));
 6460 
 6461   ins_cost(100);
 6462   format %{ "FST_D  $mem,$src\t# round" %}
 6463   opcode(0xDD);       /* DD /2 */
 6464   ins_encode( enc_FPR_store(mem,src) );
 6465   ins_pipe( fpu_mem_reg );
 6466 %}
 6467 
 6468 // Store XMM register to memory (double-precision floating points)
 6469 // MOVSD instruction
 6470 instruct storeD(memory mem, regD src) %{
        // Store a double from an XMM register to memory with movdbl; enabled
        // when UseSSE>=2.
 6471   predicate(UseSSE>=2);
 6472   match(Set mem (StoreD mem src));
 6473   ins_cost(95);
 6474   format %{ "MOVSD  $mem,$src" %}
 6475   ins_encode %{
 6476     __ movdbl($mem$$Address, $src$$XMMRegister);
 6477   %}
 6478   ins_pipe( pipe_slow );
 6479 %}
 6480 
 6481 // Store XMM register to memory (single-precision floating point)
 6482 // MOVSS instruction
 6483 instruct storeF(memory mem, regF src) %{
        // Store a float from an XMM register to memory with movflt; enabled
        // when UseSSE>=1.
 6484   predicate(UseSSE>=1);
 6485   match(Set mem (StoreF mem src));
 6486   ins_cost(95);
 6487   format %{ "MOVSS  $mem,$src" %}
 6488   ins_encode %{
 6489     __ movflt($mem$$Address, $src$$XMMRegister);
 6490   %}
 6491   ins_pipe( pipe_slow );
 6492 %}
 6493 
 6494 
 6495 // Store Float
 6496 instruct storeFPR( memory mem, regFPR1 src) %{
        // Store a float from the x87 register class regFPR1 to memory; only
        // when UseSSE is 0. Bytes come from enc_FPR_store with opcode D9
        // (the /2 form per the opcode note).
 6497   predicate(UseSSE==0);
 6498   match(Set mem (StoreF mem src));
 6499 
 6500   ins_cost(100);
 6501   format %{ "FST_S  $mem,$src" %}
 6502   opcode(0xD9);       /* D9 /2 */
 6503   ins_encode( enc_FPR_store(mem,src) );
 6504   ins_pipe( fpu_mem_reg );
 6505 %}
 6506 
 6507 // Store Float does rounding on x86
 6508 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
        // Absorb a RoundFloat feeding a StoreF (UseSSE==0): the encoding is
        // identical to the plain x87 float store, so no separate rounding
        // instruction is emitted.
 6509   predicate(UseSSE==0);
 6510   match(Set mem (StoreF mem (RoundFloat src)));
 6511 
 6512   ins_cost(100);
 6513   format %{ "FST_S  $mem,$src\t# round" %}
 6514   opcode(0xD9);       /* D9 /2 */
 6515   ins_encode( enc_FPR_store(mem,src) );
 6516   ins_pipe( fpu_mem_reg );
 6517 %}
 6518 
 6519 // Store Float does rounding on x86
 6520 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
        // Absorb a ConvD2F feeding a StoreF (UseSSE<=1): the x87 double in
        // src is written with the single-precision store opcode (D9), so the
        // narrowing happens as part of the store.
 6521   predicate(UseSSE<=1);
 6522   match(Set mem (StoreF mem (ConvD2F src)));
 6523 
 6524   ins_cost(100);
 6525   format %{ "FST_S  $mem,$src\t# D-round" %}
 6526   opcode(0xD9);       /* D9 /2 */
 6527   ins_encode( enc_FPR_store(mem,src) );
 6528   ins_pipe( fpu_mem_reg );
 6529 %}
 6530 
 6531 // Store immediate Float value (it is faster than store from FPU register)
 6532 // The instruction usage is guarded by predicate in operand immFPR().
 6533 instruct storeFPR_imm( memory mem, immFPR src) %{
        // Store a float constant straight to memory with an integer MOV
        // (C7 /0); Con32FPR_as_bits supplies the constant as a 32-bit
        // immediate. Cost 50 makes it cheaper than the register stores.
 6534   match(Set mem (StoreF mem src));
 6535 
 6536   ins_cost(50);
 6537   format %{ "MOV    $mem,$src\t# store float" %}
 6538   opcode(0xC7);               /* C7 /0 */
 6539   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6540   ins_pipe( ialu_mem_imm );
 6541 %}
 6542 
 6543 // Store immediate Float value (it is faster than store from XMM register)
 6544 // The instruction usage is guarded by predicate in operand immF().
 6545 instruct storeF_imm( memory mem, immF src) %{
        // Store a float constant straight to memory with an integer MOV
        // (C7 /0); Con32F_as_bits supplies the constant as a 32-bit
        // immediate. Counterpart of storeFPR_imm for the immF operand.
 6546   match(Set mem (StoreF mem src));
 6547 
 6548   ins_cost(50);
 6549   format %{ "MOV    $mem,$src\t# store float" %}
 6550   opcode(0xC7);               /* C7 /0 */
 6551   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6552   ins_pipe( ialu_mem_imm );
 6553 %}
 6554 
 6555 // Store Integer to stack slot
 6556 instruct storeSSI(stackSlotI dst, rRegI src) %{
        // Store an int register into a stack slot: opcode 0x89 emitted
        // through the OpcPRegSS encoding class.
 6557   match(Set dst src);
 6558 
 6559   ins_cost(100);
 6560   format %{ "MOV    $dst,$src" %}
 6561   opcode(0x89);
 6562   ins_encode( OpcPRegSS( dst, src ) );
 6563   ins_pipe( ialu_mem_reg );
 6564 %}
 6565 
 6566 // Store Pointer to stack slot
 6567 instruct storeSSP(stackSlotP dst, eRegP src) %{
        // Store a pointer register into a pointer stack slot; same opcode
        // (0x89) and OpcPRegSS encoding as the int stack-slot store.
 6568   match(Set dst src);
 6569 
 6570   ins_cost(100);
 6571   format %{ "MOV    $dst,$src" %}
 6572   opcode(0x89);
 6573   ins_encode( OpcPRegSS( dst, src ) );
 6574   ins_pipe( ialu_mem_reg );
 6575 %}
 6576 
 6577 // Store Long to stack slot
 6578 instruct storeSSL(stackSlotL dst, eRegL src) %{
        // Store a long register pair into a long stack slot with two 32-bit
        // MOVs (opcode 0x89 for both): RegMem for the low word and RegMem_Hi
        // for the word at dst+4.
 6579   match(Set dst src);
 6580 
 6581   ins_cost(200);
 6582   format %{ "MOV    $dst,$src.lo\n\t"
 6583             "MOV    $dst+4,$src.hi" %}
 6584   opcode(0x89, 0x89);
 6585   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6586   ins_pipe( ialu_mem_long_reg );
 6587 %}
 6588 
 6589 //----------MemBar Instructions-----------------------------------------------
 6590 // Memory barrier flavors
 6591 
 6592 instruct membar_acquire() %{
        // Acquire barrier: matches both MemBarAcquire and LoadFence and
        // emits no code (size 0, empty encoding).
 6593   match(MemBarAcquire);
 6594   match(LoadFence);
 6595   ins_cost(400);
 6596 
 6597   size(0);
 6598   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6599   ins_encode();
 6600   ins_pipe(empty);
 6601 %}
 6602 
 6603 instruct membar_acquire_lock() %{
        // Acquire barrier paired with a lock: emits no code at zero cost.
        // Per the format text, the CMPXCHG already issued by FastLock makes
        // a separate barrier unnecessary.
 6604   match(MemBarAcquireLock);
 6605   ins_cost(0);
 6606 
 6607   size(0);
 6608   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6609   ins_encode( );
 6610   ins_pipe(empty);
 6611 %}
 6612 
 6613 instruct membar_release() %{
        // Release barrier: matches both MemBarRelease and StoreFence and
        // emits no code (size 0, empty encoding).
 6614   match(MemBarRelease);
 6615   match(StoreFence);
 6616   ins_cost(400);
 6617 
 6618   size(0);
 6619   format %{ "MEMBAR-release ! (empty encoding)" %}
 6620   ins_encode( );
 6621   ins_pipe(empty);
 6622 %}
 6623 
 6624 instruct membar_release_lock() %{
        // Release barrier paired with an unlock: emits no code at zero cost.
        // Per the format text, the FastUnlock that follows makes a separate
        // barrier unnecessary.
 6625   match(MemBarReleaseLock);
 6626   ins_cost(0);
 6627 
 6628   size(0);
 6629   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6630   ins_encode( );
 6631   ins_pipe(empty);
 6632 %}
 6633 
 6634 instruct membar_volatile(eFlagsReg cr) %{
        // Volatile barrier that emits real code: the assembler membar call
        // with the StoreLoad mask, listed as a locked ADDL of 0 to the top of
        // the stack. cr is declared killed.
 6635   match(MemBarVolatile);
 6636   effect(KILL cr);
 6637   ins_cost(400);
 6638 
 6639   format %{
 6640     $$template
 6641     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6642   %}
 6643   ins_encode %{
 6644     __ membar(Assembler::StoreLoad);
 6645   %}
 6646   ins_pipe(pipe_slow);
 6647 %}
 6648 
 6649 instruct unnecessary_membar_volatile() %{
        // Elided volatile barrier: when Matcher::post_store_load_barrier(n)
        // holds, MemBarVolatile is matched with an empty encoding at cost 0
        // (versus 400 for the emitting membar_volatile rule).
 6650   match(MemBarVolatile);
 6651   predicate(Matcher::post_store_load_barrier(n));
 6652   ins_cost(0);
 6653 
 6654   size(0);
 6655   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6656   ins_encode( );
 6657   ins_pipe(empty);
 6658 %}
 6659 
 6660 instruct membar_storestore() %{
        // Store-store barrier: matches MemBarStoreStore and emits no code
        // (size 0, empty encoding) at zero cost.
 6661   match(MemBarStoreStore);
 6662   ins_cost(0);
 6663 
 6664   size(0);
 6665   format %{ "MEMBAR-storestore (empty encoding)" %}
 6666   ins_encode( );
 6667   ins_pipe(empty);
 6668 %}
 6669 
 6670 //----------Move Instructions--------------------------------------------------
 6671 instruct castX2P(eAXRegP dst, eAXRegI src) %{
        // Reinterpret an int as a pointer (CastX2P) without emitting code:
        // empty encoding, zero cost.
        // NOTE(review): dst and src use the eAX operand classes, presumably
        // pinning both to EAX so that no move is required; confirm against
        // the operand definitions.
 6672   match(Set dst (CastX2P src));
 6673   format %{ "# X2P  $dst, $src" %}
 6674   ins_encode( /*empty encoding*/ );
 6675   ins_cost(0);
 6676   ins_pipe(empty);
 6677 %}
 6678 
 6679 instruct castP2X(rRegI dst, eRegP src ) %{
        // Reinterpret a pointer as an int (CastP2X): a register-to-register
        // copy produced by the enc_Copy encoding class, listed as MOV.
 6680   match(Set dst (CastP2X src));
 6681   ins_cost(50);
 6682   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6683   ins_encode( enc_Copy( dst, src) );
 6684   ins_pipe( ialu_reg_reg );
 6685 %}
 6686 
 6687 //----------Conditional Move---------------------------------------------------
 6688 // Conditional move
 6689 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
        // Int conditional move for CPUs without CMOV support (signed
        // compare flags). Emulated with a short branch on the inverted
        // condition (cmpcode with its low bit flipped) that skips a plain
        // movl of src into dst.
 6690   predicate(!VM_Version::supports_cmov() );
 6691   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6692   ins_cost(200);
 6693   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6694             "MOV    $dst,$src\n"
 6695       "skip:" %}
 6696   ins_encode %{
 6697     Label Lskip;
 6698     // Invert sense of branch from sense of CMOV
 6699     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6700     __ movl($dst$$Register, $src$$Register);
 6701     __ bind(Lskip);
 6702   %}
 6703   ins_pipe( pipe_cmov_reg );
 6704 %}
 6705 
 6706 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
        // Int conditional move for CPUs without CMOV support, unsigned
        // compare flavor (cmpOpU / eFlagsRegU). Same emulation as the signed
        // form: short branch on the inverted condition around a movl.
 6707   predicate(!VM_Version::supports_cmov() );
 6708   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6709   ins_cost(200);
 6710   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6711             "MOV    $dst,$src\n"
 6712       "skip:" %}
 6713   ins_encode %{
 6714     Label Lskip;
 6715     // Invert sense of branch from sense of CMOV
 6716     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6717     __ movl($dst$$Register, $src$$Register);
 6718     __ bind(Lskip);
 6719   %}
 6720   ins_pipe( pipe_cmov_reg );
 6721 %}
 6722 
 6723 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
        // Int conditional move, register source, signed compare flags;
        // requires CMOV support. enc_cmov(cop) emits the opcode bytes
        // (0x0F, 0x40 base) and RegReg the register operands.
 6724   predicate(VM_Version::supports_cmov() );
 6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6726   ins_cost(200);
 6727   format %{ "CMOV$cop $dst,$src" %}
 6728   opcode(0x0F,0x40);
 6729   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6730   ins_pipe( pipe_cmov_reg );
 6731 %}
 6732 
 6733 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
        // Int conditional move, register source, unsigned compare flags
        // (cmpOpU / eFlagsRegU); requires CMOV support. Encoding is the same
        // as the signed form.
 6734   predicate(VM_Version::supports_cmov() );
 6735   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6736   ins_cost(200);
 6737   format %{ "CMOV$cop $dst,$src" %}
 6738   opcode(0x0F,0x40);
 6739   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6740   ins_pipe( pipe_cmov_reg );
 6741 %}
 6742 
 6743 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
        // Int conditional move for the UCF operand flavor (cmpOpUCF /
        // eFlagsRegUCF). It has no encoding of its own: it expands into the
        // unsigned register form cmovI_regU.
 6744   predicate(VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   expand %{
 6748     cmovI_regU(cop, cr, dst, src);
 6749   %}
 6750 %}
 6751 
 6752 // Conditional move
 6753 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
        // Int conditional move with the LoadI folded in as a memory source
        // operand (signed compare flags); requires CMOV support. Cost 250
        // versus 200 for the register-source form.
 6754   predicate(VM_Version::supports_cmov() );
 6755   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6756   ins_cost(250);
 6757   format %{ "CMOV$cop $dst,$src" %}
 6758   opcode(0x0F,0x40);
 6759   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6760   ins_pipe( pipe_cmov_mem );
 6761 %}
 6762 
 6763 // Conditional move
 6764 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
        // Int conditional move with the LoadI folded in as a memory source
        // operand, unsigned compare flags (cmpOpU / eFlagsRegU); requires
        // CMOV support. Encoding matches the signed memory form.
 6765   predicate(VM_Version::supports_cmov() );
 6766   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6767   ins_cost(250);
 6768   format %{ "CMOV$cop $dst,$src" %}
 6769   opcode(0x0F,0x40);
 6770   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6771   ins_pipe( pipe_cmov_mem );
 6772 %}
 6773 
 6774 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
        // Memory-source int conditional move for the UCF operand flavor;
        // no encoding of its own, it expands into cmovI_memU.
 6775   predicate(VM_Version::supports_cmov() );
 6776   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6777   ins_cost(250);
 6778   expand %{
 6779     cmovI_memU(cop, cr, dst, src);
 6780   %}
 6781 %}
 6782 
 6783 // Conditional move
 6784 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
        // Pointer conditional move, register source, signed compare flags;
        // requires CMOV support. Same 0x0F,0x40-based encoding as the int
        // register form.
 6785   predicate(VM_Version::supports_cmov() );
 6786   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6787   ins_cost(200);
 6788   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6789   opcode(0x0F,0x40);
 6790   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6791   ins_pipe( pipe_cmov_reg );
 6792 %}
 6793 
 6794 // Conditional move (non-P6 version)
 6795 // Note:  a CMoveP is generated for  stubs and native wrappers
 6796 //        regardless of whether we are on a P6, so we
 6797 //        emulate a cmov here
 6798 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
        // Emulated pointer conditional move: a branch emitted by
        // enc_cmov_branch skips a MOV (opcode 0x8b) of src into dst. It has
        // no predicate, so it is available whether or not CMOV is supported;
        // cost 300 versus 200 for the real CMOV form.
        // NOTE(review): the 0x2 argument is presumably the byte length of
        // the MOV being skipped; confirm against enc_cmov_branch.
 6799   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6800   ins_cost(300);
 6801   format %{ "Jn$cop   skip\n\t"
 6802           "MOV    $dst,$src\t# pointer\n"
 6803       "skip:" %}
 6804   opcode(0x8b);
 6805   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6806   ins_pipe( pipe_cmov_reg );
 6807 %}
 6808 
 6809 // Conditional move
 6810 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
        // Pointer conditional move, register source, unsigned compare flags
        // (cmpOpU / eFlagsRegU); requires CMOV support.
 6811   predicate(VM_Version::supports_cmov() );
 6812   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6813   ins_cost(200);
 6814   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6815   opcode(0x0F,0x40);
 6816   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6817   ins_pipe( pipe_cmov_reg );
 6818 %}
 6819 
 6820 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
        // Pointer conditional move for the UCF operand flavor; no encoding
        // of its own, it expands into cmovP_regU.
 6821   predicate(VM_Version::supports_cmov() );
 6822   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6823   ins_cost(200);
 6824   expand %{
 6825     cmovP_regU(cop, cr, dst, src);
 6826   %}
 6827 %}
 6828 
 6829 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6830 // correctly meets the two pointer arguments; one is an incoming
 6831 // register but the other is a memory operand.  ALSO appears to
 6832 // be buggy with implicit null checks.
 6833 //
 6834 //// Conditional move
 6835 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6836 //  predicate(VM_Version::supports_cmov() );
 6837 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6838 //  ins_cost(250);
 6839 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6840 //  opcode(0x0F,0x40);
 6841 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6842 //  ins_pipe( pipe_cmov_mem );
 6843 //%}
 6844 //
 6845 //// Conditional move
 6846 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6847 //  predicate(VM_Version::supports_cmov() );
 6848 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6849 //  ins_cost(250);
 6850 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6851 //  opcode(0x0F,0x40);
 6852 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6853 //  ins_pipe( pipe_cmov_mem );
 6854 //%}
 6855 
 6856 // Conditional move
 6857 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
        // x87 double conditional move on unsigned compare flags, for
        // UseSSE<=1. dst is in the regDPR1 class; the bytes come from
        // enc_cmov_dpr with primary opcode 0xDA.
 6858   predicate(UseSSE<=1);
 6859   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6860   ins_cost(200);
 6861   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6862   opcode(0xDA);
 6863   ins_encode( enc_cmov_dpr(cop,src) );
 6864   ins_pipe( pipe_cmovDPR_reg );
 6865 %}
 6866 
 6867 // Conditional move
 6868 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
        // x87 float conditional move on unsigned compare flags, for
        // UseSSE==0. Reuses the double variant's encoding class
        // (enc_cmov_dpr, opcode 0xDA) and pipeline class.
 6869   predicate(UseSSE==0);
 6870   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6871   ins_cost(200);
 6872   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6873   opcode(0xDA);
 6874   ins_encode( enc_cmov_dpr(cop,src) );
 6875   ins_pipe( pipe_cmovDPR_reg );
 6876 %}
 6877 
 6878 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6879 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
        // x87 double conditional move on signed compare flags (UseSSE<=1),
        // emulated with a branch: enc_cmov_branch skips the move, which is
        // Push_Reg_DPR(src) followed by opcode DD with dst (the DD D8+i or
        // DD /3 form per the opcode note).
        // NOTE(review): the 0x4 argument is presumably the byte length of
        // the skipped sequence; confirm against enc_cmov_branch.
 6880   predicate(UseSSE<=1);
 6881   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6882   ins_cost(200);
 6883   format %{ "Jn$cop   skip\n\t"
 6884             "MOV    $dst,$src\t# double\n"
 6885       "skip:" %}
 6886   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6887   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6888   ins_pipe( pipe_cmovDPR_reg );
 6889 %}
 6890 
 6891 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6892 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
        // x87 float conditional move on signed compare flags (UseSSE==0),
        // emulated with a branch: enc_cmov_branch skips the move, which is
        // Push_Reg_FPR(src) followed by opcode DD with dst.
        // NOTE(review): the 0x4 argument is presumably the byte length of
        // the skipped sequence; confirm against enc_cmov_branch.
 6893   predicate(UseSSE==0);
 6894   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6895   ins_cost(200);
 6896   format %{ "Jn$cop    skip\n\t"
 6897             "MOV    $dst,$src\t# float\n"
 6898       "skip:" %}
 6899   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6900   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6901   ins_pipe( pipe_cmovDPR_reg );
 6902 %}
 6903 
 6904 // No CMOVE with SSE/SSE2
 6905 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
        // XMM float conditional move on signed compare flags (UseSSE>=1).
        // Emulated: a short branch on the inverted condition skips a movflt
        // of src into dst.
 6906   predicate (UseSSE>=1);
 6907   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6908   ins_cost(200);
 6909   format %{ "Jn$cop   skip\n\t"
 6910             "MOVSS  $dst,$src\t# float\n"
 6911       "skip:" %}
 6912   ins_encode %{
 6913     Label skip;
 6914     // Invert sense of branch from sense of CMOV
 6915     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6916     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6917     __ bind(skip);
 6918   %}
 6919   ins_pipe( pipe_slow );
 6920 %}
 6921 
 6922 // No CMOVE with SSE/SSE2
 6923 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
        // XMM double conditional move on signed compare flags (UseSSE>=2).
        // Emulated: a short branch on the inverted condition skips a movdbl
        // of src into dst. The listing tags the move as a double, matching
        // the CMoveD node and the movdbl that is actually emitted.
 6924   predicate (UseSSE>=2);
 6925   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6926   ins_cost(200);
 6927   format %{ "Jn$cop   skip\n\t"
 6928             "MOVSD  $dst,$src\t# double\n"
 6929       "skip:" %}
 6930   ins_encode %{
 6931     Label skip;
 6932     // Invert sense of branch from sense of CMOV
 6933     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6934     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6935     __ bind(skip);
 6936   %}
 6937   ins_pipe( pipe_slow );
 6938 %}
 6939 
 6940 // unsigned version
 6941 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
        // XMM float conditional move on unsigned compare flags (UseSSE>=1).
        // Same emulation as the signed form: short branch on the inverted
        // condition around a movflt.
 6942   predicate (UseSSE>=1);
 6943   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6944   ins_cost(200);
 6945   format %{ "Jn$cop   skip\n\t"
 6946             "MOVSS  $dst,$src\t# float\n"
 6947       "skip:" %}
 6948   ins_encode %{
 6949     Label skip;
 6950     // Invert sense of branch from sense of CMOV
 6951     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6952     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6953     __ bind(skip);
 6954   %}
 6955   ins_pipe( pipe_slow );
 6956 %}
 6957 
 6958 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
        // XMM float conditional move for the UCF operand flavor; no encoding
        // of its own, it expands into fcmovF_regU.
 6959   predicate (UseSSE>=1);
 6960   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6961   ins_cost(200);
 6962   expand %{
 6963     fcmovF_regU(cop, cr, dst, src);
 6964   %}
 6965 %}
 6966 
 6967 // unsigned version
 6968 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
        // XMM double conditional move on unsigned compare flags (UseSSE>=2).
        // Emulated: a short branch on the inverted condition skips a movdbl
        // of src into dst. The listing tags the move as a double, matching
        // the CMoveD node and the movdbl that is actually emitted.
 6969   predicate (UseSSE>=2);
 6970   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6971   ins_cost(200);
 6972   format %{ "Jn$cop   skip\n\t"
 6973             "MOVSD  $dst,$src\t# double\n"
 6974       "skip:" %}
 6975   ins_encode %{
 6976     Label skip;
 6977     // Invert sense of branch from sense of CMOV
 6978     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6979     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6980     __ bind(skip);
 6981   %}
 6982   ins_pipe( pipe_slow );
 6983 %}
 6984 
 6985 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
        // Same CMoveD pattern for the cmpOpUCF/eFlagsRegUCF operand pair.  It has
        // no encoding of its own: it expands to fcmovD_regU.
 6986   predicate (UseSSE>=2);
 6987   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6988   ins_cost(200);
 6989   expand %{
 6990     fcmovD_regU(cop, cr, dst, src);
 6991   %}
 6992 %}
 6993 
 6994 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
        // Conditional move of a long (eRegL) for the cmpOp/eFlagsReg operand pair.
        // Only selected when VM_Version::supports_cmov().  The encoding issues two
        // CMOVs under the same condition: one for the low halves (RegReg_Lo2) and
        // one for the high halves (RegReg_Hi2).
 6995   predicate(VM_Version::supports_cmov() );
 6996   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6997   ins_cost(200);
 6998   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6999             "CMOV$cop $dst.hi,$src.hi" %}
 7000   opcode(0x0F,0x40);
 7001   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7002   ins_pipe( pipe_cmov_reg_long );
 7003 %}
 7004 
 7005 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
        // Conditional move of a long (eRegL) for the cmpOpU/eFlagsRegU operand
        // pair.  Only selected when VM_Version::supports_cmov().  Encoded as two
        // CMOVs under the same condition, low halves first, then high halves.
 7006   predicate(VM_Version::supports_cmov() );
 7007   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7008   ins_cost(200);
 7009   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7010             "CMOV$cop $dst.hi,$src.hi" %}
 7011   opcode(0x0F,0x40);
 7012   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7013   ins_pipe( pipe_cmov_reg_long );
 7014 %}
 7015 
 7016 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
        // Same CMoveL pattern for the cmpOpUCF/eFlagsRegUCF operand pair.  It has
        // no encoding of its own: it expands to cmovL_regU.
 7017   predicate(VM_Version::supports_cmov() );
 7018   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7019   ins_cost(200);
 7020   expand %{
 7021     cmovL_regU(cop, cr, dst, src);
 7022   %}
 7023 %}
 7024 
 7025 //----------Arithmetic Instructions--------------------------------------------
 7026 //----------Addition Instructions----------------------------------------------
 7027 
 7028 // Integer Addition Instructions
 7029 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Register-register int add in two-address form: dst is both the first
        // input and the result.  The flags register is declared killed.
        // Fixed two-byte encoding: opcode 0x03 followed by RegReg(dst, src).
 7030   match(Set dst (AddI dst src));
 7031   effect(KILL cr);
 7032 
 7033   size(2);
 7034   format %{ "ADD    $dst,$src" %}
 7035   opcode(0x03);
 7036   ins_encode( OpcP, RegReg( dst, src) );
 7037   ins_pipe( ialu_reg_reg );
 7038 %}
 7039 
 7040 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Adds an int immediate to a register (dst is input and result); kills
        // the flags.  No size() is declared; OpcSErm/Con8or32 presumably choose
        // between an 8-bit and a 32-bit immediate form -- confirm in the
        // enc_class definitions.
 7041   match(Set dst (AddI dst src));
 7042   effect(KILL cr);
 7043 
 7044   format %{ "ADD    $dst,$src" %}
 7045   opcode(0x81, 0x00); /* /0 id */
 7046   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7047   ins_pipe( ialu_reg );
 7048 %}
 7049 
 7050 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // Add of the constant 1 (immI_1) to a register, shown as INC.  Only
        // selected when UseIncDec is set.  One-byte encoding built by
        // Opc_plus(primary, dst) from base opcode 0x40 -- presumably the register
        // number is added to the opcode; confirm in the enc_class.
 7051   predicate(UseIncDec);
 7052   match(Set dst (AddI dst src));
 7053   effect(KILL cr);
 7054 
 7055   size(1);
 7056   format %{ "INC    $dst" %}
 7057   opcode(0x40); /*  */
 7058   ins_encode( Opc_plus( primary, dst ) );
 7059   ins_pipe( ialu_reg );
 7060 %}
 7061 
 7062 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-operand int add of register + immediate via LEA: dst is a separate
        // operand from src0, and no flags operand or KILL effect is declared.
        // Costed at 110.
 7063   match(Set dst (AddI src0 src1));
 7064   ins_cost(110);
 7065 
 7066   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7067   opcode(0x8D); /* 0x8D /r */
 7068   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7069   ins_pipe( ialu_reg_reg );
 7070 %}
 7071 
 7072 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer + immediate (AddP) via LEA into a separate destination register.
        // No flags operand or KILL effect is declared.  Same opcode and encoding
        // classes as the int form, leaI_eReg_immI.
 7073   match(Set dst (AddP src0 src1));
 7074   ins_cost(110);
 7075 
 7076   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7077   opcode(0x8D); /* 0x8D /r */
 7078   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7079   ins_pipe( ialu_reg_reg );
 7080 %}
 7081 
 7082 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // Add of the constant -1 (immI_M1) to a register, shown as DEC.  Only
        // selected when UseIncDec is set.  One-byte encoding built by
        // Opc_plus(primary, dst) from base opcode 0x48.
 7083   predicate(UseIncDec);
 7084   match(Set dst (AddI dst src));
 7085   effect(KILL cr);
 7086 
 7087   size(1);
 7088   format %{ "DEC    $dst" %}
 7089   opcode(0x48); /*  */
 7090   ins_encode( Opc_plus( primary, dst ) );
 7091   ins_pipe( ialu_reg );
 7092 %}
 7093 
 7094 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Adds an int register to a pointer register in place (AddP dst src);
        // kills the flags.  Same two-byte opcode 0x03 + RegReg encoding as
        // addI_eReg.
 7095   match(Set dst (AddP dst src));
 7096   effect(KILL cr);
 7097 
 7098   size(2);
 7099   format %{ "ADD    $dst,$src" %}
 7100   opcode(0x03);
 7101   ins_encode( OpcP, RegReg( dst, src) );
 7102   ins_pipe( ialu_reg_reg );
 7103 %}
 7104 
 7105 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Adds an int immediate to a pointer register in place; kills the flags.
        // Uses the same OpcSErm/Con8or32 encoding as addI_eReg_imm.  The
        // commented-out RegImm line below is an older encoding left for reference.
 7106   match(Set dst (AddP dst src));
 7107   effect(KILL cr);
 7108 
 7109   format %{ "ADD    $dst,$src" %}
 7110   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7111   // ins_encode( RegImm( dst, src) );
 7112   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7113   ins_pipe( ialu_reg );
 7114 %}
 7115 
 7116 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Adds an int loaded from memory to a register, folding the LoadI into the
        // ADD's memory operand.  Kills the flags.  Costed at 125.
 7117   match(Set dst (AddI dst (LoadI src)));
 7118   effect(KILL cr);
 7119 
 7120   ins_cost(125);
 7121   format %{ "ADD    $dst,$src" %}
 7122   opcode(0x03);
 7123   ins_encode( OpcP, RegMem( dst, src) );
 7124   ins_pipe( ialu_reg_mem );
 7125 %}
 7126 
 7127 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write add: matches a StoreI to dst of (LoadI dst) + src,
        // i.e. load, add and store all on the same memory operand.  Kills the
        // flags.  Note the RegMem argument order: the register (src) comes first,
        // the memory operand (dst) second.
 7128   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7129   effect(KILL cr);
 7130 
 7131   ins_cost(150);
 7132   format %{ "ADD    $dst,$src" %}
 7133   opcode(0x01);  /* Opcode 01 /r */
 7134   ins_encode( OpcP, RegMem( src, dst ) );
 7135   ins_pipe( ialu_mem_reg );
 7136 %}
 7137 
 7138 // Add Memory with Immediate
      // Read-modify-write form: matches a StoreI to dst of (LoadI dst) + immediate
      // on the same memory operand.  Kills the flags.  RMopc_Mem(0x00, dst)
      // supplies the /0 opcode extension together with the memory operand.
 7139 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7140   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7141   effect(KILL cr);
 7142 
 7143   ins_cost(125);
 7144   format %{ "ADD    $dst,$src" %}
 7145   opcode(0x81);               /* Opcode 81 /0 id */
 7146   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7147   ins_pipe( ialu_mem_imm );
 7148 %}
 7149 
 7150 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Read-modify-write increment of an int in memory: the add-to-memory
        // pattern with the constant 1 (immI_1), encoded as FF /0.  Kills the flags.
        // NOTE(review): unlike incI_eReg there is no UseIncDec predicate here --
        // confirm that is intended.
 7151   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7152   effect(KILL cr);
 7153 
 7154   ins_cost(125);
 7155   format %{ "INC    $dst" %}
 7156   opcode(0xFF);               /* Opcode FF /0 */
 7157   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7158   ins_pipe( ialu_mem_imm );
 7159 %}
 7160 
 7161 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Read-modify-write decrement of an int in memory: the add-to-memory
        // pattern with the constant -1 (immI_M1), encoded as FF /1.  Kills the
        // flags.
        // NOTE(review): unlike decI_eReg there is no UseIncDec predicate here --
        // confirm that is intended.
 7162   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7163   effect(KILL cr);
 7164 
 7165   ins_cost(125);
 7166   format %{ "DEC    $dst" %}
 7167   opcode(0xFF);               /* Opcode FF /1 */
 7168   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7169   ins_pipe( ialu_mem_imm );
 7170 %}
 7171 
 7172 
 7173 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register.  Emits no machine code: size(0) and
        // an empty encoding.  dst is both the input and the result.
 7174   match(Set dst (CheckCastPP dst));
 7175 
 7176   size(0);
 7177   format %{ "#checkcastPP of $dst" %}
 7178   ins_encode( /*empty encoding*/ );
 7179   ins_pipe( empty );
 7180 %}
 7181 
 7182 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register; empty encoding, dst is input and result.
        // NOTE(review): no ins_cost(0) or size(0) is declared here, unlike the
        // neighbouring cast instructs -- confirm that is intended.
 7183   match(Set dst (CastPP dst));
 7184   format %{ "#castPP of $dst" %}
 7185   ins_encode( /*empty encoding*/ );
 7186   ins_pipe( empty );
 7187 %}
 7188 
 7189 instruct castII( rRegI dst ) %{
        // CastII on an int register: empty encoding and zero cost; dst is both
        // input and result.
 7190   match(Set dst (CastII dst));
 7191   format %{ "#castII of $dst" %}
 7192   ins_encode( /*empty encoding*/ );
 7193   ins_cost(0);
 7194   ins_pipe( empty );
 7195 %}
 7196 
 7197 instruct castLL( eRegL dst ) %{
        // CastLL on a long register: empty encoding and zero cost; dst is both
        // input and result.
 7198   match(Set dst (CastLL dst));
 7199   format %{ "#castLL of $dst" %}
 7200   ins_encode( /*empty encoding*/ );
 7201   ins_cost(0);
 7202   ins_pipe( empty );
 7203 %}
 7204 
 7205 instruct castFF( regF dst ) %{
        // CastFF on a float register (regF): empty encoding and zero cost; dst is
        // both input and result.
 7206   match(Set dst (CastFF dst));
 7207   format %{ "#castFF of $dst" %}
 7208   ins_encode( /*empty encoding*/ );
 7209   ins_cost(0);
 7210   ins_pipe( empty );
 7211 %}
 7212 
 7213 instruct castDD( regD dst ) %{
        // CastDD on a double register (regD): empty encoding and zero cost; dst is
        // both input and result.
 7214   match(Set dst (CastDD dst));
 7215   format %{ "#castDD of $dst" %}
 7216   ins_encode( /*empty encoding*/ );
 7217   ins_cost(0);
 7218   ins_pipe( empty );
 7219 %}
 7220 
 7221 // Load-locked - same as a regular pointer load when used with compare-swap
      // Matches LoadPLocked and encodes it as opcode 0x8B with RegMem(dst, mem),
      // which the format prints as a MOV.  No lock prefix is emitted here.
 7222 instruct loadPLocked(eRegP dst, memory mem) %{
 7223   match(Set dst (LoadPLocked mem));
 7224 
 7225   ins_cost(125);
 7226   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7227   opcode(0x8B);
 7228   ins_encode( OpcP, RegMem(dst,mem));
 7229   ins_pipe( ialu_reg_mem );
 7230 %}
 7231 
 7232 // Conditional-store of the updated heap-top.
 7233 // Used during allocation of the shared heap.
 7234 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
      // The matched result is the flags register itself (Set cr ...).  oldval is
      // an eAXRegP operand; the encoding is a lock prefix followed by opcode
      // bytes 0F B1 and RegMem(newval, heap_top_ptr).
 7235 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7236   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7237   // EAX is killed if there is contention, but then it's also unused.
 7238   // In the common case of no contention, EAX holds the new oop address.
 7239   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7240   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7241   ins_pipe( pipe_cmpxchg );
 7242 %}
 7243 
 7244 // Conditional-store of an int value.
 7245 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
      // The matched result is the flags register (Set cr ...).  oldval is an
      // eAXRegI operand and is declared killed.  Encoding: lock prefix, opcode
      // bytes 0F B1, then RegMem(newval, mem).
 7246 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7247   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7248   effect(KILL oldval);
 7249   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7250   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7251   ins_pipe( pipe_cmpxchg );
 7252 %}
 7253 
 7254 // Conditional-store of a long value.
 7255 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
      // The matched result is the flags register (Set cr ...).  oldval is an
      // eADXRegL operand (declared killed) and newval an eBCXRegL operand.  EBX
      // and ECX are swapped before the locked CMPXCHG8 and swapped back after it.
 7256 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7257   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7258   effect(KILL oldval);
 7259   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7260             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7261             "XCHG   EBX,ECX"
 7262   %}
 7263   ins_encode %{
 7264     // Note: we need to swap rbx, and rcx before and after the
 7265     //       cmpxchg8 instruction because the instruction uses
 7266     //       rcx as the high order word of the new value to store but
 7267     //       our register encoding uses rbx.
 7268     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7269     __ lock();
 7270     __ cmpxchg8($mem$$Address);
 7271     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7272   %}
 7273   ins_pipe( pipe_cmpxchg );
 7274 %}
 7275 
 7276 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7277 
 7278 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-swap producing a boolean in res.  Matches both
        // CompareAndSwapL and WeakCompareAndSwapL, and requires
        // VM_Version::supports_cx8().  After the CMPXCHG8, res is set to 0 and
        // then to 1 unless the flags say "not equal" (see format).  The flags and
        // oldval are declared killed.
 7279   predicate(VM_Version::supports_cx8());
 7280   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7281   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7282   effect(KILL cr, KILL oldval);
 7283   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7284             "MOV    $res,0\n\t"
 7285             "JNE,s  fail\n\t"
 7286             "MOV    $res,1\n"
 7287           "fail:" %}
 7288   ins_encode( enc_cmpxchg8(mem_ptr),
 7289               enc_flags_ne_to_boolean(res) );
 7290   ins_pipe( pipe_cmpxchg );
 7291 %}
 7292 
 7293 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing a boolean in res.  Matches both
        // CompareAndSwapP and WeakCompareAndSwapP.  After the CMPXCHG, res is set
        // to 0 and then to 1 unless the flags say "not equal" (see format).  The
        // flags and oldval are declared killed.
 7294   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7295   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7296   effect(KILL cr, KILL oldval);
 7297   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7298             "MOV    $res,0\n\t"
 7299             "JNE,s  fail\n\t"
 7300             "MOV    $res,1\n"
 7301           "fail:" %}
 7302   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7303   ins_pipe( pipe_cmpxchg );
 7304 %}
 7305 
 7306 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing a boolean in res.  Matches both
        // CompareAndSwapB and WeakCompareAndSwapB and uses the byte-sized
        // enc_cmpxchgb encoding.  The flags and oldval are declared killed.
 7307   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7308   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7309   effect(KILL cr, KILL oldval);
 7310   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7311             "MOV    $res,0\n\t"
 7312             "JNE,s  fail\n\t"
 7313             "MOV    $res,1\n"
 7314           "fail:" %}
 7315   ins_encode( enc_cmpxchgb(mem_ptr),
 7316               enc_flags_ne_to_boolean(res) );
 7317   ins_pipe( pipe_cmpxchg );
 7318 %}
 7319 
 7320 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short (16-bit) compare-and-swap producing a boolean in res.  Matches
        // both CompareAndSwapS and WeakCompareAndSwapS and uses the word-sized
        // enc_cmpxchgw encoding.  The flags and oldval are declared killed.
 7321   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7322   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7323   effect(KILL cr, KILL oldval);
 7324   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7325             "MOV    $res,0\n\t"
 7326             "JNE,s  fail\n\t"
 7327             "MOV    $res,1\n"
 7328           "fail:" %}
 7329   ins_encode( enc_cmpxchgw(mem_ptr),
 7330               enc_flags_ne_to_boolean(res) );
 7331   ins_pipe( pipe_cmpxchg );
 7332 %}
 7333 
 7334 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing a boolean in res.  Matches both
        // CompareAndSwapI and WeakCompareAndSwapI.  Uses the same enc_cmpxchg
        // encoding as compareAndSwapP.  The flags and oldval are declared killed.
 7335   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7336   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7337   effect(KILL cr, KILL oldval);
 7338   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7339             "MOV    $res,0\n\t"
 7340             "JNE,s  fail\n\t"
 7341             "MOV    $res,1\n"
 7342           "fail:" %}
 7343   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7344   ins_pipe( pipe_cmpxchg );
 7345 %}
 7346 
 7347 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...), so no boolean is materialised.  Requires
        // VM_Version::supports_cx8().  Kills the flags.
 7348   predicate(VM_Version::supports_cx8());
 7349   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7350   effect(KILL cr);
 7351   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7352   ins_encode( enc_cmpxchg8(mem_ptr) );
 7353   ins_pipe( pipe_cmpxchg );
 7354 %}
 7355 
 7356 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...).  Kills the flags.
 7357   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7358   effect(KILL cr);
 7359   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7360   ins_encode( enc_cmpxchg(mem_ptr) );
 7361   ins_pipe( pipe_cmpxchg );
 7362 %}
 7363 
 7364 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange: the result is delivered in the oldval operand
        // (Set oldval ...).  Uses the byte-sized enc_cmpxchgb encoding.  Kills the
        // flags.
 7365   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7366   effect(KILL cr);
 7367   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7368   ins_encode( enc_cmpxchgb(mem_ptr) );
 7369   ins_pipe( pipe_cmpxchg );
 7370 %}
 7371 
 7372 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short (16-bit) compare-and-exchange: the result is delivered in the
        // oldval operand (Set oldval ...).  Uses the word-sized enc_cmpxchgw
        // encoding.  Kills the flags.
 7373   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7374   effect(KILL cr);
 7375   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7376   ins_encode( enc_cmpxchgw(mem_ptr) );
 7377   ins_pipe( pipe_cmpxchg );
 7378 %}
 7379 
 7380 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange: the result is delivered in the oldval operand
        // (Set oldval ...).  Kills the flags.
 7381   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7382   effect(KILL cr);
 7383   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7384   ins_encode( enc_cmpxchg(mem_ptr) );
 7385   ins_pipe( pipe_cmpxchg );
 7386 %}
 7387 
 7388 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddB whose result is unused (result_not_used()) and whose addend
        // is an immediate: a locked byte ADD to memory is emitted instead of an
        // XADD.  The Universe operand "dummy" stands in for the unused result.
        // Kills the flags.
 7389   predicate(n->as_LoadStore()->result_not_used());
 7390   match(Set dummy (GetAndAddB mem add));
 7391   effect(KILL cr);
 7392   format %{ "ADDB  [$mem],$add" %}
 7393   ins_encode %{
 7394     __ lock();
 7395     __ addb($mem$$Address, $add$$constant);
 7396   %}
 7397   ins_pipe( pipe_cmpxchg );
 7398 %}
 7399 
 7400 // Important to match to xRegI: only 8-bit regs.
      // GetAndAddB with a used result: a locked byte XADD.  newval is both the
      // addend going in and the result coming out (Set newval ...).  Kills the
      // flags.
 7401 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7402   match(Set newval (GetAndAddB mem newval));
 7403   effect(KILL cr);
 7404   format %{ "XADDB  [$mem],$newval" %}
 7405   ins_encode %{
 7406     __ lock();
 7407     __ xaddb($mem$$Address, $newval$$Register);
 7408   %}
 7409   ins_pipe( pipe_cmpxchg );
 7410 %}
 7411 
 7412 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddS whose result is unused (result_not_used()) and whose addend
        // is an immediate: a locked 16-bit add to memory (addw) is emitted instead
        // of an XADD.  The format prints it as "ADDS".  Kills the flags.
 7413   predicate(n->as_LoadStore()->result_not_used());
 7414   match(Set dummy (GetAndAddS mem add));
 7415   effect(KILL cr);
 7416   format %{ "ADDS  [$mem],$add" %}
 7417   ins_encode %{
 7418     __ lock();
 7419     __ addw($mem$$Address, $add$$constant);
 7420   %}
 7421   ins_pipe( pipe_cmpxchg );
 7422 %}
 7423 
 7424 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddS with a used result: a locked 16-bit XADD (xaddw).  newval is
        // both the addend going in and the result coming out.  Kills the flags.
 7425   match(Set newval (GetAndAddS mem newval));
 7426   effect(KILL cr);
 7427   format %{ "XADDS  [$mem],$newval" %}
 7428   ins_encode %{
 7429     __ lock();
 7430     __ xaddw($mem$$Address, $newval$$Register);
 7431   %}
 7432   ins_pipe( pipe_cmpxchg );
 7433 %}
 7434 
 7435 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddI whose result is unused (result_not_used()) and whose addend
        // is an immediate: a locked 32-bit ADD to memory is emitted instead of an
        // XADD.  The Universe operand "dummy" stands in for the unused result.
        // Kills the flags.
 7436   predicate(n->as_LoadStore()->result_not_used());
 7437   match(Set dummy (GetAndAddI mem add));
 7438   effect(KILL cr);
 7439   format %{ "ADDL  [$mem],$add" %}
 7440   ins_encode %{
 7441     __ lock();
 7442     __ addl($mem$$Address, $add$$constant);
 7443   %}
 7444   ins_pipe( pipe_cmpxchg );
 7445 %}
 7446 
 7447 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddI with a used result: a locked 32-bit XADD.  newval is both the
        // addend going in and the result coming out.  Kills the flags.
 7448   match(Set newval (GetAndAddI mem newval));
 7449   effect(KILL cr);
 7450   format %{ "XADDL  [$mem],$newval" %}
 7451   ins_encode %{
 7452     __ lock();
 7453     __ xaddl($mem$$Address, $newval$$Register);
 7454   %}
 7455   ins_pipe( pipe_cmpxchg );
 7456 %}
 7457 
 7458 // Important to match to xRegI: only 8-bit regs.
      // GetAndSetB: byte exchange between newval and memory; newval is both the
      // value stored and the result.  No lock() call and no flags operand or
      // KILL effect appear here -- presumably because XCHG with a memory operand
      // is implicitly locked and leaves the flags alone; confirm against the
      // Intel manual.
 7459 instruct xchgB( memory mem, xRegI newval) %{
 7460   match(Set newval (GetAndSetB mem newval));
 7461   format %{ "XCHGB  $newval,[$mem]" %}
 7462   ins_encode %{
 7463     __ xchgb($newval$$Register, $mem$$Address);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
 7467 
 7468 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: 16-bit exchange (xchgw) between newval and memory; newval is
        // both the value stored and the result.  No lock() call and no flags
        // effect are declared.
 7469   match(Set newval (GetAndSetS mem newval));
 7470   format %{ "XCHGW  $newval,[$mem]" %}
 7471   ins_encode %{
 7472     __ xchgw($newval$$Register, $mem$$Address);
 7473   %}
 7474   ins_pipe( pipe_cmpxchg );
 7475 %}
 7476 
 7477 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: 32-bit exchange (xchgl) between newval and memory; newval is
        // both the value stored and the result.  No lock() call and no flags
        // effect are declared.
 7478   match(Set newval (GetAndSetI mem newval));
 7479   format %{ "XCHGL  $newval,[$mem]" %}
 7480   ins_encode %{
 7481     __ xchgl($newval$$Register, $mem$$Address);
 7482   %}
 7483   ins_pipe( pipe_cmpxchg );
 7484 %}
 7485 
 7486 instruct xchgP( memory mem, pRegP newval) %{
        // GetAndSetP: pointer exchange between newval and memory, emitted with the
        // same 32-bit xchgl as xchgI.  newval is both the value stored and the
        // result.
 7487   match(Set newval (GetAndSetP mem newval));
 7488   format %{ "XCHGL  $newval,[$mem]" %}
 7489   ins_encode %{
 7490     __ xchgl($newval$$Register, $mem$$Address);
 7491   %}
 7492   ins_pipe( pipe_cmpxchg );
 7493 %}
 7494 
 7495 //----------Subtraction Instructions-------------------------------------------
 7496 
 7497 // Integer Subtraction Instructions
 7498 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Register-register int subtract in two-address form: dst = dst - src.
        // Kills the flags.  Fixed two-byte encoding: opcode 0x2B followed by
        // RegReg(dst, src).
 7499   match(Set dst (SubI dst src));
 7500   effect(KILL cr);
 7501 
 7502   size(2);
 7503   format %{ "SUB    $dst,$src" %}
 7504   opcode(0x2B);
 7505   ins_encode( OpcP, RegReg( dst, src) );
 7506   ins_pipe( ialu_reg_reg );
 7507 %}
 7508 
 7509 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Subtracts an int immediate from a register in place; kills the flags.
        // Opcode 0x81 with extension /5, using the same OpcSErm/Con8or32 encoding
        // classes as addI_eReg_imm.  The commented-out RegImm line is an older
        // encoding left for reference.
 7510   match(Set dst (SubI dst src));
 7511   effect(KILL cr);
 7512 
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7515   // ins_encode( RegImm( dst, src) );
 7516   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7517   ins_pipe( ialu_reg );
 7518 %}
 7519 
 7520 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Subtracts an int loaded from memory from a register, folding the LoadI
        // into the SUB's memory operand.  Kills the flags.  Costed at 125.
 7521   match(Set dst (SubI dst (LoadI src)));
 7522   effect(KILL cr);
 7523 
 7524   ins_cost(125);
 7525   format %{ "SUB    $dst,$src" %}
 7526   opcode(0x2B);
 7527   ins_encode( OpcP, RegMem( dst, src) );
 7528   ins_pipe( ialu_reg_mem );
 7529 %}
 7530 
 7531 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write subtract: matches a StoreI to dst of (LoadI dst) - src
        // on the same memory operand.  Kills the flags.  Note the RegMem argument
        // order: the register (src) comes first, the memory operand (dst) second.
 7532   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7533   effect(KILL cr);
 7534 
 7535   ins_cost(150);
 7536   format %{ "SUB    $dst,$src" %}
 7537   opcode(0x29);  /* Opcode 29 /r */
 7538   ins_encode( OpcP, RegMem( src, dst ) );
 7539   ins_pipe( ialu_mem_reg );
 7540 %}
 7541 
 7542 // Subtract from a pointer
      // Matches AddP of dst and (0 - src), i.e. a pointer plus a negated int, and
      // encodes it as a single SUB dst,src.  The immI_0 operand "zero" exists
      // only to match the pattern; it is not passed to the encoding.  Kills the
      // flags.
 7543 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7544   match(Set dst (AddP dst (SubI zero src)));
 7545   effect(KILL cr);
 7546 
 7547   size(2);
 7548   format %{ "SUB    $dst,$src" %}
 7549   opcode(0x2B);
 7550   ins_encode( OpcP, RegReg( dst, src) );
 7551   ins_pipe( ialu_reg_reg );
 7552 %}
 7553 
 7554 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Int negate: matches (0 - dst) and encodes it in place as NEG dst
        // (opcode F7 /3).  The immI_0 operand "zero" exists only to match the
        // pattern.  Kills the flags.  Fixed two-byte encoding.
 7555   match(Set dst (SubI zero dst));
 7556   effect(KILL cr);
 7557 
 7558   size(2);
 7559   format %{ "NEG    $dst" %}
 7560   opcode(0xF7,0x03);  // Opcode F7 /3
 7561   ins_encode( OpcP, RegOpc( dst ) );
 7562   ins_pipe( ialu_reg );
 7563 %}
 7564 
 7565 //----------Multiplication/Division Instructions-------------------------------
 7566 // Integer Multiplication Instructions
 7567 // Multiply Register
      // Register-register int multiply in two-address form: dst = dst * src.
      // Kills the flags.  Fixed three-byte encoding: the secondary opcode (OpcS)
      // is emitted before the primary (OpcP), followed by RegReg(dst, src).
      // With opcode(0xAF, 0x0F) that is presumably the byte sequence 0F AF --
      // confirm against the OpcS/OpcP definitions.
 7568 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7569   match(Set dst (MulI dst src));
 7570   effect(KILL cr);
 7571 
 7572   size(3);
 7573   ins_cost(300);
 7574   format %{ "IMUL   $dst,$src" %}
 7575   opcode(0xAF, 0x0F);
 7576   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7577   ins_pipe( ialu_reg_reg_alu0 );
 7578 %}
 7579 
 7580 // Multiply 32-bit Immediate
      // Three-operand form: dst = src * imm, with dst a separate operand from src.
      // Kills the flags.  Encoded as OpcSE(imm), RegReg(dst, src), Con8or32(imm).
 7581 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7582   match(Set dst (MulI src imm));
 7583   effect(KILL cr);
 7584 
 7585   ins_cost(300);
 7586   format %{ "IMUL   $dst,$src,$imm" %}
 7587   opcode(0x69);  /* 69 /r id */
 7588   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7589   ins_pipe( ialu_reg_reg_alu0 );
 7590 %}
 7591 
 7592 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Materialises an immL32 long constant into an eADXRegL_low_only operand.
        // The format shows a single MOV into EAX ("low word only").  The flags
        // are declared killed.  The cost is deliberately inflated (see the note
        // below); mulI_imm_high and mulI_imm_RShift_high subtract 150 from their
        // own cost to compensate.
 7593   match(Set dst src);
 7594   effect(KILL cr);
 7595 
 7596   // Note that this is artificially increased to make it more expensive than loadConL
 7597   ins_cost(250);
 7598   format %{ "MOV    EAX,$src\t// low word only" %}
 7599   opcode(0xB8);
 7600   ins_encode( LdImmL_Lo(dst, src) );
 7601   ins_pipe( ialu_reg_fat );
 7602 %}
 7603 
 7604 // Multiply by 32-bit Immediate, taking the shifted high order results
 7605 //  (special case for shift by 32)
      // Matches (int)(((long)src1 * src2) >> 32).  The predicate walks the match
      // tree (ConvL2I -> RShiftL -> MulL -> second input) and requires that input
      // to be a long constant (Op_ConL) within [min_jint, max_jint].  dst is
      // an eDXRegI operand; src1 is declared USE and the flags are killed.
 7606 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7607   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7608   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7609              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7610              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7611   effect(USE src1, KILL cr);
 7612 
 7613   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7614   ins_cost(0*100 + 1*400 - 150);
 7615   format %{ "IMUL   EDX:EAX,$src1" %}
 7616   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7617   ins_pipe( pipe_slow );
 7618 %}
 7619 
 7620 // Multiply by 32-bit Immediate, taking the shifted high order results
      // General form of mulI_imm_high for shift counts of type immI_32_63: after
      // the widening IMUL the format shows an extra SAR of EDX by ($cnt - 32).
      // Same predicate as mulI_imm_high: the MulL's second input must be a long
      // constant within [min_jint, max_jint].  Costs 100 more than the
      // shift-by-32 special case.
 7621 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7622   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7623   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7624              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7625              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7626   effect(USE src1, KILL cr);
 7627 
 7628   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7629   ins_cost(1*100 + 1*400 - 150);
 7630   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7631             "SAR    EDX,$cnt-32" %}
 7632   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7633   ins_pipe( pipe_slow );
 7634 %}
 7635 
 7636 // Multiply Memory 32-bit Immediate
      // dst = (int loaded from src) * imm, folding the LoadI into the IMUL's
      // memory operand.  Kills the flags.  Same opcode 0x69 as mulI_eReg_imm, but
      // with RegMem instead of RegReg.
 7637 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7638   match(Set dst (MulI (LoadI src) imm));
 7639   effect(KILL cr);
 7640 
 7641   ins_cost(300);
 7642   format %{ "IMUL   $dst,$src,$imm" %}
 7643   opcode(0x69);  /* 69 /r id */
 7644   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7645   ins_pipe( ialu_reg_mem_alu0 );
 7646 %}
 7647 
 7648 // Multiply Memory
      // dst = dst * (int loaded from src), folding the LoadI into the IMUL's
      // memory operand.  Kills the flags.  Same opcode pair as mulI_eReg, with
      // RegMem instead of RegReg; costed at 350.
 7649 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7650   match(Set dst (MulI dst (LoadI src)));
 7651   effect(KILL cr);
 7652 
 7653   ins_cost(350);
 7654   format %{ "IMUL   $dst,$src" %}
 7655   opcode(0xAF, 0x0F);
 7656   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7657   ins_pipe( ialu_reg_mem_alu0 );
 7658 %}
 7659 
 7660 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7661 %{
        // MulAddS2I computed as dst*src1 + src2*src3.  It has no encoding of its
        // own: it expands into two mulI_eReg and one addI_eReg.  src2 is declared
        // killed because the second multiply writes its product into src2.
 7662   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7663   effect(KILL cr, KILL src2);
 7664 
 7665   expand %{ mulI_eReg(dst, src1, cr);
 7666            mulI_eReg(src2, src3, cr);
 7667            addI_eReg(dst, src2, cr); %}
 7668 %}
 7669 
 7670 // Multiply Register Int to Long
      // Matches a long multiply whose inputs are both ints widened with ConvI2L,
      // so a single widening multiply is enough.  dst is an eADXRegL operand and
      // src an eAXRegI operand.  Only dst and src1 are passed to the encoding.
 7671 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7672   // Basic Idea: long = (long)int * (long)int
 7673   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7674   effect(DEF dst, USE src, USE src1, KILL flags);
 7675 
 7676   ins_cost(300);
 7677   format %{ "IMUL   $dst,$src1" %}
 7678 
 7679   ins_encode( long_int_multiply( dst, src1 ) );
 7680   ins_pipe( ialu_reg_reg_alu0 );
 7681 %}
 7682 
 7683 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
        // Unsigned counterpart of mulI2L: both inputs are ints widened with
        // ConvI2L and then masked with the immL_32bits constant.  The format
        // shows MUL rather than IMUL.  Kills the flags.
 7684   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7685   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7686   effect(KILL flags);
 7687 
 7688   ins_cost(300);
 7689   format %{ "MUL    $dst,$src1" %}
 7690 
 7691   ins_encode( long_uint_multiply(dst, src1) );
 7692   ins_pipe( ialu_reg_reg_alu0 );
 7693 %}
 7694 
 7695 // Multiply Register Long
      // General 64x64 -> 64 multiply with dst (an eADXRegL operand) as both first
      // input and result.  Needs an int TEMP register for the cross products and
      // kills the flags.  Cost: 4*100 + 3*400, matching the seven instructions
      // shown in the format (three of them multiplies).
 7696 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7697   match(Set dst (MulL dst src));
 7698   effect(KILL cr, TEMP tmp);
 7699   ins_cost(4*100+3*400);
 7700 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7701 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7702   format %{ "MOV    $tmp,$src.lo\n\t"
 7703             "IMUL   $tmp,EDX\n\t"
 7704             "MOV    EDX,$src.hi\n\t"
 7705             "IMUL   EDX,EAX\n\t"
 7706             "ADD    $tmp,EDX\n\t"
 7707             "MUL    EDX:EAX,$src.lo\n\t"
 7708             "ADD    EDX,$tmp" %}
 7709   ins_encode( long_multiply( dst, src, tmp ) );
 7710   ins_pipe( pipe_slow );
 7711 %}
 7712 
 7713 // Multiply Register Long where the left operand's high 32 bits are zero
      // Selected when is_operand_hi32_zero(n->in(1)) holds, which lets the
      // x_hi * y_lo cross product be dropped: one IMUL and one MUL instead of
      // the three multiplies of mulL_eReg.
 7714 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7715   predicate(is_operand_hi32_zero(n->in(1)));
 7716   match(Set dst (MulL dst src));
 7717   effect(KILL cr, TEMP tmp);
 7718   ins_cost(2*100+2*400);
 7719 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7720 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7721   format %{ "MOV    $tmp,$src.hi\n\t"
 7722             "IMUL   $tmp,EAX\n\t"
 7723             "MUL    EDX:EAX,$src.lo\n\t"
 7724             "ADD    EDX,$tmp" %}
 7725   ins_encode %{
 7726     // tmp = y_hi * x_lo (low 32 bits), computed before MUL overwrites EDX:EAX
 7727     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7727     __ imull($tmp$$Register, rax);
 7728     __ mull($src$$Register);
 7729     __ addl(rdx, $tmp$$Register);
 7730   %}
 7731   ins_pipe( pipe_slow );
 7732 %}
 7733 
 7734 // Multiply Register Long where the right operand's high 32 bits are zero
      // Selected when is_operand_hi32_zero(n->in(2)) holds, which lets the
      // x_lo * y_hi cross product be dropped: one IMUL and one MUL instead of
      // the three multiplies of mulL_eReg.
 7735 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7736   predicate(is_operand_hi32_zero(n->in(2)));
 7737   match(Set dst (MulL dst src));
 7738   effect(KILL cr, TEMP tmp);
 7739   ins_cost(2*100+2*400);
 7740 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7741 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7742   format %{ "MOV    $tmp,$src.lo\n\t"
 7743             "IMUL   $tmp,EDX\n\t"
 7744             "MUL    EDX:EAX,$src.lo\n\t"
 7745             "ADD    EDX,$tmp" %}
 7746   ins_encode %{
 7747     __ movl($tmp$$Register, $src$$Register);
 7748     __ imull($tmp$$Register, rdx);
 7749     __ mull($src$$Register);
 7750     __ addl(rdx, $tmp$$Register);
 7751   %}
 7752   ins_pipe( pipe_slow );
 7753 %}
 7754 
 7755 // Multiply Register Long where the left and the right operands' high 32 bits are zero
      // Selected when is_operand_hi32_zero holds for both n->in(1) and n->in(2):
      // both cross products vanish, so a single MUL of the low halves gives the
      // full result.  No TEMP register is needed; costed at 1*400.
 7756 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7757   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7758   match(Set dst (MulL dst src));
 7759   effect(KILL cr);
 7760   ins_cost(1*400);
 7761 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7762 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7763   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7764   ins_encode %{
 7765     __ mull($src$$Register);
 7766   %}
 7767   ins_pipe( pipe_slow );
 7768 %}
 7769 
 7770 // Multiply Register Long by small constant
      // The constant is an immL_127 operand.  Needs an int TEMP register for the
      // src * EDX partial product and kills the flags.  size(12) fixes the
      // encoding at twelve bytes.
 7771 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7772   match(Set dst (MulL dst src));
 7773   effect(KILL cr, TEMP tmp);
 7774   ins_cost(2*100+2*400);
 7775   size(12);
 7776 // Basic idea: lo(result) = lo(src * EAX)
 7777 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7778   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7779             "MOV    EDX,$src\n\t"
 7780             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7781             "ADD    EDX,$tmp" %}
 7782   ins_encode( long_multiply_con( dst, src, tmp ) );
 7783   ins_pipe( pipe_slow );
 7784 %}
 7785 
 7786 // Integer DIV with Register
      // Quotient in the eAXRegI operand (also the dividend); the eDXRegI operand
      // and the flags are declared killed; the divisor is an eCXRegI operand.
      // Per the format, the sequence guards the one overflowing case: when EAX is
      // 0x80000000 and ECX is -1, IDIV is skipped with EDX zeroed and EAX left
      // unchanged.  Otherwise it runs CDQ; IDIV.  size(26) fixes the whole
      // sequence at 26 bytes.
 7787 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7788   match(Set rax (DivI rax div));
 7789   effect(KILL rdx, KILL cr);
 7790   size(26);
 7791   ins_cost(30*100+10*100);
 7792   format %{ "CMP    EAX,0x80000000\n\t"
 7793             "JNE,s  normal\n\t"
 7794             "XOR    EDX,EDX\n\t"
 7795             "CMP    ECX,-1\n\t"
 7796             "JE,s   done\n"
 7797     "normal: CDQ\n\t"
 7798             "IDIV   $div\n\t"
 7799     "done:"        %}
 7800   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7801   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7802   ins_pipe( ialu_reg_reg_alu0 );
 7803 %}
 7804 
 7805 // Divide Register Long
 7806 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
        // General 64-bit divide. Per the format it is done by pushing both operands
        // and calling SharedRuntime::ldiv, hence the very high cost and the kills of
        // cx, bx and the flags.
 7807   match(Set dst (DivL src1 src2));
 7808   effect( KILL cr, KILL cx, KILL bx );
 7809   ins_cost(10000);
 7810   format %{ "PUSH   $src1.hi\n\t"
 7811             "PUSH   $src1.lo\n\t"
 7812             "PUSH   $src2.hi\n\t"
 7813             "PUSH   $src2.lo\n\t"
 7814             "CALL   SharedRuntime::ldiv\n\t"
 7815             "ADD    ESP,16" %}
        // long_div is an encoding class defined outside this section.
 7816   ins_encode( long_div(src1,src2) );
 7817   ins_pipe( pipe_slow );
 7818 %}
 7819 
 7820 // Integer DIVMOD with Register, both quotient and mod results
 7821 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // The match rule has no Set: it matches the DivModI node itself.
        // NOTE(review): rdx is an operand but is not killed here, presumably because
        // it carries the remainder result -- confirm against the matcher.
 7822   match(DivModI rax div);
 7823   effect(KILL cr);
 7824   size(26);
 7825   ins_cost(30*100+10*100);
 7826   format %{ "CMP    EAX,0x80000000\n\t"
 7827             "JNE,s  normal\n\t"
 7828             "XOR    EDX,EDX\n\t"
 7829             "CMP    ECX,-1\n\t"
 7830             "JE,s   done\n"
 7831     "normal: CDQ\n\t"
 7832             "IDIV   $div\n\t"
 7833     "done:"        %}
 7834   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
        // Same encoding list as divI_eReg: cdq_enc followed by the F7 /7 instruction.
 7835   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7836   ins_pipe( pipe_slow );
 7837 %}
 7838 
 7839 // Integer MOD with Register
 7840 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
        // The remainder is produced in rdx; rax (the dividend) and the flags are killed.
 7841   match(Set rdx (ModI rax div));
 7842   effect(KILL rax, KILL cr);
 7843
 7844   size(26);
 7845   ins_cost(300);
        // NOTE(review): the format lists only CDQ/IDIV, but the encoding list and
        // size(26) are identical to divI_eReg, whose format also shows the
        // 0x80000000 / -1 guard. The listing here appears to be abbreviated.
 7846   format %{ "CDQ\n\t"
 7847             "IDIV   $div" %}
 7848   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7849   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7850   ins_pipe( ialu_reg_reg_alu0 );
 7851 %}
 7852 
 7853 // Remainder Register Long
 7854 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
        // General 64-bit remainder. Per the format it is done by pushing both
        // operands and calling SharedRuntime::lrem; cx, bx and the flags are killed.
 7855   match(Set dst (ModL src1 src2));
 7856   effect( KILL cr, KILL cx, KILL bx );
 7857   ins_cost(10000);
 7858   format %{ "PUSH   $src1.hi\n\t"
 7859             "PUSH   $src1.lo\n\t"
 7860             "PUSH   $src2.hi\n\t"
 7861             "PUSH   $src2.lo\n\t"
 7862             "CALL   SharedRuntime::lrem\n\t"
 7863             "ADD    ESP,16" %}
        // long_mod is an encoding class defined outside this section.
 7864   ins_encode( long_mod(src1,src2) );
 7865   ins_pipe( pipe_slow );
 7866 %}
 7867 
 7868 // Divide Register Long (no special case since divisor != -1)
 7869 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
        // Inline 64-bit divide by a 32-bit constant. It works on the magnitudes with
        // unsigned 32-bit DIVs and fixes up the sign afterwards. tmp holds abs(imm);
        // tmp2 carries the high half of the quotient.
 7870   match(Set dst (DivL dst imm));
 7871   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7872   ins_cost(1000);
 7873   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7874             "XOR    $tmp2,$tmp2\n\t"
 7875             "CMP    $tmp,EDX\n\t"
 7876             "JA,s   fast\n\t"
 7877             "MOV    $tmp2,EAX\n\t"
 7878             "MOV    EAX,EDX\n\t"
 7879             "MOV    EDX,0\n\t"
 7880             "JLE,s  pos\n\t"
 7881             "LNEG   EAX : $tmp2\n\t"
 7882             "DIV    $tmp # unsigned division\n\t"
 7883             "XCHG   EAX,$tmp2\n\t"
 7884             "DIV    $tmp\n\t"
 7885             "LNEG   $tmp2 : EAX\n\t"
 7886             "JMP,s  done\n"
 7887     "pos:\n\t"
 7888             "DIV    $tmp\n\t"
 7889             "XCHG   EAX,$tmp2\n"
 7890     "fast:\n\t"
 7891             "DIV    $tmp\n"
 7892     "done:\n\t"
 7893             "MOV    EDX,$tmp2\n\t"
 7894             "NEG    EDX:EAX # if $imm < 0" %}
 7895   ins_encode %{
 7896     int con = (int)$imm$$constant;
          // Divisors 0, -1 and min_jint are not expected to reach this rule.
 7897     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // pcon is the magnitude of the divisor; its sign is applied at the end.
 7898     int pcon = (con > 0) ? con : -con;
 7899     Label Lfast, Lpos, Ldone;
 7900
 7901     __ movl($tmp$$Register, pcon);
          // tmp2 = 0 is the high quotient half used by the fast path.
 7902     __ xorl($tmp2$$Register,$tmp2$$Register);
 7903     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7904     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7905
          // Slow path: divide the high word first, then the low word. The three
          // moves leave the flags from the compare above intact for the branch.
 7906     __ movl($tmp2$$Register, $dst$$Register); // save
 7907     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7908     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7909     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7910
 7911     // Negative dividend.
 7912     // convert value to positive to use unsigned division
 7913     __ lneg($dst$$Register, $tmp2$$Register);
 7914     __ divl($tmp$$Register);
          // Swap so tmp2 keeps the high quotient and the low dividend word is
          // divided next.
 7915     __ xchgl($dst$$Register, $tmp2$$Register);
 7916     __ divl($tmp$$Register);
 7917     // revert result back to negative
 7918     __ lneg($tmp2$$Register, $dst$$Register);
 7919     __ jmpb(Ldone);
 7920
 7921     __ bind(Lpos);
 7922     __ divl($tmp$$Register); // Use unsigned division
 7923     __ xchgl($dst$$Register, $tmp2$$Register);
 7924     // Fall through for final divide, tmp2 has 32 bit hi result
 7925
 7926     __ bind(Lfast);
 7927     // fast path: src is positive
 7928     __ divl($tmp$$Register); // Use unsigned division
 7929
 7930     __ bind(Ldone);
          // Assemble the 64-bit quotient: high half from tmp2, low half already in
          // the low register of dst.
 7931     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
          // The division used abs(con); negate the quotient for a negative divisor.
 7932     if (con < 0) {
 7933       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7934     }
 7935   %}
 7936   ins_pipe( pipe_slow );
 7937 %}
 7938 
 7939 // Remainder Register Long (remainder fit into 32 bits)
 7940 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
        // Inline 64-bit remainder by a 32-bit constant. It works on the magnitudes
        // with unsigned 32-bit DIVs; the 32-bit remainder is sign-extended into the
        // 64-bit result at the end. tmp holds abs(imm); tmp2 saves the low word.
 7941   match(Set dst (ModL dst imm));
 7942   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7943   ins_cost(1000);
 7944   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7945             "CMP    $tmp,EDX\n\t"
 7946             "JA,s   fast\n\t"
 7947             "MOV    $tmp2,EAX\n\t"
 7948             "MOV    EAX,EDX\n\t"
 7949             "MOV    EDX,0\n\t"
 7950             "JLE,s  pos\n\t"
 7951             "LNEG   EAX : $tmp2\n\t"
 7952             "DIV    $tmp # unsigned division\n\t"
 7953             "MOV    EAX,$tmp2\n\t"
 7954             "DIV    $tmp\n\t"
 7955             "NEG    EDX\n\t"
 7956             "JMP,s  done\n"
 7957     "pos:\n\t"
 7958             "DIV    $tmp\n\t"
 7959             "MOV    EAX,$tmp2\n"
 7960     "fast:\n\t"
 7961             "DIV    $tmp\n"
 7962     "done:\n\t"
 7963             "MOV    EAX,EDX\n\t"
 7964             "SAR    EDX,31\n\t" %}
 7965   ins_encode %{
 7966     int con = (int)$imm$$constant;
          // Divisors 0, -1 and min_jint are not expected to reach this rule.
 7967     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // Only the magnitude of the divisor is used; con's sign is never consulted
          // again in this encoding.
 7968     int pcon = (con > 0) ? con : -con;
 7969     Label  Lfast, Lpos, Ldone;
 7970
 7971     __ movl($tmp$$Register, pcon);
 7972     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7973     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7974
          // Slow path: reduce the high word first, then the low word. The three
          // moves leave the flags from the compare above intact for the branch.
 7975     __ movl($tmp2$$Register, $dst$$Register); // save
 7976     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7977     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7978     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7979
 7980     // Negative dividend.
 7981     // convert value to positive to use unsigned division
 7982     __ lneg($dst$$Register, $tmp2$$Register);
 7983     __ divl($tmp$$Register);
          // The quotient is not needed; reload the low word so the second DIV
          // continues from the remainder of the first.
 7984     __ movl($dst$$Register, $tmp2$$Register);
 7985     __ divl($tmp$$Register);
 7986     // revert remainder back to negative
 7987     __ negl(HIGH_FROM_LOW($dst$$Register));
 7988     __ jmpb(Ldone);
 7989
 7990     __ bind(Lpos);
 7991     __ divl($tmp$$Register);
 7992     __ movl($dst$$Register, $tmp2$$Register);
 7993
 7994     __ bind(Lfast);
 7995     // fast path: src is positive
 7996     __ divl($tmp$$Register);
 7997
 7998     __ bind(Ldone);
          // Move the 32-bit remainder into the low word and sign-extend it into the
          // high word.
 7999     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8000     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8001
 8002   %}
 8003   ins_pipe( pipe_slow );
 8004 %}
 8005 
 8006 // Integer Shift Instructions
 8007 // Shift Left by one
 8008 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // In-place register shift; the shift operand is restricted to class immI_1
        // and does not appear in the 2-byte encoding.
 8009   match(Set dst (LShiftI dst shift));
 8010   effect(KILL cr);
 8011
 8012   size(2);
 8013   format %{ "SHL    $dst,$shift" %}
 8014   opcode(0xD1, 0x4);  /* D1 /4 */
 8015   ins_encode( OpcP, RegOpc( dst ) );
 8016   ins_pipe( ialu_reg );
 8017 %}
 8018 
 8019 // Shift Left by 8-bit immediate
 8020 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // In-place register shift by a constant of class immI8; 3-byte encoding
        // (opcode, ModRM, imm8) per size(3) and the opcode comment.
 8021   match(Set dst (LShiftI dst shift));
 8022   effect(KILL cr);
 8023
 8024   size(3);
 8025   format %{ "SHL    $dst,$shift" %}
 8026   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8027   ins_encode( RegOpcImm( dst, shift) );
 8028   ins_pipe( ialu_reg );
 8029 %}
 8030 
 8031 // Shift Left by variable
 8032 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // The shift count is pinned to the eCXRegI register class; only dst is
        // encoded explicitly.
 8033   match(Set dst (LShiftI dst shift));
 8034   effect(KILL cr);
 8035
 8036   size(2);
 8037   format %{ "SHL    $dst,$shift" %}
 8038   opcode(0xD3, 0x4);  /* D3 /4 */
 8039   ins_encode( OpcP, RegOpc( dst ) );
 8040   ins_pipe( ialu_reg_reg );
 8041 %}
 8042 
 8043 // Arithmetic shift right by one
 8044 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // In-place register form; the shift operand is restricted to class immI_1
        // and does not appear in the 2-byte encoding.
 8045   match(Set dst (RShiftI dst shift));
 8046   effect(KILL cr);
 8047
 8048   size(2);
 8049   format %{ "SAR    $dst,$shift" %}
 8050   opcode(0xD1, 0x7);  /* D1 /7 */
 8051   ins_encode( OpcP, RegOpc( dst ) );
 8052   ins_pipe( ialu_reg );
 8053 %}
 8054 
 8055 // Arithmetic shift right by one, directly on a memory operand
 8056 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
        // Matches a load / shift / store back to the same address (dst appears as
        // both the LoadI and the StoreI address) and folds it into one instruction.
 8057   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8058   effect(KILL cr);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xD1, 0x7);  /* D1 /7 */
        // NOTE(review): RMopc_Mem is defined outside this section; "secondary"
        // presumably refers to the second opcode() value (0x7) -- confirm.
 8061   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8062   ins_pipe( ialu_mem_imm );
 8063 %}
 8064 
 8065 // Arithmetic Shift Right by 8-bit immediate
 8066 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // In-place register shift by a constant of class immI8; 3-byte encoding
        // (opcode, ModRM, imm8) per size(3) and the opcode comment.
 8067   match(Set dst (RShiftI dst shift));
 8068   effect(KILL cr);
 8069
 8070   size(3);
 8071   format %{ "SAR    $dst,$shift" %}
 8072   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8073   ins_encode( RegOpcImm( dst, shift ) );
        // This is a register-only instruction, so it uses the register ALU pipeline
        // class, the same as the other register/imm8 shifts (salI_eReg_imm,
        // shrI_eReg_imm), rather than the memory-operand class.
 8074   ins_pipe( ialu_reg );
 8075 %}
 8076 
 8077 // Arithmetic Shift Right by 8-bit immediate, directly on a memory operand
 8078 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
        // Matches a load / shift / store back to the same address and folds it into
        // one read-modify-write instruction.
 8079   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8080   effect(KILL cr);
 8081
 8082   format %{ "SAR    $dst,$shift" %}
 8083   opcode(0xC1, 0x7);  /* C1 /7 ib */
        // Encoding order: opcode byte, memory operand, then the shift constant.
 8084   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8085   ins_pipe( ialu_mem_imm );
 8086 %}
 8087 
 8088 // Arithmetic Shift Right by variable
 8089 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // The shift count is pinned to the eCXRegI register class; only dst is
        // encoded explicitly.
 8090   match(Set dst (RShiftI dst shift));
 8091   effect(KILL cr);
 8092
 8093   size(2);
 8094   format %{ "SAR    $dst,$shift" %}
 8095   opcode(0xD3, 0x7);  /* D3 /7 */
 8096   ins_encode( OpcP, RegOpc( dst ) );
 8097   ins_pipe( ialu_reg_reg );
 8098 %}
 8099 
 8100 // Logical shift right by one
 8101 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // In-place register form for URShiftI; the shift operand is restricted to
        // class immI_1 and does not appear in the 2-byte encoding.
 8102   match(Set dst (URShiftI dst shift));
 8103   effect(KILL cr);
 8104
 8105   size(2);
 8106   format %{ "SHR    $dst,$shift" %}
 8107   opcode(0xD1, 0x5);  /* D1 /5 */
 8108   ins_encode( OpcP, RegOpc( dst ) );
 8109   ins_pipe( ialu_reg );
 8110 %}
 8111 
 8112 // Logical Shift Right by 8-bit immediate
 8113 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // In-place register shift by a constant of class immI8; 3-byte encoding
        // (opcode, ModRM, imm8) per size(3) and the opcode comment.
 8114   match(Set dst (URShiftI dst shift));
 8115   effect(KILL cr);
 8116
 8117   size(3);
 8118   format %{ "SHR    $dst,$shift" %}
 8119   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8120   ins_encode( RegOpcImm( dst, shift) );
 8121   ins_pipe( ialu_reg );
 8122 %}
 8123 
 8124 
 8125 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8126 // This idiom is used by the compiler for the i2b bytecode.
 8127 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
        // Both shift counts must be the same immI_24 operand. The pair is replaced
        // by one sign-extending byte move, and no flags effect is declared.
 8128   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8129
 8130   size(3);
 8131   format %{ "MOVSX  $dst,$src :8" %}
 8132   ins_encode %{
 8133     __ movsbl($dst$$Register, $src$$Register);
 8134   %}
 8135   ins_pipe(ialu_reg_reg);
 8136 %}
 8137 
 8138 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8139 // This idiom is used by the compiler for the i2s bytecode.
 8140 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
        // Both shift counts must be the same immI_16 operand. The pair is replaced
        // by one sign-extending 16-bit move, and no flags effect is declared.
 8141   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8142
 8143   size(3);
 8144   format %{ "MOVSX  $dst,$src :16" %}
 8145   ins_encode %{
 8146     __ movswl($dst$$Register, $src$$Register);
 8147   %}
 8148   ins_pipe(ialu_reg_reg);
 8149 %}
 8150 
 8151 
 8152 // Logical Shift Right by variable
 8153 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // The shift count is pinned to the eCXRegI register class; only dst is
        // encoded explicitly.
 8154   match(Set dst (URShiftI dst shift));
 8155   effect(KILL cr);
 8156
 8157   size(2);
 8158   format %{ "SHR    $dst,$shift" %}
 8159   opcode(0xD3, 0x5);  /* D3 /5 */
 8160   ins_encode( OpcP, RegOpc( dst ) );
 8161   ins_pipe( ialu_reg_reg );
 8162 %}
 8163 
 8164 
 8165 //----------Logical Instructions-----------------------------------------------
 8166 //----------Integer Logical Instructions---------------------------------------
 8167 // And Instructions
 8168 // And Register with Register
 8169 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand form: dst is both the left input and the result. Flags are
        // declared killed.
 8170   match(Set dst (AndI dst src));
 8171   effect(KILL cr);
 8172
 8173   size(2);
 8174   format %{ "AND    $dst,$src" %}
 8175   opcode(0x23);
 8176   ins_encode( OpcP, RegReg( dst, src) );
 8177   ins_pipe( ialu_reg_reg );
 8178 %}
 8179 
 8180 // And Register with Immediate
 8181 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // No size() is declared: Con8or32 suggests the constant may be emitted as
        // either 8 or 32 bits, so the length varies (encoding classes are defined
        // outside this section).
 8182   match(Set dst (AndI dst src));
 8183   effect(KILL cr);
 8184
 8185   format %{ "AND    $dst,$src" %}
 8186   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8187   // ins_encode( RegImm( dst, src) );
 8188   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8189   ins_pipe( ialu_reg );
 8190 %}
 8191 
 8192 // And Register with Memory
 8193 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Folds the LoadI of the right operand into the AND. Costed slightly above
        // the register form.
 8194   match(Set dst (AndI dst (LoadI src)));
 8195   effect(KILL cr);
 8196
 8197   ins_cost(125);
 8198   format %{ "AND    $dst,$src" %}
 8199   opcode(0x23);
 8200   ins_encode( OpcP, RegMem( dst, src) );
 8201   ins_pipe( ialu_reg_mem );
 8202 %}
 8203 
 8204 // And Memory with Register
 8205 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Matches a load / AND / store back to the same address and folds it into
        // one read-modify-write instruction.
 8206   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8207   effect(KILL cr);
 8208
 8209   ins_cost(150);
 8210   format %{ "AND    $dst,$src" %}
 8211   opcode(0x21);  /* Opcode 21 /r */
        // Note the operand order: the register goes first, the memory operand second.
 8212   ins_encode( OpcP, RegMem( src, dst ) );
 8213   ins_pipe( ialu_mem_reg );
 8214 %}
 8215 
 8216 // And Memory with Immediate
 8217 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Matches a load / AND-with-constant / store back to the same address and
        // folds it into one read-modify-write instruction.
 8218   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8219   effect(KILL cr);
 8220
 8221   ins_cost(125);
 8222   format %{ "AND    $dst,$src" %}
 8223   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8224   // ins_encode( MemImm( dst, src) );
        // Encoding order: opcode (chosen from the constant by OpcSE), memory
        // operand, then the constant itself.
 8225   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8226   ins_pipe( ialu_mem_imm );
 8227 %}
 8228 
 8229 // BMI1 instructions
 8230 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
        // Matches (src1 XOR minus_1) AND src2 and emits a single ANDN. Enabled only
        // when UseBMI1Instructions is set. dst is a separate result operand.
 8231   match(Set dst (AndI (XorI src1 minus_1) src2));
 8232   predicate(UseBMI1Instructions);
 8233   effect(KILL cr);
 8234
 8235   format %{ "ANDNL  $dst, $src1, $src2" %}
 8236
 8237   ins_encode %{
 8238     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8239   %}
 8240   ins_pipe(ialu_reg);
 8241 %}
 8242 
 8243 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
        // Memory variant of the ANDN rule: the second AndI input is a LoadI that is
        // folded into the instruction. Enabled only when UseBMI1Instructions is set.
 8244   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8245   predicate(UseBMI1Instructions);
 8246   effect(KILL cr);
 8247
 8248   ins_cost(125);
 8249   format %{ "ANDNL  $dst, $src1, $src2" %}
 8250
 8251   ins_encode %{
 8252     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8253   %}
 8254   ins_pipe(ialu_reg_mem);
 8255 %}
 8256 
 8257 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
        // Matches (imm_zero - src) AND src and emits a single BLSI. Enabled only
        // when UseBMI1Instructions is set.
 8258   match(Set dst (AndI (SubI imm_zero src) src));
 8259   predicate(UseBMI1Instructions);
 8260   effect(KILL cr);
 8261
 8262   format %{ "BLSIL  $dst, $src" %}
 8263
 8264   ins_encode %{
 8265     __ blsil($dst$$Register, $src$$Register);
 8266   %}
 8267   ins_pipe(ialu_reg);
 8268 %}
 8269 
 8270 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
        // Memory variant of the BLSI rule: both occurrences of the value are LoadI
        // of the same memory operand src. Enabled only when UseBMI1Instructions is set.
 8271   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8272   predicate(UseBMI1Instructions);
 8273   effect(KILL cr);
 8274
 8275   ins_cost(125);
 8276   format %{ "BLSIL  $dst, $src" %}
 8277
 8278   ins_encode %{
 8279     __ blsil($dst$$Register, $src$$Address);
 8280   %}
 8281   ins_pipe(ialu_reg_mem);
 8282 %}
 8283 
 8284 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8285 %{
        // Matches (src + minus_1) XOR src and emits a single BLSMSK. Enabled only
        // when UseBMI1Instructions is set.
 8286   match(Set dst (XorI (AddI src minus_1) src));
 8287   predicate(UseBMI1Instructions);
 8288   effect(KILL cr);
 8289
 8290   format %{ "BLSMSKL $dst, $src" %}
 8291
 8292   ins_encode %{
 8293     __ blsmskl($dst$$Register, $src$$Register);
 8294   %}
 8295
 8296   ins_pipe(ialu_reg);
 8297 %}
 8298 
 8299 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8300 %{
        // Memory variant of the BLSMSK rule: both occurrences of the value are LoadI
        // of the same memory operand src. Enabled only when UseBMI1Instructions is set.
 8301   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8302   predicate(UseBMI1Instructions);
 8303   effect(KILL cr);
 8304
 8305   ins_cost(125);
 8306   format %{ "BLSMSKL $dst, $src" %}
 8307
 8308   ins_encode %{
 8309     __ blsmskl($dst$$Register, $src$$Address);
 8310   %}
 8311
 8312   ins_pipe(ialu_reg_mem);
 8313 %}
 8314 
 8315 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8316 %{
        // Matches (src + minus_1) AND src and emits a single BLSR. Enabled only
        // when UseBMI1Instructions is set.
 8317   match(Set dst (AndI (AddI src minus_1) src) );
 8318   predicate(UseBMI1Instructions);
 8319   effect(KILL cr);
 8320
 8321   format %{ "BLSRL  $dst, $src" %}
 8322
 8323   ins_encode %{
 8324     __ blsrl($dst$$Register, $src$$Register);
 8325   %}
 8326
 8327   ins_pipe(ialu_reg);
 8328 %}
 8329 
 8330 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8331 %{
        // Memory variant of the BLSR rule: both occurrences of the value are LoadI
        // of the same memory operand src. Enabled only when UseBMI1Instructions is set.
 8332   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8333   predicate(UseBMI1Instructions);
 8334   effect(KILL cr);
 8335
 8336   ins_cost(125);
 8337   format %{ "BLSRL  $dst, $src" %}
 8338
 8339   ins_encode %{
 8340     __ blsrl($dst$$Register, $src$$Address);
 8341   %}
 8342
 8343   ins_pipe(ialu_reg_mem);
 8344 %}
 8345 
 8346 // Or Instructions
 8347 // Or Register with Register
 8348 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand form: dst is both the left input and the result. Flags are
        // declared killed.
 8349   match(Set dst (OrI dst src));
 8350   effect(KILL cr);
 8351
 8352   size(2);
 8353   format %{ "OR     $dst,$src" %}
 8354   opcode(0x0B);
 8355   ins_encode( OpcP, RegReg( dst, src) );
 8356   ins_pipe( ialu_reg_reg );
 8357 %}
 8358 
 8359 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
        // Or Register with a pointer register: the CastP2X of src is absorbed by the
        // match rule, and the encoding is the same as the plain register/register OR.
 8360   match(Set dst (OrI dst (CastP2X src)));
 8361   effect(KILL cr);
 8362
 8363   size(2);
 8364   format %{ "OR     $dst,$src" %}
 8365   opcode(0x0B);
 8366   ins_encode( OpcP, RegReg( dst, src) );
 8367   ins_pipe( ialu_reg_reg );
 8368 %}
 8369 
 8370 
 8371 // Or Register with Immediate
 8372 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // No size() is declared: Con8or32 suggests the constant may be emitted as
        // either 8 or 32 bits, so the length varies (encoding classes are defined
        // outside this section).
 8373   match(Set dst (OrI dst src));
 8374   effect(KILL cr);
 8375
 8376   format %{ "OR     $dst,$src" %}
 8377   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8378   // ins_encode( RegImm( dst, src) );
 8379   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8380   ins_pipe( ialu_reg );
 8381 %}
 8382 
 8383 // Or Register with Memory
 8384 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Folds the LoadI of the right operand into the OR. Costed slightly above
        // the register form.
 8385   match(Set dst (OrI dst (LoadI src)));
 8386   effect(KILL cr);
 8387
 8388   ins_cost(125);
 8389   format %{ "OR     $dst,$src" %}
 8390   opcode(0x0B);
 8391   ins_encode( OpcP, RegMem( dst, src) );
 8392   ins_pipe( ialu_reg_mem );
 8393 %}
 8394 
 8395 // Or Memory with Register
 8396 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Matches a load / OR / store back to the same address and folds it into
        // one read-modify-write instruction.
 8397   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8398   effect(KILL cr);
 8399
 8400   ins_cost(150);
 8401   format %{ "OR     $dst,$src" %}
 8402   opcode(0x09);  /* Opcode 09 /r */
        // Note the operand order: the register goes first, the memory operand second.
 8403   ins_encode( OpcP, RegMem( src, dst ) );
 8404   ins_pipe( ialu_mem_reg );
 8405 %}
 8406 
 8407 // Or Memory with Immediate
 8408 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Matches a load / OR-with-constant / store back to the same address and
        // folds it into one read-modify-write instruction.
 8409   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8410   effect(KILL cr);
 8411
 8412   ins_cost(125);
 8413   format %{ "OR     $dst,$src" %}
 8414   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8415   // ins_encode( MemImm( dst, src) );
        // Encoding order: opcode (chosen from the constant by OpcSE), memory
        // operand, then the constant itself.
 8416   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8417   ins_pipe( ialu_mem_imm );
 8418 %}
 8419 
 8420 // ROL/ROR
 8421 // ROL expand
 8422 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only reachable through the expand of
        // rolI_eReg_i1. Rotates dst left in place and kills the flags.
 8423   effect(USE_DEF dst, USE shift, KILL cr);
 8424
 8425   format %{ "ROL    $dst, $shift" %}
 8426   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8427   ins_encode( OpcP, RegOpc( dst ));
 8428   ins_pipe( ialu_reg );
 8429 %}
 8430 
 8431 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only reachable through the expand of
        // rolI_eReg_i8. Rotates dst left in place by an immI8 constant.
 8432   effect(USE_DEF dst, USE shift, KILL cr);
 8433
 8434   format %{ "ROL    $dst, $shift" %}
 8435   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8436   ins_encode( RegOpcImm(dst, shift) );
 8437   ins_pipe(ialu_reg);
 8438 %}
 8439 
 8440 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // Helper with no match rule: it is the expand target of the
        // rolI_eReg_Var_* rules. The count is in the eCXRegI class and dst is
        // drawn from ncxRegI.
 8441   effect(USE_DEF dst, USE shift, KILL cr);
 8442
 8443   format %{ "ROL    $dst, $shift" %}
 8444   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8445   ins_encode(OpcP, RegOpc(dst));
 8446   ins_pipe( ialu_reg_reg );
 8447 %}
 8448 // end of ROL expand
 8449 
 8450 // ROL 32bit by one once
 8451 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
        // Recognizes the shift-pair rotate idiom (dst << lshift) | (dst >>> rshift)
        // with operand classes immI_1 and immI_M1, and expands to the one-bit ROL helper.
 8452   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8453
 8454   expand %{
 8455     rolI_eReg_imm1(dst, lshift, cr);
 8456   %}
 8457 %}
 8458 
 8459 // ROL 32bit var by imm8 once
 8460 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
        // The predicate accepts the shift pair only when the two constants sum to
        // zero modulo 32, i.e. the pair really is a 32-bit rotate. Only lshift is
        // passed on to the ROL helper.
 8461   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8462   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8463
 8464   expand %{
 8465     rolI_eReg_imm8(dst, lshift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROL 32bit var by var once
 8470 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
        // Variable rotate written as (dst << shift) | (dst >>> (zero - shift)).
        // Expands to the CL-count ROL helper.
 8471   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8472
 8473   expand %{
 8474     rolI_eReg_CL(dst, shift, cr);
 8475   %}
 8476 %}
 8477 
 8478 // ROL 32bit var by var once
 8479 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
        // Variable rotate written as (dst << shift) | (dst >>> (c32 - shift)).
        // Expands to the CL-count ROL helper.
 8480   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8481
 8482   expand %{
 8483     rolI_eReg_CL(dst, shift, cr);
 8484   %}
 8485 %}
 8486 
 8487 // ROR expand
 8488 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only reachable through the expand of
        // rorI_eReg_i1. Rotates dst right in place and kills the flags.
 8489   effect(USE_DEF dst, USE shift, KILL cr);
 8490
 8491   format %{ "ROR    $dst, $shift" %}
 8492   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8493   ins_encode( OpcP, RegOpc( dst ) );
 8494   ins_pipe( ialu_reg );
 8495 %}
 8496 
 8497 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only reachable through the expand of
        // rorI_eReg_i8. Rotates dst right in place by an immI8 constant.
 8498   effect (USE_DEF dst, USE shift, KILL cr);
 8499
 8500   format %{ "ROR    $dst, $shift" %}
 8501   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8502   ins_encode( RegOpcImm(dst, shift) );
 8503   ins_pipe( ialu_reg );
 8504 %}
 8505 
 8506 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
        // Helper with no match rule: it is the expand target of the
        // rorI_eReg_Var_* rules. The count is in the eCXRegI class and dst is
        // drawn from ncxRegI.
 8507   effect(USE_DEF dst, USE shift, KILL cr);
 8508
 8509   format %{ "ROR    $dst, $shift" %}
 8510   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8511   ins_encode(OpcP, RegOpc(dst));
 8512   ins_pipe( ialu_reg_reg );
 8513 %}
 8514 // end of ROR expand
 8515 
 8516 // ROR right once
 8517 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
        // Recognizes the shift-pair rotate idiom (dst >>> rshift) | (dst << lshift)
        // with operand classes immI_1 and immI_M1, and expands to the one-bit ROR helper.
 8518   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8519
 8520   expand %{
 8521     rorI_eReg_imm1(dst, rshift, cr);
 8522   %}
 8523 %}
 8524 
 8525 // ROR 32bit by immI8 once
 8526 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
        // The predicate accepts the shift pair only when the two constants sum to
        // zero modulo 32, i.e. the pair really is a 32-bit rotate. Only rshift is
        // passed on to the ROR helper.
 8527   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8528   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8529
 8530   expand %{
 8531     rorI_eReg_imm8(dst, rshift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // ROR 32bit var by var once
 8536 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
        // Variable rotate written as (dst >>> shift) | (dst << (zero - shift)).
        // Expands to the CL-count ROR helper.
 8537   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8538
 8539   expand %{
 8540     rorI_eReg_CL(dst, shift, cr);
 8541   %}
 8542 %}
 8543 
 8544 // ROR 32bit var by var once
 8545 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
        // Variable rotate written as (dst >>> shift) | (dst << (c32 - shift)).
        // Expands to the CL-count ROR helper.
 8546   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8547
 8548   expand %{
 8549     rorI_eReg_CL(dst, shift, cr);
 8550   %}
 8551 %}
 8552 
 8553 // Xor Instructions
 8554 // Xor Register with Register
 8555 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand form: dst is both the left input and the result. Flags are
        // declared killed.
 8556   match(Set dst (XorI dst src));
 8557   effect(KILL cr);
 8558
 8559   size(2);
 8560   format %{ "XOR    $dst,$src" %}
 8561   opcode(0x33);
 8562   ins_encode( OpcP, RegReg( dst, src) );
 8563   ins_pipe( ialu_reg_reg );
 8564 %}
 8565 
 8566 // Xor Register with Immediate -1
 8567 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
        // Emitted as NOT instead of XOR. Unlike the other Xor rules, this one has
        // no flags operand and declares no KILL effect.
 8568   match(Set dst (XorI dst imm));
 8569
 8570   size(2);
 8571   format %{ "NOT    $dst" %}
 8572   ins_encode %{
 8573      __ notl($dst$$Register);
 8574   %}
 8575   ins_pipe( ialu_reg );
 8576 %}
 8577 
 8578 // Xor Register with Immediate
 8579 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // No size() is declared: Con8or32 suggests the constant may be emitted as
        // either 8 or 32 bits, so the length varies (encoding classes are defined
        // outside this section).
 8580   match(Set dst (XorI dst src));
 8581   effect(KILL cr);
 8582
 8583   format %{ "XOR    $dst,$src" %}
 8584   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8585   // ins_encode( RegImm( dst, src) );
 8586   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8587   ins_pipe( ialu_reg );
 8588 %}
 8589 
 8590 // Xor Register with Memory
 8591 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Folds the LoadI of the right operand into the XOR. Costed slightly above
        // the register form.
 8592   match(Set dst (XorI dst (LoadI src)));
 8593   effect(KILL cr);
 8594
 8595   ins_cost(125);
 8596   format %{ "XOR    $dst,$src" %}
 8597   opcode(0x33);
 8598   ins_encode( OpcP, RegMem(dst, src) );
 8599   ins_pipe( ialu_reg_mem );
 8600 %}
 8601 
 8602 // Xor Memory with Register
 8603 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Matches a load / XOR / store back to the same address and folds it into
        // one read-modify-write instruction.
 8604   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8605   effect(KILL cr);
 8606
 8607   ins_cost(150);
 8608   format %{ "XOR    $dst,$src" %}
 8609   opcode(0x31);  /* Opcode 31 /r */
        // Note the operand order: the register goes first, the memory operand second.
 8610   ins_encode( OpcP, RegMem( src, dst ) );
 8611   ins_pipe( ialu_mem_reg );
 8612 %}
 8613 
 8614 // Xor Memory with Immediate
 8615 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Matches a load / XOR-with-constant / store back to the same address and
        // folds it into one read-modify-write instruction.
 8616   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8617   effect(KILL cr);
 8618
 8619   ins_cost(125);
 8620   format %{ "XOR    $dst,$src" %}
 8621   opcode(0x81,0x6);  /* Opcode 81 /6 id */
        // Encoding order: opcode (chosen from the constant by OpcSE), memory
        // operand, then the constant itself.
 8622   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8623   ins_pipe( ialu_mem_imm );
 8624 %}
 8625 
 8626 //----------Convert Int to Boolean---------------------------------------------
 8627 
 8628 instruct movI_nocopy(rRegI dst, rRegI src) %{
        // Helper with no match rule: used as the first step of the convI2B expand
        // to place src into dst before ci2b runs.
 8629   effect( DEF dst, USE src );
 8630   format %{ "MOV    $dst,$src" %}
 8631   ins_encode( enc_Copy( dst, src) );
 8632   ins_pipe( ialu_reg_reg );
 8633 %}
 8634 
 8635 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Helper with no match rule: it is the second step of the convI2B expand
        // and expects dst to already hold a copy of src. On x86, NEG sets the carry
        // flag when its operand is non-zero, so NEG dst then ADC dst,src computes
        // (-src + src + carry), i.e. 1 for a non-zero src and 0 for zero.
 8636   effect( USE_DEF dst, USE src, KILL cr );
 8637
 8638   size(4);
 8639   format %{ "NEG    $dst\n\t"
 8640             "ADC    $dst,$src" %}
 8641   ins_encode( neg_reg(dst),
 8642               OpcRegReg(0x13,dst,src) );
 8643   ins_pipe( ialu_reg_reg_long );
 8644 %}
 8645 
 8646 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Conv2B of an int register, implemented as a two-instruction expansion:
        // copy src into dst, then let ci2b reduce it to a boolean.
 8647   match(Set dst (Conv2B src));
 8648
 8649   expand %{
 8650     movI_nocopy(dst,src);
 8651     ci2b(dst,src,cr);
 8652   %}
 8653 %}
 8654 
 8655 instruct movP_nocopy(rRegI dst, eRegP src) %{
        // Helper with no match rule: used as the first step of the convP2B expand
        // to place the pointer register src into the int register dst.
 8656   effect( DEF dst, USE src );
 8657   format %{ "MOV    $dst,$src" %}
 8658   ins_encode( enc_Copy( dst, src) );
 8659   ins_pipe( ialu_reg_reg );
 8660 %}
 8661 
 8662 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Helper with no match rule: it is the second step of the convP2B expand
        // and expects dst to already hold a copy of src. It uses the same NEG/ADC
        // sequence as ci2b, so the result is 1 for a non-zero pointer and 0 for zero.
        // NOTE(review): unlike ci2b, no size() is declared here.
 8663   effect( USE_DEF dst, USE src, KILL cr );
 8664   format %{ "NEG    $dst\n\t"
 8665             "ADC    $dst,$src" %}
 8666   ins_encode( neg_reg(dst),
 8667               OpcRegReg(0x13,dst,src) );
 8668   ins_pipe( ialu_reg_reg_long );
 8669 %}
 8670 
 8671 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Conv2B of a pointer register, implemented as a two-instruction expansion:
        // copy src into dst, then let cp2b reduce it to a boolean.
 8672   match(Set dst (Conv2B src));
 8673
 8674   expand %{
 8675     movP_nocopy(dst,src);
 8676     cp2b(dst,src,cr);
 8677   %}
 8678 %}
 8679 
 8680 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
        // Produces an all-ones mask in dst when p < q (signed) and zero otherwise.
        // dst is cleared first, set to 0/1 by SETlt, then negated into 0/-1.
        // p and q are drawn from ncxRegI while dst is in eCXRegI, so clearing dst
        // before the compare cannot clobber an input.
 8681   match(Set dst (CmpLTMask p q));
 8682   effect(KILL cr);
 8683   ins_cost(400);
 8684
 8685   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8686   format %{ "XOR    $dst,$dst\n\t"
 8687             "CMP    $p,$q\n\t"
 8688             "SETlt  $dst\n\t"
 8689             "NEG    $dst" %}
 8690   ins_encode %{
 8691     Register Rp = $p$$Register;
 8692     Register Rq = $q$$Register;
 8693     Register Rd = $dst$$Register;
          // Straight-line sequence: no label is needed because nothing branches.
 8695     __ xorl(Rd, Rd);
 8696     __ cmpl(Rp, Rq);
 8697     __ setb(Assembler::less, Rd);
 8698     __ negl(Rd);
 8699   %}
 8700
 8701   ins_pipe(pipe_slow);
 8702 %}
 8703 
 8704 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Special case of CmpLTMask against the immI_0 operand: an arithmetic shift
        // right by 31 replicates the sign bit of dst, which gives -1 for negative
        // values and 0 otherwise, in place.
 8705   match(Set dst (CmpLTMask dst zero));
 8706   effect(DEF dst, KILL cr);
 8707   ins_cost(100);
 8708
 8709   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8710   ins_encode %{
 8711   __ sarl($dst$$Register, 31);
 8712   %}
 8713   ins_pipe(ialu_reg);
 8714 %}
 8715 
 8716 /* better to save a register than avoid a branch */
 8717 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
        // Matches p = ((p < q) mask AND y) + (p - q), i.e. "p -= q; if the original
        // p was less than q, p += y". It is implemented with a short branch instead
        // of materializing the mask.
 8718   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8719   effect(KILL cr);
 8720   ins_cost(400);
 8721   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8722             "JGE    done\n\t"
 8723             "ADD    $p,$y\n"
 8724             "done:  " %}
 8725   ins_encode %{
 8726     Register Rp = $p$$Register;
 8727     Register Rq = $q$$Register;
 8728     Register Ry = $y$$Register;
 8729     Label done;
          // The flags from the SUB decide whether the conditional add is skipped.
 8730     __ subl(Rp, Rq);
 8731     __ jccb(Assembler::greaterEqual, done);
 8732     __ addl(Rp, Ry);
 8733     __ bind(done);
 8734   %}
 8735
 8736   ins_pipe(pipe_cmplt);
 8737 %}
 8738 
 8739 /* better to save a register than avoid a branch */
 8740 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
        // Matches y = (p < q) mask AND y, i.e. "keep y when p < q, otherwise zero
        // it". It is implemented with a compare and a short branch around an XOR.
 8741   match(Set y (AndI (CmpLTMask p q) y));
 8742   effect(KILL cr);
 8743
 8744   ins_cost(300);
 8745
 8746   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8747             "JLT      done\n\t"
 8748             "XORL     $y, $y\n"
 8749             "done:  " %}
 8750   ins_encode %{
 8751     Register Rp = $p$$Register;
 8752     Register Rq = $q$$Register;
 8753     Register Ry = $y$$Register;
 8754     Label done;
 8755     __ cmpl(Rp, Rq);
          // p < q: y is left untouched; otherwise fall through and clear it.
 8756     __ jccb(Assembler::less, done);
 8757     __ xorl(Ry, Ry);
 8758     __ bind(done);
 8759   %}
 8760
 8761   ins_pipe(pipe_cmplt);
 8762 %}
 8763 
 8764 /* If I enable this, I encourage spilling in the inner loop of compress.
 8765 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8766   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8767 */
 8768 //----------Overflow Math Instructions-----------------------------------------
 8769 
 8770 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8771 %{
        // Overflow probe for int addition, register + register.
        // The rule result is the flags register cr, obtained by really
        // executing ADD op1, op2; the sum left in op1 is a by-product.
 8772   match(Set cr (OverflowAddI op1 op2));
 8773   effect(DEF cr, USE_KILL op1, USE op2);   // op1 is read, then overwritten by the ADD
 8774 
 8775   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8776 
 8777   ins_encode %{
 8778     __ addl($op1$$Register, $op2$$Register);
 8779   %}
 8780   ins_pipe(ialu_reg_reg);
 8781 %}
 8782 
 8783 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8784 %{
        // Overflow probe for int addition, register + immediate.
        // Same scheme as the register form: ADD is executed for its flags and
        // op1 is destroyed in the process.
 8785   match(Set cr (OverflowAddI op1 op2));
 8786   effect(DEF cr, USE_KILL op1, USE op2);   // op1 is read, then overwritten by the ADD
 8787 
 8788   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8789 
 8790   ins_encode %{
 8791     __ addl($op1$$Register, $op2$$constant);
 8792   %}
 8793   ins_pipe(ialu_reg_reg);
 8794 %}
 8795 
 8796 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8797 %{
        // Overflow probe for int subtraction, register - register.
        // CMP sets the flags for op1 - op2 without writing either operand,
        // which is why this rule carries no effect clause.
 8798   match(Set cr (OverflowSubI op1 op2));
 8799 
 8800   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8801   ins_encode %{
 8802     __ cmpl($op1$$Register, $op2$$Register);
 8803   %}
 8804   ins_pipe(ialu_reg_reg);
 8805 %}
 8806 
 8807 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8808 %{
        // Overflow probe for int subtraction, register - immediate.
        // CMP against the constant produces the flags and leaves op1 intact.
 8809   match(Set cr (OverflowSubI op1 op2));
 8810 
 8811   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8812   ins_encode %{
 8813     __ cmpl($op1$$Register, $op2$$constant);
 8814   %}
 8815   ins_pipe(ialu_reg_reg);
 8816 %}
 8817 
 8818 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8819 %{
        // Overflow probe for int negation, recognised as (0 - op2).
        // NEG is executed for its flags; op2 is replaced by its negation.
 8820   match(Set cr (OverflowSubI zero op2));
 8821   effect(DEF cr, USE_KILL op2);   // op2 is read, then overwritten by the NEG
 8822 
 8823   format %{ "NEG    $op2\t# overflow check int" %}
 8824   ins_encode %{
 8825     __ negl($op2$$Register);
 8826   %}
 8827   ins_pipe(ialu_reg_reg);
 8828 %}
 8829 
 8830 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8831 %{
        // Overflow probe for int multiplication, register * register.
        // The two-operand IMUL writes the product into op1 and produces the
        // flags that form the result of this rule.
 8832   match(Set cr (OverflowMulI op1 op2));
 8833   effect(DEF cr, USE_KILL op1, USE op2);   // op1 is read, then overwritten by the IMUL
 8834 
 8835   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8836   ins_encode %{
 8837     __ imull($op1$$Register, $op2$$Register);
 8838   %}
 8839   ins_pipe(ialu_reg_reg_alu0);
 8840 %}
 8841 
 8842 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8843 %{
        // Overflow probe for int multiplication, register * immediate.
        // The three-operand IMUL puts the product into the scratch register
        // tmp, so op1 is only read and survives the check.
 8844   match(Set cr (OverflowMulI op1 op2));
 8845   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8846 
 8847   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8848   ins_encode %{
 8849     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8850   %}
 8851   ins_pipe(ialu_reg_reg_alu0);
 8852 %}
 8853 
 8854 // Integer Absolute Instructions
      // Branch-free int absolute value:
      //   tmp = src >> 31 (arithmetic)   -> all ones if src is negative, else 0
      //   dst = (src ^ tmp) - tmp
      // Both dst and tmp are declared TEMP, and the flags register is killed.
 8855 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8856 %{
 8857   match(Set dst (AbsI src));
 8858   effect(TEMP dst, TEMP tmp, KILL cr);
 8859   format %{ "movl $tmp, $src\n\t"
 8860             "sarl $tmp, 31\n\t"
 8861             "movl $dst, $src\n\t"
 8862             "xorl $dst, $tmp\n\t"
 8863             "subl $dst, $tmp\n"
 8864           %}
 8865   ins_encode %{
          // Build the sign mask first.
 8866     __ movl($tmp$$Register, $src$$Register);
 8867     __ sarl($tmp$$Register, 31);
          // Conditionally complement and add one by way of xor/sub with the mask.
 8868     __ movl($dst$$Register, $src$$Register);
 8869     __ xorl($dst$$Register, $tmp$$Register);
 8870     __ subl($dst$$Register, $tmp$$Register);
 8871   %}
 8872 
 8873   ins_pipe(ialu_reg_reg);
 8874 %}
 8875 
 8876 //----------Long Instructions------------------------------------------------
 8877 // Add Long Register with Register
      // 64-bit add on a register pair: ADD on the low halves, then ADC on the
      // high halves so the carry propagates. dst is both input and result.
      // Encoding is delegated to the RegReg_Lo / RegReg_Hi encoders (defined
      // elsewhere in this file), presumably consuming the primary and
      // secondary opcode respectively -- confirm against their definitions.
 8878 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8879   match(Set dst (AddL dst src));
 8880   effect(KILL cr);
 8881   ins_cost(200);
 8882   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8883             "ADC    $dst.hi,$src.hi" %}
 8884   opcode(0x03, 0x13);   /* 03 = ADD r32,r/m32 ; 13 = ADC r32,r/m32 */
 8885   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8886   ins_pipe( ialu_reg_reg_long );
 8887 %}
 8888 
 8889 // Add Long Register with Immediate
      // 64-bit add of a long constant: ADD of the low word followed by ADC of
      // the high word (see the format text). Encoding is delegated to the
      // Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders defined elsewhere.
 8890 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8891   match(Set dst (AddL dst src));
 8892   effect(KILL cr);
 8893   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8894             "ADC    $dst.hi,$src.hi" %}
 8895   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8896   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8897   ins_pipe( ialu_reg_long );
 8898 %}
 8899 
 8900 // Add Long Register with Memory
      // 64-bit add with the second operand loaded from memory: ADD of the
      // word at mem into dst.lo, then ADC of the word at mem+4 into dst.hi
      // (per the format text). The load is folded into the instruction.
 8901 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8902   match(Set dst (AddL dst (LoadL mem)));
 8903   effect(KILL cr);
 8904   ins_cost(125);
 8905   format %{ "ADD    $dst.lo,$mem\n\t"
 8906             "ADC    $dst.hi,$mem+4" %}
 8907   opcode(0x03, 0x13);   /* 03 = ADD r32,r/m32 ; 13 = ADC r32,r/m32 */
 8908   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8909   ins_pipe( ialu_reg_long_mem );
 8910 %}
 8911 
 8912 // Subtract Long Register with Register.
      // 64-bit subtract on a register pair: SUB on the low halves, then SBB on
      // the high halves so the borrow propagates. dst is input and result.
 8913 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8914   match(Set dst (SubL dst src));
 8915   effect(KILL cr);
 8916   ins_cost(200);
 8917   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8918             "SBB    $dst.hi,$src.hi" %}
 8919   opcode(0x2B, 0x1B);   /* 2B = SUB r32,r/m32 ; 1B = SBB r32,r/m32 */
 8920   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8921   ins_pipe( ialu_reg_reg_long );
 8922 %}
 8923 
 8924 // Subtract Long Register with Immediate
      // 64-bit subtract of a long constant: SUB of the low word followed by
      // SBB of the high word (see the format text). Encoding is delegated to
      // the Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders defined elsewhere.
 8925 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8926   match(Set dst (SubL dst src));
 8927   effect(KILL cr);
 8928   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8929             "SBB    $dst.hi,$src.hi" %}
 8930   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8931   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8932   ins_pipe( ialu_reg_long );
 8933 %}
 8934 
 8935 // Subtract Long Register with Memory
      // 64-bit subtract with the subtrahend loaded from memory: SUB of the
      // word at mem from dst.lo, then SBB of the word at mem+4 from dst.hi
      // (per the format text). The load is folded into the instruction.
 8936 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8937   match(Set dst (SubL dst (LoadL mem)));
 8938   effect(KILL cr);
 8939   ins_cost(125);
 8940   format %{ "SUB    $dst.lo,$mem\n\t"
 8941             "SBB    $dst.hi,$mem+4" %}
 8942   opcode(0x2B, 0x1B);   /* 2B = SUB r32,r/m32 ; 1B = SBB r32,r/m32 */
 8943   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8944   ins_pipe( ialu_reg_long_mem );
 8945 %}
 8946 
 8947 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // Negate Long Register, recognised as (0 - dst) and done in place.
        // Per the format text: negate the high half, negate the low half, then
        // subtract the borrow produced by the low NEG from the high half.
        // The byte sequence comes from the neg_long encoder defined elsewhere.
 8948   match(Set dst (SubL zero dst));
 8949   effect(KILL cr);
 8950   ins_cost(300);
 8951   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8952   ins_encode( neg_long(dst) );
 8953   ins_pipe( ialu_reg_reg_long );
 8954 %}
 8955 
 8956 // And Long Register with Register
      // 64-bit bitwise AND on a register pair; the two halves are independent,
      // so the same AND opcode is used for the low and the high word.
 8957 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8958   match(Set dst (AndL dst src));
 8959   effect(KILL cr);
 8960   format %{ "AND    $dst.lo,$src.lo\n\t"
 8961             "AND    $dst.hi,$src.hi" %}
 8962   opcode(0x23,0x23);   /* 23 = AND r32,r/m32 (both halves) */
 8963   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8964   ins_pipe( ialu_reg_reg_long );
 8965 %}
 8966 
 8967 // And Long Register with Immediate
      // 64-bit bitwise AND with a long constant, one AND per 32-bit half
      // (see the format text). Encoding is delegated to the
      // Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders defined elsewhere.
 8968 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8969   match(Set dst (AndL dst src));
 8970   effect(KILL cr);
 8971   format %{ "AND    $dst.lo,$src.lo\n\t"
 8972             "AND    $dst.hi,$src.hi" %}
 8973   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8974   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8975   ins_pipe( ialu_reg_long );
 8976 %}
 8977 
 8978 // And Long Register with Memory
      // 64-bit bitwise AND with the second operand loaded from memory: the
      // word at mem is ANDed into dst.lo and the word at mem+4 into dst.hi
      // (per the format text). The load is folded into the instruction.
 8979 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8980   match(Set dst (AndL dst (LoadL mem)));
 8981   effect(KILL cr);
 8982   ins_cost(125);
 8983   format %{ "AND    $dst.lo,$mem\n\t"
 8984             "AND    $dst.hi,$mem+4" %}
 8985   opcode(0x23, 0x23);   /* 23 = AND r32,r/m32 (both halves) */
 8986   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8987   ins_pipe( ialu_reg_long_mem );
 8988 %}
 8989 
 8990 // BMI1 instructions
 8991 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not, (src1 ^ -1) & src2, selected only when BMI1 is enabled.
        // The halves are independent, so one ANDN is emitted per 32-bit word.
        // dst is a TEMP, i.e. it is allocated apart from the inputs.
 8992   match(Set dst (AndL (XorL src1 minus_1) src2));
 8993   predicate(UseBMI1Instructions);
 8994   effect(KILL cr, TEMP dst);
 8995 
 8996   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8997             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8998          %}
 8999 
 9000   ins_encode %{
 9001     Register dst_lo  = $dst$$Register;
 9002     Register inv_lo  = $src1$$Register;
 9003     Register mask_lo = $src2$$Register;
 9004     __ andnl(dst_lo, inv_lo, mask_lo);
 9005     __ andnl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(inv_lo), HIGH_FROM_LOW(mask_lo));
 9006   %}
 9007   ins_pipe(ialu_reg_reg_long);
 9008 %}
 9009 
 9010 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not with the second operand loaded from memory, selected
        // only when BMI1 is enabled. The low word is read at the operand
        // address and the high word at the same address with displacement + 4.
 9011   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9012   predicate(UseBMI1Instructions);
 9013   effect(KILL cr, TEMP dst);
 9014 
 9015   ins_cost(125);
 9016   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9017             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9018          %}
 9019 
 9020   ins_encode %{
 9021     Register dst_lo = $dst$$Register;
 9022     Register inv_lo = $src1$$Register;
 9023     Address mem_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9024 
 9025     __ andnl(dst_lo, inv_lo, $src2$$Address);
 9026     __ andnl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(inv_lo), mem_hi);
 9027   %}
 9028   ins_pipe(ialu_reg_mem);
 9029 %}
 9030 
 9031 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
        // Long isolate-lowest-set-bit, (0 - src) & src, selected only when BMI1
        // is enabled. The high result word is preset to 0; if BLSI on the low
        // word yields a non-zero result the bit was found there and the high
        // word stays 0, otherwise BLSI is applied to the high word as well.
 9032   match(Set dst (AndL (SubL imm_zero src) src));
 9033   predicate(UseBMI1Instructions);
 9034   effect(KILL cr, TEMP dst);
 9035 
 9036   format %{ "MOVL   $dst.hi, 0\n\t"
 9037             "BLSIL  $dst.lo, $src.lo\n\t"
 9038             "JNZ    done\n\t"
 9039             "BLSIL  $dst.hi, $src.hi\n"
 9040             "done:"
 9041          %}
 9042 
 9043   ins_encode %{
 9044     Label high_done;
 9045     Register dst_lo = $dst$$Register;
 9046     Register src_lo = $src$$Register;
 9047     __ movl(HIGH_FROM_LOW(dst_lo), 0);
 9048     __ blsil(dst_lo, src_lo);
 9049     __ jccb(Assembler::notZero, high_done);
 9050     __ blsil(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
 9051     __ bind(high_done);
 9052   %}
 9053   ins_pipe(ialu_reg);
 9054 %}
 9055 
 9056 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
        // Long isolate-lowest-set-bit with the operand loaded from memory,
        // selected only when BMI1 is enabled. Same scheme as the register
        // form: the high word is preset to 0 and only computed from the word
        // at displacement + 4 when the low-word BLSI result is zero.
 9057   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9058   predicate(UseBMI1Instructions);
 9059   effect(KILL cr, TEMP dst);
 9060 
 9061   ins_cost(125);
 9062   format %{ "MOVL   $dst.hi, 0\n\t"
 9063             "BLSIL  $dst.lo, $src\n\t"
 9064             "JNZ    done\n\t"
 9065             "BLSIL  $dst.hi, $src+4\n"
 9066             "done:"
 9067          %}
 9068 
 9069   ins_encode %{
 9070     Label high_done;
 9071     Register dst_lo = $dst$$Register;
 9072     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9073 
 9074     __ movl(HIGH_FROM_LOW(dst_lo), 0);
 9075     __ blsil(dst_lo, $src$$Address);
 9076     __ jccb(Assembler::notZero, high_done);
 9077     __ blsil(HIGH_FROM_LOW(dst_lo), mem_hi);
 9078     __ bind(high_done);
 9079   %}
 9080   ins_pipe(ialu_reg_mem);
 9081 %}
 9082 
 9083 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9084 %{
        // Long mask-up-to-lowest-set-bit, (src + -1) ^ src, selected only when
        // BMI1 is enabled. The high result word is preset to 0 and is only
        // recomputed from the high source word when the low-word BLSMSK leaves
        // the carry flag set (documented for BLSMSK as: source was zero).
 9085   match(Set dst (XorL (AddL src minus_1) src));
 9086   predicate(UseBMI1Instructions);
 9087   effect(KILL cr, TEMP dst);
 9088 
 9089   format %{ "MOVL    $dst.hi, 0\n\t"
 9090             "BLSMSKL $dst.lo, $src.lo\n\t"
 9091             "JNC     done\n\t"
 9092             "BLSMSKL $dst.hi, $src.hi\n"
 9093             "done:"
 9094          %}
 9095 
 9096   ins_encode %{
 9097     Label high_done;
 9098     Register dst_lo = $dst$$Register;
 9099     Register src_lo = $src$$Register;
 9100     __ movl(HIGH_FROM_LOW(dst_lo), 0);
 9101     __ blsmskl(dst_lo, src_lo);
 9102     __ jccb(Assembler::carryClear, high_done);
 9103     __ blsmskl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
 9104     __ bind(high_done);
 9105   %}
 9106 
 9107   ins_pipe(ialu_reg);
 9108 %}
 9109 
 9110 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9111 %{
        // Long mask-up-to-lowest-set-bit with the operand loaded from memory,
        // selected only when BMI1 is enabled. Same scheme as the register
        // form; the high source word is read at displacement + 4.
 9112   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9113   predicate(UseBMI1Instructions);
 9114   effect(KILL cr, TEMP dst);
 9115 
 9116   ins_cost(125);
 9117   format %{ "MOVL    $dst.hi, 0\n\t"
 9118             "BLSMSKL $dst.lo, $src\n\t"
 9119             "JNC     done\n\t"
 9120             "BLSMSKL $dst.hi, $src+4\n"
 9121             "done:"
 9122          %}
 9123 
 9124   ins_encode %{
 9125     Label high_done;
 9126     Register dst_lo = $dst$$Register;
 9127     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9128 
 9129     __ movl(HIGH_FROM_LOW(dst_lo), 0);
 9130     __ blsmskl(dst_lo, $src$$Address);
 9131     __ jccb(Assembler::carryClear, high_done);
 9132     __ blsmskl(HIGH_FROM_LOW(dst_lo), mem_hi);
 9133     __ bind(high_done);
 9134   %}
 9135 
 9136   ins_pipe(ialu_reg_mem);
 9137 %}
 9138 
 9139 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9140 %{
        // Long reset-lowest-set-bit, (src + -1) & src, selected only when BMI1
        // is enabled. The high result word starts as a copy of the high source
        // word; it is only replaced by BLSR of that word when the low-word
        // BLSR leaves the carry flag set (documented for BLSR as: source was
        // zero, so the bit to clear lives in the high word).
 9141   match(Set dst (AndL (AddL src minus_1) src) );
 9142   predicate(UseBMI1Instructions);
 9143   effect(KILL cr, TEMP dst);
 9144 
 9145   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9146             "BLSRL  $dst.lo, $src.lo\n\t"
 9147             "JNC    done\n\t"
 9148             "BLSRL  $dst.hi, $src.hi\n"
 9149             "done:"
 9150   %}
 9151 
 9152   ins_encode %{
 9153     Label high_done;
 9154     Register dst_lo = $dst$$Register;
 9155     Register src_lo = $src$$Register;
 9156     __ movl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
 9157     __ blsrl(dst_lo, src_lo);
 9158     __ jccb(Assembler::carryClear, high_done);
 9159     __ blsrl(HIGH_FROM_LOW(dst_lo), HIGH_FROM_LOW(src_lo));
 9160     __ bind(high_done);
 9161   %}
 9162 
 9163   ins_pipe(ialu_reg);
 9164 %}
 9165 
 9166 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9167 %{
        // Long reset-lowest-set-bit with the operand loaded from memory,
        // selected only when BMI1 is enabled. Same scheme as the register
        // form; the high source word is read at displacement + 4, once for the
        // initial copy and once more if the high word has to be processed.
 9168   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9169   predicate(UseBMI1Instructions);
 9170   effect(KILL cr, TEMP dst);
 9171 
 9172   ins_cost(125);
 9173   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9174             "BLSRL  $dst.lo, $src\n\t"
 9175             "JNC    done\n\t"
 9176             "BLSRL  $dst.hi, $src+4\n"
 9177             "done:"
 9178   %}
 9179 
 9180   ins_encode %{
 9181     Label high_done;
 9182     Register dst_lo = $dst$$Register;
 9183     Address mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9184     __ movl(HIGH_FROM_LOW(dst_lo), mem_hi);
 9185     __ blsrl(dst_lo, $src$$Address);
 9186     __ jccb(Assembler::carryClear, high_done);
 9187     __ blsrl(HIGH_FROM_LOW(dst_lo), mem_hi);
 9188     __ bind(high_done);
 9189   %}
 9190 
 9191   ins_pipe(ialu_reg_mem);
 9192 %}
 9193 
 9194 // Or Long Register with Register
      // 64-bit bitwise OR on a register pair; the two halves are independent,
      // so the same OR opcode is used for the low and the high word.
 9195 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9196   match(Set dst (OrL dst src));
 9197   effect(KILL cr);
 9198   format %{ "OR     $dst.lo,$src.lo\n\t"
 9199             "OR     $dst.hi,$src.hi" %}
 9200   opcode(0x0B,0x0B);   /* 0B = OR r32,r/m32 (both halves) */
 9201   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9202   ins_pipe( ialu_reg_reg_long );
 9203 %}
 9204 
 9205 // Or Long Register with Immediate
      // 64-bit bitwise OR with a long constant, one OR per 32-bit half
      // (see the format text). Encoding is delegated to the
      // Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders defined elsewhere.
 9206 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9207   match(Set dst (OrL dst src));
 9208   effect(KILL cr);
 9209   format %{ "OR     $dst.lo,$src.lo\n\t"
 9210             "OR     $dst.hi,$src.hi" %}
 9211   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9212   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9213   ins_pipe( ialu_reg_long );
 9214 %}
 9215 
 9216 // Or Long Register with Memory
      // 64-bit bitwise OR with the second operand loaded from memory: the word
      // at mem is ORed into dst.lo and the word at mem+4 into dst.hi (per the
      // format text). The load is folded into the instruction.
 9217 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9218   match(Set dst (OrL dst (LoadL mem)));
 9219   effect(KILL cr);
 9220   ins_cost(125);
 9221   format %{ "OR     $dst.lo,$mem\n\t"
 9222             "OR     $dst.hi,$mem+4" %}
 9223   opcode(0x0B,0x0B);   /* 0B = OR r32,r/m32 (both halves) */
 9224   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9225   ins_pipe( ialu_reg_long_mem );
 9226 %}
 9227 
 9228 // Xor Long Register with Register
      // 64-bit bitwise XOR on a register pair; the two halves are independent,
      // so the same XOR opcode is used for the low and the high word.
 9229 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9230   match(Set dst (XorL dst src));
 9231   effect(KILL cr);
 9232   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9233             "XOR    $dst.hi,$src.hi" %}
 9234   opcode(0x33,0x33);   /* 33 = XOR r32,r/m32 (both halves) */
 9235   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9236   ins_pipe( ialu_reg_reg_long );
 9237 %}
 9238 
 9239 // Xor Long Register with Immediate -1
      // XOR with all ones is a bitwise complement, so each half is simply
      // NOTed in place. No flags operand or KILL effect is declared for this
      // rule, unlike the other XorL rules (x86 NOT does not modify the flags).
 9240 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9241   match(Set dst (XorL dst imm));
 9242   format %{ "NOT    $dst.lo\n\t"
 9243             "NOT    $dst.hi" %}
 9244   ins_encode %{
 9245      __ notl($dst$$Register);
 9246      __ notl(HIGH_FROM_LOW($dst$$Register));
 9247   %}
 9248   ins_pipe( ialu_reg_long );
 9249 %}
 9250 
 9251 // Xor Long Register with Immediate
      // 64-bit bitwise XOR with a long constant, one XOR per 32-bit half
      // (see the format text). Encoding is delegated to the
      // Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders defined elsewhere.
 9252 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9253   match(Set dst (XorL dst src));
 9254   effect(KILL cr);
 9255   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9256             "XOR    $dst.hi,$src.hi" %}
 9257   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9258   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9259   ins_pipe( ialu_reg_long );
 9260 %}
 9261 
 9262 // Xor Long Register with Memory
      // 64-bit bitwise XOR with the second operand loaded from memory: the
      // word at mem is XORed into dst.lo and the word at mem+4 into dst.hi
      // (per the format text). The load is folded into the instruction.
 9263 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9264   match(Set dst (XorL dst (LoadL mem)));
 9265   effect(KILL cr);
 9266   ins_cost(125);
 9267   format %{ "XOR    $dst.lo,$mem\n\t"
 9268             "XOR    $dst.hi,$mem+4" %}
 9269   opcode(0x33,0x33);   /* 33 = XOR r32,r/m32 (both halves) */
 9270   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9271   ins_pipe( ialu_reg_long_mem );
 9272 %}
 9273 
 9274 // Shift Left Long by 1
      // Doubling by addition: ADD lo,lo shifts the low word left by one and
      // leaves the bit shifted out in the carry; ADC hi,hi shifts the high
      // word and brings that carry in. Only selected when UseNewLongLShift
      // is set.
 9275 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9276   predicate(UseNewLongLShift);
 9277   match(Set dst (LShiftL dst cnt));
 9278   effect(KILL cr);
 9279   ins_cost(100);
 9280   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9281             "ADC    $dst.hi,$dst.hi" %}
 9282   ins_encode %{
 9283     __ addl($dst$$Register,$dst$$Register);
 9284     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9285   %}
 9286   ins_pipe( ialu_reg_long );
 9287 %}
 9288 
 9289 // Shift Left Long by 2
      // Two rounds of the ADD lo,lo / ADC hi,hi doubling pair, each round
      // shifting the 64-bit value left by one bit. Only selected when
      // UseNewLongLShift is set.
 9290 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9291   predicate(UseNewLongLShift);
 9292   match(Set dst (LShiftL dst cnt));
 9293   effect(KILL cr);
 9294   ins_cost(100);
 9295   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9296             "ADC    $dst.hi,$dst.hi\n\t"
 9297             "ADD    $dst.lo,$dst.lo\n\t"
 9298             "ADC    $dst.hi,$dst.hi" %}
 9299   ins_encode %{
          // First doubling.
 9300     __ addl($dst$$Register,$dst$$Register);
 9301     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Second doubling.
 9302     __ addl($dst$$Register,$dst$$Register);
 9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9304   %}
 9305   ins_pipe( ialu_reg_long );
 9306 %}
 9307 
 9308 // Shift Left Long by 3
      // Three rounds of the ADD lo,lo / ADC hi,hi doubling pair, each round
      // shifting the 64-bit value left by one bit. Only selected when
      // UseNewLongLShift is set.
 9309 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9310   predicate(UseNewLongLShift);
 9311   match(Set dst (LShiftL dst cnt));
 9312   effect(KILL cr);
 9313   ins_cost(100);
 9314   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9315             "ADC    $dst.hi,$dst.hi\n\t"
 9316             "ADD    $dst.lo,$dst.lo\n\t"
 9317             "ADC    $dst.hi,$dst.hi\n\t"
 9318             "ADD    $dst.lo,$dst.lo\n\t"
 9319             "ADC    $dst.hi,$dst.hi" %}
 9320   ins_encode %{
          // First doubling.
 9321     __ addl($dst$$Register,$dst$$Register);
 9322     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Second doubling.
 9323     __ addl($dst$$Register,$dst$$Register);
 9324     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Third doubling.
 9325     __ addl($dst$$Register,$dst$$Register);
 9326     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9327   %}
 9328   ins_pipe( ialu_reg_long );
 9329 %}
 9330 
 9331 // Shift Left Long by 1-31
      // Constant shift smaller than a word: SHLD moves the top bits of the low
      // word into the high word, then SHL shifts the low word (per the format
      // text). The bytes are produced by the move_long_small_shift encoder
      // defined elsewhere.
 9332 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9333   match(Set dst (LShiftL dst cnt));
 9334   effect(KILL cr);
 9335   ins_cost(200);
 9336   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9337             "SHL    $dst.lo,$cnt" %}
 9338   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9339   ins_encode( move_long_small_shift(dst,cnt) );
 9340   ins_pipe( ialu_reg_long );
 9341 %}
 9342 
 9343 // Shift Left Long by 32-63
      // Constant shift of a word or more: the low word becomes the high word,
      // is shifted by the remaining cnt-32 bits, and the low word is cleared
      // (per the format text). The bytes are produced by the
      // move_long_big_shift_clr encoder defined elsewhere.
 9344 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9345   match(Set dst (LShiftL dst cnt));
 9346   effect(KILL cr);
 9347   ins_cost(300);
 9348   format %{ "MOV    $dst.hi,$dst.lo\n"
 9349           "\tSHL    $dst.hi,$cnt-32\n"
 9350           "\tXOR    $dst.lo,$dst.lo" %}
 9351   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9352   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9353   ins_pipe( ialu_reg_long );
 9354 %}
 9355 
 9356 // Shift Left Long by variable
      // The count operand is of class eCXRegI. Per the format text, bit 5 of
      // the count is tested first: when set, the low word is moved to the
      // high word and the low word cleared (a shift by 32); then SHLD/SHL
      // apply the count to the pair. The bytes come from the shift_left_long
      // encoder defined elsewhere; size(17) declares the instruction size, so
      // that encoder presumably must emit exactly that many bytes -- confirm
      // before changing either.
 9357 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9358   match(Set dst (LShiftL dst shift));
 9359   effect(KILL cr);
 9360   ins_cost(500+200);
 9361   size(17);
 9362   format %{ "TEST   $shift,32\n\t"
 9363             "JEQ,s  small\n\t"
 9364             "MOV    $dst.hi,$dst.lo\n\t"
 9365             "XOR    $dst.lo,$dst.lo\n"
 9366     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9367             "SHL    $dst.lo,$shift" %}
 9368   ins_encode( shift_left_long( dst, shift ) );
 9369   ins_pipe( pipe_slow );
 9370 %}
 9371 
 9372 // Shift Right Long by 1-31
      // Logical (unsigned) constant shift smaller than a word: SHRD moves the
      // low bits of the high word into the low word, then SHR shifts the high
      // word (per the format text). The bytes are produced by the
      // move_long_small_shift encoder defined elsewhere.
 9373 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9374   match(Set dst (URShiftL dst cnt));
 9375   effect(KILL cr);
 9376   ins_cost(200);
 9377   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9378             "SHR    $dst.hi,$cnt" %}
 9379   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9380   ins_encode( move_long_small_shift(dst,cnt) );
 9381   ins_pipe( ialu_reg_long );
 9382 %}
 9383 
 9384 // Shift Right Long by 32-63
      // Logical (unsigned) constant shift of a word or more: the high word
      // becomes the low word, is shifted by the remaining cnt-32 bits, and the
      // high word is cleared (per the format text). The bytes are produced by
      // the move_long_big_shift_clr encoder defined elsewhere.
 9385 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9386   match(Set dst (URShiftL dst cnt));
 9387   effect(KILL cr);
 9388   ins_cost(300);
 9389   format %{ "MOV    $dst.lo,$dst.hi\n"
 9390           "\tSHR    $dst.lo,$cnt-32\n"
 9391           "\tXOR    $dst.hi,$dst.hi" %}
 9392   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9393   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9394   ins_pipe( ialu_reg_long );
 9395 %}
 9396 
 9397 // Shift Right Long by variable
      // Logical (unsigned) shift with the count in an eCXRegI operand. Per the
      // format text, bit 5 of the count is tested first: when set, the high
      // word is moved to the low word and the high word cleared (a shift by
      // 32); then SHRD/SHR apply the count to the pair. The bytes come from
      // the shift_right_long encoder defined elsewhere; size(17) declares the
      // instruction size and presumably has to match what that encoder emits.
 9398 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9399   match(Set dst (URShiftL dst shift));
 9400   effect(KILL cr);
 9401   ins_cost(600);
 9402   size(17);
 9403   format %{ "TEST   $shift,32\n\t"
 9404             "JEQ,s  small\n\t"
 9405             "MOV    $dst.lo,$dst.hi\n\t"
 9406             "XOR    $dst.hi,$dst.hi\n"
 9407     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9408             "SHR    $dst.hi,$shift" %}
 9409   ins_encode( shift_right_long( dst, shift ) );
 9410   ins_pipe( pipe_slow );
 9411 %}
 9412 
 9413 // Shift Right Arithmetic Long by 1-31
      // Signed constant shift smaller than a word (matches RShiftL): SHRD
      // moves the low bits of the high word into the low word, then SAR
      // shifts the high word while keeping its sign (per the format text).
      // The bytes are produced by the move_long_small_shift encoder defined
      // elsewhere.
 9414 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9415   match(Set dst (RShiftL dst cnt));
 9416   effect(KILL cr);
 9417   ins_cost(200);
 9418   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9419             "SAR    $dst.hi,$cnt" %}
 9420   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9421   ins_encode( move_long_small_shift(dst,cnt) );
 9422   ins_pipe( ialu_reg_long );
 9423 %}
 9424 
 9425 // Shift Right Arithmetic Long by 32-63
      // Signed constant shift of a word or more (matches RShiftL): the high
      // word becomes the low word and is shifted arithmetically by cnt-32,
      // then the high word is filled with its own sign via SAR by 31 (per the
      // format text). The bytes are produced by the move_long_big_shift_sign
      // encoder defined elsewhere.
 9426 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9427   match(Set dst (RShiftL dst cnt));
 9428   effect(KILL cr);
 9429   ins_cost(300);
 9430   format %{ "MOV    $dst.lo,$dst.hi\n"
 9431           "\tSAR    $dst.lo,$cnt-32\n"
 9432           "\tSAR    $dst.hi,31" %}
 9433   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9434   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9435   ins_pipe( ialu_reg_long );
 9436 %}
 9437 
 9438 // Shift Right arithmetic Long by variable
      // Signed shift with the count in an eCXRegI operand. Per the format
      // text, bit 5 of the count is tested first: when set, the high word is
      // moved to the low word and the high word is replaced by its sign fill
      // (SAR by 31); then SHRD/SAR apply the count to the pair. The bytes
      // come from the shift_right_arith_long encoder defined elsewhere;
      // size(18) declares the instruction size and presumably has to match
      // what that encoder emits.
 9439 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9440   match(Set dst (RShiftL dst shift));
 9441   effect(KILL cr);
 9442   ins_cost(600);
 9443   size(18);
 9444   format %{ "TEST   $shift,32\n\t"
 9445             "JEQ,s  small\n\t"
 9446             "MOV    $dst.lo,$dst.hi\n\t"
 9447             "SAR    $dst.hi,31\n"
 9448     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9449             "SAR    $dst.hi,$shift" %}
 9450   ins_encode( shift_right_arith_long( dst, shift ) );
 9451   ins_pipe( pipe_slow );
 9452 %}
 9453 
 9454 
 9455 //----------Double Instructions------------------------------------------------
 9456 // Double Math
 9457 
 9458 // Compare & branch
 9459 
 9460 // P6 version of float compare, sets condition codes in EFLAGS
      // x87 double compare (CmpD) producing an unsigned-style flags result.
      // Selected when the CPU supports CMOV and UseSSE is at most 1.
      // Per the format text: src1 is pushed, FUCOMIP compares it with src2
      // and pops, and if the parity flag reports an unordered result (NaN)
      // AH is loaded with 1 and SAHF stores it so that CF is set. This is
      // why rax is declared killed. The fixup bytes come from the
      // cmpF_P6_fixup encoder defined elsewhere.
 9461 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9462   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9463   match(Set cr (CmpD src1 src2));
 9464   effect(KILL rax);
 9465   ins_cost(150);
 9466   format %{ "FLD    $src1\n\t"
 9467             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9468             "JNP    exit\n\t"
 9469             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9470             "SAHF\n"
 9471      "exit:\tNOP               // avoid branch to branch" %}
 9472   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9473   ins_encode( Push_Reg_DPR(src1),
 9474               OpcP, RegOpc(src2),
 9475               cmpF_P6_fixup );
 9476   ins_pipe( pipe_slow );
 9477 %}
 9478 
 9479 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // x87 double compare (CmpD) for consumers of the eFlagsRegUCF flags
        // class: the same FLD + FUCOMIP pair as cmpDPR_cc_P6 but with no NaN
        // fixup sequence, hence no rax kill and the same declared cost.
        // Selected when the CPU supports CMOV and UseSSE is at most 1.
 9480   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9481   match(Set cr (CmpD src1 src2));
 9482   ins_cost(150);
 9483   format %{ "FLD    $src1\n\t"
 9484             "FUCOMIP ST,$src2  // P6 instruction" %}
 9485   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9486   ins_encode( Push_Reg_DPR(src1),
 9487               OpcP, RegOpc(src2));
 9488   ins_pipe( pipe_slow );
 9489 %}
 9490 
 9491 // Compare & branch
      // Generic x87 double compare (CmpD) for UseSSE at most 1, with no CMOV
      // requirement and a higher cost than the P6 variants.
      // Per the format text: src1 is pushed, FCOMp compares and pops, the FPU
      // status word is stored to AX, bit 0x400 is tested to detect an
      // unordered result (then AH is forced to 1 so it is treated as less
      // than), and SAHF transfers AH into the flags. rax is therefore killed.
      // The status-to-flags bytes come from the fpu_flags encoder defined
      // elsewhere.
 9492 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9493   predicate(UseSSE<=1);
 9494   match(Set cr (CmpD src1 src2));
 9495   effect(KILL rax);
 9496   ins_cost(200);
 9497   format %{ "FLD    $src1\n\t"
 9498             "FCOMp  $src2\n\t"
 9499             "FNSTSW AX\n\t"
 9500             "TEST   AX,0x400\n\t"
 9501             "JZ,s   flags\n\t"
 9502             "MOV    AH,1\t# unordered treat as LT\n"
 9503     "flags:\tSAHF" %}
 9504   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9505   ins_encode( Push_Reg_DPR(src1),
 9506               OpcP, RegOpc(src2),
 9507               fpu_flags);
 9508   ins_pipe( pipe_slow );
 9509 %}
 9510 
 9511 // Compare vs zero into -1,0,1
      // Three-way x87 double compare (CmpD3) against the constant 0.0 for
      // UseSSE at most 1; the int result is written to dst.
      // src1 is pushed, then OpcS and OpcP are emitted; assuming they emit
      // the secondary and primary opcode bytes respectively, that is D9 E4
      // (FTST). The FPU stack is then popped and the CmpF_Result encoder
      // (defined elsewhere) materialises the result. Flags and rax are
      // declared killed.
 9512 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9513   predicate(UseSSE<=1);
 9514   match(Set dst (CmpD3 src1 zero));
 9515   effect(KILL cr, KILL rax);
 9516   ins_cost(280);
 9517   format %{ "FTSTD  $dst,$src1" %}
 9518   opcode(0xE4, 0xD9);
 9519   ins_encode( Push_Reg_DPR(src1),
 9520               OpcS, OpcP, PopFPU,
 9521               CmpF_Result(dst));
 9522   ins_pipe( pipe_slow );
 9523 %}
 9524 
 9525 // Compare into -1,0,1
      // Three-way x87 double compare (CmpD3) of two FPU registers for UseSSE
      // at most 1; the int result is written to dst.
      // src1 is pushed and compared with src2 using the same opcode pair as
      // cmpDPR_cc (D8 /3), then the CmpF_Result encoder (defined elsewhere)
      // materialises the result. Flags and rax are declared killed.
 9526 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9527   predicate(UseSSE<=1);
 9528   match(Set dst (CmpD3 src1 src2));
 9529   effect(KILL cr, KILL rax);
 9530   ins_cost(300);
 9531   format %{ "FCMPD  $dst,$src1,$src2" %}
 9532   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9533   ins_encode( Push_Reg_DPR(src1),
 9534               OpcP, RegOpc(src2),
 9535               CmpF_Result(dst));
 9536   ins_pipe( pipe_slow );
 9537 %}
 9538 
 9539 // Double compare and set condition codes in EFLAGS by XMM regs
      // SSE2 double compare (CmpD) of two XMM registers, selected when UseSSE
      // is at least 2. UCOMISD sets the flags; emit_cmpfp_fixup (defined
      // elsewhere) then emits the NaN fixup which, per the format text, is
      // skipped when the parity flag is clear and otherwise rewrites the
      // flags through PUSHF / AND / POPF.
 9540 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9541   predicate(UseSSE>=2);
 9542   match(Set cr (CmpD src1 src2));
 9543   ins_cost(145);
 9544   format %{ "UCOMISD $src1,$src2\n\t"
 9545             "JNP,s   exit\n\t"
 9546             "PUSHF\t# saw NaN, set CF\n\t"
 9547             "AND     [rsp], #0xffffff2b\n\t"
 9548             "POPF\n"
 9549     "exit:" %}
 9550   ins_encode %{
 9551     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9552     emit_cmpfp_fixup(_masm);
 9553   %}
 9554   ins_pipe( pipe_slow );
 9555 %}
 9556 
 9557 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // SSE2 double compare (CmpD) for consumers of the eFlagsRegUCF flags
        // class: a bare UCOMISD with no NaN fixup, and therefore cheaper than
        // cmpD_cc. Selected when UseSSE is at least 2.
 9558   predicate(UseSSE>=2);
 9559   match(Set cr (CmpD src1 src2));
 9560   ins_cost(100);
 9561   format %{ "UCOMISD $src1,$src2" %}
 9562   ins_encode %{
 9563     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9564   %}
 9565   ins_pipe( pipe_slow );
 9566 %}
 9567 
 9568 // Double compare and set condition codes in EFLAGS by XMM reg and memory
      // SSE2 double compare (CmpD) with the second operand loaded from
      // memory, selected when UseSSE is at least 2. UCOMISD reads the memory
      // operand directly; emit_cmpfp_fixup (defined elsewhere) then emits the
      // NaN fixup shown in the format text.
 9569 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9570   predicate(UseSSE>=2);
 9571   match(Set cr (CmpD src1 (LoadD src2)));
 9572   ins_cost(145);
 9573   format %{ "UCOMISD $src1,$src2\n\t"
 9574             "JNP,s   exit\n\t"
 9575             "PUSHF\t# saw NaN, set CF\n\t"
 9576             "AND     [rsp], #0xffffff2b\n\t"
 9577             "POPF\n"
 9578     "exit:" %}
 9579   ins_encode %{
 9580     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9581     emit_cmpfp_fixup(_masm);
 9582   %}
 9583   ins_pipe( pipe_slow );
 9584 %}
 9585 
 9586 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // SSE2 double compare (CmpD) against a memory operand for consumers
        // of the eFlagsRegUCF flags class: a bare UCOMISD with no NaN fixup.
        // Selected when UseSSE is at least 2.
 9587   predicate(UseSSE>=2);
 9588   match(Set cr (CmpD src1 (LoadD src2)));
 9589   ins_cost(100);
 9590   format %{ "UCOMISD $src1,$src2" %}
 9591   ins_encode %{
 9592     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9593   %}
 9594   ins_pipe( pipe_slow );
 9595 %}
 9596 
 9597 // Compare into -1,0,1 in XMM
      // Three-way SSE2 double compare (CmpD3) of two XMM registers, selected
      // when UseSSE is at least 2. UCOMISD sets the flags and emit_cmpfp3
      // (defined elsewhere) turns them into the int result in dst; per the
      // format text dst starts at -1 and keeps it for unordered or below,
      // otherwise it becomes 0 or 1 via SETNE / MOVZB. Flags are killed.
 9598 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9599   predicate(UseSSE>=2);
 9600   match(Set dst (CmpD3 src1 src2));
 9601   effect(KILL cr);
 9602   ins_cost(255);
 9603   format %{ "UCOMISD $src1, $src2\n\t"
 9604             "MOV     $dst, #-1\n\t"
 9605             "JP,s    done\n\t"
 9606             "JB,s    done\n\t"
 9607             "SETNE   $dst\n\t"
 9608             "MOVZB   $dst, $dst\n"
 9609     "done:" %}
 9610   ins_encode %{
 9611     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9612     emit_cmpfp3(_masm, $dst$$Register);
 9613   %}
 9614   ins_pipe( pipe_slow );
 9615 %}
 9616 
 9617 // Compare into -1,0,1 in XMM and memory
      // Three-way SSE2 double compare (CmpD3) with the second operand loaded
      // from memory, selected when UseSSE is at least 2. UCOMISD reads the
      // memory operand directly and emit_cmpfp3 (defined elsewhere) turns the
      // flags into the int result in dst, as shown in the format text.
 9618 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9619   predicate(UseSSE>=2);
 9620   match(Set dst (CmpD3 src1 (LoadD src2)));
 9621   effect(KILL cr);
 9622   ins_cost(275);
 9623   format %{ "UCOMISD $src1, $src2\n\t"
 9624             "MOV     $dst, #-1\n\t"
 9625             "JP,s    done\n\t"
 9626             "JB,s    done\n\t"
 9627             "SETNE   $dst\n\t"
 9628             "MOVZB   $dst, $dst\n"
 9629     "done:" %}
 9630   ins_encode %{
 9631     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9632     emit_cmpfp3(_masm, $dst$$Register);
 9633   %}
 9634   ins_pipe( pipe_slow );
 9635 %}
 9636 
 9637 
 9638 instruct subDPR_reg(regDPR dst, regDPR src) %{
        // x87 double subtract, dst = dst - src, for UseSSE at most 1.
        // src is pushed onto the FPU stack and a popping subtract into dst is
        // emitted (per the format text), so the pushed copy is consumed.
 9639   predicate (UseSSE <=1);
 9640   match(Set dst (SubD dst src));
 9641 
 9642   format %{ "FLD    $src\n\t"
 9643             "DSUBp  $dst,ST" %}
 9644   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9645   ins_cost(150);
 9646   ins_encode( Push_Reg_DPR(src),
 9647               OpcP, RegOpc(dst) );
 9648   ins_pipe( fpu_reg_reg );
 9649 %}
 9650 
// x87 double subtract fused with RoundDouble: computes src1 - src2 and stores
// the result to a double stack slot (the store is marked "D-round" in the
// listing). Selected only when UseSSE<=1.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  // src2 is pushed first, so the D8 /5 form is used to obtain src1 - src2
  // with the pushed value as the subtrahend.
  opcode(0xD8, 0x5); /* D8 E8+i or D8 /5 */
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
 9664 
 9665 
// x87 double subtract with a memory operand: dst = dst - [src].
// The LoadD is folded into the instruction. Selected only when UseSSE<=1.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  // The tertiary opcode (0xDD) is the memory load; primary/secondary are the
  // subtract-and-pop.
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9678 
// x87 double absolute value (AbsD). Both operands use the regDPR1 class, and
// the listing is a bare FABS with no operands. Selected only when UseSSE<=1.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 E1.
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
 9688 
// x87 double negate (NegD). Both operands use the regDPR1 class, and the
// listing is a bare FCHS with no operands. Selected only when UseSSE<=1.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 E0.
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
 9698 
// x87 double add, two-address form: dst = dst + src.
// Selected only when UseSSE<=1. Declares a fixed encoding size of 4 bytes.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  // Listing: push src onto the FPU stack, then add into dst.
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9711 
 9712 
// x87 double add fused with RoundDouble: computes src1 + src2 and stores the
// result to a double stack slot (the store is marked "D-round" in the
// listing). Selected only when UseSSE<=1.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
 9726 
 9727 
// x87 double add with a memory operand: dst = dst + [src].
// The LoadD is folded into the instruction. Selected only when UseSSE<=1.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  // The tertiary opcode (0xDD) is the memory load; primary/secondary are the
  // add-and-pop.
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9740 
 9741 // add-to-memory
// x87 read-modify-write double add: [dst] = round([dst] + src).
// Matches a StoreD of a RoundDouble'd AddD whose left input is a LoadD of the
// same memory operand. Selected only when UseSSE<=1.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  // Opcode bytes are spelled out explicitly in the encoding: DD /0 (load),
  // D8 with a register operand (add), then DD /3 (store).
  // NOTE(review): the listing says FST_D, but the store is encoded with /3,
  // which looks like the popping form (FSTP) -- confirm the listing text.
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
 9757 
// x87 double add of an immDPR1 constant operand: dst = dst + con.
// Uses FLD1 to materialize the constant instead of a constant-table load
// (immDPR1 is presumably the constant 1.0 -- operand definition not shown).
// Selected only when UseSSE<=1.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}
 9770 
// x87 double add of a general double constant: dst = dst + con, with the
// constant loaded from the constant table. Selected only when UseSSE<=1.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  // _kids[1] is the constant operand of the AddD; constants 0.0 and 1.0 are
  // excluded from this rule.
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}
 9783 
// x87 double add of a constant fused with RoundDouble: computes src + con and
// stores the result to a double stack slot (marked "D-round" in the listing).
// Selected only when UseSSE<=1.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  // _kids[0] is the AddD under RoundDouble and its _kids[1] is the constant;
  // constants 0.0 and 1.0 are excluded from this rule.
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));  // dst is a stack slot addressed off rsp
  %}
  ins_pipe(fpu_mem_reg_con);
%}
 9798 
// x87 double multiply, two-address form: dst = dst * src.
// Selected only when UseSSE<=1.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  // Listing: push src onto the FPU stack, then multiply-and-pop into dst.
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9810 
 9811 // Strict FP instruction biases argument before multiply then
 9812 // biases result to avoid double rounding of subnormals.
 9813 //
 9814 // scale arg1 by multiplying arg1 by 2^(-15360)
 9815 // load arg2
 9816 // multiply scaled arg1 by arg2
 9817 // rescale product by 2^(15360)
 9818 //
 9819 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9820   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9821   match(Set dst (MulD dst src));
 9822   ins_cost(1);   // Select this instruction for all FP double multiplies
 9823 
 9824   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9825             "DMULp  $dst,ST\n\t"
 9826             "FLD    $src\n\t"
 9827             "DMULp  $dst,ST\n\t"
 9828             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9829             "DMULp  $dst,ST\n\t" %}
 9830   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9831   ins_encode( strictfp_bias1(dst),
 9832               Push_Reg_DPR(src),
 9833               OpcP, RegOpc(dst),
 9834               strictfp_bias2(dst) );
 9835   ins_pipe( fpu_reg_reg );
 9836 %}
 9837 
// x87 double multiply by a general double constant: dst = dst * con, with the
// constant loaded from the constant table. Selected only when UseSSE<=1.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  // _kids[1] is the constant operand of the MulD; constants 0.0 and 1.0 are
  // excluded from this rule.
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}
 9850 
 9851 
// x87 double multiply with a memory operand: dst = dst * [src].
// The LoadD is folded into the instruction. Selected only when UseSSE<=1.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  // The tertiary opcode (0xDD) is the memory load; primary/secondary are the
  // multiply-and-pop.
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
 9863 
 9864 //
 9865 // Cisc-alternate to reg-reg multiply
// Three-address x87 double multiply with a memory operand:
// dst = src * [mem]. Selected only when UseSSE<=1.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  // NOTE(review): the tertiary (load) opcode here is 0xD9, while the other
  // double-precision memory forms in this section use 0xDD for LoadD --
  // confirm that 0xD9 is intended for a double load.
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
 9879 
 9880 
 9881 // MACRO3 -- addDPR a mulDPR
 9882 // This instruction is a '2-address' instruction in that the result goes
 9883 // back to src2.  This eliminates a move from the macro; possibly the
 9884 // register allocator will have to add it back (and maybe not).
// x87 double multiply-then-add: src2 = (src0 * src1) + src2.
// Selected only when UseSSE<=1.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  // Listing: push src0, multiply the stack top by src1, add-and-pop into src2.
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
 9898 
 9899 
 9900 // MACRO3 -- subDPR a mulDPR
// x87 double multiply-then-subtract: src2 = (src0 * src1) - src2.
// Two-address like the add variant: the result goes back to src2.
// Selected only when UseSSE<=1.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  // Listing: push src0, multiply the stack top by src1, then a reversed
  // subtract-and-pop (DSUBRp) so that src2 receives product - src2.
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  // The final instruction is encoded explicitly as DE E0+i.
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
 9913 
 9914 
// x87 double divide, two-address form: dst = dst / src.
// Selected only when UseSSE<=1.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  // Listing: push src onto the FPU stack, then divide-and-pop into dst.
  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9927 
 9928 // Strict FP instruction biases argument before division then
 9929 // biases result, to avoid double rounding of subnormals.
 9930 //
 9931 // scale dividend by multiplying dividend by 2^(-15360)
 9932 // load divisor
 9933 // divide scaled dividend by divisor
 9934 // rescale quotient by 2^(15360)
 9935 //
 9936 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9937   predicate (UseSSE<=1);
 9938   match(Set dst (DivD dst src));
 9939   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9940   ins_cost(01);
 9941 
 9942   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9943             "DMULp  $dst,ST\n\t"
 9944             "FLD    $src\n\t"
 9945             "FDIVp  $dst,ST\n\t"
 9946             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9947             "DMULp  $dst,ST\n\t" %}
 9948   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9949   ins_encode( strictfp_bias1(dst),
 9950               Push_Reg_DPR(src),
 9951               OpcP, RegOpc(dst),
 9952               strictfp_bias2(dst) );
 9953   ins_pipe( fpu_reg_reg );
 9954 %}
 9955 
// x87 double remainder (ModD), two-address form: dst = dst mod src.
// Selected only when UseSSE<=1. EAX and the flags are clobbered by the
// remainder sequence, so both are declared as killed.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  // Encoding order: push both operands, run the remainder sequence, bring the
  // result to where it can be popped, then pop it into dst.
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
 9969 
// Double remainder (ModD) for XMM operands when UseSSE>=2: dst = src0 mod src1.
// Per the listing, the operands are bounced through an 8-byte stack scratch
// area onto the x87 stack, FPREM is looped until complete, and the result is
// moved back to dst the same way. EAX and the flags are killed.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  // Uses the same emitModDPR() remainder loop as the x87 rule; PopFPU drops
  // the remaining x87 stack entry afterwards.
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
 9994 
// x87 two-operand arctangent (AtanD), two-address form with dst as the first
// input. Selected only when UseSSE<=1.
// NOTE(review): the listing mnemonic "DATA" looks like a truncated "DATAN" --
// confirm before changing, since it is emitted in debug listings.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  // Primary then secondary opcode are emitted after pushing src: D9 F3.
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
10004 
// Two-operand arctangent (AtanD) for XMM operands when UseSSE>=2, two-address
// form with dst as the first input. The same D9 F3 opcode pair as the x87
// rule is emitted between a push of src and a move of the result into dst.
// NOTE(review): the listing mnemonic "DATA" looks like a truncated "DATAN" --
// confirm before changing, since it is emitted in debug listings.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
10015 
// x87 double square root (SqrtD): dst = sqrt(src).
// Selected only when UseSSE<=1.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 FA, between the push of src and the pop into dst.
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
10025 
10026 //-------------Float Instructions-------------------------------
10027 // Float Math
10028 
10029 // Code for float compare:
10030 //     fcompp();
10031 //     fwait(); fnstsw_ax();
10032 //     sahf();
10033 //     movl(dst, unordered_result);
10034 //     jcc(Assembler::parity, exit);
10035 //     movl(dst, less_result);
10036 //     jcc(Assembler::below, exit);
10037 //     movl(dst, equal_result);
10038 //     jcc(Assembler::equal, exit);
10039 //     movl(dst, greater_result);
10040 //   exit:
10041 
10042 // P6 version of float compare, sets condition codes in EFLAGS
// x87 float compare setting EFLAGS directly with FUCOMIP. Used when the CPU
// supports CMOV and UseSSE == 0. A fixup after the compare forces CF when the
// operands were unordered, so a NaN compares as "less than"; EAX is killed by
// that fixup.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  // NOTE(review): the float operand is pushed with the DPR push encoding;
  // presumably identical bytes to the FPR variant -- confirm.
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
10060 
// x87 float compare setting EFLAGS directly with FUCOMIP, producing an
// eFlagsRegUCF result. Same predicate as cmpFPR_cc_P6, but no NaN fixup is
// emitted and EAX is not killed, hence the lower cost.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  // NOTE(review): the float operand is pushed with the DPR push encoding;
  // presumably identical bytes to the FPR variant -- confirm.
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10072 
10073 
10074 // Compare & branch
// x87 float compare for UseSSE == 0 that goes through the FPU status word:
// FCOMp, then the status word is copied to AX and transferred to EFLAGS with
// SAHF. An unordered result is rewritten so it is treated as "less than".
// EAX is killed. Costed above the FUCOMIP-based rule (200 vs 150).
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  // NOTE(review): the float operand is pushed with the DPR push encoding;
  // presumably identical bytes to the FPR variant -- confirm.
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
10093 
10094 // Compare vs zero into -1,0,1
// x87 three-way float compare against the zero constant (CmpF3 src1, 0) into
// an integer register. Selected only when UseSSE == 0; kills EAX and the flags.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 E4; the pushed copy of src1 is then popped and the result
  // is materialized by CmpF_Result.
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10107 
10108 // Compare into -1,0,1
// x87 three-way float compare (CmpF3) of two registers into an integer
// register. Selected only when UseSSE == 0; kills EAX and the flags.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  // Push src1, compare-and-pop against src2, then CmpF_Result materializes
  // the integer result in dst.
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10121 
10122 // float compare and set condition codes in EFLAGS by XMM regs
// Float compare of two XMM registers into EFLAGS (UseSSE>=1). UCOMISS is
// followed by a fixup that, per the listing, rewrites the flags via
// PUSHF/AND/POPF when the compare was unordered.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);  // helper (not shown here) emits the NaN fixup sequence
  %}
  ins_pipe( pipe_slow );
%}
10139 
// Float compare of two XMM registers producing an eFlagsRegUCF result
// (UseSSE>=1). The encoding is a single UCOMISS with no NaN fixup, hence the
// lower cost than cmpF_cc.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10150 
10151 // float compare and set condition codes in EFLAGS by XMM regs
// Float compare of an XMM register against a float loaded from memory, into
// EFLAGS (UseSSE>=1). UCOMISS is followed by the same NaN fixup as the
// register-register form.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));  // the LoadF is folded into the compare
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);  // helper (not shown here) emits the NaN fixup sequence
  %}
  ins_pipe( pipe_slow );
%}
10168 
// Float compare of an XMM register against a float loaded from memory,
// producing an eFlagsRegUCF result (UseSSE>=1). The encoding is a single
// UCOMISS with no NaN fixup.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));  // the LoadF is folded into the compare
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10179 
10180 // Compare into -1,0,1 in XMM
// Three-way float compare (CmpF3) of two XMM registers into an integer
// register. Requires UseSSE>=1 and clobbers the flags (KILL cr).
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  // Per the listing: dst is preset to -1 and kept when the compare is
  // unordered (JP) or below (JB); otherwise SETNE/MOVZB yields 0 or 1.
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);  // helper (not shown here) emits the -1/0/1 materialization
  %}
  ins_pipe( pipe_slow );
%}
10199 
10200 // Compare into -1,0,1 in XMM and memory
// Three-way float compare (CmpF3) of an XMM register against a float loaded
// from memory, into an integer register. Requires UseSSE>=1; kills the flags.
// Costed slightly above the register-register form (275 vs 255).
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));  // the LoadF is folded into UCOMISS
  effect(KILL cr);
  ins_cost(275);
  // Per the listing: dst is preset to -1 and kept on unordered (JP) or
  // below (JB); otherwise SETNE/MOVZB yields 0 or 1.
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);  // helper (not shown here) emits the -1/0/1 materialization
  %}
  ins_pipe( pipe_slow );
%}
10219 
10220 // Spill to obtain 24-bit precision
// x87 float subtract, 24-bit variant: computes src1 - src2 and stores the
// result to a float stack slot. Selected when UseSSE==0 and the compilation
// has 24-bit instruction selection enabled.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  // Push src1, subtract src2 from the stack top, pop the result to memory.
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10232 //
10233 // This instruction does not round to 24-bits
// x87 float subtract, two-address register form: dst = dst - src.
// Selected when UseSSE==0 and 24-bit instruction selection is NOT enabled.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  // Push src, then subtract-and-pop into dst.
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10244 
10245 // Spill to obtain 24-bit precision
// x87 float add, 24-bit variant: computes src1 + src2 and stores the result
// to a float stack slot. Selected when UseSSE==0 and the compilation has
// 24-bit instruction selection enabled.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  // Push src2, add src1 to the stack top, pop the result to memory.
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10257 //
10258 // This instruction does not round to 24-bits
// x87 float add, two-address register form: dst = dst + src.
// Selected when UseSSE==0 and 24-bit instruction selection is NOT enabled.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  // Listing: push src onto the FPU stack, then add-and-pop into dst.
  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10270 
// x87 float absolute value (AbsF). Both operands use the regFPR1 class, and
// the listing is a bare FABS with no operands. Selected only when UseSSE==0.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 E1.
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10280 
// x87 float negate (NegF). Both operands use the regFPR1 class, and the
// listing is a bare FCHS with no operands. Selected only when UseSSE==0.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  // The secondary opcode is emitted before the primary (OpcS, OpcP), i.e.
  // byte order D9 E0.
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10290 
10291 // Cisc-alternate to addFPR_reg
10292 // Spill to obtain 24-bit precision
// x87 float add with a memory operand, 24-bit variant: computes
// src1 + [src2] and stores the result to a float stack slot. Selected when
// UseSSE==0 and 24-bit instruction selection is enabled.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  // The tertiary opcode (0xD9) is the memory load; primary/secondary are the add.
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10306 //
10307 // Cisc-alternate to addFPR_reg
10308 // This instruction does not round to 24-bits
// x87 float add with a memory operand, two-address form: dst = dst + [src].
// Selected when UseSSE==0 and 24-bit instruction selection is NOT enabled.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  // The tertiary opcode (0xD9) is the memory load; primary/secondary are the
  // add-and-pop into dst.
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10319 
// Following two instructions for _222_mpegaudio
10321 // Spill to obtain 24-bit precision
// x87 float add whose left input (src1) is a memory operand, 24-bit variant:
// computes [src1] + src2 and stores the result to a float stack slot.
// Selected when UseSSE==0 and 24-bit instruction selection is enabled.
// Note that the match rule has no explicit LoadF; src1 is matched directly as
// a memory operand.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  // The tertiary opcode (0xD9) loads src1; primary/secondary are the add.
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10333 
10334 // Cisc-spill variant
10335 // Spill to obtain 24-bit precision
// x87 float add of two memory operands, 24-bit variant, with an explicit
// LoadF on the right input: computes [src1] + [src2] and stores the result to
// a float stack slot. Selected when UseSSE==0 and 24-bit instruction
// selection is enabled.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  // Load src2 (D9 /0), then add src1 using the memory form of the primary
  // opcode (D8 with the secondary as the /digit).
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10348 
10349 // Spill to obtain 24-bit precision
// x87 float add of two memory operands, 24-bit variant: computes
// [src1] + [src2] and stores the result to a float stack slot. Selected when
// UseSSE==0 and 24-bit instruction selection is enabled. The match rule has
// no explicit LoadF; both inputs are matched directly as memory operands.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  // Load src2 (D9 /0), then add src1 using the memory form D8 /0.
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10362 
10363 
10364 // Spill to obtain 24-bit precision
// x87 float add of a constant, 24-bit variant: computes src + con (constant
// read from the constant table) and stores the result to a float stack slot.
// Selected when UseSSE==0 and 24-bit instruction selection is enabled.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));  // dst is a stack slot addressed off rsp
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10378 //
10379 // This instruction does not round to 24-bits
// x87 float add of a constant, register-result form: dst = src + con, with
// the constant read from the constant table. Selected when UseSSE==0 and
// 24-bit instruction selection is NOT enabled.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);     // pop into the destination FPU register
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10393 
10394 // Spill to obtain 24-bit precision
// x87 float multiply, 24-bit variant: computes src1 * src2 and stores the
// result to a float stack slot. Selected when UseSSE==0 and 24-bit
// instruction selection is enabled.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10408 //
10409 // This instruction does not round to 24-bits
// x87 float multiply, register-result form: dst = src1 * src2.
// Selected when UseSSE==0 and 24-bit instruction selection is NOT enabled.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // The listing mirrors the encoding below: src2 is the operand pushed, src1
  // is the multiplier, and the result is popped into a register (FSTP), not
  // stored to memory as a single (FSTP_S).
  format %{ "FLD    $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP   $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10423 
10424 
10425 // Spill to obtain 24-bit precision
10426 // Cisc-alternate to reg-reg multiply
// x87 float multiply with a memory operand, 24-bit variant: computes
// src1 * [src2] and stores the result to a float stack slot. Selected when
// UseSSE==0 and 24-bit instruction selection is enabled.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  // The tertiary opcode (0xD9) is the memory load; primary/secondary are the
  // multiply.
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10440 //
10441 // This instruction does not round to 24-bits
10442 // Cisc-alternate to reg-reg multiply
// x87 float multiply with a memory operand, register-result form:
// dst = src1 * [src2]. Selected when UseSSE==0 and 24-bit instruction
// selection is NOT enabled.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  // Load src2 (D9 /0), multiply the stack top by src1, pop into dst.
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10454 
10455 // Spill to obtain 24-bit precision
// x87 float multiply of two memory operands, 24-bit variant: computes
// [src1] * [src2] and stores the result to a float stack slot. Selected when
// UseSSE==0 and 24-bit instruction selection is enabled. The match rule has
// no explicit LoadF; both inputs are matched directly as memory operands.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  // Load src2 (D9 /0), then multiply by src1 using the memory form D8 /1.
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10468 
10469 // Spill to obtain 24-bit precision
// x87 float multiply by a constant, 24-bit variant: computes src * con
// (constant read from the constant table) and stores the result to a float
// stack slot. Selected when UseSSE==0 and 24-bit instruction selection is
// enabled.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));  // dst is a stack slot addressed off rsp
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10484 //
10485 // This instruction does not round to 24-bits
// x87 float multiply by a constant, register-result form: dst = src * con,
// with the constant read from the constant table. Selected when UseSSE==0
// and 24-bit instruction selection is NOT enabled.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);     // pop into the destination FPU register
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10500 
10501 
10502 //
10503 // MACRO1 -- subsume unshared load into mulFPR
10504 // This instruction does not round to 24-bits
// x87 float multiply whose LEFT input is a load: dst = [mem1] * src.
// Selected when UseSSE==0 and 24-bit instruction selection is NOT enabled.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD    $mem1    ===MACRO1===\n\t"
            "FMUL   ST,$src\n\t"
            "FSTP   $dst" %}
  // Load mem1 (D9 /0), multiply the stack top by src, pop into dst.
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10518 //
10519 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10520 // This instruction does not round to 24-bits
      // Matches (LoadF mem1) * src1 + src2 as a single rule.
10521 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10522   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10523   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10524   ins_cost(95);
10525 
10526   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10527             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10528             "FADD   ST,$src2\n\t"
10529             "FSTP   $dst" %}
10530   opcode(0xD9); /* LoadF D9 /0 */
        // OpcP with /0 encodes the load of mem1; multiply, add and the final
        // pop into dst follow in the order shown by the format string.
10531   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10532               FMul_ST_reg(src1),
10533               FAdd_ST_reg(src2),
10534               Pop_Reg_FPR(dst) );
10535   ins_pipe( fpu_reg_mem_reg_reg );
10536 %}
10537 
10538 // MACRO3 -- addFPR a mulFPR
10539 // This instruction does not round to 24-bits.  It is a '2-address'
10540 // instruction in that the result goes back to src2.  This eliminates
10541 // a move from the macro; possibly the register allocator will have
10542 // to add it back (and maybe not).
      // Matches src2 = src0 * src1 + src2.
10543 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10544   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10545   match(Set src2 (AddF (MulF src0 src1) src2));
10546 
10547   format %{ "FLD    $src0     ===MACRO3===\n\t"
10548             "FMUL   ST,$src1\n\t"
10549             "FADDP  $src2,ST" %}
10550   opcode(0xD9); /* LoadF D9 /0 */
        // NOTE(review): no OpcP appears in the ins_encode list below; confirm
        // whether any of these encodings consume the declared opcode.
10551   ins_encode( Push_Reg_FPR(src0),
10552               FMul_ST_reg(src1),
10553               FAddP_reg_ST(src2) );
10554   ins_pipe( fpu_reg_reg_reg );
10555 %}
10556 
10557 // MACRO4 -- divFPR subFPR
10558 // This instruction does not round to 24-bits
      // Matches dst = (src2 - src1) / src3 as a single rule.
10559 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10560   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10561   match(Set dst (DivF (SubF src2 src1) src3));
10562 
10563   format %{ "FLD    $src2   ===MACRO4===\n\t"
10564             "FSUB   ST,$src1\n\t"
10565             "FDIV   ST,$src3\n\t"
10566             "FSTP  $dst" %}
10567   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
        // src2 is pushed first because it is the minuend of the subtraction.
10568   ins_encode( Push_Reg_FPR(src2),
10569               subFPR_divFPR_encode(src1,src3),
10570               Pop_Reg_FPR(dst) );
10571   ins_pipe( fpu_reg_reg_reg_reg );
10572 %}
10573 
10574 // Spill to obtain 24-bit precision
      // Float divide src1 / src2 of two x87 registers; the quotient is written
      // to a float stack slot.
10575 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10576   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10577   match(Set dst (DivF src1 src2));
10578 
10579   format %{ "FDIV   $dst,$src1,$src2" %}
10580   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10581   ins_encode( Push_Reg_FPR(src1),
10582               OpcReg_FPR(src2),
10583               Pop_Mem_FPR(dst) );
10584   ins_pipe( fpu_mem_reg_reg );
10585 %}
10586 //
10587 // This instruction does not round to 24-bits
      // Two-address float divide: dst = dst / src, both in x87 registers.
10588 instruct divFPR_reg(regFPR dst, regFPR src) %{
10589   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10590   match(Set dst (DivF dst src));
10591 
10592   format %{ "FDIV   $dst,$src" %}
10593   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10594   ins_encode( Push_Reg_FPR(src),
10595               OpcP, RegOpc(dst) );
10596   ins_pipe( fpu_reg_reg );
10597 %}
10598 
10599 
10600 // Spill to obtain 24-bit precision
      // Float remainder (ModF) of two x87 registers; the result is written to
      // a float stack slot.  The double-precision (DPR) mod encodings are
      // reused for the float operands.
10601 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10602   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10603   match(Set dst (ModF src1 src2));
10604   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10605 
10606   format %{ "FMOD   $dst,$src1,$src2" %}
10607   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10608               emitModDPR(),
10609               Push_Result_Mod_DPR(src2),
10610               Pop_Mem_FPR(dst));
10611   ins_pipe( pipe_slow );
10612 %}
10613 //
10614 // This instruction does not round to 24-bits
      // Two-address float remainder: dst = dst mod src, both in x87 registers.
      // The double-precision (DPR) mod encodings are reused for the float
      // operands.
10615 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10616   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10617   match(Set dst (ModF dst src));
10618   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10619 
10620   format %{ "FMOD   $dst,$src" %}
10621   ins_encode(Push_Reg_Mod_DPR(dst, src),
10622               emitModDPR(),
10623               Push_Result_Mod_DPR(src),
10624               Pop_Reg_FPR(dst));
10625   ins_pipe( pipe_slow );
10626 %}
10627 
10628 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
        // Float remainder for XMM operands (UseSSE>=1).  Per the format string
        // both inputs are moved through a 4-byte stack temp onto the x87 stack,
        // FPREM is looped until the parity flag clears, and the result is moved
        // back to dst through the same temp.
10629   predicate(UseSSE>=1);
10630   match(Set dst (ModF src0 src1));
10631   effect(KILL rax, KILL cr);
10632   format %{ "SUB    ESP,4\t # FMOD\n"
10633           "\tMOVSS  [ESP+0],$src1\n"
10634           "\tFLD_S  [ESP+0]\n"
10635           "\tMOVSS  [ESP+0],$src0\n"
10636           "\tFLD_S  [ESP+0]\n"
10637      "loop:\tFPREM\n"
10638           "\tFWAIT\n"
10639           "\tFNSTSW AX\n"
10640           "\tSAHF\n"
10641           "\tJP     loop\n"
10642           "\tFSTP_S [ESP+0]\n"
10643           "\tMOVSS  $dst,[ESP+0]\n"
10644           "\tADD    ESP,4\n"
10645           "\tFSTP   ST0\t # Restore FPU Stack"
10646     %}
10647   ins_cost(250);
        // The 0x4 passed to Push_ResultF matches the 4 bytes reserved on ESP in
        // the format string above.
10648   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10649   ins_pipe( pipe_slow );
10650 %}
10651 
10652 
10653 //----------Arithmetic Conversion Instructions---------------------------------
10654 // The conversion operations are all sorted alphabetically.  Please keep it that way!
10655 
10656 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // Implements RoundFloat on x87 (UseSSE==0) by storing the register to a
        // float stack slot.
10657   predicate(UseSSE==0);
10658   match(Set dst (RoundFloat src));
10659   ins_cost(125);
10660   format %{ "FST_S  $dst,$src\t# F-round" %}
10661   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10662   ins_pipe( fpu_mem_reg );
10663 %}
10664 
10665 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // Implements RoundDouble on x87 (UseSSE<=1) by storing the register to
        // a double stack slot.
10666   predicate(UseSSE<=1);
10667   match(Set dst (RoundDouble src));
10668   ins_cost(125);
10669   format %{ "FST_D  $dst,$src\t# D-round" %}
10670   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10671   ins_pipe( fpu_mem_reg );
10672 %}
10673 
10674 // Force rounding to single (float) precision by storing to a float stack slot.
      // ConvD2F on x87 (UseSSE==0); expands to roundFloat_mem_reg.
10675 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10676   predicate(UseSSE==0);
10677   match(Set dst (ConvD2F src));
10678   format %{ "FST_S  $dst,$src\t# F-round" %}
10679   expand %{
10680     roundFloat_mem_reg(dst,src);
10681   %}
10682 %}
10683 
10684 // Force rounding to single (float) precision via a 4-byte store.
      // ConvD2F when UseSSE==1: the source is an x87 double register and the
      // destination an XMM float register, so the value travels through a
      // 4-byte temp on the CPU stack.
10685 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10686   predicate(UseSSE==1);
10687   match(Set dst (ConvD2F src));
10688   effect( KILL cr );
10689   format %{ "SUB    ESP,4\n\t"
10690             "FST_S  [ESP],$src\t# F-round\n\t"
10691             "MOVSS  $dst,[ESP]\n\t"
10692             "ADD ESP,4" %}
10693   ins_encode %{
10694     __ subptr(rsp, 4);  // reserve the 4-byte temp
10695     if ($src$$reg != FPR1L_enc) {
          // src is not FPR1L: load it, then store-and-pop the loaded copy
10696       __ fld_s($src$$reg-1);
10697       __ fstp_s(Address(rsp, 0));
10698     } else {
          // src is FPR1L: store it directly, without a pop
10699       __ fst_s(Address(rsp, 0));
10700     }
10701     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10702     __ addptr(rsp, 4);  // release the temp
10703   %}
10704   ins_pipe( pipe_slow );
10705 %}
10706 
10707 // Force rounding double precision to single precision
      // ConvD2F between XMM registers (UseSSE>=2), using CVTSD2SS.
10708 instruct convD2F_reg(regF dst, regD src) %{
10709   predicate(UseSSE>=2);
10710   match(Set dst (ConvD2F src));
10711   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10712   ins_encode %{
10713     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
        // ConvF2D between x87 registers (UseSSE==0).
        // NOTE(review): the format text shows a store mnemonic (FST_S) although
        // both operands are registers; confirm against Pop_Reg_Reg_DPR.
10719   predicate(UseSSE==0);
10720   match(Set dst (ConvF2D src));
10721   format %{ "FST_S  $dst,$src\t# D-round" %}
10722   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10723   ins_pipe( fpu_reg_reg );
10724 %}
10725 
10726 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
        // ConvF2D when UseSSE==1 and the source is an x87 float register:
        // expands to roundDouble_mem_reg, which writes a double stack slot.
10727   predicate(UseSSE==1);
10728   match(Set dst (ConvF2D src));
10729   format %{ "FST_D  $dst,$src\t# D-round" %}
10730   expand %{
10731     roundDouble_mem_reg(dst,src);
10732   %}
10733 %}
10734 
10735 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
        // ConvF2D when UseSSE==1: the source is an XMM float register and the
        // destination an x87 double register, so the value travels through a
        // 4-byte temp on the CPU stack.
10736   predicate(UseSSE==1);
10737   match(Set dst (ConvF2D src));
10738   effect( KILL cr );
10739   format %{ "SUB    ESP,4\n\t"
10740             "MOVSS  [ESP],$src\n\t"
10741             "FLD_S  [ESP]\n\t"
10742             "ADD    ESP,4\n\t"
10743             "FSTP   $dst\t# D-round" %}
10744   ins_encode %{
10745     __ subptr(rsp, 4);  // reserve the 4-byte temp
10746     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10747     __ fld_s(Address(rsp, 0));
10748     __ addptr(rsp, 4);  // release the temp before the final pop into dst
10749     __ fstp_d($dst$$reg);
10750   %}
10751   ins_pipe( pipe_slow );
10752 %}
10753 
10754 instruct convF2D_reg(regD dst, regF src) %{
        // ConvF2D between XMM registers (UseSSE>=2), using CVTSS2SD.
10755   predicate(UseSSE>=2);
10756   match(Set dst (ConvF2D src));
10757   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10758   ins_encode %{
10759     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10760   %}
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // x87 variant (UseSSE<=1).  The result is fixed to EAX; EDX and the flags
      // are killed.  Per the format string, a converted value of 0x80000000
      // sends control to d2i_wrapper.
10765 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10766   predicate(UseSSE<=1);
10767   match(Set dst (ConvD2I src));
10768   effect( KILL tmp, KILL cr );
10769   format %{ "FLD    $src\t# Convert double to int \n\t"
10770             "FLDCW  trunc mode\n\t"
10771             "SUB    ESP,4\n\t"
10772             "FISTp  [ESP + #0]\n\t"
10773             "FLDCW  std/24-bit mode\n\t"
10774             "POP    EAX\n\t"
10775             "CMP    EAX,0x80000000\n\t"
10776             "JNE,s  fast\n\t"
10777             "FLD_D  $src\n\t"
10778             "CALL   d2i_wrapper\n"
10779       "fast:" %}
10780   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10781   ins_pipe( pipe_slow );
10782 %}
10783 
10784 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // SSE2 variant (UseSSE>=2).  The result is fixed to EAX; EDX and the flags
      // are killed.
10785 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10786   predicate(UseSSE>=2);
10787   match(Set dst (ConvD2I src));
10788   effect( KILL tmp, KILL cr );
10789   format %{ "CVTTSD2SI $dst, $src\n\t"
10790             "CMP    $dst,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "SUB    ESP, 8\n\t"
10793             "MOVSD  [ESP], $src\n\t"
10794             "FLD_D  [ESP]\n\t"
10795             "ADD    ESP, 8\n\t"
10796             "CALL   d2i_wrapper\n"
10797       "fast:" %}
10798   ins_encode %{
10799     Label fast;
10800     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10801     __ cmpl($dst$$Register, 0x80000000);
          // Any result other than 0x80000000 is final; skip the wrapper call.
10802     __ jccb(Assembler::notEqual, fast);
          // Slow path: load the source onto the x87 stack through an 8-byte
          // stack temp, then call the d2i_wrapper stub.
10803     __ subptr(rsp, 8);
10804     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10805     __ fld_d(Address(rsp, 0));
10806     __ addptr(rsp, 8);
10807     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10808     __ bind(fast);
10809   %}
10810   ins_pipe( pipe_slow );
10811 %}
10812 
10813 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
        // Convert a double in an x87 register to a long (UseSSE<=1).  Per the
        // format string, a converted value of EDX:EAX == 0x80000000:00000000
        // sends control to d2l_wrapper.
10814   predicate(UseSSE<=1);
10815   match(Set dst (ConvD2L src));
10816   effect( KILL cr );
10817   format %{ "FLD    $src\t# Convert double to long\n\t"
10818             "FLDCW  trunc mode\n\t"
10819             "SUB    ESP,8\n\t"
10820             "FISTp  [ESP + #0]\n\t"
10821             "FLDCW  std/24-bit mode\n\t"
10822             "POP    EAX\n\t"
10823             "POP    EDX\n\t"
10824             "CMP    EDX,0x80000000\n\t"
10825             "JNE,s  fast\n\t"
10826             "TEST   EAX,EAX\n\t"
10827             "JNE,s  fast\n\t"
10828             "FLD    $src\n\t"
10829             "CALL   d2l_wrapper\n"
10830       "fast:" %}
10831   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10832   ins_pipe( pipe_slow );
10833 %}
10834 
10835 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // SSE2 variant (UseSSE>=2) of ConvD2L.  The result lands in EDX:EAX and
      // the flags are killed.
10836 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10837   predicate (UseSSE>=2);
10838   match(Set dst (ConvD2L src));
10839   effect( KILL cr );
10840   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10841             "MOVSD  [ESP],$src\n\t"
10842             "FLD_D  [ESP]\n\t"
10843             "FLDCW  trunc mode\n\t"
10844             "FISTp  [ESP + #0]\n\t"
10845             "FLDCW  std/24-bit mode\n\t"
10846             "POP    EAX\n\t"
10847             "POP    EDX\n\t"
10848             "CMP    EDX,0x80000000\n\t"
10849             "JNE,s  fast\n\t"
10850             "TEST   EAX,EAX\n\t"
10851             "JNE,s  fast\n\t"
10852             "SUB    ESP,8\n\t"
10853             "MOVSD  [ESP],$src\n\t"
10854             "FLD_D  [ESP]\n\t"
10855             "ADD    ESP,8\n\t"
10856             "CALL   d2l_wrapper\n"
10857       "fast:" %}
10858   ins_encode %{
10859     Label fast;
          // Move the XMM source onto the x87 stack through an 8-byte stack temp.
10860     __ subptr(rsp, 8);
10861     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10862     __ fld_d(Address(rsp, 0));
          // Switch to the truncating control word and store the 64-bit integer
          // into the same temp.
10863     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10864     __ fistp_d(Address(rsp, 0));
10865     // Restore the rounding mode, mask the exception
10866     if (Compile::current()->in_24_bit_fp_mode()) {
10867       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10868     } else {
10869       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10870     }
10871     // Load the converted long, adjust CPU stack
10872     __ pop(rax);
10873     __ pop(rdx);
          // Only the exact pattern rdx == 0x80000000 and rax == 0 takes the slow
          // path; anything else is the final result.
10874     __ cmpl(rdx, 0x80000000);
10875     __ jccb(Assembler::notEqual, fast);
10876     __ testl(rax, rax);
10877     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the source onto the x87 stack and call the
          // d2l_wrapper stub.
10878     __ subptr(rsp, 8);
10879     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10880     __ fld_d(Address(rsp, 0));
10881     __ addptr(rsp, 8);
10882     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10883     __ bind(fast);
10884   %}
10885   ins_pipe( pipe_slow );
10886 %}
10887 
10888 // Convert a float to an int.  Java semantics require we do complex
10889 // manglations in the corner cases.  So we set the rounding mode to
10890 // 'zero', store the darned float down as an int, and reset the
10891 // rounding mode to 'nearest'.  The hardware stores a flag value down
10892 // if we would overflow or converted a NAN; we check for this
10893 // and go the slow path if needed.
      // x87 variant (UseSSE==0).  The result is fixed to EAX; EDX and the flags
      // are killed.
10894 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10895   predicate(UseSSE==0);
10896   match(Set dst (ConvF2I src));
10897   effect( KILL tmp, KILL cr );
10898   format %{ "FLD    $src\t# Convert float to int \n\t"
10899             "FLDCW  trunc mode\n\t"
10900             "SUB    ESP,4\n\t"
10901             "FISTp  [ESP + #0]\n\t"
10902             "FLDCW  std/24-bit mode\n\t"
10903             "POP    EAX\n\t"
10904             "CMP    EAX,0x80000000\n\t"
10905             "JNE,s  fast\n\t"
10906             "FLD    $src\n\t"
10907             "CALL   d2i_wrapper\n"
10908       "fast:" %}
10909   // DPR2I_encoding works for FPR2I
10910   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10911   ins_pipe( pipe_slow );
10912 %}
10913 
10914 // Convert a float in xmm to an int reg.
      // SSE variant (UseSSE>=1).  The result is fixed to EAX; EDX and the flags
      // are killed.
10915 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10916   predicate(UseSSE>=1);
10917   match(Set dst (ConvF2I src));
10918   effect( KILL tmp, KILL cr );
10919   format %{ "CVTTSS2SI $dst, $src\n\t"
10920             "CMP    $dst,0x80000000\n\t"
10921             "JNE,s  fast\n\t"
10922             "SUB    ESP, 4\n\t"
10923             "MOVSS  [ESP], $src\n\t"
10924             "FLD    [ESP]\n\t"
10925             "ADD    ESP, 4\n\t"
10926             "CALL   d2i_wrapper\n"
10927       "fast:" %}
10928   ins_encode %{
10929     Label fast;
10930     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10931     __ cmpl($dst$$Register, 0x80000000);
          // Any result other than 0x80000000 is final; skip the wrapper call.
10932     __ jccb(Assembler::notEqual, fast);
          // Slow path: load the source onto the x87 stack through a 4-byte
          // stack temp, then call the d2i_wrapper stub.
10933     __ subptr(rsp, 4);
10934     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10935     __ fld_s(Address(rsp, 0));
10936     __ addptr(rsp, 4);
10937     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10938     __ bind(fast);
10939   %}
10940   ins_pipe( pipe_slow );
10941 %}
10942 
10943 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
        // Convert a float in an x87 register to a long (UseSSE==0).  Per the
        // format string, a converted value of EDX:EAX == 0x80000000:00000000
        // sends control to d2l_wrapper.
10944   predicate(UseSSE==0);
10945   match(Set dst (ConvF2L src));
10946   effect( KILL cr );
10947   format %{ "FLD    $src\t# Convert float to long\n\t"
10948             "FLDCW  trunc mode\n\t"
10949             "SUB    ESP,8\n\t"
10950             "FISTp  [ESP + #0]\n\t"
10951             "FLDCW  std/24-bit mode\n\t"
10952             "POP    EAX\n\t"
10953             "POP    EDX\n\t"
10954             "CMP    EDX,0x80000000\n\t"
10955             "JNE,s  fast\n\t"
10956             "TEST   EAX,EAX\n\t"
10957             "JNE,s  fast\n\t"
10958             "FLD    $src\n\t"
10959             "CALL   d2l_wrapper\n"
10960       "fast:" %}
10961   // DPR2L_encoding works for FPR2L
10962   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10963   ins_pipe( pipe_slow );
10964 %}
10965 
10966 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // SSE variant (UseSSE>=1) of ConvF2L.  The result lands in EDX:EAX and
      // the flags are killed.
10967 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10968   predicate (UseSSE>=1);
10969   match(Set dst (ConvF2L src));
10970   effect( KILL cr );
10971   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10972             "MOVSS  [ESP],$src\n\t"
10973             "FLD_S  [ESP]\n\t"
10974             "FLDCW  trunc mode\n\t"
10975             "FISTp  [ESP + #0]\n\t"
10976             "FLDCW  std/24-bit mode\n\t"
10977             "POP    EAX\n\t"
10978             "POP    EDX\n\t"
10979             "CMP    EDX,0x80000000\n\t"
10980             "JNE,s  fast\n\t"
10981             "TEST   EAX,EAX\n\t"
10982             "JNE,s  fast\n\t"
10983             "SUB    ESP,4\t# Convert float to long\n\t"
10984             "MOVSS  [ESP],$src\n\t"
10985             "FLD_S  [ESP]\n\t"
10986             "ADD    ESP,4\n\t"
10987             "CALL   d2l_wrapper\n"
10988       "fast:" %}
10989   ins_encode %{
10990     Label fast;
          // Reserve 8 bytes: the 4-byte float goes in first, and the same temp
          // later receives the 8-byte integer result.
10991     __ subptr(rsp, 8);
10992     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10993     __ fld_s(Address(rsp, 0));
10994     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10995     __ fistp_d(Address(rsp, 0));
10996     // Restore the rounding mode, mask the exception
10997     if (Compile::current()->in_24_bit_fp_mode()) {
10998       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10999     } else {
11000       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11001     }
11002     // Load the converted long, adjust CPU stack
11003     __ pop(rax);
11004     __ pop(rdx);
          // Only the exact pattern rdx == 0x80000000 and rax == 0 takes the slow
          // path; anything else is the final result.
11005     __ cmpl(rdx, 0x80000000);
11006     __ jccb(Assembler::notEqual, fast);
11007     __ testl(rax, rax);
11008     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the float onto the x87 stack through a 4-byte
          // temp and call the d2l_wrapper stub.
11009     __ subptr(rsp, 4);
11010     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11011     __ fld_s(Address(rsp, 0));
11012     __ addptr(rsp, 4);
11013     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11014     __ bind(fast);
11015   %}
11016   ins_pipe( pipe_slow );
11017 %}
11018 
11019 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // ConvI2D on x87 (UseSSE<=1): the int is read from a stack slot (FILD)
        // and the result goes to an x87 double register.
11020   predicate( UseSSE<=1 );
11021   match(Set dst (ConvI2D src));
11022   format %{ "FILD   $src\n\t"
11023             "FSTP   $dst" %}
11024   opcode(0xDB, 0x0);  /* DB /0 */
11025   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11026   ins_pipe( fpu_reg_mem );
11027 %}
11028 
11029 instruct convI2D_reg(regD dst, rRegI src) %{
        // ConvI2D from a general register to an XMM register using CVTSI2SD.
        // Selected when UseSSE>=2 and UseXmmI2D is off; convXI2D_reg covers the
        // UseXmmI2D case.
11030   predicate( UseSSE>=2 && !UseXmmI2D );
11031   match(Set dst (ConvI2D src));
11032   format %{ "CVTSI2SD $dst,$src" %}
11033   ins_encode %{
11034     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11035   %}
11036   ins_pipe( pipe_slow );
11037 %}
11038 
11039 instruct convI2D_mem(regD dst, memory mem) %{
        // ConvI2D of an int loaded from memory (UseSSE>=2): CVTSI2SD takes the
        // memory operand directly, so the LoadI is subsumed.
11040   predicate( UseSSE>=2 );
11041   match(Set dst (ConvI2D (LoadI mem)));
11042   format %{ "CVTSI2SD $dst,$mem" %}
11043   ins_encode %{
11044     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11045   %}
11046   ins_pipe( pipe_slow );
11047 %}
11048 
11049 instruct convXI2D_reg(regD dst, rRegI src)
11050 %{
        // ConvI2D variant selected when UseSSE>=2 and UseXmmI2D is on: the int
        // is first moved into the XMM register (MOVD) and then converted in
        // place (CVTDQ2PD).
11051   predicate( UseSSE>=2 && UseXmmI2D );
11052   match(Set dst (ConvI2D src));
11053 
11054   format %{ "MOVD  $dst,$src\n\t"
11055             "CVTDQ2PD $dst,$dst\t# i2d" %}
11056   ins_encode %{
11057     __ movdl($dst$$XMMRegister, $src$$Register);
11058     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11059   %}
11060   ins_pipe(pipe_slow); // XXX
11061 %}
11062 
11063 instruct convI2DPR_mem(regDPR dst, memory mem) %{
        // ConvI2D of an int loaded from memory, x87 variant: the LoadI is
        // subsumed by FILD.  Selected when UseSSE<=1 and the compile does not
        // select 24-bit instructions.
11064   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11065   match(Set dst (ConvI2D (LoadI mem)));
11066   format %{ "FILD   $mem\n\t"
11067             "FSTP   $dst" %}
11068   opcode(0xDB);      /* DB /0 */
11069   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11070               Pop_Reg_DPR(dst));
11071   ins_pipe( fpu_reg_mem );
11072 %}
11073 
11074 // Convert a byte to a float; no rounding step needed.
      // The predicate recognises the byte case structurally: the ConvI2F input
      // must be an AndI whose second input is the constant 255.
11075 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11076   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11077   match(Set dst (ConvI2F src));
11078   format %{ "FILD   $src\n\t"
11079             "FSTP   $dst" %}
11080 
11081   opcode(0xDB, 0x0);  /* DB /0 */
11082   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11083   ins_pipe( fpu_reg_mem );
11084 %}
11085 
11086 // In 24-bit mode, force exponent rounding by storing back out
      // ConvI2F on x87 from an int stack slot to a float stack slot.
11087 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11088   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11089   match(Set dst (ConvI2F src));
11090   ins_cost(200);
11091   format %{ "FILD   $src\n\t"
11092             "FSTP_S $dst" %}
11093   opcode(0xDB, 0x0);  /* DB /0 */
11094   ins_encode( Push_Mem_I(src),
11095               Pop_Mem_FPR(dst));
11096   ins_pipe( fpu_mem_mem );
11097 %}
11098 
11099 // In 24-bit mode, force exponent rounding by storing back out
      // ConvI2F on x87 of an int loaded from memory; the LoadI is subsumed by
      // FILD and the result is written to a float stack slot.
11100 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11101   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11102   match(Set dst (ConvI2F (LoadI mem)));
11103   ins_cost(200);
11104   format %{ "FILD   $mem\n\t"
11105             "FSTP_S $dst" %}
11106   opcode(0xDB);  /* DB /0 */
11107   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11108               Pop_Mem_FPR(dst));
11109   ins_pipe( fpu_mem_mem );
11110 %}
11111 
11112 // This instruction does not round to 24-bits
      // ConvI2F on x87 from an int stack slot to an x87 float register.
11113 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11114   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11115   match(Set dst (ConvI2F src));
11116   format %{ "FILD   $src\n\t"
11117             "FSTP   $dst" %}
11118   opcode(0xDB, 0x0);  /* DB /0 */
11119   ins_encode( Push_Mem_I(src),
11120               Pop_Reg_FPR(dst));
11121   ins_pipe( fpu_reg_mem );
11122 %}
11123 
11124 // This instruction does not round to 24-bits
      // ConvI2F on x87 of an int loaded from memory; the LoadI is subsumed by
      // FILD and the result goes to an x87 float register.
11125 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11126   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11127   match(Set dst (ConvI2F (LoadI mem)));
11128   format %{ "FILD   $mem\n\t"
11129             "FSTP   $dst" %}
11130   opcode(0xDB);      /* DB /0 */
11131   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11132               Pop_Reg_FPR(dst));
11133   ins_pipe( fpu_reg_mem );
11134 %}
11135 
11136 // Convert an int to a float in xmm; no rounding step needed.
      // Selected when UseSSE is exactly 1, or when UseSSE>=2 and UseXmmI2F is
      // off (convXI2F_reg covers UseSSE>=2 with UseXmmI2F on).  The grouping
      // of the && term is spelled out explicitly; it is the same grouping the
      // operator precedence already gave.
11137 instruct convI2F_reg(regF dst, rRegI src) %{
11138   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11139   match(Set dst (ConvI2F src));
11140   format %{ "CVTSI2SS $dst, $src" %}
11141   ins_encode %{
11142     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11143   %}
11144   ins_pipe( pipe_slow );
11145 %}
11146 
11147  instruct convXI2F_reg(regF dst, rRegI src)
11148 %{
        // ConvI2F variant selected when UseSSE>=2 and UseXmmI2F is on: the int
        // is first moved into the XMM register (MOVD) and then converted in
        // place (CVTDQ2PS).
11149   predicate( UseSSE>=2 && UseXmmI2F );
11150   match(Set dst (ConvI2F src));
11151 
11152   format %{ "MOVD  $dst,$src\n\t"
11153             "CVTDQ2PS $dst,$dst\t# i2f" %}
11154   ins_encode %{
11155     __ movdl($dst$$XMMRegister, $src$$Register);
11156     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11157   %}
11158   ins_pipe(pipe_slow); // XXX
11159 %}
11160 
11161 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
        // Sign-extending int-to-long conversion into a register pair.  Per the
        // format string, src is copied to both halves and the high half is
        // then arithmetically shifted right by 31.  The flags are killed.
11162   match(Set dst (ConvI2L src));
11163   effect(KILL cr);
11164   ins_cost(375);
11165   format %{ "MOV    $dst.lo,$src\n\t"
11166             "MOV    $dst.hi,$src\n\t"
11167             "SAR    $dst.hi,31" %}
11168   ins_encode(convert_int_long(dst,src));
11169   ins_pipe( ialu_reg_reg_long );
11170 %}
11171 
11172 // Zero-extend convert int to long
      // Matches (ConvI2L src) & mask, where mask is an immL_32bits operand, so
      // the high half is simply cleared with XOR instead of sign-extended.
11173 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11174   match(Set dst (AndL (ConvI2L src) mask) );
11175   effect( KILL flags );
11176   ins_cost(250);
11177   format %{ "MOV    $dst.lo,$src\n\t"
11178             "XOR    $dst.hi,$dst.hi" %}
11179   opcode(0x33); // XOR
11180   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11181   ins_pipe( ialu_reg_reg_long );
11182 %}
11183 
11184 // Zero-extend long
      // Matches src & mask, where mask is an immL_32bits operand: the low half
      // is copied and the high half is cleared with XOR.  The flags are killed.
11185 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11186   match(Set dst (AndL src mask) );
11187   effect( KILL flags );
11188   ins_cost(250);
11189   format %{ "MOV    $dst.lo,$src.lo\n\t"
11190             "XOR    $dst.hi,$dst.hi" %}
11191   opcode(0x33); // XOR
11192   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11193   ins_pipe( ialu_reg_reg_long );
11194 %}
11195 
11196 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D on x87 (UseSSE<=1).  Per the format string, the long register
        // pair is pushed on the CPU stack, loaded with FILD, and the result is
        // stored to a double stack slot.  The flags are killed.
11197   predicate (UseSSE<=1);
11198   match(Set dst (ConvL2D src));
11199   effect( KILL cr );
11200   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11201             "PUSH   $src.lo\n\t"
11202             "FILD   ST,[ESP + #0]\n\t"
11203             "ADD    ESP,8\n\t"
11204             "FSTP_D $dst\t# D-round" %}
11205   opcode(0xDF, 0x5);  /* DF /5 */
11206   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11207   ins_pipe( pipe_slow );
11208 %}
11209 
11210 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D into an XMM register (UseSSE>=2).  Per the format string, the
        // long is pushed on the CPU stack, converted on the x87 stack, stored
        // back to the same 8 bytes as a double, and loaded into dst with MOVSD.
11211   predicate (UseSSE>=2);
11212   match(Set dst (ConvL2D src));
11213   effect( KILL cr );
11214   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11215             "PUSH   $src.lo\n\t"
11216             "FILD_D [ESP]\n\t"
11217             "FSTP_D [ESP]\n\t"
11218             "MOVSD  $dst,[ESP]\n\t"
11219             "ADD    ESP,8" %}
11220   opcode(0xDF, 0x5);  /* DF /5 */
11221   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11222   ins_pipe( pipe_slow );
11223 %}
11224 
11225 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F into an XMM register (UseSSE>=1).  Per the format string, the
        // long is pushed on the CPU stack, converted on the x87 stack, stored
        // back as a float, and loaded into dst with MOVSS.
11226   predicate (UseSSE>=1);
11227   match(Set dst (ConvL2F src));
11228   effect( KILL cr );
11229   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11230             "PUSH   $src.lo\n\t"
11231             "FILD_D [ESP]\n\t"
11232             "FSTP_S [ESP]\n\t"
11233             "MOVSS  $dst,[ESP]\n\t"
11234             "ADD    ESP,8" %}
11235   opcode(0xDF, 0x5);  /* DF /5 */
        // The 0x8 passed to Push_ResultF matches the 8 bytes pushed for the long
        // (see ADD ESP,8 in the format string).
11236   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11237   ins_pipe( pipe_slow );
11238 %}
11239 
11240 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F through the x87 stack with the result stored to a float stack
        // slot.  The flags are killed.
        // NOTE(review): unlike convL2F_reg (UseSSE>=1) this rule carries no
        // predicate; confirm that being available at every UseSSE level is
        // intended.
11241   match(Set dst (ConvL2F src));
11242   effect( KILL cr );
11243   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11244             "PUSH   $src.lo\n\t"
11245             "FILD   ST,[ESP + #0]\n\t"
11246             "ADD    ESP,8\n\t"
11247             "FSTP_S $dst\t# F-round" %}
11248   opcode(0xDF, 0x5);  /* DF /5 */
11249   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11250   ins_pipe( pipe_slow );
11251 %}
11252 
11253 instruct convL2I_reg( rRegI dst, eRegL src ) %{
        // Long-to-int conversion: per the format string, a single MOV of the low
        // half of the source pair.
11254   match(Set dst (ConvL2I src));
11255   effect( DEF dst, USE src );
11256   format %{ "MOV    $dst,$src.lo" %}
11257   ins_encode(enc_CopyL_Lo(dst,src));
11258   ins_pipe( ialu_reg_reg );
11259 %}
11260 
11261 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot to an int register: a plain 32-bit
        // load of the slot, which is addressed off rsp.
11262   match(Set dst (MoveF2I src));
11263   effect( DEF dst, USE src );
11264   ins_cost(100);
11265   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11266   ins_encode %{
11267     __ movl($dst$$Register, Address(rsp, $src$$disp));
11268   %}
11269   ins_pipe( ialu_reg_mem );
11270 %}
11271 
11272 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
        // MoveF2I from an x87 float register to an int stack slot (UseSSE==0).
11273   predicate(UseSSE==0);
11274   match(Set dst (MoveF2I src));
11275   effect( DEF dst, USE src );
11276 
11277   ins_cost(125);
11278   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11279   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11280   ins_pipe( fpu_mem_reg );
11281 %}
11282 
11283 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
        // MoveF2I from an XMM float register to an int stack slot (UseSSE>=1).
11284   predicate(UseSSE>=1);
11285   match(Set dst (MoveF2I src));
11286   effect( DEF dst, USE src );
11287 
11288   ins_cost(95);
11289   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11290   ins_encode %{
11291     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11292   %}
11293   ins_pipe( pipe_slow );
11294 %}
11295 
11296 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to an int register via MOVD
        // (UseSSE>=2).  Its ins_cost (85) is the lowest of the MoveF2I rules in
        // this file section.
11297   predicate(UseSSE>=2);
11298   match(Set dst (MoveF2I src));
11299   effect( DEF dst, USE src );
11300   ins_cost(85);
11301   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11302   ins_encode %{
11303     __ movdl($dst$$Register, $src$$XMMRegister);
11304   %}
11305   ins_pipe( pipe_slow );
11306 %}
11307 
11308 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register to a float stack slot: a plain 32-bit
        // store to the slot, which is addressed off rsp.
11309   match(Set dst (MoveI2F src));
11310   effect( DEF dst, USE src );
11311 
11312   ins_cost(100);
11313   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11314   ins_encode %{
11315     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11316   %}
11317   ins_pipe( ialu_mem_reg );
11318 %}
11319 
11320 
11321 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
        // MoveI2F from an int stack slot to an x87 float register (UseSSE==0):
        // the slot is loaded as a 32-bit real (FLD_S) and popped into dst.
11322   predicate(UseSSE==0);
11323   match(Set dst (MoveI2F src));
11324   effect(DEF dst, USE src);
11325 
11326   ins_cost(125);
11327   format %{ "FLD_S  $src\n\t"
11328             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11329   opcode(0xD9);               /* D9 /0, FLD m32real */
11330   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11331               Pop_Reg_FPR(dst) );
11332   ins_pipe( fpu_reg_mem );
11333 %}
11334 
11335 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
        // MoveI2F from an int stack slot to an XMM float register (UseSSE>=1).
11336   predicate(UseSSE>=1);
11337   match(Set dst (MoveI2F src));
11338   effect( DEF dst, USE src );
11339 
11340   ins_cost(95);
11341   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11342   ins_encode %{
11343     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11344   %}
11345   ins_pipe( pipe_slow );
11346 %}
11347 
11348 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
        // MoveI2F directly from an int register to an XMM register via MOVD
        // (UseSSE>=2).
11349   predicate(UseSSE>=2);
11350   match(Set dst (MoveI2F src));
11351   effect( DEF dst, USE src );
11352 
11353   ins_cost(85);
11354   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11355   ins_encode %{
11356     __ movdl($dst$$XMMRegister, $src$$Register);
11357   %}
11358   ins_pipe( pipe_slow );
11359 %}
11360 
// MoveD2L, stack-slot to long register pair form (no predicate).
// Emits two 0x8B MOV loads: per the format string, RegMem fills $dst.lo from
// $src and RegMem_Hi fills $dst.hi from $src+4.
11361 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11362   match(Set dst (MoveD2L src));
11363   effect(DEF dst, USE src);
11364 
11365   ins_cost(250);
11366   format %{ "MOV    $dst.lo,$src\n\t"
11367             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11368   opcode(0x8B, 0x8B);
11369   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11370   ins_pipe( ialu_mem_long_reg );
11371 %}
11372 
// MoveD2L, regDPR to stack-slot form; selected when UseSSE<=1.
// The Pop_Mem_Reg_DPR encoding stores $src into the stack slot $dst
// (printed as FST_D).
11373 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11374   predicate(UseSSE<=1);
11375   match(Set dst (MoveD2L src));
11376   effect(DEF dst, USE src);
11377 
11378   ins_cost(125);
11379   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11380   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11381   ins_pipe( fpu_mem_reg );
11382 %}
11383 
// MoveD2L, XMM-register to stack-slot form; selected when UseSSE>=2.
// Stores $src to rsp + $dst$$disp via movdbl (printed as MOVSD).
11384 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11385   predicate(UseSSE>=2);
11386   match(Set dst (MoveD2L src));
11387   effect(DEF dst, USE src);
11388   ins_cost(95);
11389   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11390   ins_encode %{
11391     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11392   %}
11393   ins_pipe( pipe_slow );
11394 %}
11395 
// MoveD2L, XMM-register to long register pair form; selected when UseSSE>=2.
// Sequence:
//   1. movdl copies the low 32 bits of $src into the low half of $dst;
//   2. pshuflw with immediate 0x4e writes into $tmp a copy of $src whose two
//      low dwords are swapped, bringing the upper 32 bits down to the bottom;
//   3. movdl copies that dword from $tmp into HIGH_FROM_LOW($dst).
// $tmp is listed as TEMP because step 2 overwrites it; $src is left unmodified.
11396 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11397   predicate(UseSSE>=2);
11398   match(Set dst (MoveD2L src));
11399   effect(DEF dst, USE src, TEMP tmp);
11400   ins_cost(85);
11401   format %{ "MOVD   $dst.lo,$src\n\t"
11402             "PSHUFLW $tmp,$src,0x4E\n\t"
11403             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11404   ins_encode %{
11405     __ movdl($dst$$Register, $src$$XMMRegister);
11406     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11407     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11408   %}
11409   ins_pipe( pipe_slow );
11410 %}
11411 
// MoveL2D, long register pair to stack-slot form (no predicate).
// Emits two 0x89 MOV stores: per the format string, RegMem writes $src.lo to
// $dst and RegMem_Hi writes $src.hi to $dst+4.
11412 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11413   match(Set dst (MoveL2D src));
11414   effect(DEF dst, USE src);
11415 
11416   ins_cost(200);
11417   format %{ "MOV    $dst,$src.lo\n\t"
11418             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11419   opcode(0x89, 0x89);
11420   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11421   ins_pipe( ialu_mem_long_reg );
11422 %}
11423 
11424 
// MoveL2D, stack-slot to regDPR form; selected when UseSSE<=1.
// Encodes opcode 0xDD with /0 (FLD m64real) against the stack slot $src and
// then Pop_Reg_DPR into $dst; the format prints this as FLD_D followed by FSTP.
11425 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11426   predicate(UseSSE<=1);
11427   match(Set dst (MoveL2D src));
11428   effect(DEF dst, USE src);
11429   ins_cost(125);
11430 
11431   format %{ "FLD_D  $src\n\t"
11432             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11433   opcode(0xDD);               /* DD /0, FLD m64real */
11434   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11435               Pop_Reg_DPR(dst) );
11436   ins_pipe( fpu_reg_mem );
11437 %}
11438 
11439 
// MoveL2D, stack-slot to XMM-register form; selected when UseSSE>=2 and
// UseXmmLoadAndClearUpper is set (MoveL2D_stack_reg_sse_partial covers the
// case where that flag is clear).
// Loads the slot at rsp + $src$$disp into $dst via movdbl (printed as MOVSD).
11440 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11441   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11442   match(Set dst (MoveL2D src));
11443   effect(DEF dst, USE src);
11444 
11445   ins_cost(95);
11446   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11447   ins_encode %{
11448     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11449   %}
11450   ins_pipe( pipe_slow );
11451 %}
11452 
// MoveL2D, stack-slot to XMM-register form; selected when UseSSE>=2 and
// UseXmmLoadAndClearUpper is clear (the complement of MoveL2D_stack_reg_sse).
// The encoding is the same movdbl call as in MoveL2D_stack_reg_sse; only the
// predicate and the printed mnemonic (MOVLPD) differ.
// NOTE(review): presumably movdbl itself chooses the machine instruction from
// the flag -- confirm in the MacroAssembler.
// NOTE(review): the format tag reads "MoveL2D_stack_reg_sse", without the
// "_partial" suffix of this instruct's name.
11453 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11454   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11455   match(Set dst (MoveL2D src));
11456   effect(DEF dst, USE src);
11457 
11458   ins_cost(95);
11459   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11460   ins_encode %{
11461     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11462   %}
11463   ins_pipe( pipe_slow );
11464 %}
11465 
// MoveL2D, long register pair to XMM-register form; selected when UseSSE>=2.
// Sequence:
//   1. movdl copies the low half of $src into $dst;
//   2. movdl copies the high half, HIGH_FROM_LOW($src), into $tmp;
//   3. punpckldq interleaves the low dwords of $dst and $tmp, leaving
//      lo:hi in the low 64 bits of $dst.
// Both $dst and $tmp are declared TEMP in the effect list.
// NOTE(review): presumably $dst is TEMP rather than DEF so the allocator keeps
// it distinct from $tmp -- confirm.
11466 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11467   predicate(UseSSE>=2);
11468   match(Set dst (MoveL2D src));
11469   effect(TEMP dst, USE src, TEMP tmp);
11470   ins_cost(85);
11471   format %{ "MOVD   $dst,$src.lo\n\t"
11472             "MOVD   $tmp,$src.hi\n\t"
11473             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11474   ins_encode %{
11475     __ movdl($dst$$XMMRegister, $src$$Register);
11476     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11477     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11478   %}
11479   ins_pipe( pipe_slow );
11480 %}
11481 
11482 
11483 // =======================================================================
11484 // fast clearing of an array
11485 // Small ClearArray non-AVX512.
// Matches ClearArray when the node is not is_large() and UseAVX <= 2.
// Operands use fixed register classes: cnt (eCXRegI), base (eDIRegP) and
// zero (eAXRegI). cnt and base are USE_KILL, zero and the flags are KILL, and
// tmp is an XMM TEMP.
// The format template below is only descriptive listing text, chosen from the
// UseFastStosb / UseXMMForObjInit flags. The machine code itself comes from
// the single clear_mem call, which is passed false as its boolean argument
// (the rep_stos_large variants pass true) and knoreg in place of a mask register.
11486 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11487   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11488   match(Set dummy (ClearArray cnt base));
11489   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11490 
11491   format %{ $$template
11492     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11493     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11494     $$emit$$"JG     LARGE\n\t"
11495     $$emit$$"SHL    ECX, 1\n\t"
11496     $$emit$$"DEC    ECX\n\t"
11497     $$emit$$"JS     DONE\t# Zero length\n\t"
11498     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11499     $$emit$$"DEC    ECX\n\t"
11500     $$emit$$"JGE    LOOP\n\t"
11501     $$emit$$"JMP    DONE\n\t"
11502     $$emit$$"# LARGE:\n\t"
11503     if (UseFastStosb) {
11504        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11505        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11506     } else if (UseXMMForObjInit) {
11507        $$emit$$"MOV     RDI,RAX\n\t"
11508        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11509        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11510        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11511        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11512        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11513        $$emit$$"ADD     0x40,RAX\n\t"
11514        $$emit$$"# L_zero_64_bytes:\n\t"
11515        $$emit$$"SUB     0x8,RCX\n\t"
11516        $$emit$$"JGE     L_loop\n\t"
11517        $$emit$$"ADD     0x4,RCX\n\t"
11518        $$emit$$"JL      L_tail\n\t"
11519        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11520        $$emit$$"ADD     0x20,RAX\n\t"
11521        $$emit$$"SUB     0x4,RCX\n\t"
11522        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11523        $$emit$$"ADD     0x4,RCX\n\t"
11524        $$emit$$"JLE     L_end\n\t"
11525        $$emit$$"DEC     RCX\n\t"
11526        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11527        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11528        $$emit$$"ADD     0x8,RAX\n\t"
11529        $$emit$$"DEC     RCX\n\t"
11530        $$emit$$"JGE     L_sloop\n\t"
11531        $$emit$$"# L_end:\n\t"
11532     } else {
11533        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11534        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11535     }
11536     $$emit$$"# DONE"
11537   %}
11538   ins_encode %{
11539     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11540                  $tmp$$XMMRegister, false, knoreg);
11541   %}
11542   ins_pipe( pipe_slow );
11543 %}
11544 
11545 // Small ClearArray AVX512 non-constant length.
// Matches ClearArray when the node is not is_large() and UseAVX > 2, with the
// element count held in a register (cnt is eCXRegI).
// It differs from rep_stos in three ways: tmp is a legRegD, an extra kReg
// operand ktmp is reserved as TEMP, and ins_cost is set to 125.
// The format template is only descriptive listing text. The code is emitted by
// clear_mem, which is passed false as its boolean argument and $ktmp as the
// mask register.
11546 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11547   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11548   match(Set dummy (ClearArray cnt base));
11549   ins_cost(125);
11550   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11551 
11552   format %{ $$template
11553     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11554     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11555     $$emit$$"JG     LARGE\n\t"
11556     $$emit$$"SHL    ECX, 1\n\t"
11557     $$emit$$"DEC    ECX\n\t"
11558     $$emit$$"JS     DONE\t# Zero length\n\t"
11559     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11560     $$emit$$"DEC    ECX\n\t"
11561     $$emit$$"JGE    LOOP\n\t"
11562     $$emit$$"JMP    DONE\n\t"
11563     $$emit$$"# LARGE:\n\t"
11564     if (UseFastStosb) {
11565        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11566        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11567     } else if (UseXMMForObjInit) {
11568        $$emit$$"MOV     RDI,RAX\n\t"
11569        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11570        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11571        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11572        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11573        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11574        $$emit$$"ADD     0x40,RAX\n\t"
11575        $$emit$$"# L_zero_64_bytes:\n\t"
11576        $$emit$$"SUB     0x8,RCX\n\t"
11577        $$emit$$"JGE     L_loop\n\t"
11578        $$emit$$"ADD     0x4,RCX\n\t"
11579        $$emit$$"JL      L_tail\n\t"
11580        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11581        $$emit$$"ADD     0x20,RAX\n\t"
11582        $$emit$$"SUB     0x4,RCX\n\t"
11583        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11584        $$emit$$"ADD     0x4,RCX\n\t"
11585        $$emit$$"JLE     L_end\n\t"
11586        $$emit$$"DEC     RCX\n\t"
11587        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11588        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11589        $$emit$$"ADD     0x8,RAX\n\t"
11590        $$emit$$"DEC     RCX\n\t"
11591        $$emit$$"JGE     L_sloop\n\t"
11592        $$emit$$"# L_end:\n\t"
11593     } else {
11594        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11595        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11596     }
11597     $$emit$$"# DONE"
11598   %}
11599   ins_encode %{
11600     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11601                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11602   %}
11603   ins_pipe( pipe_slow );
11604 %}
11605 
11606 // Large ClearArray non-AVX512.
// Matches ClearArray when the node is is_large() and UseAVX <= 2.
// Operands and effects are the same as rep_stos: cnt (eCXRegI) and base
// (eDIRegP) are USE_KILL, zero (eAXRegI) and the flags are KILL, tmp is an
// XMM TEMP.
// Unlike the small variant, the format template has no short-length prologue
// (no InitArrayShortSize compare); it is descriptive listing text only.
// The code is emitted by clear_mem, which is passed true as its boolean
// argument and knoreg in place of a mask register.
11607 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11608   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11609   match(Set dummy (ClearArray cnt base));
11610   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11611   format %{ $$template
11612     if (UseFastStosb) {
11613        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11614        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11615        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11616     } else if (UseXMMForObjInit) {
11617        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11618        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11619        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11620        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11621        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11622        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11623        $$emit$$"ADD     0x40,RAX\n\t"
11624        $$emit$$"# L_zero_64_bytes:\n\t"
11625        $$emit$$"SUB     0x8,RCX\n\t"
11626        $$emit$$"JGE     L_loop\n\t"
11627        $$emit$$"ADD     0x4,RCX\n\t"
11628        $$emit$$"JL      L_tail\n\t"
11629        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11630        $$emit$$"ADD     0x20,RAX\n\t"
11631        $$emit$$"SUB     0x4,RCX\n\t"
11632        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11633        $$emit$$"ADD     0x4,RCX\n\t"
11634        $$emit$$"JLE     L_end\n\t"
11635        $$emit$$"DEC     RCX\n\t"
11636        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11637        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11638        $$emit$$"ADD     0x8,RAX\n\t"
11639        $$emit$$"DEC     RCX\n\t"
11640        $$emit$$"JGE     L_sloop\n\t"
11641        $$emit$$"# L_end:\n\t"
11642     } else {
11643        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11644        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11645        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11646     }
11647     $$emit$$"# DONE"
11648   %}
11649   ins_encode %{
11650     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11651                  $tmp$$XMMRegister, true, knoreg);
11652   %}
11653   ins_pipe( pipe_slow );
11654 %}
11655 
11656 // Large ClearArray AVX512.
// Matches ClearArray when the node is is_large() and UseAVX > 2.
// It differs from rep_stos_large in two ways: tmp is a legRegD and an extra
// kReg operand ktmp is reserved as TEMP.
// The format template is descriptive listing text only. The code is emitted by
// clear_mem, which is passed true as its boolean argument and $ktmp as the
// mask register.
11657 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11658   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11659   match(Set dummy (ClearArray cnt base));
11660   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11661   format %{ $$template
11662     if (UseFastStosb) {
11663        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11664        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11665        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11666     } else if (UseXMMForObjInit) {
11667        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11668        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11669        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11670        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11671        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11672        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11673        $$emit$$"ADD     0x40,RAX\n\t"
11674        $$emit$$"# L_zero_64_bytes:\n\t"
11675        $$emit$$"SUB     0x8,RCX\n\t"
11676        $$emit$$"JGE     L_loop\n\t"
11677        $$emit$$"ADD     0x4,RCX\n\t"
11678        $$emit$$"JL      L_tail\n\t"
11679        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11680        $$emit$$"ADD     0x20,RAX\n\t"
11681        $$emit$$"SUB     0x4,RCX\n\t"
11682        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11683        $$emit$$"ADD     0x4,RCX\n\t"
11684        $$emit$$"JLE     L_end\n\t"
11685        $$emit$$"DEC     RCX\n\t"
11686        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11687        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11688        $$emit$$"ADD     0x8,RAX\n\t"
11689        $$emit$$"DEC     RCX\n\t"
11690        $$emit$$"JGE     L_sloop\n\t"
11691        $$emit$$"# L_end:\n\t"
11692     } else {
11693        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11694        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11695        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11696     }
11697     $$emit$$"# DONE"
11698   %}
11699   ins_encode %{
11700     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11701                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11702   %}
11703   ins_pipe( pipe_slow );
11704 %}
11705 
11706 // Small ClearArray AVX512 constant length.
// Matches ClearArray with an immediate element count (cnt is immI) when the
// node is not is_large(), UseAVX > 2 and VM_Version::supports_avx512vlbw().
// Unlike the register-count variants, base (eRegP) and zero (rRegI) are not
// pinned to fixed registers, and base is not listed in the effect clause, so
// it is not declared killed. zero, tmp and ktmp are TEMPs and the flags are
// KILL.
// The count is passed to clear_mem as the compile-time value $cnt$$constant.
// ins_cost(100) is lower than the 125 set on rep_stos_evex.
11707 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11708 %{
11709   predicate(!((ClearArrayNode*)n)->is_large() &&
11710                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11711   match(Set dummy (ClearArray cnt base));
11712   ins_cost(100);
11713   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11714   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11715   ins_encode %{
11716    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11717   %}
11718   ins_pipe(pipe_slow);
11719 %}
11720 
// Matches StrComp with encoding StrIntrinsicNode::LL when
// VM_Version::supports_avx512vlbw() is false.
// Operands are pinned to register classes: str1 (eDIRegP), cnt1 (eCXRegI),
// str2 (eSIRegP), cnt2 (eDXRegI), result (eAXRegI). All four inputs are
// USE_KILL, tmp1 is an XMM TEMP and the flags are KILL.
// Delegates to string_compare with the LL encoding and knoreg (no mask register).
11721 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11722                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11723   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11724   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11725   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11726 
11727   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11728   ins_encode %{
11729     __ string_compare($str1$$Register, $str2$$Register,
11730                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11731                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11732   %}
11733   ins_pipe( pipe_slow );
11734 %}
11735 
// Matches StrComp with encoding StrIntrinsicNode::LL when
// VM_Version::supports_avx512vlbw() is true.
// Register pinning and effects are the same as string_compareL, plus a kReg
// TEMP (ktmp) that is passed to string_compare as the mask register.
11736 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11737                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11738   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11739   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11740   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11741 
11742   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11743   ins_encode %{
11744     __ string_compare($str1$$Register, $str2$$Register,
11745                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11746                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11747   %}
11748   ins_pipe( pipe_slow );
11749 %}
11750 
// Matches StrComp with encoding StrIntrinsicNode::UU when
// VM_Version::supports_avx512vlbw() is false.
// Operands are pinned to register classes: str1 (eDIRegP), cnt1 (eCXRegI),
// str2 (eSIRegP), cnt2 (eDXRegI), result (eAXRegI). All four inputs are
// USE_KILL, tmp1 is an XMM TEMP and the flags are KILL.
// Delegates to string_compare with the UU encoding and knoreg (no mask register).
11751 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11752                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11753   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11754   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11755   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11756 
11757   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11758   ins_encode %{
11759     __ string_compare($str1$$Register, $str2$$Register,
11760                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11761                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11762   %}
11763   ins_pipe( pipe_slow );
11764 %}
11765 
// Matches StrComp with encoding StrIntrinsicNode::UU when
// VM_Version::supports_avx512vlbw() is true.
// Register pinning and effects are the same as string_compareU, plus a kReg
// TEMP (ktmp) that is passed to string_compare as the mask register.
11766 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11767                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11768   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11769   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11770   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11771 
11772   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11773   ins_encode %{
11774     __ string_compare($str1$$Register, $str2$$Register,
11775                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11776                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11777   %}
11778   ins_pipe( pipe_slow );
11779 %}
11780 
// Matches StrComp with encoding StrIntrinsicNode::LU when
// VM_Version::supports_avx512vlbw() is false.
// Operands are pinned to register classes: str1 (eDIRegP), cnt1 (eCXRegI),
// str2 (eSIRegP), cnt2 (eDXRegI), result (eAXRegI). All four inputs are
// USE_KILL, tmp1 is an XMM TEMP and the flags are KILL.
// Delegates to string_compare with the LU encoding and knoreg (no mask register).
11781 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11782                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11783   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11784   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11785   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11786 
11787   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11788   ins_encode %{
11789     __ string_compare($str1$$Register, $str2$$Register,
11790                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11791                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11792   %}
11793   ins_pipe( pipe_slow );
11794 %}
11795 
// Matches StrComp with encoding StrIntrinsicNode::LU when
// VM_Version::supports_avx512vlbw() is true.
// Register pinning and effects are the same as string_compareLU, plus a kReg
// TEMP (ktmp) that is passed to string_compare as the mask register.
11796 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11797                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11798   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11799   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11800   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11801 
11802   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11803   ins_encode %{
11804     __ string_compare($str1$$Register, $str2$$Register,
11805                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11806                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11807   %}
11808   ins_pipe( pipe_slow );
11809 %}
11810 
// Matches StrComp with encoding StrIntrinsicNode::UL when
// VM_Version::supports_avx512vlbw() is false.
// The register pinning is mirrored relative to the LL/UU/LU variants:
// str1 is eSIRegP and cnt1 is eDXRegI, while str2 is eDIRegP and cnt2 is
// eCXRegI. The string_compare call also passes the operands in swapped order
// (str2, str1, cnt2, cnt1), so EDI/ECX still arrive as its first string/count
// arguments.
// NOTE(review): how string_compare accounts for the swap under UL is not
// visible here -- confirm in the MacroAssembler.
11811 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11812                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11813   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11814   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11815   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11816 
11817   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11818   ins_encode %{
11819     __ string_compare($str2$$Register, $str1$$Register,
11820                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11821                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11822   %}
11823   ins_pipe( pipe_slow );
11824 %}
11825 
// Matches StrComp with encoding StrIntrinsicNode::UL when
// VM_Version::supports_avx512vlbw() is true.
// It uses the same mirrored register pinning and swapped argument order as
// string_compareUL (str2, str1, cnt2, cnt1 are passed to string_compare), plus
// a kReg TEMP (ktmp) passed as the mask register.
11826 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11827                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11828   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11829   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11830   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11831 
11832   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11833   ins_encode %{
11834     __ string_compare($str2$$Register, $str1$$Register,
11835                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11836                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11837   %}
11838   ins_pipe( pipe_slow );
11839 %}
11840 
11841 // fast string equals
// Matches StrEquals when VM_Version::supports_avx512vlbw() is false.
// Operands are pinned: str1 (eDIRegP), str2 (eSIRegP), cnt (eCXRegI),
// result (eAXRegI), tmp3 (eBXRegI). str1, str2 and cnt are USE_KILL, tmp3 and
// the flags are KILL, and tmp1/tmp2 are XMM TEMPs.
// Delegates to arrays_equals with false as its first argument (array_equalsB
// passes true there), $cnt as the length register, and knoreg for the mask
// register.
11842 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11843                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11844   predicate(!VM_Version::supports_avx512vlbw());
11845   match(Set result (StrEquals (Binary str1 str2) cnt));
11846   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11847 
11848   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11849   ins_encode %{
11850     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11851                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11852                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11853   %}
11854 
11855   ins_pipe( pipe_slow );
11856 %}
11857 
// Matches StrEquals when VM_Version::supports_avx512vlbw() is true.
// Register pinning and effects are the same as string_equals, plus a kReg
// TEMP (ktmp) that is passed to arrays_equals as the mask register.
11858 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11859                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11860   predicate(VM_Version::supports_avx512vlbw());
11861   match(Set result (StrEquals (Binary str1 str2) cnt));
11862   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11863 
11864   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11865   ins_encode %{
11866     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11867                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11868                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11869   %}
11870 
11871   ins_pipe( pipe_slow );
11872 %}
11873 
11874 
11875 // fast search of substring with known size.
// Matches StrIndexOf with encoding StrIntrinsicNode::LL when the substring
// length is an immediate (int_cnt2) and UseSSE42Intrinsics is set.
// Operands are pinned: str1 (eDIRegP), cnt1 (eDXRegI), str2 (eSIRegP),
// result (eBXRegI). cnt2 (eAXRegI) and tmp (eCXRegI) are not match inputs;
// they are scratch registers declared KILL.
// The stub is chosen at code-emission time from the constant length:
// string_indexofC8 for icnt2 >= 16, otherwise string_indexof.
11876 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11877                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11878   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11879   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11880   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11881 
11882   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11883   ins_encode %{
11884     int icnt2 = (int)$int_cnt2$$constant;
11885     if (icnt2 >= 16) {
11886       // IndexOf for constant substrings with size >= 16 elements
11887       // which don't need to be loaded through stack.
11888       __ string_indexofC8($str1$$Register, $str2$$Register,
11889                           $cnt1$$Register, $cnt2$$Register,
11890                           icnt2, $result$$Register,
11891                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11892     } else {
11893       // Small strings are loaded through stack if they cross page boundary.
11894       __ string_indexof($str1$$Register, $str2$$Register,
11895                         $cnt1$$Register, $cnt2$$Register,
11896                         icnt2, $result$$Register,
11897                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11898     }
11899   %}
11900   ins_pipe( pipe_slow );
11901 %}
11902 
11903 // fast search of substring with known size.
// Matches StrIndexOf with encoding StrIntrinsicNode::UU when the substring
// length is an immediate (int_cnt2) and UseSSE42Intrinsics is set.
// Operand pinning and effects are the same as string_indexof_conL.
// The stub is chosen at code-emission time from the constant length:
// string_indexofC8 for icnt2 >= 8 (the LL variant uses 16), otherwise
// string_indexof.
11904 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11905                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11906   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11907   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11908   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11909 
11910   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11911   ins_encode %{
11912     int icnt2 = (int)$int_cnt2$$constant;
11913     if (icnt2 >= 8) {
11914       // IndexOf for constant substrings with size >= 8 elements
11915       // which don't need to be loaded through stack.
11916       __ string_indexofC8($str1$$Register, $str2$$Register,
11917                           $cnt1$$Register, $cnt2$$Register,
11918                           icnt2, $result$$Register,
11919                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11920     } else {
11921       // Small strings are loaded through stack if they cross page boundary.
11922       __ string_indexof($str1$$Register, $str2$$Register,
11923                         $cnt1$$Register, $cnt2$$Register,
11924                         icnt2, $result$$Register,
11925                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11926     }
11927   %}
11928   ins_pipe( pipe_slow );
11929 %}
11930 
11931 // fast search of substring with known size.
// Matches StrIndexOf with encoding StrIntrinsicNode::UL when the substring
// length is an immediate (int_cnt2) and UseSSE42Intrinsics is set.
// Operand pinning and effects are the same as string_indexof_conL and
// string_indexof_conU.
// The stub is chosen at code-emission time from the constant length:
// string_indexofC8 for icnt2 >= 8, otherwise string_indexof.
11932 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11933                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11934   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11935   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11936   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11937 
11938   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11939   ins_encode %{
11940     int icnt2 = (int)$int_cnt2$$constant;
11941     if (icnt2 >= 8) {
11942       // IndexOf for constant substrings with size >= 8 elements
11943       // which don't need to be loaded through stack.
11944       __ string_indexofC8($str1$$Register, $str2$$Register,
11945                           $cnt1$$Register, $cnt2$$Register,
11946                           icnt2, $result$$Register,
11947                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11948     } else {
11949       // Small strings are loaded through stack if they cross page boundary.
11950       __ string_indexof($str1$$Register, $str2$$Register,
11951                         $cnt1$$Register, $cnt2$$Register,
11952                         icnt2, $result$$Register,
11953                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11954     }
11955   %}
11956   ins_pipe( pipe_slow );
11957 %}
11958 
// Matches StrIndexOf with encoding StrIntrinsicNode::LL when the substring
// length is held in a register (cnt2, eAXRegI) and UseSSE42Intrinsics is set.
// str1, cnt1, str2 and cnt2 are all USE_KILL; tmp (eCXRegI) and the flags are
// KILL; vec1 is an XMM TEMP.
// Calls string_indexof with (-1) in the argument position where the _con
// variants pass their constant length.
11959 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11960                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11961   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11962   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11963   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11964 
11965   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11966   ins_encode %{
11967     __ string_indexof($str1$$Register, $str2$$Register,
11968                       $cnt1$$Register, $cnt2$$Register,
11969                       (-1), $result$$Register,
11970                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11971   %}
11972   ins_pipe( pipe_slow );
11973 %}
11974 
// Matches StrIndexOf with encoding StrIntrinsicNode::UU when the substring
// length is held in a register (cnt2, eAXRegI) and UseSSE42Intrinsics is set.
// Operand pinning and effects are the same as string_indexofL.
// Calls string_indexof with (-1) in the argument position where the _con
// variants pass their constant length.
11975 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11976                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11977   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11978   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11979   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11980 
11981   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11982   ins_encode %{
11983     __ string_indexof($str1$$Register, $str2$$Register,
11984                       $cnt1$$Register, $cnt2$$Register,
11985                       (-1), $result$$Register,
11986                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11987   %}
11988   ins_pipe( pipe_slow );
11989 %}
11990 
// Matches StrIndexOf with encoding StrIntrinsicNode::UL when the substring
// length is held in a register (cnt2, eAXRegI) and UseSSE42Intrinsics is set.
// Operand pinning and effects are the same as string_indexofL and
// string_indexofU.
// Calls string_indexof with (-1) in the argument position where the _con
// variants pass their constant length.
11991 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11992                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11993   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11994   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11995   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11996 
11997   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11998   ins_encode %{
11999     __ string_indexof($str1$$Register, $str2$$Register,
12000                       $cnt1$$Register, $cnt2$$Register,
12001                       (-1), $result$$Register,
12002                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12003   %}
12004   ins_pipe( pipe_slow );
12005 %}
12006 
// Matches StrIndexOfChar with encoding StrIntrinsicNode::U when
// UseSSE42Intrinsics is set.
// Operands are pinned: str1 (eDIRegP), cnt1 (eDXRegI), ch (eAXRegI),
// result (eBXRegI), tmp (eCXRegI). str1, cnt1 and ch are USE_KILL; vec1-vec3
// and tmp are TEMPs; the flags are KILL.
// Delegates to the string_indexof_char macro-assembler routine.
12007 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12008                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12009   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12010   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12011   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12012   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12013   ins_encode %{
12014     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12015                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12016   %}
12017   ins_pipe( pipe_slow );
12018 %}
12019 
// StrIndexOfChar match rule for the StrIntrinsicNode::L encoding (the format
// string labels it "StringLatin1"); selected only when UseSSE42Intrinsics is
// set.  Same operand layout and effects as string_indexof_char, but the
// encoding calls stringL_indexof_char instead.
12020 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12021                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12022   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12023   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12024   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12025   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12026   ins_encode %{
12027     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12028                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12029   %}
12030   ins_pipe( pipe_slow );
12031 %}
12032 
12033 
12034 // fast array equals
// AryEq match rule for the StrIntrinsicNode::LL encoding (byte[] per the
// format string) when VM_Version::supports_avx512vlbw() is false.  Both array
// pointers are USE_KILL; tmp3, tmp4 and the flags are KILL.  No opmask
// register is available on this path, so knoreg is passed to arrays_equals.
12035 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12036                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12037 %{
12038   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12039   match(Set result (AryEq ary1 ary2));
12040   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12041   //ins_cost(300);
12042
12043   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12044   ins_encode %{
    // NOTE(review): the leading `true` presumably selects whole-array (as
    // opposed to substring) comparison -- confirm in MacroAssembler::arrays_equals.
12045     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12046                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12047                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
// AryEq match rule for the StrIntrinsicNode::LL encoding (byte[] per the
// format string) when VM_Version::supports_avx512vlbw() is true.  Identical to
// array_equalsB except that an opmask register (kReg ktmp) is reserved as a
// TEMP and handed to arrays_equals in place of knoreg.
12052 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12053                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12054 %{
12055   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12056   match(Set result (AryEq ary1 ary2));
12057   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12058   //ins_cost(300);
12059
12060   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12061   ins_encode %{
12062     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12063                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12064                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12065   %}
12066   ins_pipe( pipe_slow );
12067 %}
12068 
// AryEq match rule for the StrIntrinsicNode::UU encoding (char[] per the
// format string) when VM_Version::supports_avx512vlbw() is false.  Differs
// from array_equalsB only in passing `true /* char */` to arrays_equals.
12069 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12070                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12071 %{
12072   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12073   match(Set result (AryEq ary1 ary2));
12074   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12075   //ins_cost(300);
12076
12077   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12078   ins_encode %{
12079     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12080                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12081                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12082   %}
12083   ins_pipe( pipe_slow );
12084 %}
12085 
// AryEq match rule for the StrIntrinsicNode::UU encoding (char[] per the
// format string) when VM_Version::supports_avx512vlbw() is true.  Same as
// array_equalsC, plus a TEMP opmask register (ktmp) passed instead of knoreg.
12086 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12087                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12088 %{
12089   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12090   match(Set result (AryEq ary1 ary2));
12091   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12092   //ins_cost(300);
12093
12094   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12095   ins_encode %{
12096     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12097                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12098                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12099   %}
12100   ins_pipe( pipe_slow );
12101 %}
12102 
// HasNegatives match rule used when either supports_avx512vlbw() or
// supports_bmi2() is false (the complement of has_negatives_evex's predicate).
// ary1 and len are USE_KILL; tmp3 and the flags are KILL.  Both opmask
// arguments of MacroAssembler has_negatives are knoreg on this path.
12103 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12104                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12105 %{
12106   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12107   match(Set result (HasNegatives ary1 len));
12108   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12109
12110   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12111   ins_encode %{
12112     __ has_negatives($ary1$$Register, $len$$Register,
12113                      $result$$Register, $tmp3$$Register,
12114                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12115   %}
12116   ins_pipe( pipe_slow );
12117 %}
12118 
// HasNegatives match rule used when both supports_avx512vlbw() and
// supports_bmi2() are true.  Same as has_negatives, plus two TEMP opmask
// registers (ktmp1, ktmp2) that are passed to has_negatives instead of knoreg.
// NOTE(review): the format string does not mention ktmp1/ktmp2.
12119 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12120                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12121 %{
12122   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12123   match(Set result (HasNegatives ary1 len));
12124   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12125
12126   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12127   ins_encode %{
12128     __ has_negatives($ary1$$Register, $len$$Register,
12129                      $result$$Register, $tmp3$$Register,
12130                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12131   %}
12132   ins_pipe( pipe_slow );
12133 %}
12134 
12135 
12136 // fast char[] to byte[] compression
// StrCompressedCopy match rule used when either supports_avx512vlbw() or
// supports_bmi2() is false.  src, dst and len are USE_KILL; four XMM TEMPs are
// reserved; tmp5 and the flags are KILL.  knoreg is passed for both opmask
// arguments of char_array_compress.
// NOTE(review): the format string says "KILL RAX, RCX, RDX" although the
// operands here use the 32-bit e*Reg operand classes and the eAXRegI operand
// is the result; this only affects debug output.
12137 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12138                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12139   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12140   match(Set result (StrCompressedCopy src (Binary dst len)));
12141   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12142
12143   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12144   ins_encode %{
12145     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12146                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12147                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12148                            knoreg, knoreg);
12149   %}
12150   ins_pipe( pipe_slow );
12151 %}
12152 
// StrCompressedCopy match rule used when both supports_avx512vlbw() and
// supports_bmi2() are true.  Same as string_compress, plus two TEMP opmask
// registers (ktmp1, ktmp2) passed to char_array_compress instead of knoreg.
12153 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12154                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12155   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12156   match(Set result (StrCompressedCopy src (Binary dst len)));
12157   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12158
12159   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12160   ins_encode %{
12161     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12162                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12163                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12164                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12165   %}
12166   ins_pipe( pipe_slow );
12167 %}
12168 
12169 // fast byte[] to char[] inflation
// StrInflatedCopy match rule used when either supports_avx512vlbw() or
// supports_bmi2() is false.  The node's value is bound to a `Universe dummy`
// operand; no register result is named.  src, dst and len are USE_KILL; tmp1
// (XMM) and tmp2 are TEMPs; knoreg is passed as the opmask argument.
12170 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12171                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12172   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12173   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12174   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12175
12176   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12177   ins_encode %{
12178     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12179                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12180   %}
12181   ins_pipe( pipe_slow );
12182 %}
12183 
// StrInflatedCopy match rule used when both supports_avx512vlbw() and
// supports_bmi2() are true.  Same as string_inflate, plus a TEMP opmask
// register (ktmp) passed to byte_array_inflate instead of knoreg.
12184 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12185                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12186   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12187   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12188   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12189
12190   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12191   ins_encode %{
12192     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12193                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12194   %}
12195   ins_pipe( pipe_slow );
12196 %}
12197 
12198 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray match rule for nodes whose is_ascii() is false.  src, dst and
// len are USE_KILL; four XMM TEMPs are reserved; tmp5 and the flags are KILL.
// Shares the encode_iso_array routine with encode_ascii_array; the trailing
// `false` argument is what distinguishes the two rules.
12199 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12200                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12201                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12202   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12203   match(Set result (EncodeISOArray src (Binary dst len)));
12204   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12205
12206   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12207   ins_encode %{
12208     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12209                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12210                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12211   %}
12212   ins_pipe( pipe_slow );
12213 %}
12214 
12215 // encode char[] to byte[] in ASCII
// EncodeISOArray match rule for nodes whose is_ascii() is true.  Operands and
// effects mirror encode_iso_array; the same encode_iso_array routine is
// called with a trailing `true` argument to request the ASCII variant.
12216 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12217                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12218                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12219   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12220   match(Set result (EncodeISOArray src (Binary dst len)));
12221   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12222
12223   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12224   ins_encode %{
12225     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12226                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12227                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12228   %}
12229   ins_pipe( pipe_slow );
12230 %}
12231 
12232 //----------Control Flow Instructions------------------------------------------
12233 // Signed compare Instructions
// Signed int compare, register with register: (CmpI op1 op2) sets the signed
// flags operand cr.  Encoded as primary opcode 0x3B followed by a
// register/register ModRM byte built from op1 and op2.
12234 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12235   match(Set cr (CmpI op1 op2));
12236   effect( DEF cr, USE op1, USE op2 );
12237   format %{ "CMP    $op1,$op2" %}
12238   opcode(0x3B);  /* Opcode 3B /r */
12239   ins_encode( OpcP, RegReg( op1, op2) );
12240   ins_pipe( ialu_cr_reg_reg );
12241 %}
12242 
// Signed int compare, register with immediate: (CmpI op1 con) sets cr.
// Uses opcode 0x81 with /7 as the ModRM opcode extension.
// NOTE(review): OpcSErm/Con8or32 presumably pick the short sign-extended
// 8-bit immediate form when the constant fits -- confirm in their enc_class
// definitions, which are outside this section.
12243 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12244   match(Set cr (CmpI op1 op2));
12245   effect( DEF cr, USE op1 );
12246   format %{ "CMP    $op1,$op2" %}
12247   opcode(0x81,0x07);  /* Opcode 81 /7 */
12248   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12249   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12250   ins_pipe( ialu_cr_reg_imm );
12251 %}
12252 
12253 // Cisc-spilled version of compI_eReg
// Signed int compare of a register with an int loaded from memory:
// (CmpI op1 (LoadI op2)) folds the load into the CMP (opcode 0x3B with a
// register/memory ModRM).  Carries an explicit ins_cost(500).
12254 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12255   match(Set cr (CmpI op1 (LoadI op2)));
12256
12257   format %{ "CMP    $op1,$op2" %}
12258   ins_cost(500);
12259   opcode(0x3B);  /* Opcode 3B /r */
12260   ins_encode( OpcP, RegMem( op1, op2) );
12261   ins_pipe( ialu_cr_reg_mem );
12262 %}
12263 
// Signed int compare against constant zero: (CmpI src 0) is emitted as
// TEST src,src (opcode 0x85 with src in both ModRM register fields) rather
// than a CMP with an immediate.
12264 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12265   match(Set cr (CmpI src zero));
12266   effect( DEF cr, USE src );
12267
12268   format %{ "TEST   $src,$src" %}
12269   opcode(0x85);
12270   ins_encode( OpcP, RegReg( src, src ) );
12271   ins_pipe( ialu_cr_reg_imm );
12272 %}
12273 
// Bit test of a register against an immediate mask:
// (CmpI (AndI src con) 0) is emitted as TEST src,con -- opcode 0xF7 with /0
// as the ModRM opcode extension, followed by the 32-bit constant.  The AndI
// result itself is never materialized; only the flags are produced.
12274 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12275   match(Set cr (CmpI (AndI src con) zero));
12276
12277   format %{ "TEST   $src,$con" %}
12278   opcode(0xF7,0x00);
12279   ins_encode( OpcP, RegOpc(src), Con32(con) );
12280   ins_pipe( ialu_cr_reg_imm );
12281 %}
12282 
// Bit test of a register against an int in memory:
// (CmpI (AndI src (LoadI mem)) 0) is emitted as TEST with a register/memory
// ModRM (opcode 0x85), folding the load into the TEST.
//
// The memory operand must sit under a LoadI in the match tree.  An AndI input
// is an int value, never an address expression, so a bare `memory` operand in
// that position can never be matched and the rule would be dead; this mirrors
// how compI_eReg_mem wraps its memory operand in LoadI.
12283 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12284   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12285
12286   format %{ "TEST   $src,$mem" %}
12287   opcode(0x85);
12288   ins_encode( OpcP, RegMem( src, mem ) );
12289   ins_pipe( ialu_cr_reg_mem );
12290 %}
12291 
12292 // Unsigned compare Instructions; really, same as signed except they
12293 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned int compare, register with register: (CmpU op1 op2).  Same
// encoding as compI_eReg (opcode 0x3B, register/register ModRM); the only
// difference is that the result is typed as eFlagsRegU.
12294 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12295   match(Set cr (CmpU op1 op2));
12296
12297   format %{ "CMPu   $op1,$op2" %}
12298   opcode(0x3B);  /* Opcode 3B /r */
12299   ins_encode( OpcP, RegReg( op1, op2) );
12300   ins_pipe( ialu_cr_reg_reg );
12301 %}
12302 
// Unsigned int compare, register with immediate: (CmpU op1 con).  Same
// encoding as compI_eReg_imm (opcode 0x81 /7 via OpcSErm + Con8or32), but
// producing eFlagsRegU.
12303 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12304   match(Set cr (CmpU op1 op2));
12305
12306   format %{ "CMPu   $op1,$op2" %}
12307   opcode(0x81,0x07);  /* Opcode 81 /7 */
12308   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12309   ins_pipe( ialu_cr_reg_imm );
12310 %}
12311 
12312 // Cisc-spilled version of compU_eReg
// Unsigned int compare of a register with an int loaded from memory:
// (CmpU op1 (LoadI op2)) folds the load into the CMP (opcode 0x3B with a
// register/memory ModRM).  Carries an explicit ins_cost(500).
12313 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12314   match(Set cr (CmpU op1 (LoadI op2)));
12315
12316   format %{ "CMPu   $op1,$op2" %}
12317   ins_cost(500);
12318   opcode(0x3B);  /* Opcode 3B /r */
12319   ins_encode( OpcP, RegMem( op1, op2) );
12320   ins_pipe( ialu_cr_reg_mem );
12321 %}
12322 
12323 // // Cisc-spilled version of cmpU_eReg
12324 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12325 //  match(Set cr (CmpU (LoadI op1) op2));
12326 //
12327 //  format %{ "CMPu   $op1,$op2" %}
12328 //  ins_cost(500);
12329 //  opcode(0x39);  /* Opcode 39 /r */
12330 //  ins_encode( OpcP, RegMem( op1, op2) );
12331 //%}
12332 
// Unsigned int compare against constant zero: (CmpU src 0) is emitted as
// TEST src,src (opcode 0x85), producing eFlagsRegU.
12333 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12334   match(Set cr (CmpU src zero));
12335
12336   format %{ "TESTu  $src,$src" %}
12337   opcode(0x85);
12338   ins_encode( OpcP, RegReg( src, src ) );
12339   ins_pipe( ialu_cr_reg_imm );
12340 %}
12341 
12342 // Unsigned pointer compare Instructions
// Pointer compare, register with register: (CmpP op1 op2) produces
// eFlagsRegU (pointer compares are unsigned).  Encoded exactly like the int
// compares: opcode 0x3B with a register/register ModRM.
12343 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12344   match(Set cr (CmpP op1 op2));
12345
12346   format %{ "CMPu   $op1,$op2" %}
12347   opcode(0x3B);  /* Opcode 3B /r */
12348   ins_encode( OpcP, RegReg( op1, op2) );
12349   ins_pipe( ialu_cr_reg_reg );
12350 %}
12351 
// Pointer compare, register with pointer constant: (CmpP op1 con) produces
// eFlagsRegU.  Encoded with opcode 0x81 /7 via OpcSErm + Con8or32, the same
// encoding classes used by the int immediate compares.
12352 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12353   match(Set cr (CmpP op1 op2));
12354
12355   format %{ "CMPu   $op1,$op2" %}
12356   opcode(0x81,0x07);  /* Opcode 81 /7 */
12357   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12358   ins_pipe( ialu_cr_reg_imm );
12359 %}
12360 
12361 // Cisc-spilled version of compP_eReg
// Pointer compare of a register with a pointer loaded from memory:
// (CmpP op1 (LoadP op2)) folds the load into the CMP (opcode 0x3B with a
// register/memory ModRM).  Carries an explicit ins_cost(500); compP_mem_eReg
// matches the same tree without that cost when its predicate holds.
12362 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12363   match(Set cr (CmpP op1 (LoadP op2)));
12364
12365   format %{ "CMPu   $op1,$op2" %}
12366   ins_cost(500);
12367   opcode(0x3B);  /* Opcode 3B /r */
12368   ins_encode( OpcP, RegMem( op1, op2) );
12369   ins_pipe( ialu_cr_reg_mem );
12370 %}
12371 
12372 // // Cisc-spilled version of cmpP_eReg
12373 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12374 //  match(Set cr (CmpP (LoadP op1) op2));
12375 //
12376 //  format %{ "CMPu   $op1,$op2" %}
12377 //  ins_cost(500);
12378 //  opcode(0x39);  /* Opcode 39 /r */
12379 //  ins_encode( OpcP, RegMem( op1, op2) );
12380 //%}
12381 
12382 // Compare raw pointer (used in out-of-heap check).
12383 // Only works because non-oop pointers must be raw pointers
12384 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_eReg_mem, but without ins_cost(500)
// and guarded by a predicate: the rule only applies when the type reached via
// n->in(2)->in(2) needs no relocation (relocInfo::none).
// NOTE(review): per the match shape n->in(2) is the LoadP; its in(2) is
// presumably the load's address input -- confirm against MemNode's input
// layout.  Despite the "mem_eReg" name, the operand order is (register, memory).
12385 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12386   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12387   match(Set cr (CmpP op1 (LoadP op2)));
12388
12389   format %{ "CMPu   $op1,$op2" %}
12390   opcode(0x3B);  /* Opcode 3B /r */
12391   ins_encode( OpcP, RegMem( op1, op2) );
12392   ins_pipe( ialu_cr_reg_mem );
12393 %}
12394 
12395 //
12396 // This will generate a signed flags result. This should be ok
12397 // since any compare to a zero should be eq/neq.
// Pointer null check: (CmpP src null) is emitted as TEST src,src (opcode
// 0x85).  The result is typed eFlagsReg (signed) rather than eFlagsRegU.
12398 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12399   match(Set cr (CmpP src zero));
12400
12401   format %{ "TEST   $src,$src" %}
12402   opcode(0x85);
12403   ins_encode( OpcP, RegReg( src, src ) );
12404   ins_pipe( ialu_cr_reg_imm );
12405 %}
12406 
12407 // Cisc-spilled version of testP_reg
12408 // This will generate a signed flags result. This should be ok
12409 // since any compare to a zero should be eq/neq.
// Null check of a pointer directly in memory: (CmpP (LoadP op) null) is
// emitted as TEST [op],0xFFFFFFFF (opcode 0xF7 /0 with a 32-bit immediate),
// so the zero flag is set exactly when the loaded pointer is null.
//
// The zero operand is immP0, the same null-pointer constant operand used by
// testP_reg.  CmpP compares against a pointer constant, so declaring the
// operand as an int zero (immI_0) makes the rule impossible to match.
12410 instruct testP_Reg_mem( eFlagsReg cr, memory op, immP0 zero ) %{
12411   match(Set cr (CmpP (LoadP op) zero));
12412
12413   format %{ "TEST   $op,0xFFFFFFFF" %}
12414   ins_cost(500);
12415   opcode(0xF7);               /* Opcode F7 /0 */
12416   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12417   ins_pipe( ialu_cr_reg_imm );
12418 %}
12419 
12420 // Yanked all unsigned pointer compare operations.
12421 // Pointer compares are done with CmpP which is already unsigned.
12422 
12423 //----------Max and Min--------------------------------------------------------
12424 // Min Instructions
12425 ////
12426 //   *** Min and Max using the conditional move are slower than the
12427 //   *** branch version on a Pentium III.
12428 // // Conditional move for min
12429 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12430 //  effect( USE_DEF op2, USE op1, USE cr );
12431 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12432 //  opcode(0x4C,0x0F);
12433 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12434 //  ins_pipe( pipe_cmov_reg );
12435 //%}
12436 //
12437 //// Min Register with Register (P6 version)
12438 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12439 //  predicate(VM_Version::supports_cmov() );
12440 //  match(Set op2 (MinI op1 op2));
12441 //  ins_cost(200);
12442 //  expand %{
12443 //    eFlagsReg cr;
12444 //    compI_eReg(cr,op1,op2);
12445 //    cmovI_reg_lt(op2,op1,cr);
12446 //  %}
12447 //%}
12448 
12449 // Min Register with Register (generic version)
// Two-address signed minimum: dst = MinI(dst, src).  The flags are killed.
// The instruction bytes come from the min_enc encoding class, which is
// defined outside this section.
// NOTE(review): how (or whether) min_enc uses opcode(0xCC) is not visible
// here -- confirm before changing it.
12450 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12451   match(Set dst (MinI dst src));
12452   effect(KILL flags);
12453   ins_cost(300);
12454
12455   format %{ "MIN    $dst,$src" %}
12456   opcode(0xCC);
12457   ins_encode( min_enc(dst,src) );
12458   ins_pipe( pipe_slow );
12459 %}
12460 
12461 // Max Register with Register
12462 //   *** Min and Max using the conditional move are slower than the
12463 //   *** branch version on a Pentium III.
12464 // // Conditional move for max
12465 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12466 //  effect( USE_DEF op2, USE op1, USE cr );
12467 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12468 //  opcode(0x4F,0x0F);
12469 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12470 //  ins_pipe( pipe_cmov_reg );
12471 //%}
12472 //
12473 // // Max Register with Register (P6 version)
12474 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12475 //  predicate(VM_Version::supports_cmov() );
12476 //  match(Set op2 (MaxI op1 op2));
12477 //  ins_cost(200);
12478 //  expand %{
12479 //    eFlagsReg cr;
12480 //    compI_eReg(cr,op1,op2);
12481 //    cmovI_reg_gt(op2,op1,cr);
12482 //  %}
12483 //%}
12484 
12485 // Max Register with Register (generic version)
// Two-address signed maximum: dst = MaxI(dst, src).  The flags are killed.
// The instruction bytes come from the max_enc encoding class, which is
// defined outside this section.
// NOTE(review): how (or whether) max_enc uses opcode(0xCC) is not visible
// here -- confirm before changing it.
12486 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12487   match(Set dst (MaxI dst src));
12488   effect(KILL flags);
12489   ins_cost(300);
12490
12491   format %{ "MAX    $dst,$src" %}
12492   opcode(0xCC);
12493   ins_encode( max_enc(dst,src) );
12494   ins_pipe( pipe_slow );
12495 %}
12496 
12497 // ============================================================================
12498 // Counted Loop limit node which represents exact final iterator value.
12499 // Note: the resulting value should fit into integer range since
12500 // counted loops have limit check on overflow.
// Computes the exact final iterator value of a counted loop, following the
// formula in the format string:
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// The intermediate (limit - init + stride -/+ 1) is carried as a 64-bit value
// in the limit:limit_hi register pair.  limit is both input and result;
// limit_hi and tmp are TEMPs and the flags are killed.  The stride must be a
// constant other than +1/-1 (asserted below).
12501 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12502   match(Set limit (LoopLimit (Binary init limit) stride));
12503   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12504   ins_cost(300);
12505
12506   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12507   ins_encode %{
12508     int strd = (int)$stride$$constant;
12509     assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but not used anywhere in this encoding.
12510     int m1 = (strd > 0) ? 1 : -1;
12511     // Convert limit to long (EAX:EDX)
12512     __ cdql();
12513     // Convert init to long (init:tmp)
    // tmp becomes the sign word of init (arithmetic shift right by 31).
12514     __ movl($tmp$$Register, $init$$Register);
12515     __ sarl($tmp$$Register, 31);
12516     // $limit - $init
    // 64-bit subtract: low words with SUB, high words with SBB (borrow).
12517     __ subl($limit$$Register, $init$$Register);
12518     __ sbbl($limit_hi$$Register, $tmp$$Register);
12519     // + ($stride - 1)
12520     if (strd > 0) {
12521       __ addl($limit$$Register, (strd - 1));
12522       __ adcl($limit_hi$$Register, 0);
12523       __ movl($tmp$$Register, strd);
12524     } else {
      // Negative stride: add (stride + 1) with -1 as its high word, then
      // negate the 64-bit pair so the division below uses the positive
      // divisor -stride.
12525       __ addl($limit$$Register, (strd + 1));
12526       __ adcl($limit_hi$$Register, -1);
12527       __ lneg($limit_hi$$Register, $limit$$Register);
12528       __ movl($tmp$$Register, -strd);
12529     }
12530     // signed division: (EAX:EDX) / pos_stride
12531     __ idivl($tmp$$Register);
12532     if (strd < 0) {
12533       // restore sign
12534       __ negl($tmp$$Register);
12535     }
12536     // (EAX) * stride
12537     __ mull($tmp$$Register);
12538     // + init (ignore upper bits)
12539     __ addl($limit$$Register, $init$$Register);
12540   %}
12541   ins_pipe( pipe_slow );
12542 %}
12543 
12544 // ============================================================================
12545 // Branch Instructions
12546 // Jump Table
// Jump-table dispatch for the ideal Jump node: performs an indirect jump
// through the table at $constantaddress, indexed by switch_val with scale
// factor 1 (Address::times_1).
// NOTE(review): times_1 implies switch_val already holds a byte offset into
// the table rather than an entry index -- confirm with the code that builds
// the Jump node.
12547 instruct jumpXtnd(rRegI switch_val) %{
12548   match(Jump switch_val);
12549   ins_cost(350);
12550   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12551   ins_encode %{
12552     // Jump to Address(table_base + switch_reg)
12553     Address index(noreg, $switch_val$$Register, Address::times_1);
12554     __ jump(ArrayAddress($constantaddress, index));
12555   %}
12556   ins_pipe(pipe_jmp);
12557 %}
12558 
12559 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for the ideal Goto node.  Always emits the long form
// of JMP (second argument `false` to jmp); size(5) declares the fixed
// 5-byte length of that encoding.
12560 instruct jmpDir(label labl) %{
12561   match(Goto);
12562   effect(USE labl);
12563
12564   ins_cost(300);
12565   format %{ "JMP    $labl" %}
12566   size(5);
12567   ins_encode %{
12568     Label* L = $labl$$label;
12569     __ jmp(*L, false); // Always long jump
12570   %}
12571   ins_pipe( pipe_jmp );
12572 %}
12573 
12574 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for the ideal If node on signed flags (eFlagsReg).  The
// condition code comes from the cmpOp operand.  Always emits the long form of
// Jcc; size(6) declares the fixed 6-byte length of that encoding.
12575 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12576   match(If cop cr);
12577   effect(USE labl);
12578
12579   ins_cost(300);
12580   format %{ "J$cop    $labl" %}
12581   size(6);
12582   ins_encode %{
12583     Label* L = $labl$$label;
12584     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12585   %}
12586   ins_pipe( pipe_jcc );
12587 %}
12588 
12589 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (CountedLoopEnd) on signed flags.  Applies
// only when the node does not have the vector-mask-set flag; the
// *_and_restoreMask variants handle the other case.  Always a long Jcc
// (6 bytes).
12590 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12591   predicate(!n->has_vector_mask_set());
12592   match(CountedLoopEnd cop cr);
12593   effect(USE labl);
12594
12595   ins_cost(300);
12596   format %{ "J$cop    $labl\t# Loop end" %}
12597   size(6);
12598   ins_encode %{
12599     Label* L = $labl$$label;
12600     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12601   %}
12602   ins_pipe( pipe_jcc );
12603 %}
12604 
12605 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (CountedLoopEnd) on unsigned flags
// (cmpOpU / eFlagsRegU).  Applies only when the node does not have the
// vector-mask-set flag.  Always a long Jcc (6 bytes).
12606 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12607   predicate(!n->has_vector_mask_set());
12608   match(CountedLoopEnd cop cmp);
12609   effect(USE labl);
12610
12611   ins_cost(300);
12612   format %{ "J$cop,u  $labl\t# Loop end" %}
12613   size(6);
12614   ins_encode %{
12615     Label* L = $labl$$label;
12616     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12617   %}
12618   ins_pipe( pipe_jcc );
12619 %}
12620 
// Back-branch of a counted loop (CountedLoopEnd) on the cmpOpUCF /
// eFlagsRegUCF operand pair.  Applies only when the node does not have the
// vector-mask-set flag.  Its ins_cost is 200, lower than the 300 used by
// jmpLoopEndU.  Always a long Jcc (6 bytes).
12621 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12622   predicate(!n->has_vector_mask_set());
12623   match(CountedLoopEnd cop cmp);
12624   effect(USE labl);
12625
12626   ins_cost(200);
12627   format %{ "J$cop,u  $labl\t# Loop end" %}
12628   size(6);
12629   ins_encode %{
12630     Label* L = $labl$$label;
12631     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12632   %}
12633   ins_pipe( pipe_jcc );
12634 %}
12635 
12636 // mask version
12637 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12638 // Bounded mask operand used in following pattern is needed for
12639 // post-loop multiversioning.
// Counted-loop back-branch on signed flags for nodes that have the
// vector-mask-set flag, used only under PostLoopMultiversioning.  Emits the
// long Jcc followed by restorevectmask on the fall-through path; ktmp
// (operand class kReg_K1) is reserved as a TEMP for that restore.
// size(10) declares the combined length of both instructions.
12640 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12641   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12642   match(CountedLoopEnd cop cr);
12643   effect(USE labl, TEMP ktmp);
12644
12645   ins_cost(400);
12646   format %{ "J$cop    $labl\t# Loop end\n\t"
12647             "restorevectmask \t# vector mask restore for loops" %}
12648   size(10);
12649   ins_encode %{
12650     Label* L = $labl$$label;
12651     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12652     __ restorevectmask($ktmp$$KRegister);
12653   %}
12654   ins_pipe( pipe_jcc );
12655 %}
12656 
12657 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12658 // Bounded mask operand used in following pattern is needed for
12659 // post-loop multiversioning.
// Unsigned-flags counterpart of jmpLoopEnd_and_restoreMask: counted-loop
// back-branch for nodes that have the vector-mask-set flag, used only under
// PostLoopMultiversioning.  Emits the long Jcc followed by restorevectmask
// using the TEMP ktmp; size(10) covers both instructions.
12660 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12661   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12662   match(CountedLoopEnd cop cmp);
12663   effect(USE labl, TEMP ktmp);
12664
12665   ins_cost(400);
12666   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12667             "restorevectmask \t# vector mask restore for loops" %}
12668   size(10);
12669   ins_encode %{
12670     Label* L = $labl$$label;
12671     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12672     __ restorevectmask($ktmp$$KRegister);
12673   %}
12674   ins_pipe( pipe_jcc );
12675 %}
12676 
12677 // Bounded mask operand used in following pattern is needed for
12678 // post-loop multiversioning.
// cmpOpUCF / eFlagsRegUCF counterpart of jmpLoopEnd_and_restoreMask:
// counted-loop back-branch for nodes that have the vector-mask-set flag, used
// only under PostLoopMultiversioning.  Emits the long Jcc followed by
// restorevectmask using the TEMP ktmp; size(10) covers both instructions.
// Its ins_cost is 300, lower than the 400 used by the other two mask variants.
12679 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12680   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12681   match(CountedLoopEnd cop cmp);
12682   effect(USE labl, TEMP ktmp);
12683
12684   ins_cost(300);
12685   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12686             "restorevectmask \t# vector mask restore for loops" %}
12687   size(10);
12688   ins_encode %{
12689     Label* L = $labl$$label;
12690     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12691     __ restorevectmask($ktmp$$KRegister);
12692   %}
12693   ins_pipe( pipe_jcc );
12694 %}
12695 
12696 // Jump Direct Conditional - using unsigned comparison
// Conditional branch for the ideal If node on unsigned flags (cmpOpU /
// eFlagsRegU).  Always emits the long form of Jcc; size(6) declares its
// fixed 6-byte length.
12697 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12698   match(If cop cmp);
12699   effect(USE labl);
12700
12701   ins_cost(300);
12702   format %{ "J$cop,u  $labl" %}
12703   size(6);
12704   ins_encode %{
12705     Label* L = $labl$$label;
12706     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12707   %}
12708   ins_pipe(pipe_jcc);
12709 %}
12710 
// Conditional branch for the ideal If node on the cmpOpUCF / eFlagsRegUCF
// operand pair.  Same single long Jcc as jmpConU, with a lower ins_cost of
// 200 instead of 300.
12711 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12712   match(If cop cmp);
12713   effect(USE labl);
12714
12715   ins_cost(200);
12716   format %{ "J$cop,u  $labl" %}
12717   size(6);
12718   ins_encode %{
12719     Label* L = $labl$$label;
12720     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12721   %}
12722   ins_pipe(pipe_jcc);
12723 %}
12724 
// Conditional branch for the ideal If node on cmpOpUCF2 conditions, which
// must be either equal or notEqual (anything else hits ShouldNotReachHere).
// Two jumps are emitted so that the parity flag is taken into account:
//   notEqual: branch to the target if parity is set OR the result is not equal;
//   equal:    skip the branch if parity is set, otherwise branch if equal.
// NOTE(review): parity presumably marks the "unordered" outcome of a
// floating-point compare -- confirm against the producers of eFlagsRegUCF.
// No size() is declared, unlike the single-jump variants.
12725 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12726   match(If cop cmp);
12727   effect(USE labl);
12728
12729   ins_cost(200);
12730   format %{ $$template
12731     if ($cop$$cmpcode == Assembler::notEqual) {
12732       $$emit$$"JP,u   $labl\n\t"
12733       $$emit$$"J$cop,u   $labl"
12734     } else {
12735       $$emit$$"JP,u   done\n\t"
12736       $$emit$$"J$cop,u   $labl\n\t"
12737       $$emit$$"done:"
12738     }
12739   %}
12740   ins_encode %{
12741     Label* l = $labl$$label;
12742     if ($cop$$cmpcode == Assembler::notEqual) {
      // Parity set also counts as "not equal": both jumps go to the target.
12743       __ jcc(Assembler::parity, *l, false);
12744       __ jcc(Assembler::notEqual, *l, false);
12745     } else if ($cop$$cmpcode == Assembler::equal) {
      // Parity set must not be treated as equal: hop over the branch with a
      // short jump to a local label.
12746       Label done;
12747       __ jccb(Assembler::parity, done);
12748       __ jcc(Assembler::equal, *l, false);
12749       __ bind(done);
12750     } else {
12751        ShouldNotReachHere();
12752     }
12753   %}
12754   ins_pipe(pipe_jcc);
12755 %}
12756 
12757 // ============================================================================
12758 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12759 // array for an instance of the superklass.  Set a hidden internal cache on a
12760 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12761 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Value-producing form of the secondary-supers scan: matches
// (PartialSubtypeCheck sub super) and leaves the outcome in $result.
// rcx and the flags are killed.  The sequence, as listed in the format
// string, scans the secondary-supers array with REPNE SCASD, updates the
// secondary-super cache on a hit and zeroes $result; on a miss $result stays
// non-zero.  opcode(0x1) asks the shared enc_PartialSubtypeCheck encoding to
// emit that final XOR.
// NOTE(review): in the format string the "Hit: EDI zero" remark lacks the
// leading '#' used by the other remarks (debug output only).
12762 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12763   match(Set result (PartialSubtypeCheck sub super));
12764   effect( KILL rcx, KILL cr );
12765
12766   ins_cost(1100);  // slightly larger than the next version
12767   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12768             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12769             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12770             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12771             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12772             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12773             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12774      "miss:\t" %}
12775
12776   opcode(0x1); // Force a XOR of EDI
12777   ins_encode( enc_PartialSubtypeCheck() );
12778   ins_pipe( pipe_slow );
12779 %}
12780 
instruct partialSubtypeCheck_vs_Zero(eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero) %{
  // Flags-only form: matches a PartialSubtypeCheck whose result is compared
  // against null, so only the condition codes are produced.  rcx and the
  // (unused) result register are declared killed.
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  ins_cost(1000);
  effect(KILL rcx, KILL result);

  format %{
    "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
    "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
    "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
    "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
    "JNE,s  miss\t\t# Missed: flags NZ\n\t"
    "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
    "miss:\t"
  %}

  opcode(0x0);  // 0x0: no need to XOR EDI, only the flags are consumed
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12798 
12799 // ============================================================================
12800 // Branch Instructions -- short offset versions
12801 //
12802 // These instructions are used to replace jumps of a long offset (the default
12803 // match) with jumps of a shorter offset.  These instructions are all tagged
12804 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12805 // match rules in general matching.  Instead, the ADLC generates a conversion
12806 // method in the MachNode which can be used to do in-place replacement of the
12807 // long variant with the shorter variant.  The compiler will determine if a
12808 // branch can be taken by the is_short_branch_offset() predicate in the machine
12809 // specific code section of the file.
12810 
// Unconditional jump, short-offset form.  The label is a relative address
// measured from JMP+1.  Tagged ins_short_branch, so it only ever replaces
// the long jump in place.
instruct jmpDir_short(label labl) %{
  match(Goto);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "JMP,s  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jmp);
%}
12826 
// Conditional jump on signed-compare flags, short-offset form.  The label is
// a relative address measured from Jcc+1.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,s  $labl" %}
  ins_encode %{
    // The operand's condition code is used directly as the jcc condition.
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12842 
// Counted-loop back-branch on signed-compare flags, short-offset form.  The
// label is a relative address measured from Jcc+1.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12858 
// Counted-loop back-branch on unsigned-compare flags (cmpOpU / eFlagsRegU),
// short-offset form.  The label is a relative address measured from Jcc+1.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,us $labl\t# Loop end" %}
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12874 
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  // Counted-loop back-branch for the cmpOpUCF / eFlagsRegUCF operand pair,
  // short-offset form.
  match(CountedLoopEnd cop cmp);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,us $labl\t# Loop end" %}
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12889 
// Conditional jump on unsigned-compare flags, short-offset form.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12905 
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  // Conditional jump for the cmpOpUCF / eFlagsRegUCF operand pair,
  // short-offset form: a single short Jcc.
  match(If cop cmp);
  ins_cost(300);
  effect(USE labl);

  size(2);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12920 
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  // Short-offset conditional jump for cmpOpUCF2, which needs two short
  // branches (hence size(4)): the parity flag is tested in addition to the
  // equal/notEqual condition.
  // NOTE(review): parity presumably signals an unordered floating-point
  // compare -- confirm against the flag-producing instructs.
  match(If cop cmp);
  ins_cost(300);
  effect(USE labl);

  size(4);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    const int cc = $cop$$cmpcode;
    Label* target = $labl$$label;
    if (cc == Assembler::equal) {
      // Taken only when equal AND parity is clear: parity skips the jump.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      // Taken when parity is set OR the values are not equal.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12954 
12955 // ============================================================================
12956 // Long Compare
12957 //
12958 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12959 // is tricky.  The flavor of compare used depends on whether we are testing
12960 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12961 // The GE test is the negated LT test.  The LE test can be had by commuting
12962 // the operands (yielding a GE test) and then negating; negate again for the
12963 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12964 // NE test is negated from that.
12965 
12966 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12967 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12968 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12969 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12970 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12971 // foo match ends up with the wrong leaf.  One fix is to not match both
12972 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12973 // both forms beat the trinary form of long-compare and both are very useful
12974 // on Intel which has so few registers.
12975 
// Three-way long compare (CmpL3) materialized as -1 / 0 / +1 in an integer
// register.  Expensive (cost 1000): this is the form to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  ins_cost(1000);
  effect(KILL flags);

  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
     "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs_lo = $src1$$Register;
    Register rhs_lo = $src2$$Register;
    Label plus_one, minus_one, finished;

    // Start from 0; adjusted to +1 / -1 below unless the values are equal.
    __ xorptr(result, result);

    // High halves decide with a signed compare ...
    __ cmpl(HIGH_FROM_LOW(lhs_lo), HIGH_FROM_LOW(rhs_lo));
    __ jccb(Assembler::less,    minus_one);
    __ jccb(Assembler::greater, plus_one);

    // ... otherwise the low halves decide with an unsigned compare.
    __ cmpl(lhs_lo, rhs_lo);
    __ jccb(Assembler::below,   minus_one);
    __ jccb(Assembler::equal,   finished);

    __ bind(plus_one);
    __ incrementl(result);
    __ jmpb(finished);

    __ bind(minus_one);
    __ decrementl(result);

    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
13011 
13012 //======
// Signed long compare against zero, producing flags that are valid only for
// LT / GE tests (LE / GT are reachable by reversing the arguments).  Only the
// high word is tested.  NOT usable for EQ/NE.
instruct cmpL_zero_flags_LTGE(flagsReg_long_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpL src zero));
  ins_cost(100);

  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
13024 
// Signed long register/register compare, producing flags that are valid only
// for LT / GE tests (LE / GT are reachable by reversing the arguments).
// Compares the low words, then subtracts the high words with borrow through
// the temporary.  NOT usable for EQ/NE.
instruct cmpL_reg_flags_LTGE(flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
    "MOV    $tmp,$src1.hi\n\t"
    "SBB    $tmp,$src2.hi\t! Compute flags for long compare"
  %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13038 
// Branch on a long compare: reg < zero/reg OR reg >= zero/reg.
// Nothing more than the ordinary conditional branch, gated by a predicate
// that admits only the LT and GE tests.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
13049 
13050 //======
// Unsigned long compare against zero, producing flags that are valid only for
// LT / GE tests (LE / GT are reachable by reversing the arguments).  Only the
// high word is tested.  NOT usable for EQ/NE.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);

  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13062 
// Unsigned long register/register compare, producing flags that are valid
// only for LT / GE tests (LE / GT are reachable by reversing the arguments).
// Uses the same CMP / MOV / SBB sequence as the signed variant.
// NOT usable for EQ/NE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
    "MOV    $tmp,$src1.hi\n\t"
    "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare"
  %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13076 
// Branch on an unsigned long compare: reg < zero/reg OR reg >= zero/reg.
// Nothing more than the ordinary conditional branch, gated by a predicate
// that admits only the LT and GE tests.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
13087 
// Conditional move of a long register pair, driven by LT/GE flags from a long
// compare.  Requires CMOV support; both halves are moved under the same
// condition.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13099 
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory, driven by LT/GE flags from
  // a long compare.  Requires CMOV support; low and high halves are moved
  // with separate CMOVs.
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13110 
// Conditional move of an int register, driven by LT/GE flags from a long
// compare.  Requires CMOV support.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13121 
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  // Conditional move of an int loaded from memory, driven by LT/GE flags from
  // a long compare.  Requires CMOV support.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(250);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
13131 
// Compare 2 longs and CMOVE ptrs: conditional move of a pointer register,
// driven by LT/GE flags from a long compare.  Requires CMOV support.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13142 
// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE <= 1).
// Expands to fcmovDPR_regS; only the LT and GE tests are accepted.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // The LT/GE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // a GE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13152 
// Compare 2 longs and CMOVE doubles (regD operands, UseSSE >= 2).
// Expands to fcmovD_regS; only the LT and GE tests are accepted.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // The LT/GE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // a GE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13162 
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // Compare 2 longs and CMOVE floats (regFPR operands, UseSSE == 0).
  // Expands to fcmovFPR_regS; only the LT and GE tests are accepted.
  //
  // The LT/GE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // a GE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13171 
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // Compare 2 longs and CMOVE floats (regF operands, UseSSE >= 1).
  // Expands to fcmovF_regS; only the LT and GE tests are accepted.
  //
  // The LT/GE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // a GE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13180 
13181 //======
// Signed long compare against zero, producing flags that are valid only for
// EQ / NE tests: the two halves are OR-ed together in a temporary.
instruct cmpL_zero_flags_EQNE(flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  ins_cost(200);
  effect(TEMP tmp);

  format %{
    "MOV    $tmp,$src.lo\n\t"
    "OR     $tmp,$src.hi\t! Long is EQ/NE 0?"
  %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13192 
// Signed long register/register compare, producing flags that are valid only
// for EQ / NE tests: the high words are compared only when the low words
// already match.
instruct cmpL_reg_flags_EQNE(flagsReg_long_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(200+300);

  format %{
    "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
    "JNE,s  skip\n\t"
    "CMP    $src1.hi,$src2.hi\n\t"
    "skip:\t"
  %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13204 
// Branch on a long compare: reg == zero/reg OR reg != zero/reg.
// Nothing more than the ordinary conditional branch, gated by a predicate
// that admits only the EQ and NE tests.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
13215 
13216 //======
// Unsigned long compare against zero, producing flags that are valid only for
// EQ / NE tests: the two halves are OR-ed together in a temporary.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  ins_cost(200);
  effect(TEMP tmp);

  format %{
    "MOV    $tmp,$src.lo\n\t"
    "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?"
  %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13227 
// Unsigned long register/register compare, producing flags that are valid
// only for EQ / NE tests: the high words are compared only when the low words
// already match.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);

  format %{
    "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
    "JNE,s  skip\n\t"
    "CMP    $src1.hi,$src2.hi\n\t"
    "skip:\t"
  %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13239 
// Branch on an unsigned long compare: reg == zero/reg OR reg != zero/reg.
// Nothing more than the ordinary conditional branch, gated by a predicate
// that admits only the EQ and NE tests.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13250 
// Conditional move of a long register pair, driven by EQ/NE flags from a long
// compare.  Requires CMOV support; both halves are moved under the same
// condition.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13262 
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory, driven by EQ/NE flags from
  // a long compare.  Requires CMOV support; low and high halves are moved
  // with separate CMOVs.
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13273 
// Conditional move of an int register, driven by EQ/NE flags from a long
// compare.  Requires CMOV support.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13284 
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  // Conditional move of an int loaded from memory, driven by EQ/NE flags from
  // a long compare.  Requires CMOV support.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(250);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
13294 
// Compare 2 longs and CMOVE ptrs: conditional move of a pointer register,
// driven by EQ/NE flags from a long compare.  Requires CMOV support.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);

  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13305 
// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE <= 1).
// Expands to fcmovDPR_regS; only the EQ and NE tests are accepted.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // The EQ/NE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // an NE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13315 
// Compare 2 longs and CMOVE doubles (regD operands, UseSSE >= 2).
// Expands to fcmovD_regS; only the EQ and NE tests are accepted.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // The EQ/NE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // an NE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13325 
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Compare 2 longs and CMOVE floats (regFPR operands, UseSSE == 0).
  // Expands to fcmovFPR_regS; only the EQ and NE tests are accepted.
  //
  // The EQ/NE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // an NE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13334 
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Compare 2 longs and CMOVE floats (regF operands, UseSSE >= 1).
  // Expands to fcmovF_regS; only the EQ and NE tests are accepted.
  //
  // The EQ/NE alternatives are parenthesized so that the UseSSE guard applies
  // to both of them; without the parentheses '&&' binds tighter than '||' and
  // an NE test would satisfy the predicate regardless of UseSSE.
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13343 
13344 //======
// Signed long compare against zero, producing flags that are valid only for
// LE / GT tests.  Same idea as cmpL_reg_flags_LEGT, except that src is
// negated (subtracted from a zeroed temporary); consumers must use the
// commuted test.
instruct cmpL_zero_flags_LEGT(flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
    "CMP    $tmp,$src.lo\n\t"
    "SBB    $tmp,$src.hi\n\t"
  %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13357 
// Signed long register/register compare, producing flags that are valid only
// for LE / GT tests.  Identical to cmpL_reg_flags_LTGE but with the operands
// swapped, so consumers must apply a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT(flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
    "MOV    $tmp,$src2.hi\n\t"
    "SBB    $tmp,$src1.hi\t! Compute flags for long compare"
  %}
  ins_encode(long_cmp_flags2(src2, src1, tmp));  // note: src2 first
  ins_pipe(ialu_cr_reg_reg);
%}
13371 
// Branch on a long compare: reg <= zero/reg OR reg > zero/reg.
// Nothing more than the ordinary conditional branch (taking a commuted
// compare operand), gated by a predicate that admits only the GT and LE tests.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13383 
13384 //======
// Unsigned long compare against zero, producing flags that are valid only for
// LE / GT tests.  Same idea as cmpUL_reg_flags_LEGT, except that src is
// negated (subtracted from a zeroed temporary); consumers must use the
// commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
    "CMP    $tmp,$src.lo\n\t"
    "SBB    $tmp,$src.hi\n\t"
  %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13397 
// Unsigned long register/register compare, producing flags that are valid
// only for LE / GT tests.  Identical to cmpUL_reg_flags_LTGE but with the
// operands swapped, so consumers must apply a commuted test to get the same
// result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);

  format %{
    "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
    "MOV    $tmp,$src2.hi\n\t"
    "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare"
  %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );  // note: src2 first
  ins_pipe( ialu_cr_reg_reg );
%}
13411 
// Branch on an unsigned long compare: reg <= zero/reg OR reg > zero/reg.
// Nothing more than the ordinary conditional branch (taking a commuted
// compare operand), gated by a predicate that admits only the GT and LE tests.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13423 
// Conditional move of a long register pair, driven by LE/GT flags from a
// signed long compare (commuted compare operand).  Requires CMOV support;
// both halves are moved under the same condition.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13435 
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory, driven by LE/GT flags from
  // a signed long compare (commuted compare operand).  Requires CMOV support;
  // low and high halves are moved with separate CMOVs.
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi+4"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13446 
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  // Unsigned counterpart of cmovLL_reg_LEGT: conditional move of a long
  // register pair, driven by LE/GT flags from an unsigned long compare.
  // Requires CMOV support.
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src),
             enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13457 
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  // Unsigned counterpart of cmovLL_mem_LEGT: conditional move of a long
  // loaded from memory, driven by LE/GT flags from an unsigned long compare.
  // Requires CMOV support.
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);

  format %{
    "CMOV$cmp $dst.lo,$src.lo\n\t"
    "CMOV$cmp $dst.hi,$src.hi+4"
  %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src),
             enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13468 
13469 // Compare 2 longs and CMOVE ints: register-source form.
13470 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
        // Selected only when the CPU supports CMOV and the Bool test is le or gt.
13471   match( Set dst (CMoveI (Binary cmp flags) (Binary dst src)) );
13472   predicate( VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ) );
13473   ins_cost(200);
13474   format %{ "CMOV$cmp $dst,$src" %}
13475   opcode(0x0F, 0x40);
13476   ins_encode( enc_cmov( cmp ), RegReg(dst, src) );
13477   ins_pipe(pipe_cmov_reg);
13478 %}
13479 
13480 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
        // Compare 2 longs and CMOVE an int that is loaded from memory.
        // Selected only when the CPU supports CMOV and the Bool test is le or gt.
13481   match( Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))) );
13482   predicate( VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ) );
13483   ins_cost(250);
13484   format %{ "CMOV$cmp $dst,$src" %}
13485   opcode(0x0F, 0x40);
13486   ins_encode( enc_cmov( cmp ), RegMem(dst, src) );
13487   ins_pipe(pipe_cmov_mem);
13488 %}
13489 
13490 // Compare 2 longs and CMOVE ptrs (register source).
13491 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
        // Selected only when the CPU supports CMOV and the Bool test is le or gt.
13492   match( Set dst (CMoveP (Binary cmp flags) (Binary dst src)) );
13493   predicate( VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ) );
13494   ins_cost(200);
13495   format %{ "CMOV$cmp $dst,$src" %}
13496   opcode(0x0F, 0x40);
13497   ins_encode( enc_cmov( cmp ), RegReg(dst, src) );
13498   ins_pipe(pipe_cmov_reg);
13499 %}
13500 
13501 // Compare 2 longs and CMOVE doubles (regDPR operands; guarded by UseSSE<=1).
13502 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
        // The le/gt alternatives are grouped in parentheses so that the UseSSE guard
        // covers both of them.  Without the grouping, && binds tighter than || and a
        // gt test would satisfy the predicate regardless of UseSSE.
13503   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13504   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13505   ins_cost(200);
        // No encoding of its own: the rule expands into fcmovDPR_regS.
13506   expand %{
13507     fcmovDPR_regS(cmp,flags,dst,src);
13508   %}
13509 %}
13510 
13511 // Compare 2 longs and CMOVE doubles (regD operands; guarded by UseSSE>=2).
13512 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
        // The le/gt alternatives are grouped in parentheses so that the UseSSE guard
        // covers both of them.  Without the grouping, && binds tighter than || and a
        // gt test would satisfy the predicate regardless of UseSSE.
13513   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13514   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
        // No encoding of its own: the rule expands into fcmovD_regS.
13516   expand %{
13517     fcmovD_regS(cmp,flags,dst,src);
13518   %}
13519 %}
13520 
13521 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
        // Compare 2 longs and CMOVE floats (regFPR operands; guarded by UseSSE==0).
        // The le/gt alternatives are grouped in parentheses so that the UseSSE guard
        // covers both of them.  Without the grouping, && binds tighter than || and a
        // gt test would satisfy the predicate regardless of UseSSE.
13522   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13523   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13524   ins_cost(200);
        // No encoding of its own: the rule expands into fcmovFPR_regS.
13525   expand %{
13526     fcmovFPR_regS(cmp,flags,dst,src);
13527   %}
13528 %}
13529 
13530 
13531 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
        // Compare 2 longs and CMOVE floats (regF operands; guarded by UseSSE>=1).
        // The le/gt alternatives are grouped in parentheses so that the UseSSE guard
        // covers both of them.  Without the grouping, && binds tighter than || and a
        // gt test would satisfy the predicate regardless of UseSSE.
13532   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13533   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13534   ins_cost(200);
        // No encoding of its own: the rule expands into fcmovF_regS.
13535   expand %{
13536     fcmovF_regS(cmp,flags,dst,src);
13537   %}
13538 %}
13539 
13540 
13541 // ============================================================================
13542 // Procedure Call/Return Instructions
13543 // Call Java Static Instruction
13544 // Note: If this code changes, the corresponding ret_addr_offset() and
13545 //       compute_padding() functions will have to be adjusted.
      // Matches the CallStaticJava node.  The encoding is the sequence
      // pre_call_resets, Java_Static_Call(meth), call_epilog, post_call_FPU,
      // emitted in exactly that order.
13546 instruct CallStaticJavaDirect(method meth) %{
13547   match(CallStaticJava);
13548   effect(USE meth);
13549 
13550   ins_cost(300);
13551   format %{ "CALL,static " %}
13552   opcode(0xE8); /* E8 cd */
13553   ins_encode( pre_call_resets,
13554               Java_Static_Call( meth ),
13555               call_epilog,
13556               post_call_FPU );
13557   ins_pipe( pipe_slow );
        // NOTE(review): presumably requests 4-byte alignment for the call site, which
        // is why compute_padding() is coupled to this rule -- confirm.
13558   ins_alignment(4);
13559 %}
13560 
13561 // Call Java Dynamic Instruction
13562 // Note: If this code changes, the corresponding ret_addr_offset() and
13563 //       compute_padding() functions will have to be adjusted.
      // Matches the CallDynamicJava node.  The encoding is the sequence
      // pre_call_resets, Java_Dynamic_Call(meth), call_epilog, post_call_FPU,
      // emitted in exactly that order.
13564 instruct CallDynamicJavaDirect(method meth) %{
13565   match(CallDynamicJava);
13566   effect(USE meth);
13567 
13568   ins_cost(300);
        // The printed form shows a MOV of (oop)-1 into EAX ahead of the CALL.
13569   format %{ "MOV    EAX,(oop)-1\n\t"
13570             "CALL,dynamic" %}
13571   opcode(0xE8); /* E8 cd */
13572   ins_encode( pre_call_resets,
13573               Java_Dynamic_Call( meth ),
13574               call_epilog,
13575               post_call_FPU );
13576   ins_pipe( pipe_slow );
        // NOTE(review): presumably requests 4-byte alignment for the call site, which
        // is why compute_padding() is coupled to this rule -- confirm.
13577   ins_alignment(4);
13578 %}
13579 
13580 // Runtime call: the rule selected for CallRuntime nodes.
13581 instruct CallRuntimeDirect(method meth) %{
13582   match( CallRuntime );
13583   effect(USE meth);
13584 
13585   format %{ "CALL,runtime " %}
13586   ins_cost(300);
13587   opcode(0xE8); /* E8 cd */
13588   // FFREEs clear the float stack entries ahead of the call itself
13589   ins_encode( pre_call_resets, FFree_Float_Stack_All,
13590               Java_To_Runtime(meth),
13591               post_call_FPU
13592             );
13593   ins_pipe(pipe_slow);
13594 %}
13595 
13596 // Leaf runtime call (no safepoint): the rule selected for CallLeaf nodes.
13597 instruct CallLeafDirect(method meth) %{
13598   match( CallLeaf );
13599   effect(USE meth);
13600 
13601   format %{ "CALL_LEAF,runtime " %}
13602   ins_cost(300);
13603   opcode(0xE8); /* E8 cd */
13604   ins_encode( pre_call_resets, FFree_Float_Stack_All,
13605               Java_To_Runtime(meth),
13606               Verify_FPU_For_Leaf,
13607               post_call_FPU );
13608   ins_pipe(pipe_slow);
13609 %}
13610 
13611 instruct CallLeafNoFPDirect(method meth) %{
        // Rule selected for CallLeafNoFP nodes.  Only pre_call_resets and
        // Java_To_Runtime are encoded; none of the float-stack encodings used by
        // CallLeafDirect appear here.
13612   match( CallLeafNoFP );
13613   effect(USE meth);
13614 
13615   format %{ "CALL_LEAF_NOFP,runtime " %}
13616   ins_cost(300);
13617   opcode(0xE8); /* E8 cd */
13618   ins_encode( pre_call_resets, Java_To_Runtime( meth ) );
13619   ins_pipe(pipe_slow);
13620 %}
13621 
13622 
13623 // Return instruction.
13624 // Pops the return address and transfers control to it.
13625 instruct Ret() %{
13626   match( Return );
13627   opcode(0xC3);
13628   format %{ "RET" %}
13629   ins_encode( OpcP );
13630   ins_pipe(pipe_jmp);
13631 %}
13632 
13633 // Tail Call: an 'interprocedural jump' from a runtime stub into Java code.
13634 // The jump target will eventually return to the caller, so the return
13635 // address is left in place here (TailJump below is the rule that removes it).
13636 // The method pointer is passed along in EBX.
13637 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13638   match( TailCall jump_target method_ptr );
13639   format %{ "JMP    $jump_target \t# EBX holds method" %}
13640   ins_cost(300);
13641   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13642   ins_encode( OpcP, RegOpc( jump_target ) );
13643   ins_pipe(pipe_jmp);
13644 %}
13645 
13646 
13647 // Tail Jump: pop the return address (into EDX, as a dummy), then jump to the
13648 // target.  The TailCall rule above, by contrast, keeps the return address.
13649 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13650   match(TailJump jump_target ex_oop);
13651   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13652             "JMP    $jump_target " %}
13653   ins_cost(300);
13654   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13655   ins_encode( enc_pop_rdx, OpcP,
13656               RegOpc( jump_target ) );
13657   ins_pipe(pipe_jmp);
13658 %}
13659 
13660 // Create exception oop.  The oop is produced by stack-crawling runtime code
13661 // and is set up just before control reaches this handler, so it is already
13662 // available here and this rule emits no code.
13663 instruct CreateException(eAXRegP ex_oop) %{
13664   // size 0 and an empty encoding: nothing is emitted
13665   size(0);
13666 
13667   match( Set ex_oop (CreateEx) );
13668   // use the following format syntax
13669   format %{ "# exception oop is in EAX; no code emitted" %}
13670   ins_encode();
13671   ins_pipe(empty);
13672 %}
13673 
13674 
13675 // Rethrow exception.
13676 // The exception oop arrives in the first argument position; control is
13677 // transferred to the rethrow stub with a JUMP, not a call.
13678 instruct RethrowException() %{
13679   // Rule selected for Rethrow nodes
13680   match( Rethrow );
13681 
13682   // use the following format syntax
13683   format %{ "JMP    rethrow_stub" %}
13684   ins_encode( enc_rethrow );
13685   ins_pipe(pipe_jmp);
13686 %}
13687 
13688 // inlined locking and unlocking
13689 
13690 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
        // FastLock rule used when the current compilation uses RTM.
        // Sets the flags register; box is used and killed, and tmp, scr, cx1 and
        // cx2 are temporaries.
13691   predicate( Compile::current()->use_rtm() );
13692   match( Set cr (FastLock object box) );
13693   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13694   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13695   ins_cost(300);
13696   ins_encode %{
13697     const bool use_rtm = true;
13698     Method* compiled_method = (Method*)(ra_->C->method()->constant_encoding());
13699     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register,
13700                  $cx1$$Register, $cx2$$Register, _rtm_counters, _stack_rtm_counters,
13701                  compiled_method->method_data(), use_rtm, ra_->C->profile_rtm());
13702   %}
13703   ins_pipe( pipe_slow );
13704 %}
13705 
13706 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
        // FastLock rule used when the current compilation does not use RTM.
        // Sets the flags register; box is used and killed, tmp and scr are
        // temporaries.  The RTM-only arguments of fast_lock are passed as
        // noreg / NULL / false.
13707   predicate( !Compile::current()->use_rtm() );
13708   match( Set cr (FastLock object box) );
13709   effect(TEMP tmp, TEMP scr, USE_KILL box);
13710   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13711   ins_cost(300);
13712   ins_encode %{
13713     const bool use_rtm = false, profile_rtm = false;
13714     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, noreg, noreg, NULL, NULL, NULL, use_rtm, profile_rtm);
13715   %}
13716   ins_pipe( pipe_slow );
13717 %}
13718 
13719 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
        // FastUnlock rule.  Sets the flags register; box is used and killed and
        // tmp is a temporary.  Whether RTM is in use is read from the compilation
        // when the code is emitted.
13720   match( Set cr (FastUnlock object box) );
13721   effect(TEMP tmp, USE_KILL box);
13722   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13723   ins_cost(300);
13724   ins_encode %{
13725     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13726   %}
13727   ins_pipe( pipe_slow );
13728 %}
13729 
13730 
13731 
13732 // ============================================================================
13733 // Safepoint Instruction
13734 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
        // Safepoint poll: a TEST of EAX against the memory addressed by $poll,
        // tagged with a poll_type relocation.  The flags register is killed.
13735   match(SafePoint poll);
13736   effect(KILL cr, USE poll);
13737 
13738   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13739   ins_cost(125);
13740   // EBP would need size(3)
13741   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13742   ins_encode %{
13743     __ relocate(relocInfo::poll_type);
13744     address pre_pc = __ pc();
13745     __ testl(rax, Address($poll$$Register, 0));
13746     // The first byte emitted for the poll has to be 0x85 (the test-ax [reg] form).
13747     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13748   %}
13749   ins_pipe(ialu_reg_mem);
13750 %}
13751 
13752 
13753 // ============================================================================
13754 // The ADLC knows this rule by name, so it must not be renamed.
13755 // The ADLC also forces the output type of this rule to be
13756 // 'TypeRawPtr::BOTTOM'.
13757 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13758   match( Set dst (ThreadLocal) );
13759   effect(DEF dst, KILL cr);
13760 
13761   format %{ "MOV    $dst, Thread::current()" %}
13762   ins_encode %{
13763     // Hand the destination register straight to get_thread().
13764     __ get_thread($dst$$Register);
13765   %}
13766   ins_pipe(ialu_reg_fat);
13767 %}
13768 
13769 
13770 
13771 //----------PEEPHOLE RULES-----------------------------------------------------
13772 // These must follow all instruction definitions as they use the names
13773 // defined in the instruction definitions.
13774 //
13775 // peepmatch ( root_instr_name [preceding_instruction]* );
13776 //
13777 // peepconstraint %{
13778 // (instruction_number.operand_name relational_op instruction_number.operand_name
13779 //  [, ...] );
13780 // // instruction numbers are zero-based using left to right order in peepmatch
13781 //
13782 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13783 // // provide an instruction_number.operand_name for each operand that appears
13784 // // in the replacement instruction's match rule
13785 //
13786 // ---------VM FLAGS---------------------------------------------------------
13787 //
13788 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13789 //
13790 // Each peephole rule is given an identifying number starting with zero and
13791 // increasing by one in the order seen by the parser.  An individual peephole
13792 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13793 // on the command-line.
13794 //
13795 // ---------CURRENT LIMITATIONS----------------------------------------------
13796 //
13797 // Only match adjacent instructions in same basic block
13798 // Only equality constraints
13799 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13800 // Only one replacement instruction
13801 //
13802 // ---------EXAMPLE----------------------------------------------------------
13803 //
13804 // // pertinent parts of existing instructions in architecture description
13805 // instruct movI(rRegI dst, rRegI src) %{
13806 //   match(Set dst (CopyI src));
13807 // %}
13808 //
13809 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13810 //   match(Set dst (AddI dst src));
13811 //   effect(KILL cr);
13812 // %}
13813 //
13814 // // Change (inc mov) to lea
13815 // peephole %{
13816 //   // increment preceded by register-register move
13817 //   peepmatch ( incI_eReg movI );
13818 //   // require that the destination register of the increment
13819 //   // match the destination register of the move
13820 //   peepconstraint ( 0.dst == 1.dst );
13821 //   // construct a replacement instruction that sets
13822 //   // the destination to ( move's source register + one )
13823 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13824 // %}
13825 //
13826 // Implementation no longer uses movX instructions since
13827 // machine-independent system no longer uses CopyX nodes.
13828 //
13829 // peephole %{
13830 //   peepmatch ( incI_eReg movI );
13831 //   peepconstraint ( 0.dst == 1.dst );
13832 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13833 // %}
13834 //
13835 // peephole %{
13836 //   peepmatch ( decI_eReg movI );
13837 //   peepconstraint ( 0.dst == 1.dst );
13838 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13839 // %}
13840 //
13841 // peephole %{
13842 //   peepmatch ( addI_eReg_imm movI );
13843 //   peepconstraint ( 0.dst == 1.dst );
13844 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13845 // %}
13846 //
13847 // peephole %{
13848 //   peepmatch ( addP_eReg_imm movP );
13849 //   peepconstraint ( 0.dst == 1.dst );
13850 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13851 // %}
13852 
13853 // // Change load of spilled value to only a spill
13854 // instruct storeI(memory mem, rRegI src) %{
13855 //   match(Set mem (StoreI mem src));
13856 // %}
13857 //
13858 // instruct loadI(rRegI dst, memory mem) %{
13859 //   match(Set dst (LoadI mem));
13860 // %}
13861 //
13862 peephole %{
        // Change load of spilled value to only a spill.
        // Instruction 0 is the root (loadI); instruction 1 is the storeI that
        // immediately precedes it.
13863   peepmatch ( loadI storeI );
        // Applies only when the load reads back the register that was just stored
        // and both instructions use the same memory operand.
13864   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
        // The pair is replaced by a single storeI built from the store's operands.
13865   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13866 %}
13867 
13868 //----------SMARTSPILL RULES---------------------------------------------------
13869 // These must follow all instruction definitions as they use the names
13870 // defined in the instruction definitions.