1 //
    2 // Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Ok so here's the trick FPR1 is really st(0) except in the midst
   82 // of emission of assembly for a machnode. During the emission the fpu stack
   83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
   84 // the stack will not have this element so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering causes
   86 // instruction encoding to have to play games with the register
   87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
   88 // where it does flt->flt moves to see an example
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
  224 // Floating point registers.  Notice FPR0 is not a choice.
  225 // FPR0 is not ever allocated; we use clever encodings to fake
  226 // a 2-address instructions out of Intels FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
  265     // Post-loop multi-versioning expects mask to be present in K1 register, till the time
  266     // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
  267     // curruption of value held in K1 register.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
  275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  277   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  278   // of 128-bits operands for SSE instructions.
  279   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  280   // Store the value to a 128-bits operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
  286 // Buffer for 128-bits masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
  308 // !!!!! Special hack to get all type of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  415     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  419     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  631 
  632   C->output()->set_frame_complete(cbuf.insts_size());
  633 
  634   if (C->has_mach_constant_base_node()) {
  635     // NOTE: We set the table base offset here because users might be
  636     // emitted before MachConstantBaseNode.
  637     ConstantTable& constant_table = C->output()->constant_table();
  638     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  639   }
  640 }
  641 
  642 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  643   return MachNode::size(ra_); // too many variables; just compute it the hard way
  644 }
  645 
  646 int MachPrologNode::reloc() const {
  647   return 0; // a large enough number
  648 }
  649 
  650 //=============================================================================
  651 #ifndef PRODUCT
  652 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  653   Compile *C = ra_->C;
  654   int framesize = C->output()->frame_size_in_bytes();
  655   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  656   // Remove two words for return addr and rbp,
  657   framesize -= 2*wordSize;
  658 
  659   if (C->max_vector_size() > 16) {
  660     st->print("VZEROUPPER");
  661     st->cr(); st->print("\t");
  662   }
  663   if (C->in_24_bit_fp_mode()) {
  664     st->print("FLDCW  standard control word");
  665     st->cr(); st->print("\t");
  666   }
  667   if (framesize) {
  668     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  669     st->cr(); st->print("\t");
  670   }
  671   st->print_cr("POPL   EBP"); st->print("\t");
  672   if (do_polling() && C->is_method_compilation()) {
  673     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  674               "JA       #safepoint_stub\t"
  675               "# Safepoint: poll for GC");
  676   }
  677 }
  678 #endif
  679 
  680 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  681   Compile *C = ra_->C;
  682   MacroAssembler _masm(&cbuf);
  683 
  684   if (C->max_vector_size() > 16) {
  685     // Clear upper bits of YMM registers when current compiled code uses
  686     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  687     _masm.vzeroupper();
  688   }
  689   // If method set FPU control word, restore to standard control word
  690   if (C->in_24_bit_fp_mode()) {
  691     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  692   }
  693 
  694   int framesize = C->output()->frame_size_in_bytes();
  695   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  696   // Remove two words for return addr and rbp,
  697   framesize -= 2*wordSize;
  698 
  699   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  700 
  701   if (framesize >= 128) {
  702     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  703     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  704     emit_d32(cbuf, framesize);
  705   } else if (framesize) {
  706     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  707     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  708     emit_d8(cbuf, framesize);
  709   }
  710 
  711   emit_opcode(cbuf, 0x58 | EBP_enc);
  712 
  713   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  714     __ reserved_stack_check();
  715   }
  716 
  717   if (do_polling() && C->is_method_compilation()) {
  718     Register thread = as_Register(EBX_enc);
  719     MacroAssembler masm(&cbuf);
  720     __ get_thread(thread);
  721     Label dummy_label;
  722     Label* code_stub = &dummy_label;
  723     if (!C->output()->in_scratch_emit_size()) {
  724       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  725     }
  726     __ relocate(relocInfo::poll_return_type);
  727     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  728   }
  729 }
  730 
  731 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  732   return MachNode::size(ra_); // too many variables; just compute it
  733                               // the hard way
  734 }
  735 
  736 int MachEpilogNode::reloc() const {
  737   return 0; // a large enough number
  738 }
  739 
  740 const Pipeline * MachEpilogNode::pipeline() const {
  741   return MachNode::pipeline_class();
  742 }
  743 
  744 //=============================================================================
  745 
  746 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  747 static enum RC rc_class( OptoReg::Name reg ) {
  748 
  749   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  750   if (OptoReg::is_stack(reg)) return rc_stack;
  751 
  752   VMReg r = OptoReg::as_VMReg(reg);
  753   if (r->is_Register()) return rc_int;
  754   if (r->is_FloatRegister()) {
  755     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  756     return rc_float;
  757   }
  758   assert(r->is_XMMRegister(), "must be");
  759   return rc_xmm;
  760 }
  761 
  762 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  763                         int opcode, const char *op_str, int size, outputStream* st ) {
  764   if( cbuf ) {
  765     emit_opcode  (*cbuf, opcode );
  766     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  767 #ifndef PRODUCT
  768   } else if( !do_size ) {
  769     if( size != 0 ) st->print("\n\t");
  770     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  771       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  772       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  773     } else { // FLD, FST, PUSH, POP
  774       st->print("%s [ESP + #%d]",op_str,offset);
  775     }
  776 #endif
  777   }
  778   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  779   return size+3+offset_size;
  780 }
  781 
  782 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  783 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  784                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  785   int in_size_in_bits = Assembler::EVEX_32bit;
  786   int evex_encoding = 0;
  787   if (reg_lo+1 == reg_hi) {
  788     in_size_in_bits = Assembler::EVEX_64bit;
  789     evex_encoding = Assembler::VEX_W;
  790   }
  791   if (cbuf) {
  792     MacroAssembler _masm(cbuf);
  793     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  794     //                          it maps more cases to single byte displacement
  795     _masm.set_managed();
  796     if (reg_lo+1 == reg_hi) { // double move?
  797       if (is_load) {
  798         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  799       } else {
  800         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  801       }
  802     } else {
  803       if (is_load) {
  804         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  805       } else {
  806         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  807       }
  808     }
  809 #ifndef PRODUCT
  810   } else if (!do_size) {
  811     if (size != 0) st->print("\n\t");
  812     if (reg_lo+1 == reg_hi) { // double move?
  813       if (is_load) st->print("%s %s,[ESP + #%d]",
  814                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  815                               Matcher::regName[reg_lo], offset);
  816       else         st->print("MOVSD  [ESP + #%d],%s",
  817                               offset, Matcher::regName[reg_lo]);
  818     } else {
  819       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  820                               Matcher::regName[reg_lo], offset);
  821       else         st->print("MOVSS  [ESP + #%d],%s",
  822                               offset, Matcher::regName[reg_lo]);
  823     }
  824 #endif
  825   }
  826   bool is_single_byte = false;
  827   if ((UseAVX > 2) && (offset != 0)) {
  828     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  829   }
  830   int offset_size = 0;
  831   if (UseAVX > 2 ) {
  832     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  833   } else {
  834     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  835   }
  836   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  837   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  838   return size+5+offset_size;
  839 }
  840 
  841 
  842 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  843                             int src_hi, int dst_hi, int size, outputStream* st ) {
  844   if (cbuf) {
  845     MacroAssembler _masm(cbuf);
  846     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  847     _masm.set_managed();
  848     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  849       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  851     } else {
  852       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  854     }
  855 #ifndef PRODUCT
  856   } else if (!do_size) {
  857     if (size != 0) st->print("\n\t");
  858     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  859       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  860         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  861       } else {
  862         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  863       }
  864     } else {
  865       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  866         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  867       } else {
  868         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  869       }
  870     }
  871 #endif
  872   }
  873   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  874   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  875   int sz = (UseAVX > 2) ? 6 : 4;
  876   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  877       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  878   return size + sz;
  879 }
  880 
  881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  882                             int src_hi, int dst_hi, int size, outputStream* st ) {
  883   // 32-bit
  884   if (cbuf) {
  885     MacroAssembler _masm(cbuf);
  886     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  887     _masm.set_managed();
  888     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  889              as_Register(Matcher::_regEncode[src_lo]));
  890 #ifndef PRODUCT
  891   } else if (!do_size) {
  892     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  893 #endif
  894   }
  895   return (UseAVX> 2) ? 6 : 4;
  896 }
  897 
  898 
  899 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  900                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  901   // 32-bit
  902   if (cbuf) {
  903     MacroAssembler _masm(cbuf);
  904     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  905     _masm.set_managed();
  906     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  907              as_XMMRegister(Matcher::_regEncode[src_lo]));
  908 #ifndef PRODUCT
  909   } else if (!do_size) {
  910     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  911 #endif
  912   }
  913   return (UseAVX> 2) ? 6 : 4;
  914 }
  915 
  916 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  917   if( cbuf ) {
  918     emit_opcode(*cbuf, 0x8B );
  919     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  920 #ifndef PRODUCT
  921   } else if( !do_size ) {
  922     if( size != 0 ) st->print("\n\t");
  923     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  924 #endif
  925   }
  926   return size+2;
  927 }
  928 
  929 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  930                                  int offset, int size, outputStream* st ) {
  931   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  932     if( cbuf ) {
  933       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  934       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  935 #ifndef PRODUCT
  936     } else if( !do_size ) {
  937       if( size != 0 ) st->print("\n\t");
  938       st->print("FLD    %s",Matcher::regName[src_lo]);
  939 #endif
  940     }
  941     size += 2;
  942   }
  943 
  944   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  945   const char *op_str;
  946   int op;
  947   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  948     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  949     op = 0xDD;
  950   } else {                   // 32-bit store
  951     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  952     op = 0xD9;
  953     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  954   }
  955 
  956   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  957 }
  958 
  959 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  960 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  961                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  962 
  963 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  964                             int stack_offset, int reg, uint ireg, outputStream* st);
  965 
  966 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  967                                      int dst_offset, uint ireg, outputStream* st) {
  968   if (cbuf) {
  969     MacroAssembler _masm(cbuf);
  970     switch (ireg) {
  971     case Op_VecS:
  972       __ pushl(Address(rsp, src_offset));
  973       __ popl (Address(rsp, dst_offset));
  974       break;
  975     case Op_VecD:
  976       __ pushl(Address(rsp, src_offset));
  977       __ popl (Address(rsp, dst_offset));
  978       __ pushl(Address(rsp, src_offset+4));
  979       __ popl (Address(rsp, dst_offset+4));
  980       break;
  981     case Op_VecX:
  982       __ movdqu(Address(rsp, -16), xmm0);
  983       __ movdqu(xmm0, Address(rsp, src_offset));
  984       __ movdqu(Address(rsp, dst_offset), xmm0);
  985       __ movdqu(xmm0, Address(rsp, -16));
  986       break;
  987     case Op_VecY:
  988       __ vmovdqu(Address(rsp, -32), xmm0);
  989       __ vmovdqu(xmm0, Address(rsp, src_offset));
  990       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  991       __ vmovdqu(xmm0, Address(rsp, -32));
  992       break;
  993     case Op_VecZ:
  994       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  995       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  996       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  997       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  998       break;
  999     default:
 1000       ShouldNotReachHere();
 1001     }
 1002 #ifndef PRODUCT
 1003   } else {
 1004     switch (ireg) {
 1005     case Op_VecS:
 1006       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1007                 "popl    [rsp + #%d]",
 1008                 src_offset, dst_offset);
 1009       break;
 1010     case Op_VecD:
 1011       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1012                 "popq    [rsp + #%d]\n\t"
 1013                 "pushl   [rsp + #%d]\n\t"
 1014                 "popq    [rsp + #%d]",
 1015                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1016       break;
 1017      case Op_VecX:
 1018       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1019                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1020                 "movdqu  [rsp + #%d], xmm0\n\t"
 1021                 "movdqu  xmm0, [rsp - #16]",
 1022                 src_offset, dst_offset);
 1023       break;
 1024     case Op_VecY:
 1025       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1026                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1027                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1028                 "vmovdqu xmm0, [rsp - #32]",
 1029                 src_offset, dst_offset);
 1030       break;
 1031     case Op_VecZ:
 1032       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1033                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1034                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1035                 "vmovdqu xmm0, [rsp - #64]",
 1036                 src_offset, dst_offset);
 1037       break;
 1038     default:
 1039       ShouldNotReachHere();
 1040     }
 1041 #endif
 1042   }
 1043 }
 1044 
 1045 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1046   // Get registers to move
 1047   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1048   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1049   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1050   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1051 
 1052   enum RC src_second_rc = rc_class(src_second);
 1053   enum RC src_first_rc = rc_class(src_first);
 1054   enum RC dst_second_rc = rc_class(dst_second);
 1055   enum RC dst_first_rc = rc_class(dst_first);
 1056 
 1057   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1058 
 1059   // Generate spill code!
 1060   int size = 0;
 1061 
 1062   if( src_first == dst_first && src_second == dst_second )
 1063     return size;            // Self copy, no move
 1064 
 1065   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1066     uint ireg = ideal_reg();
 1067     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1068     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1069     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1070     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1071       // mem -> mem
 1072       int src_offset = ra_->reg2offset(src_first);
 1073       int dst_offset = ra_->reg2offset(dst_first);
 1074       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1075     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1076       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1077     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1078       int stack_offset = ra_->reg2offset(dst_first);
 1079       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1080     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1081       int stack_offset = ra_->reg2offset(src_first);
 1082       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1083     } else {
 1084       ShouldNotReachHere();
 1085     }
 1086     return 0;
 1087   }
 1088 
 1089   // --------------------------------------
 1090   // Check for mem-mem move.  push/pop to move.
 1091   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1092     if( src_second == dst_first ) { // overlapping stack copy ranges
 1093       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1094       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1095       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1096       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1097     }
 1098     // move low bits
 1099     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1100     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1101     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1102       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1103       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1104     }
 1105     return size;
 1106   }
 1107 
 1108   // --------------------------------------
 1109   // Check for integer reg-reg copy
 1110   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1111     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1112 
 1113   // Check for integer store
 1114   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1115     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1116 
 1117   // Check for integer load
 1118   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1119     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1120 
 1121   // Check for integer reg-xmm reg copy
 1122   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1123     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1124             "no 64 bit integer-float reg moves" );
 1125     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1126   }
 1127   // --------------------------------------
 1128   // Check for float reg-reg copy
 1129   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1130     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1131             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1132     if( cbuf ) {
 1133 
 1134       // Note the mucking with the register encode to compensate for the 0/1
 1135       // indexing issue mentioned in a comment in the reg_def sections
 1136       // for FPR registers many lines above here.
 1137 
 1138       if( src_first != FPR1L_num ) {
 1139         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1140         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1141         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1142         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1143      } else {
 1144         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1145         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1146      }
 1147 #ifndef PRODUCT
 1148     } else if( !do_size ) {
 1149       if( size != 0 ) st->print("\n\t");
 1150       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1151       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1152 #endif
 1153     }
 1154     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1155   }
 1156 
 1157   // Check for float store
 1158   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1159     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1160   }
 1161 
 1162   // Check for float load
 1163   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1164     int offset = ra_->reg2offset(src_first);
 1165     const char *op_str;
 1166     int op;
 1167     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1168       op_str = "FLD_D";
 1169       op = 0xDD;
 1170     } else {                   // 32-bit load
 1171       op_str = "FLD_S";
 1172       op = 0xD9;
 1173       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1174     }
 1175     if( cbuf ) {
 1176       emit_opcode  (*cbuf, op );
 1177       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1178       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1179       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1180 #ifndef PRODUCT
 1181     } else if( !do_size ) {
 1182       if( size != 0 ) st->print("\n\t");
 1183       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1184 #endif
 1185     }
 1186     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1187     return size + 3+offset_size+2;
 1188   }
 1189 
 1190   // Check for xmm reg-reg copy
 1191   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1192     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1193             (src_first+1 == src_second && dst_first+1 == dst_second),
 1194             "no non-adjacent float-moves" );
 1195     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1196   }
 1197 
 1198   // Check for xmm reg-integer reg copy
 1199   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1200     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1201             "no 64 bit float-integer reg moves" );
 1202     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1203   }
 1204 
 1205   // Check for xmm store
 1206   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1207     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1208   }
 1209 
 1210   // Check for float xmm load
 1211   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1212     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1213   }
 1214 
 1215   // Copy from float reg to xmm reg
 1216   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1217     // copy to the top of stack from floating point reg
 1218     // and use LEA to preserve flags
 1219     if( cbuf ) {
 1220       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1221       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1222       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1223       emit_d8(*cbuf,0xF8);
 1224 #ifndef PRODUCT
 1225     } else if( !do_size ) {
 1226       if( size != 0 ) st->print("\n\t");
 1227       st->print("LEA    ESP,[ESP-8]");
 1228 #endif
 1229     }
 1230     size += 4;
 1231 
 1232     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1233 
 1234     // Copy from the temp memory to the xmm reg.
 1235     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1236 
 1237     if( cbuf ) {
 1238       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1239       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1240       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1241       emit_d8(*cbuf,0x08);
 1242 #ifndef PRODUCT
 1243     } else if( !do_size ) {
 1244       if( size != 0 ) st->print("\n\t");
 1245       st->print("LEA    ESP,[ESP+8]");
 1246 #endif
 1247     }
 1248     size += 4;
 1249     return size;
 1250   }
 1251 
 1252   assert( size > 0, "missed a case" );
 1253 
 1254   // --------------------------------------------------------------------
 1255   // Check for second bits still needing moving.
 1256   if( src_second == dst_second )
 1257     return size;               // Self copy; no move
 1258   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1259 
 1260   // Check for second word int-int move
 1261   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1262     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1263 
 1264   // Check for second word integer store
 1265   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1266     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1267 
 1268   // Check for second word integer load
 1269   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1270     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1271 
 1272   // AVX-512 opmask specific spilling.
 1273   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1274     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1275     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1276     MacroAssembler _masm(cbuf);
 1277     int offset = ra_->reg2offset(src_first);
 1278     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1279     return 0;
 1280   }
 1281 
 1282   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1283     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1284     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1285     MacroAssembler _masm(cbuf);
 1286     int offset = ra_->reg2offset(dst_first);
 1287     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1288     return 0;
 1289   }
 1290 
 1291   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1292     Unimplemented();
 1293     return 0;
 1294   }
 1295 
 1296   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1297     Unimplemented();
 1298     return 0;
 1299   }
 1300 
 1301   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1302     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1303     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1304     MacroAssembler _masm(cbuf);
 1305     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1306     return 0;
 1307   }
 1308 
 1309   Unimplemented();
 1310   return 0; // Mute compiler
 1311 }
 1312 
 1313 #ifndef PRODUCT
 1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1315   implementation( NULL, ra_, false, st );
 1316 }
 1317 #endif
 1318 
 1319 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1320   implementation( &cbuf, ra_, false, NULL );
 1321 }
 1322 
 1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1324   return MachNode::size(ra_);
 1325 }
 1326 
 1327 
 1328 //=============================================================================
 1329 #ifndef PRODUCT
 1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1331   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1332   int reg = ra_->get_reg_first(this);
 1333   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1334 }
 1335 #endif
 1336 
 1337 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1338   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1339   int reg = ra_->get_encode(this);
 1340   if( offset >= 128 ) {
 1341     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1342     emit_rm(cbuf, 0x2, reg, 0x04);
 1343     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1344     emit_d32(cbuf, offset);
 1345   }
 1346   else {
 1347     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1348     emit_rm(cbuf, 0x1, reg, 0x04);
 1349     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1350     emit_d8(cbuf, offset);
 1351   }
 1352 }
 1353 
 1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1355   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1356   if( offset >= 128 ) {
 1357     return 7;
 1358   }
 1359   else {
 1360     return 4;
 1361   }
 1362 }
 1363 
 1364 //=============================================================================
 1365 #ifndef PRODUCT
 1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1367   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1368   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1369   st->print_cr("\tNOP");
 1370   st->print_cr("\tNOP");
 1371   if( !OptoBreakpoint )
 1372     st->print_cr("\tNOP");
 1373 }
 1374 #endif
 1375 
 1376 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1377   MacroAssembler masm(&cbuf);
 1378 #ifdef ASSERT
 1379   uint insts_size = cbuf.insts_size();
 1380 #endif
 1381   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1382   masm.jump_cc(Assembler::notEqual,
 1383                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1384   /* WARNING these NOPs are critical so that verified entry point is properly
 1385      aligned for patching by NativeJump::patch_verified_entry() */
 1386   int nops_cnt = 2;
 1387   if( !OptoBreakpoint ) // Leave space for int3
 1388      nops_cnt += 1;
 1389   masm.nop(nops_cnt);
 1390 
 1391   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1392 }
 1393 
 1394 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1395   return OptoBreakpoint ? 11 : 12;
 1396 }
 1397 
 1398 
 1399 //=============================================================================
 1400 
 1401 // Vector calling convention not supported.
 1402 const bool Matcher::supports_vector_calling_convention() {
 1403   return false;
 1404 }
 1405 
 1406 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1407   Unimplemented();
 1408   return OptoRegPair(0, 0);
 1409 }
 1410 
 1411 // Is this branch offset short enough that a short branch can be used?
 1412 //
 1413 // NOTE: If the platform does not provide any short branch variants, then
 1414 //       this method should return false for offset 0.
 1415 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1416   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1419   offset -= br_size;
 1420 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 1423   if (rule == jmpConUCF2_rule)
 1424     return (-126 <= offset && offset <= 125);
 1425   return (-128 <= offset && offset <= 127);
 1426 }
 1427 
 1428 // Return whether or not this register is ever used as an argument.  This
 1429 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1430 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1432 bool Matcher::can_be_java_arg( int reg ) {
 1433   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1434   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1435   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1436   return false;
 1437 }
 1438 
 1439 bool Matcher::is_spillable_arg( int reg ) {
 1440   return can_be_java_arg(reg);
 1441 }
 1442 
 1443 uint Matcher::int_pressure_limit()
 1444 {
 1445   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1446 }
 1447 
 1448 uint Matcher::float_pressure_limit()
 1449 {
 1450   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1451 }
 1452 
 1453 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // code that uses a multiply.  Only do so when the constant divisor
  // fits into 32 bits (min_jint is excluded because negating it does
  // not yield a correct positive 32-bit value).
 1459   return VM_Version::has_fast_idiv() &&
 1460          (divisor == (int)divisor && divisor != min_jint);
 1461 }
 1462 
 1463 // Register for DIVI projection of divmodI
 1464 RegMask Matcher::divI_proj_mask() {
 1465   return EAX_REG_mask();
 1466 }
 1467 
 1468 // Register for MODI projection of divmodI
 1469 RegMask Matcher::modI_proj_mask() {
 1470   return EDX_REG_mask();
 1471 }
 1472 
 1473 // Register for DIVL projection of divmodL
 1474 RegMask Matcher::divL_proj_mask() {
 1475   ShouldNotReachHere();
 1476   return RegMask();
 1477 }
 1478 
 1479 // Register for MODL projection of divmodL
 1480 RegMask Matcher::modL_proj_mask() {
 1481   ShouldNotReachHere();
 1482   return RegMask();
 1483 }
 1484 
 1485 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1486   return NO_REG_mask();
 1487 }
 1488 
// Returns true if the high 32 bits of the value are known to be zero.
 1490 bool is_operand_hi32_zero(Node* n) {
 1491   int opc = n->Opcode();
 1492   if (opc == Op_AndL) {
 1493     Node* o2 = n->in(2);
 1494     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1495       return true;
 1496     }
 1497   }
 1498   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1499     return true;
 1500   }
 1501   return false;
 1502 }
 1503 
 1504 %}
 1505 
 1506 //----------ENCODING BLOCK-----------------------------------------------------
 1507 // This block specifies the encoding classes used by the compiler to output
 1508 // byte streams.  Encoding classes generate functions which are called by
 1509 // Machine Instruction Nodes in order to generate the bit encoding of the
 1510 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1512 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1513 // operand to generate a function which returns its register number when
 1514 // queried.   CONST_INTER causes an operand to generate a function which
 1515 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1516 // operand to generate four functions which return the Base Register, the
 1517 // Index Register, the Scale Value, and the Offset Value of the operand when
 1518 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e. the encoding bits for the instruction)
 1520 // associated with each basic boolean condition for a conditional instruction.
 1521 // Instructions specify two basic values for encoding.  They use the
 1522 // ins_encode keyword to specify their encoding class (which must be one of
 1523 // the class names specified in the encoding block), and they use the
 1524 // opcode keyword to specify, in order, their primary, secondary, and
 1525 // tertiary opcode.  Only the opcode sections which a particular instruction
 1526 // needs for encoding need to be specified.
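// For illustration only (a hypothetical instruct, not one defined in this
// section): an integer register-register add could be encoded as
//   opcode(0x03);                        // primary opcode: ADD r32,r/m32
//   ins_encode( OpcP, RegReg(dst,src) ); // emit opcode byte, then ModRM byte
// using the OpcP and RegReg encoding classes from the block below.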
 1527 encode %{
 1528   // Build emit functions for each basic byte or larger field in the intel
 1529   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1530   // code in the enc_class source block.  Emit functions will live in the
 1531   // main source block for now.  In future, we can generalize this by
 1532   // adding a syntax that specifies the sizes of fields in an order,
 1533   // so that the adlc can build the emit functions automagically
 1534 
 1535   // Emit primary opcode
 1536   enc_class OpcP %{
 1537     emit_opcode(cbuf, $primary);
 1538   %}
 1539 
 1540   // Emit secondary opcode
 1541   enc_class OpcS %{
 1542     emit_opcode(cbuf, $secondary);
 1543   %}
 1544 
 1545   // Emit opcode directly
 1546   enc_class Opcode(immI d8) %{
 1547     emit_opcode(cbuf, $d8$$constant);
 1548   %}
 1549 
 1550   enc_class SizePrefix %{
 1551     emit_opcode(cbuf,0x66);
 1552   %}
 1553 
 1554   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1555     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1556   %}
 1557 
 1558   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1559     emit_opcode(cbuf,$opcode$$constant);
 1560     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1561   %}
 1562 
 1563   enc_class mov_r32_imm0( rRegI dst ) %{
 1564     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1565     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1566   %}
 1567 
 1568   enc_class cdq_enc %{
 1569     // Full implementation of Java idiv and irem; checks for
 1570     // special case as described in JVM spec., p.243 & p.271.
 1571     //
 1572     //         normal case                           special case
 1573     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
 1579     //
    //  Code sequence:
 1581     //
 1582     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1583     //  0F 85 0B 00 00 00    jne         normal_case
 1584     //  33 D2                xor         rdx,edx
 1585     //  83 F9 FF             cmp         rcx,0FFh
 1586     //  0F 84 03 00 00 00    je          done
 1587     //                  normal_case:
 1588     //  99                   cdq
 1589     //  F7 F9                idiv        rax,ecx
 1590     //                  done:
 1591     //
 1592     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1594     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1595     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1596     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1597     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1598     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1599     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1600     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1601     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1603     // normal_case:
 1604     emit_opcode(cbuf,0x99);                                         // cdq
 1605     // idiv (note: must be emitted by the user of this rule)
    // done:
 1607   %}
 1608 
 1609   // Dense encoding for older common ops
 1610   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1611     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1612   %}
 1613 
 1614 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1616   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1617     // Check for 8-bit immediate, and set sign extend bit in opcode
 1618     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1619       emit_opcode(cbuf, $primary | 0x02);
 1620     }
 1621     else {                          // If 32-bit immediate
 1622       emit_opcode(cbuf, $primary);
 1623     }
 1624   %}
 1625 
 1626   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1627     // Emit primary opcode and set sign-extend bit
 1628     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
 1632       emit_opcode(cbuf, $primary);
 1633     }
 1634     // Emit r/m byte with secondary opcode, after primary opcode.
 1635     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1636   %}
 1637 
 1638   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1639     // Check for 8-bit immediate, and set sign extend bit in opcode
 1640     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1641       $$$emit8$imm$$constant;
 1642     }
 1643     else {                          // If 32-bit immediate
 1644       // Output immediate
 1645       $$$emit32$imm$$constant;
 1646     }
 1647   %}
 1648 
 1649   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1650     // Emit primary opcode and set sign-extend bit
 1651     // Check for 8-bit immediate, and set sign extend bit in opcode
 1652     int con = (int)$imm$$constant; // Throw away top bits
 1653     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1654     // Emit r/m byte with secondary opcode, after primary opcode.
 1655     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1656     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1657     else                               emit_d32(cbuf,con);
 1658   %}
 1659 
 1660   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1661     // Emit primary opcode and set sign-extend bit
 1662     // Check for 8-bit immediate, and set sign extend bit in opcode
 1663     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1664     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1665     // Emit r/m byte with tertiary opcode, after primary opcode.
 1666     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1667     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1668     else                               emit_d32(cbuf,con);
 1669   %}
 1670 
 1671   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1672     emit_cc(cbuf, $secondary, $dst$$reg );
 1673   %}
 1674 
 1675   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1676     int destlo = $dst$$reg;
 1677     int desthi = HIGH_FROM_LOW(destlo);
 1678     // bswap lo
 1679     emit_opcode(cbuf, 0x0F);
 1680     emit_cc(cbuf, 0xC8, destlo);
 1681     // bswap hi
 1682     emit_opcode(cbuf, 0x0F);
 1683     emit_cc(cbuf, 0xC8, desthi);
 1684     // xchg lo and hi
 1685     emit_opcode(cbuf, 0x87);
 1686     emit_rm(cbuf, 0x3, destlo, desthi);
 1687   %}
 1688 
 1689   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1690     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1691   %}
 1692 
 1693   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1694     $$$emit8$primary;
 1695     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1696   %}
 1697 
 1698   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1699     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1700     emit_d8(cbuf, op >> 8 );
 1701     emit_d8(cbuf, op & 255);
 1702   %}
 1703 
 1704   // emulate a CMOV with a conditional branch around a MOV
 1705   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1706     // Invert sense of branch from sense of CMOV
 1707     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1708     emit_d8( cbuf, $brOffs$$constant );
 1709   %}
 1710 
 1711   enc_class enc_PartialSubtypeCheck( ) %{
 1712     Register Redi = as_Register(EDI_enc); // result register
 1713     Register Reax = as_Register(EAX_enc); // super class
 1714     Register Recx = as_Register(ECX_enc); // killed
 1715     Register Resi = as_Register(ESI_enc); // sub class
 1716     Label miss;
 1717 
 1718     MacroAssembler _masm(&cbuf);
 1719     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1720                                      NULL, &miss,
 1721                                      /*set_cond_codes:*/ true);
 1722     if ($primary) {
 1723       __ xorptr(Redi, Redi);
 1724     }
 1725     __ bind(miss);
 1726   %}
 1727 
 1728   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1729     MacroAssembler masm(&cbuf);
 1730     int start = masm.offset();
 1731     if (UseSSE >= 2) {
 1732       if (VerifyFPU) {
 1733         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1734       }
 1735     } else {
 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       masm.empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {
 1741       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1742     } else {
 1743       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
 1747   enc_class Verify_FPU_For_Leaf %{
 1748     if( VerifyFPU ) {
 1749       MacroAssembler masm(&cbuf);
 1750       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1751     }
 1752   %}
 1753 
 1754   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1755     // This is the instruction starting address for relocation info.
 1756     MacroAssembler _masm(&cbuf);
 1757     cbuf.set_insts_mark();
 1758     $$$emit8$primary;
 1759     // CALL directly to the runtime
 1760     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1761                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1762     __ post_call_nop();
 1763 
 1764     if (UseSSE >= 2) {
 1765       MacroAssembler _masm(&cbuf);
 1766       BasicType rt = tf()->return_type();
 1767 
 1768       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1769         // A C runtime call where the return value is unused.  In SSE2+
 1770         // mode the result needs to be removed from the FPU stack.  It's
 1771         // likely that this function call could be removed by the
 1772         // optimizer if the C function is a pure function.
 1773         __ ffree(0);
 1774       } else if (rt == T_FLOAT) {
 1775         __ lea(rsp, Address(rsp, -4));
 1776         __ fstp_s(Address(rsp, 0));
 1777         __ movflt(xmm0, Address(rsp, 0));
 1778         __ lea(rsp, Address(rsp,  4));
 1779       } else if (rt == T_DOUBLE) {
 1780         __ lea(rsp, Address(rsp, -8));
 1781         __ fstp_d(Address(rsp, 0));
 1782         __ movdbl(xmm0, Address(rsp, 0));
 1783         __ lea(rsp, Address(rsp,  8));
 1784       }
 1785     }
 1786   %}
 1787 
 1788   enc_class pre_call_resets %{
 1789     // If method sets FPU control word restore it here
 1790     debug_only(int off0 = cbuf.insts_size());
 1791     if (ra_->C->in_24_bit_fp_mode()) {
 1792       MacroAssembler _masm(&cbuf);
 1793       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1794     }
 1795     // Clear upper bits of YMM registers when current compiled code uses
 1796     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1797     MacroAssembler _masm(&cbuf);
 1798     __ vzeroupper();
 1799     debug_only(int off1 = cbuf.insts_size());
 1800     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1801   %}
 1802 
 1803   enc_class post_call_FPU %{
 1804     // If method sets FPU control word do it here also
 1805     if (Compile::current()->in_24_bit_fp_mode()) {
 1806       MacroAssembler masm(&cbuf);
 1807       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1808     }
 1809   %}
 1810 
 1811   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1812     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1813     // who we intended to call.
 1814     MacroAssembler _masm(&cbuf);
 1815     cbuf.set_insts_mark();
 1816     $$$emit8$primary;
 1817 
 1818     if (!_method) {
 1819       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1820                      runtime_call_Relocation::spec(),
 1821                      RELOC_IMM32);
 1822       __ post_call_nop();
 1823     } else {
 1824       int method_index = resolved_method_index(cbuf);
 1825       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1826                                                   : static_call_Relocation::spec(method_index);
 1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1828                      rspec, RELOC_DISP32);
 1829       __ post_call_nop();
 1830       // Emit stubs for static call.
 1831       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1832       if (stub == NULL) {
 1833         ciEnv::current()->record_failure("CodeCache is full");
 1834         return;
 1835       }
 1836     }
 1837   %}
 1838 
 1839   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1840     MacroAssembler _masm(&cbuf);
 1841     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1842     __ post_call_nop();
 1843   %}
 1844 
 1845   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1846     int disp = in_bytes(Method::from_compiled_offset());
 1847     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1848 
 1849     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1850     MacroAssembler _masm(&cbuf);
 1851     cbuf.set_insts_mark();
 1852     $$$emit8$primary;
 1853     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1854     emit_d8(cbuf, disp);             // Displacement
 1855     __ post_call_nop();
 1856   %}
 1857 
 1858 //   Following encoding is no longer used, but may be restored if calling
 1859 //   convention changes significantly.
 1860 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1861 //
 1862 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1863 //     // int ic_reg     = Matcher::inline_cache_reg();
 1864 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1865 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1866 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1867 //
 1868 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1869 //     // // so we load it immediately before the call
 1870 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1871 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1872 //
 1873 //     // xor rbp,ebp
 1874 //     emit_opcode(cbuf, 0x33);
 1875 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1876 //
 1877 //     // CALL to interpreter.
 1878 //     cbuf.set_insts_mark();
 1879 //     $$$emit8$primary;
 1880 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1881 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1882 //   %}
 1883 
 1884   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1885     $$$emit8$primary;
 1886     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1887     $$$emit8$shift$$constant;
 1888   %}
 1889 
 1890   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1891     // Load immediate does not have a zero or sign extended version
 1892     // for 8-bit immediates
 1893     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1894     $$$emit32$src$$constant;
 1895   %}
 1896 
 1897   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1898     // Load immediate does not have a zero or sign extended version
 1899     // for 8-bit immediates
 1900     emit_opcode(cbuf, $primary + $dst$$reg);
 1901     $$$emit32$src$$constant;
 1902   %}
 1903 
 1904   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1905     // Load immediate does not have a zero or sign extended version
 1906     // for 8-bit immediates
 1907     int dst_enc = $dst$$reg;
 1908     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1909     if (src_con == 0) {
 1910       // xor dst, dst
 1911       emit_opcode(cbuf, 0x33);
 1912       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1913     } else {
 1914       emit_opcode(cbuf, $primary + dst_enc);
 1915       emit_d32(cbuf, src_con);
 1916     }
 1917   %}
 1918 
 1919   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1920     // Load immediate does not have a zero or sign extended version
 1921     // for 8-bit immediates
 1922     int dst_enc = $dst$$reg + 2;
 1923     int src_con = ((julong)($src$$constant)) >> 32;
 1924     if (src_con == 0) {
 1925       // xor dst, dst
 1926       emit_opcode(cbuf, 0x33);
 1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1928     } else {
 1929       emit_opcode(cbuf, $primary + dst_enc);
 1930       emit_d32(cbuf, src_con);
 1931     }
 1932   %}
 1933 
 1934 
 1935   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1936   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1937     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1938   %}
 1939 
 1940   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1941     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1942   %}
 1943 
 1944   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1945     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1946   %}
 1947 
 1948   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1949     $$$emit8$primary;
 1950     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1951   %}
 1952 
 1953   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1954     $$$emit8$secondary;
 1955     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1956   %}
 1957 
 1958   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1959     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1960   %}
 1961 
 1962   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1963     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1964   %}
 1965 
 1966   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1967     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1968   %}
 1969 
 1970   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1971     // Output immediate
 1972     $$$emit32$src$$constant;
 1973   %}
 1974 
 1975   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1976     // Output Float immediate bits
 1977     jfloat jf = $src$$constant;
 1978     int    jf_as_bits = jint_cast( jf );
 1979     emit_d32(cbuf, jf_as_bits);
 1980   %}
 1981 
 1982   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1983     // Output Float immediate bits
 1984     jfloat jf = $src$$constant;
 1985     int    jf_as_bits = jint_cast( jf );
 1986     emit_d32(cbuf, jf_as_bits);
 1987   %}
 1988 
 1989   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1990     // Output immediate
 1991     $$$emit16$src$$constant;
 1992   %}
 1993 
 1994   enc_class Con_d32(immI src) %{
 1995     emit_d32(cbuf,$src$$constant);
 1996   %}
 1997 
 1998   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1999     // Output immediate memory reference
 2000     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2001     emit_d32(cbuf, 0x00);
 2002   %}
 2003 
 2004   enc_class lock_prefix( ) %{
 2005     emit_opcode(cbuf,0xF0);         // [Lock]
 2006   %}
 2007 
 2008   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8b instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2013   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2014 
 2015     // XCHG  rbx,ecx
 2016     emit_opcode(cbuf,0x87);
 2017     emit_opcode(cbuf,0xD9);
 2018     // [Lock]
 2019     emit_opcode(cbuf,0xF0);
 2020     // CMPXCHG8 [Eptr]
 2021     emit_opcode(cbuf,0x0F);
 2022     emit_opcode(cbuf,0xC7);
 2023     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2024     // XCHG  rbx,ecx
 2025     emit_opcode(cbuf,0x87);
 2026     emit_opcode(cbuf,0xD9);
 2027   %}
 2028 
 2029   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032 
 2033     // CMPXCHG [Eptr]
 2034     emit_opcode(cbuf,0x0F);
 2035     emit_opcode(cbuf,0xB1);
 2036     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2037   %}
 2038 
 2039   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2040     // [Lock]
 2041     emit_opcode(cbuf,0xF0);
 2042 
 2043     // CMPXCHGB [Eptr]
 2044     emit_opcode(cbuf,0x0F);
 2045     emit_opcode(cbuf,0xB0);
 2046     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2047   %}
 2048 
 2049   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2050     // [Lock]
 2051     emit_opcode(cbuf,0xF0);
 2052 
    // operand-size prefix (16-bit operand)
 2054     emit_opcode(cbuf, 0x66);
 2055 
 2056     // CMPXCHGW [Eptr]
 2057     emit_opcode(cbuf,0x0F);
 2058     emit_opcode(cbuf,0xB1);
 2059     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2060   %}
 2061 
 2062   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2063     int res_encoding = $res$$reg;
 2064 
 2065     // MOV  res,0
 2066     emit_opcode( cbuf, 0xB8 + res_encoding);
 2067     emit_d32( cbuf, 0 );
 2068     // JNE,s  fail
 2069     emit_opcode(cbuf,0x75);
 2070     emit_d8(cbuf, 5 );
 2071     // MOV  res,1
 2072     emit_opcode( cbuf, 0xB8 + res_encoding);
 2073     emit_d32( cbuf, 1 );
 2074     // fail:
 2075   %}
 2076 
 2077   enc_class set_instruction_start( ) %{
 2078     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2079   %}
 2080 
 2081   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2082     int reg_encoding = $ereg$$reg;
 2083     int base  = $mem$$base;
 2084     int index = $mem$$index;
 2085     int scale = $mem$$scale;
 2086     int displace = $mem$$disp;
 2087     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2088     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2089   %}
 2090 
 2091   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2092     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2093     int base  = $mem$$base;
 2094     int index = $mem$$index;
 2095     int scale = $mem$$scale;
 2096     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2097     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2098     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2099   %}
 2100 
 2101   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2102     int r1, r2;
 2103     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2104     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2105     emit_opcode(cbuf,0x0F);
 2106     emit_opcode(cbuf,$tertiary);
 2107     emit_rm(cbuf, 0x3, r1, r2);
 2108     emit_d8(cbuf,$cnt$$constant);
 2109     emit_d8(cbuf,$primary);
 2110     emit_rm(cbuf, 0x3, $secondary, r1);
 2111     emit_d8(cbuf,$cnt$$constant);
 2112   %}
 2113 
 2114   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2115     emit_opcode( cbuf, 0x8B ); // Move
 2116     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2117     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2118       emit_d8(cbuf,$primary);
 2119       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2120       emit_d8(cbuf,$cnt$$constant-32);
 2121     }
 2122     emit_d8(cbuf,$primary);
 2123     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2124     emit_d8(cbuf,31);
 2125   %}
 2126 
 2127   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2128     int r1, r2;
 2129     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2130     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2131 
 2132     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2133     emit_rm(cbuf, 0x3, r1, r2);
 2134     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2135       emit_opcode(cbuf,$primary);
 2136       emit_rm(cbuf, 0x3, $secondary, r1);
 2137       emit_d8(cbuf,$cnt$$constant-32);
 2138     }
 2139     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2140     emit_rm(cbuf, 0x3, r2, r2);
 2141   %}
 2142 
 2143   // Clone of RegMem but accepts an extra parameter to access each
 2144   // half of a double in memory; it never needs relocation info.
 2145   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2146     emit_opcode(cbuf,$opcode$$constant);
 2147     int reg_encoding = $rm_reg$$reg;
 2148     int base     = $mem$$base;
 2149     int index    = $mem$$index;
 2150     int scale    = $mem$$scale;
 2151     int displace = $mem$$disp + $disp_for_half$$constant;
 2152     relocInfo::relocType disp_reloc = relocInfo::none;
 2153     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2154   %}
 2155 
 2156   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2157   //
 2158   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2159   // and it never needs relocation information.
 2160   // Frequently used to move data between FPU's Stack Top and memory.
 2161   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2162     int rm_byte_opcode = $rm_opcode$$constant;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp;
 2167     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2168     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2169   %}
 2170 
 2171   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2172     int rm_byte_opcode = $rm_opcode$$constant;
 2173     int base     = $mem$$base;
 2174     int index    = $mem$$index;
 2175     int scale    = $mem$$scale;
 2176     int displace = $mem$$disp;
 2177     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2178     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2179   %}
 2180 
 2181   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2182     int reg_encoding = $dst$$reg;
 2183     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2184     int index        = 0x04;            // 0x04 indicates no index
 2185     int scale        = 0x00;            // 0x00 indicates no scale
 2186     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2187     relocInfo::relocType disp_reloc = relocInfo::none;
 2188     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2189   %}
 2190 
 2191   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2192     // Compare dst,src
 2193     emit_opcode(cbuf,0x3B);
 2194     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2195     // jmp dst < src around move
 2196     emit_opcode(cbuf,0x7C);
 2197     emit_d8(cbuf,2);
 2198     // move dst,src
 2199     emit_opcode(cbuf,0x8B);
 2200     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2201   %}
 2202 
 2203   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2204     // Compare dst,src
 2205     emit_opcode(cbuf,0x3B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207     // jmp dst > src around move
 2208     emit_opcode(cbuf,0x7F);
 2209     emit_d8(cbuf,2);
 2210     // move dst,src
 2211     emit_opcode(cbuf,0x8B);
 2212     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2213   %}
 2214 
 2215   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2216     // If src is FPR1, we can just FST to store it.
 2217     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2218     int reg_encoding = 0x2; // Just store
 2219     int base  = $mem$$base;
 2220     int index = $mem$$index;
 2221     int scale = $mem$$scale;
 2222     int displace = $mem$$disp;
 2223     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2224     if( $src$$reg != FPR1L_enc ) {
 2225       reg_encoding = 0x3;  // Store & pop
 2226       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2227       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2228     }
 2229     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2230     emit_opcode(cbuf,$primary);
 2231     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2232   %}
 2233 
 2234   enc_class neg_reg(rRegI dst) %{
 2235     // NEG $dst
 2236     emit_opcode(cbuf,0xF7);
 2237     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2238   %}
 2239 
 2240   enc_class setLT_reg(eCXRegI dst) %{
 2241     // SETLT $dst
 2242     emit_opcode(cbuf,0x0F);
 2243     emit_opcode(cbuf,0x9C);
 2244     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2245   %}
 2246 
 2247   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2248     int tmpReg = $tmp$$reg;
 2249 
 2250     // SUB $p,$q
 2251     emit_opcode(cbuf,0x2B);
 2252     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2253     // SBB $tmp,$tmp
 2254     emit_opcode(cbuf,0x1B);
 2255     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2256     // AND $tmp,$y
 2257     emit_opcode(cbuf,0x23);
 2258     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2259     // ADD $p,$tmp
 2260     emit_opcode(cbuf,0x03);
 2261     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2262   %}
 2263 
 2264   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2265     // TEST shift,32
 2266     emit_opcode(cbuf,0xF7);
 2267     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2268     emit_d32(cbuf,0x20);
 2269     // JEQ,s small
 2270     emit_opcode(cbuf, 0x74);
 2271     emit_d8(cbuf, 0x04);
 2272     // MOV    $dst.hi,$dst.lo
 2273     emit_opcode( cbuf, 0x8B );
 2274     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2275     // CLR    $dst.lo
 2276     emit_opcode(cbuf, 0x33);
 2277     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2278 // small:
 2279     // SHLD   $dst.hi,$dst.lo,$shift
 2280     emit_opcode(cbuf,0x0F);
 2281     emit_opcode(cbuf,0xA5);
 2282     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
 2284     emit_opcode(cbuf,0xD3);
 2285     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2286   %}
 2287 
 2288   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2289     // TEST shift,32
 2290     emit_opcode(cbuf,0xF7);
 2291     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2292     emit_d32(cbuf,0x20);
 2293     // JEQ,s small
 2294     emit_opcode(cbuf, 0x74);
 2295     emit_d8(cbuf, 0x04);
 2296     // MOV    $dst.lo,$dst.hi
 2297     emit_opcode( cbuf, 0x8B );
 2298     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2299     // CLR    $dst.hi
 2300     emit_opcode(cbuf, 0x33);
 2301     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2302 // small:
 2303     // SHRD   $dst.lo,$dst.hi,$shift
 2304     emit_opcode(cbuf,0x0F);
 2305     emit_opcode(cbuf,0xAD);
 2306     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2308     emit_opcode(cbuf,0xD3);
 2309     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2310   %}
 2311 
 2312   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2313     // TEST shift,32
 2314     emit_opcode(cbuf,0xF7);
 2315     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2316     emit_d32(cbuf,0x20);
 2317     // JEQ,s small
 2318     emit_opcode(cbuf, 0x74);
 2319     emit_d8(cbuf, 0x05);
 2320     // MOV    $dst.lo,$dst.hi
 2321     emit_opcode( cbuf, 0x8B );
 2322     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2323     // SAR    $dst.hi,31
 2324     emit_opcode(cbuf, 0xC1);
 2325     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2326     emit_d8(cbuf, 0x1F );
 2327 // small:
 2328     // SHRD   $dst.lo,$dst.hi,$shift
 2329     emit_opcode(cbuf,0x0F);
 2330     emit_opcode(cbuf,0xAD);
 2331     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2333     emit_opcode(cbuf,0xD3);
 2334     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2335   %}
 2336 
 2337 
 2338   // ----------------- Encodings for floating point unit -----------------
 2339   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2340   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2341     $$$emit8$primary;
 2342     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2343   %}
 2344 
 2345   // Pop argument in FPR0 with FSTP ST(0)
 2346   enc_class PopFPU() %{
 2347     emit_opcode( cbuf, 0xDD );
 2348     emit_d8( cbuf, 0xD8 );
 2349   %}
 2350 
 2351   // !!!!! equivalent to Pop_Reg_F
 2352   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2353     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2354     emit_d8( cbuf, 0xD8+$dst$$reg );
 2355   %}
 2356 
 2357   enc_class Push_Reg_DPR( regDPR dst ) %{
 2358     emit_opcode( cbuf, 0xD9 );
 2359     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2360   %}
 2361 
 2362   enc_class strictfp_bias1( regDPR dst ) %{
 2363     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2364     emit_opcode( cbuf, 0x2D );
 2365     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2366     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2367     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2368   %}
 2369 
 2370   enc_class strictfp_bias2( regDPR dst ) %{
 2371     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2372     emit_opcode( cbuf, 0x2D );
 2373     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2374     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2375     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2376   %}
 2377 
 2378   // Special case for moving an integer register to a stack slot.
 2379   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2380     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2381   %}
 2382 
 2383   // Special case for moving a register to a stack slot.
 2384   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2385     // Opcode already emitted
 2386     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2387     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2388     emit_d32(cbuf, $dst$$disp);   // Displacement
 2389   %}
 2390 
 2391   // Push the integer in stackSlot 'src' onto FP-stack
 2392   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2393     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2394   %}
 2395 
 2396   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2397   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2398     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2399   %}
 2400 
 2401   // Same as Pop_Mem_F except for opcode
 2402   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2403   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2404     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2405   %}
 2406 
 2407   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2408     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2409     emit_d8( cbuf, 0xD8+$dst$$reg );
 2410   %}
 2411 
 2412   enc_class Push_Reg_FPR( regFPR dst ) %{
 2413     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2414     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2415   %}
 2416 
 2417   // Push FPU's float to a stack-slot, and pop FPU-stack
 2418   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2419     int pop = 0x02;
 2420     if ($src$$reg != FPR1L_enc) {
 2421       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2422       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2423       pop = 0x03;
 2424     }
 2425     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2426   %}
 2427 
 2428   // Push FPU's double to a stack-slot, and pop FPU-stack
 2429   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2430     int pop = 0x02;
 2431     if ($src$$reg != FPR1L_enc) {
 2432       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2433       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2434       pop = 0x03;
 2435     }
 2436     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2437   %}
 2438 
 2439   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2440   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2441     int pop = 0xD0 - 1; // -1 since we skip FLD
 2442     if ($src$$reg != FPR1L_enc) {
 2443       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2444       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2445       pop = 0xD8;
 2446     }
 2447     emit_opcode( cbuf, 0xDD );
 2448     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2449   %}
 2450 
 2451 
 2452   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2453     // load dst in FPR0
 2454     emit_opcode( cbuf, 0xD9 );
 2455     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2456     if ($src$$reg != FPR1L_enc) {
 2457       // fincstp
 2458       emit_opcode (cbuf, 0xD9);
 2459       emit_opcode (cbuf, 0xF7);
 2460       // swap src with FPR1:
 2461       // FXCH FPR1 with src
 2462       emit_opcode(cbuf, 0xD9);
 2463       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2464       // fdecstp
 2465       emit_opcode (cbuf, 0xD9);
 2466       emit_opcode (cbuf, 0xF6);
 2467     }
 2468   %}
 2469 
 2470   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2471     MacroAssembler _masm(&cbuf);
 2472     __ subptr(rsp, 8);
 2473     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2474     __ fld_d(Address(rsp, 0));
 2475     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2476     __ fld_d(Address(rsp, 0));
 2477   %}
 2478 
 2479   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2480     MacroAssembler _masm(&cbuf);
 2481     __ subptr(rsp, 4);
 2482     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2483     __ fld_s(Address(rsp, 0));
 2484     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2485     __ fld_s(Address(rsp, 0));
 2486   %}
 2487 
 2488   enc_class Push_ResultD(regD dst) %{
 2489     MacroAssembler _masm(&cbuf);
 2490     __ fstp_d(Address(rsp, 0));
 2491     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2492     __ addptr(rsp, 8);
 2493   %}
 2494 
 2495   enc_class Push_ResultF(regF dst, immI d8) %{
 2496     MacroAssembler _masm(&cbuf);
 2497     __ fstp_s(Address(rsp, 0));
 2498     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2499     __ addptr(rsp, $d8$$constant);
 2500   %}
 2501 
 2502   enc_class Push_SrcD(regD src) %{
 2503     MacroAssembler _masm(&cbuf);
 2504     __ subptr(rsp, 8);
 2505     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2506     __ fld_d(Address(rsp, 0));
 2507   %}
 2508 
 2509   enc_class push_stack_temp_qword() %{
 2510     MacroAssembler _masm(&cbuf);
 2511     __ subptr(rsp, 8);
 2512   %}
 2513 
 2514   enc_class pop_stack_temp_qword() %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ addptr(rsp, 8);
 2517   %}
 2518 
 2519   enc_class push_xmm_to_fpr1(regD src) %{
 2520     MacroAssembler _masm(&cbuf);
 2521     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2522     __ fld_d(Address(rsp, 0));
 2523   %}
 2524 
 2525   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2526     if ($src$$reg != FPR1L_enc) {
 2527       // fincstp
 2528       emit_opcode (cbuf, 0xD9);
 2529       emit_opcode (cbuf, 0xF7);
 2530       // FXCH FPR1 with src
 2531       emit_opcode(cbuf, 0xD9);
 2532       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2533       // fdecstp
 2534       emit_opcode (cbuf, 0xD9);
 2535       emit_opcode (cbuf, 0xF6);
 2536     }
 2537     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2538     // // FSTP   FPR$dst$$reg
 2539     // emit_opcode( cbuf, 0xDD );
 2540     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2541   %}
 2542 
 2543   enc_class fnstsw_sahf_skip_parity() %{
 2544     // fnstsw ax
 2545     emit_opcode( cbuf, 0xDF );
 2546     emit_opcode( cbuf, 0xE0 );
 2547     // sahf
 2548     emit_opcode( cbuf, 0x9E );
 2549     // jnp  ::skip
 2550     emit_opcode( cbuf, 0x7B );
 2551     emit_opcode( cbuf, 0x05 );
 2552   %}
 2553 
 2554   enc_class emitModDPR() %{
 2555     // fprem must be iterative
 2556     // :: loop
 2557     // fprem
 2558     emit_opcode( cbuf, 0xD9 );
 2559     emit_opcode( cbuf, 0xF8 );
 2560     // wait
 2561     emit_opcode( cbuf, 0x9b );
 2562     // fnstsw ax
 2563     emit_opcode( cbuf, 0xDF );
 2564     emit_opcode( cbuf, 0xE0 );
 2565     // sahf
 2566     emit_opcode( cbuf, 0x9E );
 2567     // jp  ::loop
 2568     emit_opcode( cbuf, 0x0F );
 2569     emit_opcode( cbuf, 0x8A );
 2570     emit_opcode( cbuf, 0xF4 );
 2571     emit_opcode( cbuf, 0xFF );
 2572     emit_opcode( cbuf, 0xFF );
 2573     emit_opcode( cbuf, 0xFF );
 2574   %}
 2575 
 2576   enc_class fpu_flags() %{
 2577     // fnstsw_ax
 2578     emit_opcode( cbuf, 0xDF);
 2579     emit_opcode( cbuf, 0xE0);
 2580     // test ax,0x0400
 2581     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2582     emit_opcode( cbuf, 0xA9 );
 2583     emit_d16   ( cbuf, 0x0400 );
 2584     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2585     // // test rax,0x0400
 2586     // emit_opcode( cbuf, 0xA9 );
 2587     // emit_d32   ( cbuf, 0x00000400 );
 2588     //
 2589     // jz exit (no unordered comparison)
 2590     emit_opcode( cbuf, 0x74 );
 2591     emit_d8    ( cbuf, 0x02 );
 2592     // mov ah,1 - treat as LT case (set carry flag)
 2593     emit_opcode( cbuf, 0xB4 );
 2594     emit_d8    ( cbuf, 0x01 );
 2595     // sahf
 2596     emit_opcode( cbuf, 0x9E);
 2597   %}
 2598 
 2599   enc_class cmpF_P6_fixup() %{
 2600     // Fixup the integer flags in case comparison involved a NaN
 2601     //
 2602     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2603     emit_opcode( cbuf, 0x7B );
 2604     emit_d8    ( cbuf, 0x03 );
 2605     // MOV AH,1 - treat as LT case (set carry flag)
 2606     emit_opcode( cbuf, 0xB4 );
 2607     emit_d8    ( cbuf, 0x01 );
 2608     // SAHF
 2609     emit_opcode( cbuf, 0x9E);
 2610     // NOP     // target for branch to avoid branch to branch
 2611     emit_opcode( cbuf, 0x90);
 2612   %}
 2613 
 2614 //     fnstsw_ax();
 2615 //     sahf();
 2616 //     movl(dst, nan_result);
 2617 //     jcc(Assembler::parity, exit);
 2618 //     movl(dst, less_result);
 2619 //     jcc(Assembler::below, exit);
 2620 //     movl(dst, equal_result);
 2621 //     jcc(Assembler::equal, exit);
 2622 //     movl(dst, greater_result);
 2623 
 2624 // less_result     =  1;
 2625 // greater_result  = -1;
 2626 // equal_result    = 0;
 2627 // nan_result      = -1;
 2628 
 2629   enc_class CmpF_Result(rRegI dst) %{
 2630     // fnstsw_ax();
 2631     emit_opcode( cbuf, 0xDF);
 2632     emit_opcode( cbuf, 0xE0);
 2633     // sahf
 2634     emit_opcode( cbuf, 0x9E);
 2635     // movl(dst, nan_result);
 2636     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2637     emit_d32( cbuf, -1 );
 2638     // jcc(Assembler::parity, exit);
 2639     emit_opcode( cbuf, 0x7A );
 2640     emit_d8    ( cbuf, 0x13 );
 2641     // movl(dst, less_result);
 2642     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2643     emit_d32( cbuf, -1 );
 2644     // jcc(Assembler::below, exit);
 2645     emit_opcode( cbuf, 0x72 );
 2646     emit_d8    ( cbuf, 0x0C );
 2647     // movl(dst, equal_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, 0 );
 2650     // jcc(Assembler::equal, exit);
 2651     emit_opcode( cbuf, 0x74 );
 2652     emit_d8    ( cbuf, 0x05 );
 2653     // movl(dst, greater_result);
 2654     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2655     emit_d32( cbuf, 1 );
 2656   %}
 2657 
 2658 
 2659   // Compare the longs and set flags
 2660   // BROKEN!  Do Not use as-is
 2661   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2662     // CMP    $src1.hi,$src2.hi
 2663     emit_opcode( cbuf, 0x3B );
 2664     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2665     // JNE,s  done
 2666     emit_opcode(cbuf,0x75);
 2667     emit_d8(cbuf, 2 );
 2668     // CMP    $src1.lo,$src2.lo
 2669     emit_opcode( cbuf, 0x3B );
 2670     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2671 // done:
 2672   %}
 2673 
 2674   enc_class convert_int_long( regL dst, rRegI src ) %{
 2675     // mov $dst.lo,$src
 2676     int dst_encoding = $dst$$reg;
 2677     int src_encoding = $src$$reg;
 2678     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2679     // mov $dst.hi,$src
 2680     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2681     // sar $dst.hi,31
 2682     emit_opcode( cbuf, 0xC1 );
 2683     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2684     emit_d8(cbuf, 0x1F );
 2685   %}
 2686 
 2687   enc_class convert_long_double( eRegL src ) %{
 2688     // push $src.hi
 2689     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2690     // push $src.lo
 2691     emit_opcode(cbuf, 0x50+$src$$reg  );
 2692     // fild 64-bits at [SP]
 2693     emit_opcode(cbuf,0xdf);
 2694     emit_d8(cbuf, 0x6C);
 2695     emit_d8(cbuf, 0x24);
 2696     emit_d8(cbuf, 0x00);
 2697     // pop stack
 2698     emit_opcode(cbuf, 0x83); // add  SP, #8
 2699     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2700     emit_d8(cbuf, 0x8);
 2701   %}
 2702 
 2703   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2704     // IMUL   EDX:EAX,$src1
 2705     emit_opcode( cbuf, 0xF7 );
 2706     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2707     // SAR    EDX,$cnt-32
 2708     int shift_count = ((int)$cnt$$constant) - 32;
 2709     if (shift_count > 0) {
 2710       emit_opcode(cbuf, 0xC1);
 2711       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2712       emit_d8(cbuf, shift_count);
 2713     }
 2714   %}
 2715 
  // Same as convert_long_double above, but without the trailing ADD ESP, 8
 2717   enc_class convert_long_double2( eRegL src ) %{
 2718     // push $src.hi
 2719     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2720     // push $src.lo
 2721     emit_opcode(cbuf, 0x50+$src$$reg  );
 2722     // fild 64-bits at [SP]
 2723     emit_opcode(cbuf,0xdf);
 2724     emit_d8(cbuf, 0x6C);
 2725     emit_d8(cbuf, 0x24);
 2726     emit_d8(cbuf, 0x00);
 2727   %}
 2728 
 2729   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2730     // Basic idea: long = (long)int * (long)int
 2731     // IMUL EDX:EAX, src
 2732     emit_opcode( cbuf, 0xF7 );
 2733     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2734   %}
 2735 
 2736   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2737     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2738     // MUL EDX:EAX, src
 2739     emit_opcode( cbuf, 0xF7 );
 2740     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2741   %}
 2742 
 2743   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2744     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2745     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2746     // MOV    $tmp,$src.lo
 2747     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2748     // IMUL   $tmp,EDX
 2749     emit_opcode( cbuf, 0x0F );
 2750     emit_opcode( cbuf, 0xAF );
 2751     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2752     // MOV    EDX,$src.hi
 2753     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2754     // IMUL   EDX,EAX
 2755     emit_opcode( cbuf, 0x0F );
 2756     emit_opcode( cbuf, 0xAF );
 2757     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2758     // ADD    $tmp,EDX
 2759     emit_opcode( cbuf, 0x03 );
 2760     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2761     // MUL   EDX:EAX,$src.lo
 2762     emit_opcode( cbuf, 0xF7 );
 2763     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2764     // ADD    EDX,ESI
 2765     emit_opcode( cbuf, 0x03 );
 2766     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2767   %}
 2768 
 2769   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2770     // Basic idea: lo(result) = lo(src * y_lo)
 2771     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2772     // IMUL   $tmp,EDX,$src
 2773     emit_opcode( cbuf, 0x6B );
 2774     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2775     emit_d8( cbuf, (int)$src$$constant );
 2776     // MOV    EDX,$src
 2777     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2778     emit_d32( cbuf, (int)$src$$constant );
 2779     // MUL   EDX:EAX,EDX
 2780     emit_opcode( cbuf, 0xF7 );
 2781     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2782     // ADD    EDX,ESI
 2783     emit_opcode( cbuf, 0x03 );
 2784     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2785   %}
 2786 
 2787   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2788     // PUSH src1.hi
 2789     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2790     // PUSH src1.lo
 2791     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2792     // PUSH src2.hi
 2793     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2794     // PUSH src2.lo
 2795     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2796     // CALL directly to the runtime
 2797     MacroAssembler _masm(&cbuf);
 2798     cbuf.set_insts_mark();
 2799     emit_opcode(cbuf,0xE8);       // Call into runtime
 2800     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2801     __ post_call_nop();
 2802     // Restore stack
 2803     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2804     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2805     emit_d8(cbuf, 4*4);
 2806   %}
 2807 
 2808   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2809     // PUSH src1.hi
 2810     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2811     // PUSH src1.lo
 2812     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2813     // PUSH src2.hi
 2814     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2815     // PUSH src2.lo
 2816     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2817     // CALL directly to the runtime
 2818     MacroAssembler _masm(&cbuf);
 2819     cbuf.set_insts_mark();
 2820     emit_opcode(cbuf,0xE8);       // Call into runtime
 2821     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2822     __ post_call_nop();
 2823     // Restore stack
 2824     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2825     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2826     emit_d8(cbuf, 4*4);
 2827   %}
 2828 
 2829   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2830     // MOV   $tmp,$src.lo
 2831     emit_opcode(cbuf, 0x8B);
 2832     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2833     // OR    $tmp,$src.hi
 2834     emit_opcode(cbuf, 0x0B);
 2835     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2836   %}
 2837 
 2838   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2839     // CMP    $src1.lo,$src2.lo
 2840     emit_opcode( cbuf, 0x3B );
 2841     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2842     // JNE,s  skip
 2843     emit_cc(cbuf, 0x70, 0x5);
 2844     emit_d8(cbuf,2);
 2845     // CMP    $src1.hi,$src2.hi
 2846     emit_opcode( cbuf, 0x3B );
 2847     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2848   %}
 2849 
 2850   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2851     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2852     emit_opcode( cbuf, 0x3B );
 2853     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2854     // MOV    $tmp,$src1.hi
 2855     emit_opcode( cbuf, 0x8B );
 2856     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2857     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2858     emit_opcode( cbuf, 0x1B );
 2859     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2860   %}
 2861 
 2862   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2863     // XOR    $tmp,$tmp
 2864     emit_opcode(cbuf,0x33);  // XOR
 2865     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2866     // CMP    $tmp,$src.lo
 2867     emit_opcode( cbuf, 0x3B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2869     // SBB    $tmp,$src.hi
 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2872   %}
 2873 
  // Sniff, sniff... smells like Gnu Superoptimizer
 2875   enc_class neg_long( eRegL dst ) %{
 2876     emit_opcode(cbuf,0xF7);    // NEG hi
 2877     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2878     emit_opcode(cbuf,0xF7);    // NEG lo
 2879     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2880     emit_opcode(cbuf,0x83);    // SBB hi,0
 2881     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2882     emit_d8    (cbuf,0 );
 2883   %}
 2884 
 2885   enc_class enc_pop_rdx() %{
 2886     emit_opcode(cbuf,0x5A);
 2887   %}
 2888 
 2889   enc_class enc_rethrow() %{
 2890     MacroAssembler _masm(&cbuf);
 2891     cbuf.set_insts_mark();
 2892     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2893     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2894                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2895     __ post_call_nop();
 2896   %}
 2897 
 2898 
 2899   // Convert a double to an int.  Java semantics require we do complex
 2900   // manglelations in the corner cases.  So we set the rounding mode to
 2901   // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The corner cases are then detected and
  // patched up with the correct value by a runtime call (see below).
 2904   enc_class DPR2I_encoding( regDPR src ) %{
 2905     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2906     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2908     // However, I2C adapters and other float-stack manglers leave pending
 2909     // invalid-op exceptions hanging.  We would have to clear them before
 2910     // enabling them and that is more expensive than just testing for the
 2911     // invalid value Intel stores down in the corner cases.
 2912     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2913     emit_opcode(cbuf,0x2D);
 2914     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2915     // Allocate a word
 2916     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2917     emit_opcode(cbuf,0xEC);
 2918     emit_d8(cbuf,0x04);
 2919     // Encoding assumes a double has been pushed into FPR0.
 2920     // Store down the double as an int, popping the FPU stack
 2921     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2922     emit_opcode(cbuf,0x1C);
 2923     emit_d8(cbuf,0x24);
 2924     // Restore the rounding mode; mask the exception
 2925     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2926     emit_opcode(cbuf,0x2D);
 2927     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2928         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2929         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2930 
 2931     // Load the converted int; adjust CPU stack
 2932     emit_opcode(cbuf,0x58);       // POP EAX
 2933     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2934     emit_d32   (cbuf,0x80000000); //         0x80000000
 2935     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2936     emit_d8    (cbuf,0x07);       // Size of slow_call
 2937     // Push src onto stack slow-path
 2938     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2939     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2940     // CALL directly to the runtime
 2941     MacroAssembler _masm(&cbuf);
 2942     cbuf.set_insts_mark();
 2943     emit_opcode(cbuf,0xE8);       // Call into runtime
 2944     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2945     __ post_call_nop();
 2946     // Carry on here...
 2947   %}
 2948 
 2949   enc_class DPR2L_encoding( regDPR src ) %{
 2950     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2951     emit_opcode(cbuf,0x2D);
 2952     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the long result
 2954     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2955     emit_opcode(cbuf,0xEC);
 2956     emit_d8(cbuf,0x08);
 2957     // Encoding assumes a double has been pushed into FPR0.
 2958     // Store down the double as a long, popping the FPU stack
 2959     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2960     emit_opcode(cbuf,0x3C);
 2961     emit_d8(cbuf,0x24);
 2962     // Restore the rounding mode; mask the exception
 2963     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2964     emit_opcode(cbuf,0x2D);
 2965     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2966         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2967         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2968 
    // Load the converted long; adjust CPU stack
 2970     emit_opcode(cbuf,0x58);       // POP EAX
 2971     emit_opcode(cbuf,0x5A);       // POP EDX
 2972     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2973     emit_d8    (cbuf,0xFA);       // rdx
 2974     emit_d32   (cbuf,0x80000000); //         0x80000000
 2975     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2976     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2977     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2978     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2979     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2980     emit_d8    (cbuf,0x07);       // Size of slow_call
 2981     // Push src onto stack slow-path
 2982     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2983     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2984     // CALL directly to the runtime
 2985     MacroAssembler _masm(&cbuf);
 2986     cbuf.set_insts_mark();
 2987     emit_opcode(cbuf,0xE8);       // Call into runtime
 2988     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2989     __ post_call_nop();
 2990     // Carry on here...
 2991   %}
 2992 
 2993   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2994     // Operand was loaded from memory into fp ST (stack top)
 2995     // FMUL   ST,$src  /* D8 C8+i */
 2996     emit_opcode(cbuf, 0xD8);
 2997     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2998   %}
 2999 
 3000   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3002     emit_opcode(cbuf, 0xD8);
 3003     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // Could use FADDP  src2,ST  /* DE C0+i */ instead
 3005   %}
 3006 
 3007   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3008     // FADDP  src2,ST  /* DE C0+i */
 3009     emit_opcode(cbuf, 0xDE);
 3010     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3011   %}
 3012 
 3013   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3014     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV   ST,$src2
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3022   %}
 3023 
 3024   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3025     // Operand was loaded from memory into fp ST (stack top)
 3026     // FADD   ST,$src  /* D8 C0+i */
 3027     emit_opcode(cbuf, 0xD8);
 3028     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3029 
    // FMUL   ST,src2  /* D8 C8+i */
 3031     emit_opcode(cbuf, 0xD8);
 3032     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3033   %}
 3034 
 3035 
 3036   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3037     // Operand was loaded from memory into fp ST (stack top)
 3038     // FADD   ST,$src  /* D8 C0+i */
 3039     emit_opcode(cbuf, 0xD8);
 3040     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3041 
 3042     // FMULP  src2,ST  /* DE C8+i */
 3043     emit_opcode(cbuf, 0xDE);
 3044     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3045   %}
 3046 
 3047   // Atomically load the volatile long
 3048   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3049     emit_opcode(cbuf,0xDF);
 3050     int rm_byte_opcode = 0x05;
 3051     int base     = $mem$$base;
 3052     int index    = $mem$$index;
 3053     int scale    = $mem$$scale;
 3054     int displace = $mem$$disp;
 3055     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3056     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3057     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3058   %}
 3059 
 3060   // Volatile Store Long.  Must be atomic, so move it into
 3061   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3062   // target address before the store (for null-ptr checks)
 3063   // so the memory operand is used twice in the encoding.
 3064   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3065     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3066     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3067     emit_opcode(cbuf,0xDF);
 3068     int rm_byte_opcode = 0x07;
 3069     int base     = $mem$$base;
 3070     int index    = $mem$$index;
 3071     int scale    = $mem$$scale;
 3072     int displace = $mem$$disp;
 3073     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3074     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3075   %}
 3076 
 3077 %}
 3078 
 3079 
 3080 //----------FRAME--------------------------------------------------------------
 3081 // Definition of frame structure and management information.
 3082 //
 3083 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3084 //                             |   (to get allocators register number
 3085 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3086 //  r   CALLER     |        |
 3087 //  o     |        +--------+      pad to even-align allocators stack-slot
 3088 //  w     V        |  pad0  |        numbers; owned by CALLER
 3089 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3090 //  h     ^        |   in   |  5
 3091 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3092 //  |     |        |        |  3
 3093 //  |     |        +--------+
 3094 //  V     |        | old out|      Empty on Intel, window on Sparc
 3095 //        |    old |preserve|      Must be even aligned.
 3096 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3097 //        |        |   in   |  3   area for Intel ret address
 3098 //     Owned by    |preserve|      Empty on Sparc.
 3099 //       SELF      +--------+
 3100 //        |        |  pad2  |  2   pad to align old SP
 3101 //        |        +--------+  1
 3102 //        |        | locks  |  0
 3103 //        |        +--------+----> OptoReg::stack0(), even aligned
 3104 //        |        |  pad1  | 11   pad to align new SP
 3105 //        |        +--------+
 3106 //        |        |        | 10
 3107 //        |        | spills |  9   spills
 3108 //        V        |        |  8   (pad0 slot for callee)
 3109 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3110 //        ^        |  out   |  7
 3111 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3112 //     Owned by    +--------+
 3113 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3114 //        |    new |preserve|      Must be even-aligned.
 3115 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3116 //        |        |        |
 3117 //
 3118 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3119 //         known from SELF's arguments and the Java calling convention.
 3120 //         Region 6-7 is determined per call site.
 3121 // Note 2: If the calling convention leaves holes in the incoming argument
 3122 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
 3124 //         incoming area, as the Java calling convention is completely under
 3125 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
 3127 //         varargs C calling conventions.
 3128 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3129 //         even aligned with pad0 as needed.
 3130 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3131 //         region 6-11 is even aligned; it may be padded out more so that
 3132 //         the region from SP to FP meets the minimum stack alignment.
 3133 
 3134 frame %{
 3135   // These three registers define part of the calling convention
 3136   // between compiled code and the interpreter.
 3137   inline_cache_reg(EAX);                // Inline Cache Register
 3138 
 3139   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3140   cisc_spilling_operand_name(indOffset32);
 3141 
 3142   // Number of stack slots consumed by locking an object
 3143   sync_stack_slots(1);
 3144 
 3145   // Compiled code's Frame Pointer
 3146   frame_pointer(ESP);
 3147   // Interpreter stores its frame pointer in a register which is
 3148   // stored to the stack by I2CAdaptors.
 3149   // I2CAdaptors convert from interpreted java to compiled java.
 3150   interpreter_frame_pointer(EBP);
 3151 
 3152   // Stack alignment requirement
 3153   // Alignment size in bytes (128-bit -> 16 bytes)
 3154   stack_alignment(StackAlignmentInBytes);
 3155 
 3156   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3157   // for calls to C.  Supports the var-args backing area for register parms.
 3158   varargs_C_out_slots_killed(0);
 3159 
 3160   // The after-PROLOG location of the return address.  Location of
 3161   // return address specifies a type (REG or STACK) and a number
 3162   // representing the register number (i.e. - use a register name) or
 3163   // stack slot.
 3164   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3165   // Otherwise, it is above the locks and verification slot and alignment word
 3166   return_addr(STACK - 1 +
 3167               align_up((Compile::current()->in_preserve_stack_slots() +
 3168                         Compile::current()->fixed_slots()),
 3169                        stack_alignment_in_slots()));
 3170 
 3171   // Location of C & interpreter return values
 3172   c_return_value %{
 3173     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3174     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3175     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3176 
 3177     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3178     // that C functions return float and double results in XMM0.
 3179     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3180       return OptoRegPair(XMM0b_num,XMM0_num);
 3181     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3182       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3183 
 3184     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3185   %}
 3186 
 3187   // Location of return values
 3188   return_value %{
 3189     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3190     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3191     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3192     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3193       return OptoRegPair(XMM0b_num,XMM0_num);
 3194     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3195       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3196     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3197   %}
 3198 
 3199 %}
 3200 
 3201 //----------ATTRIBUTES---------------------------------------------------------
 3202 //----------Operand Attributes-------------------------------------------------
 3203 op_attrib op_cost(0);        // Required cost attribute
 3204 
 3205 //----------Instruction Attributes---------------------------------------------
 3206 ins_attrib ins_cost(100);       // Required cost attribute
 3207 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3208 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3209                                 // non-matching short branch variant of some
                                // long branch?
 3211 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3212                                 // specifies the alignment that some part of the instruction (not
 3213                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3214                                 // function must be provided for the instruction
 3215 
 3216 //----------OPERANDS-----------------------------------------------------------
 3217 // Operand definitions must precede instruction definitions for correct parsing
 3218 // in the ADLC because operands constitute user defined types which are used in
 3219 // instruction definitions.
 3220 
 3221 //----------Simple Operands----------------------------------------------------
 3222 // Immediate Operands
 3223 // Integer Immediate
 3224 operand immI() %{
 3225   match(ConI);
 3226 
 3227   op_cost(10);
 3228   format %{ %}
 3229   interface(CONST_INTER);
 3230 %}
 3231 
 3232 // Constant for test vs zero
 3233 operand immI_0() %{
 3234   predicate(n->get_int() == 0);
 3235   match(ConI);
 3236 
 3237   op_cost(0);
 3238   format %{ %}
 3239   interface(CONST_INTER);
 3240 %}
 3241 
 3242 // Constant for increment
 3243 operand immI_1() %{
 3244   predicate(n->get_int() == 1);
 3245   match(ConI);
 3246 
 3247   op_cost(0);
 3248   format %{ %}
 3249   interface(CONST_INTER);
 3250 %}
 3251 
 3252 // Constant for decrement
 3253 operand immI_M1() %{
 3254   predicate(n->get_int() == -1);
 3255   match(ConI);
 3256 
 3257   op_cost(0);
 3258   format %{ %}
 3259   interface(CONST_INTER);
 3260 %}
 3261 
 3262 // Valid scale values for addressing modes
 3263 operand immI2() %{
 3264   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3265   match(ConI);
 3266 
 3267   format %{ %}
 3268   interface(CONST_INTER);
 3269 %}
 3270 
 3271 operand immI8() %{
 3272   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3273   match(ConI);
 3274 
 3275   op_cost(5);
 3276   format %{ %}
 3277   interface(CONST_INTER);
 3278 %}
 3279 
 3280 operand immU8() %{
 3281   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3282   match(ConI);
 3283 
 3284   op_cost(5);
 3285   format %{ %}
 3286   interface(CONST_INTER);
 3287 %}
 3288 
 3289 operand immI16() %{
 3290   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3291   match(ConI);
 3292 
 3293   op_cost(10);
 3294   format %{ %}
 3295   interface(CONST_INTER);
 3296 %}
 3297 
 3298 // Int Immediate non-negative
 3299 operand immU31()
 3300 %{
 3301   predicate(n->get_int() >= 0);
 3302   match(ConI);
 3303 
 3304   op_cost(0);
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 // Constant for long shifts
 3310 operand immI_32() %{
 3311   predicate( n->get_int() == 32 );
 3312   match(ConI);
 3313 
 3314   op_cost(0);
 3315   format %{ %}
 3316   interface(CONST_INTER);
 3317 %}
 3318 
 3319 operand immI_1_31() %{
 3320   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3321   match(ConI);
 3322 
 3323   op_cost(0);
 3324   format %{ %}
 3325   interface(CONST_INTER);
 3326 %}
 3327 
 3328 operand immI_32_63() %{
 3329   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3330   match(ConI);
 3331   op_cost(0);
 3332 
 3333   format %{ %}
 3334   interface(CONST_INTER);
 3335 %}
 3336 
 3337 operand immI_2() %{
 3338   predicate( n->get_int() == 2 );
 3339   match(ConI);
 3340 
 3341   op_cost(0);
 3342   format %{ %}
 3343   interface(CONST_INTER);
 3344 %}
 3345 
 3346 operand immI_3() %{
 3347   predicate( n->get_int() == 3 );
 3348   match(ConI);
 3349 
 3350   op_cost(0);
 3351   format %{ %}
 3352   interface(CONST_INTER);
 3353 %}
 3354 
 3355 operand immI_4()
 3356 %{
 3357   predicate(n->get_int() == 4);
 3358   match(ConI);
 3359 
 3360   op_cost(0);
 3361   format %{ %}
 3362   interface(CONST_INTER);
 3363 %}
 3364 
 3365 operand immI_8()
 3366 %{
 3367   predicate(n->get_int() == 8);
 3368   match(ConI);
 3369 
 3370   op_cost(0);
 3371   format %{ %}
 3372   interface(CONST_INTER);
 3373 %}
 3374 
 3375 // Pointer Immediate
 3376 operand immP() %{
 3377   match(ConP);
 3378 
 3379   op_cost(10);
 3380   format %{ %}
 3381   interface(CONST_INTER);
 3382 %}
 3383 
 3384 // NULL Pointer Immediate
 3385 operand immP0() %{
 3386   predicate( n->get_ptr() == 0 );
 3387   match(ConP);
 3388   op_cost(0);
 3389 
 3390   format %{ %}
 3391   interface(CONST_INTER);
 3392 %}
 3393 
 3394 // Long Immediate
 3395 operand immL() %{
 3396   match(ConL);
 3397 
 3398   op_cost(20);
 3399   format %{ %}
 3400   interface(CONST_INTER);
 3401 %}
 3402 
 3403 // Long Immediate zero
 3404 operand immL0() %{
 3405   predicate( n->get_long() == 0L );
 3406   match(ConL);
 3407   op_cost(0);
 3408 
 3409   format %{ %}
 3410   interface(CONST_INTER);
 3411 %}
 3412 
// Long Immediate minus one
 3414 operand immL_M1() %{
 3415   predicate( n->get_long() == -1L );
 3416   match(ConL);
 3417   op_cost(0);
 3418 
 3419   format %{ %}
 3420   interface(CONST_INTER);
 3421 %}
 3422 
 3423 // Long immediate from 0 to 127.
 3424 // Used for a shorter form of long mul by 10.
 3425 operand immL_127() %{
 3426   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3427   match(ConL);
 3428   op_cost(0);
 3429 
 3430   format %{ %}
 3431   interface(CONST_INTER);
 3432 %}
 3433 
 3434 // Long Immediate: low 32-bit mask
 3435 operand immL_32bits() %{
 3436   predicate(n->get_long() == 0xFFFFFFFFL);
 3437   match(ConL);
 3438   op_cost(0);
 3439 
 3440   format %{ %}
 3441   interface(CONST_INTER);
 3442 %}
 3443 
// Long Immediate: 32-bit signed value
 3445 operand immL32() %{
 3446   predicate(n->get_long() == (int)(n->get_long()));
 3447   match(ConL);
 3448   op_cost(20);
 3449 
 3450   format %{ %}
 3451   interface(CONST_INTER);
 3452 %}
 3453 
// Double Immediate zero
 3455 operand immDPR0() %{
 3456   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3457   // bug that generates code such that NaNs compare equal to 0.0
 3458   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3459   match(ConD);
 3460 
 3461   op_cost(5);
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
 3466 // Double Immediate one
 3467 operand immDPR1() %{
 3468   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3469   match(ConD);
 3470 
 3471   op_cost(5);
 3472   format %{ %}
 3473   interface(CONST_INTER);
 3474 %}
 3475 
 3476 // Double Immediate
 3477 operand immDPR() %{
 3478   predicate(UseSSE<=1);
 3479   match(ConD);
 3480 
 3481   op_cost(5);
 3482   format %{ %}
 3483   interface(CONST_INTER);
 3484 %}
 3485 
 3486 operand immD() %{
 3487   predicate(UseSSE>=2);
 3488   match(ConD);
 3489 
 3490   op_cost(5);
 3491   format %{ %}
 3492   interface(CONST_INTER);
 3493 %}
 3494 
 3495 // Double Immediate zero
 3496 operand immD0() %{
 3497   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3498   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3499   // compare equal to -0.0.
 3500   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3501   match(ConD);
 3502 
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Float Immediate zero
 3508 operand immFPR0() %{
 3509   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3510   match(ConF);
 3511 
 3512   op_cost(5);
 3513   format %{ %}
 3514   interface(CONST_INTER);
 3515 %}
 3516 
 3517 // Float Immediate one
 3518 operand immFPR1() %{
 3519   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3520   match(ConF);
 3521 
 3522   op_cost(5);
 3523   format %{ %}
 3524   interface(CONST_INTER);
 3525 %}
 3526 
 3527 // Float Immediate
 3528 operand immFPR() %{
 3529   predicate( UseSSE == 0 );
 3530   match(ConF);
 3531 
 3532   op_cost(5);
 3533   format %{ %}
 3534   interface(CONST_INTER);
 3535 %}
 3536 
 3537 // Float Immediate
 3538 operand immF() %{
 3539   predicate(UseSSE >= 1);
 3540   match(ConF);
 3541 
 3542   op_cost(5);
 3543   format %{ %}
 3544   interface(CONST_INTER);
 3545 %}
 3546 
 3547 // Float Immediate zero.  Zero and not -0.0
 3548 operand immF0() %{
 3549   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3550   match(ConF);
 3551 
 3552   op_cost(5);
 3553   format %{ %}
 3554   interface(CONST_INTER);
 3555 %}
 3556 
 3557 // Immediates for special shifts (sign extend)
 3558 
// Shift counts for sign extension
 3560 operand immI_16() %{
 3561   predicate( n->get_int() == 16 );
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 operand immI_24() %{
 3569   predicate( n->get_int() == 24 );
 3570   match(ConI);
 3571 
 3572   format %{ %}
 3573   interface(CONST_INTER);
 3574 %}
 3575 
 3576 // Constant for byte-wide masking
 3577 operand immI_255() %{
 3578   predicate( n->get_int() == 255 );
 3579   match(ConI);
 3580 
 3581   format %{ %}
 3582   interface(CONST_INTER);
 3583 %}
 3584 
 3585 // Constant for short-wide masking
 3586 operand immI_65535() %{
 3587   predicate(n->get_int() == 65535);
 3588   match(ConI);
 3589 
 3590   format %{ %}
 3591   interface(CONST_INTER);
 3592 %}
 3593 
 3594 operand kReg()
 3595 %{
 3596   constraint(ALLOC_IN_RC(vectmask_reg));
 3597   match(RegVectMask);
 3598   format %{%}
 3599   interface(REG_INTER);
 3600 %}
 3601 
 3602 operand kReg_K1()
 3603 %{
 3604   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3605   match(RegVectMask);
 3606   format %{%}
 3607   interface(REG_INTER);
 3608 %}
 3609 
 3610 operand kReg_K2()
 3611 %{
 3612   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3613   match(RegVectMask);
 3614   format %{%}
 3615   interface(REG_INTER);
 3616 %}
 3617 
 3618 // Special Registers
 3619 operand kReg_K3()
 3620 %{
 3621   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3622   match(RegVectMask);
 3623   format %{%}
 3624   interface(REG_INTER);
 3625 %}
 3626 
 3627 operand kReg_K4()
 3628 %{
 3629   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3630   match(RegVectMask);
 3631   format %{%}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 operand kReg_K5()
 3636 %{
 3637   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3638   match(RegVectMask);
 3639   format %{%}
 3640   interface(REG_INTER);
 3641 %}
 3642 
 3643 operand kReg_K6()
 3644 %{
 3645   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3646   match(RegVectMask);
 3647   format %{%}
 3648   interface(REG_INTER);
 3649 %}
 3650 
 3651 // Special Registers
 3652 operand kReg_K7()
 3653 %{
 3654   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3655   match(RegVectMask);
 3656   format %{%}
 3657   interface(REG_INTER);
 3658 %}
 3659 
 3660 // Register Operands
 3661 // Integer Register
 3662 operand rRegI() %{
 3663   constraint(ALLOC_IN_RC(int_reg));
 3664   match(RegI);
 3665   match(xRegI);
 3666   match(eAXRegI);
 3667   match(eBXRegI);
 3668   match(eCXRegI);
 3669   match(eDXRegI);
 3670   match(eDIRegI);
 3671   match(eSIRegI);
 3672 
 3673   format %{ %}
 3674   interface(REG_INTER);
 3675 %}
 3676 
 3677 // Subset of Integer Register
 3678 operand xRegI(rRegI reg) %{
 3679   constraint(ALLOC_IN_RC(int_x_reg));
 3680   match(reg);
 3681   match(eAXRegI);
 3682   match(eBXRegI);
 3683   match(eCXRegI);
 3684   match(eDXRegI);
 3685 
 3686   format %{ %}
 3687   interface(REG_INTER);
 3688 %}
 3689 
 3690 // Special Registers
 3691 operand eAXRegI(xRegI reg) %{
 3692   constraint(ALLOC_IN_RC(eax_reg));
 3693   match(reg);
 3694   match(rRegI);
 3695 
 3696   format %{ "EAX" %}
 3697   interface(REG_INTER);
 3698 %}
 3699 
 3700 // Special Registers
 3701 operand eBXRegI(xRegI reg) %{
 3702   constraint(ALLOC_IN_RC(ebx_reg));
 3703   match(reg);
 3704   match(rRegI);
 3705 
 3706   format %{ "EBX" %}
 3707   interface(REG_INTER);
 3708 %}
 3709 
 3710 operand eCXRegI(xRegI reg) %{
 3711   constraint(ALLOC_IN_RC(ecx_reg));
 3712   match(reg);
 3713   match(rRegI);
 3714 
 3715   format %{ "ECX" %}
 3716   interface(REG_INTER);
 3717 %}
 3718 
 3719 operand eDXRegI(xRegI reg) %{
 3720   constraint(ALLOC_IN_RC(edx_reg));
 3721   match(reg);
 3722   match(rRegI);
 3723 
 3724   format %{ "EDX" %}
 3725   interface(REG_INTER);
 3726 %}
 3727 
 3728 operand eDIRegI(xRegI reg) %{
 3729   constraint(ALLOC_IN_RC(edi_reg));
 3730   match(reg);
 3731   match(rRegI);
 3732 
 3733   format %{ "EDI" %}
 3734   interface(REG_INTER);
 3735 %}
 3736 
 3737 operand naxRegI() %{
 3738   constraint(ALLOC_IN_RC(nax_reg));
 3739   match(RegI);
 3740   match(eCXRegI);
 3741   match(eDXRegI);
 3742   match(eSIRegI);
 3743   match(eDIRegI);
 3744 
 3745   format %{ %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 operand nadxRegI() %{
 3750   constraint(ALLOC_IN_RC(nadx_reg));
 3751   match(RegI);
 3752   match(eBXRegI);
 3753   match(eCXRegI);
 3754   match(eSIRegI);
 3755   match(eDIRegI);
 3756 
 3757   format %{ %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand ncxRegI() %{
 3762   constraint(ALLOC_IN_RC(ncx_reg));
 3763   match(RegI);
 3764   match(eAXRegI);
 3765   match(eDXRegI);
 3766   match(eSIRegI);
 3767   match(eDIRegI);
 3768 
 3769   format %{ %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3774 // //
 3775 operand eSIRegI(xRegI reg) %{
 3776    constraint(ALLOC_IN_RC(esi_reg));
 3777    match(reg);
 3778    match(rRegI);
 3779 
 3780    format %{ "ESI" %}
 3781    interface(REG_INTER);
 3782 %}
 3783 
 3784 // Pointer Register
 3785 operand anyRegP() %{
 3786   constraint(ALLOC_IN_RC(any_reg));
 3787   match(RegP);
 3788   match(eAXRegP);
 3789   match(eBXRegP);
 3790   match(eCXRegP);
 3791   match(eDIRegP);
 3792   match(eRegP);
 3793 
 3794   format %{ %}
 3795   interface(REG_INTER);
 3796 %}
 3797 
 3798 operand eRegP() %{
 3799   constraint(ALLOC_IN_RC(int_reg));
 3800   match(RegP);
 3801   match(eAXRegP);
 3802   match(eBXRegP);
 3803   match(eCXRegP);
 3804   match(eDIRegP);
 3805 
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand rRegP() %{
 3811   constraint(ALLOC_IN_RC(int_reg));
 3812   match(RegP);
 3813   match(eAXRegP);
 3814   match(eBXRegP);
 3815   match(eCXRegP);
 3816   match(eDIRegP);
 3817 
 3818   format %{ %}
 3819   interface(REG_INTER);
 3820 %}
 3821 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3823 operand eRegP_no_EBP() %{
 3824   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3825   match(RegP);
 3826   match(eAXRegP);
 3827   match(eBXRegP);
 3828   match(eCXRegP);
 3829   match(eDIRegP);
 3830 
 3831   op_cost(100);
 3832   format %{ %}
 3833   interface(REG_INTER);
 3834 %}
 3835 
 3836 operand naxRegP() %{
 3837   constraint(ALLOC_IN_RC(nax_reg));
 3838   match(RegP);
 3839   match(eBXRegP);
 3840   match(eDXRegP);
 3841   match(eCXRegP);
 3842   match(eSIRegP);
 3843   match(eDIRegP);
 3844 
 3845   format %{ %}
 3846   interface(REG_INTER);
 3847 %}
 3848 
 3849 operand nabxRegP() %{
 3850   constraint(ALLOC_IN_RC(nabx_reg));
 3851   match(RegP);
 3852   match(eCXRegP);
 3853   match(eDXRegP);
 3854   match(eSIRegP);
 3855   match(eDIRegP);
 3856 
 3857   format %{ %}
 3858   interface(REG_INTER);
 3859 %}
 3860 
 3861 operand pRegP() %{
 3862   constraint(ALLOC_IN_RC(p_reg));
 3863   match(RegP);
 3864   match(eBXRegP);
 3865   match(eDXRegP);
 3866   match(eSIRegP);
 3867   match(eDIRegP);
 3868 
 3869   format %{ %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 // Special Registers
 3874 // Return a pointer value
 3875 operand eAXRegP(eRegP reg) %{
 3876   constraint(ALLOC_IN_RC(eax_reg));
 3877   match(reg);
 3878   format %{ "EAX" %}
 3879   interface(REG_INTER);
 3880 %}
 3881 
 3882 // Used in AtomicAdd
 3883 operand eBXRegP(eRegP reg) %{
 3884   constraint(ALLOC_IN_RC(ebx_reg));
 3885   match(reg);
 3886   format %{ "EBX" %}
 3887   interface(REG_INTER);
 3888 %}
 3889 
 3890 // Tail-call (interprocedural jump) to interpreter
 3891 operand eCXRegP(eRegP reg) %{
 3892   constraint(ALLOC_IN_RC(ecx_reg));
 3893   match(reg);
 3894   format %{ "ECX" %}
 3895   interface(REG_INTER);
 3896 %}
 3897 
 3898 operand eDXRegP(eRegP reg) %{
 3899   constraint(ALLOC_IN_RC(edx_reg));
 3900   match(reg);
 3901   format %{ "EDX" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 operand eSIRegP(eRegP reg) %{
 3906   constraint(ALLOC_IN_RC(esi_reg));
 3907   match(reg);
 3908   format %{ "ESI" %}
 3909   interface(REG_INTER);
 3910 %}
 3911 
 3912 // Used in rep stosw
 3913 operand eDIRegP(eRegP reg) %{
 3914   constraint(ALLOC_IN_RC(edi_reg));
 3915   match(reg);
 3916   format %{ "EDI" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eRegL() %{
 3921   constraint(ALLOC_IN_RC(long_reg));
 3922   match(RegL);
 3923   match(eADXRegL);
 3924 
 3925   format %{ %}
 3926   interface(REG_INTER);
 3927 %}
 3928 
 3929 operand eADXRegL( eRegL reg ) %{
 3930   constraint(ALLOC_IN_RC(eadx_reg));
 3931   match(reg);
 3932 
 3933   format %{ "EDX:EAX" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 operand eBCXRegL( eRegL reg ) %{
 3938   constraint(ALLOC_IN_RC(ebcx_reg));
 3939   match(reg);
 3940 
 3941   format %{ "EBX:ECX" %}
 3942   interface(REG_INTER);
 3943 %}
 3944 
 3945 // Special case for integer high multiply
 3946 operand eADXRegL_low_only() %{
 3947   constraint(ALLOC_IN_RC(eadx_reg));
 3948   match(RegL);
 3949 
 3950   format %{ "EAX" %}
 3951   interface(REG_INTER);
 3952 %}
 3953 
 3954 // Flags register, used as output of compare instructions
 3955 operand rFlagsReg() %{
 3956   constraint(ALLOC_IN_RC(int_flags));
 3957   match(RegFlags);
 3958 
 3959   format %{ "EFLAGS" %}
 3960   interface(REG_INTER);
 3961 %}
 3962 
 3963 // Flags register, used as output of compare instructions
 3964 operand eFlagsReg() %{
 3965   constraint(ALLOC_IN_RC(int_flags));
 3966   match(RegFlags);
 3967 
 3968   format %{ "EFLAGS" %}
 3969   interface(REG_INTER);
 3970 %}
 3971 
 3972 // Flags register, used as output of FLOATING POINT compare instructions
 3973 operand eFlagsRegU() %{
 3974   constraint(ALLOC_IN_RC(int_flags));
 3975   match(RegFlags);
 3976 
 3977   format %{ "EFLAGS_U" %}
 3978   interface(REG_INTER);
 3979 %}
 3980 
 3981 operand eFlagsRegUCF() %{
 3982   constraint(ALLOC_IN_RC(int_flags));
 3983   match(RegFlags);
 3984   predicate(false);
 3985 
 3986   format %{ "EFLAGS_U_CF" %}
 3987   interface(REG_INTER);
 3988 %}
 3989 
 3990 // Condition Code Register used by long compare
 3991 operand flagsReg_long_LTGE() %{
 3992   constraint(ALLOC_IN_RC(int_flags));
 3993   match(RegFlags);
 3994   format %{ "FLAGS_LTGE" %}
 3995   interface(REG_INTER);
 3996 %}
 3997 operand flagsReg_long_EQNE() %{
 3998   constraint(ALLOC_IN_RC(int_flags));
 3999   match(RegFlags);
 4000   format %{ "FLAGS_EQNE" %}
 4001   interface(REG_INTER);
 4002 %}
 4003 operand flagsReg_long_LEGT() %{
 4004   constraint(ALLOC_IN_RC(int_flags));
 4005   match(RegFlags);
 4006   format %{ "FLAGS_LEGT" %}
 4007   interface(REG_INTER);
 4008 %}
 4009 
 4010 // Condition Code Register used by unsigned long compare
 4011 operand flagsReg_ulong_LTGE() %{
 4012   constraint(ALLOC_IN_RC(int_flags));
 4013   match(RegFlags);
 4014   format %{ "FLAGS_U_LTGE" %}
 4015   interface(REG_INTER);
 4016 %}
 4017 operand flagsReg_ulong_EQNE() %{
 4018   constraint(ALLOC_IN_RC(int_flags));
 4019   match(RegFlags);
 4020   format %{ "FLAGS_U_EQNE" %}
 4021   interface(REG_INTER);
 4022 %}
 4023 operand flagsReg_ulong_LEGT() %{
 4024   constraint(ALLOC_IN_RC(int_flags));
 4025   match(RegFlags);
 4026   format %{ "FLAGS_U_LEGT" %}
 4027   interface(REG_INTER);
 4028 %}
 4029 
// x87 Double register operands
 4031 operand regDPR() %{
 4032   predicate( UseSSE < 2 );
 4033   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4034   match(RegD);
 4035   match(regDPR1);
 4036   match(regDPR2);
 4037   format %{ %}
 4038   interface(REG_INTER);
 4039 %}
 4040 
 4041 operand regDPR1(regDPR reg) %{
 4042   predicate( UseSSE < 2 );
 4043   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4044   match(reg);
 4045   format %{ "FPR1" %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 operand regDPR2(regDPR reg) %{
 4050   predicate( UseSSE < 2 );
 4051   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4052   match(reg);
 4053   format %{ "FPR2" %}
 4054   interface(REG_INTER);
 4055 %}
 4056 
 4057 operand regnotDPR1(regDPR reg) %{
 4058   predicate( UseSSE < 2 );
 4059   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4060   match(reg);
 4061   format %{ %}
 4062   interface(REG_INTER);
 4063 %}
 4064 
 4065 // Float register operands
 4066 operand regFPR() %{
 4067   predicate( UseSSE < 2 );
 4068   constraint(ALLOC_IN_RC(fp_flt_reg));
 4069   match(RegF);
 4070   match(regFPR1);
 4071   format %{ %}
 4072   interface(REG_INTER);
 4073 %}
 4074 
 4075 // Float register operands
 4076 operand regFPR1(regFPR reg) %{
 4077   predicate( UseSSE < 2 );
 4078   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4079   match(reg);
 4080   format %{ "FPR1" %}
 4081   interface(REG_INTER);
 4082 %}
 4083 
 4084 // XMM Float register operands
 4085 operand regF() %{
 4086   predicate( UseSSE>=1 );
 4087   constraint(ALLOC_IN_RC(float_reg_legacy));
 4088   match(RegF);
 4089   format %{ %}
 4090   interface(REG_INTER);
 4091 %}
 4092 
 4093 operand legRegF() %{
 4094   predicate( UseSSE>=1 );
 4095   constraint(ALLOC_IN_RC(float_reg_legacy));
 4096   match(RegF);
 4097   format %{ %}
 4098   interface(REG_INTER);
 4099 %}
 4100 
 4101 // Float register operands
 4102 operand vlRegF() %{
 4103    constraint(ALLOC_IN_RC(float_reg_vl));
 4104    match(RegF);
 4105 
 4106    format %{ %}
 4107    interface(REG_INTER);
 4108 %}
 4109 
 4110 // XMM Double register operands
 4111 operand regD() %{
 4112   predicate( UseSSE>=2 );
 4113   constraint(ALLOC_IN_RC(double_reg_legacy));
 4114   match(RegD);
 4115   format %{ %}
 4116   interface(REG_INTER);
 4117 %}
 4118 
 4119 // Double register operands
 4120 operand legRegD() %{
 4121   predicate( UseSSE>=2 );
 4122   constraint(ALLOC_IN_RC(double_reg_legacy));
 4123   match(RegD);
 4124   format %{ %}
 4125   interface(REG_INTER);
 4126 %}
 4127 
 4128 operand vlRegD() %{
 4129    constraint(ALLOC_IN_RC(double_reg_vl));
 4130    match(RegD);
 4131 
 4132    format %{ %}
 4133    interface(REG_INTER);
 4134 %}
 4135 
 4136 //----------Memory Operands----------------------------------------------------
 4137 // Direct Memory Operand
 4138 operand direct(immP addr) %{
 4139   match(addr);
 4140 
 4141   format %{ "[$addr]" %}
 4142   interface(MEMORY_INTER) %{
 4143     base(0xFFFFFFFF);
 4144     index(0x4);
 4145     scale(0x0);
 4146     disp($addr);
 4147   %}
 4148 %}
 4149 
 4150 // Indirect Memory Operand
 4151 operand indirect(eRegP reg) %{
 4152   constraint(ALLOC_IN_RC(int_reg));
 4153   match(reg);
 4154 
 4155   format %{ "[$reg]" %}
 4156   interface(MEMORY_INTER) %{
 4157     base($reg);
 4158     index(0x4);
 4159     scale(0x0);
 4160     disp(0x0);
 4161   %}
 4162 %}
 4163 
 4164 // Indirect Memory Plus Short Offset Operand
 4165 operand indOffset8(eRegP reg, immI8 off) %{
 4166   match(AddP reg off);
 4167 
 4168   format %{ "[$reg + $off]" %}
 4169   interface(MEMORY_INTER) %{
 4170     base($reg);
 4171     index(0x4);
 4172     scale(0x0);
 4173     disp($off);
 4174   %}
 4175 %}
 4176 
 4177 // Indirect Memory Plus Long Offset Operand
 4178 operand indOffset32(eRegP reg, immI off) %{
 4179   match(AddP reg off);
 4180 
 4181   format %{ "[$reg + $off]" %}
 4182   interface(MEMORY_INTER) %{
 4183     base($reg);
 4184     index(0x4);
 4185     scale(0x0);
 4186     disp($off);
 4187   %}
 4188 %}
 4189 
 4190 // Indirect Memory Plus Long Offset Operand
 4191 operand indOffset32X(rRegI reg, immP off) %{
 4192   match(AddP off reg);
 4193 
 4194   format %{ "[$reg + $off]" %}
 4195   interface(MEMORY_INTER) %{
 4196     base($reg);
 4197     index(0x4);
 4198     scale(0x0);
 4199     disp($off);
 4200   %}
 4201 %}
 4202 
 4203 // Indirect Memory Plus Index Register Plus Offset Operand
 4204 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4205   match(AddP (AddP reg ireg) off);
 4206 
 4207   op_cost(10);
 4208   format %{"[$reg + $off + $ireg]" %}
 4209   interface(MEMORY_INTER) %{
 4210     base($reg);
 4211     index($ireg);
 4212     scale(0x0);
 4213     disp($off);
 4214   %}
 4215 %}
 4216 
 4217 // Indirect Memory Plus Index Register Plus Offset Operand
 4218 operand indIndex(eRegP reg, rRegI ireg) %{
 4219   match(AddP reg ireg);
 4220 
 4221   op_cost(10);
 4222   format %{"[$reg + $ireg]" %}
 4223   interface(MEMORY_INTER) %{
 4224     base($reg);
 4225     index($ireg);
 4226     scale(0x0);
 4227     disp(0x0);
 4228   %}
 4229 %}
 4230 
 4231 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4233 // // -------------------------------------------------------------------------
 4234 // // Scaled Memory Operands
 4235 // // Indirect Memory Times Scale Plus Offset Operand
 4236 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4237 //   match(AddP off (LShiftI ireg scale));
 4238 //
 4239 //   op_cost(10);
 4240 //   format %{"[$off + $ireg << $scale]" %}
 4241 //   interface(MEMORY_INTER) %{
 4242 //     base(0x4);
 4243 //     index($ireg);
 4244 //     scale($scale);
 4245 //     disp($off);
 4246 //   %}
 4247 // %}
 4248 
 4249 // Indirect Memory Times Scale Plus Index Register
 4250 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4251   match(AddP reg (LShiftI ireg scale));
 4252 
 4253   op_cost(10);
 4254   format %{"[$reg + $ireg << $scale]" %}
 4255   interface(MEMORY_INTER) %{
 4256     base($reg);
 4257     index($ireg);
 4258     scale($scale);
 4259     disp(0x0);
 4260   %}
 4261 %}
 4262 
 4263 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4264 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4265   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4266 
 4267   op_cost(10);
 4268   format %{"[$reg + $off + $ireg << $scale]" %}
 4269   interface(MEMORY_INTER) %{
 4270     base($reg);
 4271     index($ireg);
 4272     scale($scale);
 4273     disp($off);
 4274   %}
 4275 %}
 4276 
 4277 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4279 // the first word of the long.  If the load-long destination overlaps with
 4280 // registers used in the addressing expression, the 2nd half will be loaded
 4281 // from a clobbered address.  Fix this by requiring that load-long use
 4282 // address registers that do not overlap with the load-long target.
 4283 
 4284 // load-long support
 4285 operand load_long_RegP() %{
 4286   constraint(ALLOC_IN_RC(esi_reg));
 4287   match(RegP);
 4288   match(eSIRegP);
 4289   op_cost(100);
 4290   format %{  %}
 4291   interface(REG_INTER);
 4292 %}
 4293 
 4294 // Indirect Memory Operand Long
 4295 operand load_long_indirect(load_long_RegP reg) %{
 4296   constraint(ALLOC_IN_RC(esi_reg));
 4297   match(reg);
 4298 
 4299   format %{ "[$reg]" %}
 4300   interface(MEMORY_INTER) %{
 4301     base($reg);
 4302     index(0x4);
 4303     scale(0x0);
 4304     disp(0x0);
 4305   %}
 4306 %}
 4307 
 4308 // Indirect Memory Plus Long Offset Operand
 4309 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4310   match(AddP reg off);
 4311 
 4312   format %{ "[$reg + $off]" %}
 4313   interface(MEMORY_INTER) %{
 4314     base($reg);
 4315     index(0x4);
 4316     scale(0x0);
 4317     disp($off);
 4318   %}
 4319 %}
 4320 
 4321 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4322 
 4323 
 4324 //----------Special Memory Operands--------------------------------------------
 4325 // Stack Slot Operand - This operand is used for loading and storing temporary
 4326 //                      values on the stack where a match requires a value to
 4327 //                      flow through memory.
 4328 operand stackSlotP(sRegP reg) %{
 4329   constraint(ALLOC_IN_RC(stack_slots));
 4330   // No match rule because this operand is only generated in matching
 4331   format %{ "[$reg]" %}
 4332   interface(MEMORY_INTER) %{
 4333     base(0x4);   // ESP
 4334     index(0x4);  // No Index
 4335     scale(0x0);  // No Scale
 4336     disp($reg);  // Stack Offset
 4337   %}
 4338 %}
 4339 
 4340 operand stackSlotI(sRegI reg) %{
 4341   constraint(ALLOC_IN_RC(stack_slots));
 4342   // No match rule because this operand is only generated in matching
 4343   format %{ "[$reg]" %}
 4344   interface(MEMORY_INTER) %{
 4345     base(0x4);   // ESP
 4346     index(0x4);  // No Index
 4347     scale(0x0);  // No Scale
 4348     disp($reg);  // Stack Offset
 4349   %}
 4350 %}
 4351 
 4352 operand stackSlotF(sRegF reg) %{
 4353   constraint(ALLOC_IN_RC(stack_slots));
 4354   // No match rule because this operand is only generated in matching
 4355   format %{ "[$reg]" %}
 4356   interface(MEMORY_INTER) %{
 4357     base(0x4);   // ESP
 4358     index(0x4);  // No Index
 4359     scale(0x0);  // No Scale
 4360     disp($reg);  // Stack Offset
 4361   %}
 4362 %}
 4363 
 4364 operand stackSlotD(sRegD reg) %{
 4365   constraint(ALLOC_IN_RC(stack_slots));
 4366   // No match rule because this operand is only generated in matching
 4367   format %{ "[$reg]" %}
 4368   interface(MEMORY_INTER) %{
 4369     base(0x4);   // ESP
 4370     index(0x4);  // No Index
 4371     scale(0x0);  // No Scale
 4372     disp($reg);  // Stack Offset
 4373   %}
 4374 %}
 4375 
 4376 operand stackSlotL(sRegL reg) %{
 4377   constraint(ALLOC_IN_RC(stack_slots));
 4378   // No match rule because this operand is only generated in matching
 4379   format %{ "[$reg]" %}
 4380   interface(MEMORY_INTER) %{
 4381     base(0x4);   // ESP
 4382     index(0x4);  // No Index
 4383     scale(0x0);  // No Scale
 4384     disp($reg);  // Stack Offset
 4385   %}
 4386 %}
 4387 
 4388 //----------Conditional Branch Operands----------------------------------------
 4389 // Comparison Op  - This is the operation of the comparison, and is limited to
 4390 //                  the following set of codes:
 4391 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4392 //
 4393 // Other attributes of the comparison, such as unsignedness, are specified
 4394 // by the comparison instruction that sets a condition code flags register.
 4395 // That result is represented by a flags operand whose subtype is appropriate
 4396 // to the unsignedness (etc.) of the comparison.
 4397 //
 4398 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4399 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4400 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4401 
// Comparison Code
 4403 operand cmpOp() %{
 4404   match(Bool);
 4405 
 4406   format %{ "" %}
 4407   interface(COND_INTER) %{
 4408     equal(0x4, "e");
 4409     not_equal(0x5, "ne");
 4410     less(0xC, "l");
 4411     greater_equal(0xD, "ge");
 4412     less_equal(0xE, "le");
 4413     greater(0xF, "g");
 4414     overflow(0x0, "o");
 4415     no_overflow(0x1, "no");
 4416   %}
 4417 %}
 4418 
 4419 // Comparison Code, unsigned compare.  Used by FP also, with
 4420 // C2 (unordered) turned into GT or LT already.  The other bits
 4421 // C0 and C3 are turned into Carry & Zero flags.
 4422 operand cmpOpU() %{
 4423   match(Bool);
 4424 
 4425   format %{ "" %}
 4426   interface(COND_INTER) %{
 4427     equal(0x4, "e");
 4428     not_equal(0x5, "ne");
 4429     less(0x2, "b");
 4430     greater_equal(0x3, "nb");
 4431     less_equal(0x6, "be");
 4432     greater(0x7, "nbe");
 4433     overflow(0x0, "o");
 4434     no_overflow(0x1, "no");
 4435   %}
 4436 %}
 4437 
 4438 // Floating comparisons that don't require any fixup for the unordered case
 4439 operand cmpOpUCF() %{
 4440   match(Bool);
 4441   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4442             n->as_Bool()->_test._test == BoolTest::ge ||
 4443             n->as_Bool()->_test._test == BoolTest::le ||
 4444             n->as_Bool()->_test._test == BoolTest::gt);
 4445   format %{ "" %}
 4446   interface(COND_INTER) %{
 4447     equal(0x4, "e");
 4448     not_equal(0x5, "ne");
 4449     less(0x2, "b");
 4450     greater_equal(0x3, "nb");
 4451     less_equal(0x6, "be");
 4452     greater(0x7, "nbe");
 4453     overflow(0x0, "o");
 4454     no_overflow(0x1, "no");
 4455   %}
 4456 %}
 4457 
 4458 
 4459 // Floating comparisons that can be fixed up with extra conditional jumps
 4460 operand cmpOpUCF2() %{
 4461   match(Bool);
 4462   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4463             n->as_Bool()->_test._test == BoolTest::eq);
 4464   format %{ "" %}
 4465   interface(COND_INTER) %{
 4466     equal(0x4, "e");
 4467     not_equal(0x5, "ne");
 4468     less(0x2, "b");
 4469     greater_equal(0x3, "nb");
 4470     less_equal(0x6, "be");
 4471     greater(0x7, "nbe");
 4472     overflow(0x0, "o");
 4473     no_overflow(0x1, "no");
 4474   %}
 4475 %}
 4476 
 4477 // Comparison Code for FP conditional move
 4478 operand cmpOp_fcmov() %{
 4479   match(Bool);
 4480 
 4481   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4482             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4483   format %{ "" %}
 4484   interface(COND_INTER) %{
 4485     equal        (0x0C8);
 4486     not_equal    (0x1C8);
 4487     less         (0x0C0);
 4488     greater_equal(0x1C0);
 4489     less_equal   (0x0D0);
 4490     greater      (0x1D0);
 4491     overflow(0x0, "o"); // not really supported by the instruction
 4492     no_overflow(0x1, "no"); // not really supported by the instruction
 4493   %}
 4494 %}
 4495 
 4496 // Comparison Code used in long compares
 4497 operand cmpOp_commute() %{
 4498   match(Bool);
 4499 
 4500   format %{ "" %}
 4501   interface(COND_INTER) %{
 4502     equal(0x4, "e");
 4503     not_equal(0x5, "ne");
 4504     less(0xF, "g");
 4505     greater_equal(0xE, "le");
 4506     less_equal(0xD, "ge");
 4507     greater(0xC, "l");
 4508     overflow(0x0, "o");
 4509     no_overflow(0x1, "no");
 4510   %}
 4511 %}
 4512 
 4513 // Comparison Code used in unsigned long compares
 4514 operand cmpOpU_commute() %{
 4515   match(Bool);
 4516 
 4517   format %{ "" %}
 4518   interface(COND_INTER) %{
 4519     equal(0x4, "e");
 4520     not_equal(0x5, "ne");
 4521     less(0x7, "nbe");
 4522     greater_equal(0x6, "be");
 4523     less_equal(0x3, "nb");
 4524     greater(0x2, "b");
 4525     overflow(0x0, "o");
 4526     no_overflow(0x1, "no");
 4527   %}
 4528 %}
 4529 
 4530 //----------OPERAND CLASSES----------------------------------------------------
 4531 // Operand Classes are groups of operands that are used to simplify
 4532 // instruction definitions by not requiring the AD writer to specify separate
 4533 // instructions for every form of operand when the instruction accepts
 4534 // multiple operand types with the same basic encoding and format.  The classic
 4535 // case of this is memory operands.
 4536 
 4537 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4538                indIndex, indIndexScale, indIndexScaleOffset);
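
      // For example, loadI further below is written once against the 'memory'
      // opclass and therefore matches an integer load through any of the
      // addressing forms listed above (direct, register-indirect, indexed,
      // scaled, with or without a displacement).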
 4539 
 4540 // Long memory operations are encoded as two instructions, the second using a
 4541 // +4 displacement.  Some kind of offset is therefore always required, and an
 4542 // oop cannot be used as the offset (as is done when addressing static globals).
 4543 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4544                     indIndex, indIndexScale, indIndexScaleOffset);
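
      // In the matching instructions (see loadL further below), the two halves are
      // addressed by forming one Address at $mem$$disp and a second at
      // $mem$$disp + 4, which is why an oop cannot serve as the displacement.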
 4545 
 4546 
 4547 //----------PIPELINE-----------------------------------------------------------
 4548 // Rules which define the behavior of the target architecture's pipeline.
 4549 pipeline %{
 4550 
 4551 //----------ATTRIBUTES---------------------------------------------------------
 4552 attributes %{
 4553   variable_size_instructions;        // Variable size instructions
 4554   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4555   instruction_unit_size = 1;         // Instruction size is measured in units of 1 byte
 4556   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4557   instruction_fetch_units = 1;       // of 16 bytes
 4558 
 4559   // List of nop instructions
 4560   nops( MachNop );
 4561 %}
 4562 
 4563 //----------RESOURCES----------------------------------------------------------
 4564 // Resources are the functional units available to the machine
 4565 
 4566 // Generic P2/P3 pipeline
 4567 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4568 // 3 instructions decoded per cycle.
 4569 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4570 // 2 ALU ops per cycle; only ALU0 handles mul/div instructions.
 4571 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4572            MS0, MS1, MEM = MS0 | MS1,
 4573            BR, FPU,
 4574            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4575 
 4576 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4577 // Pipeline Description specifies the stages in the machine's pipeline
 4578 
 4579 // Generic P2/P3 pipeline
 4580 pipe_desc(S0, S1, S2, S3, S4, S5);
 4581 
 4582 //----------PIPELINE CLASSES---------------------------------------------------
 4583 // Pipeline Classes describe the stages in which input and output are
 4584 // referenced by the hardware pipeline.
 4585 
 4586 // Naming convention: ialu or fpu
 4587 // Then: _reg
 4588 // Then: _reg if there is a 2nd register
 4589 // Then: _long if it's a pair of instructions implementing a long
 4590 // Then: _fat if it requires the big decoder
 4591 //   Or: _mem if it requires the big decoder and a memory unit.
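      //
      // For example, ialu_reg_mem below names an integer ALU operation whose source
      // comes from memory, so it claims the big decoder (D0) and a memory unit (MEM)
      // in addition to an ALU.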
 4592 
 4593 // Integer ALU reg operation
 4594 pipe_class ialu_reg(rRegI dst) %{
 4595     single_instruction;
 4596     dst    : S4(write);
 4597     dst    : S3(read);
 4598     DECODE : S0;        // any decoder
 4599     ALU    : S3;        // any alu
 4600 %}
 4601 
 4602 // Long ALU reg operation
 4603 pipe_class ialu_reg_long(eRegL dst) %{
 4604     instruction_count(2);
 4605     dst    : S4(write);
 4606     dst    : S3(read);
 4607     DECODE : S0(2);     // any 2 decoders
 4608     ALU    : S3(2);     // both alus
 4609 %}
 4610 
 4611 // Integer ALU reg operation using big decoder
 4612 pipe_class ialu_reg_fat(rRegI dst) %{
 4613     single_instruction;
 4614     dst    : S4(write);
 4615     dst    : S3(read);
 4616     D0     : S0;        // big decoder only
 4617     ALU    : S3;        // any alu
 4618 %}
 4619 
 4620 // Long ALU reg operation using big decoder
 4621 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4622     instruction_count(2);
 4623     dst    : S4(write);
 4624     dst    : S3(read);
 4625     D0     : S0(2);     // big decoder only; twice
 4626     ALU    : S3(2);     // any 2 alus
 4627 %}
 4628 
 4629 // Integer ALU reg-reg operation
 4630 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4631     single_instruction;
 4632     dst    : S4(write);
 4633     src    : S3(read);
 4634     DECODE : S0;        // any decoder
 4635     ALU    : S3;        // any alu
 4636 %}
 4637 
 4638 // Long ALU reg-reg operation
 4639 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4640     instruction_count(2);
 4641     dst    : S4(write);
 4642     src    : S3(read);
 4643     DECODE : S0(2);     // any 2 decoders
 4644     ALU    : S3(2);     // both alus
 4645 %}
 4646 
 4647 // Integer ALU reg-reg operation
 4648 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4649     single_instruction;
 4650     dst    : S4(write);
 4651     src    : S3(read);
 4652     D0     : S0;        // big decoder only
 4653     ALU    : S3;        // any alu
 4654 %}
 4655 
 4656 // Long ALU reg-reg operation
 4657 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4658     instruction_count(2);
 4659     dst    : S4(write);
 4660     src    : S3(read);
 4661     D0     : S0(2);     // big decoder only; twice
 4662     ALU    : S3(2);     // both alus
 4663 %}
 4664 
 4665 // Integer ALU reg-mem operation
 4666 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4667     single_instruction;
 4668     dst    : S5(write);
 4669     mem    : S3(read);
 4670     D0     : S0;        // big decoder only
 4671     ALU    : S4;        // any alu
 4672     MEM    : S3;        // any mem
 4673 %}
 4674 
 4675 // Long ALU reg-mem operation
 4676 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4677     instruction_count(2);
 4678     dst    : S5(write);
 4679     mem    : S3(read);
 4680     D0     : S0(2);     // big decoder only; twice
 4681     ALU    : S4(2);     // any 2 alus
 4682     MEM    : S3(2);     // both mems
 4683 %}
 4684 
 4685 // Integer mem operation (prefetch)
 4686 pipe_class ialu_mem(memory mem)
 4687 %{
 4688     single_instruction;
 4689     mem    : S3(read);
 4690     D0     : S0;        // big decoder only
 4691     MEM    : S3;        // any mem
 4692 %}
 4693 
 4694 // Integer Store to Memory
 4695 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4696     single_instruction;
 4697     mem    : S3(read);
 4698     src    : S5(read);
 4699     D0     : S0;        // big decoder only
 4700     ALU    : S4;        // any alu
 4701     MEM    : S3;
 4702 %}
 4703 
 4704 // Long Store to Memory
 4705 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4706     instruction_count(2);
 4707     mem    : S3(read);
 4708     src    : S5(read);
 4709     D0     : S0(2);     // big decoder only; twice
 4710     ALU    : S4(2);     // any 2 alus
 4711     MEM    : S3(2);     // Both mems
 4712 %}
 4713 
 4714 // Integer Store to Memory
 4715 pipe_class ialu_mem_imm(memory mem) %{
 4716     single_instruction;
 4717     mem    : S3(read);
 4718     D0     : S0;        // big decoder only
 4719     ALU    : S4;        // any alu
 4720     MEM    : S3;
 4721 %}
 4722 
 4723 // Integer ALU0 reg-reg operation
 4724 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4725     single_instruction;
 4726     dst    : S4(write);
 4727     src    : S3(read);
 4728     D0     : S0;        // Big decoder only
 4729     ALU0   : S3;        // only alu0
 4730 %}
 4731 
 4732 // Integer ALU0 reg-mem operation
 4733 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4734     single_instruction;
 4735     dst    : S5(write);
 4736     mem    : S3(read);
 4737     D0     : S0;        // big decoder only
 4738     ALU0   : S4;        // ALU0 only
 4739     MEM    : S3;        // any mem
 4740 %}
 4741 
 4742 // Integer ALU reg-reg operation
 4743 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4744     single_instruction;
 4745     cr     : S4(write);
 4746     src1   : S3(read);
 4747     src2   : S3(read);
 4748     DECODE : S0;        // any decoder
 4749     ALU    : S3;        // any alu
 4750 %}
 4751 
 4752 // Integer ALU reg-imm operation
 4753 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4754     single_instruction;
 4755     cr     : S4(write);
 4756     src1   : S3(read);
 4757     DECODE : S0;        // any decoder
 4758     ALU    : S3;        // any alu
 4759 %}
 4760 
 4761 // Integer ALU reg-mem operation
 4762 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4763     single_instruction;
 4764     cr     : S4(write);
 4765     src1   : S3(read);
 4766     src2   : S3(read);
 4767     D0     : S0;        // big decoder only
 4768     ALU    : S4;        // any alu
 4769     MEM    : S3;
 4770 %}
 4771 
 4772 // Conditional move reg-reg
 4773 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4774     instruction_count(4);
 4775     y      : S4(read);
 4776     q      : S3(read);
 4777     p      : S3(read);
 4778     DECODE : S0(4);     // any decoder
 4779 %}
 4780 
 4781 // Conditional move reg-reg
 4782 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4783     single_instruction;
 4784     dst    : S4(write);
 4785     src    : S3(read);
 4786     cr     : S3(read);
 4787     DECODE : S0;        // any decoder
 4788 %}
 4789 
 4790 // Conditional move reg-mem
 4791 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4792     single_instruction;
 4793     dst    : S4(write);
 4794     src    : S3(read);
 4795     cr     : S3(read);
 4796     DECODE : S0;        // any decoder
 4797     MEM    : S3;
 4798 %}
 4799 
 4800 // Conditional move reg-reg long
 4801 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4802     single_instruction;
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     cr     : S3(read);
 4806     DECODE : S0(2);     // any 2 decoders
 4807 %}
 4808 
 4809 // Conditional move double reg-reg
 4810 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4811     single_instruction;
 4812     dst    : S4(write);
 4813     src    : S3(read);
 4814     cr     : S3(read);
 4815     DECODE : S0;        // any decoder
 4816 %}
 4817 
 4818 // Float reg-reg operation
 4819 pipe_class fpu_reg(regDPR dst) %{
 4820     instruction_count(2);
 4821     dst    : S3(read);
 4822     DECODE : S0(2);     // any 2 decoders
 4823     FPU    : S3;
 4824 %}
 4825 
 4826 // Float reg-reg operation
 4827 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4828     instruction_count(2);
 4829     dst    : S4(write);
 4830     src    : S3(read);
 4831     DECODE : S0(2);     // any 2 decoders
 4832     FPU    : S3;
 4833 %}
 4834 
 4835 // Float reg-reg operation
 4836 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4837     instruction_count(3);
 4838     dst    : S4(write);
 4839     src1   : S3(read);
 4840     src2   : S3(read);
 4841     DECODE : S0(3);     // any 3 decoders
 4842     FPU    : S3(2);
 4843 %}
 4844 
 4845 // Float reg-reg operation
 4846 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4847     instruction_count(4);
 4848     dst    : S4(write);
 4849     src1   : S3(read);
 4850     src2   : S3(read);
 4851     src3   : S3(read);
 4852     DECODE : S0(4);     // any decoder, 4 decode slots
 4853     FPU    : S3(2);
 4854 %}
 4855 
 4856 // Float reg-reg operation
 4857 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4858     instruction_count(4);
 4859     dst    : S4(write);
 4860     src1   : S3(read);
 4861     src2   : S3(read);
 4862     src3   : S3(read);
 4863     DECODE : S1(3);     // any 3 decoders
 4864     D0     : S0;        // Big decoder only
 4865     FPU    : S3(2);
 4866     MEM    : S3;
 4867 %}
 4868 
 4869 // Float reg-mem operation
 4870 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4871     instruction_count(2);
 4872     dst    : S5(write);
 4873     mem    : S3(read);
 4874     D0     : S0;        // big decoder only
 4875     DECODE : S1;        // any decoder for FPU POP
 4876     FPU    : S4;
 4877     MEM    : S3;        // any mem
 4878 %}
 4879 
 4880 // Float reg-mem operation
 4881 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4882     instruction_count(3);
 4883     dst    : S5(write);
 4884     src1   : S3(read);
 4885     mem    : S3(read);
 4886     D0     : S0;        // big decoder only
 4887     DECODE : S1(2);     // any decoder for FPU POP
 4888     FPU    : S4;
 4889     MEM    : S3;        // any mem
 4890 %}
 4891 
 4892 // Float mem-reg operation
 4893 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4894     instruction_count(2);
 4895     src    : S5(read);
 4896     mem    : S3(read);
 4897     DECODE : S0;        // any decoder for FPU PUSH
 4898     D0     : S1;        // big decoder only
 4899     FPU    : S4;
 4900     MEM    : S3;        // any mem
 4901 %}
 4902 
 4903 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4904     instruction_count(3);
 4905     src1   : S3(read);
 4906     src2   : S3(read);
 4907     mem    : S3(read);
 4908     DECODE : S0(2);     // any 2 decoders for FPU PUSH
 4909     D0     : S1;        // big decoder only
 4910     FPU    : S4;
 4911     MEM    : S3;        // any mem
 4912 %}
 4913 
 4914 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4915     instruction_count(3);
 4916     src1   : S3(read);
 4917     src2   : S3(read);
 4918     mem    : S4(read);
 4919     DECODE : S0;        // any decoder for FPU PUSH
 4920     D0     : S0(2);     // big decoder only
 4921     FPU    : S4;
 4922     MEM    : S3(2);     // any mem
 4923 %}
 4924 
 4925 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4926     instruction_count(2);
 4927     src1   : S3(read);
 4928     dst    : S4(read);
 4929     D0     : S0(2);     // big decoder only
 4930     MEM    : S3(2);     // any mem
 4931 %}
 4932 
 4933 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4934     instruction_count(3);
 4935     src1   : S3(read);
 4936     src2   : S3(read);
 4937     dst    : S4(read);
 4938     D0     : S0(3);     // big decoder only
 4939     FPU    : S4;
 4940     MEM    : S3(3);     // any mem
 4941 %}
 4942 
 4943 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4944     instruction_count(3);
 4945     src1   : S4(read);
 4946     mem    : S4(read);
 4947     DECODE : S0;        // any decoder for FPU PUSH
 4948     D0     : S0(2);     // big decoder only
 4949     FPU    : S4;
 4950     MEM    : S3(2);     // any mem
 4951 %}
 4952 
 4953 // Float load constant
 4954 pipe_class fpu_reg_con(regDPR dst) %{
 4955     instruction_count(2);
 4956     dst    : S5(write);
 4957     D0     : S0;        // big decoder only for the load
 4958     DECODE : S1;        // any decoder for FPU POP
 4959     FPU    : S4;
 4960     MEM    : S3;        // any mem
 4961 %}
 4962 
 4963 // Float load constant
 4964 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4965     instruction_count(3);
 4966     dst    : S5(write);
 4967     src    : S3(read);
 4968     D0     : S0;        // big decoder only for the load
 4969     DECODE : S1(2);     // any decoder for FPU POP
 4970     FPU    : S4;
 4971     MEM    : S3;        // any mem
 4972 %}
 4973 
 4974 // Unconditional branch
 4975 pipe_class pipe_jmp( label labl ) %{
 4976     single_instruction;
 4977     BR   : S3;
 4978 %}
 4979 
 4980 // Conditional branch
 4981 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4982     single_instruction;
 4983     cr    : S1(read);
 4984     BR    : S3;
 4985 %}
 4986 
 4987 // Allocation idiom
 4988 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4989     instruction_count(1); force_serialization;
 4990     fixed_latency(6);
 4991     heap_ptr : S3(read);
 4992     DECODE   : S0(3);
 4993     D0       : S2;
 4994     MEM      : S3;
 4995     ALU      : S3(2);
 4996     dst      : S5(write);
 4997     BR       : S5;
 4998 %}
 4999 
 5000 // Generic big/slow expanded idiom
 5001 pipe_class pipe_slow(  ) %{
 5002     instruction_count(10); multiple_bundles; force_serialization;
 5003     fixed_latency(100);
 5004     D0  : S0(2);
 5005     MEM : S3(2);
 5006 %}
 5007 
 5008 // The real do-nothing guy
 5009 pipe_class empty( ) %{
 5010     instruction_count(0);
 5011 %}
 5012 
 5013 // Define the class for the Nop node
 5014 define %{
 5015    MachNop = empty;
 5016 %}
 5017 
 5018 %}
 5019 
 5020 //----------INSTRUCTIONS-------------------------------------------------------
 5021 //
 5022 // match      -- States which machine-independent subtree may be replaced
 5023 //               by this instruction.
 5024 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5025 //               selection to identify a minimum cost tree of machine
 5026 //               instructions that matches a tree of machine-independent
 5027 //               instructions.
 5028 // format     -- A string providing the disassembly for this instruction.
 5029 //               The value of an instruction's operand may be inserted
 5030 //               by referring to it with a '$' prefix.
 5031 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5032 //               to within an encode class as $primary, $secondary, and $tertiary
 5033 //               respectively.  The primary opcode is commonly used to
 5034 //               indicate the type of machine instruction, while secondary
 5035 //               and tertiary are often used for prefix options or addressing
 5036 //               modes.
 5037 // ins_encode -- A list of encode classes with parameters. The encode class
 5038 //               name must have been defined in an 'enc_class' specification
 5039 //               in the encode section of the architecture description.
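      //
      // A minimal sketch (the instruct below is illustrative only, not a definition
      // used by the matcher) showing how these attributes fit together:
      //
      //   instruct example_addI(rRegI dst, rRegI src, eFlagsReg cr) %{
      //     match(Set dst (AddI dst src));   // replaces the ideal AddI subtree
      //     effect(KILL cr);
      //     ins_cost(150);                   // relative cost used during selection
      //     format %{ "ADD    $dst,$src" %}  // disassembly string
      //     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
      //     ins_pipe( ialu_reg_reg );        // pipeline class from the section above
      //   %}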
 5040 
 5041 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5042 // Load Float
 5043 instruct MoveF2LEG(legRegF dst, regF src) %{
 5044   match(Set dst src);
 5045   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5046   ins_encode %{
 5047     ShouldNotReachHere();
 5048   %}
 5049   ins_pipe( fpu_reg_reg );
 5050 %}
 5051 
 5052 // Load Float
 5053 instruct MoveLEG2F(regF dst, legRegF src) %{
 5054   match(Set dst src);
 5055   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5056   ins_encode %{
 5057     ShouldNotReachHere();
 5058   %}
 5059   ins_pipe( fpu_reg_reg );
 5060 %}
 5061 
 5062 // Load Float
 5063 instruct MoveF2VL(vlRegF dst, regF src) %{
 5064   match(Set dst src);
 5065   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5066   ins_encode %{
 5067     ShouldNotReachHere();
 5068   %}
 5069   ins_pipe( fpu_reg_reg );
 5070 %}
 5071 
 5072 // Load Float
 5073 instruct MoveVL2F(regF dst, vlRegF src) %{
 5074   match(Set dst src);
 5075   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5076   ins_encode %{
 5077     ShouldNotReachHere();
 5078   %}
 5079   ins_pipe( fpu_reg_reg );
 5080 %}
 5081 
 5082 
 5083 
 5084 // Load Double
 5085 instruct MoveD2LEG(legRegD dst, regD src) %{
 5086   match(Set dst src);
 5087   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5088   ins_encode %{
 5089     ShouldNotReachHere();
 5090   %}
 5091   ins_pipe( fpu_reg_reg );
 5092 %}
 5093 
 5094 // Load Double
 5095 instruct MoveLEG2D(regD dst, legRegD src) %{
 5096   match(Set dst src);
 5097   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 // Load Double
 5105 instruct MoveD2VL(vlRegD dst, regD src) %{
 5106   match(Set dst src);
 5107   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5108   ins_encode %{
 5109     ShouldNotReachHere();
 5110   %}
 5111   ins_pipe( fpu_reg_reg );
 5112 %}
 5113 
 5114 // Load Double
 5115 instruct MoveVL2D(regD dst, vlRegD src) %{
 5116   match(Set dst src);
 5117   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5118   ins_encode %{
 5119     ShouldNotReachHere();
 5120   %}
 5121   ins_pipe( fpu_reg_reg );
 5122 %}
 5123 
 5124 //----------BSWAP-Instruction--------------------------------------------------
 5125 instruct bytes_reverse_int(rRegI dst) %{
 5126   match(Set dst (ReverseBytesI dst));
 5127 
 5128   format %{ "BSWAP  $dst" %}
 5129   opcode(0x0F, 0xC8);
 5130   ins_encode( OpcP, OpcSReg(dst) );
 5131   ins_pipe( ialu_reg );
 5132 %}
 5133 
 5134 instruct bytes_reverse_long(eRegL dst) %{
 5135   match(Set dst (ReverseBytesL dst));
 5136 
 5137   format %{ "BSWAP  $dst.lo\n\t"
 5138             "BSWAP  $dst.hi\n\t"
 5139             "XCHG   $dst.lo $dst.hi" %}
 5140 
 5141   ins_cost(125);
 5142   ins_encode( bswap_long_bytes(dst) );
 5143   ins_pipe( ialu_reg_reg);
 5144 %}
 5145 
 5146 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5147   match(Set dst (ReverseBytesUS dst));
 5148   effect(KILL cr);
 5149 
 5150   format %{ "BSWAP  $dst\n\t"
 5151             "SHR    $dst,16" %}
 5152   ins_encode %{
 5153     __ bswapl($dst$$Register);
 5154     __ shrl($dst$$Register, 16);
 5155   %}
 5156   ins_pipe( ialu_reg );
 5157 %}
 5158 
 5159 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5160   match(Set dst (ReverseBytesS dst));
 5161   effect(KILL cr);
 5162 
 5163   format %{ "BSWAP  $dst\n\t"
 5164             "SAR    $dst,16" %}
 5165   ins_encode %{
 5166     __ bswapl($dst$$Register);
 5167     __ sarl($dst$$Register, 16);
 5168   %}
 5169   ins_pipe( ialu_reg );
 5170 %}
 5171 
 5172 
 5173 //---------- Zeros Count Instructions ------------------------------------------
 5174 
 5175 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5176   predicate(UseCountLeadingZerosInstruction);
 5177   match(Set dst (CountLeadingZerosI src));
 5178   effect(KILL cr);
 5179 
 5180   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5181   ins_encode %{
 5182     __ lzcntl($dst$$Register, $src$$Register);
 5183   %}
 5184   ins_pipe(ialu_reg);
 5185 %}
 5186 
 5187 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5188   predicate(!UseCountLeadingZerosInstruction);
 5189   match(Set dst (CountLeadingZerosI src));
 5190   effect(KILL cr);
 5191 
 5192   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5193             "JNZ    skip\n\t"
 5194             "MOV    $dst, -1\n"
 5195       "skip:\n\t"
 5196             "NEG    $dst\n\t"
 5197             "ADD    $dst, 31" %}
 5198   ins_encode %{
 5199     Register Rdst = $dst$$Register;
 5200     Register Rsrc = $src$$Register;
 5201     Label skip;
 5202     __ bsrl(Rdst, Rsrc);
 5203     __ jccb(Assembler::notZero, skip);
 5204     __ movl(Rdst, -1);
 5205     __ bind(skip);
 5206     __ negl(Rdst);
 5207     __ addl(Rdst, BitsPerInt - 1);
 5208   %}
 5209   ins_pipe(ialu_reg);
 5210 %}
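
      // Worked example for the BSR fallback above: if the highest set bit of $src is
      // at index i, BSR yields i and the NEG/ADD pair produces 31 - i, the leading
      // zero count; if $src is zero, BSR sets ZF, $dst is forced to -1, and
      // 31 - (-1) = 32.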
 5211 
 5212 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5213   predicate(UseCountLeadingZerosInstruction);
 5214   match(Set dst (CountLeadingZerosL src));
 5215   effect(TEMP dst, KILL cr);
 5216 
 5217   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5218             "JNC    done\n\t"
 5219             "LZCNT  $dst, $src.lo\n\t"
 5220             "ADD    $dst, 32\n"
 5221       "done:" %}
 5222   ins_encode %{
 5223     Register Rdst = $dst$$Register;
 5224     Register Rsrc = $src$$Register;
 5225     Label done;
 5226     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5227     __ jccb(Assembler::carryClear, done);
 5228     __ lzcntl(Rdst, Rsrc);
 5229     __ addl(Rdst, BitsPerInt);
 5230     __ bind(done);
 5231   %}
 5232   ins_pipe(ialu_reg);
 5233 %}
 5234 
 5235 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5236   predicate(!UseCountLeadingZerosInstruction);
 5237   match(Set dst (CountLeadingZerosL src));
 5238   effect(TEMP dst, KILL cr);
 5239 
 5240   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5241             "JZ     msw_is_zero\n\t"
 5242             "ADD    $dst, 32\n\t"
 5243             "JMP    not_zero\n"
 5244       "msw_is_zero:\n\t"
 5245             "BSR    $dst, $src.lo\n\t"
 5246             "JNZ    not_zero\n\t"
 5247             "MOV    $dst, -1\n"
 5248       "not_zero:\n\t"
 5249             "NEG    $dst\n\t"
 5250             "ADD    $dst, 63\n" %}
 5251   ins_encode %{
 5252     Register Rdst = $dst$$Register;
 5253     Register Rsrc = $src$$Register;
 5254     Label msw_is_zero;
 5255     Label not_zero;
 5256     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5257     __ jccb(Assembler::zero, msw_is_zero);
 5258     __ addl(Rdst, BitsPerInt);
 5259     __ jmpb(not_zero);
 5260     __ bind(msw_is_zero);
 5261     __ bsrl(Rdst, Rsrc);
 5262     __ jccb(Assembler::notZero, not_zero);
 5263     __ movl(Rdst, -1);
 5264     __ bind(not_zero);
 5265     __ negl(Rdst);
 5266     __ addl(Rdst, BitsPerLong - 1);
 5267   %}
 5268   ins_pipe(ialu_reg);
 5269 %}
 5270 
 5271 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5272   predicate(UseCountTrailingZerosInstruction);
 5273   match(Set dst (CountTrailingZerosI src));
 5274   effect(KILL cr);
 5275 
 5276   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5277   ins_encode %{
 5278     __ tzcntl($dst$$Register, $src$$Register);
 5279   %}
 5280   ins_pipe(ialu_reg);
 5281 %}
 5282 
 5283 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5284   predicate(!UseCountTrailingZerosInstruction);
 5285   match(Set dst (CountTrailingZerosI src));
 5286   effect(KILL cr);
 5287 
 5288   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5289             "JNZ    done\n\t"
 5290             "MOV    $dst, 32\n"
 5291       "done:" %}
 5292   ins_encode %{
 5293     Register Rdst = $dst$$Register;
 5294     Label done;
 5295     __ bsfl(Rdst, $src$$Register);
 5296     __ jccb(Assembler::notZero, done);
 5297     __ movl(Rdst, BitsPerInt);
 5298     __ bind(done);
 5299   %}
 5300   ins_pipe(ialu_reg);
 5301 %}
 5302 
 5303 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5304   predicate(UseCountTrailingZerosInstruction);
 5305   match(Set dst (CountTrailingZerosL src));
 5306   effect(TEMP dst, KILL cr);
 5307 
 5308   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5309             "JNC    done\n\t"
 5310             "TZCNT  $dst, $src.hi\n\t"
 5311             "ADD    $dst, 32\n"
 5312       "done:" %}
 5313   ins_encode %{
 5314     Register Rdst = $dst$$Register;
 5315     Register Rsrc = $src$$Register;
 5316     Label done;
 5317     __ tzcntl(Rdst, Rsrc);
 5318     __ jccb(Assembler::carryClear, done);
 5319     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5320     __ addl(Rdst, BitsPerInt);
 5321     __ bind(done);
 5322   %}
 5323   ins_pipe(ialu_reg);
 5324 %}
 5325 
 5326 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5327   predicate(!UseCountTrailingZerosInstruction);
 5328   match(Set dst (CountTrailingZerosL src));
 5329   effect(TEMP dst, KILL cr);
 5330 
 5331   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5332             "JNZ    done\n\t"
 5333             "BSF    $dst, $src.hi\n\t"
 5334             "JNZ    msw_not_zero\n\t"
 5335             "MOV    $dst, 32\n"
 5336       "msw_not_zero:\n\t"
 5337             "ADD    $dst, 32\n"
 5338       "done:" %}
 5339   ins_encode %{
 5340     Register Rdst = $dst$$Register;
 5341     Register Rsrc = $src$$Register;
 5342     Label msw_not_zero;
 5343     Label done;
 5344     __ bsfl(Rdst, Rsrc);
 5345     __ jccb(Assembler::notZero, done);
 5346     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5347     __ jccb(Assembler::notZero, msw_not_zero);
 5348     __ movl(Rdst, BitsPerInt);
 5349     __ bind(msw_not_zero);
 5350     __ addl(Rdst, BitsPerInt);
 5351     __ bind(done);
 5352   %}
 5353   ins_pipe(ialu_reg);
 5354 %}
 5355 
 5356 
 5357 //---------- Population Count Instructions -------------------------------------
 5358 
 5359 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5360   predicate(UsePopCountInstruction);
 5361   match(Set dst (PopCountI src));
 5362   effect(KILL cr);
 5363 
 5364   format %{ "POPCNT $dst, $src" %}
 5365   ins_encode %{
 5366     __ popcntl($dst$$Register, $src$$Register);
 5367   %}
 5368   ins_pipe(ialu_reg);
 5369 %}
 5370 
 5371 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5372   predicate(UsePopCountInstruction);
 5373   match(Set dst (PopCountI (LoadI mem)));
 5374   effect(KILL cr);
 5375 
 5376   format %{ "POPCNT $dst, $mem" %}
 5377   ins_encode %{
 5378     __ popcntl($dst$$Register, $mem$$Address);
 5379   %}
 5380   ins_pipe(ialu_reg);
 5381 %}
 5382 
 5383 // Note: Long.bitCount(long) returns an int.
 5384 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5385   predicate(UsePopCountInstruction);
 5386   match(Set dst (PopCountL src));
 5387   effect(KILL cr, TEMP tmp, TEMP dst);
 5388 
 5389   format %{ "POPCNT $dst, $src.lo\n\t"
 5390             "POPCNT $tmp, $src.hi\n\t"
 5391             "ADD    $dst, $tmp" %}
 5392   ins_encode %{
 5393     __ popcntl($dst$$Register, $src$$Register);
 5394     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5395     __ addl($dst$$Register, $tmp$$Register);
 5396   %}
 5397   ins_pipe(ialu_reg);
 5398 %}
 5399 
 5400 // Note: Long.bitCount(long) returns an int.
 5401 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5402   predicate(UsePopCountInstruction);
 5403   match(Set dst (PopCountL (LoadL mem)));
 5404   effect(KILL cr, TEMP tmp, TEMP dst);
 5405 
 5406   format %{ "POPCNT $dst, $mem\n\t"
 5407             "POPCNT $tmp, $mem+4\n\t"
 5408             "ADD    $dst, $tmp" %}
 5409   ins_encode %{
 5410     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5411     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5412     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5413     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5414     __ addl($dst$$Register, $tmp$$Register);
 5415   %}
 5416   ins_pipe(ialu_reg);
 5417 %}
 5418 
 5419 
 5420 //----------Load/Store/Move Instructions---------------------------------------
 5421 //----------Load Instructions--------------------------------------------------
 5422 // Load Byte (8bit signed)
 5423 instruct loadB(xRegI dst, memory mem) %{
 5424   match(Set dst (LoadB mem));
 5425 
 5426   ins_cost(125);
 5427   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5428 
 5429   ins_encode %{
 5430     __ movsbl($dst$$Register, $mem$$Address);
 5431   %}
 5432 
 5433   ins_pipe(ialu_reg_mem);
 5434 %}
 5435 
 5436 // Load Byte (8bit signed) into Long Register
 5437 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5438   match(Set dst (ConvI2L (LoadB mem)));
 5439   effect(KILL cr);
 5440 
 5441   ins_cost(375);
 5442   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5443             "MOV    $dst.hi,$dst.lo\n\t"
 5444             "SAR    $dst.hi,7" %}
 5445 
 5446   ins_encode %{
 5447     __ movsbl($dst$$Register, $mem$$Address);
 5448     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5449     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 25 most-significant bits are already sign-extended.
 5450   %}
 5451 
 5452   ins_pipe(ialu_reg_mem);
 5453 %}
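
      // Note on the SAR by 7 above: after MOVSX8 the upper 25 bits of $dst.lo are
      // already copies of the sign bit, so copying the value to $dst.hi and shifting
      // it right arithmetically by 7 fills $dst.hi entirely with the sign bit, the
      // same result as a full SAR by 31.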
 5454 
 5455 // Load Unsigned Byte (8bit UNsigned)
 5456 instruct loadUB(xRegI dst, memory mem) %{
 5457   match(Set dst (LoadUB mem));
 5458 
 5459   ins_cost(125);
 5460   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5461 
 5462   ins_encode %{
 5463     __ movzbl($dst$$Register, $mem$$Address);
 5464   %}
 5465 
 5466   ins_pipe(ialu_reg_mem);
 5467 %}
 5468 
 5469 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5470 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5471   match(Set dst (ConvI2L (LoadUB mem)));
 5472   effect(KILL cr);
 5473 
 5474   ins_cost(250);
 5475   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5476             "XOR    $dst.hi,$dst.hi" %}
 5477 
 5478   ins_encode %{
 5479     Register Rdst = $dst$$Register;
 5480     __ movzbl(Rdst, $mem$$Address);
 5481     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5482   %}
 5483 
 5484   ins_pipe(ialu_reg_mem);
 5485 %}
 5486 
 5487 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5488 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5489   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5490   effect(KILL cr);
 5491 
 5492   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5493             "XOR    $dst.hi,$dst.hi\n\t"
 5494             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5495   ins_encode %{
 5496     Register Rdst = $dst$$Register;
 5497     __ movzbl(Rdst, $mem$$Address);
 5498     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5499     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5500   %}
 5501   ins_pipe(ialu_reg_mem);
 5502 %}
 5503 
 5504 // Load Short (16bit signed)
 5505 instruct loadS(rRegI dst, memory mem) %{
 5506   match(Set dst (LoadS mem));
 5507 
 5508   ins_cost(125);
 5509   format %{ "MOVSX  $dst,$mem\t# short" %}
 5510 
 5511   ins_encode %{
 5512     __ movswl($dst$$Register, $mem$$Address);
 5513   %}
 5514 
 5515   ins_pipe(ialu_reg_mem);
 5516 %}
 5517 
 5518 // Load Short (16 bit signed) to Byte (8 bit signed)
 5519 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5520   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5521 
 5522   ins_cost(125);
 5523   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5524   ins_encode %{
 5525     __ movsbl($dst$$Register, $mem$$Address);
 5526   %}
 5527   ins_pipe(ialu_reg_mem);
 5528 %}
 5529 
 5530 // Load Short (16bit signed) into Long Register
 5531 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5532   match(Set dst (ConvI2L (LoadS mem)));
 5533   effect(KILL cr);
 5534 
 5535   ins_cost(375);
 5536   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5537             "MOV    $dst.hi,$dst.lo\n\t"
 5538             "SAR    $dst.hi,15" %}
 5539 
 5540   ins_encode %{
 5541     __ movswl($dst$$Register, $mem$$Address);
 5542     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5543     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 17 most-significant bits are already sign-extended.
 5544   %}
 5545 
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Unsigned Short/Char (16bit unsigned)
 5550 instruct loadUS(rRegI dst, memory mem) %{
 5551   match(Set dst (LoadUS mem));
 5552 
 5553   ins_cost(125);
 5554   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5555 
 5556   ins_encode %{
 5557     __ movzwl($dst$$Register, $mem$$Address);
 5558   %}
 5559 
 5560   ins_pipe(ialu_reg_mem);
 5561 %}
 5562 
 5563 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5564 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5565   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5566 
 5567   ins_cost(125);
 5568   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5569   ins_encode %{
 5570     __ movsbl($dst$$Register, $mem$$Address);
 5571   %}
 5572   ins_pipe(ialu_reg_mem);
 5573 %}
 5574 
 5575 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5576 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5577   match(Set dst (ConvI2L (LoadUS mem)));
 5578   effect(KILL cr);
 5579 
 5580   ins_cost(250);
 5581   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5582             "XOR    $dst.hi,$dst.hi" %}
 5583 
 5584   ins_encode %{
 5585     __ movzwl($dst$$Register, $mem$$Address);
 5586     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5587   %}
 5588 
 5589   ins_pipe(ialu_reg_mem);
 5590 %}
 5591 
 5592 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5593 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5594   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5595   effect(KILL cr);
 5596 
 5597   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5598             "XOR    $dst.hi,$dst.hi" %}
 5599   ins_encode %{
 5600     Register Rdst = $dst$$Register;
 5601     __ movzbl(Rdst, $mem$$Address);
 5602     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5603   %}
 5604   ins_pipe(ialu_reg_mem);
 5605 %}
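
      // Because the mask is 0xFF, only the low byte of the ushort/char can survive,
      // so a byte zero-extend (MOVZX8) implements the AND without a separate mask
      // instruction.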
 5606 
 5607 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5608 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5609   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5610   effect(KILL cr);
 5611 
 5612   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5613             "XOR    $dst.hi,$dst.hi\n\t"
 5614             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5615   ins_encode %{
 5616     Register Rdst = $dst$$Register;
 5617     __ movzwl(Rdst, $mem$$Address);
 5618     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5619     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5620   %}
 5621   ins_pipe(ialu_reg_mem);
 5622 %}
 5623 
 5624 // Load Integer
 5625 instruct loadI(rRegI dst, memory mem) %{
 5626   match(Set dst (LoadI mem));
 5627 
 5628   ins_cost(125);
 5629   format %{ "MOV    $dst,$mem\t# int" %}
 5630 
 5631   ins_encode %{
 5632     __ movl($dst$$Register, $mem$$Address);
 5633   %}
 5634 
 5635   ins_pipe(ialu_reg_mem);
 5636 %}
 5637 
 5638 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5639 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5640   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5641 
 5642   ins_cost(125);
 5643   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5644   ins_encode %{
 5645     __ movsbl($dst$$Register, $mem$$Address);
 5646   %}
 5647   ins_pipe(ialu_reg_mem);
 5648 %}
 5649 
 5650 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5651 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5652   match(Set dst (AndI (LoadI mem) mask));
 5653 
 5654   ins_cost(125);
 5655   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5656   ins_encode %{
 5657     __ movzbl($dst$$Register, $mem$$Address);
 5658   %}
 5659   ins_pipe(ialu_reg_mem);
 5660 %}
 5661 
 5662 // Load Integer (32 bit signed) to Short (16 bit signed)
 5663 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5664   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5665 
 5666   ins_cost(125);
 5667   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5668   ins_encode %{
 5669     __ movswl($dst$$Register, $mem$$Address);
 5670   %}
 5671   ins_pipe(ialu_reg_mem);
 5672 %}
 5673 
 5674 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5675 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5676   match(Set dst (AndI (LoadI mem) mask));
 5677 
 5678   ins_cost(125);
 5679   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5680   ins_encode %{
 5681     __ movzwl($dst$$Register, $mem$$Address);
 5682   %}
 5683   ins_pipe(ialu_reg_mem);
 5684 %}
 5685 
 5686 // Load Integer into Long Register
 5687 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5688   match(Set dst (ConvI2L (LoadI mem)));
 5689   effect(KILL cr);
 5690 
 5691   ins_cost(375);
 5692   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5693             "MOV    $dst.hi,$dst.lo\n\t"
 5694             "SAR    $dst.hi,31" %}
 5695 
 5696   ins_encode %{
 5697     __ movl($dst$$Register, $mem$$Address);
 5698     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5699     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5700   %}
 5701 
 5702   ins_pipe(ialu_reg_mem);
 5703 %}
 5704 
 5705 // Load Integer with mask 0xFF into Long Register
 5706 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5707   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5708   effect(KILL cr);
 5709 
 5710   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5711             "XOR    $dst.hi,$dst.hi" %}
 5712   ins_encode %{
 5713     Register Rdst = $dst$$Register;
 5714     __ movzbl(Rdst, $mem$$Address);
 5715     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5716   %}
 5717   ins_pipe(ialu_reg_mem);
 5718 %}
 5719 
 5720 // Load Integer with mask 0xFFFF into Long Register
 5721 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5722   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5723   effect(KILL cr);
 5724 
 5725   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5726             "XOR    $dst.hi,$dst.hi" %}
 5727   ins_encode %{
 5728     Register Rdst = $dst$$Register;
 5729     __ movzwl(Rdst, $mem$$Address);
 5730     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5731   %}
 5732   ins_pipe(ialu_reg_mem);
 5733 %}
 5734 
 5735 // Load Integer with 31-bit mask into Long Register
 5736 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5737   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5738   effect(KILL cr);
 5739 
 5740   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5741             "XOR    $dst.hi,$dst.hi\n\t"
 5742             "AND    $dst.lo,$mask" %}
 5743   ins_encode %{
 5744     Register Rdst = $dst$$Register;
 5745     __ movl(Rdst, $mem$$Address);
 5746     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5747     __ andl(Rdst, $mask$$constant);
 5748   %}
 5749   ins_pipe(ialu_reg_mem);
 5750 %}
 5751 
 5752 // Load Unsigned Integer into Long Register
 5753 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5754   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5755   effect(KILL cr);
 5756 
 5757   ins_cost(250);
 5758   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5759             "XOR    $dst.hi,$dst.hi" %}
 5760 
 5761   ins_encode %{
 5762     __ movl($dst$$Register, $mem$$Address);
 5763     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5764   %}
 5765 
 5766   ins_pipe(ialu_reg_mem);
 5767 %}
 5768 
 5769 // Load Long.  Cannot clobber address while loading, so restrict address
 5770 // register to ESI
 5771 instruct loadL(eRegL dst, load_long_memory mem) %{
 5772   predicate(!((LoadLNode*)n)->require_atomic_access());
 5773   match(Set dst (LoadL mem));
 5774 
 5775   ins_cost(250);
 5776   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5777             "MOV    $dst.hi,$mem+4" %}
 5778 
 5779   ins_encode %{
 5780     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5781     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5782     __ movl($dst$$Register, Amemlo);
 5783     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5784   %}
 5785 
 5786   ins_pipe(ialu_reg_long_mem);
 5787 %}
 5788 
 5789 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5790 // then store it down to the stack and reload on the int
 5791 // side.
 5792 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5793   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5794   match(Set dst (LoadL mem));
 5795 
 5796   ins_cost(200);
 5797   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5798             "FISTp  $dst" %}
 5799   ins_encode(enc_loadL_volatile(mem,dst));
 5800   ins_pipe( fpu_reg_mem );
 5801 %}
 5802 
 5803 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5804   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5805   match(Set dst (LoadL mem));
 5806   effect(TEMP tmp);
 5807   ins_cost(180);
 5808   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5809             "MOVSD  $dst,$tmp" %}
 5810   ins_encode %{
 5811     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5812     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5813   %}
 5814   ins_pipe( pipe_slow );
 5815 %}
 5816 
 5817 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5818   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5819   match(Set dst (LoadL mem));
 5820   effect(TEMP tmp);
 5821   ins_cost(160);
 5822   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5823             "MOVD   $dst.lo,$tmp\n\t"
 5824             "PSRLQ  $tmp,32\n\t"
 5825             "MOVD   $dst.hi,$tmp" %}
 5826   ins_encode %{
 5827     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5828     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5829     __ psrlq($tmp$$XMMRegister, 32);
 5830     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5831   %}
 5832   ins_pipe( pipe_slow );
 5833 %}
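
      // In the instruction above, the single 8-byte MOVSD provides the required
      // atomicity; MOVD then extracts the low 32 bits of $tmp into $dst.lo, PSRLQ
      // shifts the upper half down, and a second MOVD extracts $dst.hi.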
 5834 
 5835 // Load Range
 5836 instruct loadRange(rRegI dst, memory mem) %{
 5837   match(Set dst (LoadRange mem));
 5838 
 5839   ins_cost(125);
 5840   format %{ "MOV    $dst,$mem" %}
 5841   opcode(0x8B);
 5842   ins_encode( OpcP, RegMem(dst,mem));
 5843   ins_pipe( ialu_reg_mem );
 5844 %}
 5845 
 5846 
 5847 // Load Pointer
 5848 instruct loadP(eRegP dst, memory mem) %{
 5849   match(Set dst (LoadP mem));
 5850 
 5851   ins_cost(125);
 5852   format %{ "MOV    $dst,$mem" %}
 5853   opcode(0x8B);
 5854   ins_encode( OpcP, RegMem(dst,mem));
 5855   ins_pipe( ialu_reg_mem );
 5856 %}
 5857 
 5858 // Load Klass Pointer
 5859 instruct loadKlass(eRegP dst, memory mem) %{
 5860   match(Set dst (LoadKlass mem));
 5861 
 5862   ins_cost(125);
 5863   format %{ "MOV    $dst,$mem" %}
 5864   opcode(0x8B);
 5865   ins_encode( OpcP, RegMem(dst,mem));
 5866   ins_pipe( ialu_reg_mem );
 5867 %}
 5868 
 5869 // Load Double
 5870 instruct loadDPR(regDPR dst, memory mem) %{
 5871   predicate(UseSSE<=1);
 5872   match(Set dst (LoadD mem));
 5873 
 5874   ins_cost(150);
 5875   format %{ "FLD_D  ST,$mem\n\t"
 5876             "FSTP   $dst" %}
 5877   opcode(0xDD);               /* DD /0 */
 5878   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5879               Pop_Reg_DPR(dst) );
 5880   ins_pipe( fpu_reg_mem );
 5881 %}
 5882 
 5883 // Load Double to XMM
 5884 instruct loadD(regD dst, memory mem) %{
 5885   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5886   match(Set dst (LoadD mem));
 5887   ins_cost(145);
 5888   format %{ "MOVSD  $dst,$mem" %}
 5889   ins_encode %{
 5890     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5891   %}
 5892   ins_pipe( pipe_slow );
 5893 %}
 5894 
 5895 instruct loadD_partial(regD dst, memory mem) %{
 5896   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5897   match(Set dst (LoadD mem));
 5898   ins_cost(145);
 5899   format %{ "MOVLPD $dst,$mem" %}
 5900   ins_encode %{
 5901     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5902   %}
 5903   ins_pipe( pipe_slow );
 5904 %}
 5905 
 5906 // Load to XMM register (single-precision floating point)
 5907 // MOVSS instruction
 5908 instruct loadF(regF dst, memory mem) %{
 5909   predicate(UseSSE>=1);
 5910   match(Set dst (LoadF mem));
 5911   ins_cost(145);
 5912   format %{ "MOVSS  $dst,$mem" %}
 5913   ins_encode %{
 5914     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5915   %}
 5916   ins_pipe( pipe_slow );
 5917 %}
 5918 
 5919 // Load Float
 5920 instruct loadFPR(regFPR dst, memory mem) %{
 5921   predicate(UseSSE==0);
 5922   match(Set dst (LoadF mem));
 5923 
 5924   ins_cost(150);
 5925   format %{ "FLD_S  ST,$mem\n\t"
 5926             "FSTP   $dst" %}
 5927   opcode(0xD9);               /* D9 /0 */
 5928   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5929               Pop_Reg_FPR(dst) );
 5930   ins_pipe( fpu_reg_mem );
 5931 %}
 5932 
 5933 // Load Effective Address
 5934 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5935   match(Set dst mem);
 5936 
 5937   ins_cost(110);
 5938   format %{ "LEA    $dst,$mem" %}
 5939   opcode(0x8D);
 5940   ins_encode( OpcP, RegMem(dst,mem));
 5941   ins_pipe( ialu_reg_reg_fat );
 5942 %}
 5943 
 5944 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5945   match(Set dst mem);
 5946 
 5947   ins_cost(110);
 5948   format %{ "LEA    $dst,$mem" %}
 5949   opcode(0x8D);
 5950   ins_encode( OpcP, RegMem(dst,mem));
 5951   ins_pipe( ialu_reg_reg_fat );
 5952 %}
 5953 
 5954 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5955   match(Set dst mem);
 5956 
 5957   ins_cost(110);
 5958   format %{ "LEA    $dst,$mem" %}
 5959   opcode(0x8D);
 5960   ins_encode( OpcP, RegMem(dst,mem));
 5961   ins_pipe( ialu_reg_reg_fat );
 5962 %}
 5963 
 5964 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5965   match(Set dst mem);
 5966 
 5967   ins_cost(110);
 5968   format %{ "LEA    $dst,$mem" %}
 5969   opcode(0x8D);
 5970   ins_encode( OpcP, RegMem(dst,mem));
 5971   ins_pipe( ialu_reg_reg_fat );
 5972 %}
 5973 
 5974 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5975   match(Set dst mem);
 5976 
 5977   ins_cost(110);
 5978   format %{ "LEA    $dst,$mem" %}
 5979   opcode(0x8D);
 5980   ins_encode( OpcP, RegMem(dst,mem));
 5981   ins_pipe( ialu_reg_reg_fat );
 5982 %}
 5983 
 5984 // Load Constant
 5985 instruct loadConI(rRegI dst, immI src) %{
 5986   match(Set dst src);
 5987 
 5988   format %{ "MOV    $dst,$src" %}
 5989   ins_encode( LdImmI(dst, src) );
 5990   ins_pipe( ialu_reg_fat );
 5991 %}
 5992 
 5993 // Load Constant zero
 5994 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5995   match(Set dst src);
 5996   effect(KILL cr);
 5997 
 5998   ins_cost(50);
 5999   format %{ "XOR    $dst,$dst" %}
 6000   opcode(0x33);  /* + rd */
 6001   ins_encode( OpcP, RegReg( dst, dst ) );
 6002   ins_pipe( ialu_reg );
 6003 %}
 6004 
 6005 instruct loadConP(eRegP dst, immP src) %{
 6006   match(Set dst src);
 6007 
 6008   format %{ "MOV    $dst,$src" %}
 6009   opcode(0xB8);  /* + rd */
 6010   ins_encode( LdImmP(dst, src) );
 6011   ins_pipe( ialu_reg_fat );
 6012 %}
 6013 
 6014 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6015   match(Set dst src);
 6016   effect(KILL cr);
 6017   ins_cost(200);
 6018   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6019             "MOV    $dst.hi,$src.hi" %}
 6020   opcode(0xB8);
 6021   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6022   ins_pipe( ialu_reg_long_fat );
 6023 %}
 6024 
 6025 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6026   match(Set dst src);
 6027   effect(KILL cr);
 6028   ins_cost(150);
 6029   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6030             "XOR    $dst.hi,$dst.hi" %}
 6031   opcode(0x33,0x33);
 6032   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6033   ins_pipe( ialu_reg_long );
 6034 %}
 6035 
 6036 // The instruction usage is guarded by predicate in operand immFPR().
 6037 instruct loadConFPR(regFPR dst, immFPR con) %{
 6038   match(Set dst con);
 6039   ins_cost(125);
 6040   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6041             "FSTP   $dst" %}
 6042   ins_encode %{
 6043     __ fld_s($constantaddress($con));
 6044     __ fstp_d($dst$$reg);
 6045   %}
 6046   ins_pipe(fpu_reg_con);
 6047 %}
 6048 
 6049 // The instruction usage is guarded by predicate in operand immFPR0().
 6050 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6051   match(Set dst con);
 6052   ins_cost(125);
 6053   format %{ "FLDZ   ST\n\t"
 6054             "FSTP   $dst" %}
 6055   ins_encode %{
 6056     __ fldz();
 6057     __ fstp_d($dst$$reg);
 6058   %}
 6059   ins_pipe(fpu_reg_con);
 6060 %}
 6061 
 6062 // The instruction usage is guarded by predicate in operand immFPR1().
 6063 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6064   match(Set dst con);
 6065   ins_cost(125);
 6066   format %{ "FLD1   ST\n\t"
 6067             "FSTP   $dst" %}
 6068   ins_encode %{
 6069     __ fld1();
 6070     __ fstp_d($dst$$reg);
 6071   %}
 6072   ins_pipe(fpu_reg_con);
 6073 %}
 6074 
 6075 // The instruction usage is guarded by predicate in operand immF().
 6076 instruct loadConF(regF dst, immF con) %{
 6077   match(Set dst con);
 6078   ins_cost(125);
 6079   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6080   ins_encode %{
 6081     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6082   %}
 6083   ins_pipe(pipe_slow);
 6084 %}
 6085 
 6086 // The instruction usage is guarded by predicate in operand immF0().
 6087 instruct loadConF0(regF dst, immF0 src) %{
 6088   match(Set dst src);
 6089   ins_cost(100);
 6090   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6091   ins_encode %{
 6092     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6093   %}
 6094   ins_pipe(pipe_slow);
 6095 %}
 6096 
 6097 // The instruction usage is guarded by predicate in operand immDPR().
 6098 instruct loadConDPR(regDPR dst, immDPR con) %{
 6099   match(Set dst con);
 6100   ins_cost(125);
 6101 
 6102   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6103             "FSTP   $dst" %}
 6104   ins_encode %{
 6105     __ fld_d($constantaddress($con));
 6106     __ fstp_d($dst$$reg);
 6107   %}
 6108   ins_pipe(fpu_reg_con);
 6109 %}
 6110 
 6111 // The instruction usage is guarded by predicate in operand immDPR0().
 6112 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6113   match(Set dst con);
 6114   ins_cost(125);
 6115 
 6116   format %{ "FLDZ   ST\n\t"
 6117             "FSTP   $dst" %}
 6118   ins_encode %{
 6119     __ fldz();
 6120     __ fstp_d($dst$$reg);
 6121   %}
 6122   ins_pipe(fpu_reg_con);
 6123 %}
 6124 
 6125 // The instruction usage is guarded by predicate in operand immDPR1().
 6126 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6127   match(Set dst con);
 6128   ins_cost(125);
 6129 
 6130   format %{ "FLD1   ST\n\t"
 6131             "FSTP   $dst" %}
 6132   ins_encode %{
 6133     __ fld1();
 6134     __ fstp_d($dst$$reg);
 6135   %}
 6136   ins_pipe(fpu_reg_con);
 6137 %}
 6138 
 6139 // The instruction usage is guarded by predicate in operand immD().
 6140 instruct loadConD(regD dst, immD con) %{
 6141   match(Set dst con);
 6142   ins_cost(125);
 6143   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6144   ins_encode %{
 6145     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6146   %}
 6147   ins_pipe(pipe_slow);
 6148 %}
 6149 
 6150 // The instruction usage is guarded by predicate in operand immD0().
 6151 instruct loadConD0(regD dst, immD0 src) %{
 6152   match(Set dst src);
 6153   ins_cost(100);
 6154   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6155   ins_encode %{
 6156     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 6157   %}
 6158   ins_pipe( pipe_slow );
 6159 %}
 6160 
 6161 // Load Stack Slot
 6162 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6163   match(Set dst src);
 6164   ins_cost(125);
 6165 
 6166   format %{ "MOV    $dst,$src" %}
 6167   opcode(0x8B);
 6168   ins_encode( OpcP, RegMem(dst,src));
 6169   ins_pipe( ialu_reg_mem );
 6170 %}
 6171 
 6172 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6173   match(Set dst src);
 6174 
 6175   ins_cost(200);
 6176   format %{ "MOV    $dst,$src.lo\n\t"
 6177             "MOV    $dst+4,$src.hi" %}
 6178   opcode(0x8B, 0x8B);
 6179   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6180   ins_pipe( ialu_mem_long_reg );
 6181 %}
 6182 
 6183 // Load Stack Slot
 6184 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6185   match(Set dst src);
 6186   ins_cost(125);
 6187 
 6188   format %{ "MOV    $dst,$src" %}
 6189   opcode(0x8B);
 6190   ins_encode( OpcP, RegMem(dst,src));
 6191   ins_pipe( ialu_reg_mem );
 6192 %}
 6193 
 6194 // Load Stack Slot
 6195 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6196   match(Set dst src);
 6197   ins_cost(125);
 6198 
 6199   format %{ "FLD_S  $src\n\t"
 6200             "FSTP   $dst" %}
 6201   opcode(0xD9);               /* D9 /0, FLD m32real */
 6202   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6203               Pop_Reg_FPR(dst) );
 6204   ins_pipe( fpu_reg_mem );
 6205 %}
 6206 
 6207 // Load Stack Slot
 6208 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6209   match(Set dst src);
 6210   ins_cost(125);
 6211 
 6212   format %{ "FLD_D  $src\n\t"
 6213             "FSTP   $dst" %}
 6214   opcode(0xDD);               /* DD /0, FLD m64real */
 6215   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6216               Pop_Reg_DPR(dst) );
 6217   ins_pipe( fpu_reg_mem );
 6218 %}
 6219 
 6220 // Prefetch instructions for allocation.
 6221 // Must be safe to execute with invalid address (cannot fault).
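      // PREFETCH instructions are architectural hints and never raise page or
      // protection faults, so prefetching the not-yet-committed allocation
      // address is safe.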
 6222 
 6223 instruct prefetchAlloc0( memory mem ) %{
 6224   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6225   match(PrefetchAllocation mem);
 6226   ins_cost(0);
 6227   size(0);
 6228   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6229   ins_encode();
 6230   ins_pipe(empty);
 6231 %}
 6232 
 6233 instruct prefetchAlloc( memory mem ) %{
 6234   predicate(AllocatePrefetchInstr==3);
 6235   match( PrefetchAllocation mem );
 6236   ins_cost(100);
 6237 
 6238   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6239   ins_encode %{
 6240     __ prefetchw($mem$$Address);
 6241   %}
 6242   ins_pipe(ialu_mem);
 6243 %}
 6244 
 6245 instruct prefetchAllocNTA( memory mem ) %{
 6246   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6247   match(PrefetchAllocation mem);
 6248   ins_cost(100);
 6249 
 6250   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6251   ins_encode %{
 6252     __ prefetchnta($mem$$Address);
 6253   %}
 6254   ins_pipe(ialu_mem);
 6255 %}
 6256 
 6257 instruct prefetchAllocT0( memory mem ) %{
 6258   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6259   match(PrefetchAllocation mem);
 6260   ins_cost(100);
 6261 
 6262   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6263   ins_encode %{
 6264     __ prefetcht0($mem$$Address);
 6265   %}
 6266   ins_pipe(ialu_mem);
 6267 %}
 6268 
 6269 instruct prefetchAllocT2( memory mem ) %{
 6270   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6271   match(PrefetchAllocation mem);
 6272   ins_cost(100);
 6273 
 6274   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6275   ins_encode %{
 6276     __ prefetcht2($mem$$Address);
 6277   %}
 6278   ins_pipe(ialu_mem);
 6279 %}
 6280 
 6281 //----------Store Instructions-------------------------------------------------
 6282 
 6283 // Store Byte
 6284 instruct storeB(memory mem, xRegI src) %{
 6285   match(Set mem (StoreB mem src));
 6286 
 6287   ins_cost(125);
 6288   format %{ "MOV8   $mem,$src" %}
 6289   opcode(0x88);
 6290   ins_encode( OpcP, RegMem( src, mem ) );
 6291   ins_pipe( ialu_mem_reg );
 6292 %}
 6293 
 6294 // Store Char/Short
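      // The 0x66 operand-size prefix (the secondary opcode, emitted first by OpcS)
      // selects the 16-bit form of MOV (0x89).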
 6295 instruct storeC(memory mem, rRegI src) %{
 6296   match(Set mem (StoreC mem src));
 6297 
 6298   ins_cost(125);
 6299   format %{ "MOV16  $mem,$src" %}
 6300   opcode(0x89, 0x66);
 6301   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6302   ins_pipe( ialu_mem_reg );
 6303 %}
 6304 
 6305 // Store Integer
 6306 instruct storeI(memory mem, rRegI src) %{
 6307   match(Set mem (StoreI mem src));
 6308 
 6309   ins_cost(125);
 6310   format %{ "MOV    $mem,$src" %}
 6311   opcode(0x89);
 6312   ins_encode( OpcP, RegMem( src, mem ) );
 6313   ins_pipe( ialu_mem_reg );
 6314 %}
 6315 
 6316 // Store Long
 6317 instruct storeL(long_memory mem, eRegL src) %{
 6318   predicate(!((StoreLNode*)n)->require_atomic_access());
 6319   match(Set mem (StoreL mem src));
 6320 
 6321   ins_cost(200);
 6322   format %{ "MOV    $mem,$src.lo\n\t"
 6323             "MOV    $mem+4,$src.hi" %}
 6324   opcode(0x89, 0x89);
 6325   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6326   ins_pipe( ialu_mem_long_reg );
 6327 %}
 6328 
 6329 // Store Long to Integer
 6330 instruct storeL2I(memory mem, eRegL src) %{
 6331   match(Set mem (StoreI mem (ConvL2I src)));
 6332 
 6333   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6334   ins_encode %{
 6335     __ movl($mem$$Address, $src$$Register);
 6336   %}
 6337   ins_pipe(ialu_mem_reg);
 6338 %}
 6339 
 6340 // Volatile Store Long.  Must be atomic, so move it into
 6341 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6342 // target address before the store (for null-ptr checks)
 6343 // so the memory operand is used twice in the encoding.
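      // A single FILD/FISTP pair moves the whole 64-bit value in one memory access,
      // giving the atomicity that a pair of 32-bit MOVs cannot.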
 6344 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6345   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6346   match(Set mem (StoreL mem src));
 6347   effect( KILL cr );
 6348   ins_cost(400);
 6349   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6350             "FILD   $src\n\t"
 6351             "FISTP  $mem\t # 64-bit atomic volatile long store" %}
 6352   opcode(0x3B);
 6353   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6354   ins_pipe( fpu_reg_mem );
 6355 %}
 6356 
 6357 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6358   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6359   match(Set mem (StoreL mem src));
 6360   effect( TEMP tmp, KILL cr );
 6361   ins_cost(380);
 6362   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6363             "MOVSD  $tmp,$src\n\t"
 6364             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6365   ins_encode %{
 6366     __ cmpl(rax, $mem$$Address);
 6367     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6368     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6369   %}
 6370   ins_pipe( pipe_slow );
 6371 %}
 6372 
 6373 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6374   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6375   match(Set mem (StoreL mem src));
 6376   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6377   ins_cost(360);
 6378   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6379             "MOVD   $tmp,$src.lo\n\t"
 6380             "MOVD   $tmp2,$src.hi\n\t"
 6381             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6382             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6383   ins_encode %{
 6384     __ cmpl(rax, $mem$$Address);
 6385     __ movdl($tmp$$XMMRegister, $src$$Register);
 6386     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6387     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6388     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6389   %}
 6390   ins_pipe( pipe_slow );
 6391 %}
 6392 
 6393 // Store Pointer; for storing unknown oops and raw pointers
 6394 instruct storeP(memory mem, anyRegP src) %{
 6395   match(Set mem (StoreP mem src));
 6396 
 6397   ins_cost(125);
 6398   format %{ "MOV    $mem,$src" %}
 6399   opcode(0x89);
 6400   ins_encode( OpcP, RegMem( src, mem ) );
 6401   ins_pipe( ialu_mem_reg );
 6402 %}
 6403 
 6404 // Store Integer Immediate
 6405 instruct storeImmI(memory mem, immI src) %{
 6406   match(Set mem (StoreI mem src));
 6407 
 6408   ins_cost(150);
 6409   format %{ "MOV    $mem,$src" %}
 6410   opcode(0xC7);               /* C7 /0 */
 6411   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6412   ins_pipe( ialu_mem_imm );
 6413 %}
 6414 
 6415 // Store Short/Char Immediate
 6416 instruct storeImmI16(memory mem, immI16 src) %{
 6417   predicate(UseStoreImmI16);
 6418   match(Set mem (StoreC mem src));
 6419 
 6420   ins_cost(150);
 6421   format %{ "MOV16  $mem,$src" %}
 6422   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6423   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6424   ins_pipe( ialu_mem_imm );
 6425 %}
 6426 
 6427 // Store Pointer Immediate; null pointers or constant oops that do not
 6428 // need card-mark barriers.
 6429 instruct storeImmP(memory mem, immP src) %{
 6430   match(Set mem (StoreP mem src));
 6431 
 6432   ins_cost(150);
 6433   format %{ "MOV    $mem,$src" %}
 6434   opcode(0xC7);               /* C7 /0 */
 6435   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6436   ins_pipe( ialu_mem_imm );
 6437 %}
 6438 
 6439 // Store Byte Immediate
 6440 instruct storeImmB(memory mem, immI8 src) %{
 6441   match(Set mem (StoreB mem src));
 6442 
 6443   ins_cost(150);
 6444   format %{ "MOV8   $mem,$src" %}
 6445   opcode(0xC6);               /* C6 /0 */
 6446   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6447   ins_pipe( ialu_mem_imm );
 6448 %}
 6449 
 6450 // Store CMS card-mark Immediate
 6451 instruct storeImmCM(memory mem, immI8 src) %{
 6452   match(Set mem (StoreCM mem src));
 6453 
 6454   ins_cost(150);
 6455   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6456   opcode(0xC6);               /* C6 /0 */
 6457   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6458   ins_pipe( ialu_mem_imm );
 6459 %}
 6460 
 6461 // Store Double
 6462 instruct storeDPR( memory mem, regDPR1 src) %{
 6463   predicate(UseSSE<=1);
 6464   match(Set mem (StoreD mem src));
 6465 
 6466   ins_cost(100);
 6467   format %{ "FST_D  $mem,$src" %}
 6468   opcode(0xDD);       /* DD /2 */
 6469   ins_encode( enc_FPR_store(mem,src) );
 6470   ins_pipe( fpu_mem_reg );
 6471 %}
 6472 
 6473 // Store double does rounding on x86
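      // Storing the 80-bit x87 value into a 64-bit memory slot rounds it to double
      // precision, so the RoundDouble node is absorbed by the FST_D itself.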
 6474 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6475   predicate(UseSSE<=1);
 6476   match(Set mem (StoreD mem (RoundDouble src)));
 6477 
 6478   ins_cost(100);
 6479   format %{ "FST_D  $mem,$src\t# round" %}
 6480   opcode(0xDD);       /* DD /2 */
 6481   ins_encode( enc_FPR_store(mem,src) );
 6482   ins_pipe( fpu_mem_reg );
 6483 %}
 6484 
 6485 // Store XMM register to memory (double-precision floating point)
 6486 // MOVSD instruction
 6487 instruct storeD(memory mem, regD src) %{
 6488   predicate(UseSSE>=2);
 6489   match(Set mem (StoreD mem src));
 6490   ins_cost(95);
 6491   format %{ "MOVSD  $mem,$src" %}
 6492   ins_encode %{
 6493     __ movdbl($mem$$Address, $src$$XMMRegister);
 6494   %}
 6495   ins_pipe( pipe_slow );
 6496 %}
 6497 
 6498 // Store XMM register to memory (single-precision floating point)
 6499 // MOVSS instruction
 6500 instruct storeF(memory mem, regF src) %{
 6501   predicate(UseSSE>=1);
 6502   match(Set mem (StoreF mem src));
 6503   ins_cost(95);
 6504   format %{ "MOVSS  $mem,$src" %}
 6505   ins_encode %{
 6506     __ movflt($mem$$Address, $src$$XMMRegister);
 6507   %}
 6508   ins_pipe( pipe_slow );
 6509 %}
 6510 
 6511 
 6512 // Store Float
 6513 instruct storeFPR( memory mem, regFPR1 src) %{
 6514   predicate(UseSSE==0);
 6515   match(Set mem (StoreF mem src));
 6516 
 6517   ins_cost(100);
 6518   format %{ "FST_S  $mem,$src" %}
 6519   opcode(0xD9);       /* D9 /2 */
 6520   ins_encode( enc_FPR_store(mem,src) );
 6521   ins_pipe( fpu_mem_reg );
 6522 %}
 6523 
 6524 // Store Float does rounding on x86
 6525 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6526   predicate(UseSSE==0);
 6527   match(Set mem (StoreF mem (RoundFloat src)));
 6528 
 6529   ins_cost(100);
 6530   format %{ "FST_S  $mem,$src\t# round" %}
 6531   opcode(0xD9);       /* D9 /2 */
 6532   ins_encode( enc_FPR_store(mem,src) );
 6533   ins_pipe( fpu_mem_reg );
 6534 %}
 6535 
 6536 // Store Float from a double does rounding on x86 (the double-to-float conversion folds into FST_S)
 6537 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6538   predicate(UseSSE<=1);
 6539   match(Set mem (StoreF mem (ConvD2F src)));
 6540 
 6541   ins_cost(100);
 6542   format %{ "FST_S  $mem,$src\t# D-round" %}
 6543   opcode(0xD9);       /* D9 /2 */
 6544   ins_encode( enc_FPR_store(mem,src) );
 6545   ins_pipe( fpu_mem_reg );
 6546 %}
 6547 
 6548 // Store immediate Float value (it is faster than store from FPU register)
 6549 // The instruction usage is guarded by predicate in operand immFPR().
 6550 instruct storeFPR_imm( memory mem, immFPR src) %{
 6551   match(Set mem (StoreF mem src));
 6552 
 6553   ins_cost(50);
 6554   format %{ "MOV    $mem,$src\t# store float" %}
 6555   opcode(0xC7);               /* C7 /0 */
 6556   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6557   ins_pipe( ialu_mem_imm );
 6558 %}
 6559 
 6560 // Store immediate Float value (it is faster than store from XMM register)
 6561 // The instruction usage is guarded by predicate in operand immF().
 6562 instruct storeF_imm( memory mem, immF src) %{
 6563   match(Set mem (StoreF mem src));
 6564 
 6565   ins_cost(50);
 6566   format %{ "MOV    $mem,$src\t# store float" %}
 6567   opcode(0xC7);               /* C7 /0 */
 6568   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6569   ins_pipe( ialu_mem_imm );
 6570 %}
 6571 
 6572 // Store Integer to stack slot
 6573 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6574   match(Set dst src);
 6575 
 6576   ins_cost(100);
 6577   format %{ "MOV    $dst,$src" %}
 6578   opcode(0x89);
 6579   ins_encode( OpcPRegSS( dst, src ) );
 6580   ins_pipe( ialu_mem_reg );
 6581 %}
 6582 
 6583 // Store Integer to stack slot
 6584 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6585   match(Set dst src);
 6586 
 6587   ins_cost(100);
 6588   format %{ "MOV    $dst,$src" %}
 6589   opcode(0x89);
 6590   ins_encode( OpcPRegSS( dst, src ) );
 6591   ins_pipe( ialu_mem_reg );
 6592 %}
 6593 
 6594 // Store Long to stack slot
 6595 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6596   match(Set dst src);
 6597 
 6598   ins_cost(200);
 6599   format %{ "MOV    $dst,$src.lo\n\t"
 6600             "MOV    $dst+4,$src.hi" %}
 6601   opcode(0x89, 0x89);
 6602   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6603   ins_pipe( ialu_mem_long_reg );
 6604 %}
 6605 
 6606 //----------MemBar Instructions-----------------------------------------------
 6607 // Memory barrier flavors
 6608 
 6609 instruct membar_acquire() %{
 6610   match(MemBarAcquire);
 6611   match(LoadFence);
 6612   ins_cost(400);
 6613 
 6614   size(0);
 6615   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6616   ins_encode();
 6617   ins_pipe(empty);
 6618 %}
 6619 
 6620 instruct membar_acquire_lock() %{
 6621   match(MemBarAcquireLock);
 6622   ins_cost(0);
 6623 
 6624   size(0);
 6625   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6626   ins_encode( );
 6627   ins_pipe(empty);
 6628 %}
 6629 
 6630 instruct membar_release() %{
 6631   match(MemBarRelease);
 6632   match(StoreFence);
 6633   ins_cost(400);
 6634 
 6635   size(0);
 6636   format %{ "MEMBAR-release ! (empty encoding)" %}
 6637   ins_encode( );
 6638   ins_pipe(empty);
 6639 %}
 6640 
 6641 instruct membar_release_lock() %{
 6642   match(MemBarReleaseLock);
 6643   ins_cost(0);
 6644 
 6645   size(0);
 6646   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6647   ins_encode( );
 6648   ins_pipe(empty);
 6649 %}
 6650 
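      // A locked read-modify-write of a word on the stack acts as a full StoreLoad
      // barrier on x86 and has traditionally been cheaper than MFENCE.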
 6651 instruct membar_volatile(eFlagsReg cr) %{
 6652   match(MemBarVolatile);
 6653   effect(KILL cr);
 6654   ins_cost(400);
 6655 
 6656   format %{
 6657     $$template
 6658     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6659   %}
 6660   ins_encode %{
 6661     __ membar(Assembler::StoreLoad);
 6662   %}
 6663   ins_pipe(pipe_slow);
 6664 %}
 6665 
 6666 instruct unnecessary_membar_volatile() %{
 6667   match(MemBarVolatile);
 6668   predicate(Matcher::post_store_load_barrier(n));
 6669   ins_cost(0);
 6670 
 6671   size(0);
 6672   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6673   ins_encode( );
 6674   ins_pipe(empty);
 6675 %}
 6676 
 6677 instruct membar_storestore() %{
 6678   match(MemBarStoreStore);
 6679   ins_cost(0);
 6680 
 6681   size(0);
 6682   format %{ "MEMBAR-storestore (empty encoding)" %}
 6683   ins_encode( );
 6684   ins_pipe(empty);
 6685 %}
 6686 
 6687 //----------Move Instructions--------------------------------------------------
 6688 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6689   match(Set dst (CastX2P src));
 6690   format %{ "# X2P  $dst, $src" %}
 6691   ins_encode( /*empty encoding*/ );
 6692   ins_cost(0);
 6693   ins_pipe(empty);
 6694 %}
 6695 
 6696 instruct castP2X(rRegI dst, eRegP src ) %{
 6697   match(Set dst (CastP2X src));
 6698   ins_cost(50);
 6699   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6700   ins_encode( enc_Copy( dst, src) );
 6701   ins_pipe( ialu_reg_reg );
 6702 %}
 6703 
 6704 //----------Conditional Move---------------------------------------------------
 6705 // Conditional move
 6706 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6707   predicate(!VM_Version::supports_cmov() );
 6708   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6709   ins_cost(200);
 6710   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6711             "MOV    $dst,$src\n"
 6712       "skip:" %}
 6713   ins_encode %{
 6714     Label Lskip;
 6715     // Invert sense of branch from sense of CMOV
 6716     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6717     __ movl($dst$$Register, $src$$Register);
 6718     __ bind(Lskip);
 6719   %}
 6720   ins_pipe( pipe_cmov_reg );
 6721 %}
 6722 
 6723 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6724   predicate(!VM_Version::supports_cmov() );
 6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6726   ins_cost(200);
 6727   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6728             "MOV    $dst,$src\n"
 6729       "skip:" %}
 6730   ins_encode %{
 6731     Label Lskip;
 6732     // Invert sense of branch from sense of CMOV
 6733     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6734     __ movl($dst$$Register, $src$$Register);
 6735     __ bind(Lskip);
 6736   %}
 6737   ins_pipe( pipe_cmov_reg );
 6738 %}
 6739 
 6740 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6741   predicate(VM_Version::supports_cmov() );
 6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6743   ins_cost(200);
 6744   format %{ "CMOV$cop $dst,$src" %}
 6745   opcode(0x0F,0x40);
 6746   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6747   ins_pipe( pipe_cmov_reg );
 6748 %}
 6749 
 6750 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6751   predicate(VM_Version::supports_cmov() );
 6752   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6753   ins_cost(200);
 6754   format %{ "CMOV$cop $dst,$src" %}
 6755   opcode(0x0F,0x40);
 6756   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6757   ins_pipe( pipe_cmov_reg );
 6758 %}
 6759 
 6760 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6761   predicate(VM_Version::supports_cmov() );
 6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6763   ins_cost(200);
 6764   expand %{
 6765     cmovI_regU(cop, cr, dst, src);
 6766   %}
 6767 %}
 6768 
 6769 // Conditional move
 6770 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6771   predicate(VM_Version::supports_cmov() );
 6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6773   ins_cost(250);
 6774   format %{ "CMOV$cop $dst,$src" %}
 6775   opcode(0x0F,0x40);
 6776   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6777   ins_pipe( pipe_cmov_mem );
 6778 %}
 6779 
 6780 // Conditional move
 6781 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6782   predicate(VM_Version::supports_cmov() );
 6783   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6784   ins_cost(250);
 6785   format %{ "CMOV$cop $dst,$src" %}
 6786   opcode(0x0F,0x40);
 6787   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6788   ins_pipe( pipe_cmov_mem );
 6789 %}
 6790 
 6791 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6792   predicate(VM_Version::supports_cmov() );
 6793   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6794   ins_cost(250);
 6795   expand %{
 6796     cmovI_memU(cop, cr, dst, src);
 6797   %}
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6802   predicate(VM_Version::supports_cmov() );
 6803   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6804   ins_cost(200);
 6805   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6806   opcode(0x0F,0x40);
 6807   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6808   ins_pipe( pipe_cmov_reg );
 6809 %}
 6810 
 6811 // Conditional move (non-P6 version)
 6812 // Note: a CMoveP is generated for stubs and native wrappers
 6813 //       regardless of whether we are on a P6, so we
 6814 //       emulate a CMOV here.
 6815 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6816   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6817   ins_cost(300);
 6818   format %{ "Jn$cop   skip\n\t"
 6819           "MOV    $dst,$src\t# pointer\n"
 6820       "skip:" %}
 6821   opcode(0x8b);
 6822   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6823   ins_pipe( pipe_cmov_reg );
 6824 %}
 6825 
 6826 // Conditional move
 6827 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6828   predicate(VM_Version::supports_cmov() );
 6829   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6830   ins_cost(200);
 6831   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6832   opcode(0x0F,0x40);
 6833   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6834   ins_pipe( pipe_cmov_reg );
 6835 %}
 6836 
 6837 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6838   predicate(VM_Version::supports_cmov() );
 6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6840   ins_cost(200);
 6841   expand %{
 6842     cmovP_regU(cop, cr, dst, src);
 6843   %}
 6844 %}
 6845 
 6846 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6847 // correctly meets the two pointer arguments; one is an incoming
 6848 // register but the other is a memory operand.  ALSO appears to
 6849 // be buggy with implicit null checks.
 6850 //
 6851 //// Conditional move
 6852 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6853 //  predicate(VM_Version::supports_cmov() );
 6854 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6855 //  ins_cost(250);
 6856 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6857 //  opcode(0x0F,0x40);
 6858 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6859 //  ins_pipe( pipe_cmov_mem );
 6860 //%}
 6861 //
 6862 //// Conditional move
 6863 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6864 //  predicate(VM_Version::supports_cmov() );
 6865 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6866 //  ins_cost(250);
 6867 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6868 //  opcode(0x0F,0x40);
 6869 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6870 //  ins_pipe( pipe_cmov_mem );
 6871 //%}
 6872 
 6873 // Conditional move
 6874 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6875   predicate(UseSSE<=1);
 6876   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6877   ins_cost(200);
 6878   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6879   opcode(0xDA);
 6880   ins_encode( enc_cmov_dpr(cop,src) );
 6881   ins_pipe( pipe_cmovDPR_reg );
 6882 %}
 6883 
 6884 // Conditional move
 6885 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6886   predicate(UseSSE==0);
 6887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6888   ins_cost(200);
 6889   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6890   opcode(0xDA);
 6891   ins_encode( enc_cmov_dpr(cop,src) );
 6892   ins_pipe( pipe_cmovDPR_reg );
 6893 %}
 6894 
 6895 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6896 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6897   predicate(UseSSE<=1);
 6898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6899   ins_cost(200);
 6900   format %{ "Jn$cop   skip\n\t"
 6901             "MOV    $dst,$src\t# double\n"
 6902       "skip:" %}
 6903   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6904   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6905   ins_pipe( pipe_cmovDPR_reg );
 6906 %}
 6907 
 6908 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6909 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6910   predicate(UseSSE==0);
 6911   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6912   ins_cost(200);
 6913   format %{ "Jn$cop    skip\n\t"
 6914             "MOV    $dst,$src\t# float\n"
 6915       "skip:" %}
 6916   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6917   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6918   ins_pipe( pipe_cmovDPR_reg );
 6919 %}
 6920 
 6921 // No CMOVE with SSE/SSE2
 6922 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6923   predicate (UseSSE>=1);
 6924   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6925   ins_cost(200);
 6926   format %{ "Jn$cop   skip\n\t"
 6927             "MOVSS  $dst,$src\t# float\n"
 6928       "skip:" %}
 6929   ins_encode %{
 6930     Label skip;
 6931     // Invert sense of branch from sense of CMOV
 6932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6933     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6934     __ bind(skip);
 6935   %}
 6936   ins_pipe( pipe_slow );
 6937 %}
 6938 
 6939 // No CMOVE with SSE/SSE2
 6940 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6941   predicate (UseSSE>=2);
 6942   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6943   ins_cost(200);
 6944   format %{ "Jn$cop   skip\n\t"
 6945             "MOVSD  $dst,$src\t# double\n"
 6946       "skip:" %}
 6947   ins_encode %{
 6948     Label skip;
 6949     // Invert sense of branch from sense of CMOV
 6950     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6951     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6952     __ bind(skip);
 6953   %}
 6954   ins_pipe( pipe_slow );
 6955 %}
 6956 
 6957 // unsigned version
 6958 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6959   predicate (UseSSE>=1);
 6960   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6961   ins_cost(200);
 6962   format %{ "Jn$cop   skip\n\t"
 6963             "MOVSS  $dst,$src\t# float\n"
 6964       "skip:" %}
 6965   ins_encode %{
 6966     Label skip;
 6967     // Invert sense of branch from sense of CMOV
 6968     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6969     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6970     __ bind(skip);
 6971   %}
 6972   ins_pipe( pipe_slow );
 6973 %}
 6974 
 6975 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6976   predicate (UseSSE>=1);
 6977   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6978   ins_cost(200);
 6979   expand %{
 6980     fcmovF_regU(cop, cr, dst, src);
 6981   %}
 6982 %}
 6983 
 6984 // unsigned version
 6985 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6986   predicate (UseSSE>=2);
 6987   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6988   ins_cost(200);
 6989   format %{ "Jn$cop   skip\n\t"
 6990             "MOVSD  $dst,$src\t# double\n"
 6991       "skip:" %}
 6992   ins_encode %{
 6993     Label skip;
 6994     // Invert sense of branch from sense of CMOV
 6995     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6996     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6997     __ bind(skip);
 6998   %}
 6999   ins_pipe( pipe_slow );
 7000 %}
 7001 
 7002 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7003   predicate (UseSSE>=2);
 7004   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7005   ins_cost(200);
 7006   expand %{
 7007     fcmovD_regU(cop, cr, dst, src);
 7008   %}
 7009 %}
 7010 
 7011 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7012   predicate(VM_Version::supports_cmov() );
 7013   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7014   ins_cost(200);
 7015   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7016             "CMOV$cop $dst.hi,$src.hi" %}
 7017   opcode(0x0F,0x40);
 7018   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7019   ins_pipe( pipe_cmov_reg_long );
 7020 %}
 7021 
 7022 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7023   predicate(VM_Version::supports_cmov() );
 7024   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7027             "CMOV$cop $dst.hi,$src.hi" %}
 7028   opcode(0x0F,0x40);
 7029   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7030   ins_pipe( pipe_cmov_reg_long );
 7031 %}
 7032 
 7033 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7034   predicate(VM_Version::supports_cmov() );
 7035   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7036   ins_cost(200);
 7037   expand %{
 7038     cmovL_regU(cop, cr, dst, src);
 7039   %}
 7040 %}
 7041 
 7042 //----------Arithmetic Instructions--------------------------------------------
 7043 //----------Addition Instructions----------------------------------------------
 7044 
 7045 // Integer Addition Instructions
 7046 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7047   match(Set dst (AddI dst src));
 7048   effect(KILL cr);
 7049 
 7050   size(2);
 7051   format %{ "ADD    $dst,$src" %}
 7052   opcode(0x03);
 7053   ins_encode( OpcP, RegReg( dst, src) );
 7054   ins_pipe( ialu_reg_reg );
 7055 %}
 7056 
 7057 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7058   match(Set dst (AddI dst src));
 7059   effect(KILL cr);
 7060 
 7061   format %{ "ADD    $dst,$src" %}
 7062   opcode(0x81, 0x00); /* /0 id */
 7063   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7064   ins_pipe( ialu_reg );
 7065 %}
 7066 
 7067 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7068   predicate(UseIncDec);
 7069   match(Set dst (AddI dst src));
 7070   effect(KILL cr);
 7071 
 7072   size(1);
 7073   format %{ "INC    $dst" %}
 7074   opcode(0x40); /*  */
 7075   ins_encode( Opc_plus( primary, dst ) );
 7076   ins_pipe( ialu_reg );
 7077 %}
 7078 
 7079 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7080   match(Set dst (AddI src0 src1));
 7081   ins_cost(110);
 7082 
 7083   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7084   opcode(0x8D); /* 0x8D /r */
 7085   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7086   ins_pipe( ialu_reg_reg );
 7087 %}
 7088 
 7089 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7090   match(Set dst (AddP src0 src1));
 7091   ins_cost(110);
 7092 
 7093   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7094   opcode(0x8D); /* 0x8D /r */
 7095   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7096   ins_pipe( ialu_reg_reg );
 7097 %}
 7098 
 7099 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7100   predicate(UseIncDec);
 7101   match(Set dst (AddI dst src));
 7102   effect(KILL cr);
 7103 
 7104   size(1);
 7105   format %{ "DEC    $dst" %}
 7106   opcode(0x48); /*  */
 7107   ins_encode( Opc_plus( primary, dst ) );
 7108   ins_pipe( ialu_reg );
 7109 %}
 7110 
 7111 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7112   match(Set dst (AddP dst src));
 7113   effect(KILL cr);
 7114 
 7115   size(2);
 7116   format %{ "ADD    $dst,$src" %}
 7117   opcode(0x03);
 7118   ins_encode( OpcP, RegReg( dst, src) );
 7119   ins_pipe( ialu_reg_reg );
 7120 %}
 7121 
 7122 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7123   match(Set dst (AddP dst src));
 7124   effect(KILL cr);
 7125 
 7126   format %{ "ADD    $dst,$src" %}
 7127   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7128   // ins_encode( RegImm( dst, src) );
 7129   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7130   ins_pipe( ialu_reg );
 7131 %}
 7132 
 7133 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7134   match(Set dst (AddI dst (LoadI src)));
 7135   effect(KILL cr);
 7136 
 7137   ins_cost(125);
 7138   format %{ "ADD    $dst,$src" %}
 7139   opcode(0x03);
 7140   ins_encode( OpcP, RegMem( dst, src) );
 7141   ins_pipe( ialu_reg_mem );
 7142 %}
 7143 
 7144 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7145   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7146   effect(KILL cr);
 7147 
 7148   ins_cost(150);
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x01);  /* Opcode 01 /r */
 7151   ins_encode( OpcP, RegMem( src, dst ) );
 7152   ins_pipe( ialu_mem_reg );
 7153 %}
 7154 
 7155 // Add Memory with Immediate
 7156 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7158   effect(KILL cr);
 7159 
 7160   ins_cost(125);
 7161   format %{ "ADD    $dst,$src" %}
 7162   opcode(0x81);               /* Opcode 81 /0 id */
 7163   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7164   ins_pipe( ialu_mem_imm );
 7165 %}
 7166 
 7167 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(125);
 7172   format %{ "INC    $dst" %}
 7173   opcode(0xFF);               /* Opcode FF /0 */
 7174   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7175   ins_pipe( ialu_mem_imm );
 7176 %}
 7177 
 7178 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7179   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7180   effect(KILL cr);
 7181 
 7182   ins_cost(125);
 7183   format %{ "DEC    $dst" %}
 7184   opcode(0xFF);               /* Opcode FF /1 */
 7185   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7186   ins_pipe( ialu_mem_imm );
 7187 %}
 7188 
 7189 
 7190 instruct checkCastPP( eRegP dst ) %{
 7191   match(Set dst (CheckCastPP dst));
 7192 
 7193   size(0);
 7194   format %{ "#checkcastPP of $dst" %}
 7195   ins_encode( /*empty encoding*/ );
 7196   ins_pipe( empty );
 7197 %}
 7198 
 7199 instruct castPP( eRegP dst ) %{
 7200   match(Set dst (CastPP dst));
 7201   format %{ "#castPP of $dst" %}
 7202   ins_encode( /*empty encoding*/ );
 7203   ins_pipe( empty );
 7204 %}
 7205 
 7206 instruct castII( rRegI dst ) %{
 7207   match(Set dst (CastII dst));
 7208   format %{ "#castII of $dst" %}
 7209   ins_encode( /*empty encoding*/ );
 7210   ins_cost(0);
 7211   ins_pipe( empty );
 7212 %}
 7213 
 7214 instruct castLL( eRegL dst ) %{
 7215   match(Set dst (CastLL dst));
 7216   format %{ "#castLL of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castFF( regF dst ) %{
 7223   match(Set dst (CastFF dst));
 7224   format %{ "#castFF of $dst" %}
 7225   ins_encode( /*empty encoding*/ );
 7226   ins_cost(0);
 7227   ins_pipe( empty );
 7228 %}
 7229 
 7230 instruct castDD( regD dst ) %{
 7231   match(Set dst (CastDD dst));
 7232   format %{ "#castDD of $dst" %}
 7233   ins_encode( /*empty encoding*/ );
 7234   ins_cost(0);
 7235   ins_pipe( empty );
 7236 %}
 7237 
 7238 // Load-locked - same as a regular pointer load when used with compare-swap
 7239 instruct loadPLocked(eRegP dst, memory mem) %{
 7240   match(Set dst (LoadPLocked mem));
 7241 
 7242   ins_cost(125);
 7243   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7244   opcode(0x8B);
 7245   ins_encode( OpcP, RegMem(dst,mem));
 7246   ins_pipe( ialu_reg_mem );
 7247 %}
 7248 
 7249 // Conditional-store of the updated heap-top.
 7250 // Used during allocation of the shared heap.
 7251 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
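      // CMPXCHG sets ZF only when EAX matches the current heap-top word and the
      // store goes through, so the flags alone carry the success/failure result.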
 7252 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7253   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7254   // EAX is killed if there is contention, but then it's also unused.
 7255   // In the common case of no contention, EAX holds the new oop address.
 7256   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7257   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7258   ins_pipe( pipe_cmpxchg );
 7259 %}
 7260 
 7261 // Conditional-store of an int value.
 7262 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7263 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7264   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7265   effect(KILL oldval);
 7266   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7267   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7268   ins_pipe( pipe_cmpxchg );
 7269 %}
 7270 
 7271 // Conditional-store of a long value.
 7272 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7273 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7274   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7275   effect(KILL oldval);
 7276   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7277             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7278             "XCHG   EBX,ECX"
 7279   %}
 7280   ins_encode %{
 7281     // Note: we need to swap rbx and rcx before and after the
 7282     //       cmpxchg8 instruction because the instruction uses
 7283     //       rcx as the high order word of the new value to store but
 7284     //       our register encoding uses rbx.
 7285     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7286     __ lock();
 7287     __ cmpxchg8($mem$$Address);
 7288     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7289   %}
 7290   ins_pipe( pipe_cmpxchg );
 7291 %}
 7292 
 7293 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7294 
 7295 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7296   predicate(VM_Version::supports_cx8());
 7297   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7298   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7299   effect(KILL cr, KILL oldval);
 7300   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7301             "MOV    $res,0\n\t"
 7302             "JNE,s  fail\n\t"
 7303             "MOV    $res,1\n"
 7304           "fail:" %}
 7305   ins_encode( enc_cmpxchg8(mem_ptr),
 7306               enc_flags_ne_to_boolean(res) );
 7307   ins_pipe( pipe_cmpxchg );
 7308 %}
 7309 
 7310 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7311   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7312   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7313   effect(KILL cr, KILL oldval);
 7314   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7315             "MOV    $res,0\n\t"
 7316             "JNE,s  fail\n\t"
 7317             "MOV    $res,1\n"
 7318           "fail:" %}
 7319   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7320   ins_pipe( pipe_cmpxchg );
 7321 %}
 7322 
 7323 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7324   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7325   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7326   effect(KILL cr, KILL oldval);
 7327   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7328             "MOV    $res,0\n\t"
 7329             "JNE,s  fail\n\t"
 7330             "MOV    $res,1\n"
 7331           "fail:" %}
 7332   ins_encode( enc_cmpxchgb(mem_ptr),
 7333               enc_flags_ne_to_boolean(res) );
 7334   ins_pipe( pipe_cmpxchg );
 7335 %}
 7336 
 7337 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7338   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7339   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7340   effect(KILL cr, KILL oldval);
 7341   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7342             "MOV    $res,0\n\t"
 7343             "JNE,s  fail\n\t"
 7344             "MOV    $res,1\n"
 7345           "fail:" %}
 7346   ins_encode( enc_cmpxchgw(mem_ptr),
 7347               enc_flags_ne_to_boolean(res) );
 7348   ins_pipe( pipe_cmpxchg );
 7349 %}
 7350 
 7351 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7352   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7353   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7354   effect(KILL cr, KILL oldval);
 7355   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7356             "MOV    $res,0\n\t"
 7357             "JNE,s  fail\n\t"
 7358             "MOV    $res,1\n"
 7359           "fail:" %}
 7360   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7361   ins_pipe( pipe_cmpxchg );
 7362 %}
 7363 
 7364 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7365   predicate(VM_Version::supports_cx8());
 7366   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7367   effect(KILL cr);
 7368   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7369   ins_encode( enc_cmpxchg8(mem_ptr) );
 7370   ins_pipe( pipe_cmpxchg );
 7371 %}
 7372 
 7373 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7374   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7375   effect(KILL cr);
 7376   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7377   ins_encode( enc_cmpxchg(mem_ptr) );
 7378   ins_pipe( pipe_cmpxchg );
 7379 %}
 7380 
 7381 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7382   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7383   effect(KILL cr);
 7384   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7385   ins_encode( enc_cmpxchgb(mem_ptr) );
 7386   ins_pipe( pipe_cmpxchg );
 7387 %}
 7388 
 7389 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7390   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7391   effect(KILL cr);
 7392   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7393   ins_encode( enc_cmpxchgw(mem_ptr) );
 7394   ins_pipe( pipe_cmpxchg );
 7395 %}
 7396 
 7397 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7398   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7399   effect(KILL cr);
 7400   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7401   ins_encode( enc_cmpxchg(mem_ptr) );
 7402   ins_pipe( pipe_cmpxchg );
 7403 %}
 7404 
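      // When the fetched value is not used, a locked ADD performs the same atomic
      // update with the same fencing as XADD, without consuming a result register.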
 7405 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7406   predicate(n->as_LoadStore()->result_not_used());
 7407   match(Set dummy (GetAndAddB mem add));
 7408   effect(KILL cr);
 7409   format %{ "ADDB  [$mem],$add" %}
 7410   ins_encode %{
 7411     __ lock();
 7412     __ addb($mem$$Address, $add$$constant);
 7413   %}
 7414   ins_pipe( pipe_cmpxchg );
 7415 %}
 7416 
 7417 // Important to match to xRegI: only 8-bit regs.
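      // XADDB takes a byte register operand; without REX prefixes only AL, BL, CL
      // and DL are byte-addressable, which is what the xRegI class guarantees.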
 7418 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7419   match(Set newval (GetAndAddB mem newval));
 7420   effect(KILL cr);
 7421   format %{ "XADDB  [$mem],$newval" %}
 7422   ins_encode %{
 7423     __ lock();
 7424     __ xaddb($mem$$Address, $newval$$Register);
 7425   %}
 7426   ins_pipe( pipe_cmpxchg );
 7427 %}
 7428 
 7429 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7430   predicate(n->as_LoadStore()->result_not_used());
 7431   match(Set dummy (GetAndAddS mem add));
 7432   effect(KILL cr);
 7433   format %{ "ADDS  [$mem],$add" %}
 7434   ins_encode %{
 7435     __ lock();
 7436     __ addw($mem$$Address, $add$$constant);
 7437   %}
 7438   ins_pipe( pipe_cmpxchg );
 7439 %}
 7440 
 7441 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7442   match(Set newval (GetAndAddS mem newval));
 7443   effect(KILL cr);
 7444   format %{ "XADDS  [$mem],$newval" %}
 7445   ins_encode %{
 7446     __ lock();
 7447     __ xaddw($mem$$Address, $newval$$Register);
 7448   %}
 7449   ins_pipe( pipe_cmpxchg );
 7450 %}
 7451 
 7452 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7453   predicate(n->as_LoadStore()->result_not_used());
 7454   match(Set dummy (GetAndAddI mem add));
 7455   effect(KILL cr);
 7456   format %{ "ADDL  [$mem],$add" %}
 7457   ins_encode %{
 7458     __ lock();
 7459     __ addl($mem$$Address, $add$$constant);
 7460   %}
 7461   ins_pipe( pipe_cmpxchg );
 7462 %}
 7463 
 7464 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7465   match(Set newval (GetAndAddI mem newval));
 7466   effect(KILL cr);
 7467   format %{ "XADDL  [$mem],$newval" %}
 7468   ins_encode %{
 7469     __ lock();
 7470     __ xaddl($mem$$Address, $newval$$Register);
 7471   %}
 7472   ins_pipe( pipe_cmpxchg );
 7473 %}
 7474 
 7475 // Important to match to xRegI: only 8-bit regs.
 7476 instruct xchgB( memory mem, xRegI newval) %{
 7477   match(Set newval (GetAndSetB mem newval));
 7478   format %{ "XCHGB  $newval,[$mem]" %}
 7479   ins_encode %{
 7480     __ xchgb($newval$$Register, $mem$$Address);
 7481   %}
 7482   ins_pipe( pipe_cmpxchg );
 7483 %}
 7484 
 7485 instruct xchgS( memory mem, rRegI newval) %{
 7486   match(Set newval (GetAndSetS mem newval));
 7487   format %{ "XCHGW  $newval,[$mem]" %}
 7488   ins_encode %{
 7489     __ xchgw($newval$$Register, $mem$$Address);
 7490   %}
 7491   ins_pipe( pipe_cmpxchg );
 7492 %}
 7493 
 7494 instruct xchgI( memory mem, rRegI newval) %{
 7495   match(Set newval (GetAndSetI mem newval));
 7496   format %{ "XCHGL  $newval,[$mem]" %}
 7497   ins_encode %{
 7498     __ xchgl($newval$$Register, $mem$$Address);
 7499   %}
 7500   ins_pipe( pipe_cmpxchg );
 7501 %}
 7502 
 7503 instruct xchgP( memory mem, pRegP newval) %{
 7504   match(Set newval (GetAndSetP mem newval));
 7505   format %{ "XCHGL  $newval,[$mem]" %}
 7506   ins_encode %{
 7507     __ xchgl($newval$$Register, $mem$$Address);
 7508   %}
 7509   ins_pipe( pipe_cmpxchg );
 7510 %}
 7511 
 7512 //----------Subtraction Instructions-------------------------------------------
 7513 
 7514 // Integer Subtraction Instructions
 7515 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7516   match(Set dst (SubI dst src));
 7517   effect(KILL cr);
 7518 
 7519   size(2);
 7520   format %{ "SUB    $dst,$src" %}
 7521   opcode(0x2B);
 7522   ins_encode( OpcP, RegReg( dst, src) );
 7523   ins_pipe( ialu_reg_reg );
 7524 %}
 7525 
 7526 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7527   match(Set dst (SubI dst src));
 7528   effect(KILL cr);
 7529 
 7530   format %{ "SUB    $dst,$src" %}
 7531   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7532   // ins_encode( RegImm( dst, src) );
 7533   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7534   ins_pipe( ialu_reg );
 7535 %}
 7536 
 7537 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7538   match(Set dst (SubI dst (LoadI src)));
 7539   effect(KILL cr);
 7540 
 7541   ins_cost(125);
 7542   format %{ "SUB    $dst,$src" %}
 7543   opcode(0x2B);
 7544   ins_encode( OpcP, RegMem( dst, src) );
 7545   ins_pipe( ialu_reg_mem );
 7546 %}
 7547 
 7548 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7549   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7550   effect(KILL cr);
 7551 
 7552   ins_cost(150);
 7553   format %{ "SUB    $dst,$src" %}
 7554   opcode(0x29);  /* Opcode 29 /r */
 7555   ins_encode( OpcP, RegMem( src, dst ) );
 7556   ins_pipe( ialu_mem_reg );
 7557 %}
 7558 
 7559 // Subtract from a pointer
 7560 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7561   match(Set dst (AddP dst (SubI zero src)));
 7562   effect(KILL cr);
 7563 
 7564   size(2);
 7565   format %{ "SUB    $dst,$src" %}
 7566   opcode(0x2B);
 7567   ins_encode( OpcP, RegReg( dst, src) );
 7568   ins_pipe( ialu_reg_reg );
 7569 %}
 7570 
 7571 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7572   match(Set dst (SubI zero dst));
 7573   effect(KILL cr);
 7574 
 7575   size(2);
 7576   format %{ "NEG    $dst" %}
 7577   opcode(0xF7,0x03);  // Opcode F7 /3
 7578   ins_encode( OpcP, RegOpc( dst ) );
 7579   ins_pipe( ialu_reg );
 7580 %}
 7581 
 7582 //----------Multiplication/Division Instructions-------------------------------
 7583 // Integer Multiplication Instructions
 7584 // Multiply Register
 7585 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7586   match(Set dst (MulI dst src));
 7587   effect(KILL cr);
 7588 
 7589   size(3);
 7590   ins_cost(300);
 7591   format %{ "IMUL   $dst,$src" %}
 7592   opcode(0xAF, 0x0F);
 7593   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7594   ins_pipe( ialu_reg_reg_alu0 );
 7595 %}
 7596 
 7597 // Multiply 32-bit Immediate
 7598 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7599   match(Set dst (MulI src imm));
 7600   effect(KILL cr);
 7601 
 7602   ins_cost(300);
 7603   format %{ "IMUL   $dst,$src,$imm" %}
 7604   opcode(0x69);  /* 69 /r id */
 7605   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7606   ins_pipe( ialu_reg_reg_alu0 );
 7607 %}
 7608 
 7609 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7610   match(Set dst src);
 7611   effect(KILL cr);
 7612 
 7613   // Note that this is artificially increased to make it more expensive than loadConL
 7614   ins_cost(250);
 7615   format %{ "MOV    EAX,$src\t// low word only" %}
 7616   opcode(0xB8);
 7617   ins_encode( LdImmL_Lo(dst, src) );
 7618   ins_pipe( ialu_reg_fat );
 7619 %}
 7620 
 7621 // Multiply by 32-bit Immediate, taking the shifted high order results
 7622 //  (special case for shift by 32)
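      // With a shift count of exactly 32, the desired bits are simply EDX after the
      // widening IMUL, so no SAR is needed (contrast mulI_imm_RShift_high below).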
 7623 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7624   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7625   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7626              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7628   effect(USE src1, KILL cr);
 7629 
 7630   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7631   ins_cost(0*100 + 1*400 - 150);
 7632   format %{ "IMUL   EDX:EAX,$src1" %}
 7633   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7634   ins_pipe( pipe_slow );
 7635 %}
 7636 
 7637 // Multiply by 32-bit Immediate, taking the shifted high order results
 7638 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7639   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7640   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7641              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7642              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7643   effect(USE src1, KILL cr);
 7644 
 7645   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7646   ins_cost(1*100 + 1*400 - 150);
 7647   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7648             "SAR    EDX,$cnt-32" %}
 7649   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7650   ins_pipe( pipe_slow );
 7651 %}
 7652 
 7653 // Multiply Memory 32-bit Immediate
 7654 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7655   match(Set dst (MulI (LoadI src) imm));
 7656   effect(KILL cr);
 7657 
 7658   ins_cost(300);
 7659   format %{ "IMUL   $dst,$src,$imm" %}
 7660   opcode(0x69);  /* 69 /r id */
 7661   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7662   ins_pipe( ialu_reg_mem_alu0 );
 7663 %}
 7664 
 7665 // Multiply Memory
 7666 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7667   match(Set dst (MulI dst (LoadI src)));
 7668   effect(KILL cr);
 7669 
 7670   ins_cost(350);
 7671   format %{ "IMUL   $dst,$src" %}
 7672   opcode(0xAF, 0x0F);
 7673   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7674   ins_pipe( ialu_reg_mem_alu0 );
 7675 %}
 7676 
 7677 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7678 %{
 7679   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7680   effect(KILL cr, KILL src2);
 7681 
 7682   expand %{ mulI_eReg(dst, src1, cr);
 7683            mulI_eReg(src2, src3, cr);
 7684            addI_eReg(dst, src2, cr); %}
 7685 %}
 7686 
 7687 // Multiply Register Int to Long
 7688 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7689   // Basic Idea: long = (long)int * (long)int
 7690   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7691   effect(DEF dst, USE src, USE src1, KILL flags);
 7692 
 7693   ins_cost(300);
 7694   format %{ "IMUL   $dst,$src1" %}
 7695 
 7696   ins_encode( long_int_multiply( dst, src1 ) );
 7697   ins_pipe( ialu_reg_reg_alu0 );
 7698 %}
 7699 
 7700 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7701   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7702   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7703   effect(KILL flags);
 7704 
 7705   ins_cost(300);
 7706   format %{ "MUL    $dst,$src1" %}
 7707 
 7708   ins_encode( long_uint_multiply(dst, src1) );
 7709   ins_pipe( ialu_reg_reg_alu0 );
 7710 %}
 7711 
 7712 // Multiply Register Long
 7713 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7714   match(Set dst (MulL dst src));
 7715   effect(KILL cr, TEMP tmp);
 7716   ins_cost(4*100+3*400);
 7717 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7718 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
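// Derivation: with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
//   x*y = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo.
// The 2^64 term vanishes modulo 2^64, which gives the two lines above;
// a single MUL supplies the full 64-bit x_lo*y_lo product in EDX:EAX.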
 7719   format %{ "MOV    $tmp,$src.lo\n\t"
 7720             "IMUL   $tmp,EDX\n\t"
 7721             "MOV    EDX,$src.hi\n\t"
 7722             "IMUL   EDX,EAX\n\t"
 7723             "ADD    $tmp,EDX\n\t"
 7724             "MUL    EDX:EAX,$src.lo\n\t"
 7725             "ADD    EDX,$tmp" %}
 7726   ins_encode( long_multiply( dst, src, tmp ) );
 7727   ins_pipe( pipe_slow );
 7728 %}
 7729 
 7730 // Multiply Register Long where the left operand's high 32 bits are zero
 7731 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7732   predicate(is_operand_hi32_zero(n->in(1)));
 7733   match(Set dst (MulL dst src));
 7734   effect(KILL cr, TEMP tmp);
 7735   ins_cost(2*100+2*400);
 7736 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7737 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7738   format %{ "MOV    $tmp,$src.hi\n\t"
 7739             "IMUL   $tmp,EAX\n\t"
 7740             "MUL    EDX:EAX,$src.lo\n\t"
 7741             "ADD    EDX,$tmp" %}
 7742   ins_encode %{
 7743     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7744     __ imull($tmp$$Register, rax);
 7745     __ mull($src$$Register);
 7746     __ addl(rdx, $tmp$$Register);
 7747   %}
 7748   ins_pipe( pipe_slow );
 7749 %}
 7750 
 7751 // Multiply Register Long where the right operand's high 32 bits are zero
 7752 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7753   predicate(is_operand_hi32_zero(n->in(2)));
 7754   match(Set dst (MulL dst src));
 7755   effect(KILL cr, TEMP tmp);
 7756   ins_cost(2*100+2*400);
 7757 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7758 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7759   format %{ "MOV    $tmp,$src.lo\n\t"
 7760             "IMUL   $tmp,EDX\n\t"
 7761             "MUL    EDX:EAX,$src.lo\n\t"
 7762             "ADD    EDX,$tmp" %}
 7763   ins_encode %{
 7764     __ movl($tmp$$Register, $src$$Register);
 7765     __ imull($tmp$$Register, rdx);
 7766     __ mull($src$$Register);
 7767     __ addl(rdx, $tmp$$Register);
 7768   %}
 7769   ins_pipe( pipe_slow );
 7770 %}
 7771 
 7772 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7773 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7774   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7775   match(Set dst (MulL dst src));
 7776   effect(KILL cr);
 7777   ins_cost(1*400);
 7778 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7779 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7780   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7781   ins_encode %{
 7782     __ mull($src$$Register);
 7783   %}
 7784   ins_pipe( pipe_slow );
 7785 %}
 7786 
 7787 // Multiply Register Long by small constant
 7788 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7789   match(Set dst (MulL dst src));
 7790   effect(KILL cr, TEMP tmp);
 7791   ins_cost(2*100+2*400);
 7792   size(12);
 7793 // Basic idea: lo(result) = lo(src * EAX)
 7794 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7795   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7796             "MOV    EDX,$src\n\t"
 7797             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7798             "ADD    EDX,$tmp" %}
 7799   ins_encode( long_multiply_con( dst, src, tmp ) );
 7800   ins_pipe( pipe_slow );
 7801 %}
 7802 
 7803 // Integer DIV with Register
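// The CMP/JNE guard below special-cases min_jint / -1: IDIV would raise a
// divide-error trap on that overflow, while Java requires the quotient to be
// min_jint (EAX is left unchanged) with a remainder of zero (EDX is cleared).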
 7804 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7805   match(Set rax (DivI rax div));
 7806   effect(KILL rdx, KILL cr);
 7807   size(26);
 7808   ins_cost(30*100+10*100);
 7809   format %{ "CMP    EAX,0x80000000\n\t"
 7810             "JNE,s  normal\n\t"
 7811             "XOR    EDX,EDX\n\t"
 7812             "CMP    ECX,-1\n\t"
 7813             "JE,s   done\n"
 7814     "normal: CDQ\n\t"
 7815             "IDIV   $div\n\t"
 7816     "done:"        %}
 7817   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7818   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7819   ins_pipe( ialu_reg_reg_alu0 );
 7820 %}
 7821 
 7822 // Divide Register Long
 7823 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
 7824   match(Set dst (DivL src1 src2));
 7825   effect( KILL cr, KILL cx, KILL bx );
 7826   ins_cost(10000);
 7827   format %{ "PUSH   $src1.hi\n\t"
 7828             "PUSH   $src1.lo\n\t"
 7829             "PUSH   $src2.hi\n\t"
 7830             "PUSH   $src2.lo\n\t"
 7831             "CALL   SharedRuntime::ldiv\n\t"
 7832             "ADD    ESP,16" %}
 7833   ins_encode( long_div(src1,src2) );
 7834   ins_pipe( pipe_slow );
 7835 %}
 7836 
 7837 // Integer DIVMOD with Register, both quotient and mod results
 7838 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7839   match(DivModI rax div);
 7840   effect(KILL cr);
 7841   size(26);
 7842   ins_cost(30*100+10*100);
 7843   format %{ "CMP    EAX,0x80000000\n\t"
 7844             "JNE,s  normal\n\t"
 7845             "XOR    EDX,EDX\n\t"
 7846             "CMP    ECX,-1\n\t"
 7847             "JE,s   done\n"
 7848     "normal: CDQ\n\t"
 7849             "IDIV   $div\n\t"
 7850     "done:"        %}
 7851   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7852   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7853   ins_pipe( pipe_slow );
 7854 %}
 7855 
 7856 // Integer MOD with Register
 7857 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7858   match(Set rdx (ModI rax div));
 7859   effect(KILL rax, KILL cr);
 7860 
 7861   size(26);
 7862   ins_cost(300);
 7863   format %{ "CDQ\n\t"
 7864             "IDIV   $div" %}
 7865   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7866   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7867   ins_pipe( ialu_reg_reg_alu0 );
 7868 %}
 7869 
 7870 // Remainder Register Long
 7871 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
 7872   match(Set dst (ModL src1 src2));
 7873   effect( KILL cr, KILL cx, KILL bx );
 7874   ins_cost(10000);
 7875   format %{ "PUSH   $src1.hi\n\t"
 7876             "PUSH   $src1.lo\n\t"
 7877             "PUSH   $src2.hi\n\t"
 7878             "PUSH   $src2.lo\n\t"
 7879             "CALL   SharedRuntime::lrem\n\t"
 7880             "ADD    ESP,16" %}
 7881   ins_encode( long_mod(src1,src2) );
 7882   ins_pipe( pipe_slow );
 7883 %}
 7884 
 7885 // Divide Register Long (no special case since divisor != -1)
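// The encoding below divides the (possibly negated) unsigned dividend by
// abs($imm) using schoolbook long division with 32-bit digits, roughly
// (names here are just for illustration):
//   q_hi = hi / d;  r = hi % d;       // first DIV of 0:hi by d
//   q_lo = ((r << 32) | lo) / d;      // second DIV of EDX:EAX by d
// and finally restores the sign.  The fast path applies when d > hi, since
// the whole quotient then fits in 32 bits and a single DIV suffices.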
 7886 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7887   match(Set dst (DivL dst imm));
 7888   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7889   ins_cost(1000);
 7890   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7891             "XOR    $tmp2,$tmp2\n\t"
 7892             "CMP    $tmp,EDX\n\t"
 7893             "JA,s   fast\n\t"
 7894             "MOV    $tmp2,EAX\n\t"
 7895             "MOV    EAX,EDX\n\t"
 7896             "MOV    EDX,0\n\t"
 7897             "JLE,s  pos\n\t"
 7898             "LNEG   EAX : $tmp2\n\t"
 7899             "DIV    $tmp # unsigned division\n\t"
 7900             "XCHG   EAX,$tmp2\n\t"
 7901             "DIV    $tmp\n\t"
 7902             "LNEG   $tmp2 : EAX\n\t"
 7903             "JMP,s  done\n"
 7904     "pos:\n\t"
 7905             "DIV    $tmp\n\t"
 7906             "XCHG   EAX,$tmp2\n"
 7907     "fast:\n\t"
 7908             "DIV    $tmp\n"
 7909     "done:\n\t"
 7910             "MOV    EDX,$tmp2\n\t"
 7911             "NEG    EDX:EAX # if $imm < 0" %}
 7912   ins_encode %{
 7913     int con = (int)$imm$$constant;
 7914     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7915     int pcon = (con > 0) ? con : -con;
 7916     Label Lfast, Lpos, Ldone;
 7917 
 7918     __ movl($tmp$$Register, pcon);
 7919     __ xorl($tmp2$$Register,$tmp2$$Register);
 7920     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7922 
 7923     __ movl($tmp2$$Register, $dst$$Register); // save
 7924     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7925     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7926     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7927 
 7928     // Negative dividend.
 7929     // convert value to positive to use unsigned division
 7930     __ lneg($dst$$Register, $tmp2$$Register);
 7931     __ divl($tmp$$Register);
 7932     __ xchgl($dst$$Register, $tmp2$$Register);
 7933     __ divl($tmp$$Register);
 7934     // revert result back to negative
 7935     __ lneg($tmp2$$Register, $dst$$Register);
 7936     __ jmpb(Ldone);
 7937 
 7938     __ bind(Lpos);
 7939     __ divl($tmp$$Register); // Use unsigned division
 7940     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit high result
 7942 
 7943     __ bind(Lfast);
 7944     // fast path: src is positive
 7945     __ divl($tmp$$Register); // Use unsigned division
 7946 
 7947     __ bind(Ldone);
 7948     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7949     if (con < 0) {
 7950       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7951     }
 7952   %}
 7953   ins_pipe( pipe_slow );
 7954 %}
 7955 
// Remainder Register Long (remainder fits into 32 bits)
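// Same long-division scheme as divL_eReg_imm32 above, but only the remainders
// are kept.  The remainder of a division by a 32-bit divisor always fits in
// 32 bits, so the tail just sign-extends it (MOV EAX,EDX / SAR EDX,31) into
// EDX:EAX, after negating it when the dividend was negative.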
 7957 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7958   match(Set dst (ModL dst imm));
 7959   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7960   ins_cost(1000);
 7961   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7962             "CMP    $tmp,EDX\n\t"
 7963             "JA,s   fast\n\t"
 7964             "MOV    $tmp2,EAX\n\t"
 7965             "MOV    EAX,EDX\n\t"
 7966             "MOV    EDX,0\n\t"
 7967             "JLE,s  pos\n\t"
 7968             "LNEG   EAX : $tmp2\n\t"
 7969             "DIV    $tmp # unsigned division\n\t"
 7970             "MOV    EAX,$tmp2\n\t"
 7971             "DIV    $tmp\n\t"
 7972             "NEG    EDX\n\t"
 7973             "JMP,s  done\n"
 7974     "pos:\n\t"
 7975             "DIV    $tmp\n\t"
 7976             "MOV    EAX,$tmp2\n"
 7977     "fast:\n\t"
 7978             "DIV    $tmp\n"
 7979     "done:\n\t"
 7980             "MOV    EAX,EDX\n\t"
 7981             "SAR    EDX,31\n\t" %}
 7982   ins_encode %{
 7983     int con = (int)$imm$$constant;
 7984     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7985     int pcon = (con > 0) ? con : -con;
 7986     Label  Lfast, Lpos, Ldone;
 7987 
 7988     __ movl($tmp$$Register, pcon);
 7989     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7991 
 7992     __ movl($tmp2$$Register, $dst$$Register); // save
 7993     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7994     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7995     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7996 
 7997     // Negative dividend.
 7998     // convert value to positive to use unsigned division
 7999     __ lneg($dst$$Register, $tmp2$$Register);
 8000     __ divl($tmp$$Register);
 8001     __ movl($dst$$Register, $tmp2$$Register);
 8002     __ divl($tmp$$Register);
 8003     // revert remainder back to negative
 8004     __ negl(HIGH_FROM_LOW($dst$$Register));
 8005     __ jmpb(Ldone);
 8006 
 8007     __ bind(Lpos);
 8008     __ divl($tmp$$Register);
 8009     __ movl($dst$$Register, $tmp2$$Register);
 8010 
 8011     __ bind(Lfast);
 8012     // fast path: src is positive
 8013     __ divl($tmp$$Register);
 8014 
 8015     __ bind(Ldone);
 8016     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8017     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8018 
 8019   %}
 8020   ins_pipe( pipe_slow );
 8021 %}
 8022 
 8023 // Integer Shift Instructions
 8024 // Shift Left by one
 8025 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8026   match(Set dst (LShiftI dst shift));
 8027   effect(KILL cr);
 8028 
 8029   size(2);
 8030   format %{ "SHL    $dst,$shift" %}
 8031   opcode(0xD1, 0x4);  /* D1 /4 */
 8032   ins_encode( OpcP, RegOpc( dst ) );
 8033   ins_pipe( ialu_reg );
 8034 %}
 8035 
 8036 // Shift Left by 8-bit immediate
 8037 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8038   match(Set dst (LShiftI dst shift));
 8039   effect(KILL cr);
 8040 
 8041   size(3);
 8042   format %{ "SHL    $dst,$shift" %}
 8043   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8044   ins_encode( RegOpcImm( dst, shift) );
 8045   ins_pipe( ialu_reg );
 8046 %}
 8047 
 8048 // Shift Left by variable
 8049 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8050   match(Set dst (LShiftI dst shift));
 8051   effect(KILL cr);
 8052 
 8053   size(2);
 8054   format %{ "SHL    $dst,$shift" %}
 8055   opcode(0xD3, 0x4);  /* D3 /4 */
 8056   ins_encode( OpcP, RegOpc( dst ) );
 8057   ins_pipe( ialu_reg_reg );
 8058 %}
 8059 
 8060 // Arithmetic shift right by one
 8061 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8062   match(Set dst (RShiftI dst shift));
 8063   effect(KILL cr);
 8064 
 8065   size(2);
 8066   format %{ "SAR    $dst,$shift" %}
 8067   opcode(0xD1, 0x7);  /* D1 /7 */
 8068   ins_encode( OpcP, RegOpc( dst ) );
 8069   ins_pipe( ialu_reg );
 8070 %}
 8071 
 8072 // Arithmetic shift right by one
 8073 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8074   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8075   effect(KILL cr);
 8076   format %{ "SAR    $dst,$shift" %}
 8077   opcode(0xD1, 0x7);  /* D1 /7 */
 8078   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8079   ins_pipe( ialu_mem_imm );
 8080 %}
 8081 
 8082 // Arithmetic Shift Right by 8-bit immediate
 8083 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8084   match(Set dst (RShiftI dst shift));
 8085   effect(KILL cr);
 8086 
 8087   size(3);
 8088   format %{ "SAR    $dst,$shift" %}
 8089   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8090   ins_encode( RegOpcImm( dst, shift ) );
 8091   ins_pipe( ialu_mem_imm );
 8092 %}
 8093 
 8094 // Arithmetic Shift Right by 8-bit immediate
 8095 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8096   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8097   effect(KILL cr);
 8098 
 8099   format %{ "SAR    $dst,$shift" %}
 8100   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8101   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8102   ins_pipe( ialu_mem_imm );
 8103 %}
 8104 
 8105 // Arithmetic Shift Right by variable
 8106 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8107   match(Set dst (RShiftI dst shift));
 8108   effect(KILL cr);
 8109 
 8110   size(2);
 8111   format %{ "SAR    $dst,$shift" %}
 8112   opcode(0xD3, 0x7);  /* D3 /7 */
 8113   ins_encode( OpcP, RegOpc( dst ) );
 8114   ins_pipe( ialu_reg_reg );
 8115 %}
 8116 
 8117 // Logical shift right by one
 8118 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8119   match(Set dst (URShiftI dst shift));
 8120   effect(KILL cr);
 8121 
 8122   size(2);
 8123   format %{ "SHR    $dst,$shift" %}
 8124   opcode(0xD1, 0x5);  /* D1 /5 */
 8125   ins_encode( OpcP, RegOpc( dst ) );
 8126   ins_pipe( ialu_reg );
 8127 %}
 8128 
 8129 // Logical Shift Right by 8-bit immediate
 8130 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8131   match(Set dst (URShiftI dst shift));
 8132   effect(KILL cr);
 8133 
 8134   size(3);
 8135   format %{ "SHR    $dst,$shift" %}
 8136   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8137   ins_encode( RegOpcImm( dst, shift) );
 8138   ins_pipe( ialu_reg );
 8139 %}
 8140 
 8141 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8143 // This idiom is used by the compiler for the i2b bytecode.
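// For example, 0x00001280 << 24 == 0x80000000, and an arithmetic >> 24 gives
// 0xFFFFFF80 (-128): only the low byte survives, sign-extended.  MOVSX does
// this in one instruction; the xRegI operand presumably restricts the source
// to a register whose low byte is addressable (AL/BL/CL/DL).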
 8144 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8145   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8146 
 8147   size(3);
 8148   format %{ "MOVSX  $dst,$src :8" %}
 8149   ins_encode %{
 8150     __ movsbl($dst$$Register, $src$$Register);
 8151   %}
 8152   ins_pipe(ialu_reg_reg);
 8153 %}
 8154 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8157 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8158   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8159 
 8160   size(3);
 8161   format %{ "MOVSX  $dst,$src :16" %}
 8162   ins_encode %{
 8163     __ movswl($dst$$Register, $src$$Register);
 8164   %}
 8165   ins_pipe(ialu_reg_reg);
 8166 %}
 8167 
 8168 
 8169 // Logical Shift Right by variable
 8170 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8171   match(Set dst (URShiftI dst shift));
 8172   effect(KILL cr);
 8173 
 8174   size(2);
 8175   format %{ "SHR    $dst,$shift" %}
 8176   opcode(0xD3, 0x5);  /* D3 /5 */
 8177   ins_encode( OpcP, RegOpc( dst ) );
 8178   ins_pipe( ialu_reg_reg );
 8179 %}
 8180 
 8181 
 8182 //----------Logical Instructions-----------------------------------------------
 8183 //----------Integer Logical Instructions---------------------------------------
 8184 // And Instructions
 8185 // And Register with Register
 8186 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8187   match(Set dst (AndI dst src));
 8188   effect(KILL cr);
 8189 
 8190   size(2);
 8191   format %{ "AND    $dst,$src" %}
 8192   opcode(0x23);
 8193   ins_encode( OpcP, RegReg( dst, src) );
 8194   ins_pipe( ialu_reg_reg );
 8195 %}
 8196 
 8197 // And Register with Immediate
 8198 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8199   match(Set dst (AndI dst src));
 8200   effect(KILL cr);
 8201 
 8202   format %{ "AND    $dst,$src" %}
 8203   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8204   // ins_encode( RegImm( dst, src) );
 8205   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8206   ins_pipe( ialu_reg );
 8207 %}
 8208 
 8209 // And Register with Memory
 8210 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8211   match(Set dst (AndI dst (LoadI src)));
 8212   effect(KILL cr);
 8213 
 8214   ins_cost(125);
 8215   format %{ "AND    $dst,$src" %}
 8216   opcode(0x23);
 8217   ins_encode( OpcP, RegMem( dst, src) );
 8218   ins_pipe( ialu_reg_mem );
 8219 %}
 8220 
 8221 // And Memory with Register
 8222 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8223   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8224   effect(KILL cr);
 8225 
 8226   ins_cost(150);
 8227   format %{ "AND    $dst,$src" %}
 8228   opcode(0x21);  /* Opcode 21 /r */
 8229   ins_encode( OpcP, RegMem( src, dst ) );
 8230   ins_pipe( ialu_mem_reg );
 8231 %}
 8232 
 8233 // And Memory with Immediate
 8234 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8235   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8236   effect(KILL cr);
 8237 
 8238   ins_cost(125);
 8239   format %{ "AND    $dst,$src" %}
 8240   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8241   // ins_encode( MemImm( dst, src) );
 8242   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8243   ins_pipe( ialu_mem_imm );
 8244 %}
 8245 
 8246 // BMI1 instructions
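// The BMI1 patterns below match the usual bit-trick idioms:
//   ANDN   dst,src1,src2  ==  (~src1) & src2    (XorI with -1 is bitwise NOT)
//   BLSI   dst,src        ==  src & -src         (isolate lowest set bit)
//   BLSMSK dst,src        ==  src ^ (src - 1)    (mask up to lowest set bit)
//   BLSR   dst,src        ==  src & (src - 1)    (clear lowest set bit)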
 8247 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8248   match(Set dst (AndI (XorI src1 minus_1) src2));
 8249   predicate(UseBMI1Instructions);
 8250   effect(KILL cr);
 8251 
 8252   format %{ "ANDNL  $dst, $src1, $src2" %}
 8253 
 8254   ins_encode %{
 8255     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8256   %}
 8257   ins_pipe(ialu_reg);
 8258 %}
 8259 
 8260 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8261   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8262   predicate(UseBMI1Instructions);
 8263   effect(KILL cr);
 8264 
 8265   ins_cost(125);
 8266   format %{ "ANDNL  $dst, $src1, $src2" %}
 8267 
 8268   ins_encode %{
 8269     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8270   %}
 8271   ins_pipe(ialu_reg_mem);
 8272 %}
 8273 
 8274 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8275   match(Set dst (AndI (SubI imm_zero src) src));
 8276   predicate(UseBMI1Instructions);
 8277   effect(KILL cr);
 8278 
 8279   format %{ "BLSIL  $dst, $src" %}
 8280 
 8281   ins_encode %{
 8282     __ blsil($dst$$Register, $src$$Register);
 8283   %}
 8284   ins_pipe(ialu_reg);
 8285 %}
 8286 
 8287 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8288   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8289   predicate(UseBMI1Instructions);
 8290   effect(KILL cr);
 8291 
 8292   ins_cost(125);
 8293   format %{ "BLSIL  $dst, $src" %}
 8294 
 8295   ins_encode %{
 8296     __ blsil($dst$$Register, $src$$Address);
 8297   %}
 8298   ins_pipe(ialu_reg_mem);
 8299 %}
 8300 
 8301 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8302 %{
 8303   match(Set dst (XorI (AddI src minus_1) src));
 8304   predicate(UseBMI1Instructions);
 8305   effect(KILL cr);
 8306 
 8307   format %{ "BLSMSKL $dst, $src" %}
 8308 
 8309   ins_encode %{
 8310     __ blsmskl($dst$$Register, $src$$Register);
 8311   %}
 8312 
 8313   ins_pipe(ialu_reg);
 8314 %}
 8315 
 8316 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8317 %{
 8318   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8319   predicate(UseBMI1Instructions);
 8320   effect(KILL cr);
 8321 
 8322   ins_cost(125);
 8323   format %{ "BLSMSKL $dst, $src" %}
 8324 
 8325   ins_encode %{
 8326     __ blsmskl($dst$$Register, $src$$Address);
 8327   %}
 8328 
 8329   ins_pipe(ialu_reg_mem);
 8330 %}
 8331 
 8332 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8333 %{
 8334   match(Set dst (AndI (AddI src minus_1) src) );
 8335   predicate(UseBMI1Instructions);
 8336   effect(KILL cr);
 8337 
 8338   format %{ "BLSRL  $dst, $src" %}
 8339 
 8340   ins_encode %{
 8341     __ blsrl($dst$$Register, $src$$Register);
 8342   %}
 8343 
 8344   ins_pipe(ialu_reg);
 8345 %}
 8346 
 8347 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8348 %{
 8349   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8350   predicate(UseBMI1Instructions);
 8351   effect(KILL cr);
 8352 
 8353   ins_cost(125);
 8354   format %{ "BLSRL  $dst, $src" %}
 8355 
 8356   ins_encode %{
 8357     __ blsrl($dst$$Register, $src$$Address);
 8358   %}
 8359 
 8360   ins_pipe(ialu_reg_mem);
 8361 %}
 8362 
 8363 // Or Instructions
 8364 // Or Register with Register
 8365 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8366   match(Set dst (OrI dst src));
 8367   effect(KILL cr);
 8368 
 8369   size(2);
 8370   format %{ "OR     $dst,$src" %}
 8371   opcode(0x0B);
 8372   ins_encode( OpcP, RegReg( dst, src) );
 8373   ins_pipe( ialu_reg_reg );
 8374 %}
 8375 
 8376 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8377   match(Set dst (OrI dst (CastP2X src)));
 8378   effect(KILL cr);
 8379 
 8380   size(2);
 8381   format %{ "OR     $dst,$src" %}
 8382   opcode(0x0B);
 8383   ins_encode( OpcP, RegReg( dst, src) );
 8384   ins_pipe( ialu_reg_reg );
 8385 %}
 8386 
 8387 
 8388 // Or Register with Immediate
 8389 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8390   match(Set dst (OrI dst src));
 8391   effect(KILL cr);
 8392 
 8393   format %{ "OR     $dst,$src" %}
 8394   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8395   // ins_encode( RegImm( dst, src) );
 8396   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8397   ins_pipe( ialu_reg );
 8398 %}
 8399 
 8400 // Or Register with Memory
 8401 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8402   match(Set dst (OrI dst (LoadI src)));
 8403   effect(KILL cr);
 8404 
 8405   ins_cost(125);
 8406   format %{ "OR     $dst,$src" %}
 8407   opcode(0x0B);
 8408   ins_encode( OpcP, RegMem( dst, src) );
 8409   ins_pipe( ialu_reg_mem );
 8410 %}
 8411 
 8412 // Or Memory with Register
 8413 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8414   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8415   effect(KILL cr);
 8416 
 8417   ins_cost(150);
 8418   format %{ "OR     $dst,$src" %}
 8419   opcode(0x09);  /* Opcode 09 /r */
 8420   ins_encode( OpcP, RegMem( src, dst ) );
 8421   ins_pipe( ialu_mem_reg );
 8422 %}
 8423 
 8424 // Or Memory with Immediate
 8425 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8426   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8427   effect(KILL cr);
 8428 
 8429   ins_cost(125);
 8430   format %{ "OR     $dst,$src" %}
 8431   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8432   // ins_encode( MemImm( dst, src) );
 8433   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8434   ins_pipe( ialu_mem_imm );
 8435 %}
 8436 
 8437 // ROL/ROR
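// Rotates are recognized from their shift/or idioms: (x << s) | (x >>> (32 - s))
// is a rotate-left by s, and the mirrored form is a rotate-right.  The imm8
// variants require the two shift counts to sum to 0 mod 32 (see the predicates).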
 8438 // ROL expand
 8439 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8440   effect(USE_DEF dst, USE shift, KILL cr);
 8441 
 8442   format %{ "ROL    $dst, $shift" %}
 8443   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8444   ins_encode( OpcP, RegOpc( dst ));
 8445   ins_pipe( ialu_reg );
 8446 %}
 8447 
 8448 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8449   effect(USE_DEF dst, USE shift, KILL cr);
 8450 
 8451   format %{ "ROL    $dst, $shift" %}
 8452   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8453   ins_encode( RegOpcImm(dst, shift) );
 8454   ins_pipe(ialu_reg);
 8455 %}
 8456 
 8457 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8458   effect(USE_DEF dst, USE shift, KILL cr);
 8459 
 8460   format %{ "ROL    $dst, $shift" %}
 8461   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8462   ins_encode(OpcP, RegOpc(dst));
 8463   ins_pipe( ialu_reg_reg );
 8464 %}
 8465 // end of ROL expand
 8466 
 8467 // ROL 32bit by one once
 8468 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8469   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8470 
 8471   expand %{
 8472     rolI_eReg_imm1(dst, lshift, cr);
 8473   %}
 8474 %}
 8475 
 8476 // ROL 32bit var by imm8 once
 8477 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8478   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8479   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8480 
 8481   expand %{
 8482     rolI_eReg_imm8(dst, lshift, cr);
 8483   %}
 8484 %}
 8485 
 8486 // ROL 32bit var by var once
 8487 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8488   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8489 
 8490   expand %{
 8491     rolI_eReg_CL(dst, shift, cr);
 8492   %}
 8493 %}
 8494 
 8495 // ROL 32bit var by var once
 8496 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8497   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8498 
 8499   expand %{
 8500     rolI_eReg_CL(dst, shift, cr);
 8501   %}
 8502 %}
 8503 
 8504 // ROR expand
 8505 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8506   effect(USE_DEF dst, USE shift, KILL cr);
 8507 
 8508   format %{ "ROR    $dst, $shift" %}
 8509   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8510   ins_encode( OpcP, RegOpc( dst ) );
 8511   ins_pipe( ialu_reg );
 8512 %}
 8513 
 8514 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8515   effect (USE_DEF dst, USE shift, KILL cr);
 8516 
 8517   format %{ "ROR    $dst, $shift" %}
 8518   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8519   ins_encode( RegOpcImm(dst, shift) );
 8520   ins_pipe( ialu_reg );
 8521 %}
 8522 
 8523 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8524   effect(USE_DEF dst, USE shift, KILL cr);
 8525 
 8526   format %{ "ROR    $dst, $shift" %}
 8527   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8528   ins_encode(OpcP, RegOpc(dst));
 8529   ins_pipe( ialu_reg_reg );
 8530 %}
 8531 // end of ROR expand
 8532 
 8533 // ROR right once
 8534 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8535   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8536 
 8537   expand %{
 8538     rorI_eReg_imm1(dst, rshift, cr);
 8539   %}
 8540 %}
 8541 
 8542 // ROR 32bit by immI8 once
 8543 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8544   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8545   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8546 
 8547   expand %{
 8548     rorI_eReg_imm8(dst, rshift, cr);
 8549   %}
 8550 %}
 8551 
 8552 // ROR 32bit var by var once
 8553 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8554   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8555 
 8556   expand %{
 8557     rorI_eReg_CL(dst, shift, cr);
 8558   %}
 8559 %}
 8560 
 8561 // ROR 32bit var by var once
 8562 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8563   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8564 
 8565   expand %{
 8566     rorI_eReg_CL(dst, shift, cr);
 8567   %}
 8568 %}
 8569 
 8570 // Xor Instructions
 8571 // Xor Register with Register
 8572 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8573   match(Set dst (XorI dst src));
 8574   effect(KILL cr);
 8575 
 8576   size(2);
 8577   format %{ "XOR    $dst,$src" %}
 8578   opcode(0x33);
 8579   ins_encode( OpcP, RegReg( dst, src) );
 8580   ins_pipe( ialu_reg_reg );
 8581 %}
 8582 
 8583 // Xor Register with Immediate -1
 8584 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8585   match(Set dst (XorI dst imm));
 8586 
 8587   size(2);
 8588   format %{ "NOT    $dst" %}
 8589   ins_encode %{
 8590      __ notl($dst$$Register);
 8591   %}
 8592   ins_pipe( ialu_reg );
 8593 %}
 8594 
 8595 // Xor Register with Immediate
 8596 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8597   match(Set dst (XorI dst src));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "XOR    $dst,$src" %}
 8601   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8602   // ins_encode( RegImm( dst, src) );
 8603   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8604   ins_pipe( ialu_reg );
 8605 %}
 8606 
 8607 // Xor Register with Memory
 8608 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8609   match(Set dst (XorI dst (LoadI src)));
 8610   effect(KILL cr);
 8611 
 8612   ins_cost(125);
 8613   format %{ "XOR    $dst,$src" %}
 8614   opcode(0x33);
 8615   ins_encode( OpcP, RegMem(dst, src) );
 8616   ins_pipe( ialu_reg_mem );
 8617 %}
 8618 
 8619 // Xor Memory with Register
 8620 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8621   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8622   effect(KILL cr);
 8623 
 8624   ins_cost(150);
 8625   format %{ "XOR    $dst,$src" %}
 8626   opcode(0x31);  /* Opcode 31 /r */
 8627   ins_encode( OpcP, RegMem( src, dst ) );
 8628   ins_pipe( ialu_mem_reg );
 8629 %}
 8630 
 8631 // Xor Memory with Immediate
 8632 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8633   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8634   effect(KILL cr);
 8635 
 8636   ins_cost(125);
 8637   format %{ "XOR    $dst,$src" %}
 8638   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8639   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8640   ins_pipe( ialu_mem_imm );
 8641 %}
 8642 
 8643 //----------Convert Int to Boolean---------------------------------------------
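// Conv2B is implemented as a copy followed by NEG/ADC: after the copy, NEG dst
// sets CF exactly when the value is non-zero, and ADC dst,src then computes
// (-src) + src + CF == CF, i.e. 1 for any non-zero input and 0 otherwise.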
 8644 
 8645 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8646   effect( DEF dst, USE src );
 8647   format %{ "MOV    $dst,$src" %}
 8648   ins_encode( enc_Copy( dst, src) );
 8649   ins_pipe( ialu_reg_reg );
 8650 %}
 8651 
 8652 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8653   effect( USE_DEF dst, USE src, KILL cr );
 8654 
 8655   size(4);
 8656   format %{ "NEG    $dst\n\t"
 8657             "ADC    $dst,$src" %}
 8658   ins_encode( neg_reg(dst),
 8659               OpcRegReg(0x13,dst,src) );
 8660   ins_pipe( ialu_reg_reg_long );
 8661 %}
 8662 
 8663 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8664   match(Set dst (Conv2B src));
 8665 
 8666   expand %{
 8667     movI_nocopy(dst,src);
 8668     ci2b(dst,src,cr);
 8669   %}
 8670 %}
 8671 
 8672 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8673   effect( DEF dst, USE src );
 8674   format %{ "MOV    $dst,$src" %}
 8675   ins_encode( enc_Copy( dst, src) );
 8676   ins_pipe( ialu_reg_reg );
 8677 %}
 8678 
 8679 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8680   effect( USE_DEF dst, USE src, KILL cr );
 8681   format %{ "NEG    $dst\n\t"
 8682             "ADC    $dst,$src" %}
 8683   ins_encode( neg_reg(dst),
 8684               OpcRegReg(0x13,dst,src) );
 8685   ins_pipe( ialu_reg_reg_long );
 8686 %}
 8687 
 8688 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8689   match(Set dst (Conv2B src));
 8690 
 8691   expand %{
 8692     movP_nocopy(dst,src);
 8693     cp2b(dst,src,cr);
 8694   %}
 8695 %}
 8696 
 8697 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8698   match(Set dst (CmpLTMask p q));
 8699   effect(KILL cr);
 8700   ins_cost(400);
 8701 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
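  // The sequence produces the 0 / -1 mask directly: SETcc writes only a byte,
  // so the register is zeroed first and NEG turns the 0/1 result into 0/-1.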
 8703   format %{ "XOR    $dst,$dst\n\t"
 8704             "CMP    $p,$q\n\t"
 8705             "SETlt  $dst\n\t"
 8706             "NEG    $dst" %}
 8707   ins_encode %{
 8708     Register Rp = $p$$Register;
 8709     Register Rq = $q$$Register;
 8710     Register Rd = $dst$$Register;
 8711     Label done;
 8712     __ xorl(Rd, Rd);
 8713     __ cmpl(Rp, Rq);
 8714     __ setb(Assembler::less, Rd);
 8715     __ negl(Rd);
 8716   %}
 8717 
 8718   ins_pipe(pipe_slow);
 8719 %}
 8720 
 8721 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8722   match(Set dst (CmpLTMask dst zero));
 8723   effect(DEF dst, KILL cr);
 8724   ins_cost(100);
 8725 
 8726   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8727   ins_encode %{
 8728   __ sarl($dst$$Register, 31);
 8729   %}
 8730   ins_pipe(ialu_reg);
 8731 %}
 8732 
 8733 /* better to save a register than avoid a branch */
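// The pattern computes (p - q) + (y & ((p < q) ? -1 : 0)); rather than
// materializing the mask, the encoding subtracts and conditionally adds y
// with a short forward branch.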
 8734 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8735   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8736   effect(KILL cr);
 8737   ins_cost(400);
 8738   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8739             "JGE    done\n\t"
 8740             "ADD    $p,$y\n"
 8741             "done:  " %}
 8742   ins_encode %{
 8743     Register Rp = $p$$Register;
 8744     Register Rq = $q$$Register;
 8745     Register Ry = $y$$Register;
 8746     Label done;
 8747     __ subl(Rp, Rq);
 8748     __ jccb(Assembler::greaterEqual, done);
 8749     __ addl(Rp, Ry);
 8750     __ bind(done);
 8751   %}
 8752 
 8753   ins_pipe(pipe_cmplt);
 8754 %}
 8755 
 8756 /* better to save a register than avoid a branch */
 8757 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8758   match(Set y (AndI (CmpLTMask p q) y));
 8759   effect(KILL cr);
 8760 
 8761   ins_cost(300);
 8762 
 8763   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8764             "JLT      done\n\t"
 8765             "XORL     $y, $y\n"
 8766             "done:  " %}
 8767   ins_encode %{
 8768     Register Rp = $p$$Register;
 8769     Register Rq = $q$$Register;
 8770     Register Ry = $y$$Register;
 8771     Label done;
 8772     __ cmpl(Rp, Rq);
 8773     __ jccb(Assembler::less, done);
 8774     __ xorl(Ry, Ry);
 8775     __ bind(done);
 8776   %}
 8777 
 8778   ins_pipe(pipe_cmplt);
 8779 %}
 8780 
 8781 /* If I enable this, I encourage spilling in the inner loop of compress.
 8782 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8783   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8784 */
 8785 //----------Overflow Math Instructions-----------------------------------------
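// These nodes produce only a flags result for a following overflow branch:
// ADD and IMUL set OF on signed overflow, CMP does the same for the implied
// subtraction without keeping the difference, and NEG flags the min_jint case.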
 8786 
 8787 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8788 %{
 8789   match(Set cr (OverflowAddI op1 op2));
 8790   effect(DEF cr, USE_KILL op1, USE op2);
 8791 
 8792   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8793 
 8794   ins_encode %{
 8795     __ addl($op1$$Register, $op2$$Register);
 8796   %}
 8797   ins_pipe(ialu_reg_reg);
 8798 %}
 8799 
 8800 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8801 %{
 8802   match(Set cr (OverflowAddI op1 op2));
 8803   effect(DEF cr, USE_KILL op1, USE op2);
 8804 
 8805   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8806 
 8807   ins_encode %{
 8808     __ addl($op1$$Register, $op2$$constant);
 8809   %}
 8810   ins_pipe(ialu_reg_reg);
 8811 %}
 8812 
 8813 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8814 %{
 8815   match(Set cr (OverflowSubI op1 op2));
 8816 
 8817   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8818   ins_encode %{
 8819     __ cmpl($op1$$Register, $op2$$Register);
 8820   %}
 8821   ins_pipe(ialu_reg_reg);
 8822 %}
 8823 
 8824 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8825 %{
 8826   match(Set cr (OverflowSubI op1 op2));
 8827 
 8828   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8829   ins_encode %{
 8830     __ cmpl($op1$$Register, $op2$$constant);
 8831   %}
 8832   ins_pipe(ialu_reg_reg);
 8833 %}
 8834 
 8835 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8836 %{
 8837   match(Set cr (OverflowSubI zero op2));
 8838   effect(DEF cr, USE_KILL op2);
 8839 
 8840   format %{ "NEG    $op2\t# overflow check int" %}
 8841   ins_encode %{
 8842     __ negl($op2$$Register);
 8843   %}
 8844   ins_pipe(ialu_reg_reg);
 8845 %}
 8846 
 8847 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8848 %{
 8849   match(Set cr (OverflowMulI op1 op2));
 8850   effect(DEF cr, USE_KILL op1, USE op2);
 8851 
 8852   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8853   ins_encode %{
 8854     __ imull($op1$$Register, $op2$$Register);
 8855   %}
 8856   ins_pipe(ialu_reg_reg_alu0);
 8857 %}
 8858 
 8859 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8860 %{
 8861   match(Set cr (OverflowMulI op1 op2));
 8862   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8863 
 8864   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8865   ins_encode %{
 8866     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8867   %}
 8868   ins_pipe(ialu_reg_reg_alu0);
 8869 %}
 8870 
 8871 // Integer Absolute Instructions
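// Branch-free absolute value: tmp = src >> 31 is 0 for non-negative inputs and
// -1 for negative ones, so (src ^ tmp) - tmp yields src or (~src + 1) == -src.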
 8872 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8873 %{
 8874   match(Set dst (AbsI src));
 8875   effect(TEMP dst, TEMP tmp, KILL cr);
 8876   format %{ "movl $tmp, $src\n\t"
 8877             "sarl $tmp, 31\n\t"
 8878             "movl $dst, $src\n\t"
 8879             "xorl $dst, $tmp\n\t"
 8880             "subl $dst, $tmp\n"
 8881           %}
 8882   ins_encode %{
 8883     __ movl($tmp$$Register, $src$$Register);
 8884     __ sarl($tmp$$Register, 31);
 8885     __ movl($dst$$Register, $src$$Register);
 8886     __ xorl($dst$$Register, $tmp$$Register);
 8887     __ subl($dst$$Register, $tmp$$Register);
 8888   %}
 8889 
 8890   ins_pipe(ialu_reg_reg);
 8891 %}
 8892 
 8893 //----------Long Instructions------------------------------------------------
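// 64-bit arithmetic is done in 32-bit halves: ADD/ADC and SUB/SBB propagate the
// carry or borrow from the low words into the high words, and the logical ops
// simply apply the same operation to each half.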
 8894 // Add Long Register with Register
 8895 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8896   match(Set dst (AddL dst src));
 8897   effect(KILL cr);
 8898   ins_cost(200);
 8899   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8900             "ADC    $dst.hi,$src.hi" %}
 8901   opcode(0x03, 0x13);
 8902   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8903   ins_pipe( ialu_reg_reg_long );
 8904 %}
 8905 
 8906 // Add Long Register with Immediate
 8907 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8908   match(Set dst (AddL dst src));
 8909   effect(KILL cr);
 8910   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8911             "ADC    $dst.hi,$src.hi" %}
 8912   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8913   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8914   ins_pipe( ialu_reg_long );
 8915 %}
 8916 
 8917 // Add Long Register with Memory
 8918 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8919   match(Set dst (AddL dst (LoadL mem)));
 8920   effect(KILL cr);
 8921   ins_cost(125);
 8922   format %{ "ADD    $dst.lo,$mem\n\t"
 8923             "ADC    $dst.hi,$mem+4" %}
 8924   opcode(0x03, 0x13);
 8925   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8926   ins_pipe( ialu_reg_long_mem );
 8927 %}
 8928 
 8929 // Subtract Long Register with Register.
 8930 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8931   match(Set dst (SubL dst src));
 8932   effect(KILL cr);
 8933   ins_cost(200);
 8934   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8935             "SBB    $dst.hi,$src.hi" %}
 8936   opcode(0x2B, 0x1B);
 8937   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8938   ins_pipe( ialu_reg_reg_long );
 8939 %}
 8940 
 8941 // Subtract Long Register with Immediate
 8942 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8943   match(Set dst (SubL dst src));
 8944   effect(KILL cr);
 8945   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8946             "SBB    $dst.hi,$src.hi" %}
 8947   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8948   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8949   ins_pipe( ialu_reg_long );
 8950 %}
 8951 
 8952 // Subtract Long Register with Memory
 8953 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8954   match(Set dst (SubL dst (LoadL mem)));
 8955   effect(KILL cr);
 8956   ins_cost(125);
 8957   format %{ "SUB    $dst.lo,$mem\n\t"
 8958             "SBB    $dst.hi,$mem+4" %}
 8959   opcode(0x2B, 0x1B);
 8960   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8961   ins_pipe( ialu_reg_long_mem );
 8962 %}
 8963 
 8964 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8965   match(Set dst (SubL zero dst));
 8966   effect(KILL cr);
 8967   ins_cost(300);
 8968   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8969   ins_encode( neg_long(dst) );
 8970   ins_pipe( ialu_reg_reg_long );
 8971 %}
 8972 
 8973 // And Long Register with Register
 8974 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8975   match(Set dst (AndL dst src));
 8976   effect(KILL cr);
 8977   format %{ "AND    $dst.lo,$src.lo\n\t"
 8978             "AND    $dst.hi,$src.hi" %}
 8979   opcode(0x23,0x23);
 8980   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8981   ins_pipe( ialu_reg_reg_long );
 8982 %}
 8983 
 8984 // And Long Register with Immediate
 8985 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8986   match(Set dst (AndL dst src));
 8987   effect(KILL cr);
 8988   format %{ "AND    $dst.lo,$src.lo\n\t"
 8989             "AND    $dst.hi,$src.hi" %}
 8990   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8991   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8992   ins_pipe( ialu_reg_long );
 8993 %}
 8994 
 8995 // And Long Register with Memory
 8996 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8997   match(Set dst (AndL dst (LoadL mem)));
 8998   effect(KILL cr);
 8999   ins_cost(125);
 9000   format %{ "AND    $dst.lo,$mem\n\t"
 9001             "AND    $dst.hi,$mem+4" %}
 9002   opcode(0x23, 0x23);
 9003   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9004   ins_pipe( ialu_reg_long_mem );
 9005 %}
 9006 
 9007 // BMI1 instructions
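// The 64-bit BLSx forms below are composed from two 32-bit operations: the
// lowest set bit (or its mask) lies entirely in the low word unless the low
// word is zero, and the flags set by the first BLSx on the low half select
// whether the high half still needs to be processed.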
 9008 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 9009   match(Set dst (AndL (XorL src1 minus_1) src2));
 9010   predicate(UseBMI1Instructions);
 9011   effect(KILL cr, TEMP dst);
 9012 
 9013   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9014             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9015          %}
 9016 
 9017   ins_encode %{
 9018     Register Rdst = $dst$$Register;
 9019     Register Rsrc1 = $src1$$Register;
 9020     Register Rsrc2 = $src2$$Register;
 9021     __ andnl(Rdst, Rsrc1, Rsrc2);
 9022     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9023   %}
 9024   ins_pipe(ialu_reg_reg_long);
 9025 %}
 9026 
 9027 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9028   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9029   predicate(UseBMI1Instructions);
 9030   effect(KILL cr, TEMP dst);
 9031 
 9032   ins_cost(125);
 9033   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9034             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9035          %}
 9036 
 9037   ins_encode %{
 9038     Register Rdst = $dst$$Register;
 9039     Register Rsrc1 = $src1$$Register;
 9040     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9041 
 9042     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9043     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9044   %}
 9045   ins_pipe(ialu_reg_mem);
 9046 %}
 9047 
 9048 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9049   match(Set dst (AndL (SubL imm_zero src) src));
 9050   predicate(UseBMI1Instructions);
 9051   effect(KILL cr, TEMP dst);
 9052 
 9053   format %{ "MOVL   $dst.hi, 0\n\t"
 9054             "BLSIL  $dst.lo, $src.lo\n\t"
 9055             "JNZ    done\n\t"
 9056             "BLSIL  $dst.hi, $src.hi\n"
 9057             "done:"
 9058          %}
 9059 
 9060   ins_encode %{
 9061     Label done;
 9062     Register Rdst = $dst$$Register;
 9063     Register Rsrc = $src$$Register;
 9064     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9065     __ blsil(Rdst, Rsrc);
 9066     __ jccb(Assembler::notZero, done);
 9067     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9068     __ bind(done);
 9069   %}
 9070   ins_pipe(ialu_reg);
 9071 %}
 9072 
 9073 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9074   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9075   predicate(UseBMI1Instructions);
 9076   effect(KILL cr, TEMP dst);
 9077 
 9078   ins_cost(125);
 9079   format %{ "MOVL   $dst.hi, 0\n\t"
 9080             "BLSIL  $dst.lo, $src\n\t"
 9081             "JNZ    done\n\t"
 9082             "BLSIL  $dst.hi, $src+4\n"
 9083             "done:"
 9084          %}
 9085 
 9086   ins_encode %{
 9087     Label done;
 9088     Register Rdst = $dst$$Register;
 9089     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9090 
 9091     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9092     __ blsil(Rdst, $src$$Address);
 9093     __ jccb(Assembler::notZero, done);
 9094     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9095     __ bind(done);
 9096   %}
 9097   ins_pipe(ialu_reg_mem);
 9098 %}
 9099 
 9100 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9101 %{
 9102   match(Set dst (XorL (AddL src minus_1) src));
 9103   predicate(UseBMI1Instructions);
 9104   effect(KILL cr, TEMP dst);
 9105 
 9106   format %{ "MOVL    $dst.hi, 0\n\t"
 9107             "BLSMSKL $dst.lo, $src.lo\n\t"
 9108             "JNC     done\n\t"
 9109             "BLSMSKL $dst.hi, $src.hi\n"
 9110             "done:"
 9111          %}
 9112 
 9113   ins_encode %{
 9114     Label done;
 9115     Register Rdst = $dst$$Register;
 9116     Register Rsrc = $src$$Register;
 9117     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9118     __ blsmskl(Rdst, Rsrc);
 9119     __ jccb(Assembler::carryClear, done);
 9120     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9121     __ bind(done);
 9122   %}
 9123 
 9124   ins_pipe(ialu_reg);
 9125 %}
 9126 
 9127 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9128 %{
 9129   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9130   predicate(UseBMI1Instructions);
 9131   effect(KILL cr, TEMP dst);
 9132 
 9133   ins_cost(125);
 9134   format %{ "MOVL    $dst.hi, 0\n\t"
 9135             "BLSMSKL $dst.lo, $src\n\t"
 9136             "JNC     done\n\t"
 9137             "BLSMSKL $dst.hi, $src+4\n"
 9138             "done:"
 9139          %}
 9140 
 9141   ins_encode %{
 9142     Label done;
 9143     Register Rdst = $dst$$Register;
 9144     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9145 
 9146     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9147     __ blsmskl(Rdst, $src$$Address);
 9148     __ jccb(Assembler::carryClear, done);
 9149     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9150     __ bind(done);
 9151   %}
 9152 
 9153   ins_pipe(ialu_reg_mem);
 9154 %}
 9155 
 9156 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9157 %{
 9158   match(Set dst (AndL (AddL src minus_1) src) );
 9159   predicate(UseBMI1Instructions);
 9160   effect(KILL cr, TEMP dst);
 9161 
 9162   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9163             "BLSRL  $dst.lo, $src.lo\n\t"
 9164             "JNC    done\n\t"
 9165             "BLSRL  $dst.hi, $src.hi\n"
 9166             "done:"
 9167   %}
 9168 
 9169   ins_encode %{
 9170     Label done;
 9171     Register Rdst = $dst$$Register;
 9172     Register Rsrc = $src$$Register;
 9173     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9174     __ blsrl(Rdst, Rsrc);
 9175     __ jccb(Assembler::carryClear, done);
 9176     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9177     __ bind(done);
 9178   %}
 9179 
 9180   ins_pipe(ialu_reg);
 9181 %}
 9182 
 9183 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9184 %{
 9185   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9186   predicate(UseBMI1Instructions);
 9187   effect(KILL cr, TEMP dst);
 9188 
 9189   ins_cost(125);
 9190   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9191             "BLSRL  $dst.lo, $src\n\t"
 9192             "JNC    done\n\t"
 9193             "BLSRL  $dst.hi, $src+4\n"
 9194             "done:"
 9195   %}
 9196 
 9197   ins_encode %{
 9198     Label done;
 9199     Register Rdst = $dst$$Register;
 9200     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9201     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9202     __ blsrl(Rdst, $src$$Address);
 9203     __ jccb(Assembler::carryClear, done);
 9204     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9205     __ bind(done);
 9206   %}
 9207 
 9208   ins_pipe(ialu_reg_mem);
 9209 %}
 9210 
 9211 // Or Long Register with Register
 9212 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9213   match(Set dst (OrL dst src));
 9214   effect(KILL cr);
 9215   format %{ "OR     $dst.lo,$src.lo\n\t"
 9216             "OR     $dst.hi,$src.hi" %}
 9217   opcode(0x0B,0x0B);
 9218   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9219   ins_pipe( ialu_reg_reg_long );
 9220 %}
 9221 
 9222 // Or Long Register with Immediate
 9223 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9224   match(Set dst (OrL dst src));
 9225   effect(KILL cr);
 9226   format %{ "OR     $dst.lo,$src.lo\n\t"
 9227             "OR     $dst.hi,$src.hi" %}
 9228   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9229   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9230   ins_pipe( ialu_reg_long );
 9231 %}
 9232 
 9233 // Or Long Register with Memory
 9234 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9235   match(Set dst (OrL dst (LoadL mem)));
 9236   effect(KILL cr);
 9237   ins_cost(125);
 9238   format %{ "OR     $dst.lo,$mem\n\t"
 9239             "OR     $dst.hi,$mem+4" %}
 9240   opcode(0x0B,0x0B);
 9241   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9242   ins_pipe( ialu_reg_long_mem );
 9243 %}
 9244 
 9245 // Xor Long Register with Register
 9246 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9247   match(Set dst (XorL dst src));
 9248   effect(KILL cr);
 9249   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9250             "XOR    $dst.hi,$src.hi" %}
 9251   opcode(0x33,0x33);
 9252   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9253   ins_pipe( ialu_reg_reg_long );
 9254 %}
 9255 
 9256 // Xor Long Register with Immediate -1
 9257 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9258   match(Set dst (XorL dst imm));
 9259   format %{ "NOT    $dst.lo\n\t"
 9260             "NOT    $dst.hi" %}
 9261   ins_encode %{
 9262      __ notl($dst$$Register);
 9263      __ notl(HIGH_FROM_LOW($dst$$Register));
 9264   %}
 9265   ins_pipe( ialu_reg_long );
 9266 %}
 9267 
 9268 // Xor Long Register with Immediate
 9269 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9270   match(Set dst (XorL dst src));
 9271   effect(KILL cr);
 9272   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9273             "XOR    $dst.hi,$src.hi" %}
 9274   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9275   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9276   ins_pipe( ialu_reg_long );
 9277 %}
 9278 
 9279 // Xor Long Register with Memory
 9280 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9281   match(Set dst (XorL dst (LoadL mem)));
 9282   effect(KILL cr);
 9283   ins_cost(125);
 9284   format %{ "XOR    $dst.lo,$mem\n\t"
 9285             "XOR    $dst.hi,$mem+4" %}
 9286   opcode(0x33,0x33);
 9287   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9288   ins_pipe( ialu_reg_long_mem );
 9289 %}
 9290 
 9291 // Shift Left Long by 1
 9292 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9293   predicate(UseNewLongLShift);
 9294   match(Set dst (LShiftL dst cnt));
 9295   effect(KILL cr);
 9296   ins_cost(100);
 9297   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9298             "ADC    $dst.hi,$dst.hi" %}
 9299   ins_encode %{
 9300     __ addl($dst$$Register,$dst$$Register);
 9301     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9302   %}
 9303   ins_pipe( ialu_reg_long );
 9304 %}
 9305 
 9306 // Shift Left Long by 2
 9307 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9308   predicate(UseNewLongLShift);
 9309   match(Set dst (LShiftL dst cnt));
 9310   effect(KILL cr);
 9311   ins_cost(100);
 9312   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9313             "ADC    $dst.hi,$dst.hi\n\t"
 9314             "ADD    $dst.lo,$dst.lo\n\t"
 9315             "ADC    $dst.hi,$dst.hi" %}
 9316   ins_encode %{
 9317     __ addl($dst$$Register,$dst$$Register);
 9318     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9319     __ addl($dst$$Register,$dst$$Register);
 9320     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9321   %}
 9322   ins_pipe( ialu_reg_long );
 9323 %}
 9324 
 9325 // Shift Left Long by 3
 9326 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9327   predicate(UseNewLongLShift);
 9328   match(Set dst (LShiftL dst cnt));
 9329   effect(KILL cr);
 9330   ins_cost(100);
 9331   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9332             "ADC    $dst.hi,$dst.hi\n\t"
 9333             "ADD    $dst.lo,$dst.lo\n\t"
 9334             "ADC    $dst.hi,$dst.hi\n\t"
 9335             "ADD    $dst.lo,$dst.lo\n\t"
 9336             "ADC    $dst.hi,$dst.hi" %}
 9337   ins_encode %{
 9338     __ addl($dst$$Register,$dst$$Register);
 9339     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9340     __ addl($dst$$Register,$dst$$Register);
 9341     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9342     __ addl($dst$$Register,$dst$$Register);
 9343     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9344   %}
 9345   ins_pipe( ialu_reg_long );
 9346 %}
 9347 
 9348 // Shift Left Long by 1-31
 9349 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9350   match(Set dst (LShiftL dst cnt));
 9351   effect(KILL cr);
 9352   ins_cost(200);
 9353   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9354             "SHL    $dst.lo,$cnt" %}
 9355   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9356   ins_encode( move_long_small_shift(dst,cnt) );
 9357   ins_pipe( ialu_reg_long );
 9358 %}
 9359 
 9360 // Shift Left Long by 32-63
 9361 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9362   match(Set dst (LShiftL dst cnt));
 9363   effect(KILL cr);
 9364   ins_cost(300);
 9365   format %{ "MOV    $dst.hi,$dst.lo\n"
 9366           "\tSHL    $dst.hi,$cnt-32\n"
 9367           "\tXOR    $dst.lo,$dst.lo" %}
 9368   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9369   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9370   ins_pipe( ialu_reg_long );
 9371 %}
 9372 
 9373 // Shift Left Long by variable
 9374 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9375   match(Set dst (LShiftL dst shift));
 9376   effect(KILL cr);
 9377   ins_cost(500+200);
 9378   size(17);
 9379   format %{ "TEST   $shift,32\n\t"
 9380             "JEQ,s  small\n\t"
 9381             "MOV    $dst.hi,$dst.lo\n\t"
 9382             "XOR    $dst.lo,$dst.lo\n"
 9383     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9384             "SHL    $dst.lo,$shift" %}
 9385   ins_encode( shift_left_long( dst, shift ) );
 9386   ins_pipe( pipe_slow );
 9387 %}
 9388 
 9389 // Shift Right Long by 1-31
 9390 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9391   match(Set dst (URShiftL dst cnt));
 9392   effect(KILL cr);
 9393   ins_cost(200);
 9394   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9395             "SHR    $dst.hi,$cnt" %}
 9396   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9397   ins_encode( move_long_small_shift(dst,cnt) );
 9398   ins_pipe( ialu_reg_long );
 9399 %}
 9400 
 9401 // Shift Right Long by 32-63
 9402 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9403   match(Set dst (URShiftL dst cnt));
 9404   effect(KILL cr);
 9405   ins_cost(300);
 9406   format %{ "MOV    $dst.lo,$dst.hi\n"
 9407           "\tSHR    $dst.lo,$cnt-32\n"
 9408           "\tXOR    $dst.hi,$dst.hi" %}
 9409   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9410   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9411   ins_pipe( ialu_reg_long );
 9412 %}
 9413 
 9414 // Shift Right Long by variable
 9415 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9416   match(Set dst (URShiftL dst shift));
 9417   effect(KILL cr);
 9418   ins_cost(600);
 9419   size(17);
 9420   format %{ "TEST   $shift,32\n\t"
 9421             "JEQ,s  small\n\t"
 9422             "MOV    $dst.lo,$dst.hi\n\t"
 9423             "XOR    $dst.hi,$dst.hi\n"
 9424     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9425             "SHR    $dst.hi,$shift" %}
 9426   ins_encode( shift_right_long( dst, shift ) );
 9427   ins_pipe( pipe_slow );
 9428 %}
 9429 
 9430 // Shift Right arithmetic Long by 1-31
 9431 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9432   match(Set dst (RShiftL dst cnt));
 9433   effect(KILL cr);
 9434   ins_cost(200);
 9435   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9436             "SAR    $dst.hi,$cnt" %}
 9437   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9438   ins_encode( move_long_small_shift(dst,cnt) );
 9439   ins_pipe( ialu_reg_long );
 9440 %}
 9441 
 9442 // Shift Right arithmetic Long by 32-63
 9443 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9444   match(Set dst (RShiftL dst cnt));
 9445   effect(KILL cr);
 9446   ins_cost(300);
 9447   format %{ "MOV    $dst.lo,$dst.hi\n"
 9448           "\tSAR    $dst.lo,$cnt-32\n"
 9449           "\tSAR    $dst.hi,31" %}
 9450   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9451   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9452   ins_pipe( ialu_reg_long );
 9453 %}
 9454 
 9455 // Shift Right arithmetic Long by variable
 9456 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9457   match(Set dst (RShiftL dst shift));
 9458   effect(KILL cr);
 9459   ins_cost(600);
 9460   size(18);
 9461   format %{ "TEST   $shift,32\n\t"
 9462             "JEQ,s  small\n\t"
 9463             "MOV    $dst.lo,$dst.hi\n\t"
 9464             "SAR    $dst.hi,31\n"
 9465     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9466             "SAR    $dst.hi,$shift" %}
 9467   ins_encode( shift_right_arith_long( dst, shift ) );
 9468   ins_pipe( pipe_slow );
 9469 %}
 9470 
 9471 
 9472 //----------Double Instructions------------------------------------------------
 9473 // Double Math
 9474 
 9475 // Compare & branch
 9476 
 9477 // P6 version of float compare, sets condition codes in EFLAGS
 9478 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9479   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9480   match(Set cr (CmpD src1 src2));
 9481   effect(KILL rax);
 9482   ins_cost(150);
 9483   format %{ "FLD    $src1\n\t"
 9484             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9485             "JNP    exit\n\t"
 9486             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9487             "SAHF\n"
 9488      "exit:\tNOP               // avoid branch to branch" %}
 9489   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9490   ins_encode( Push_Reg_DPR(src1),
 9491               OpcP, RegOpc(src2),
 9492               cmpF_P6_fixup );
 9493   ins_pipe( pipe_slow );
 9494 %}
 9495 
 9496 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9497   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9498   match(Set cr (CmpD src1 src2));
 9499   ins_cost(150);
 9500   format %{ "FLD    $src1\n\t"
 9501             "FUCOMIP ST,$src2  // P6 instruction" %}
 9502   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9503   ins_encode( Push_Reg_DPR(src1),
 9504               OpcP, RegOpc(src2));
 9505   ins_pipe( pipe_slow );
 9506 %}
 9507 
 9508 // Compare & branch
 9509 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9510   predicate(UseSSE<=1);
 9511   match(Set cr (CmpD src1 src2));
 9512   effect(KILL rax);
 9513   ins_cost(200);
 9514   format %{ "FLD    $src1\n\t"
 9515             "FCOMp  $src2\n\t"
 9516             "FNSTSW AX\n\t"
 9517             "TEST   AX,0x400\n\t"
 9518             "JZ,s   flags\n\t"
 9519             "MOV    AH,1\t# unordered treat as LT\n"
 9520     "flags:\tSAHF" %}
 9521   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9522   ins_encode( Push_Reg_DPR(src1),
 9523               OpcP, RegOpc(src2),
 9524               fpu_flags);
 9525   ins_pipe( pipe_slow );
 9526 %}
 9527 
 9528 // Compare vs zero into -1,0,1
 9529 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9530   predicate(UseSSE<=1);
 9531   match(Set dst (CmpD3 src1 zero));
 9532   effect(KILL cr, KILL rax);
 9533   ins_cost(280);
 9534   format %{ "FTSTD  $dst,$src1" %}
 9535   opcode(0xE4, 0xD9);
 9536   ins_encode( Push_Reg_DPR(src1),
 9537               OpcS, OpcP, PopFPU,
 9538               CmpF_Result(dst));
 9539   ins_pipe( pipe_slow );
 9540 %}
 9541 
 9542 // Compare into -1,0,1
 9543 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9544   predicate(UseSSE<=1);
 9545   match(Set dst (CmpD3 src1 src2));
 9546   effect(KILL cr, KILL rax);
 9547   ins_cost(300);
 9548   format %{ "FCMPD  $dst,$src1,$src2" %}
 9549   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9550   ins_encode( Push_Reg_DPR(src1),
 9551               OpcP, RegOpc(src2),
 9552               CmpF_Result(dst));
 9553   ins_pipe( pipe_slow );
 9554 %}
 9555 
 9556 // float compare and set condition codes in EFLAGS by XMM regs
 9557 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9558   predicate(UseSSE>=2);
 9559   match(Set cr (CmpD src1 src2));
 9560   ins_cost(145);
 9561   format %{ "UCOMISD $src1,$src2\n\t"
 9562             "JNP,s   exit\n\t"
 9563             "PUSHF\t# saw NaN, set CF\n\t"
 9564             "AND     [rsp], #0xffffff2b\n\t"
 9565             "POPF\n"
 9566     "exit:" %}
 9567   ins_encode %{
 9568     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9569     emit_cmpfp_fixup(_masm);
 9570   %}
 9571   ins_pipe( pipe_slow );
 9572 %}
 9573 
 9574 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9575   predicate(UseSSE>=2);
 9576   match(Set cr (CmpD src1 src2));
 9577   ins_cost(100);
 9578   format %{ "UCOMISD $src1,$src2" %}
 9579   ins_encode %{
 9580     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9581   %}
 9582   ins_pipe( pipe_slow );
 9583 %}
 9584 
 9585 // float compare and set condition codes in EFLAGS by XMM regs
 9586 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9587   predicate(UseSSE>=2);
 9588   match(Set cr (CmpD src1 (LoadD src2)));
 9589   ins_cost(145);
 9590   format %{ "UCOMISD $src1,$src2\n\t"
 9591             "JNP,s   exit\n\t"
 9592             "PUSHF\t# saw NaN, set CF\n\t"
 9593             "AND     [rsp], #0xffffff2b\n\t"
 9594             "POPF\n"
 9595     "exit:" %}
 9596   ins_encode %{
 9597     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9598     emit_cmpfp_fixup(_masm);
 9599   %}
 9600   ins_pipe( pipe_slow );
 9601 %}
 9602 
 9603 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9604   predicate(UseSSE>=2);
 9605   match(Set cr (CmpD src1 (LoadD src2)));
 9606   ins_cost(100);
 9607   format %{ "UCOMISD $src1,$src2" %}
 9608   ins_encode %{
 9609     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9610   %}
 9611   ins_pipe( pipe_slow );
 9612 %}
 9613 
 9614 // Compare into -1,0,1 in XMM
 9615 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9616   predicate(UseSSE>=2);
 9617   match(Set dst (CmpD3 src1 src2));
 9618   effect(KILL cr);
 9619   ins_cost(255);
 9620   format %{ "UCOMISD $src1, $src2\n\t"
 9621             "MOV     $dst, #-1\n\t"
 9622             "JP,s    done\n\t"
 9623             "JB,s    done\n\t"
 9624             "SETNE   $dst\n\t"
 9625             "MOVZB   $dst, $dst\n"
 9626     "done:" %}
 9627   ins_encode %{
 9628     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9629     emit_cmpfp3(_masm, $dst$$Register);
 9630   %}
 9631   ins_pipe( pipe_slow );
 9632 %}
 9633 
 9634 // Compare into -1,0,1 in XMM and memory
 9635 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9636   predicate(UseSSE>=2);
 9637   match(Set dst (CmpD3 src1 (LoadD src2)));
 9638   effect(KILL cr);
 9639   ins_cost(275);
 9640   format %{ "UCOMISD $src1, $src2\n\t"
 9641             "MOV     $dst, #-1\n\t"
 9642             "JP,s    done\n\t"
 9643             "JB,s    done\n\t"
 9644             "SETNE   $dst\n\t"
 9645             "MOVZB   $dst, $dst\n"
 9646     "done:" %}
 9647   ins_encode %{
 9648     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9649     emit_cmpfp3(_masm, $dst$$Register);
 9650   %}
 9651   ins_pipe( pipe_slow );
 9652 %}
 9653 
 9654 
 9655 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9656   predicate (UseSSE <=1);
 9657   match(Set dst (SubD dst src));
 9658 
 9659   format %{ "FLD    $src\n\t"
 9660             "DSUBp  $dst,ST" %}
 9661   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9662   ins_cost(150);
 9663   ins_encode( Push_Reg_DPR(src),
 9664               OpcP, RegOpc(dst) );
 9665   ins_pipe( fpu_reg_reg );
 9666 %}
 9667 
 9668 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9669   predicate (UseSSE <=1);
 9670   match(Set dst (RoundDouble (SubD src1 src2)));
 9671   ins_cost(250);
 9672 
 9673   format %{ "FLD    $src2\n\t"
 9674             "DSUB   ST,$src1\n\t"
 9675             "FSTP_D $dst\t# D-round" %}
 9676   opcode(0xD8, 0x5);
 9677   ins_encode( Push_Reg_DPR(src2),
 9678               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9679   ins_pipe( fpu_mem_reg_reg );
 9680 %}
 9681 
 9682 
 9683 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9684   predicate (UseSSE <=1);
 9685   match(Set dst (SubD dst (LoadD src)));
 9686   ins_cost(150);
 9687 
 9688   format %{ "FLD    $src\n\t"
 9689             "DSUBp  $dst,ST" %}
 9690   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9691   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9692               OpcP, RegOpc(dst) );
 9693   ins_pipe( fpu_reg_mem );
 9694 %}
 9695 
 9696 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9697   predicate (UseSSE<=1);
 9698   match(Set dst (AbsD src));
 9699   ins_cost(100);
 9700   format %{ "FABS" %}
 9701   opcode(0xE1, 0xD9);
 9702   ins_encode( OpcS, OpcP );
 9703   ins_pipe( fpu_reg_reg );
 9704 %}
 9705 
 9706 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9707   predicate(UseSSE<=1);
 9708   match(Set dst (NegD src));
 9709   ins_cost(100);
 9710   format %{ "FCHS" %}
 9711   opcode(0xE0, 0xD9);
 9712   ins_encode( OpcS, OpcP );
 9713   ins_pipe( fpu_reg_reg );
 9714 %}
 9715 
 9716 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9717   predicate(UseSSE<=1);
 9718   match(Set dst (AddD dst src));
 9719   format %{ "FLD    $src\n\t"
 9720             "DADD   $dst,ST" %}
 9721   size(4);
 9722   ins_cost(150);
 9723   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9724   ins_encode( Push_Reg_DPR(src),
 9725               OpcP, RegOpc(dst) );
 9726   ins_pipe( fpu_reg_reg );
 9727 %}
 9728 
 9729 
 9730 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9731   predicate(UseSSE<=1);
 9732   match(Set dst (RoundDouble (AddD src1 src2)));
 9733   ins_cost(250);
 9734 
 9735   format %{ "FLD    $src2\n\t"
 9736             "DADD   ST,$src1\n\t"
 9737             "FSTP_D $dst\t# D-round" %}
 9738   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9739   ins_encode( Push_Reg_DPR(src2),
 9740               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9741   ins_pipe( fpu_mem_reg_reg );
 9742 %}
 9743 
 9744 
 9745 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9746   predicate(UseSSE<=1);
 9747   match(Set dst (AddD dst (LoadD src)));
 9748   ins_cost(150);
 9749 
 9750   format %{ "FLD    $src\n\t"
 9751             "DADDp  $dst,ST" %}
 9752   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9753   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9754               OpcP, RegOpc(dst) );
 9755   ins_pipe( fpu_reg_mem );
 9756 %}
 9757 
 9758 // add-to-memory
 9759 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9760   predicate(UseSSE<=1);
 9761   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9762   ins_cost(150);
 9763 
 9764   format %{ "FLD_D  $dst\n\t"
 9765             "DADD   ST,$src\n\t"
 9766             "FST_D  $dst" %}
 9767   opcode(0xDD, 0x0);
 9768   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9769               Opcode(0xD8), RegOpc(src),
 9770               set_instruction_start,
 9771               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9772   ins_pipe( fpu_reg_mem );
 9773 %}
 9774 
 9775 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9776   predicate(UseSSE<=1);
 9777   match(Set dst (AddD dst con));
 9778   ins_cost(125);
 9779   format %{ "FLD1\n\t"
 9780             "DADDp  $dst,ST" %}
 9781   ins_encode %{
 9782     __ fld1();
 9783     __ faddp($dst$$reg);
 9784   %}
 9785   ins_pipe(fpu_reg);
 9786 %}
 9787 
 9788 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9789   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9790   match(Set dst (AddD dst con));
 9791   ins_cost(200);
 9792   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9793             "DADDp  $dst,ST" %}
 9794   ins_encode %{
 9795     __ fld_d($constantaddress($con));
 9796     __ faddp($dst$$reg);
 9797   %}
 9798   ins_pipe(fpu_reg_mem);
 9799 %}
 9800 
 9801 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9802   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9803   match(Set dst (RoundDouble (AddD src con)));
 9804   ins_cost(200);
 9805   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9806             "DADD   ST,$src\n\t"
 9807             "FSTP_D $dst\t# D-round" %}
 9808   ins_encode %{
 9809     __ fld_d($constantaddress($con));
 9810     __ fadd($src$$reg);
 9811     __ fstp_d(Address(rsp, $dst$$disp));
 9812   %}
 9813   ins_pipe(fpu_mem_reg_con);
 9814 %}
 9815 
 9816 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9817   predicate(UseSSE<=1);
 9818   match(Set dst (MulD dst src));
 9819   format %{ "FLD    $src\n\t"
 9820             "DMULp  $dst,ST" %}
 9821   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9822   ins_cost(150);
 9823   ins_encode( Push_Reg_DPR(src),
 9824               OpcP, RegOpc(dst) );
 9825   ins_pipe( fpu_reg_reg );
 9826 %}
 9827 
 9828 // Strict FP instruction biases argument before multiply then
 9829 // biases result to avoid double rounding of subnormals.
 9830 //
 9831 // scale arg1 by multiplying arg1 by 2^(-15360)
 9832 // load arg2
 9833 // multiply scaled arg1 by arg2
 9834 // rescale product by 2^(15360)
 9835 //
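      // A sketch of why the 15360 bias works: 15360 = 16383 - 1023, the
      // difference between the extended-precision and double exponent biases,
      // so -16382 + 15360 == -1022.  Pre-scaling by 2^(-15360) lines the
      // 80-bit underflow threshold up with the double underflow threshold, so
      // a product that belongs in the double subnormal range is denormalized
      // by the hardware at the right point and the final rescale by 2^(15360)
      // stores it without a second, value-changing rounding.
      //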
 9836 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9837   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9838   match(Set dst (MulD dst src));
 9839   ins_cost(1);   // Select this instruction for all FP double multiplies
 9840 
 9841   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9842             "DMULp  $dst,ST\n\t"
 9843             "FLD    $src\n\t"
 9844             "DMULp  $dst,ST\n\t"
 9845             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9846             "DMULp  $dst,ST\n\t" %}
 9847   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9848   ins_encode( strictfp_bias1(dst),
 9849               Push_Reg_DPR(src),
 9850               OpcP, RegOpc(dst),
 9851               strictfp_bias2(dst) );
 9852   ins_pipe( fpu_reg_reg );
 9853 %}
 9854 
 9855 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9856   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9857   match(Set dst (MulD dst con));
 9858   ins_cost(200);
 9859   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9860             "DMULp  $dst,ST" %}
 9861   ins_encode %{
 9862     __ fld_d($constantaddress($con));
 9863     __ fmulp($dst$$reg);
 9864   %}
 9865   ins_pipe(fpu_reg_mem);
 9866 %}
 9867 
 9868 
 9869 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9870   predicate( UseSSE<=1 );
 9871   match(Set dst (MulD dst (LoadD src)));
 9872   ins_cost(200);
 9873   format %{ "FLD_D  $src\n\t"
 9874             "DMULp  $dst,ST" %}
 9875   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9876   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9877               OpcP, RegOpc(dst) );
 9878   ins_pipe( fpu_reg_mem );
 9879 %}
 9880 
 9881 //
 9882 // Cisc-alternate to reg-reg multiply
 9883 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9884   predicate( UseSSE<=1 );
 9885   match(Set dst (MulD src (LoadD mem)));
 9886   ins_cost(250);
 9887   format %{ "FLD_D  $mem\n\t"
 9888             "DMUL   ST,$src\n\t"
 9889             "FSTP_D $dst" %}
 9890   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
 9891   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9892               OpcReg_FPR(src),
 9893               Pop_Reg_DPR(dst) );
 9894   ins_pipe( fpu_reg_reg_mem );
 9895 %}
 9896 
 9897 
 9898 // MACRO3 -- addDPR a mulDPR
 9899 // This instruction is a '2-address' instruction in that the result goes
 9900 // back to src2.  This eliminates a move from the macro; possibly the
 9901 // register allocator will have to add it back (and maybe not).
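      // In effect the matched subtree computes src2 := src0 * src1 + src2 with
      // the intermediate product held on the FPU stack, so no temporary
      // register is needed for the multiply result.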
 9902 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9903   predicate( UseSSE<=1 );
 9904   match(Set src2 (AddD (MulD src0 src1) src2));
 9905   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9906             "DMUL   ST,$src1\n\t"
 9907             "DADDp  $src2,ST" %}
 9908   ins_cost(250);
 9909   opcode(0xDD); /* LoadD DD /0 */
 9910   ins_encode( Push_Reg_FPR(src0),
 9911               FMul_ST_reg(src1),
 9912               FAddP_reg_ST(src2) );
 9913   ins_pipe( fpu_reg_reg_reg );
 9914 %}
 9915 
 9916 
 9917 // MACRO3 -- subDPR a mulDPR
 9918 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9919   predicate( UseSSE<=1 );
 9920   match(Set src2 (SubD (MulD src0 src1) src2));
 9921   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9922             "DMUL   ST,$src1\n\t"
 9923             "DSUBRp $src2,ST" %}
 9924   ins_cost(250);
 9925   ins_encode( Push_Reg_FPR(src0),
 9926               FMul_ST_reg(src1),
 9927               Opcode(0xDE), Opc_plus(0xE0,src2));
 9928   ins_pipe( fpu_reg_reg_reg );
 9929 %}
 9930 
 9931 
 9932 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9933   predicate( UseSSE<=1 );
 9934   match(Set dst (DivD dst src));
 9935 
 9936   format %{ "FLD    $src\n\t"
 9937             "FDIVp  $dst,ST" %}
 9938   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9939   ins_cost(150);
 9940   ins_encode( Push_Reg_DPR(src),
 9941               OpcP, RegOpc(dst) );
 9942   ins_pipe( fpu_reg_reg );
 9943 %}
 9944 
 9945 // Strict FP instruction biases argument before division then
 9946 // biases result, to avoid double rounding of subnormals.
 9947 //
 9948 // scale dividend by multiplying dividend by 2^(-15360)
 9949 // load divisor
 9950 // divide scaled dividend by divisor
 9951 // rescale quotient by 2^(15360)
 9952 //
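      // The same +/- 2^(15360) bias pair is reused here, applied only to the
      // dividend: since (a * 2^(-15360)) / b == (a / b) * 2^(-15360), the bias
      // rides along on the quotient and the final multiply by 2^(15360)
      // removes it, with the same single-rounding argument as for multiply.
      //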
 9953 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9954   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9955   match(Set dst (DivD dst src));
 9956   ins_cost(1);   // Select this instruction for all FP double divides
 9958 
 9959   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9960             "DMULp  $dst,ST\n\t"
 9961             "FLD    $src\n\t"
 9962             "FDIVp  $dst,ST\n\t"
 9963             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9964             "DMULp  $dst,ST\n\t" %}
 9965   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9966   ins_encode( strictfp_bias1(dst),
 9967               Push_Reg_DPR(src),
 9968               OpcP, RegOpc(dst),
 9969               strictfp_bias2(dst) );
 9970   ins_pipe( fpu_reg_reg );
 9971 %}
 9972 
 9973 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9974   predicate(UseSSE<=1);
 9975   match(Set dst (ModD dst src));
 9976   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9977 
 9978   format %{ "DMOD   $dst,$src" %}
 9979   ins_cost(250);
 9980   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9981               emitModDPR(),
 9982               Push_Result_Mod_DPR(src),
 9983               Pop_Reg_DPR(dst));
 9984   ins_pipe( pipe_slow );
 9985 %}
 9986 
 9987 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9988   predicate(UseSSE>=2);
 9989   match(Set dst (ModD src0 src1));
 9990   effect(KILL rax, KILL cr);
 9991 
 9992   format %{ "SUB    ESP,8\t # DMOD\n"
 9993           "\tMOVSD  [ESP+0],$src1\n"
 9994           "\tFLD_D  [ESP+0]\n"
 9995           "\tMOVSD  [ESP+0],$src0\n"
 9996           "\tFLD_D  [ESP+0]\n"
 9997      "loop:\tFPREM\n"
 9998           "\tFWAIT\n"
 9999           "\tFNSTSW AX\n"
10000           "\tSAHF\n"
10001           "\tJP     loop\n"
10002           "\tFSTP_D [ESP+0]\n"
10003           "\tMOVSD  $dst,[ESP+0]\n"
10004           "\tADD    ESP,8\n"
10005           "\tFSTP   ST0\t # Restore FPU Stack"
10006     %}
10007   ins_cost(250);
10008   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10009   ins_pipe( pipe_slow );
10010 %}
10011 
10012 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10013   predicate (UseSSE<=1);
10014   match(Set dst(AtanD dst src));
10015   format %{ "DATAN  $dst,$src" %}
10016   opcode(0xD9, 0xF3);
10017   ins_encode( Push_Reg_DPR(src),
10018               OpcP, OpcS, RegOpc(dst) );
10019   ins_pipe( pipe_slow );
10020 %}
10021 
10022 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10023   predicate (UseSSE>=2);
10024   match(Set dst(AtanD dst src));
10025   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10026   format %{ "DATAN  $dst,$src" %}
10027   opcode(0xD9, 0xF3);
10028   ins_encode( Push_SrcD(src),
10029               OpcP, OpcS, Push_ResultD(dst) );
10030   ins_pipe( pipe_slow );
10031 %}
10032 
10033 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10034   predicate (UseSSE<=1);
10035   match(Set dst (SqrtD src));
10036   format %{ "DSQRT  $dst,$src" %}
10037   opcode(0xFA, 0xD9);
10038   ins_encode( Push_Reg_DPR(src),
10039               OpcS, OpcP, Pop_Reg_DPR(dst) );
10040   ins_pipe( pipe_slow );
10041 %}
10042 
10043 //-------------Float Instructions-------------------------------
10044 // Float Math
10045 
10046 // Code for float compare:
10047 //     fcompp();
10048 //     fwait(); fnstsw_ax();
10049 //     sahf();
10050 //     movl(dst, unordered_result);
10051 //     jcc(Assembler::parity, exit);
10052 //     movl(dst, less_result);
10053 //     jcc(Assembler::below, exit);
10054 //     movl(dst, equal_result);
10055 //     jcc(Assembler::equal, exit);
10056 //     movl(dst, greater_result);
10057 //   exit:
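      // After FNSTSW/SAHF the parity flag carries the x87 C2 bit, which is set
      // only when the compare was unordered (a NaN operand); loading
      // unordered_result first and exiting on parity is what routes NaNs to
      // their own result before the below/equal checks pick -1, 0 or 1.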
10058 
10059 // P6 version of float compare, sets condition codes in EFLAGS
10060 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10061   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10062   match(Set cr (CmpF src1 src2));
10063   effect(KILL rax);
10064   ins_cost(150);
10065   format %{ "FLD    $src1\n\t"
10066             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10067             "JNP    exit\n\t"
10068             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10069             "SAHF\n"
10070      "exit:\tNOP               // avoid branch to branch" %}
10071   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10072   ins_encode( Push_Reg_DPR(src1),
10073               OpcP, RegOpc(src2),
10074               cmpF_P6_fixup );
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10079   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10080   match(Set cr (CmpF src1 src2));
10081   ins_cost(100);
10082   format %{ "FLD    $src1\n\t"
10083             "FUCOMIP ST,$src2  // P6 instruction" %}
10084   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10085   ins_encode( Push_Reg_DPR(src1),
10086               OpcP, RegOpc(src2));
10087   ins_pipe( pipe_slow );
10088 %}
10089 
10090 
10091 // Compare & branch
10092 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10093   predicate(UseSSE == 0);
10094   match(Set cr (CmpF src1 src2));
10095   effect(KILL rax);
10096   ins_cost(200);
10097   format %{ "FLD    $src1\n\t"
10098             "FCOMp  $src2\n\t"
10099             "FNSTSW AX\n\t"
10100             "TEST   AX,0x400\n\t"
10101             "JZ,s   flags\n\t"
10102             "MOV    AH,1\t# unordered treat as LT\n"
10103     "flags:\tSAHF" %}
10104   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10105   ins_encode( Push_Reg_DPR(src1),
10106               OpcP, RegOpc(src2),
10107               fpu_flags);
10108   ins_pipe( pipe_slow );
10109 %}
10110 
10111 // Compare vs zero into -1,0,1
10112 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10113   predicate(UseSSE == 0);
10114   match(Set dst (CmpF3 src1 zero));
10115   effect(KILL cr, KILL rax);
10116   ins_cost(280);
10117   format %{ "FTSTF  $dst,$src1" %}
10118   opcode(0xE4, 0xD9);
10119   ins_encode( Push_Reg_DPR(src1),
10120               OpcS, OpcP, PopFPU,
10121               CmpF_Result(dst));
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 // Compare into -1,0,1
10126 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10127   predicate(UseSSE == 0);
10128   match(Set dst (CmpF3 src1 src2));
10129   effect(KILL cr, KILL rax);
10130   ins_cost(300);
10131   format %{ "FCMPF  $dst,$src1,$src2" %}
10132   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10133   ins_encode( Push_Reg_DPR(src1),
10134               OpcP, RegOpc(src2),
10135               CmpF_Result(dst));
10136   ins_pipe( pipe_slow );
10137 %}
10138 
10139 // float compare and set condition codes in EFLAGS by XMM regs
10140 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10141   predicate(UseSSE>=1);
10142   match(Set cr (CmpF src1 src2));
10143   ins_cost(145);
10144   format %{ "UCOMISS $src1,$src2\n\t"
10145             "JNP,s   exit\n\t"
10146             "PUSHF\t# saw NaN, set CF\n\t"
10147             "AND     [rsp], #0xffffff2b\n\t"
10148             "POPF\n"
10149     "exit:" %}
10150   ins_encode %{
10151     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10152     emit_cmpfp_fixup(_masm);
10153   %}
10154   ins_pipe( pipe_slow );
10155 %}
10156 
10157 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10158   predicate(UseSSE>=1);
10159   match(Set cr (CmpF src1 src2));
10160   ins_cost(100);
10161   format %{ "UCOMISS $src1,$src2" %}
10162   ins_encode %{
10163     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10164   %}
10165   ins_pipe( pipe_slow );
10166 %}
10167 
10168 // float compare and set condition codes in EFLAGS by XMM regs
10169 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10170   predicate(UseSSE>=1);
10171   match(Set cr (CmpF src1 (LoadF src2)));
10172   ins_cost(165);
10173   format %{ "UCOMISS $src1,$src2\n\t"
10174             "JNP,s   exit\n\t"
10175             "PUSHF\t# saw NaN, set CF\n\t"
10176             "AND     [rsp], #0xffffff2b\n\t"
10177             "POPF\n"
10178     "exit:" %}
10179   ins_encode %{
10180     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10181     emit_cmpfp_fixup(_masm);
10182   %}
10183   ins_pipe( pipe_slow );
10184 %}
10185 
10186 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10187   predicate(UseSSE>=1);
10188   match(Set cr (CmpF src1 (LoadF src2)));
10189   ins_cost(100);
10190   format %{ "UCOMISS $src1,$src2" %}
10191   ins_encode %{
10192     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10193   %}
10194   ins_pipe( pipe_slow );
10195 %}
10196 
10197 // Compare into -1,0,1 in XMM
10198 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10199   predicate(UseSSE>=1);
10200   match(Set dst (CmpF3 src1 src2));
10201   effect(KILL cr);
10202   ins_cost(255);
10203   format %{ "UCOMISS $src1, $src2\n\t"
10204             "MOV     $dst, #-1\n\t"
10205             "JP,s    done\n\t"
10206             "JB,s    done\n\t"
10207             "SETNE   $dst\n\t"
10208             "MOVZB   $dst, $dst\n"
10209     "done:" %}
10210   ins_encode %{
10211     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10212     emit_cmpfp3(_masm, $dst$$Register);
10213   %}
10214   ins_pipe( pipe_slow );
10215 %}
10216 
10217 // Compare into -1,0,1 in XMM and memory
10218 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10219   predicate(UseSSE>=1);
10220   match(Set dst (CmpF3 src1 (LoadF src2)));
10221   effect(KILL cr);
10222   ins_cost(275);
10223   format %{ "UCOMISS $src1, $src2\n\t"
10224             "MOV     $dst, #-1\n\t"
10225             "JP,s    done\n\t"
10226             "JB,s    done\n\t"
10227             "SETNE   $dst\n\t"
10228             "MOVZB   $dst, $dst\n"
10229     "done:" %}
10230   ins_encode %{
10231     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10232     emit_cmpfp3(_masm, $dst$$Register);
10233   %}
10234   ins_pipe( pipe_slow );
10235 %}
10236 
10237 // Spill to obtain 24-bit precision
10238 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10239   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10240   match(Set dst (SubF src1 src2));
10241 
10242   format %{ "FSUB   $dst,$src1 - $src2" %}
10243   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10244   ins_encode( Push_Reg_FPR(src1),
10245               OpcReg_FPR(src2),
10246               Pop_Mem_FPR(dst) );
10247   ins_pipe( fpu_mem_reg_reg );
10248 %}
10249 //
10250 // This instruction does not round to 24-bits
10251 instruct subFPR_reg(regFPR dst, regFPR src) %{
10252   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10253   match(Set dst (SubF dst src));
10254 
10255   format %{ "FSUB   $dst,$src" %}
10256   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10257   ins_encode( Push_Reg_FPR(src),
10258               OpcP, RegOpc(dst) );
10259   ins_pipe( fpu_reg_reg );
10260 %}
10261 
10262 // Spill to obtain 24-bit precision
10263 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10264   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10265   match(Set dst (AddF src1 src2));
10266 
10267   format %{ "FADD   $dst,$src1,$src2" %}
10268   opcode(0xD8, 0x0); /* D8 C0+i */
10269   ins_encode( Push_Reg_FPR(src2),
10270               OpcReg_FPR(src1),
10271               Pop_Mem_FPR(dst) );
10272   ins_pipe( fpu_mem_reg_reg );
10273 %}
10274 //
10275 // This instruction does not round to 24-bits
10276 instruct addFPR_reg(regFPR dst, regFPR src) %{
10277   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10278   match(Set dst (AddF dst src));
10279 
10280   format %{ "FLD    $src\n\t"
10281             "FADDp  $dst,ST" %}
10282   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10283   ins_encode( Push_Reg_FPR(src),
10284               OpcP, RegOpc(dst) );
10285   ins_pipe( fpu_reg_reg );
10286 %}
10287 
10288 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10289   predicate(UseSSE==0);
10290   match(Set dst (AbsF src));
10291   ins_cost(100);
10292   format %{ "FABS" %}
10293   opcode(0xE1, 0xD9);
10294   ins_encode( OpcS, OpcP );
10295   ins_pipe( fpu_reg_reg );
10296 %}
10297 
10298 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10299   predicate(UseSSE==0);
10300   match(Set dst (NegF src));
10301   ins_cost(100);
10302   format %{ "FCHS" %}
10303   opcode(0xE0, 0xD9);
10304   ins_encode( OpcS, OpcP );
10305   ins_pipe( fpu_reg_reg );
10306 %}
10307 
10308 // Cisc-alternate to addFPR_reg
10309 // Spill to obtain 24-bit precision
10310 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10311   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10312   match(Set dst (AddF src1 (LoadF src2)));
10313 
10314   format %{ "FLD    $src2\n\t"
10315             "FADD   ST,$src1\n\t"
10316             "FSTP_S $dst" %}
10317   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10319               OpcReg_FPR(src1),
10320               Pop_Mem_FPR(dst) );
10321   ins_pipe( fpu_mem_reg_mem );
10322 %}
10323 //
10324 // Cisc-alternate to addFPR_reg
10325 // This instruction does not round to 24-bits
10326 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10327   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10328   match(Set dst (AddF dst (LoadF src)));
10329 
10330   format %{ "FADD   $dst,$src" %}
10331   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10332   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10333               OpcP, RegOpc(dst) );
10334   ins_pipe( fpu_reg_mem );
10335 %}
10336 
10337 // Following two instructions for _222_mpegaudio
10338 // Spill to obtain 24-bit precision
10339 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10340   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10341   match(Set dst (AddF src1 src2));
10342 
10343   format %{ "FADD   $dst,$src1,$src2" %}
10344   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10345   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10346               OpcReg_FPR(src2),
10347               Pop_Mem_FPR(dst) );
10348   ins_pipe( fpu_mem_reg_mem );
10349 %}
10350 
10351 // Cisc-spill variant
10352 // Spill to obtain 24-bit precision
10353 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (AddF src1 (LoadF src2)));
10356 
10357   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10358   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10359   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10360               set_instruction_start,
10361               OpcP, RMopc_Mem(secondary,src1),
10362               Pop_Mem_FPR(dst) );
10363   ins_pipe( fpu_mem_mem_mem );
10364 %}
10365 
10366 // Spill to obtain 24-bit precision
10367 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10368   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10369   match(Set dst (AddF src1 src2));
10370 
10371   format %{ "FADD   $dst,$src1,$src2" %}
10372   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10373   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10374               set_instruction_start,
10375               OpcP, RMopc_Mem(secondary,src1),
10376               Pop_Mem_FPR(dst) );
10377   ins_pipe( fpu_mem_mem_mem );
10378 %}
10379 
10380 
10381 // Spill to obtain 24-bit precision
10382 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10383   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10384   match(Set dst (AddF src con));
10385   format %{ "FLD    $src\n\t"
10386             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10387             "FSTP_S $dst"  %}
10388   ins_encode %{
10389     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10390     __ fadd_s($constantaddress($con));
10391     __ fstp_s(Address(rsp, $dst$$disp));
10392   %}
10393   ins_pipe(fpu_mem_reg_con);
10394 %}
10395 //
10396 // This instruction does not round to 24-bits
10397 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10398   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10399   match(Set dst (AddF src con));
10400   format %{ "FLD    $src\n\t"
10401             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10402             "FSTP   $dst"  %}
10403   ins_encode %{
10404     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10405     __ fadd_s($constantaddress($con));
10406     __ fstp_d($dst$$reg);
10407   %}
10408   ins_pipe(fpu_reg_reg_con);
10409 %}
10410 
10411 // Spill to obtain 24-bit precision
10412 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10413   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10414   match(Set dst (MulF src1 src2));
10415 
10416   format %{ "FLD    $src1\n\t"
10417             "FMUL   $src2\n\t"
10418             "FSTP_S $dst"  %}
10419   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10420   ins_encode( Push_Reg_FPR(src1),
10421               OpcReg_FPR(src2),
10422               Pop_Mem_FPR(dst) );
10423   ins_pipe( fpu_mem_reg_reg );
10424 %}
10425 //
10426 // This instruction does not round to 24-bits
10427 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10428   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10429   match(Set dst (MulF src1 src2));
10430 
10431   format %{ "FLD    $src1\n\t"
10432             "FMUL   $src2\n\t"
10433             "FSTP_S $dst"  %}
10434   opcode(0xD8, 0x1); /* D8 C8+i */
10435   ins_encode( Push_Reg_FPR(src2),
10436               OpcReg_FPR(src1),
10437               Pop_Reg_FPR(dst) );
10438   ins_pipe( fpu_reg_reg_reg );
10439 %}
10440 
10441 
10442 // Spill to obtain 24-bit precision
10443 // Cisc-alternate to reg-reg multiply
10444 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10445   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10446   match(Set dst (MulF src1 (LoadF src2)));
10447 
10448   format %{ "FLD_S  $src2\n\t"
10449             "FMUL   $src1\n\t"
10450             "FSTP_S $dst"  %}
10451   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10452   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10453               OpcReg_FPR(src1),
10454               Pop_Mem_FPR(dst) );
10455   ins_pipe( fpu_mem_reg_mem );
10456 %}
10457 //
10458 // This instruction does not round to 24-bits
10459 // Cisc-alternate to reg-reg multiply
10460 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10461   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10462   match(Set dst (MulF src1 (LoadF src2)));
10463 
10464   format %{ "FMUL   $dst,$src1,$src2" %}
10465   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10466   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10467               OpcReg_FPR(src1),
10468               Pop_Reg_FPR(dst) );
10469   ins_pipe( fpu_reg_reg_mem );
10470 %}
10471 
10472 // Spill to obtain 24-bit precision
10473 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10474   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10475   match(Set dst (MulF src1 src2));
10476 
10477   format %{ "FMUL   $dst,$src1,$src2" %}
10478   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10479   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10480               set_instruction_start,
10481               OpcP, RMopc_Mem(secondary,src1),
10482               Pop_Mem_FPR(dst) );
10483   ins_pipe( fpu_mem_mem_mem );
10484 %}
10485 
10486 // Spill to obtain 24-bit precision
10487 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10488   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10489   match(Set dst (MulF src con));
10490 
10491   format %{ "FLD    $src\n\t"
10492             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10493             "FSTP_S $dst"  %}
10494   ins_encode %{
10495     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10496     __ fmul_s($constantaddress($con));
10497     __ fstp_s(Address(rsp, $dst$$disp));
10498   %}
10499   ins_pipe(fpu_mem_reg_con);
10500 %}
10501 //
10502 // This instruction does not round to 24-bits
10503 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10504   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10505   match(Set dst (MulF src con));
10506 
10507   format %{ "FLD    $src\n\t"
10508             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10509             "FSTP   $dst"  %}
10510   ins_encode %{
10511     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10512     __ fmul_s($constantaddress($con));
10513     __ fstp_d($dst$$reg);
10514   %}
10515   ins_pipe(fpu_reg_reg_con);
10516 %}
10517 
10518 
10519 //
10520 // MACRO1 -- subsume unshared load into mulFPR
10521 // This instruction does not round to 24-bits
10522 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10523   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10524   match(Set dst (MulF (LoadF mem1) src));
10525 
10526   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10527             "FMUL   ST,$src\n\t"
10528             "FSTP   $dst" %}
10529   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10530   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10531               OpcReg_FPR(src),
10532               Pop_Reg_FPR(dst) );
10533   ins_pipe( fpu_reg_reg_mem );
10534 %}
10535 //
10536 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10537 // This instruction does not round to 24-bits
10538 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10539   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10540   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10541   ins_cost(95);
10542 
10543   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10544             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10545             "FADD   ST,$src2\n\t"
10546             "FSTP   $dst" %}
10547   opcode(0xD9); /* LoadF D9 /0 */
10548   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10549               FMul_ST_reg(src1),
10550               FAdd_ST_reg(src2),
10551               Pop_Reg_FPR(dst) );
10552   ins_pipe( fpu_reg_mem_reg_reg );
10553 %}
10554 
10555 // MACRO3 -- addFPR a mulFPR
10556 // This instruction does not round to 24-bits.  It is a '2-address'
10557 // instruction in that the result goes back to src2.  This eliminates
10558 // a move from the macro; possibly the register allocator will have
10559 // to add it back (and maybe not).
10560 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10561   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10562   match(Set src2 (AddF (MulF src0 src1) src2));
10563 
10564   format %{ "FLD    $src0     ===MACRO3===\n\t"
10565             "FMUL   ST,$src1\n\t"
10566             "FADDP  $src2,ST" %}
10567   opcode(0xD9); /* LoadF D9 /0 */
10568   ins_encode( Push_Reg_FPR(src0),
10569               FMul_ST_reg(src1),
10570               FAddP_reg_ST(src2) );
10571   ins_pipe( fpu_reg_reg_reg );
10572 %}
10573 
10574 // MACRO4 -- divFPR subFPR
10575 // This instruction does not round to 24-bits
10576 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set dst (DivF (SubF src2 src1) src3));
10579 
10580   format %{ "FLD    $src2   ===MACRO4===\n\t"
10581             "FSUB   ST,$src1\n\t"
10582             "FDIV   ST,$src3\n\t"
10583             "FSTP  $dst" %}
10584   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10585   ins_encode( Push_Reg_FPR(src2),
10586               subFPR_divFPR_encode(src1,src3),
10587               Pop_Reg_FPR(dst) );
10588   ins_pipe( fpu_reg_reg_reg_reg );
10589 %}
10590 
10591 // Spill to obtain 24-bit precision
10592 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10593   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10594   match(Set dst (DivF src1 src2));
10595 
10596   format %{ "FDIV   $dst,$src1,$src2" %}
10597   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10598   ins_encode( Push_Reg_FPR(src1),
10599               OpcReg_FPR(src2),
10600               Pop_Mem_FPR(dst) );
10601   ins_pipe( fpu_mem_reg_reg );
10602 %}
10603 //
10604 // This instruction does not round to 24-bits
10605 instruct divFPR_reg(regFPR dst, regFPR src) %{
10606   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10607   match(Set dst (DivF dst src));
10608 
10609   format %{ "FDIV   $dst,$src" %}
10610   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10611   ins_encode( Push_Reg_FPR(src),
10612               OpcP, RegOpc(dst) );
10613   ins_pipe( fpu_reg_reg );
10614 %}
10615 
10616 
10617 // Spill to obtain 24-bit precision
10618 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10619   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10620   match(Set dst (ModF src1 src2));
10621   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10622 
10623   format %{ "FMOD   $dst,$src1,$src2" %}
10624   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10625               emitModDPR(),
10626               Push_Result_Mod_DPR(src2),
10627               Pop_Mem_FPR(dst));
10628   ins_pipe( pipe_slow );
10629 %}
10630 //
10631 // This instruction does not round to 24-bits
10632 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10633   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10634   match(Set dst (ModF dst src));
10635   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10636 
10637   format %{ "FMOD   $dst,$src" %}
10638   ins_encode(Push_Reg_Mod_DPR(dst, src),
10639               emitModDPR(),
10640               Push_Result_Mod_DPR(src),
10641               Pop_Reg_FPR(dst));
10642   ins_pipe( pipe_slow );
10643 %}
10644 
10645 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10646   predicate(UseSSE>=1);
10647   match(Set dst (ModF src0 src1));
10648   effect(KILL rax, KILL cr);
10649   format %{ "SUB    ESP,4\t # FMOD\n"
10650           "\tMOVSS  [ESP+0],$src1\n"
10651           "\tFLD_S  [ESP+0]\n"
10652           "\tMOVSS  [ESP+0],$src0\n"
10653           "\tFLD_S  [ESP+0]\n"
10654      "loop:\tFPREM\n"
10655           "\tFWAIT\n"
10656           "\tFNSTSW AX\n"
10657           "\tSAHF\n"
10658           "\tJP     loop\n"
10659           "\tFSTP_S [ESP+0]\n"
10660           "\tMOVSS  $dst,[ESP+0]\n"
10661           "\tADD    ESP,4\n"
10662           "\tFSTP   ST0\t # Restore FPU Stack"
10663     %}
10664   ins_cost(250);
10665   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10666   ins_pipe( pipe_slow );
10667 %}
10668 
10669 
10670 //----------Arithmetic Conversion Instructions---------------------------------
10671 // The conversion operations are all Alpha sorted.  Please keep it that way!
10672 
10673 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10674   predicate(UseSSE==0);
10675   match(Set dst (RoundFloat src));
10676   ins_cost(125);
10677   format %{ "FST_S  $dst,$src\t# F-round" %}
10678   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10679   ins_pipe( fpu_mem_reg );
10680 %}
10681 
10682 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10683   predicate(UseSSE<=1);
10684   match(Set dst (RoundDouble src));
10685   ins_cost(125);
10686   format %{ "FST_D  $dst,$src\t# D-round" %}
10687   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10688   ins_pipe( fpu_mem_reg );
10689 %}
10690 
10691 // Force rounding to 24-bit precision and 8-bit exponent
10692 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10693   predicate(UseSSE==0);
10694   match(Set dst (ConvD2F src));
10695   format %{ "FST_S  $dst,$src\t# F-round" %}
10696   expand %{
10697     roundFloat_mem_reg(dst,src);
10698   %}
10699 %}
10700 
10701 // Force rounding to 24-bit precision and 8-bit exponent
10702 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10703   predicate(UseSSE==1);
10704   match(Set dst (ConvD2F src));
10705   effect( KILL cr );
10706   format %{ "SUB    ESP,4\n\t"
10707             "FST_S  [ESP],$src\t# F-round\n\t"
10708             "MOVSS  $dst,[ESP]\n\t"
10709             "ADD ESP,4" %}
10710   ins_encode %{
10711     __ subptr(rsp, 4);
10712     if ($src$$reg != FPR1L_enc) {
10713       __ fld_s($src$$reg-1);
10714       __ fstp_s(Address(rsp, 0));
10715     } else {
10716       __ fst_s(Address(rsp, 0));
10717     }
10718     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10719     __ addptr(rsp, 4);
10720   %}
10721   ins_pipe( pipe_slow );
10722 %}
10723 
10724 // Force rounding double precision to single precision
10725 instruct convD2F_reg(regF dst, regD src) %{
10726   predicate(UseSSE>=2);
10727   match(Set dst (ConvD2F src));
10728   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10729   ins_encode %{
10730     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10731   %}
10732   ins_pipe( pipe_slow );
10733 %}
10734 
10735 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10736   predicate(UseSSE==0);
10737   match(Set dst (ConvF2D src));
10738   format %{ "FST_S  $dst,$src\t# D-round" %}
10739   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10740   ins_pipe( fpu_reg_reg );
10741 %}
10742 
10743 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10744   predicate(UseSSE==1);
10745   match(Set dst (ConvF2D src));
10746   format %{ "FST_D  $dst,$src\t# D-round" %}
10747   expand %{
10748     roundDouble_mem_reg(dst,src);
10749   %}
10750 %}
10751 
10752 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10753   predicate(UseSSE==1);
10754   match(Set dst (ConvF2D src));
10755   effect( KILL cr );
10756   format %{ "SUB    ESP,4\n\t"
10757             "MOVSS  [ESP] $src\n\t"
10758             "FLD_S  [ESP]\n\t"
10759             "ADD    ESP,4\n\t"
10760             "FSTP   $dst\t# D-round" %}
10761   ins_encode %{
10762     __ subptr(rsp, 4);
10763     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10764     __ fld_s(Address(rsp, 0));
10765     __ addptr(rsp, 4);
10766     __ fstp_d($dst$$reg);
10767   %}
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 instruct convF2D_reg(regD dst, regF src) %{
10772   predicate(UseSSE>=2);
10773   match(Set dst (ConvF2D src));
10774   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10775   ins_encode %{
10776     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10777   %}
10778   ins_pipe( pipe_slow );
10779 %}
10780 
10781 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10782 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10783   predicate(UseSSE<=1);
10784   match(Set dst (ConvD2I src));
10785   effect( KILL tmp, KILL cr );
10786   format %{ "FLD    $src\t# Convert double to int \n\t"
10787             "FLDCW  trunc mode\n\t"
10788             "SUB    ESP,4\n\t"
10789             "FISTp  [ESP + #0]\n\t"
10790             "FLDCW  std/24-bit mode\n\t"
10791             "POP    EAX\n\t"
10792             "CMP    EAX,0x80000000\n\t"
10793             "JNE,s  fast\n\t"
10794             "FLD_D  $src\n\t"
10795             "CALL   d2i_wrapper\n"
10796       "fast:" %}
10797   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10802 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10803   predicate(UseSSE>=2);
10804   match(Set dst (ConvD2I src));
10805   effect( KILL tmp, KILL cr );
10806   format %{ "CVTTSD2SI $dst, $src\n\t"
10807             "CMP    $dst,0x80000000\n\t"
10808             "JNE,s  fast\n\t"
10809             "SUB    ESP, 8\n\t"
10810             "MOVSD  [ESP], $src\n\t"
10811             "FLD_D  [ESP]\n\t"
10812             "ADD    ESP, 8\n\t"
10813             "CALL   d2i_wrapper\n"
10814       "fast:" %}
10815   ins_encode %{
10816     Label fast;
10817     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10818     __ cmpl($dst$$Register, 0x80000000);
10819     __ jccb(Assembler::notEqual, fast);
10820     __ subptr(rsp, 8);
10821     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10822     __ fld_d(Address(rsp, 0));
10823     __ addptr(rsp, 8);
10824     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10825     __ post_call_nop();
10826     __ bind(fast);
10827   %}
10828   ins_pipe( pipe_slow );
10829 %}
10830 
10831 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10832   predicate(UseSSE<=1);
10833   match(Set dst (ConvD2L src));
10834   effect( KILL cr );
10835   format %{ "FLD    $src\t# Convert double to long\n\t"
10836             "FLDCW  trunc mode\n\t"
10837             "SUB    ESP,8\n\t"
10838             "FISTp  [ESP + #0]\n\t"
10839             "FLDCW  std/24-bit mode\n\t"
10840             "POP    EAX\n\t"
10841             "POP    EDX\n\t"
10842             "CMP    EDX,0x80000000\n\t"
10843             "JNE,s  fast\n\t"
10844             "TEST   EAX,EAX\n\t"
10845             "JNE,s  fast\n\t"
10846             "FLD    $src\n\t"
10847             "CALL   d2l_wrapper\n"
10848       "fast:" %}
10849   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10850   ins_pipe( pipe_slow );
10851 %}
10852 
10853 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10854 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10855   predicate (UseSSE>=2);
10856   match(Set dst (ConvD2L src));
10857   effect( KILL cr );
10858   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10859             "MOVSD  [ESP],$src\n\t"
10860             "FLD_D  [ESP]\n\t"
10861             "FLDCW  trunc mode\n\t"
10862             "FISTp  [ESP + #0]\n\t"
10863             "FLDCW  std/24-bit mode\n\t"
10864             "POP    EAX\n\t"
10865             "POP    EDX\n\t"
10866             "CMP    EDX,0x80000000\n\t"
10867             "JNE,s  fast\n\t"
10868             "TEST   EAX,EAX\n\t"
10869             "JNE,s  fast\n\t"
10870             "SUB    ESP,8\n\t"
10871             "MOVSD  [ESP],$src\n\t"
10872             "FLD_D  [ESP]\n\t"
10873             "ADD    ESP,8\n\t"
10874             "CALL   d2l_wrapper\n"
10875       "fast:" %}
10876   ins_encode %{
10877     Label fast;
10878     __ subptr(rsp, 8);
10879     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10880     __ fld_d(Address(rsp, 0));
10881     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10882     __ fistp_d(Address(rsp, 0));
10883     // Restore the rounding mode, mask the exception
10884     if (Compile::current()->in_24_bit_fp_mode()) {
10885       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10886     } else {
10887       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10888     }
10889     // Load the converted long, adjust CPU stack
10890     __ pop(rax);
10891     __ pop(rdx);
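    // FISTP stores the 64-bit "integer indefinite" value 0x8000000000000000
    // when the input is a NAN or out of long range; EDX:EAX is compared
    // against that pattern to decide whether the d2l_wrapper fixup is needed.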
10892     __ cmpl(rdx, 0x80000000);
10893     __ jccb(Assembler::notEqual, fast);
10894     __ testl(rax, rax);
10895     __ jccb(Assembler::notEqual, fast);
10896     __ subptr(rsp, 8);
10897     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10898     __ fld_d(Address(rsp, 0));
10899     __ addptr(rsp, 8);
10900     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10901     __ post_call_nop();
10902     __ bind(fast);
10903   %}
10904   ins_pipe( pipe_slow );
10905 %}
10906 
10907 // Convert a double to an int.  Java semantics require we do complex
10908 // manglations in the corner cases.  So we set the rounding mode to
10909 // 'zero', store the darned double down as an int, and reset the
10910 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or convert a NAN; we check for this and
// go the slow path if needed.
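// As an illustration of the required Java semantics (JLS 5.1.3), the slow
// path is expected to yield, for example:
//   (int)Float.NaN  == 0
//   (int) 1e30f     == Integer.MAX_VALUE   (positive overflow)
//   (int)-1e30f     == Integer.MIN_VALUE   (negative overflow)
// while in-range values simply truncate toward zero.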
10913 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10914   predicate(UseSSE==0);
10915   match(Set dst (ConvF2I src));
10916   effect( KILL tmp, KILL cr );
10917   format %{ "FLD    $src\t# Convert float to int \n\t"
10918             "FLDCW  trunc mode\n\t"
10919             "SUB    ESP,4\n\t"
10920             "FISTp  [ESP + #0]\n\t"
10921             "FLDCW  std/24-bit mode\n\t"
10922             "POP    EAX\n\t"
10923             "CMP    EAX,0x80000000\n\t"
10924             "JNE,s  fast\n\t"
10925             "FLD    $src\n\t"
10926             "CALL   d2i_wrapper\n"
10927       "fast:" %}
10928   // DPR2I_encoding works for FPR2I
10929   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10930   ins_pipe( pipe_slow );
10931 %}
10932 
10933 // Convert a float in xmm to an int reg.
10934 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10935   predicate(UseSSE>=1);
10936   match(Set dst (ConvF2I src));
10937   effect( KILL tmp, KILL cr );
10938   format %{ "CVTTSS2SI $dst, $src\n\t"
10939             "CMP    $dst,0x80000000\n\t"
10940             "JNE,s  fast\n\t"
10941             "SUB    ESP, 4\n\t"
10942             "MOVSS  [ESP], $src\n\t"
10943             "FLD    [ESP]\n\t"
10944             "ADD    ESP, 4\n\t"
10945             "CALL   d2i_wrapper\n"
10946       "fast:" %}
10947   ins_encode %{
10948     Label fast;
10949     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10950     __ cmpl($dst$$Register, 0x80000000);
10951     __ jccb(Assembler::notEqual, fast);
10952     __ subptr(rsp, 4);
10953     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10954     __ fld_s(Address(rsp, 0));
10955     __ addptr(rsp, 4);
10956     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10957     __ post_call_nop();
10958     __ bind(fast);
10959   %}
10960   ins_pipe( pipe_slow );
10961 %}
10962 
10963 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10964   predicate(UseSSE==0);
10965   match(Set dst (ConvF2L src));
10966   effect( KILL cr );
10967   format %{ "FLD    $src\t# Convert float to long\n\t"
10968             "FLDCW  trunc mode\n\t"
10969             "SUB    ESP,8\n\t"
10970             "FISTp  [ESP + #0]\n\t"
10971             "FLDCW  std/24-bit mode\n\t"
10972             "POP    EAX\n\t"
10973             "POP    EDX\n\t"
10974             "CMP    EDX,0x80000000\n\t"
10975             "JNE,s  fast\n\t"
10976             "TEST   EAX,EAX\n\t"
10977             "JNE,s  fast\n\t"
10978             "FLD    $src\n\t"
10979             "CALL   d2l_wrapper\n"
10980       "fast:" %}
10981   // DPR2L_encoding works for FPR2L
10982   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10983   ins_pipe( pipe_slow );
10984 %}
10985 
10986 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10987 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10988   predicate (UseSSE>=1);
10989   match(Set dst (ConvF2L src));
10990   effect( KILL cr );
10991   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10992             "MOVSS  [ESP],$src\n\t"
10993             "FLD_S  [ESP]\n\t"
10994             "FLDCW  trunc mode\n\t"
10995             "FISTp  [ESP + #0]\n\t"
10996             "FLDCW  std/24-bit mode\n\t"
10997             "POP    EAX\n\t"
10998             "POP    EDX\n\t"
10999             "CMP    EDX,0x80000000\n\t"
11000             "JNE,s  fast\n\t"
11001             "TEST   EAX,EAX\n\t"
11002             "JNE,s  fast\n\t"
11003             "SUB    ESP,4\t# Convert float to long\n\t"
11004             "MOVSS  [ESP],$src\n\t"
11005             "FLD_S  [ESP]\n\t"
11006             "ADD    ESP,4\n\t"
11007             "CALL   d2l_wrapper\n"
11008       "fast:" %}
11009   ins_encode %{
11010     Label fast;
11011     __ subptr(rsp, 8);
11012     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11013     __ fld_s(Address(rsp, 0));
11014     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11015     __ fistp_d(Address(rsp, 0));
11016     // Restore the rounding mode, mask the exception
11017     if (Compile::current()->in_24_bit_fp_mode()) {
11018       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11019     } else {
11020       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11021     }
11022     // Load the converted long, adjust CPU stack
11023     __ pop(rax);
11024     __ pop(rdx);
11025     __ cmpl(rdx, 0x80000000);
11026     __ jccb(Assembler::notEqual, fast);
11027     __ testl(rax, rax);
11028     __ jccb(Assembler::notEqual, fast);
11029     __ subptr(rsp, 4);
11030     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11031     __ fld_s(Address(rsp, 0));
11032     __ addptr(rsp, 4);
11033     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11034     __ post_call_nop();
11035     __ bind(fast);
11036   %}
11037   ins_pipe( pipe_slow );
11038 %}
11039 
11040 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11041   predicate( UseSSE<=1 );
11042   match(Set dst (ConvI2D src));
11043   format %{ "FILD   $src\n\t"
11044             "FSTP   $dst" %}
11045   opcode(0xDB, 0x0);  /* DB /0 */
11046   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11047   ins_pipe( fpu_reg_mem );
11048 %}
11049 
11050 instruct convI2D_reg(regD dst, rRegI src) %{
11051   predicate( UseSSE>=2 && !UseXmmI2D );
11052   match(Set dst (ConvI2D src));
11053   format %{ "CVTSI2SD $dst,$src" %}
11054   ins_encode %{
11055     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11056   %}
11057   ins_pipe( pipe_slow );
11058 %}
11059 
11060 instruct convI2D_mem(regD dst, memory mem) %{
11061   predicate( UseSSE>=2 );
11062   match(Set dst (ConvI2D (LoadI mem)));
11063   format %{ "CVTSI2SD $dst,$mem" %}
11064   ins_encode %{
11065     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11066   %}
11067   ins_pipe( pipe_slow );
11068 %}
11069 
11070 instruct convXI2D_reg(regD dst, rRegI src)
11071 %{
11072   predicate( UseSSE>=2 && UseXmmI2D );
11073   match(Set dst (ConvI2D src));
11074 
11075   format %{ "MOVD  $dst,$src\n\t"
11076             "CVTDQ2PD $dst,$dst\t# i2d" %}
11077   ins_encode %{
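    // With UseXmmI2D the int is moved into the XMM register first and then
    // converted with CVTDQ2PD; presumably this avoids the merge dependency
    // CVTSI2SD has on its destination register (the flag's intent, not
    // something spelled out here).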
11078     __ movdl($dst$$XMMRegister, $src$$Register);
11079     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11080   %}
11081   ins_pipe(pipe_slow); // XXX
11082 %}
11083 
11084 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11085   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11086   match(Set dst (ConvI2D (LoadI mem)));
11087   format %{ "FILD   $mem\n\t"
11088             "FSTP   $dst" %}
11089   opcode(0xDB);      /* DB /0 */
11090   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11091               Pop_Reg_DPR(dst));
11092   ins_pipe( fpu_reg_mem );
11093 %}
11094 
11095 // Convert a byte to a float; no rounding step needed.
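// (Every value in 0..255 is exactly representable in a 24-bit mantissa, so
// the result is identical in 24-bit and full-precision modes.)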
11096 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11097   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11098   match(Set dst (ConvI2F src));
11099   format %{ "FILD   $src\n\t"
11100             "FSTP   $dst" %}
11101 
11102   opcode(0xDB, 0x0);  /* DB /0 */
11103   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11104   ins_pipe( fpu_reg_mem );
11105 %}
11106 
11107 // In 24-bit mode, force exponent rounding by storing back out
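// (FILD leaves an 80-bit extended value on the FPU stack; FSTP_S to a
// 32-bit stack slot narrows both mantissa and exponent to true single
// precision, which is what the 24-bit rounding mode is meant to model.)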
11108 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11109   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11110   match(Set dst (ConvI2F src));
11111   ins_cost(200);
11112   format %{ "FILD   $src\n\t"
11113             "FSTP_S $dst" %}
11114   opcode(0xDB, 0x0);  /* DB /0 */
11115   ins_encode( Push_Mem_I(src),
11116               Pop_Mem_FPR(dst));
11117   ins_pipe( fpu_mem_mem );
11118 %}
11119 
11120 // In 24-bit mode, force exponent rounding by storing back out
11121 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11122   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11123   match(Set dst (ConvI2F (LoadI mem)));
11124   ins_cost(200);
11125   format %{ "FILD   $mem\n\t"
11126             "FSTP_S $dst" %}
11127   opcode(0xDB);  /* DB /0 */
11128   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11129               Pop_Mem_FPR(dst));
11130   ins_pipe( fpu_mem_mem );
11131 %}
11132 
// This instruction does not round to 24 bits
11134 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11135   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11136   match(Set dst (ConvI2F src));
11137   format %{ "FILD   $src\n\t"
11138             "FSTP   $dst" %}
11139   opcode(0xDB, 0x0);  /* DB /0 */
11140   ins_encode( Push_Mem_I(src),
11141               Pop_Reg_FPR(dst));
11142   ins_pipe( fpu_reg_mem );
11143 %}
11144 
// This instruction does not round to 24 bits
11146 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11147   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11148   match(Set dst (ConvI2F (LoadI mem)));
11149   format %{ "FILD   $mem\n\t"
11150             "FSTP   $dst" %}
11151   opcode(0xDB);      /* DB /0 */
11152   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11153               Pop_Reg_FPR(dst));
11154   ins_pipe( fpu_reg_mem );
11155 %}
11156 
11157 // Convert an int to a float in xmm; no rounding step needed.
11158 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11160   match(Set dst (ConvI2F src));
11161   format %{ "CVTSI2SS $dst, $src" %}
11162   ins_encode %{
11163     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11164   %}
11165   ins_pipe( pipe_slow );
11166 %}
11167 
instruct convXI2F_reg(regF dst, rRegI src)
11169 %{
11170   predicate( UseSSE>=2 && UseXmmI2F );
11171   match(Set dst (ConvI2F src));
11172 
11173   format %{ "MOVD  $dst,$src\n\t"
11174             "CVTDQ2PS $dst,$dst\t# i2f" %}
11175   ins_encode %{
11176     __ movdl($dst$$XMMRegister, $src$$Register);
11177     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11178   %}
11179   ins_pipe(pipe_slow); // XXX
11180 %}
11181 
11182 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11183   match(Set dst (ConvI2L src));
11184   effect(KILL cr);
11185   ins_cost(375);
11186   format %{ "MOV    $dst.lo,$src\n\t"
11187             "MOV    $dst.hi,$src\n\t"
11188             "SAR    $dst.hi,31" %}
11189   ins_encode(convert_int_long(dst,src));
11190   ins_pipe( ialu_reg_reg_long );
11191 %}
11192 
11193 // Zero-extend convert int to long
11194 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11195   match(Set dst (AndL (ConvI2L src) mask) );
11196   effect( KILL flags );
11197   ins_cost(250);
11198   format %{ "MOV    $dst.lo,$src\n\t"
11199             "XOR    $dst.hi,$dst.hi" %}
11200   opcode(0x33); // XOR
11201   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11202   ins_pipe( ialu_reg_reg_long );
11203 %}
11204 
11205 // Zero-extend long
11206 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11207   match(Set dst (AndL src mask) );
11208   effect( KILL flags );
11209   ins_cost(250);
11210   format %{ "MOV    $dst.lo,$src.lo\n\t"
11211             "XOR    $dst.hi,$dst.hi\n\t" %}
11212   opcode(0x33); // XOR
11213   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11214   ins_pipe( ialu_reg_reg_long );
11215 %}
11216 
11217 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE<=1);
11219   match(Set dst (ConvL2D src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD   ST,[ESP + #0]\n\t"
11224             "ADD    ESP,8\n\t"
11225             "FSTP_D $dst\t# D-round" %}
11226   opcode(0xDF, 0x5);  /* DF /5 */
11227   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11228   ins_pipe( pipe_slow );
11229 %}
11230 
11231 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11232   predicate (UseSSE>=2);
11233   match(Set dst (ConvL2D src));
11234   effect( KILL cr );
11235   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11236             "PUSH   $src.lo\n\t"
11237             "FILD_D [ESP]\n\t"
11238             "FSTP_D [ESP]\n\t"
11239             "MOVSD  $dst,[ESP]\n\t"
11240             "ADD    ESP,8" %}
11241   opcode(0xDF, 0x5);  /* DF /5 */
11242   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11243   ins_pipe( pipe_slow );
11244 %}
11245 
11246 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11247   predicate (UseSSE>=1);
11248   match(Set dst (ConvL2F src));
11249   effect( KILL cr );
11250   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11251             "PUSH   $src.lo\n\t"
11252             "FILD_D [ESP]\n\t"
11253             "FSTP_S [ESP]\n\t"
11254             "MOVSS  $dst,[ESP]\n\t"
11255             "ADD    ESP,8" %}
11256   opcode(0xDF, 0x5);  /* DF /5 */
11257   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11258   ins_pipe( pipe_slow );
11259 %}
11260 
11261 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11262   match(Set dst (ConvL2F src));
11263   effect( KILL cr );
11264   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11265             "PUSH   $src.lo\n\t"
11266             "FILD   ST,[ESP + #0]\n\t"
11267             "ADD    ESP,8\n\t"
11268             "FSTP_S $dst\t# F-round" %}
11269   opcode(0xDF, 0x5);  /* DF /5 */
11270   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11271   ins_pipe( pipe_slow );
11272 %}
11273 
11274 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11275   match(Set dst (ConvL2I src));
11276   effect( DEF dst, USE src );
11277   format %{ "MOV    $dst,$src.lo" %}
11278   ins_encode(enc_CopyL_Lo(dst,src));
11279   ins_pipe( ialu_reg_reg );
11280 %}
11281 
11282 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11283   match(Set dst (MoveF2I src));
11284   effect( DEF dst, USE src );
11285   ins_cost(100);
11286   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11287   ins_encode %{
11288     __ movl($dst$$Register, Address(rsp, $src$$disp));
11289   %}
11290   ins_pipe( ialu_reg_mem );
11291 %}
11292 
11293 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11294   predicate(UseSSE==0);
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297 
11298   ins_cost(125);
11299   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11300   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11301   ins_pipe( fpu_mem_reg );
11302 %}
11303 
11304 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11305   predicate(UseSSE>=1);
11306   match(Set dst (MoveF2I src));
11307   effect( DEF dst, USE src );
11308 
11309   ins_cost(95);
11310   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11311   ins_encode %{
11312     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11313   %}
11314   ins_pipe( pipe_slow );
11315 %}
11316 
11317 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11318   predicate(UseSSE>=2);
11319   match(Set dst (MoveF2I src));
11320   effect( DEF dst, USE src );
11321   ins_cost(85);
11322   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11323   ins_encode %{
11324     __ movdl($dst$$Register, $src$$XMMRegister);
11325   %}
11326   ins_pipe( pipe_slow );
11327 %}
11328 
11329 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11330   match(Set dst (MoveI2F src));
11331   effect( DEF dst, USE src );
11332 
11333   ins_cost(100);
11334   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11335   ins_encode %{
11336     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11337   %}
11338   ins_pipe( ialu_mem_reg );
11339 %}
11340 
11341 
11342 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11343   predicate(UseSSE==0);
11344   match(Set dst (MoveI2F src));
11345   effect(DEF dst, USE src);
11346 
11347   ins_cost(125);
11348   format %{ "FLD_S  $src\n\t"
11349             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11350   opcode(0xD9);               /* D9 /0, FLD m32real */
11351   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11352               Pop_Reg_FPR(dst) );
11353   ins_pipe( fpu_reg_mem );
11354 %}
11355 
11356 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11357   predicate(UseSSE>=1);
11358   match(Set dst (MoveI2F src));
11359   effect( DEF dst, USE src );
11360 
11361   ins_cost(95);
11362   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11363   ins_encode %{
11364     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11365   %}
11366   ins_pipe( pipe_slow );
11367 %}
11368 
11369 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11370   predicate(UseSSE>=2);
11371   match(Set dst (MoveI2F src));
11372   effect( DEF dst, USE src );
11373 
11374   ins_cost(85);
11375   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11376   ins_encode %{
11377     __ movdl($dst$$XMMRegister, $src$$Register);
11378   %}
11379   ins_pipe( pipe_slow );
11380 %}
11381 
11382 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11383   match(Set dst (MoveD2L src));
11384   effect(DEF dst, USE src);
11385 
11386   ins_cost(250);
11387   format %{ "MOV    $dst.lo,$src\n\t"
11388             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11389   opcode(0x8B, 0x8B);
11390   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11391   ins_pipe( ialu_mem_long_reg );
11392 %}
11393 
11394 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11395   predicate(UseSSE<=1);
11396   match(Set dst (MoveD2L src));
11397   effect(DEF dst, USE src);
11398 
11399   ins_cost(125);
11400   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11401   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11402   ins_pipe( fpu_mem_reg );
11403 %}
11404 
11405 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11406   predicate(UseSSE>=2);
11407   match(Set dst (MoveD2L src));
11408   effect(DEF dst, USE src);
11409   ins_cost(95);
11410   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11411   ins_encode %{
11412     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11413   %}
11414   ins_pipe( pipe_slow );
11415 %}
11416 
11417 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11418   predicate(UseSSE>=2);
11419   match(Set dst (MoveD2L src));
11420   effect(DEF dst, USE src, TEMP tmp);
11421   ins_cost(85);
11422   format %{ "MOVD   $dst.lo,$src\n\t"
11423             "PSHUFLW $tmp,$src,0x4E\n\t"
11424             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11425   ins_encode %{
11426     __ movdl($dst$$Register, $src$$XMMRegister);
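    // 0x4E swaps the two 32-bit halves of the low 64 bits, exposing the
    // high half of the double for the second MOVD.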
11427     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11428     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11429   %}
11430   ins_pipe( pipe_slow );
11431 %}
11432 
11433 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436 
11437   ins_cost(200);
11438   format %{ "MOV    $dst,$src.lo\n\t"
11439             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11440   opcode(0x89, 0x89);
11441   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11442   ins_pipe( ialu_mem_long_reg );
11443 %}
11444 
11445 
11446 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11447   predicate(UseSSE<=1);
11448   match(Set dst (MoveL2D src));
11449   effect(DEF dst, USE src);
11450   ins_cost(125);
11451 
11452   format %{ "FLD_D  $src\n\t"
11453             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11454   opcode(0xDD);               /* DD /0, FLD m64real */
11455   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11456               Pop_Reg_DPR(dst) );
11457   ins_pipe( fpu_reg_mem );
11458 %}
11459 
11460 
11461 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11462   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11463   match(Set dst (MoveL2D src));
11464   effect(DEF dst, USE src);
11465 
11466   ins_cost(95);
11467   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11468   ins_encode %{
11469     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11470   %}
11471   ins_pipe( pipe_slow );
11472 %}
11473 
11474 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11475   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11476   match(Set dst (MoveL2D src));
11477   effect(DEF dst, USE src);
11478 
11479   ins_cost(95);
11480   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11481   ins_encode %{
11482     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11483   %}
11484   ins_pipe( pipe_slow );
11485 %}
11486 
11487 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11488   predicate(UseSSE>=2);
11489   match(Set dst (MoveL2D src));
11490   effect(TEMP dst, USE src, TEMP tmp);
11491   ins_cost(85);
11492   format %{ "MOVD   $dst,$src.lo\n\t"
11493             "MOVD   $tmp,$src.hi\n\t"
11494             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11495   ins_encode %{
11496     __ movdl($dst$$XMMRegister, $src$$Register);
11497     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
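    // PUNPCKLDQ interleaves the low dwords: dst[31:0] = src.lo and
    // dst[63:32] = src.hi, reassembling the long's bit pattern in $dst.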
11498     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11499   %}
11500   ins_pipe( pipe_slow );
11501 %}
11502 
11503 
11504 // =======================================================================
11505 // fast clearing of an array
11506 // Small ClearArray non-AVX512.
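// The expansions below zero 'cnt' 8-byte words starting at 'base' (see the
// "Convert doublewords to ..." comments in the formats) and pick between
// REP STOSB, an XMM/YMM store loop, or REP STOS based on UseFastStosb and
// UseXMMForObjInit.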
11507 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11508   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11509   match(Set dummy (ClearArray cnt base));
11510   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11511 
11512   format %{ $$template
11513     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11514     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11515     $$emit$$"JG     LARGE\n\t"
11516     $$emit$$"SHL    ECX, 1\n\t"
11517     $$emit$$"DEC    ECX\n\t"
11518     $$emit$$"JS     DONE\t# Zero length\n\t"
11519     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11520     $$emit$$"DEC    ECX\n\t"
11521     $$emit$$"JGE    LOOP\n\t"
11522     $$emit$$"JMP    DONE\n\t"
11523     $$emit$$"# LARGE:\n\t"
11524     if (UseFastStosb) {
11525        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11526        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11527     } else if (UseXMMForObjInit) {
11528        $$emit$$"MOV     RDI,RAX\n\t"
11529        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11530        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11531        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11532        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11533        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11534        $$emit$$"ADD     0x40,RAX\n\t"
11535        $$emit$$"# L_zero_64_bytes:\n\t"
11536        $$emit$$"SUB     0x8,RCX\n\t"
11537        $$emit$$"JGE     L_loop\n\t"
11538        $$emit$$"ADD     0x4,RCX\n\t"
11539        $$emit$$"JL      L_tail\n\t"
11540        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11541        $$emit$$"ADD     0x20,RAX\n\t"
11542        $$emit$$"SUB     0x4,RCX\n\t"
11543        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11544        $$emit$$"ADD     0x4,RCX\n\t"
11545        $$emit$$"JLE     L_end\n\t"
11546        $$emit$$"DEC     RCX\n\t"
11547        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11548        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11549        $$emit$$"ADD     0x8,RAX\n\t"
11550        $$emit$$"DEC     RCX\n\t"
11551        $$emit$$"JGE     L_sloop\n\t"
11552        $$emit$$"# L_end:\n\t"
11553     } else {
11554        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11555        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11556     }
11557     $$emit$$"# DONE"
11558   %}
11559   ins_encode %{
11560     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11561                  $tmp$$XMMRegister, false, knoreg);
11562   %}
11563   ins_pipe( pipe_slow );
11564 %}
11565 
11566 // Small ClearArray AVX512 non-constant length.
11567 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11568   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11569   match(Set dummy (ClearArray cnt base));
11570   ins_cost(125);
11571   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11572 
11573   format %{ $$template
11574     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11575     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11576     $$emit$$"JG     LARGE\n\t"
11577     $$emit$$"SHL    ECX, 1\n\t"
11578     $$emit$$"DEC    ECX\n\t"
11579     $$emit$$"JS     DONE\t# Zero length\n\t"
11580     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11581     $$emit$$"DEC    ECX\n\t"
11582     $$emit$$"JGE    LOOP\n\t"
11583     $$emit$$"JMP    DONE\n\t"
11584     $$emit$$"# LARGE:\n\t"
11585     if (UseFastStosb) {
11586        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11587        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11588     } else if (UseXMMForObjInit) {
11589        $$emit$$"MOV     RDI,RAX\n\t"
11590        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11591        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11592        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11593        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11594        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11595        $$emit$$"ADD     0x40,RAX\n\t"
11596        $$emit$$"# L_zero_64_bytes:\n\t"
11597        $$emit$$"SUB     0x8,RCX\n\t"
11598        $$emit$$"JGE     L_loop\n\t"
11599        $$emit$$"ADD     0x4,RCX\n\t"
11600        $$emit$$"JL      L_tail\n\t"
11601        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11602        $$emit$$"ADD     0x20,RAX\n\t"
11603        $$emit$$"SUB     0x4,RCX\n\t"
11604        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11605        $$emit$$"ADD     0x4,RCX\n\t"
11606        $$emit$$"JLE     L_end\n\t"
11607        $$emit$$"DEC     RCX\n\t"
11608        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11609        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11610        $$emit$$"ADD     0x8,RAX\n\t"
11611        $$emit$$"DEC     RCX\n\t"
11612        $$emit$$"JGE     L_sloop\n\t"
11613        $$emit$$"# L_end:\n\t"
11614     } else {
11615        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11616        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11617     }
11618     $$emit$$"# DONE"
11619   %}
11620   ins_encode %{
11621     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11622                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11623   %}
11624   ins_pipe( pipe_slow );
11625 %}
11626 
11627 // Large ClearArray non-AVX512.
11628 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11629   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11630   match(Set dummy (ClearArray cnt base));
11631   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11632   format %{ $$template
11633     if (UseFastStosb) {
11634        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11635        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11636        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11637     } else if (UseXMMForObjInit) {
11638        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11639        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11640        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11641        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11642        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11643        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11644        $$emit$$"ADD     0x40,RAX\n\t"
11645        $$emit$$"# L_zero_64_bytes:\n\t"
11646        $$emit$$"SUB     0x8,RCX\n\t"
11647        $$emit$$"JGE     L_loop\n\t"
11648        $$emit$$"ADD     0x4,RCX\n\t"
11649        $$emit$$"JL      L_tail\n\t"
11650        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11651        $$emit$$"ADD     0x20,RAX\n\t"
11652        $$emit$$"SUB     0x4,RCX\n\t"
11653        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11654        $$emit$$"ADD     0x4,RCX\n\t"
11655        $$emit$$"JLE     L_end\n\t"
11656        $$emit$$"DEC     RCX\n\t"
11657        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11658        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11659        $$emit$$"ADD     0x8,RAX\n\t"
11660        $$emit$$"DEC     RCX\n\t"
11661        $$emit$$"JGE     L_sloop\n\t"
11662        $$emit$$"# L_end:\n\t"
11663     } else {
11664        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11665        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11666        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11667     }
11668     $$emit$$"# DONE"
11669   %}
11670   ins_encode %{
11671     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11672                  $tmp$$XMMRegister, true, knoreg);
11673   %}
11674   ins_pipe( pipe_slow );
11675 %}
11676 
11677 // Large ClearArray AVX512.
11678 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11679   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11680   match(Set dummy (ClearArray cnt base));
11681   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11682   format %{ $$template
11683     if (UseFastStosb) {
11684        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11685        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11686        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11687     } else if (UseXMMForObjInit) {
11688        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11689        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11690        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11691        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11692        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11693        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11694        $$emit$$"ADD     0x40,RAX\n\t"
11695        $$emit$$"# L_zero_64_bytes:\n\t"
11696        $$emit$$"SUB     0x8,RCX\n\t"
11697        $$emit$$"JGE     L_loop\n\t"
11698        $$emit$$"ADD     0x4,RCX\n\t"
11699        $$emit$$"JL      L_tail\n\t"
11700        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11701        $$emit$$"ADD     0x20,RAX\n\t"
11702        $$emit$$"SUB     0x4,RCX\n\t"
11703        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11704        $$emit$$"ADD     0x4,RCX\n\t"
11705        $$emit$$"JLE     L_end\n\t"
11706        $$emit$$"DEC     RCX\n\t"
11707        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11708        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11709        $$emit$$"ADD     0x8,RAX\n\t"
11710        $$emit$$"DEC     RCX\n\t"
11711        $$emit$$"JGE     L_sloop\n\t"
11712        $$emit$$"# L_end:\n\t"
11713     } else {
11714        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11715        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11716        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11717     }
11718     $$emit$$"# DONE"
11719   %}
11720   ins_encode %{
11721     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11722                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11723   %}
11724   ins_pipe( pipe_slow );
11725 %}
11726 
11727 // Small ClearArray AVX512 constant length.
11728 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11729 %{
11730   predicate(!((ClearArrayNode*)n)->is_large() &&
11731                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11732   match(Set dummy (ClearArray cnt base));
11733   ins_cost(100);
11734   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11735   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11736   ins_encode %{
11737    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11738   %}
11739   ins_pipe(pipe_slow);
11740 %}
11741 
11742 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11743                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11744   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11745   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11746   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11747 
11748   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11749   ins_encode %{
11750     __ string_compare($str1$$Register, $str2$$Register,
11751                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11752                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11753   %}
11754   ins_pipe( pipe_slow );
11755 %}
11756 
11757 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11758                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11759   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11760   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11761   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11762 
11763   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11764   ins_encode %{
11765     __ string_compare($str1$$Register, $str2$$Register,
11766                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11767                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11768   %}
11769   ins_pipe( pipe_slow );
11770 %}
11771 
11772 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11773                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11774   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11775   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11776   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11777 
11778   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11779   ins_encode %{
11780     __ string_compare($str1$$Register, $str2$$Register,
11781                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11782                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11783   %}
11784   ins_pipe( pipe_slow );
11785 %}
11786 
11787 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11788                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11789   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11790   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11791   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11792 
11793   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11794   ins_encode %{
11795     __ string_compare($str1$$Register, $str2$$Register,
11796                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11797                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11798   %}
11799   ins_pipe( pipe_slow );
11800 %}
11801 
11802 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11803                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11804   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11805   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11806   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11807 
11808   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11809   ins_encode %{
11810     __ string_compare($str1$$Register, $str2$$Register,
11811                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11812                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11813   %}
11814   ins_pipe( pipe_slow );
11815 %}
11816 
11817 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11818                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11819   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11820   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11821   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11822 
11823   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11824   ins_encode %{
11825     __ string_compare($str1$$Register, $str2$$Register,
11826                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11827                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11828   %}
11829   ins_pipe( pipe_slow );
11830 %}
11831 
11832 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11833                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11834   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11835   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11836   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11837 
11838   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11839   ins_encode %{
11840     __ string_compare($str2$$Register, $str1$$Register,
11841                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11842                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11848                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11849   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11850   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11851   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11852 
11853   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11854   ins_encode %{
11855     __ string_compare($str2$$Register, $str1$$Register,
11856                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11857                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 // fast string equals
11863 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11864                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11865   predicate(!VM_Version::supports_avx512vlbw());
11866   match(Set result (StrEquals (Binary str1 str2) cnt));
11867   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11868 
11869   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11870   ins_encode %{
11871     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11872                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11873                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11874   %}
11875 
11876   ins_pipe( pipe_slow );
11877 %}
11878 
11879 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11880                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11881   predicate(VM_Version::supports_avx512vlbw());
11882   match(Set result (StrEquals (Binary str1 str2) cnt));
11883   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11884 
11885   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11886   ins_encode %{
11887     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11888                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11889                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11890   %}
11891 
11892   ins_pipe( pipe_slow );
11893 %}
11894 
11895 
11896 // fast search of substring with known size.
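// string_indexofC8 handles constant needles of at least 16 Latin-1 bytes or
// 8 UTF-16 chars (one full XMM register), which never need to be staged
// through the stack; shorter constant needles use the generic string_indexof,
// which may copy them to the stack to avoid reading across a page boundary.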
11897 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11898                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11899   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11900   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11901   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11902 
11903   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11904   ins_encode %{
11905     int icnt2 = (int)$int_cnt2$$constant;
11906     if (icnt2 >= 16) {
11907       // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
11909       __ string_indexofC8($str1$$Register, $str2$$Register,
11910                           $cnt1$$Register, $cnt2$$Register,
11911                           icnt2, $result$$Register,
11912                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11913     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11915       __ string_indexof($str1$$Register, $str2$$Register,
11916                         $cnt1$$Register, $cnt2$$Register,
11917                         icnt2, $result$$Register,
11918                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11919     }
11920   %}
11921   ins_pipe( pipe_slow );
11922 %}
11923 
11924 // fast search of substring with known size.
11925 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11926                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11927   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11928   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11929   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11930 
11931   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11932   ins_encode %{
11933     int icnt2 = (int)$int_cnt2$$constant;
11934     if (icnt2 >= 8) {
11935       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11937       __ string_indexofC8($str1$$Register, $str2$$Register,
11938                           $cnt1$$Register, $cnt2$$Register,
11939                           icnt2, $result$$Register,
11940                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11941     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11943       __ string_indexof($str1$$Register, $str2$$Register,
11944                         $cnt1$$Register, $cnt2$$Register,
11945                         icnt2, $result$$Register,
11946                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11947     }
11948   %}
11949   ins_pipe( pipe_slow );
11950 %}
11951 
11952 // fast search of substring with known size.
11953 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11954                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11955   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11956   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11957   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11958 
11959   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11960   ins_encode %{
11961     int icnt2 = (int)$int_cnt2$$constant;
11962     if (icnt2 >= 8) {
11963       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11965       __ string_indexofC8($str1$$Register, $str2$$Register,
11966                           $cnt1$$Register, $cnt2$$Register,
11967                           icnt2, $result$$Register,
11968                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11969     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11971       __ string_indexof($str1$$Register, $str2$$Register,
11972                         $cnt1$$Register, $cnt2$$Register,
11973                         icnt2, $result$$Register,
11974                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11975     }
11976   %}
11977   ins_pipe( pipe_slow );
11978 %}
11979 
11980 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11981                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11982   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11983   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11984   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11985 
11986   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11987   ins_encode %{
11988     __ string_indexof($str1$$Register, $str2$$Register,
11989                       $cnt1$$Register, $cnt2$$Register,
11990                       (-1), $result$$Register,
11991                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11992   %}
11993   ins_pipe( pipe_slow );
11994 %}
11995 
11996 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11997                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11998   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11999   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12000   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12001 
12002   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12003   ins_encode %{
12004     __ string_indexof($str1$$Register, $str2$$Register,
12005                       $cnt1$$Register, $cnt2$$Register,
12006                       (-1), $result$$Register,
12007                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12008   %}
12009   ins_pipe( pipe_slow );
12010 %}
12011 
12012 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12013                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12014   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12015   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12016   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12017 
12018   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12019   ins_encode %{
12020     __ string_indexof($str1$$Register, $str2$$Register,
12021                       $cnt1$$Register, $cnt2$$Register,
12022                       (-1), $result$$Register,
12023                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12024   %}
12025   ins_pipe( pipe_slow );
12026 %}
12027 
12028 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12029                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12030   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12031   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12032   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12033   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12034   ins_encode %{
12035     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12036                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12037   %}
12038   ins_pipe( pipe_slow );
12039 %}
12040 
12041 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12042                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12043   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12044   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12045   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12046   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12047   ins_encode %{
12048     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12049                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12050   %}
12051   ins_pipe( pipe_slow );
12052 %}
12053 
12054 
12055 // fast array equals
12056 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12057                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12058 %{
12059   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12060   match(Set result (AryEq ary1 ary2));
12061   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12062   //ins_cost(300);
12063 
12064   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12065   ins_encode %{
12066     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12067                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12068                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12069   %}
12070   ins_pipe( pipe_slow );
12071 %}
12072 
12073 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12074                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12075 %{
12076   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12077   match(Set result (AryEq ary1 ary2));
12078   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12079   //ins_cost(300);
12080 
12081   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12082   ins_encode %{
12083     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12084                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12085                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12086   %}
12087   ins_pipe( pipe_slow );
12088 %}
12089 
12090 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12091                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12092 %{
12093   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12094   match(Set result (AryEq ary1 ary2));
12095   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12096   //ins_cost(300);
12097 
12098   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12099   ins_encode %{
12100     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12101                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12102                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12103   %}
12104   ins_pipe( pipe_slow );
12105 %}
12106 
12107 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12108                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12109 %{
12110   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12111   match(Set result (AryEq ary1 ary2));
12112   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12113   //ins_cost(300);
12114 
12115   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12116   ins_encode %{
12117     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12118                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12119                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12120   %}
12121   ins_pipe( pipe_slow );
12122 %}
12123 
12124 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12125                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12126 %{
12127   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12128   match(Set result (HasNegatives ary1 len));
12129   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12130 
12131   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12132   ins_encode %{
12133     __ has_negatives($ary1$$Register, $len$$Register,
12134                      $result$$Register, $tmp3$$Register,
12135                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12136   %}
12137   ins_pipe( pipe_slow );
12138 %}
12139 
12140 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12141                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12142 %{
12143   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12144   match(Set result (HasNegatives ary1 len));
12145   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12146 
12147   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12148   ins_encode %{
12149     __ has_negatives($ary1$$Register, $len$$Register,
12150                      $result$$Register, $tmp3$$Register,
12151                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12152   %}
12153   ins_pipe( pipe_slow );
12154 %}
12155 
12156 
12157 // fast char[] to byte[] compression
12158 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12159                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12160   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12161   match(Set result (StrCompressedCopy src (Binary dst len)));
12162   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12163 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12165   ins_encode %{
12166     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12167                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12168                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12169                            knoreg, knoreg);
12170   %}
12171   ins_pipe( pipe_slow );
12172 %}
12173 
12174 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12175                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12176   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12177   match(Set result (StrCompressedCopy src (Binary dst len)));
12178   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12179 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12181   ins_encode %{
12182     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12183                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12184                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12185                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12186   %}
12187   ins_pipe( pipe_slow );
12188 %}
12189 
12190 // fast byte[] to char[] inflation
12191 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12192                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12193   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12194   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12195   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12196 
12197   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12198   ins_encode %{
12199     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12200                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12201   %}
12202   ins_pipe( pipe_slow );
12203 %}
12204 
12205 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12206                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12207   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12208   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12209   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12210 
12211   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12212   ins_encode %{
12213     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12214                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12215   %}
12216   ins_pipe( pipe_slow );
12217 %}
12218 
12219 // encode char[] to byte[] in ISO_8859_1
12220 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12221                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12222                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12223   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12224   match(Set result (EncodeISOArray src (Binary dst len)));
12225   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12226 
12227   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12228   ins_encode %{
12229     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12230                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12231                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12232   %}
12233   ins_pipe( pipe_slow );
12234 %}
12235 
12236 // encode char[] to byte[] in ASCII
12237 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12238                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12239                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12240   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12241   match(Set result (EncodeISOArray src (Binary dst len)));
12242   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12243 
12244   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12245   ins_encode %{
12246     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12247                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12248                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12249   %}
12250   ins_pipe( pipe_slow );
12251 %}
12252 
12253 //----------Control Flow Instructions------------------------------------------
12254 // Signed compare Instructions
12255 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12256   match(Set cr (CmpI op1 op2));
12257   effect( DEF cr, USE op1, USE op2 );
12258   format %{ "CMP    $op1,$op2" %}
12259   opcode(0x3B);  /* Opcode 3B /r */
12260   ins_encode( OpcP, RegReg( op1, op2) );
12261   ins_pipe( ialu_cr_reg_reg );
12262 %}
12263 
12264 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12265   match(Set cr (CmpI op1 op2));
12266   effect( DEF cr, USE op1 );
12267   format %{ "CMP    $op1,$op2" %}
12268   opcode(0x81,0x07);  /* Opcode 81 /7 */
12269   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12270   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12271   ins_pipe( ialu_cr_reg_imm );
12272 %}
12273 
// Cisc-spilled version of compI_eReg
12275 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12276   match(Set cr (CmpI op1 (LoadI op2)));
12277 
12278   format %{ "CMP    $op1,$op2" %}
12279   ins_cost(500);
12280   opcode(0x3B);  /* Opcode 3B /r */
12281   ins_encode( OpcP, RegMem( op1, op2) );
12282   ins_pipe( ialu_cr_reg_mem );
12283 %}
12284 
12285 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12286   match(Set cr (CmpI src zero));
12287   effect( DEF cr, USE src );
12288 
12289   format %{ "TEST   $src,$src" %}
12290   opcode(0x85);
12291   ins_encode( OpcP, RegReg( src, src ) );
12292   ins_pipe( ialu_cr_reg_imm );
12293 %}
12294 
12295 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12296   match(Set cr (CmpI (AndI src con) zero));
12297 
12298   format %{ "TEST   $src,$con" %}
12299   opcode(0xF7,0x00);
12300   ins_encode( OpcP, RegOpc(src), Con32(con) );
12301   ins_pipe( ialu_cr_reg_imm );
12302 %}
12303 
12304 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12305   match(Set cr (CmpI (AndI src mem) zero));
12306 
12307   format %{ "TEST   $src,$mem" %}
12308   opcode(0x85);
12309   ins_encode( OpcP, RegMem( src, mem ) );
12310   ins_pipe( ialu_cr_reg_mem );
12311 %}
12312 
12313 // Unsigned compare Instructions; really, same as signed except they
12314 // produce an eFlagsRegU instead of eFlagsReg.
12315 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12316   match(Set cr (CmpU op1 op2));
12317 
12318   format %{ "CMPu   $op1,$op2" %}
12319   opcode(0x3B);  /* Opcode 3B /r */
12320   ins_encode( OpcP, RegReg( op1, op2) );
12321   ins_pipe( ialu_cr_reg_reg );
12322 %}
12323 
12324 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12325   match(Set cr (CmpU op1 op2));
12326 
12327   format %{ "CMPu   $op1,$op2" %}
12328   opcode(0x81,0x07);  /* Opcode 81 /7 */
12329   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12330   ins_pipe( ialu_cr_reg_imm );
12331 %}
12332 
// Cisc-spilled version of compU_eReg
12334 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12335   match(Set cr (CmpU op1 (LoadI op2)));
12336 
12337   format %{ "CMPu   $op1,$op2" %}
12338   ins_cost(500);
12339   opcode(0x3B);  /* Opcode 3B /r */
12340   ins_encode( OpcP, RegMem( op1, op2) );
12341   ins_pipe( ialu_cr_reg_mem );
12342 %}
12343 
12344 // // Cisc-spilled version of cmpU_eReg
12345 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12346 //  match(Set cr (CmpU (LoadI op1) op2));
12347 //
12348 //  format %{ "CMPu   $op1,$op2" %}
12349 //  ins_cost(500);
12350 //  opcode(0x39);  /* Opcode 39 /r */
12351 //  ins_encode( OpcP, RegMem( op1, op2) );
12352 //%}
12353 
12354 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12355   match(Set cr (CmpU src zero));
12356 
12357   format %{ "TESTu  $src,$src" %}
12358   opcode(0x85);
12359   ins_encode( OpcP, RegReg( src, src ) );
12360   ins_pipe( ialu_cr_reg_imm );
12361 %}
12362 
12363 // Unsigned pointer compare Instructions
12364 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12365   match(Set cr (CmpP op1 op2));
12366 
12367   format %{ "CMPu   $op1,$op2" %}
12368   opcode(0x3B);  /* Opcode 3B /r */
12369   ins_encode( OpcP, RegReg( op1, op2) );
12370   ins_pipe( ialu_cr_reg_reg );
12371 %}
12372 
12373 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12374   match(Set cr (CmpP op1 op2));
12375 
12376   format %{ "CMPu   $op1,$op2" %}
12377   opcode(0x81,0x07);  /* Opcode 81 /7 */
12378   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12379   ins_pipe( ialu_cr_reg_imm );
12380 %}
12381 
// Cisc-spilled version of compP_eReg
12383 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12384   match(Set cr (CmpP op1 (LoadP op2)));
12385 
12386   format %{ "CMPu   $op1,$op2" %}
12387   ins_cost(500);
12388   opcode(0x3B);  /* Opcode 3B /r */
12389   ins_encode( OpcP, RegMem( op1, op2) );
12390   ins_pipe( ialu_cr_reg_mem );
12391 %}
12392 
12393 // // Cisc-spilled version of cmpP_eReg
12394 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12395 //  match(Set cr (CmpP (LoadP op1) op2));
12396 //
12397 //  format %{ "CMPu   $op1,$op2" %}
12398 //  ins_cost(500);
12399 //  opcode(0x39);  /* Opcode 39 /r */
12400 //  ins_encode( OpcP, RegMem( op1, op2) );
12401 //%}
12402 
12403 // Compare raw pointer (used in out-of-heap check).
12404 // Only works because non-oop pointers must be raw pointers
12405 // and raw pointers have no anti-dependencies.
12406 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12407   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12408   match(Set cr (CmpP op1 (LoadP op2)));
12409 
12410   format %{ "CMPu   $op1,$op2" %}
12411   opcode(0x3B);  /* Opcode 3B /r */
12412   ins_encode( OpcP, RegMem( op1, op2) );
12413   ins_pipe( ialu_cr_reg_mem );
12414 %}
12415 
12416 //
// This will generate a signed flags result.  That is fine, since any
// compare against zero should be EQ/NE.
12419 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12420   match(Set cr (CmpP src zero));
12421 
12422   format %{ "TEST   $src,$src" %}
12423   opcode(0x85);
12424   ins_encode( OpcP, RegReg( src, src ) );
12425   ins_pipe( ialu_cr_reg_imm );
12426 %}
12427 
12428 // Cisc-spilled version of testP_reg
// This will generate a signed flags result.  That is fine, since any
// compare against zero should be EQ/NE.
12431 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12432   match(Set cr (CmpP (LoadP op) zero));
12433 
12434   format %{ "TEST   $op,0xFFFFFFFF" %}
12435   ins_cost(500);
12436   opcode(0xF7);               /* Opcode F7 /0 */
12437   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12438   ins_pipe( ialu_cr_reg_imm );
12439 %}
12440 
12441 // Yanked all unsigned pointer compare operations.
12442 // Pointer compares are done with CmpP which is already unsigned.
12443 
12444 //----------Max and Min--------------------------------------------------------
12445 // Min Instructions
12446 ////
12447 //   *** Min and Max using the conditional move are slower than the
12448 //   *** branch version on a Pentium III.
12449 // // Conditional move for min
12450 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12451 //  effect( USE_DEF op2, USE op1, USE cr );
12452 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12453 //  opcode(0x4C,0x0F);
12454 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12455 //  ins_pipe( pipe_cmov_reg );
12456 //%}
12457 //
12458 //// Min Register with Register (P6 version)
12459 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12460 //  predicate(VM_Version::supports_cmov() );
12461 //  match(Set op2 (MinI op1 op2));
12462 //  ins_cost(200);
12463 //  expand %{
12464 //    eFlagsReg cr;
12465 //    compI_eReg(cr,op1,op2);
12466 //    cmovI_reg_lt(op2,op1,cr);
12467 //  %}
12468 //%}
12469 
12470 // Min Register with Register (generic version)
12471 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12472   match(Set dst (MinI dst src));
12473   effect(KILL flags);
12474   ins_cost(300);
12475 
12476   format %{ "MIN    $dst,$src" %}
12477   opcode(0xCC);
12478   ins_encode( min_enc(dst,src) );
12479   ins_pipe( pipe_slow );
12480 %}
12481 
12482 // Max Register with Register
12483 //   *** Min and Max using the conditional move are slower than the
12484 //   *** branch version on a Pentium III.
12485 // // Conditional move for max
12486 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12487 //  effect( USE_DEF op2, USE op1, USE cr );
12488 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12489 //  opcode(0x4F,0x0F);
12490 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12491 //  ins_pipe( pipe_cmov_reg );
12492 //%}
12493 //
12494 // // Max Register with Register (P6 version)
12495 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12496 //  predicate(VM_Version::supports_cmov() );
12497 //  match(Set op2 (MaxI op1 op2));
12498 //  ins_cost(200);
12499 //  expand %{
12500 //    eFlagsReg cr;
12501 //    compI_eReg(cr,op1,op2);
12502 //    cmovI_reg_gt(op2,op1,cr);
12503 //  %}
12504 //%}
12505 
12506 // Max Register with Register (generic version)
12507 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12508   match(Set dst (MaxI dst src));
12509   effect(KILL flags);
12510   ins_cost(300);
12511 
12512   format %{ "MAX    $dst,$src" %}
12513   opcode(0xCC);
12514   ins_encode( max_enc(dst,src) );
12515   ins_pipe( pipe_slow );
12516 %}
12517 
12518 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
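//
// For example, with init = 0, limit = 10 and stride = 3 the formula below
// evaluates to 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12, which is the
// first induction value >= limit (the loop itself iterates over 0, 3, 6, 9).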
12522 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12523   match(Set limit (LoopLimit (Binary init limit) stride));
12524   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12525   ins_cost(300);
12526 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12528   ins_encode %{
12529     int strd = (int)$stride$$constant;
12530     assert(strd != 1 && strd != -1, "sanity");
12531     int m1 = (strd > 0) ? 1 : -1;
12532     // Convert limit to long (EAX:EDX)
12533     __ cdql();
12534     // Convert init to long (init:tmp)
12535     __ movl($tmp$$Register, $init$$Register);
12536     __ sarl($tmp$$Register, 31);
12537     // $limit - $init
12538     __ subl($limit$$Register, $init$$Register);
12539     __ sbbl($limit_hi$$Register, $tmp$$Register);
12540     // + ($stride - 1)
12541     if (strd > 0) {
12542       __ addl($limit$$Register, (strd - 1));
12543       __ adcl($limit_hi$$Register, 0);
12544       __ movl($tmp$$Register, strd);
12545     } else {
12546       __ addl($limit$$Register, (strd + 1));
12547       __ adcl($limit_hi$$Register, -1);
12548       __ lneg($limit_hi$$Register, $limit$$Register);
12549       __ movl($tmp$$Register, -strd);
12550     }
    // signed division: (EAX:EDX) / pos_stride
12552     __ idivl($tmp$$Register);
12553     if (strd < 0) {
12554       // restore sign
12555       __ negl($tmp$$Register);
12556     }
12557     // (EAX) * stride
12558     __ mull($tmp$$Register);
12559     // + init (ignore upper bits)
12560     __ addl($limit$$Register, $init$$Register);
12561   %}
12562   ins_pipe( pipe_slow );
12563 %}
12564 
12565 // ============================================================================
12566 // Branch Instructions
12567 // Jump Table
12568 instruct jumpXtnd(rRegI switch_val) %{
12569   match(Jump switch_val);
12570   ins_cost(350);
12571   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12572   ins_encode %{
12573     // Jump to Address(table_base + switch_reg)
12574     Address index(noreg, $switch_val$$Register, Address::times_1);
12575     __ jump(ArrayAddress($constantaddress, index));
12576   %}
12577   ins_pipe(pipe_jmp);
12578 %}
12579 
12580 // Jump Direct - Label defines a relative address from JMP+1
12581 instruct jmpDir(label labl) %{
12582   match(Goto);
12583   effect(USE labl);
12584 
12585   ins_cost(300);
12586   format %{ "JMP    $labl" %}
12587   size(5);
12588   ins_encode %{
12589     Label* L = $labl$$label;
12590     __ jmp(*L, false); // Always long jump
12591   %}
12592   ins_pipe( pipe_jmp );
12593 %}
12594 
12595 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12596 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12597   match(If cop cr);
12598   effect(USE labl);
12599 
12600   ins_cost(300);
12601   format %{ "J$cop    $labl" %}
12602   size(6);
12603   ins_encode %{
12604     Label* L = $labl$$label;
12605     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12606   %}
12607   ins_pipe( pipe_jcc );
12608 %}
12609 
12610 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12611 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12612   predicate(!n->has_vector_mask_set());
12613   match(CountedLoopEnd cop cr);
12614   effect(USE labl);
12615 
12616   ins_cost(300);
12617   format %{ "J$cop    $labl\t# Loop end" %}
12618   size(6);
12619   ins_encode %{
12620     Label* L = $labl$$label;
12621     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12622   %}
12623   ins_pipe( pipe_jcc );
12624 %}
12625 
12626 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12627 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12628   predicate(!n->has_vector_mask_set());
12629   match(CountedLoopEnd cop cmp);
12630   effect(USE labl);
12631 
12632   ins_cost(300);
12633   format %{ "J$cop,u  $labl\t# Loop end" %}
12634   size(6);
12635   ins_encode %{
12636     Label* L = $labl$$label;
12637     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12638   %}
12639   ins_pipe( pipe_jcc );
12640 %}
12641 
12642 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12643   predicate(!n->has_vector_mask_set());
12644   match(CountedLoopEnd cop cmp);
12645   effect(USE labl);
12646 
12647   ins_cost(200);
12648   format %{ "J$cop,u  $labl\t# Loop end" %}
12649   size(6);
12650   ins_encode %{
12651     Label* L = $labl$$label;
12652     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12653   %}
12654   ins_pipe( pipe_jcc );
12655 %}
12656 
12657 // mask version
12658 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// A bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12661 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12662   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12663   match(CountedLoopEnd cop cr);
12664   effect(USE labl, TEMP ktmp);
12665 
12666   ins_cost(400);
12667   format %{ "J$cop    $labl\t# Loop end\n\t"
12668             "restorevectmask \t# vector mask restore for loops" %}
12669   size(10);
12670   ins_encode %{
12671     Label* L = $labl$$label;
12672     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12673     __ restorevectmask($ktmp$$KRegister);
12674   %}
12675   ins_pipe( pipe_jcc );
12676 %}
12677 
12678 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// A bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12681 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12682   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12683   match(CountedLoopEnd cop cmp);
12684   effect(USE labl, TEMP ktmp);
12685 
12686   ins_cost(400);
12687   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12688             "restorevectmask \t# vector mask restore for loops" %}
12689   size(10);
12690   ins_encode %{
12691     Label* L = $labl$$label;
12692     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12693     __ restorevectmask($ktmp$$KRegister);
12694   %}
12695   ins_pipe( pipe_jcc );
12696 %}
12697 
// A bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12700 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12701   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12702   match(CountedLoopEnd cop cmp);
12703   effect(USE labl, TEMP ktmp);
12704 
12705   ins_cost(300);
12706   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12707             "restorevectmask \t# vector mask restore for loops" %}
12708   size(10);
12709   ins_encode %{
12710     Label* L = $labl$$label;
12711     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12712     __ restorevectmask($ktmp$$KRegister);
12713   %}
12714   ins_pipe( pipe_jcc );
12715 %}
12716 
12717 // Jump Direct Conditional - using unsigned comparison
12718 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12719   match(If cop cmp);
12720   effect(USE labl);
12721 
12722   ins_cost(300);
12723   format %{ "J$cop,u  $labl" %}
12724   size(6);
12725   ins_encode %{
12726     Label* L = $labl$$label;
12727     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12728   %}
12729   ins_pipe(pipe_jcc);
12730 %}
12731 
12732 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12733   match(If cop cmp);
12734   effect(USE labl);
12735 
12736   ins_cost(200);
12737   format %{ "J$cop,u  $labl" %}
12738   size(6);
12739   ins_encode %{
12740     Label* L = $labl$$label;
12741     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12742   %}
12743   ins_pipe(pipe_jcc);
12744 %}
12745 
12746 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12747   match(If cop cmp);
12748   effect(USE labl);
12749 
12750   ins_cost(200);
12751   format %{ $$template
12752     if ($cop$$cmpcode == Assembler::notEqual) {
12753       $$emit$$"JP,u   $labl\n\t"
12754       $$emit$$"J$cop,u   $labl"
12755     } else {
12756       $$emit$$"JP,u   done\n\t"
12757       $$emit$$"J$cop,u   $labl\n\t"
12758       $$emit$$"done:"
12759     }
12760   %}
12761   ins_encode %{
12762     Label* l = $labl$$label;
12763     if ($cop$$cmpcode == Assembler::notEqual) {
12764       __ jcc(Assembler::parity, *l, false);
12765       __ jcc(Assembler::notEqual, *l, false);
12766     } else if ($cop$$cmpcode == Assembler::equal) {
12767       Label done;
12768       __ jccb(Assembler::parity, done);
12769       __ jcc(Assembler::equal, *l, false);
12770       __ bind(done);
12771     } else {
12772        ShouldNotReachHere();
12773     }
12774   %}
12775   ins_pipe(pipe_jcc);
12776 %}
12777 
12778 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
// Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
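//
// Roughly, the emitted scan behaves like the following sketch (a simplified
// illustration only; field names are abbreviated, not the exact VM accessors):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers->data[i] == super) {
//       sub->secondary_super_cache = super;   // hit: cache it, result = 0, flags Z
//       return 0;
//     }
//   }
//   return not_zero;                          // miss: result != 0, flags NZ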
12783 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12784   match(Set result (PartialSubtypeCheck sub super));
12785   effect( KILL rcx, KILL cr );
12786 
12787   ins_cost(1100);  // slightly larger than the next version
12788   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12789             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12790             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12791             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12792             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12793             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12795      "miss:\t" %}
12796 
12797   opcode(0x1); // Force a XOR of EDI
12798   ins_encode( enc_PartialSubtypeCheck() );
12799   ins_pipe( pipe_slow );
12800 %}
12801 
12802 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12803   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12804   effect( KILL rcx, KILL result );
12805 
12806   ins_cost(1000);
12807   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12808             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12809             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12810             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12811             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12812             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12813      "miss:\t" %}
12814 
12815   opcode(0x0);  // No need to XOR EDI
12816   ins_encode( enc_PartialSubtypeCheck() );
12817   ins_pipe( pipe_slow );
12818 %}
12819 
12820 // ============================================================================
12821 // Branch Instructions -- short offset versions
12822 //
12823 // These instructions are used to replace jumps of a long offset (the default
12824 // match) with jumps of a shorter offset.  These instructions are all tagged
12825 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12826 // match rules in general matching.  Instead, the ADLC generates a conversion
12827 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
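//
// For example, a short unconditional jump (JMP rel8, opcode 0xEB) is 2 bytes
// versus 5 bytes for the long form (JMP rel32, opcode 0xE9), and a short
// conditional jump (Jcc rel8, opcodes 0x70-0x7F) is 2 bytes versus 6 bytes for
// the long form (0x0F 0x80-0x8F rel32); hence the size(2) attributes below
// versus the size(5)/size(6) attributes on the long variants above.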
12831 
12832 // Jump Direct - Label defines a relative address from JMP+1
12833 instruct jmpDir_short(label labl) %{
12834   match(Goto);
12835   effect(USE labl);
12836 
12837   ins_cost(300);
12838   format %{ "JMP,s  $labl" %}
12839   size(2);
12840   ins_encode %{
12841     Label* L = $labl$$label;
12842     __ jmpb(*L);
12843   %}
12844   ins_pipe( pipe_jmp );
12845   ins_short_branch(1);
12846 %}
12847 
12848 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12849 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12850   match(If cop cr);
12851   effect(USE labl);
12852 
12853   ins_cost(300);
12854   format %{ "J$cop,s  $labl" %}
12855   size(2);
12856   ins_encode %{
12857     Label* L = $labl$$label;
12858     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12859   %}
12860   ins_pipe( pipe_jcc );
12861   ins_short_branch(1);
12862 %}
12863 
12864 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12865 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12866   match(CountedLoopEnd cop cr);
12867   effect(USE labl);
12868 
12869   ins_cost(300);
12870   format %{ "J$cop,s  $labl\t# Loop end" %}
12871   size(2);
12872   ins_encode %{
12873     Label* L = $labl$$label;
12874     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12875   %}
12876   ins_pipe( pipe_jcc );
12877   ins_short_branch(1);
12878 %}
12879 
12880 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12881 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12882   match(CountedLoopEnd cop cmp);
12883   effect(USE labl);
12884 
12885   ins_cost(300);
12886   format %{ "J$cop,us $labl\t# Loop end" %}
12887   size(2);
12888   ins_encode %{
12889     Label* L = $labl$$label;
12890     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12891   %}
12892   ins_pipe( pipe_jcc );
12893   ins_short_branch(1);
12894 %}
12895 
12896 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12897   match(CountedLoopEnd cop cmp);
12898   effect(USE labl);
12899 
12900   ins_cost(300);
12901   format %{ "J$cop,us $labl\t# Loop end" %}
12902   size(2);
12903   ins_encode %{
12904     Label* L = $labl$$label;
12905     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12906   %}
12907   ins_pipe( pipe_jcc );
12908   ins_short_branch(1);
12909 %}
12910 
12911 // Jump Direct Conditional - using unsigned comparison
12912 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12913   match(If cop cmp);
12914   effect(USE labl);
12915 
12916   ins_cost(300);
12917   format %{ "J$cop,us $labl" %}
12918   size(2);
12919   ins_encode %{
12920     Label* L = $labl$$label;
12921     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12922   %}
12923   ins_pipe( pipe_jcc );
12924   ins_short_branch(1);
12925 %}
12926 
12927 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12928   match(If cop cmp);
12929   effect(USE labl);
12930 
12931   ins_cost(300);
12932   format %{ "J$cop,us $labl" %}
12933   size(2);
12934   ins_encode %{
12935     Label* L = $labl$$label;
12936     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12937   %}
12938   ins_pipe( pipe_jcc );
12939   ins_short_branch(1);
12940 %}
12941 
12942 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12943   match(If cop cmp);
12944   effect(USE labl);
12945 
12946   ins_cost(300);
12947   format %{ $$template
12948     if ($cop$$cmpcode == Assembler::notEqual) {
12949       $$emit$$"JP,u,s   $labl\n\t"
12950       $$emit$$"J$cop,u,s   $labl"
12951     } else {
12952       $$emit$$"JP,u,s   done\n\t"
12953       $$emit$$"J$cop,u,s  $labl\n\t"
12954       $$emit$$"done:"
12955     }
12956   %}
12957   size(4);
12958   ins_encode %{
12959     Label* l = $labl$$label;
12960     if ($cop$$cmpcode == Assembler::notEqual) {
12961       __ jccb(Assembler::parity, *l);
12962       __ jccb(Assembler::notEqual, *l);
12963     } else if ($cop$$cmpcode == Assembler::equal) {
12964       Label done;
12965       __ jccb(Assembler::parity, done);
12966       __ jccb(Assembler::equal, *l);
12967       __ bind(done);
12968     } else {
12969        ShouldNotReachHere();
12970     }
12971   %}
12972   ins_pipe(pipe_jcc);
12973   ins_short_branch(1);
12974 %}
12975 
12976 // ============================================================================
12977 // Long Compare
12978 //
12979 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12980 // is tricky.  The flavor of compare used depends on whether we are testing
12981 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12982 // The GE test is the negated LT test.  The LE test can be had by commuting
12983 // the operands (yielding a GE test) and then negating; negate again for the
12984 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12985 // NE test is negated from that.
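//
// As an illustration, the LT/GE flavor on the two register halves (this is the
// sequence the long_cmp_flags2 encoding used below emits, per its format) is:
//   CMP  src1.lo,src2.lo    // produce a borrow from the low halves
//   MOV  tmp,src1.hi
//   SBB  tmp,src2.hi        // tmp is discarded; SF/OF now reflect the full
//                           // 64-bit signed comparison, so a following
//                           // JL/JGE behaves like a true 64-bit compare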
12986 
12987 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12988 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12989 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12990 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12991 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12992 // foo match ends up with the wrong leaf.  One fix is to not match both
12993 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12994 // both forms beat the trinary form of long-compare and both are very useful
12995 // on Intel which has so few registers.
12996 
12997 // Manifest a CmpL result in an integer register.  Very painful.
12998 // This is the test to avoid.
12999 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13000   match(Set dst (CmpL3 src1 src2));
13001   effect( KILL flags );
13002   ins_cost(1000);
13003   format %{ "XOR    $dst,$dst\n\t"
13004             "CMP    $src1.hi,$src2.hi\n\t"
13005             "JLT,s  m_one\n\t"
13006             "JGT,s  p_one\n\t"
13007             "CMP    $src1.lo,$src2.lo\n\t"
13008             "JB,s   m_one\n\t"
13009             "JEQ,s  done\n"
13010     "p_one:\tINC    $dst\n\t"
13011             "JMP,s  done\n"
13012     "m_one:\tDEC    $dst\n"
13013      "done:" %}
13014   ins_encode %{
13015     Label p_one, m_one, done;
13016     __ xorptr($dst$$Register, $dst$$Register);
13017     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13018     __ jccb(Assembler::less,    m_one);
13019     __ jccb(Assembler::greater, p_one);
13020     __ cmpl($src1$$Register, $src2$$Register);
13021     __ jccb(Assembler::below,   m_one);
13022     __ jccb(Assembler::equal,   done);
13023     __ bind(p_one);
13024     __ incrementl($dst$$Register);
13025     __ jmpb(done);
13026     __ bind(m_one);
13027     __ decrementl($dst$$Register);
13028     __ bind(done);
13029   %}
13030   ins_pipe( pipe_slow );
13031 %}
13032 
13033 //======
13034 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13035 // compares.  Can be used for LE or GT compares by reversing arguments.
13036 // NOT GOOD FOR EQ/NE tests.
13037 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13038   match( Set flags (CmpL src zero ));
13039   ins_cost(100);
13040   format %{ "TEST   $src.hi,$src.hi" %}
13041   opcode(0x85);
13042   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13043   ins_pipe( ialu_cr_reg_reg );
13044 %}
13045 
13046 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13047 // compares.  Can be used for LE or GT compares by reversing arguments.
13048 // NOT GOOD FOR EQ/NE tests.
13049 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13050   match( Set flags (CmpL src1 src2 ));
13051   effect( TEMP tmp );
13052   ins_cost(300);
13053   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13054             "MOV    $tmp,$src1.hi\n\t"
13055             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13056   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13057   ins_pipe( ialu_cr_reg_reg );
13058 %}
13059 
// Long compares reg < zero/reg OR reg >= zero/reg.
13061 // Just a wrapper for a normal branch, plus the predicate test.
13062 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13063   match(If cmp flags);
13064   effect(USE labl);
13065   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13066   expand %{
13067     jmpCon(cmp,flags,labl);    // JLT or JGE...
13068   %}
13069 %}
13070 
13071 //======
13072 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13073 // compares.  Can be used for LE or GT compares by reversing arguments.
13074 // NOT GOOD FOR EQ/NE tests.
13075 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13076   match(Set flags (CmpUL src zero));
13077   ins_cost(100);
13078   format %{ "TEST   $src.hi,$src.hi" %}
13079   opcode(0x85);
13080   ins_encode(OpcP, RegReg_Hi2(src, src));
13081   ins_pipe(ialu_cr_reg_reg);
13082 %}
13083 
13084 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13085 // compares.  Can be used for LE or GT compares by reversing arguments.
13086 // NOT GOOD FOR EQ/NE tests.
13087 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13088   match(Set flags (CmpUL src1 src2));
13089   effect(TEMP tmp);
13090   ins_cost(300);
13091   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13092             "MOV    $tmp,$src1.hi\n\t"
13093             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13094   ins_encode(long_cmp_flags2(src1, src2, tmp));
13095   ins_pipe(ialu_cr_reg_reg);
13096 %}
13097 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13099 // Just a wrapper for a normal branch, plus the predicate test.
13100 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13101   match(If cmp flags);
13102   effect(USE labl);
13103   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13104   expand %{
13105     jmpCon(cmp, flags, labl);    // JLT or JGE...
13106   %}
13107 %}
13108 
13109 // Compare 2 longs and CMOVE longs.
13110 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13111   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13112   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13113   ins_cost(400);
13114   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13115             "CMOV$cmp $dst.hi,$src.hi" %}
13116   opcode(0x0F,0x40);
13117   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13118   ins_pipe( pipe_cmov_reg_long );
13119 %}
13120 
13121 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13122   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13123   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13124   ins_cost(500);
13125   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13126             "CMOV$cmp $dst.hi,$src.hi" %}
13127   opcode(0x0F,0x40);
13128   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13129   ins_pipe( pipe_cmov_reg_long );
13130 %}
13131 
13132 // Compare 2 longs and CMOVE ints.
13133 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13134   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13135   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13136   ins_cost(200);
13137   format %{ "CMOV$cmp $dst,$src" %}
13138   opcode(0x0F,0x40);
13139   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13140   ins_pipe( pipe_cmov_reg );
13141 %}
13142 
13143 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13144   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13145   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13146   ins_cost(250);
13147   format %{ "CMOV$cmp $dst,$src" %}
13148   opcode(0x0F,0x40);
13149   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13150   ins_pipe( pipe_cmov_mem );
13151 %}
13152 
// Compare 2 longs and CMOVE ptrs.
13154 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13155   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13156   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13157   ins_cost(200);
13158   format %{ "CMOV$cmp $dst,$src" %}
13159   opcode(0x0F,0x40);
13160   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13161   ins_pipe( pipe_cmov_reg );
13162 %}
13163 
13164 // Compare 2 longs and CMOVE doubles
13165 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13167   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13168   ins_cost(200);
13169   expand %{
13170     fcmovDPR_regS(cmp,flags,dst,src);
13171   %}
13172 %}
13173 
13174 // Compare 2 longs and CMOVE doubles
13175 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13177   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13178   ins_cost(200);
13179   expand %{
13180     fcmovD_regS(cmp,flags,dst,src);
13181   %}
13182 %}
13183 
13184 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13186   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13187   ins_cost(200);
13188   expand %{
13189     fcmovFPR_regS(cmp,flags,dst,src);
13190   %}
13191 %}
13192 
13193 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13195   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13196   ins_cost(200);
13197   expand %{
13198     fcmovF_regS(cmp,flags,dst,src);
13199   %}
13200 %}
13201 
13202 //======
13203 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13204 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13205   match( Set flags (CmpL src zero ));
13206   effect(TEMP tmp);
13207   ins_cost(200);
13208   format %{ "MOV    $tmp,$src.lo\n\t"
13209             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13210   ins_encode( long_cmp_flags0( src, tmp ) );
13211   ins_pipe( ialu_reg_reg_long );
13212 %}
13213 
13214 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13215 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13216   match( Set flags (CmpL src1 src2 ));
13217   ins_cost(200+300);
13218   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13219             "JNE,s  skip\n\t"
13220             "CMP    $src1.hi,$src2.hi\n\t"
13221      "skip:\t" %}
13222   ins_encode( long_cmp_flags1( src1, src2 ) );
13223   ins_pipe( ialu_cr_reg_reg );
13224 %}
13225 
13226 // Long compare reg == zero/reg OR reg != zero/reg
13227 // Just a wrapper for a normal branch, plus the predicate test.
13228 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13229   match(If cmp flags);
13230   effect(USE labl);
13231   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13232   expand %{
13233     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13234   %}
13235 %}
13236 
13237 //======
13238 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13239 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13240   match(Set flags (CmpUL src zero));
13241   effect(TEMP tmp);
13242   ins_cost(200);
13243   format %{ "MOV    $tmp,$src.lo\n\t"
13244             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13245   ins_encode(long_cmp_flags0(src, tmp));
13246   ins_pipe(ialu_reg_reg_long);
13247 %}
13248 
13249 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13250 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13251   match(Set flags (CmpUL src1 src2));
13252   ins_cost(200+300);
13253   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13254             "JNE,s  skip\n\t"
13255             "CMP    $src1.hi,$src2.hi\n\t"
13256      "skip:\t" %}
13257   ins_encode(long_cmp_flags1(src1, src2));
13258   ins_pipe(ialu_cr_reg_reg);
13259 %}
13260 
13261 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13262 // Just a wrapper for a normal branch, plus the predicate test.
13263 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13264   match(If cmp flags);
13265   effect(USE labl);
13266   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13267   expand %{
13268     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13269   %}
13270 %}
13271 
13272 // Compare 2 longs and CMOVE longs.
13273 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13274   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13275   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13276   ins_cost(400);
13277   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13278             "CMOV$cmp $dst.hi,$src.hi" %}
13279   opcode(0x0F,0x40);
13280   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13281   ins_pipe( pipe_cmov_reg_long );
13282 %}
13283 
13284 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13285   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13286   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13287   ins_cost(500);
13288   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13289             "CMOV$cmp $dst.hi,$src.hi" %}
13290   opcode(0x0F,0x40);
13291   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13292   ins_pipe( pipe_cmov_reg_long );
13293 %}
13294 
13295 // Compare 2 longs and CMOVE ints.
13296 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13297   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13298   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13299   ins_cost(200);
13300   format %{ "CMOV$cmp $dst,$src" %}
13301   opcode(0x0F,0x40);
13302   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13303   ins_pipe( pipe_cmov_reg );
13304 %}
13305 
13306 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13307   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13308   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13309   ins_cost(250);
13310   format %{ "CMOV$cmp $dst,$src" %}
13311   opcode(0x0F,0x40);
13312   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13313   ins_pipe( pipe_cmov_mem );
13314 %}
13315 
// Compare 2 longs and CMOVE ptrs.
13317 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13318   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13319   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13320   ins_cost(200);
13321   format %{ "CMOV$cmp $dst,$src" %}
13322   opcode(0x0F,0x40);
13323   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13324   ins_pipe( pipe_cmov_reg );
13325 %}
13326 
13327 // Compare 2 longs and CMOVE doubles
13328 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13330   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13331   ins_cost(200);
13332   expand %{
13333     fcmovDPR_regS(cmp,flags,dst,src);
13334   %}
13335 %}
13336 
13337 // Compare 2 longs and CMOVE doubles
13338 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13340   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13341   ins_cost(200);
13342   expand %{
13343     fcmovD_regS(cmp,flags,dst,src);
13344   %}
13345 %}
13346 
13347 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13349   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13350   ins_cost(200);
13351   expand %{
13352     fcmovFPR_regS(cmp,flags,dst,src);
13353   %}
13354 %}
13355 
13356 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13358   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13359   ins_cost(200);
13360   expand %{
13361     fcmovF_regS(cmp,flags,dst,src);
13362   %}
13363 %}
13364 
13365 //======
13366 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13367 // Same as cmpL_reg_flags_LEGT except must negate src
13368 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13369   match( Set flags (CmpL src zero ));
13370   effect( TEMP tmp );
13371   ins_cost(300);
13372   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13373             "CMP    $tmp,$src.lo\n\t"
13374             "SBB    $tmp,$src.hi\n\t" %}
13375   ins_encode( long_cmp_flags3(src, tmp) );
13376   ins_pipe( ialu_reg_reg_long );
13377 %}
13378 
13379 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13380 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13381 // requires a commuted test to get the same result.
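// (For example, src1 > src2 holds exactly when src2 < src1, so the branch
// wrappers below take the cmpOp_commute operand to supply the matching
// commuted condition.)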
13382 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13383   match( Set flags (CmpL src1 src2 ));
13384   effect( TEMP tmp );
13385   ins_cost(300);
13386   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13387             "MOV    $tmp,$src2.hi\n\t"
13388             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13389   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13390   ins_pipe( ialu_cr_reg_reg );
13391 %}
13392 
// Long compares reg < zero/reg OR reg >= zero/reg.
13394 // Just a wrapper for a normal branch, plus the predicate test
13395 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13396   match(If cmp flags);
13397   effect(USE labl);
13398   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13399   ins_cost(300);
13400   expand %{
13401     jmpCon(cmp,flags,labl);    // JGT or JLE...
13402   %}
13403 %}
13404 
13405 //======
13406 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13407 // Same as cmpUL_reg_flags_LEGT except must negate src
13408 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13409   match(Set flags (CmpUL src zero));
13410   effect(TEMP tmp);
13411   ins_cost(300);
13412   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13413             "CMP    $tmp,$src.lo\n\t"
13414             "SBB    $tmp,$src.hi\n\t" %}
13415   ins_encode(long_cmp_flags3(src, tmp));
13416   ins_pipe(ialu_reg_reg_long);
13417 %}
13418 
13419 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13420 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13421 // requires a commuted test to get the same result.
13422 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13423   match(Set flags (CmpUL src1 src2));
13424   effect(TEMP tmp);
13425   ins_cost(300);
13426   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13427             "MOV    $tmp,$src2.hi\n\t"
13428             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13429   ins_encode(long_cmp_flags2( src2, src1, tmp));
13430   ins_pipe(ialu_cr_reg_reg);
13431 %}
13432 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13434 // Just a wrapper for a normal branch, plus the predicate test
13435 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13436   match(If cmp flags);
13437   effect(USE labl);
13438   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13439   ins_cost(300);
13440   expand %{
13441     jmpCon(cmp, flags, labl);    // JGT or JLE...
13442   %}
13443 %}
13444 
13445 // Compare 2 longs and CMOVE longs.
13446 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13447   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13448   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13449   ins_cost(400);
13450   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13451             "CMOV$cmp $dst.hi,$src.hi" %}
13452   opcode(0x0F,0x40);
13453   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13454   ins_pipe( pipe_cmov_reg_long );
13455 %}
13456 
13457 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13458   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13459   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13460   ins_cost(500);
13461   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13462             "CMOV$cmp $dst.hi,$src.hi+4" %}
13463   opcode(0x0F,0x40);
13464   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13465   ins_pipe( pipe_cmov_reg_long );
13466 %}
13467 
13468 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13469   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13470   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13471   ins_cost(400);
13472   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13473             "CMOV$cmp $dst.hi,$src.hi" %}
13474   opcode(0x0F,0x40);
13475   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13476   ins_pipe( pipe_cmov_reg_long );
13477 %}
13478 
13479 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13480   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13481   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13482   ins_cost(500);
13483   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13484             "CMOV$cmp $dst.hi,$src.hi+4" %}
13485   opcode(0x0F,0x40);
13486   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13487   ins_pipe( pipe_cmov_reg_long );
13488 %}
13489 
13490 // Compare 2 longs and CMOVE ints.
13491 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13492   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13493   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13494   ins_cost(200);
13495   format %{ "CMOV$cmp $dst,$src" %}
13496   opcode(0x0F,0x40);
13497   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13498   ins_pipe( pipe_cmov_reg );
13499 %}
13500 
13501 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13503   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13504   ins_cost(250);
13505   format %{ "CMOV$cmp $dst,$src" %}
13506   opcode(0x0F,0x40);
13507   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13508   ins_pipe( pipe_cmov_mem );
13509 %}
13510 
13511 // Compare 2 longs and CMOVE ptrs.
13512 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13513   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13514   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
13516   format %{ "CMOV$cmp $dst,$src" %}
13517   opcode(0x0F,0x40);
13518   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13519   ins_pipe( pipe_cmov_reg );
13520 %}
13521 
13522 // Compare 2 longs and CMOVE doubles
13523 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13524   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13525   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13526   ins_cost(200);
13527   expand %{
13528     fcmovDPR_regS(cmp,flags,dst,src);
13529   %}
13530 %}
13531 
13532 // Compare 2 longs and CMOVE doubles
13533 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13534   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13535   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13536   ins_cost(200);
13537   expand %{
13538     fcmovD_regS(cmp,flags,dst,src);
13539   %}
13540 %}
13541 
13542 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13543   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13544   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13545   ins_cost(200);
13546   expand %{
13547     fcmovFPR_regS(cmp,flags,dst,src);
13548   %}
13549 %}
13550 
13551 
13552 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13553   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13554   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13555   ins_cost(200);
13556   expand %{
13557     fcmovF_regS(cmp,flags,dst,src);
13558   %}
13559 %}
13560 
13561 
13562 // ============================================================================
13563 // Procedure Call/Return Instructions
13564 // Call Java Static Instruction
13565 // Note: If this code changes, the corresponding ret_addr_offset() and
13566 //       compute_padding() functions will have to be adjusted.
13567 instruct CallStaticJavaDirect(method meth) %{
13568   match(CallStaticJava);
13569   effect(USE meth);
13570 
13571   ins_cost(300);
13572   format %{ "CALL,static " %}
13573   opcode(0xE8); /* E8 cd */
13574   ins_encode( pre_call_resets,
13575               Java_Static_Call( meth ),
13576               call_epilog,
13577               post_call_FPU );
13578   ins_pipe( pipe_slow );
13579   ins_alignment(4);
13580 %}
13581 
13582 // Call Java Dynamic Instruction
13583 // Note: If this code changes, the corresponding ret_addr_offset() and
13584 //       compute_padding() functions will have to be adjusted.
13585 instruct CallDynamicJavaDirect(method meth) %{
13586   match(CallDynamicJava);
13587   effect(USE meth);
13588 
13589   ins_cost(300);
13590   format %{ "MOV    EAX,(oop)-1\n\t"
13591             "CALL,dynamic" %}
13592   opcode(0xE8); /* E8 cd */
13593   ins_encode( pre_call_resets,
13594               Java_Dynamic_Call( meth ),
13595               call_epilog,
13596               post_call_FPU );
13597   ins_pipe( pipe_slow );
13598   ins_alignment(4);
13599 %}
13600 
13601 // Call Runtime Instruction
13602 instruct CallRuntimeDirect(method meth) %{
13603   match(CallRuntime );
13604   effect(USE meth);
13605 
13606   ins_cost(300);
13607   format %{ "CALL,runtime " %}
13608   opcode(0xE8); /* E8 cd */
13609   // Use FFREEs to clear entries in float stack
13610   ins_encode( pre_call_resets,
13611               FFree_Float_Stack_All,
13612               Java_To_Runtime( meth ),
13613               post_call_FPU );
13614   ins_pipe( pipe_slow );
13615 %}
13616 
13617 // Call runtime without safepoint
13618 instruct CallLeafDirect(method meth) %{
13619   match(CallLeaf);
13620   effect(USE meth);
13621 
13622   ins_cost(300);
13623   format %{ "CALL_LEAF,runtime " %}
13624   opcode(0xE8); /* E8 cd */
13625   ins_encode( pre_call_resets,
13626               FFree_Float_Stack_All,
13627               Java_To_Runtime( meth ),
13628               Verify_FPU_For_Leaf, post_call_FPU );
13629   ins_pipe( pipe_slow );
13630 %}
13631 
13632 instruct CallLeafNoFPDirect(method meth) %{
13633   match(CallLeafNoFP);
13634   effect(USE meth);
13635 
13636   ins_cost(300);
13637   format %{ "CALL_LEAF_NOFP,runtime " %}
13638   opcode(0xE8); /* E8 cd */
13639   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13640   ins_pipe( pipe_slow );
13641 %}
13642 
13643 
13644 // Return Instruction
13645 // Remove the return address & jump to it.
13646 instruct Ret() %{
13647   match(Return);
13648   format %{ "RET" %}
13649   opcode(0xC3);
13650   ins_encode(OpcP);
13651   ins_pipe( pipe_jmp );
13652 %}
13653 
13654 // Tail Call; Jump from runtime stub to Java code.
13655 // Also known as an 'interprocedural jump'.
13656 // Target of jump will eventually return to caller.
13657 // TailJump below removes the return address.
13658 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13659   match(TailCall jump_target method_ptr);
13660   ins_cost(300);
13661   format %{ "JMP    $jump_target \t# EBX holds method" %}
13662   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13663   ins_encode( OpcP, RegOpc(jump_target) );
13664   ins_pipe( pipe_jmp );
13665 %}
13666 
13667 
13668 // Tail Jump; remove the return address; jump to target.
13669 // TailCall above leaves the return address around.
13670 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13671   match( TailJump jump_target ex_oop );
13672   ins_cost(300);
13673   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13674             "JMP    $jump_target " %}
13675   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13676   ins_encode( enc_pop_rdx,
13677               OpcP, RegOpc(jump_target) );
13678   ins_pipe( pipe_jmp );
13679 %}
13680 
13681 // Create exception oop: created by stack-crawling runtime code.
13682 // Created exception is now available to this handler, and is setup
13683 // just prior to jumping to this handler.  No code emitted.
13684 instruct CreateException( eAXRegP ex_oop )
13685 %{
13686   match(Set ex_oop (CreateEx));
13687 
13688   size(0);
13689   // use the following format syntax
13690   format %{ "# exception oop is in EAX; no code emitted" %}
13691   ins_encode();
13692   ins_pipe( empty );
13693 %}
13694 
13695 
13696 // Rethrow exception:
13697 // The exception oop will come in the first argument position.
13698 // Then JUMP (not call) to the rethrow stub code.
13699 instruct RethrowException()
13700 %{
13701   match(Rethrow);
13702 
13703   // use the following format syntax
13704   format %{ "JMP    rethrow_stub" %}
13705   ins_encode(enc_rethrow);
13706   ins_pipe( pipe_jmp );
13707 %}
13708 
13709 // inlined locking and unlocking
13710 
13711 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13712   predicate(Compile::current()->use_rtm());
13713   match(Set cr (FastLock object box));
13714   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13715   ins_cost(300);
13716   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13717   ins_encode %{
13718     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13719                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13720                  _rtm_counters, _stack_rtm_counters,
13721                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13722                  true, ra_->C->profile_rtm());
13723   %}
13724   ins_pipe(pipe_slow);
13725 %}
13726 
13727 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13728   predicate(!Compile::current()->use_rtm());
13729   match(Set cr (FastLock object box));
13730   effect(TEMP tmp, TEMP scr, USE_KILL box);
13731   ins_cost(300);
13732   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13733   ins_encode %{
13734     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13735                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13736   %}
13737   ins_pipe(pipe_slow);
13738 %}
13739 
13740 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13741   match(Set cr (FastUnlock object box));
13742   effect(TEMP tmp, USE_KILL box);
13743   ins_cost(300);
13744   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13745   ins_encode %{
13746     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13747   %}
13748   ins_pipe(pipe_slow);
13749 %}
13750 
13751 
13752 
13753 // ============================================================================
13754 // Safepoint Instruction
13755 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13756   match(SafePoint poll);
13757   effect(KILL cr, USE poll);
13758 
13759   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13760   ins_cost(125);
13761   // EBP would need size(3)
13762   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13763   ins_encode %{
13764     __ relocate(relocInfo::poll_type);
13765     address pre_pc = __ pc();
13766     __ testl(rax, Address($poll$$Register, 0));
13767     address post_pc = __ pc();
13768     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13769   %}
13770   ins_pipe(ialu_reg_mem);
13771 %}
13772 
13773 
13774 // ============================================================================
13775 // This name is KNOWN by the ADLC and cannot be changed.
13776 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13777 // for this guy.
13778 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13779   match(Set dst (ThreadLocal));
13780   effect(DEF dst, KILL cr);
13781 
13782   format %{ "MOV    $dst, Thread::current()" %}
13783   ins_encode %{
13784     Register dstReg = as_Register($dst$$reg);
13785     __ get_thread(dstReg);
13786   %}
13787   ins_pipe( ialu_reg_fat );
13788 %}
13789 
13790 
13791 
13792 //----------PEEPHOLE RULES-----------------------------------------------------
13793 // These must follow all instruction definitions as they use the names
13794 // defined in the instructions definitions.
13795 //
13796 // peepmatch ( root_instr_name [preceding_instruction]* );
13797 //
13798 // peepconstraint ( instruction_number.operand_name relational_op
13799 //                  instruction_number.operand_name
13800 //                  [, ...] );
13801 // // instruction numbers are zero-based using left to right order in peepmatch
13802 //
13803 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13804 // // provide an instruction_number.operand_name for each operand that appears
13805 // // in the replacement instruction's match rule
13806 //
13807 // ---------VM FLAGS---------------------------------------------------------
13808 //
13809 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13810 //
13811 // Each peephole rule is given an identifying number starting with zero and
13812 // increasing by one in the order seen by the parser.  An individual peephole
13813 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13814 // on the command-line.
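// Example (usage as described above; flag availability may depend on the
// build type):
//   -XX:-OptoPeephole        turns off every peephole rule
//   -XX:OptoPeepholeAt=1     enables only the rule numbered 1, disabling the rest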
13815 //
13816 // ---------CURRENT LIMITATIONS----------------------------------------------
13817 //
13818 // Only match adjacent instructions in same basic block
13819 // Only equality constraints
13820 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13821 // Only one replacement instruction
13822 //
13823 // ---------EXAMPLE----------------------------------------------------------
13824 //
13825 // // pertinent parts of existing instructions in architecture description
13826 // instruct movI(rRegI dst, rRegI src) %{
13827 //   match(Set dst (CopyI src));
13828 // %}
13829 //
13830 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13831 //   match(Set dst (AddI dst src));
13832 //   effect(KILL cr);
13833 // %}
13834 //
13835 // // Change (inc mov) to lea
13836 // peephole %{
13837 //   // increment preceded by register-register move
13838 //   peepmatch ( incI_eReg movI );
13839 //   // require that the destination register of the increment
13840 //   // match the destination register of the move
13841 //   peepconstraint ( 0.dst == 1.dst );
13842 //   // construct a replacement instruction that sets
13843 //   // the destination to ( move's source register + one )
13844 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13845 // %}
13846 //
13847 // Implementation no longer uses movX instructions since
13848 // machine-independent system no longer uses CopyX nodes.
13849 //
13850 // peephole %{
13851 //   peepmatch ( incI_eReg movI );
13852 //   peepconstraint ( 0.dst == 1.dst );
13853 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13854 // %}
13855 //
13856 // peephole %{
13857 //   peepmatch ( decI_eReg movI );
13858 //   peepconstraint ( 0.dst == 1.dst );
13859 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13860 // %}
13861 //
13862 // peephole %{
13863 //   peepmatch ( addI_eReg_imm movI );
13864 //   peepconstraint ( 0.dst == 1.dst );
13865 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13866 // %}
13867 //
13868 // peephole %{
13869 //   peepmatch ( addP_eReg_imm movP );
13870 //   peepconstraint ( 0.dst == 1.dst );
13871 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13872 // %}
13873 
13874 // // Change load of spilled value to only a spill
13875 // instruct storeI(memory mem, rRegI src) %{
13876 //   match(Set mem (StoreI mem src));
13877 // %}
13878 //
13879 // instruct loadI(rRegI dst, memory mem) %{
13880 //   match(Set dst (LoadI mem));
13881 // %}
13882 //
13883 peephole %{
13884   peepmatch ( loadI storeI );
13885   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13886   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13887 %}
13888 
13889 //----------SMARTSPILL RULES---------------------------------------------------
13890 // These must follow all instruction definitions as they use the names
13891 // defined in the instructions definitions.