1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
    63 // Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
    64 // SOE was turned off in Java code due to the frequent use of uncommon traps.
    65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
    71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
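       // Worked reading of one entry above, purely as illustration: the line
       // "reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());" gives EBX a register
       // save type of SOC, a C-convention save type of SOE, an ideal register type
       // of Op_RegI (so it spills with LoadI/StoreI), and hardware encoding 3, the
       // bit pattern placed into opcodes that reference EBX.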
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
    81 // OK, so here's the trick: FPR1 is really st(0), except in the midst of
    82 // emitting assembly for a machnode. During emission the FPU stack is
    83 // pushed, making FPR1 == st(1) temporarily. However, at any safepoint
    84 // the stack will not have this element, so FPR1 == st(0) from the
    85 // oopMap viewpoint. This same numbering weirdness forces the instruction
    86 // encoding to play games with the register encode to correct for the
    87 // 0/1 issue. See MachSpillCopyNode::implementation, where it does
    88 // flt->flt moves, for an example.
    89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
   144 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
   145 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
   224 // Floating point registers.  Notice FPR0 is not a choice.
   225 // FPR0 is never allocated; we use clever encodings to fake
   226 // 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
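       // For illustration, with the x86 encodings from the reg_defs above
       // (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5, EDI=7): HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/,
       // HIGH_FROM_LOW(1 /*ECX*/) == 3 /*EBX*/, and HIGH_FROM_LOW(5 /*EBP*/) == 7 /*EDI*/,
       // matching the long pairs EDX:EAX, EBX:ECX, and EDI:EBP listed in the
       // register-allocation comments above.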
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
   265     // Post-loop multi-versioning expects the mask to be present in the K1 register.
   266     // Until that is fixed, the RA should not allocate the K1 register; this prevents
   267     // any accidental corruption of the value held in K1.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
   275 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   277   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
   278   // for the 128-bit operands of SSE instructions.
  279   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
   280   // Store the value to a 128-bit operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
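       // Worked example of the masking above (the address is illustrative): if adr is
       // 0x1008, then ((uintptr_t)adr) & ~0xF == 0x1000, i.e. adr is rounded down to the
       // previous 16-byte boundary.  The pool below reserves one extra 128-bit slot so
       // that this rounding never moves an operand outside the buffer.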
  285 
   286 // Buffer for 128-bit masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
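       // Sketch of the intended use (not a definition of any matching rule): ANDPS/ANDPD
       // with a signmask entry (0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF per lane) clears the sign
       // bit, which is exactly AbsF/AbsD, while XORPS/XORPD with a signflip entry
       // (0x80000000 / 0x8000000000000000 per lane) toggles the sign bit, which is exactly
       // NegF/NegD.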
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
   308 // !!!!! Special hack to get all types of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
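       // The arithmetic behind the two constants above, for illustration: a direct
       // CALL rel32 is one opcode byte (0xE8) plus a 4-byte displacement, hence 5 bytes.
       // The dynamic (inline cache) call is additionally preceded by the 5-byte MOV that
       // CallDynamicJavaDirectNode::compute_padding() below skips over, hence 10 bytes.
       // Any fldcw/vzeroupper bytes counted by pre_call_resets_size() come on top of that.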
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
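       // ModRM layout: mod (2 bits) | reg (3 bits) | r/m (3 bits).  For example,
       // emit_rm(cbuf, 0x3, 0x2, 0x0) emits (3 << 6) | (2 << 3) | 0 == 0xD0, a
       // register-direct ModRM byte selecting EDX in the reg field and EAX in the
       // r/m field (values chosen only as an illustration).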
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  415     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  419     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  420   }
  421 }
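       // Illustrative call (values chosen only as an example): store_to_stackslot(cbuf,
       // 0xDB, 0x0, 8) takes the short-displacement branch and emits DB 44 24 08, i.e.
       // FILD dword ptr [ESP+8]: opcode 0xDB, ModRM 0x44 (mod=01, reg=0, r/m=ESP),
       // SIB 0x24 (ESP base, no index), and the 8-bit displacement 0x08.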
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
   425   // If there is no index and no scale, use the form without a SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
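       // Usage sketch (operands chosen only for illustration): after an 0x8B (MOV r32,r/m32)
       // opcode has been emitted, encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4, 0, 8,
       // relocInfo::none) takes the no-SIB, 8-bit-displacement path and emits 41 08,
       // completing the instruction MOV EAX, [ECX+8].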
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
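       // Net effect of the sequence above: dst ends up -1 if the compare was unordered
       // (NaN) or 'below', 0 if the operands were equal, and 1 if 'above', i.e. the usual
       // three-way floating-point compare result.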
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   __ verified_entry(C);
  628 
  629   C->output()->set_frame_complete(cbuf.insts_size());
  630 
  631   if (C->has_mach_constant_base_node()) {
   632     // NOTE: We set the table base offset here because users of the constant
   633     // table might be emitted before the MachConstantBaseNode.
  634     ConstantTable& constant_table = C->output()->constant_table();
  635     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  636   }
  637 }
  638 
  639 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  640   return MachNode::size(ra_); // too many variables; just compute it the hard way
  641 }
  642 
  643 int MachPrologNode::reloc() const {
  644   return 0; // a large enough number
  645 }
  646 
  647 //=============================================================================
  648 #ifndef PRODUCT
  649 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  650   Compile *C = ra_->C;
  651   int framesize = C->output()->frame_size_in_bytes();
  652   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   653   // Remove two words for the return addr and rbp.
  654   framesize -= 2*wordSize;
  655 
  656   if (C->max_vector_size() > 16) {
  657     st->print("VZEROUPPER");
  658     st->cr(); st->print("\t");
  659   }
  660   if (C->in_24_bit_fp_mode()) {
  661     st->print("FLDCW  standard control word");
  662     st->cr(); st->print("\t");
  663   }
  664   if (framesize) {
  665     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  666     st->cr(); st->print("\t");
  667   }
  668   st->print_cr("POPL   EBP"); st->print("\t");
  669   if (do_polling() && C->is_method_compilation()) {
  670     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  671               "JA       #safepoint_stub\t"
  672               "# Safepoint: poll for GC");
  673   }
  674 }
  675 #endif
  676 
  677 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  678   Compile *C = ra_->C;
  679   MacroAssembler _masm(&cbuf);
  680 
  681   if (C->max_vector_size() > 16) {
  682     // Clear upper bits of YMM registers when current compiled code uses
  683     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  684     _masm.vzeroupper();
  685   }
  686   // If method set FPU control word, restore to standard control word
  687   if (C->in_24_bit_fp_mode()) {
  688     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  689   }
  690 
  691   int framesize = C->output()->frame_size_in_bytes();
  692   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   693   // Remove two words for the return addr and rbp.
  694   framesize -= 2*wordSize;
  695 
  696   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  697 
  698   if (framesize >= 128) {
  699     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  700     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  701     emit_d32(cbuf, framesize);
  702   } else if (framesize) {
  703     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  704     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  705     emit_d8(cbuf, framesize);
  706   }
  707 
  708   emit_opcode(cbuf, 0x58 | EBP_enc);
  709 
  710   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  711     __ reserved_stack_check();
  712   }
  713 
  714   if (do_polling() && C->is_method_compilation()) {
  715     Register thread = as_Register(EBX_enc);
  716     MacroAssembler masm(&cbuf);
  717     __ get_thread(thread);
  718     Label dummy_label;
  719     Label* code_stub = &dummy_label;
  720     if (!C->output()->in_scratch_emit_size()) {
  721       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  722     }
  723     __ relocate(relocInfo::poll_return_type);
  724     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  725   }
  726 }
  727 
  728 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  729   return MachNode::size(ra_); // too many variables; just compute it
  730                               // the hard way
  731 }
  732 
  733 int MachEpilogNode::reloc() const {
  734   return 0; // a large enough number
  735 }
  736 
  737 const Pipeline * MachEpilogNode::pipeline() const {
  738   return MachNode::pipeline_class();
  739 }
  740 
  741 //=============================================================================
  742 
  743 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  744 static enum RC rc_class( OptoReg::Name reg ) {
  745 
  746   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  747   if (OptoReg::is_stack(reg)) return rc_stack;
  748 
  749   VMReg r = OptoReg::as_VMReg(reg);
  750   if (r->is_Register()) return rc_int;
  751   if (r->is_FloatRegister()) {
  752     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  753     return rc_float;
  754   }
  755   if (r->is_KRegister()) return rc_kreg;
  756   assert(r->is_XMMRegister(), "must be");
  757   return rc_xmm;
  758 }
  759 
  760 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  761                         int opcode, const char *op_str, int size, outputStream* st ) {
  762   if( cbuf ) {
  763     emit_opcode  (*cbuf, opcode );
  764     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  765 #ifndef PRODUCT
  766   } else if( !do_size ) {
  767     if( size != 0 ) st->print("\n\t");
  768     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  769       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  770       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  771     } else { // FLD, FST, PUSH, POP
  772       st->print("%s [ESP + #%d]",op_str,offset);
  773     }
  774 #endif
  775   }
  776   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  777   return size+3+offset_size;
  778 }
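       // Size accounting example (illustrative): a spill such as MOV EAX,[ESP + #16]
       // encodes as 8B 44 24 10 (opcode, ModRM, SIB, 1-byte displacement), so the
       // helper reports 3 + 1 == 4 additional bytes; a zero offset drops the
       // displacement byte entirely, and an offset above 127 widens it to 4 bytes.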
  779 
  780 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  781 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  782                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  783   int in_size_in_bits = Assembler::EVEX_32bit;
  784   int evex_encoding = 0;
  785   if (reg_lo+1 == reg_hi) {
  786     in_size_in_bits = Assembler::EVEX_64bit;
  787     evex_encoding = Assembler::VEX_W;
  788   }
  789   if (cbuf) {
  790     MacroAssembler _masm(cbuf);
   791     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
   792     //                          since it maps more cases to a single-byte displacement.
  793     _masm.set_managed();
  794     if (reg_lo+1 == reg_hi) { // double move?
  795       if (is_load) {
  796         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  797       } else {
  798         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  799       }
  800     } else {
  801       if (is_load) {
  802         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  803       } else {
  804         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  805       }
  806     }
  807 #ifndef PRODUCT
  808   } else if (!do_size) {
  809     if (size != 0) st->print("\n\t");
  810     if (reg_lo+1 == reg_hi) { // double move?
  811       if (is_load) st->print("%s %s,[ESP + #%d]",
  812                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  813                               Matcher::regName[reg_lo], offset);
  814       else         st->print("MOVSD  [ESP + #%d],%s",
  815                               offset, Matcher::regName[reg_lo]);
  816     } else {
  817       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  818                               Matcher::regName[reg_lo], offset);
  819       else         st->print("MOVSS  [ESP + #%d],%s",
  820                               offset, Matcher::regName[reg_lo]);
  821     }
  822 #endif
  823   }
  824   bool is_single_byte = false;
  825   if ((UseAVX > 2) && (offset != 0)) {
  826     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  827   }
  828   int offset_size = 0;
  829   if (UseAVX > 2 ) {
  830     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  831   } else {
  832     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  833   }
  834   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  835   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  836   return size+5+offset_size;
  837 }
  838 
  839 
  840 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  841                             int src_hi, int dst_hi, int size, outputStream* st ) {
  842   if (cbuf) {
  843     MacroAssembler _masm(cbuf);
   844     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  845     _masm.set_managed();
  846     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  848                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  849     } else {
  850       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  851                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  852     }
  853 #ifndef PRODUCT
  854   } else if (!do_size) {
  855     if (size != 0) st->print("\n\t");
  856     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  857       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  858         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       } else {
  860         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  861       }
  862     } else {
  863       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  864         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  865       } else {
  866         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  867       }
  868     }
  869 #endif
  870   }
  871   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  872   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  873   int sz = (UseAVX > 2) ? 6 : 4;
  874   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  875       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  876   return size + sz;
  877 }
  878 
  879 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  880                             int src_hi, int dst_hi, int size, outputStream* st ) {
  881   // 32-bit
  882   if (cbuf) {
  883     MacroAssembler _masm(cbuf);
   884     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  885     _masm.set_managed();
  886     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  887              as_Register(Matcher::_regEncode[src_lo]));
  888 #ifndef PRODUCT
  889   } else if (!do_size) {
  890     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  891 #endif
  892   }
  893   return (UseAVX> 2) ? 6 : 4;
  894 }
  895 
  896 
  897 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  898                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  899   // 32-bit
  900   if (cbuf) {
  901     MacroAssembler _masm(cbuf);
   902     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  903     _masm.set_managed();
  904     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  905              as_XMMRegister(Matcher::_regEncode[src_lo]));
  906 #ifndef PRODUCT
  907   } else if (!do_size) {
  908     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  909 #endif
  910   }
  911   return (UseAVX> 2) ? 6 : 4;
  912 }
  913 
  914 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  915   if( cbuf ) {
  916     emit_opcode(*cbuf, 0x8B );
  917     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  918 #ifndef PRODUCT
  919   } else if( !do_size ) {
  920     if( size != 0 ) st->print("\n\t");
  921     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  922 #endif
  923   }
  924   return size+2;
  925 }
  926 
  927 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  928                                  int offset, int size, outputStream* st ) {
  929   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  930     if( cbuf ) {
  931       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  932       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  933 #ifndef PRODUCT
  934     } else if( !do_size ) {
  935       if( size != 0 ) st->print("\n\t");
  936       st->print("FLD    %s",Matcher::regName[src_lo]);
  937 #endif
  938     }
  939     size += 2;
  940   }
  941 
  942   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  943   const char *op_str;
  944   int op;
  945   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  946     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  947     op = 0xDD;
  948   } else {                   // 32-bit store
  949     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  950     op = 0xD9;
  951     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  952   }
  953 
  954   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  955 }
  956 
  957 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  958 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  959                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  960 
  961 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  962                             int stack_offset, int reg, uint ireg, outputStream* st);
  963 
  964 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  965                                      int dst_offset, uint ireg, outputStream* st) {
  966   if (cbuf) {
  967     MacroAssembler _masm(cbuf);
  968     switch (ireg) {
  969     case Op_VecS:
  970       __ pushl(Address(rsp, src_offset));
  971       __ popl (Address(rsp, dst_offset));
  972       break;
  973     case Op_VecD:
  974       __ pushl(Address(rsp, src_offset));
  975       __ popl (Address(rsp, dst_offset));
  976       __ pushl(Address(rsp, src_offset+4));
  977       __ popl (Address(rsp, dst_offset+4));
  978       break;
  979     case Op_VecX:
  980       __ movdqu(Address(rsp, -16), xmm0);
  981       __ movdqu(xmm0, Address(rsp, src_offset));
  982       __ movdqu(Address(rsp, dst_offset), xmm0);
  983       __ movdqu(xmm0, Address(rsp, -16));
  984       break;
  985     case Op_VecY:
  986       __ vmovdqu(Address(rsp, -32), xmm0);
  987       __ vmovdqu(xmm0, Address(rsp, src_offset));
  988       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  989       __ vmovdqu(xmm0, Address(rsp, -32));
  990       break;
  991     case Op_VecZ:
  992       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  993       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  994       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  995       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  996       break;
  997     default:
  998       ShouldNotReachHere();
  999     }
 1000 #ifndef PRODUCT
 1001   } else {
 1002     switch (ireg) {
 1003     case Op_VecS:
 1004       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1005                 "popl    [rsp + #%d]",
 1006                 src_offset, dst_offset);
 1007       break;
 1008     case Op_VecD:
 1009       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  1010                 "popl    [rsp + #%d]\n\t"
 1011                 "pushl   [rsp + #%d]\n\t"
  1012                 "popl    [rsp + #%d]",
 1013                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1014       break;
 1015      case Op_VecX:
 1016       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1017                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1018                 "movdqu  [rsp + #%d], xmm0\n\t"
 1019                 "movdqu  xmm0, [rsp - #16]",
 1020                 src_offset, dst_offset);
 1021       break;
 1022     case Op_VecY:
 1023       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1024                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1025                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1026                 "vmovdqu xmm0, [rsp - #32]",
 1027                 src_offset, dst_offset);
 1028       break;
 1029     case Op_VecZ:
 1030       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1031                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1032                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1033                 "vmovdqu xmm0, [rsp - #64]",
 1034                 src_offset, dst_offset);
 1035       break;
 1036     default:
 1037       ShouldNotReachHere();
 1038     }
 1039 #endif
 1040   }
 1041 }
 1042 
 1043 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1044   // Get registers to move
 1045   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1046   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1047   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1048   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1049 
 1050   enum RC src_second_rc = rc_class(src_second);
 1051   enum RC src_first_rc = rc_class(src_first);
 1052   enum RC dst_second_rc = rc_class(dst_second);
 1053   enum RC dst_first_rc = rc_class(dst_first);
 1054 
 1055   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1056 
 1057   // Generate spill code!
 1058   int size = 0;
 1059 
 1060   if( src_first == dst_first && src_second == dst_second )
 1061     return size;            // Self copy, no move
 1062 
 1063   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1064     uint ireg = ideal_reg();
 1065     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1066     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1067     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1068     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1069       // mem -> mem
 1070       int src_offset = ra_->reg2offset(src_first);
 1071       int dst_offset = ra_->reg2offset(dst_first);
 1072       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1073     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1074       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1075     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1076       int stack_offset = ra_->reg2offset(dst_first);
 1077       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1078     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1079       int stack_offset = ra_->reg2offset(src_first);
 1080       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1081     } else {
 1082       ShouldNotReachHere();
 1083     }
 1084     return 0;
 1085   }
 1086 
 1087   // --------------------------------------
 1088   // Check for mem-mem move.  push/pop to move.
 1089   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1090     if( src_second == dst_first ) { // overlapping stack copy ranges
 1091       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1095     }
 1096     // move low bits
 1097     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1098     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1099     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1100       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1101       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1102     }
 1103     return size;
 1104   }
 1105 
 1106   // --------------------------------------
 1107   // Check for integer reg-reg copy
 1108   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1109     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1110 
 1111   // Check for integer store
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1113     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1114 
 1115   // Check for integer load
 1116   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1117     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1118 
 1119   // Check for integer reg-xmm reg copy
 1120   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1121     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1122             "no 64 bit integer-float reg moves" );
 1123     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1124   }
 1125   // --------------------------------------
 1126   // Check for float reg-reg copy
 1127   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1128     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1129             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1130     if( cbuf ) {
 1131 
 1132       // Note the mucking with the register encode to compensate for the 0/1
 1133       // indexing issue mentioned in a comment in the reg_def sections
 1134       // for FPR registers many lines above here.
 1135 
 1136       if( src_first != FPR1L_num ) {
 1137         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1138         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1139         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1140         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1141      } else {
 1142         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1143         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1144      }
 1145 #ifndef PRODUCT
 1146     } else if( !do_size ) {
 1147       if( size != 0 ) st->print("\n\t");
 1148       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1149       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1150 #endif
 1151     }
 1152     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1153   }
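         // Worked example of the encoding games above (registers chosen only for
         // illustration): copying FPR2 to FPR3 emits D9 C1 then DD DB.  FPR2 normally
         // sits at st(1) (FPR1 == st(0)), so the FLD is encoded as 0xC0 + 2 - 1 == 0xC1,
         // i.e. FLD ST(1); that push shifts every slot down by one, so FPR3, normally
         // st(2), is now st(3) and the FSTP is encoded as 0xD8 + 3 == 0xDB, FSTP ST(3).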
 1154 
 1155   // Check for float store
 1156   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1157     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1158   }
 1159 
 1160   // Check for float load
 1161   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1162     int offset = ra_->reg2offset(src_first);
 1163     const char *op_str;
 1164     int op;
 1165     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1166       op_str = "FLD_D";
 1167       op = 0xDD;
 1168     } else {                   // 32-bit load
 1169       op_str = "FLD_S";
 1170       op = 0xD9;
 1171       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1172     }
 1173     if( cbuf ) {
 1174       emit_opcode  (*cbuf, op );
 1175       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1176       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1177       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1178 #ifndef PRODUCT
 1179     } else if( !do_size ) {
 1180       if( size != 0 ) st->print("\n\t");
 1181       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1182 #endif
 1183     }
 1184     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1185     return size + 3+offset_size+2;
 1186   }
 1187 
 1188   // Check for xmm reg-reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1191             (src_first+1 == src_second && dst_first+1 == dst_second),
 1192             "no non-adjacent float-moves" );
 1193     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1194   }
 1195 
 1196   // Check for xmm reg-integer reg copy
 1197   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1198     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1199             "no 64 bit float-integer reg moves" );
 1200     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1201   }
 1202 
 1203   // Check for xmm store
 1204   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1205     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1206   }
 1207 
 1208   // Check for float xmm load
 1209   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1210     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1211   }
 1212 
 1213   // Copy from float reg to xmm reg
 1214   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1215     // copy to the top of stack from floating point reg
 1216     // and use LEA to preserve flags
 1217     if( cbuf ) {
 1218       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1219       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1220       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1221       emit_d8(*cbuf,0xF8);
 1222 #ifndef PRODUCT
 1223     } else if( !do_size ) {
 1224       if( size != 0 ) st->print("\n\t");
 1225       st->print("LEA    ESP,[ESP-8]");
 1226 #endif
 1227     }
 1228     size += 4;
 1229 
 1230     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1231 
 1232     // Copy from the temp memory to the xmm reg.
 1233     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1234 
 1235     if( cbuf ) {
 1236       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1237       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1238       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1239       emit_d8(*cbuf,0x08);
 1240 #ifndef PRODUCT
 1241     } else if( !do_size ) {
 1242       if( size != 0 ) st->print("\n\t");
 1243       st->print("LEA    ESP,[ESP+8]");
 1244 #endif
 1245     }
 1246     size += 4;
 1247     return size;
 1248   }
 1249 
 1250   // AVX-512 opmask specific spilling.
 1251   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1252     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1253     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1254     MacroAssembler _masm(cbuf);
 1255     int offset = ra_->reg2offset(src_first);
 1256     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1257     return 0;
 1258   }
 1259 
 1260   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1261     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1262     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1263     MacroAssembler _masm(cbuf);
 1264     int offset = ra_->reg2offset(dst_first);
 1265     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1266     return 0;
 1267   }
 1268 
 1269   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1270     Unimplemented();
 1271     return 0;
 1272   }
 1273 
 1274   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1275     Unimplemented();
 1276     return 0;
 1277   }
 1278 
 1279   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1280     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1281     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1282     MacroAssembler _masm(cbuf);
 1283     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1284     return 0;
 1285   }
 1286 
 1287   assert( size > 0, "missed a case" );
 1288 
 1289   // --------------------------------------------------------------------
 1290   // Check for second bits still needing moving.
 1291   if( src_second == dst_second )
 1292     return size;               // Self copy; no move
 1293   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1294 
 1295   // Check for second word int-int move
 1296   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1297     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1298 
 1299   // Check for second word integer store
 1300   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1301     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1302 
 1303   // Check for second word integer load
 1304   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1305     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1306 
 1307   Unimplemented();
 1308   return 0; // Mute compiler
 1309 }
 1310 
 1311 #ifndef PRODUCT
 1312 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1313   implementation( NULL, ra_, false, st );
 1314 }
 1315 #endif
 1316 
 1317 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1318   implementation( &cbuf, ra_, false, NULL );
 1319 }
 1320 
 1321 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
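        // MachNode::size() measures the node by emitting it into a scratch buffer,
        // which also covers the MacroAssembler-based spill paths above that do not
        // track an explicit byte count.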
 1322   return MachNode::size(ra_);
 1323 }
 1324 
 1325 
 1326 //=============================================================================
 1327 #ifndef PRODUCT
 1328 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1329   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1330   int reg = ra_->get_reg_first(this);
 1331   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1332 }
 1333 #endif
 1334 
 1335 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1337   int reg = ra_->get_encode(this);
 1338   if( offset >= 128 ) {
 1339     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1340     emit_rm(cbuf, 0x2, reg, 0x04);
 1341     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1342     emit_d32(cbuf, offset);
 1343   }
 1344   else {
 1345     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1346     emit_rm(cbuf, 0x1, reg, 0x04);
 1347     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1348     emit_d8(cbuf, offset);
 1349   }
 1350 }
 1351 
 1352 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
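        // LEA reg,[ESP+disp32] is 7 bytes (opcode + ModRM + SIB + disp32);
        // the disp8 form emitted for small offsets is 4 bytes.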
 1353   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1354   if( offset >= 128 ) {
 1355     return 7;
 1356   }
 1357   else {
 1358     return 4;
 1359   }
 1360 }
 1361 
 1362 //=============================================================================
 1363 #ifndef PRODUCT
 1364 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1365   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1366   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1367   st->print_cr("\tNOP");
 1368   st->print_cr("\tNOP");
 1369   if( !OptoBreakpoint )
 1370     st->print_cr("\tNOP");
 1371 }
 1372 #endif
 1373 
 1374 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1375   MacroAssembler masm(&cbuf);
 1376 #ifdef ASSERT
 1377   uint insts_size = cbuf.insts_size();
 1378 #endif
 1379   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1380   masm.jump_cc(Assembler::notEqual,
 1381                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1382   /* WARNING: these NOPs are critical so that the verified entry point is
 1383      properly aligned for patching by NativeJump::patch_verified_entry() */
 1384   int nops_cnt = 2;
 1385   if( !OptoBreakpoint ) // Leave space for int3
 1386      nops_cnt += 1;
 1387   masm.nop(nops_cnt);
 1388 
 1389   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1390 }
 1391 
 1392 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
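        // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 is 6 bytes, plus 2 or 3 NOPs.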
 1393   return OptoBreakpoint ? 11 : 12;
 1394 }
 1395 
 1396 
 1397 //=============================================================================
 1398 
 1399 // Vector calling convention not supported.
 1400 const bool Matcher::supports_vector_calling_convention() {
 1401   return false;
 1402 }
 1403 
 1404 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1405   Unimplemented();
 1406   return OptoRegPair(0, 0);
 1407 }
 1408 
 1409 // Is this branch offset short enough that a short branch can be used?
 1410 //
 1411 // NOTE: If the platform does not provide any short branch variants, then
 1412 //       this method should return false for offset 0.
 1413 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1414   // The passed offset is relative to the address of the branch.
 1415   // On x86 a branch displacement is calculated relative to the address
 1416   // of the next instruction.
 1417   offset -= br_size;
 1418 
 1419   // The short version of jmpConUCF2 contains multiple branches,
 1420   // making the reach slightly shorter.
 1421   if (rule == jmpConUCF2_rule)
 1422     return (-126 <= offset && offset <= 125);
 1423   return (-128 <= offset && offset <= 127);
 1424 }
 1425 
 1426 // Return whether or not this register is ever used as an argument.  This
 1427 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1428 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1429 // arguments in those registers will not be available to the callee.
 1430 bool Matcher::can_be_java_arg( int reg ) {
 1431   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1432   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1433   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1434   return false;
 1435 }
 1436 
 1437 bool Matcher::is_spillable_arg( int reg ) {
 1438   return can_be_java_arg(reg);
 1439 }
 1440 
 1441 uint Matcher::int_pressure_limit()
 1442 {
 1443   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1444 }
 1445 
 1446 uint Matcher::float_pressure_limit()
 1447 {
 1448   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1449 }
 1450 
 1451 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1452   // Use the hardware integer DIV instruction when
 1453   // it is faster than code which uses a multiply.
 1454   // Only when the constant divisor fits into 32 bits
 1455   // (min_jint is excluded because negating it does not
 1456   // yield a correct positive 32-bit value).
 1457   return VM_Version::has_fast_idiv() &&
 1458          (divisor == (int)divisor && divisor != min_jint);
 1459 }
 1460 
 1461 // Register for DIVI projection of divmodI
 1462 RegMask Matcher::divI_proj_mask() {
 1463   return EAX_REG_mask();
 1464 }
 1465 
 1466 // Register for MODI projection of divmodI
 1467 RegMask Matcher::modI_proj_mask() {
 1468   return EDX_REG_mask();
 1469 }
 1470 
 1471 // Register for DIVL projection of divmodL
 1472 RegMask Matcher::divL_proj_mask() {
 1473   ShouldNotReachHere();
 1474   return RegMask();
 1475 }
 1476 
 1477 // Register for MODL projection of divmodL
 1478 RegMask Matcher::modL_proj_mask() {
 1479   ShouldNotReachHere();
 1480   return RegMask();
 1481 }
 1482 
 1483 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1484   return NO_REG_mask();
 1485 }
 1486 
 1487 // Returns true if the high 32 bits of the value are known to be zero.
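      // For example, an AndL with a mask such as 0x00000000FFFFFFFF, or a ConL
      // constant whose upper 32 bits are zero.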
 1488 bool is_operand_hi32_zero(Node* n) {
 1489   int opc = n->Opcode();
 1490   if (opc == Op_AndL) {
 1491     Node* o2 = n->in(2);
 1492     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1493       return true;
 1494     }
 1495   }
 1496   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1497     return true;
 1498   }
 1499   return false;
 1500 }
 1501 
 1502 %}
 1503 
 1504 //----------ENCODING BLOCK-----------------------------------------------------
 1505 // This block specifies the encoding classes used by the compiler to output
 1506 // byte streams.  Encoding classes generate functions which are called by
 1507 // Machine Instruction Nodes in order to generate the bit encoding of the
 1508 // instruction.  Operands specify their base encoding interface with the
 1509 // interface keyword.  Four interfaces are currently supported:
 1510 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1511 // operand to generate a function which returns its register number when
 1512 // queried.   CONST_INTER causes an operand to generate a function which
 1513 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1514 // operand to generate four functions which return the Base Register, the
 1515 // Index Register, the Scale Value, and the Offset Value of the operand when
 1516 // queried.  COND_INTER causes an operand to generate six functions which
 1517 // return the encoding code (i.e. the encoding bits for the instruction)
 1518 // associated with each basic boolean condition for a conditional instruction.
 1519 // Instructions specify two basic values for encoding.  They use the
 1520 // ins_encode keyword to specify their encoding class (which must be one of
 1521 // the class names specified in the encoding block), and they use the
 1522 // opcode keyword to specify, in order, their primary, secondary, and
 1523 // tertiary opcode.  Only the opcode sections which a particular instruction
 1524 // needs for encoding need to be specified.
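      // For example, an instruction rule elsewhere in this file might specify
      //   opcode(0x03);                          // primary opcode (ADD r32, r/m32)
      //   ins_encode( OpcP, RegReg( dst, src) ); // emit opcode, then ModRM byte
      // using the OpcP and RegReg encoding classes defined below.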
 1525 encode %{
 1526   // Build emit functions for each basic byte or larger field in the intel
 1527   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1528   // code in the enc_class source block.  Emit functions will live in the
 1529   // main source block for now.  In future, we can generalize this by
 1530   // adding a syntax that specifies the sizes of fields in an order,
 1531   // so that the adlc can build the emit functions automagically
 1532 
 1533   // Emit primary opcode
 1534   enc_class OpcP %{
 1535     emit_opcode(cbuf, $primary);
 1536   %}
 1537 
 1538   // Emit secondary opcode
 1539   enc_class OpcS %{
 1540     emit_opcode(cbuf, $secondary);
 1541   %}
 1542 
 1543   // Emit opcode directly
 1544   enc_class Opcode(immI d8) %{
 1545     emit_opcode(cbuf, $d8$$constant);
 1546   %}
 1547 
 1548   enc_class SizePrefix %{
 1549     emit_opcode(cbuf,0x66);
 1550   %}
 1551 
 1552   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1553     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1554   %}
 1555 
 1556   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1557     emit_opcode(cbuf,$opcode$$constant);
 1558     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1559   %}
 1560 
 1561   enc_class mov_r32_imm0( rRegI dst ) %{
 1562     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1563     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1564   %}
 1565 
 1566   enc_class cdq_enc %{
 1567     // Full implementation of Java idiv and irem; checks for
 1568     // special case as described in JVM spec., p.243 & p.271.
 1569     //
 1570     //         normal case                           special case
 1571     //
 1572     // input : rax: dividend                          min_int
 1573     //         reg: divisor                           -1
 1574     //
 1575     // output: rax: quotient  (= rax idiv reg)        min_int
 1576     //         rdx: remainder (= rax irem reg)        0
 1577     //
 1578     //  Code sequence:
 1579     //
 1580     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1581     //  0F 85 0B 00 00 00    jne         normal_case
 1582     //  33 D2                xor         rdx,edx
 1583     //  83 F9 FF             cmp         rcx,0FFh
 1584     //  0F 84 03 00 00 00    je          done
 1585     //                  normal_case:
 1586     //  99                   cdq
 1587     //  F7 F9                idiv        rax,ecx
 1588     //                  done:
 1589     //
 1590     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1591     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1592     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1593     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1594     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1595     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1596     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1597     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1598     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1599     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1600     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1601     // normal_case:
 1602     emit_opcode(cbuf,0x99);                                         // cdq
 1603     // idiv (note: must be emitted by the user of this rule)
 1604     // normal:
 1605   %}
 1606 
 1607   // Dense encoding for older common ops
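        // (opcodes of the form base + register, e.g. 0x50+rd for PUSH r32 or
        //  0xB8+rd for MOV r32,imm32)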
 1608   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1609     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1610   %}
 1611 
 1612 
 1613   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1614   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1615     // Check for 8-bit immediate, and set sign extend bit in opcode
 1616     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1617       emit_opcode(cbuf, $primary | 0x02);
 1618     }
 1619     else {                          // If 32-bit immediate
 1620       emit_opcode(cbuf, $primary);
 1621     }
 1622   %}
 1623 
 1624   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1625     // Emit primary opcode and set sign-extend bit
 1626     // Check for 8-bit immediate, and set sign extend bit in opcode
 1627     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1628       emit_opcode(cbuf, $primary | 0x02);
          }
 1629     else {                          // If 32-bit immediate
 1630       emit_opcode(cbuf, $primary);
 1631     }
 1632     // Emit r/m byte with secondary opcode, after primary opcode.
 1633     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1634   %}
 1635 
 1636   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1637     // Check for 8-bit immediate, and set sign extend bit in opcode
 1638     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1639       $$$emit8$imm$$constant;
 1640     }
 1641     else {                          // If 32-bit immediate
 1642       // Output immediate
 1643       $$$emit32$imm$$constant;
 1644     }
 1645   %}
 1646 
 1647   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1648     // Emit primary opcode and set sign-extend bit
 1649     // Check for 8-bit immediate, and set sign extend bit in opcode
 1650     int con = (int)$imm$$constant; // Throw away top bits
 1651     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1652     // Emit r/m byte with secondary opcode, after primary opcode.
 1653     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1654     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1655     else                               emit_d32(cbuf,con);
 1656   %}
 1657 
 1658   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1659     // Emit primary opcode and set sign-extend bit
 1660     // Check for 8-bit immediate, and set sign extend bit in opcode
 1661     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1662     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1663     // Emit r/m byte with tertiary opcode, after primary opcode.
 1664     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1665     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1666     else                               emit_d32(cbuf,con);
 1667   %}
 1668 
 1669   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1670     emit_cc(cbuf, $secondary, $dst$$reg );
 1671   %}
 1672 
 1673   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1674     int destlo = $dst$$reg;
 1675     int desthi = HIGH_FROM_LOW(destlo);
 1676     // bswap lo
 1677     emit_opcode(cbuf, 0x0F);
 1678     emit_cc(cbuf, 0xC8, destlo);
 1679     // bswap hi
 1680     emit_opcode(cbuf, 0x0F);
 1681     emit_cc(cbuf, 0xC8, desthi);
 1682     // xchg lo and hi
 1683     emit_opcode(cbuf, 0x87);
 1684     emit_rm(cbuf, 0x3, destlo, desthi);
 1685   %}
 1686 
 1687   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1688     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1689   %}
 1690 
 1691   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1692     $$$emit8$primary;
 1693     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1694   %}
 1695 
 1696   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1697     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1698     emit_d8(cbuf, op >> 8 );
 1699     emit_d8(cbuf, op & 255);
 1700   %}
 1701 
 1702   // emulate a CMOV with a conditional branch around a MOV
 1703   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1704     // Invert sense of branch from sense of CMOV
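          // emit_cc produces 0x70 + cc, i.e. a Jcc rel8 jumping over the move
          // emitted after this encoding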
 1705     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1706     emit_d8( cbuf, $brOffs$$constant );
 1707   %}
 1708 
 1709   enc_class enc_PartialSubtypeCheck( ) %{
 1710     Register Redi = as_Register(EDI_enc); // result register
 1711     Register Reax = as_Register(EAX_enc); // super class
 1712     Register Recx = as_Register(ECX_enc); // killed
 1713     Register Resi = as_Register(ESI_enc); // sub class
 1714     Label miss;
 1715 
 1716     MacroAssembler _masm(&cbuf);
 1717     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1718                                      NULL, &miss,
 1719                                      /*set_cond_codes:*/ true);
 1720     if ($primary) {
 1721       __ xorptr(Redi, Redi);
 1722     }
 1723     __ bind(miss);
 1724   %}
 1725 
 1726   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1727     MacroAssembler masm(&cbuf);
 1728     int start = masm.offset();
 1729     if (UseSSE >= 2) {
 1730       if (VerifyFPU) {
 1731         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1732       }
 1733     } else {
 1734       // External c_calling_convention expects the FPU stack to be 'clean'.
 1735       // Compiled code leaves it dirty.  Do cleanup now.
 1736       masm.empty_FPU_stack();
 1737     }
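          // Record the size of this sequence the first time it is emitted and
          // assert that it never changes afterwards.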
 1738     if (sizeof_FFree_Float_Stack_All == -1) {
 1739       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1740     } else {
 1741       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1742     }
 1743   %}
 1744 
 1745   enc_class Verify_FPU_For_Leaf %{
 1746     if( VerifyFPU ) {
 1747       MacroAssembler masm(&cbuf);
 1748       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1749     }
 1750   %}
 1751 
 1752   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1753     // This is the instruction starting address for relocation info.
 1754     cbuf.set_insts_mark();
 1755     $$$emit8$primary;
 1756     // CALL directly to the runtime
 1757     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1758                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1759 
 1760     if (UseSSE >= 2) {
 1761       MacroAssembler _masm(&cbuf);
 1762       BasicType rt = tf()->return_type();
 1763 
 1764       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1765         // A C runtime call where the return value is unused.  In SSE2+
 1766         // mode the result needs to be removed from the FPU stack.  It's
 1767         // likely that this function call could be removed by the
 1768         // optimizer if the C function is a pure function.
 1769         __ ffree(0);
 1770       } else if (rt == T_FLOAT) {
 1771         __ lea(rsp, Address(rsp, -4));
 1772         __ fstp_s(Address(rsp, 0));
 1773         __ movflt(xmm0, Address(rsp, 0));
 1774         __ lea(rsp, Address(rsp,  4));
 1775       } else if (rt == T_DOUBLE) {
 1776         __ lea(rsp, Address(rsp, -8));
 1777         __ fstp_d(Address(rsp, 0));
 1778         __ movdbl(xmm0, Address(rsp, 0));
 1779         __ lea(rsp, Address(rsp,  8));
 1780       }
 1781     }
 1782   %}
 1783 
 1784   enc_class pre_call_resets %{
 1785     // If method sets FPU control word restore it here
 1786     debug_only(int off0 = cbuf.insts_size());
 1787     if (ra_->C->in_24_bit_fp_mode()) {
 1788       MacroAssembler _masm(&cbuf);
 1789       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1790     }
 1791     // Clear upper bits of YMM registers when current compiled code uses
 1792     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1793     MacroAssembler _masm(&cbuf);
 1794     __ vzeroupper();
 1795     debug_only(int off1 = cbuf.insts_size());
 1796     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1797   %}
 1798 
 1799   enc_class post_call_FPU %{
 1800     // If method sets FPU control word do it here also
 1801     if (Compile::current()->in_24_bit_fp_mode()) {
 1802       MacroAssembler masm(&cbuf);
 1803       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1804     }
 1805   %}
 1806 
 1807   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1808     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1809     // who we intended to call.
 1810     cbuf.set_insts_mark();
 1811     $$$emit8$primary;
 1812 
 1813     if (!_method) {
 1814       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1815                      runtime_call_Relocation::spec(),
 1816                      RELOC_IMM32);
 1817     } else {
 1818       int method_index = resolved_method_index(cbuf);
 1819       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1820                                                   : static_call_Relocation::spec(method_index);
 1821       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1822                      rspec, RELOC_DISP32);
 1823       // Emit stubs for static call.
 1824       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1825       if (stub == NULL) {
 1826         ciEnv::current()->record_failure("CodeCache is full");
 1827         return;
 1828       }
 1829     }
 1830   %}
 1831 
 1832   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1833     MacroAssembler _masm(&cbuf);
 1834     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1835   %}
 1836 
 1837   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1838     int disp = in_bytes(Method::from_compiled_offset());
 1839     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1840 
 1841     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1842     cbuf.set_insts_mark();
 1843     $$$emit8$primary;
 1844     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1845     emit_d8(cbuf, disp);             // Displacement
 1846 
 1847   %}
 1848 
 1849 //   Following encoding is no longer used, but may be restored if calling
 1850 //   convention changes significantly.
 1851 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1852 //
 1853 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1854 //     // int ic_reg     = Matcher::inline_cache_reg();
 1855 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1856 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1857 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1858 //
 1859 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1860 //     // // so we load it immediately before the call
 1861 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1862 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1863 //
 1864 //     // xor rbp,ebp
 1865 //     emit_opcode(cbuf, 0x33);
 1866 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1867 //
 1868 //     // CALL to interpreter.
 1869 //     cbuf.set_insts_mark();
 1870 //     $$$emit8$primary;
 1871 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1872 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1873 //   %}
 1874 
 1875   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1876     $$$emit8$primary;
 1877     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1878     $$$emit8$shift$$constant;
 1879   %}
 1880 
 1881   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1882     // Load immediate does not have a zero or sign extended version
 1883     // for 8-bit immediates
 1884     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1885     $$$emit32$src$$constant;
 1886   %}
 1887 
 1888   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1889     // Load immediate does not have a zero or sign extended version
 1890     // for 8-bit immediates
 1891     emit_opcode(cbuf, $primary + $dst$$reg);
 1892     $$$emit32$src$$constant;
 1893   %}
 1894 
 1895   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1896     // Load immediate does not have a zero or sign extended version
 1897     // for 8-bit immediates
 1898     int dst_enc = $dst$$reg;
 1899     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1900     if (src_con == 0) {
 1901       // xor dst, dst
 1902       emit_opcode(cbuf, 0x33);
 1903       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1904     } else {
 1905       emit_opcode(cbuf, $primary + dst_enc);
 1906       emit_d32(cbuf, src_con);
 1907     }
 1908   %}
 1909 
 1910   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1911     // Load immediate does not have a zero or sign extended version
 1912     // for 8-bit immediates
 1913     int dst_enc = $dst$$reg + 2;
 1914     int src_con = ((julong)($src$$constant)) >> 32;
 1915     if (src_con == 0) {
 1916       // xor dst, dst
 1917       emit_opcode(cbuf, 0x33);
 1918       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1919     } else {
 1920       emit_opcode(cbuf, $primary + dst_enc);
 1921       emit_d32(cbuf, src_con);
 1922     }
 1923   %}
 1924 
 1925 
 1926   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1927   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1928     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1929   %}
 1930 
 1931   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1932     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1933   %}
 1934 
 1935   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1936     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1937   %}
 1938 
 1939   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1940     $$$emit8$primary;
 1941     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1942   %}
 1943 
 1944   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1945     $$$emit8$secondary;
 1946     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1947   %}
 1948 
 1949   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1950     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1951   %}
 1952 
 1953   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1954     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1955   %}
 1956 
 1957   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1958     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1959   %}
 1960 
 1961   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1962     // Output immediate
 1963     $$$emit32$src$$constant;
 1964   %}
 1965 
 1966   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1967     // Output Float immediate bits
 1968     jfloat jf = $src$$constant;
 1969     int    jf_as_bits = jint_cast( jf );
 1970     emit_d32(cbuf, jf_as_bits);
 1971   %}
 1972 
 1973   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1974     // Output Float immediate bits
 1975     jfloat jf = $src$$constant;
 1976     int    jf_as_bits = jint_cast( jf );
 1977     emit_d32(cbuf, jf_as_bits);
 1978   %}
 1979 
 1980   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1981     // Output immediate
 1982     $$$emit16$src$$constant;
 1983   %}
 1984 
 1985   enc_class Con_d32(immI src) %{
 1986     emit_d32(cbuf,$src$$constant);
 1987   %}
 1988 
 1989   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1990     // Output immediate memory reference
 1991     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1992     emit_d32(cbuf, 0x00);
 1993   %}
 1994 
 1995   enc_class lock_prefix( ) %{
 1996     emit_opcode(cbuf,0xF0);         // [Lock]
 1997   %}
 1998 
 1999   // Cmp-xchg long value.
 2000   // Note: we need to swap rbx and rcx before and after the
 2001   //       cmpxchg8 instruction because the instruction uses
 2002   //       rcx as the high order word of the new value to store, but
 2003   //       our register encoding uses rbx.
 2004   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2005 
 2006     // XCHG  rbx,ecx
 2007     emit_opcode(cbuf,0x87);
 2008     emit_opcode(cbuf,0xD9);
 2009     // [Lock]
 2010     emit_opcode(cbuf,0xF0);
 2011     // CMPXCHG8 [Eptr]
 2012     emit_opcode(cbuf,0x0F);
 2013     emit_opcode(cbuf,0xC7);
 2014     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2015     // XCHG  rbx,ecx
 2016     emit_opcode(cbuf,0x87);
 2017     emit_opcode(cbuf,0xD9);
 2018   %}
 2019 
 2020   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2021     // [Lock]
 2022     emit_opcode(cbuf,0xF0);
 2023 
 2024     // CMPXCHG [Eptr]
 2025     emit_opcode(cbuf,0x0F);
 2026     emit_opcode(cbuf,0xB1);
 2027     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2028   %}
 2029 
 2030   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2031     // [Lock]
 2032     emit_opcode(cbuf,0xF0);
 2033 
 2034     // CMPXCHGB [Eptr]
 2035     emit_opcode(cbuf,0x0F);
 2036     emit_opcode(cbuf,0xB0);
 2037     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2038   %}
 2039 
 2040   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2041     // [Lock]
 2042     emit_opcode(cbuf,0xF0);
 2043 
 2044     // Operand-size prefix: make the CMPXCHG 16-bit
 2045     emit_opcode(cbuf, 0x66);
 2046 
 2047     // CMPXCHGW [Eptr]
 2048     emit_opcode(cbuf,0x0F);
 2049     emit_opcode(cbuf,0xB1);
 2050     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2051   %}
 2052 
 2053   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
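          // MOV does not modify EFLAGS, so the flags set by the preceding compare
          // still steer the JNE around the "MOV res,1".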
 2054     int res_encoding = $res$$reg;
 2055 
 2056     // MOV  res,0
 2057     emit_opcode( cbuf, 0xB8 + res_encoding);
 2058     emit_d32( cbuf, 0 );
 2059     // JNE,s  fail
 2060     emit_opcode(cbuf,0x75);
 2061     emit_d8(cbuf, 5 );
 2062     // MOV  res,1
 2063     emit_opcode( cbuf, 0xB8 + res_encoding);
 2064     emit_d32( cbuf, 1 );
 2065     // fail:
 2066   %}
 2067 
 2068   enc_class set_instruction_start( ) %{
 2069     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2070   %}
 2071 
 2072   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2073     int reg_encoding = $ereg$$reg;
 2074     int base  = $mem$$base;
 2075     int index = $mem$$index;
 2076     int scale = $mem$$scale;
 2077     int displace = $mem$$disp;
 2078     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2079     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2080   %}
 2081 
 2082   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2083     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2084     int base  = $mem$$base;
 2085     int index = $mem$$index;
 2086     int scale = $mem$$scale;
 2087     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2088     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2089     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2090   %}
 2091 
 2092   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
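          // $tertiary selects the double shift (0x0F 0xA4 = SHLD, 0x0F 0xAC = SHRD);
          // the single shift of the other half is then emitted from $primary and
          // $secondary (presumably the 0xC1 shift-by-imm8 opcode and its /r digit).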
 2093     int r1, r2;
 2094     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2095     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2096     emit_opcode(cbuf,0x0F);
 2097     emit_opcode(cbuf,$tertiary);
 2098     emit_rm(cbuf, 0x3, r1, r2);
 2099     emit_d8(cbuf,$cnt$$constant);
 2100     emit_d8(cbuf,$primary);
 2101     emit_rm(cbuf, 0x3, $secondary, r1);
 2102     emit_d8(cbuf,$cnt$$constant);
 2103   %}
 2104 
 2105   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2106     emit_opcode( cbuf, 0x8B ); // Move
 2107     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2108     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2109       emit_d8(cbuf,$primary);
 2110       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2111       emit_d8(cbuf,$cnt$$constant-32);
 2112     }
 2113     emit_d8(cbuf,$primary);
 2114     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2115     emit_d8(cbuf,31);
 2116   %}
 2117 
 2118   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2119     int r1, r2;
 2120     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2121     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2122 
 2123     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2124     emit_rm(cbuf, 0x3, r1, r2);
 2125     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2126       emit_opcode(cbuf,$primary);
 2127       emit_rm(cbuf, 0x3, $secondary, r1);
 2128       emit_d8(cbuf,$cnt$$constant-32);
 2129     }
 2130     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2131     emit_rm(cbuf, 0x3, r2, r2);
 2132   %}
 2133 
 2134   // Clone of RegMem but accepts an extra parameter to access each
 2135   // half of a double in memory; it never needs relocation info.
 2136   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2137     emit_opcode(cbuf,$opcode$$constant);
 2138     int reg_encoding = $rm_reg$$reg;
 2139     int base     = $mem$$base;
 2140     int index    = $mem$$index;
 2141     int scale    = $mem$$scale;
 2142     int displace = $mem$$disp + $disp_for_half$$constant;
 2143     relocInfo::relocType disp_reloc = relocInfo::none;
 2144     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2145   %}
 2146 
 2147   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2148   //
 2149   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2150   // and it never needs relocation information.
 2151   // Frequently used to move data between FPU's Stack Top and memory.
 2152   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2153     int rm_byte_opcode = $rm_opcode$$constant;
 2154     int base     = $mem$$base;
 2155     int index    = $mem$$index;
 2156     int scale    = $mem$$scale;
 2157     int displace = $mem$$disp;
 2158     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2159     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2160   %}
 2161 
 2162   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2163     int rm_byte_opcode = $rm_opcode$$constant;
 2164     int base     = $mem$$base;
 2165     int index    = $mem$$index;
 2166     int scale    = $mem$$scale;
 2167     int displace = $mem$$disp;
 2168     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2169     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2170   %}
 2171 
 2172   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2173     int reg_encoding = $dst$$reg;
 2174     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2175     int index        = 0x04;            // 0x04 indicates no index
 2176     int scale        = 0x00;            // 0x00 indicates no scale
 2177     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2178     relocInfo::relocType disp_reloc = relocInfo::none;
 2179     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2180   %}
 2181 
 2182   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2183     // Compare dst,src
 2184     emit_opcode(cbuf,0x3B);
 2185     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2186     // jmp dst < src around move
 2187     emit_opcode(cbuf,0x7C);
 2188     emit_d8(cbuf,2);
 2189     // move dst,src
 2190     emit_opcode(cbuf,0x8B);
 2191     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2192   %}
 2193 
 2194   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2195     // Compare dst,src
 2196     emit_opcode(cbuf,0x3B);
 2197     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2198     // jmp dst > src around move
 2199     emit_opcode(cbuf,0x7F);
 2200     emit_d8(cbuf,2);
 2201     // move dst,src
 2202     emit_opcode(cbuf,0x8B);
 2203     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2204   %}
 2205 
 2206   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2207     // If src is FPR1, we can just FST to store it.
 2208     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2209     int reg_encoding = 0x2; // Just store
 2210     int base  = $mem$$base;
 2211     int index = $mem$$index;
 2212     int scale = $mem$$scale;
 2213     int displace = $mem$$disp;
 2214     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2215     if( $src$$reg != FPR1L_enc ) {
 2216       reg_encoding = 0x3;  // Store & pop
 2217       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2218       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2219     }
 2220     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2221     emit_opcode(cbuf,$primary);
 2222     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2223   %}
 2224 
 2225   enc_class neg_reg(rRegI dst) %{
 2226     // NEG $dst
 2227     emit_opcode(cbuf,0xF7);
 2228     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2229   %}
 2230 
 2231   enc_class setLT_reg(eCXRegI dst) %{
 2232     // SETLT $dst
 2233     emit_opcode(cbuf,0x0F);
 2234     emit_opcode(cbuf,0x9C);
 2235     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2236   %}
 2237 
 2238   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
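          // Branch-free conditional add: SBB tmp,tmp turns the borrow from the SUB
          // into an all-ones or all-zero mask, so the final ADD adds ($y & mask).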
 2239     int tmpReg = $tmp$$reg;
 2240 
 2241     // SUB $p,$q
 2242     emit_opcode(cbuf,0x2B);
 2243     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2244     // SBB $tmp,$tmp
 2245     emit_opcode(cbuf,0x1B);
 2246     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2247     // AND $tmp,$y
 2248     emit_opcode(cbuf,0x23);
 2249     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2250     // ADD $p,$tmp
 2251     emit_opcode(cbuf,0x03);
 2252     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2253   %}
 2254 
 2255   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2256     // TEST shift,32
 2257     emit_opcode(cbuf,0xF7);
 2258     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2259     emit_d32(cbuf,0x20);
 2260     // JEQ,s small
 2261     emit_opcode(cbuf, 0x74);
 2262     emit_d8(cbuf, 0x04);
 2263     // MOV    $dst.hi,$dst.lo
 2264     emit_opcode( cbuf, 0x8B );
 2265     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2266     // CLR    $dst.lo
 2267     emit_opcode(cbuf, 0x33);
 2268     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2269 // small:
 2270     // SHLD   $dst.hi,$dst.lo,$shift
 2271     emit_opcode(cbuf,0x0F);
 2272     emit_opcode(cbuf,0xA5);
 2273     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2274     // SHL    $dst.lo,$shift
 2275     emit_opcode(cbuf,0xD3);
 2276     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2277   %}
 2278 
 2279   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2280     // TEST shift,32
 2281     emit_opcode(cbuf,0xF7);
 2282     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2283     emit_d32(cbuf,0x20);
 2284     // JEQ,s small
 2285     emit_opcode(cbuf, 0x74);
 2286     emit_d8(cbuf, 0x04);
 2287     // MOV    $dst.lo,$dst.hi
 2288     emit_opcode( cbuf, 0x8B );
 2289     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2290     // CLR    $dst.hi
 2291     emit_opcode(cbuf, 0x33);
 2292     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2293 // small:
 2294     // SHRD   $dst.lo,$dst.hi,$shift
 2295     emit_opcode(cbuf,0x0F);
 2296     emit_opcode(cbuf,0xAD);
 2297     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2298     // SHR    $dst.hi,$shift
 2299     emit_opcode(cbuf,0xD3);
 2300     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2301   %}
 2302 
 2303   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2304     // TEST shift,32
 2305     emit_opcode(cbuf,0xF7);
 2306     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2307     emit_d32(cbuf,0x20);
 2308     // JEQ,s small
 2309     emit_opcode(cbuf, 0x74);
 2310     emit_d8(cbuf, 0x05);
 2311     // MOV    $dst.lo,$dst.hi
 2312     emit_opcode( cbuf, 0x8B );
 2313     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2314     // SAR    $dst.hi,31
 2315     emit_opcode(cbuf, 0xC1);
 2316     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2317     emit_d8(cbuf, 0x1F );
 2318 // small:
 2319     // SHRD   $dst.lo,$dst.hi,$shift
 2320     emit_opcode(cbuf,0x0F);
 2321     emit_opcode(cbuf,0xAD);
 2322     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2323     // SAR    $dst.hi,$shift
 2324     emit_opcode(cbuf,0xD3);
 2325     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2326   %}
 2327 
 2328 
 2329   // ----------------- Encodings for floating point unit -----------------
 2330   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2331   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2332     $$$emit8$primary;
 2333     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2334   %}
 2335 
 2336   // Pop argument in FPR0 with FSTP ST(0)
 2337   enc_class PopFPU() %{
 2338     emit_opcode( cbuf, 0xDD );
 2339     emit_d8( cbuf, 0xD8 );
 2340   %}
 2341 
 2342   // !!!!! equivalent to Pop_Reg_F
 2343   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2344     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2345     emit_d8( cbuf, 0xD8+$dst$$reg );
 2346   %}
 2347 
 2348   enc_class Push_Reg_DPR( regDPR dst ) %{
 2349     emit_opcode( cbuf, 0xD9 );
 2350     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2351   %}
 2352 
 2353   enc_class strictfp_bias1( regDPR dst ) %{
 2354     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2355     emit_opcode( cbuf, 0x2D );
 2356     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2357     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2358     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2359   %}
 2360 
 2361   enc_class strictfp_bias2( regDPR dst ) %{
 2362     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2363     emit_opcode( cbuf, 0x2D );
 2364     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2365     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2366     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2367   %}
 2368 
 2369   // Special case for moving an integer register to a stack slot.
 2370   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2371     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2372   %}
 2373 
 2374   // Special case for moving a register to a stack slot.
 2375   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2376     // Opcode already emitted
 2377     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2378     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2379     emit_d32(cbuf, $dst$$disp);   // Displacement
 2380   %}
 2381 
 2382   // Push the integer in stackSlot 'src' onto FP-stack
 2383   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2384     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2385   %}
 2386 
 2387   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2388   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2389     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2390   %}
 2391 
 2392   // Same as Pop_Mem_F except for opcode
 2393   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2394   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2395     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2396   %}
 2397 
 2398   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2399     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2400     emit_d8( cbuf, 0xD8+$dst$$reg );
 2401   %}
 2402 
 2403   enc_class Push_Reg_FPR( regFPR dst ) %{
 2404     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2405     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2406   %}
 2407 
 2408   // Push FPU's float to a stack-slot, and pop FPU-stack
 2409   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2410     int pop = 0x02;
 2411     if ($src$$reg != FPR1L_enc) {
 2412       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2413       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2414       pop = 0x03;
 2415     }
 2416     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2417   %}
 2418 
 2419   // Push FPU's double to a stack-slot, and pop FPU-stack
 2420   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2421     int pop = 0x02;
 2422     if ($src$$reg != FPR1L_enc) {
 2423       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2424       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2425       pop = 0x03;
 2426     }
 2427     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2428   %}
 2429 
 2430   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2431   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2432     int pop = 0xD0 - 1; // -1 since we skip FLD
 2433     if ($src$$reg != FPR1L_enc) {
 2434       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2435       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2436       pop = 0xD8;
 2437     }
 2438     emit_opcode( cbuf, 0xDD );
 2439     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2440   %}
 2441 
 2442 
 2443   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2444     // load dst in FPR0
 2445     emit_opcode( cbuf, 0xD9 );
 2446     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2447     if ($src$$reg != FPR1L_enc) {
 2448       // fincstp
 2449       emit_opcode (cbuf, 0xD9);
 2450       emit_opcode (cbuf, 0xF7);
 2451       // swap src with FPR1:
 2452       // FXCH FPR1 with src
 2453       emit_opcode(cbuf, 0xD9);
 2454       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2455       // fdecstp
 2456       emit_opcode (cbuf, 0xD9);
 2457       emit_opcode (cbuf, 0xF6);
 2458     }
 2459   %}
 2460 
 2461   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2462     MacroAssembler _masm(&cbuf);
 2463     __ subptr(rsp, 8);
 2464     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2465     __ fld_d(Address(rsp, 0));
 2466     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2467     __ fld_d(Address(rsp, 0));
 2468   %}
 2469 
 2470   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2471     MacroAssembler _masm(&cbuf);
 2472     __ subptr(rsp, 4);
 2473     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2474     __ fld_s(Address(rsp, 0));
 2475     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2476     __ fld_s(Address(rsp, 0));
 2477   %}
 2478 
 2479   enc_class Push_ResultD(regD dst) %{
 2480     MacroAssembler _masm(&cbuf);
 2481     __ fstp_d(Address(rsp, 0));
 2482     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2483     __ addptr(rsp, 8);
 2484   %}
 2485 
 2486   enc_class Push_ResultF(regF dst, immI d8) %{
 2487     MacroAssembler _masm(&cbuf);
 2488     __ fstp_s(Address(rsp, 0));
 2489     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2490     __ addptr(rsp, $d8$$constant);
 2491   %}
 2492 
 2493   enc_class Push_SrcD(regD src) %{
 2494     MacroAssembler _masm(&cbuf);
 2495     __ subptr(rsp, 8);
 2496     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2497     __ fld_d(Address(rsp, 0));
 2498   %}
 2499 
 2500   enc_class push_stack_temp_qword() %{
 2501     MacroAssembler _masm(&cbuf);
 2502     __ subptr(rsp, 8);
 2503   %}
 2504 
 2505   enc_class pop_stack_temp_qword() %{
 2506     MacroAssembler _masm(&cbuf);
 2507     __ addptr(rsp, 8);
 2508   %}
 2509 
 2510   enc_class push_xmm_to_fpr1(regD src) %{
 2511     MacroAssembler _masm(&cbuf);
 2512     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2513     __ fld_d(Address(rsp, 0));
 2514   %}
 2515 
 2516   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2517     if ($src$$reg != FPR1L_enc) {
 2518       // fincstp
 2519       emit_opcode (cbuf, 0xD9);
 2520       emit_opcode (cbuf, 0xF7);
 2521       // FXCH FPR1 with src
 2522       emit_opcode(cbuf, 0xD9);
 2523       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2524       // fdecstp
 2525       emit_opcode (cbuf, 0xD9);
 2526       emit_opcode (cbuf, 0xF6);
 2527     }
 2528     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2529     // // FSTP   FPR$dst$$reg
 2530     // emit_opcode( cbuf, 0xDD );
 2531     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2532   %}
 2533 
 2534   enc_class fnstsw_sahf_skip_parity() %{
 2535     // fnstsw ax
 2536     emit_opcode( cbuf, 0xDF );
 2537     emit_opcode( cbuf, 0xE0 );
 2538     // sahf
 2539     emit_opcode( cbuf, 0x9E );
 2540     // jnp  ::skip
 2541     emit_opcode( cbuf, 0x7B );
 2542     emit_opcode( cbuf, 0x05 );
 2543   %}
 2544 
 2545   enc_class emitModDPR() %{
 2546     // fprem must be iterative
 2547     // :: loop
 2548     // fprem
 2549     emit_opcode( cbuf, 0xD9 );
 2550     emit_opcode( cbuf, 0xF8 );
 2551     // wait
 2552     emit_opcode( cbuf, 0x9b );
 2553     // fnstsw ax
 2554     emit_opcode( cbuf, 0xDF );
 2555     emit_opcode( cbuf, 0xE0 );
 2556     // sahf
 2557     emit_opcode( cbuf, 0x9E );
 2558     // jp  ::loop
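          // (0x0F 0x8A = JP rel32; the -12 displacement jumps back to the fprem)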
 2559     emit_opcode( cbuf, 0x0F );
 2560     emit_opcode( cbuf, 0x8A );
 2561     emit_opcode( cbuf, 0xF4 );
 2562     emit_opcode( cbuf, 0xFF );
 2563     emit_opcode( cbuf, 0xFF );
 2564     emit_opcode( cbuf, 0xFF );
 2565   %}
 2566 
 2567   enc_class fpu_flags() %{
 2568     // fnstsw_ax
 2569     emit_opcode( cbuf, 0xDF);
 2570     emit_opcode( cbuf, 0xE0);
 2571     // test ax,0x0400
 2572     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2573     emit_opcode( cbuf, 0xA9 );
 2574     emit_d16   ( cbuf, 0x0400 );
 2575     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2576     // // test rax,0x0400
 2577     // emit_opcode( cbuf, 0xA9 );
 2578     // emit_d32   ( cbuf, 0x00000400 );
 2579     //
 2580     // jz exit (no unordered comparison)
 2581     emit_opcode( cbuf, 0x74 );
 2582     emit_d8    ( cbuf, 0x02 );
 2583     // mov ah,1 - treat as LT case (set carry flag)
 2584     emit_opcode( cbuf, 0xB4 );
 2585     emit_d8    ( cbuf, 0x01 );
 2586     // sahf
 2587     emit_opcode( cbuf, 0x9E);
 2588   %}
 2589 
 2590   enc_class cmpF_P6_fixup() %{
 2591     // Fixup the integer flags in case comparison involved a NaN
 2592     //
 2593     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2594     emit_opcode( cbuf, 0x7B );
 2595     emit_d8    ( cbuf, 0x03 );
 2596     // MOV AH,1 - treat as LT case (set carry flag)
 2597     emit_opcode( cbuf, 0xB4 );
 2598     emit_d8    ( cbuf, 0x01 );
 2599     // SAHF
 2600     emit_opcode( cbuf, 0x9E);
 2601     // NOP     // target for branch to avoid branch to branch
 2602     emit_opcode( cbuf, 0x90);
 2603   %}
 2604 
 2605 //     fnstsw_ax();
 2606 //     sahf();
 2607 //     movl(dst, nan_result);
 2608 //     jcc(Assembler::parity, exit);
 2609 //     movl(dst, less_result);
 2610 //     jcc(Assembler::below, exit);
 2611 //     movl(dst, equal_result);
 2612 //     jcc(Assembler::equal, exit);
 2613 //     movl(dst, greater_result);
 2614 
 2615 // less_result     =  1;
 2616 // greater_result  = -1;
 2617 // equal_result    = 0;
 2618 // nan_result      = -1;
 2619 
 2620   enc_class CmpF_Result(rRegI dst) %{
 2621     // fnstsw_ax();
 2622     emit_opcode( cbuf, 0xDF);
 2623     emit_opcode( cbuf, 0xE0);
 2624     // sahf
 2625     emit_opcode( cbuf, 0x9E);
 2626     // movl(dst, nan_result);
 2627     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2628     emit_d32( cbuf, -1 );
 2629     // jcc(Assembler::parity, exit);
 2630     emit_opcode( cbuf, 0x7A );
 2631     emit_d8    ( cbuf, 0x13 );
 2632     // movl(dst, less_result);
 2633     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2634     emit_d32( cbuf, -1 );
 2635     // jcc(Assembler::below, exit);
 2636     emit_opcode( cbuf, 0x72 );
 2637     emit_d8    ( cbuf, 0x0C );
 2638     // movl(dst, equal_result);
 2639     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2640     emit_d32( cbuf, 0 );
 2641     // jcc(Assembler::equal, exit);
 2642     emit_opcode( cbuf, 0x74 );
 2643     emit_d8    ( cbuf, 0x05 );
 2644     // movl(dst, greater_result);
 2645     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2646     emit_d32( cbuf, 1 );
 2647   %}
 2648 
 2649 
 2650   // Compare the longs and set flags
 2651   // BROKEN!  Do Not use as-is
 2652   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2653     // CMP    $src1.hi,$src2.hi
 2654     emit_opcode( cbuf, 0x3B );
 2655     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2656     // JNE,s  done
 2657     emit_opcode(cbuf,0x75);
 2658     emit_d8(cbuf, 2 );
 2659     // CMP    $src1.lo,$src2.lo
 2660     emit_opcode( cbuf, 0x3B );
 2661     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2662 // done:
 2663   %}
 2664 
 2665   enc_class convert_int_long( regL dst, rRegI src ) %{
 2666     // mov $dst.lo,$src
 2667     int dst_encoding = $dst$$reg;
 2668     int src_encoding = $src$$reg;
 2669     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2670     // mov $dst.hi,$src
 2671     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2672     // sar $dst.hi,31
 2673     emit_opcode( cbuf, 0xC1 );
 2674     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2675     emit_d8(cbuf, 0x1F );
 2676   %}
 2677 
 2678   enc_class convert_long_double( eRegL src ) %{
 2679     // push $src.hi
 2680     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2681     // push $src.lo
 2682     emit_opcode(cbuf, 0x50+$src$$reg  );
 2683     // fild 64-bits at [SP]
 2684     emit_opcode(cbuf,0xdf);
 2685     emit_d8(cbuf, 0x6C);
 2686     emit_d8(cbuf, 0x24);
 2687     emit_d8(cbuf, 0x00);
 2688     // pop stack
 2689     emit_opcode(cbuf, 0x83); // add  SP, #8
 2690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2691     emit_d8(cbuf, 0x8);
 2692   %}
 2693 
 2694   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2695     // IMUL   EDX:EAX,$src1
 2696     emit_opcode( cbuf, 0xF7 );
 2697     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2698     // SAR    EDX,$cnt-32
 2699     int shift_count = ((int)$cnt$$constant) - 32;
 2700     if (shift_count > 0) {
 2701       emit_opcode(cbuf, 0xC1);
 2702       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2703       emit_d8(cbuf, shift_count);
 2704     }
 2705   %}
 2706 
  // Same as convert_long_double, but without the trailing add SP, #8
 2708   enc_class convert_long_double2( eRegL src ) %{
 2709     // push $src.hi
 2710     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2711     // push $src.lo
 2712     emit_opcode(cbuf, 0x50+$src$$reg  );
 2713     // fild 64-bits at [SP]
 2714     emit_opcode(cbuf,0xdf);
 2715     emit_d8(cbuf, 0x6C);
 2716     emit_d8(cbuf, 0x24);
 2717     emit_d8(cbuf, 0x00);
 2718   %}
 2719 
 2720   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2721     // Basic idea: long = (long)int * (long)int
 2722     // IMUL EDX:EAX, src
 2723     emit_opcode( cbuf, 0xF7 );
 2724     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2725   %}
 2726 
 2727   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2728     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2729     // MUL EDX:EAX, src
 2730     emit_opcode( cbuf, 0xF7 );
 2731     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2732   %}
 2733 
 2734   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2735     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2736     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2737     // MOV    $tmp,$src.lo
 2738     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2739     // IMUL   $tmp,EDX
 2740     emit_opcode( cbuf, 0x0F );
 2741     emit_opcode( cbuf, 0xAF );
 2742     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2743     // MOV    EDX,$src.hi
 2744     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2745     // IMUL   EDX,EAX
 2746     emit_opcode( cbuf, 0x0F );
 2747     emit_opcode( cbuf, 0xAF );
 2748     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2749     // ADD    $tmp,EDX
 2750     emit_opcode( cbuf, 0x03 );
 2751     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2752     // MUL   EDX:EAX,$src.lo
 2753     emit_opcode( cbuf, 0xF7 );
 2754     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2755     // ADD    EDX,ESI
 2756     emit_opcode( cbuf, 0x03 );
 2757     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2758   %}
 2759 
 2760   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2761     // Basic idea: lo(result) = lo(src * y_lo)
 2762     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2763     // IMUL   $tmp,EDX,$src
 2764     emit_opcode( cbuf, 0x6B );
 2765     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2766     emit_d8( cbuf, (int)$src$$constant );
 2767     // MOV    EDX,$src
 2768     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2769     emit_d32( cbuf, (int)$src$$constant );
 2770     // MUL   EDX:EAX,EDX
 2771     emit_opcode( cbuf, 0xF7 );
 2772     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2773     // ADD    EDX,ESI
 2774     emit_opcode( cbuf, 0x03 );
 2775     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2776   %}
 2777 
 2778   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2779     // PUSH src1.hi
 2780     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2781     // PUSH src1.lo
 2782     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2783     // PUSH src2.hi
 2784     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2785     // PUSH src2.lo
 2786     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2787     // CALL directly to the runtime
 2788     cbuf.set_insts_mark();
 2789     emit_opcode(cbuf,0xE8);       // Call into runtime
 2790     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2791     // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #16  (pop the four pushed words)
 2793     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2794     emit_d8(cbuf, 4*4);
 2795   %}
 2796 
 2797   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2798     // PUSH src1.hi
 2799     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2800     // PUSH src1.lo
 2801     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2802     // PUSH src2.hi
 2803     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2804     // PUSH src2.lo
 2805     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2806     // CALL directly to the runtime
 2807     cbuf.set_insts_mark();
 2808     emit_opcode(cbuf,0xE8);       // Call into runtime
 2809     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2810     // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #16  (pop the four pushed words)
 2812     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2813     emit_d8(cbuf, 4*4);
 2814   %}
 2815 
 2816   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2817     // MOV   $tmp,$src.lo
 2818     emit_opcode(cbuf, 0x8B);
 2819     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2820     // OR    $tmp,$src.hi
 2821     emit_opcode(cbuf, 0x0B);
 2822     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2823   %}
 2824 
 2825   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2826     // CMP    $src1.lo,$src2.lo
 2827     emit_opcode( cbuf, 0x3B );
 2828     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2829     // JNE,s  skip
 2830     emit_cc(cbuf, 0x70, 0x5);
 2831     emit_d8(cbuf,2);
 2832     // CMP    $src1.hi,$src2.hi
 2833     emit_opcode( cbuf, 0x3B );
 2834     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2835   %}
 2836 
 2837   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2838     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2839     emit_opcode( cbuf, 0x3B );
 2840     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2841     // MOV    $tmp,$src1.hi
 2842     emit_opcode( cbuf, 0x8B );
 2843     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2844     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2845     emit_opcode( cbuf, 0x1B );
 2846     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2847   %}
 2848 
 2849   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2850     // XOR    $tmp,$tmp
 2851     emit_opcode(cbuf,0x33);  // XOR
 2852     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2853     // CMP    $tmp,$src.lo
 2854     emit_opcode( cbuf, 0x3B );
 2855     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2856     // SBB    $tmp,$src.hi
 2857     emit_opcode( cbuf, 0x1B );
 2858     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2859   %}
 2860 
 2861  // Sniff, sniff... smells like Gnu Superoptimizer
 2862   enc_class neg_long( eRegL dst ) %{
 2863     emit_opcode(cbuf,0xF7);    // NEG hi
 2864     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2865     emit_opcode(cbuf,0xF7);    // NEG lo
 2866     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2867     emit_opcode(cbuf,0x83);    // SBB hi,0
 2868     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2869     emit_d8    (cbuf,0 );
 2870   %}
 2871 
 2872   enc_class enc_pop_rdx() %{
 2873     emit_opcode(cbuf,0x5A);
 2874   %}
 2875 
 2876   enc_class enc_rethrow() %{
 2877     cbuf.set_insts_mark();
 2878     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2879     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2880                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2881   %}
 2882 
 2883 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  Out-of-range values come back as the
  // integer-indefinite pattern (0x80000000); the slow path below calls the
  // d2i wrapper to patch up the correct value.
 2889   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2893     // However, I2C adapters and other float-stack manglers leave pending
 2894     // invalid-op exceptions hanging.  We would have to clear them before
 2895     // enabling them and that is more expensive than just testing for the
 2896     // invalid value Intel stores down in the corner cases.
 2897     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2898     emit_opcode(cbuf,0x2D);
 2899     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2900     // Allocate a word
 2901     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2902     emit_opcode(cbuf,0xEC);
 2903     emit_d8(cbuf,0x04);
 2904     // Encoding assumes a double has been pushed into FPR0.
 2905     // Store down the double as an int, popping the FPU stack
 2906     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2907     emit_opcode(cbuf,0x1C);
 2908     emit_d8(cbuf,0x24);
 2909     // Restore the rounding mode; mask the exception
 2910     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2911     emit_opcode(cbuf,0x2D);
 2912     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2913         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2914         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2915 
 2916     // Load the converted int; adjust CPU stack
 2917     emit_opcode(cbuf,0x58);       // POP EAX
 2918     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2919     emit_d32   (cbuf,0x80000000); //         0x80000000
 2920     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2921     emit_d8    (cbuf,0x07);       // Size of slow_call
 2922     // Push src onto stack slow-path
 2923     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2924     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2925     // CALL directly to the runtime
 2926     cbuf.set_insts_mark();
 2927     emit_opcode(cbuf,0xE8);       // Call into runtime
 2928     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2929     // Carry on here...
 2930   %}
 2931 
 2932   enc_class DPR2L_encoding( regDPR src ) %{
 2933     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2934     emit_opcode(cbuf,0x2D);
 2935     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
 2937     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2938     emit_opcode(cbuf,0xEC);
 2939     emit_d8(cbuf,0x08);
 2940     // Encoding assumes a double has been pushed into FPR0.
 2941     // Store down the double as a long, popping the FPU stack
 2942     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2943     emit_opcode(cbuf,0x3C);
 2944     emit_d8(cbuf,0x24);
 2945     // Restore the rounding mode; mask the exception
 2946     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2947     emit_opcode(cbuf,0x2D);
 2948     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2949         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2950         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2951 
    // Load the converted long; adjust CPU stack
 2953     emit_opcode(cbuf,0x58);       // POP EAX
 2954     emit_opcode(cbuf,0x5A);       // POP EDX
 2955     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2956     emit_d8    (cbuf,0xFA);       // rdx
 2957     emit_d32   (cbuf,0x80000000); //         0x80000000
 2958     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2959     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2960     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2961     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2962     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2963     emit_d8    (cbuf,0x07);       // Size of slow_call
 2964     // Push src onto stack slow-path
 2965     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2966     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2967     // CALL directly to the runtime
 2968     cbuf.set_insts_mark();
 2969     emit_opcode(cbuf,0xE8);       // Call into runtime
 2970     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2971     // Carry on here...
 2972   %}
 2973 
 2974   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2975     // Operand was loaded from memory into fp ST (stack top)
 2976     // FMUL   ST,$src  /* D8 C8+i */
 2977     emit_opcode(cbuf, 0xD8);
 2978     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2979   %}
 2980 
 2981   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,$src2  /* D8 C0+i */
 2983     emit_opcode(cbuf, 0xD8);
 2984     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
 2986   %}
 2987 
 2988   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2989     // FADDP  src2,ST  /* DE C0+i */
 2990     emit_opcode(cbuf, 0xDE);
 2991     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2992   %}
 2993 
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
 3004 
 3005   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3006     // Operand was loaded from memory into fp ST (stack top)
 3007     // FADD   ST,$src  /* D8 C0+i */
 3008     emit_opcode(cbuf, 0xD8);
 3009     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3010 
    // FMUL   ST,$src2  /* D8 C8+i */
 3012     emit_opcode(cbuf, 0xD8);
 3013     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3014   %}
 3015 
 3016 
 3017   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3018     // Operand was loaded from memory into fp ST (stack top)
 3019     // FADD   ST,$src  /* D8 C0+i */
 3020     emit_opcode(cbuf, 0xD8);
 3021     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3022 
 3023     // FMULP  src2,ST  /* DE C8+i */
 3024     emit_opcode(cbuf, 0xDE);
 3025     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3026   %}
 3027 
 3028   // Atomically load the volatile long
 3029   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3030     emit_opcode(cbuf,0xDF);
 3031     int rm_byte_opcode = 0x05;
 3032     int base     = $mem$$base;
 3033     int index    = $mem$$index;
 3034     int scale    = $mem$$scale;
 3035     int displace = $mem$$disp;
 3036     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3037     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3038     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3039   %}
 3040 
 3041   // Volatile Store Long.  Must be atomic, so move it into
 3042   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3043   // target address before the store (for null-ptr checks)
 3044   // so the memory operand is used twice in the encoding.
 3045   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3046     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3047     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3048     emit_opcode(cbuf,0xDF);
 3049     int rm_byte_opcode = 0x07;
 3050     int base     = $mem$$base;
 3051     int index    = $mem$$index;
 3052     int scale    = $mem$$scale;
 3053     int displace = $mem$$disp;
 3054     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3055     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3056   %}
 3057 
 3058 %}
 3059 
 3060 
 3061 //----------FRAME--------------------------------------------------------------
 3062 // Definition of frame structure and management information.
 3063 //
 3064 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3065 //                             |   (to get allocators register number
 3066 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3067 //  r   CALLER     |        |
 3068 //  o     |        +--------+      pad to even-align allocators stack-slot
 3069 //  w     V        |  pad0  |        numbers; owned by CALLER
 3070 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3071 //  h     ^        |   in   |  5
 3072 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3073 //  |     |        |        |  3
 3074 //  |     |        +--------+
 3075 //  V     |        | old out|      Empty on Intel, window on Sparc
 3076 //        |    old |preserve|      Must be even aligned.
 3077 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3078 //        |        |   in   |  3   area for Intel ret address
 3079 //     Owned by    |preserve|      Empty on Sparc.
 3080 //       SELF      +--------+
 3081 //        |        |  pad2  |  2   pad to align old SP
 3082 //        |        +--------+  1
 3083 //        |        | locks  |  0
 3084 //        |        +--------+----> OptoReg::stack0(), even aligned
 3085 //        |        |  pad1  | 11   pad to align new SP
 3086 //        |        +--------+
 3087 //        |        |        | 10
 3088 //        |        | spills |  9   spills
 3089 //        V        |        |  8   (pad0 slot for callee)
 3090 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3091 //        ^        |  out   |  7
 3092 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3093 //     Owned by    +--------+
 3094 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3095 //        |    new |preserve|      Must be even-aligned.
 3096 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3097 //        |        |        |
 3098 //
 3099 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3100 //         known from SELF's arguments and the Java calling convention.
 3101 //         Region 6-7 is determined per call site.
 3102 // Note 2: If the calling convention leaves holes in the incoming argument
 3103 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
 3105 //         incoming area, as the Java calling convention is completely under
 3106 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
 3108 //         varargs C calling conventions.
 3109 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3110 //         even aligned with pad0 as needed.
 3111 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3112 //         region 6-11 is even aligned; it may be padded out more so that
 3113 //         the region from SP to FP meets the minimum stack alignment.
 3114 
 3115 frame %{
 3116   // These three registers define part of the calling convention
 3117   // between compiled code and the interpreter.
 3118   inline_cache_reg(EAX);                // Inline Cache Register
 3119 
 3120   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3121   cisc_spilling_operand_name(indOffset32);
 3122 
 3123   // Number of stack slots consumed by locking an object
 3124   sync_stack_slots(1);
 3125 
 3126   // Compiled code's Frame Pointer
 3127   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3131   interpreter_frame_pointer(EBP);
 3132 
 3133   // Stack alignment requirement
 3134   // Alignment size in bytes (128-bit -> 16 bytes)
 3135   stack_alignment(StackAlignmentInBytes);
 3136 
 3137   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3138   // for calls to C.  Supports the var-args backing area for register parms.
 3139   varargs_C_out_slots_killed(0);
 3140 
 3141   // The after-PROLOG location of the return address.  Location of
 3142   // return address specifies a type (REG or STACK) and a number
 3143   // representing the register number (i.e. - use a register name) or
 3144   // stack slot.
 3145   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3146   // Otherwise, it is above the locks and verification slot and alignment word
 3147   return_addr(STACK - 1 +
 3148               align_up((Compile::current()->in_preserve_stack_slots() +
 3149                         Compile::current()->fixed_slots()),
 3150                        stack_alignment_in_slots()));
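
  // For illustration only (hypothetical numbers): with 2 in-preserve slots,
  // no fixed slots and a 4-slot stack alignment, align_up(2 + 0, 4) = 4, so
  // the expression above evaluates to STACK - 1 + 4, i.e. stack slot 3.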
 3151 
 3152   // Location of C & interpreter return values
 3153   c_return_value %{
 3154     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3155     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3156     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3157 
 3158     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3159     // that C functions return float and double results in XMM0.
 3160     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3161       return OptoRegPair(XMM0b_num,XMM0_num);
 3162     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3163       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3164 
 3165     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3166   %}
 3167 
 3168   // Location of return values
 3169   return_value %{
 3170     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3171     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3172     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3173     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3174       return OptoRegPair(XMM0b_num,XMM0_num);
 3175     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3176       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3177     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3178   %}
 3179 
 3180 %}
 3181 
 3182 //----------ATTRIBUTES---------------------------------------------------------
 3183 //----------Operand Attributes-------------------------------------------------
 3184 op_attrib op_cost(0);        // Required cost attribute
 3185 
 3186 //----------Instruction Attributes---------------------------------------------
 3187 ins_attrib ins_cost(100);       // Required cost attribute
 3188 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3189 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3190                                 // non-matching short branch variant of some
                                // long branch?
 3192 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3193                                 // specifies the alignment that some part of the instruction (not
 3194                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3195                                 // function must be provided for the instruction
 3196 
 3197 //----------OPERANDS-----------------------------------------------------------
 3198 // Operand definitions must precede instruction definitions for correct parsing
 3199 // in the ADLC because operands constitute user defined types which are used in
 3200 // instruction definitions.
 3201 
 3202 //----------Simple Operands----------------------------------------------------
 3203 // Immediate Operands
 3204 // Integer Immediate
 3205 operand immI() %{
 3206   match(ConI);
 3207 
 3208   op_cost(10);
 3209   format %{ %}
 3210   interface(CONST_INTER);
 3211 %}
 3212 
 3213 // Constant for test vs zero
 3214 operand immI_0() %{
 3215   predicate(n->get_int() == 0);
 3216   match(ConI);
 3217 
 3218   op_cost(0);
 3219   format %{ %}
 3220   interface(CONST_INTER);
 3221 %}
 3222 
 3223 // Constant for increment
 3224 operand immI_1() %{
 3225   predicate(n->get_int() == 1);
 3226   match(ConI);
 3227 
 3228   op_cost(0);
 3229   format %{ %}
 3230   interface(CONST_INTER);
 3231 %}
 3232 
 3233 // Constant for decrement
 3234 operand immI_M1() %{
 3235   predicate(n->get_int() == -1);
 3236   match(ConI);
 3237 
 3238   op_cost(0);
 3239   format %{ %}
 3240   interface(CONST_INTER);
 3241 %}
 3242 
 3243 // Valid scale values for addressing modes
 3244 operand immI2() %{
 3245   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3246   match(ConI);
 3247 
 3248   format %{ %}
 3249   interface(CONST_INTER);
 3250 %}
 3251 
 3252 operand immI8() %{
 3253   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3254   match(ConI);
 3255 
 3256   op_cost(5);
 3257   format %{ %}
 3258   interface(CONST_INTER);
 3259 %}
 3260 
 3261 operand immU8() %{
 3262   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3263   match(ConI);
 3264 
 3265   op_cost(5);
 3266   format %{ %}
 3267   interface(CONST_INTER);
 3268 %}
 3269 
 3270 operand immI16() %{
 3271   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3272   match(ConI);
 3273 
 3274   op_cost(10);
 3275   format %{ %}
 3276   interface(CONST_INTER);
 3277 %}
 3278 
 3279 // Int Immediate non-negative
 3280 operand immU31()
 3281 %{
 3282   predicate(n->get_int() >= 0);
 3283   match(ConI);
 3284 
 3285   op_cost(0);
 3286   format %{ %}
 3287   interface(CONST_INTER);
 3288 %}
 3289 
 3290 // Constant for long shifts
 3291 operand immI_32() %{
 3292   predicate( n->get_int() == 32 );
 3293   match(ConI);
 3294 
 3295   op_cost(0);
 3296   format %{ %}
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_1_31() %{
 3301   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3302   match(ConI);
 3303 
 3304   op_cost(0);
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 operand immI_32_63() %{
 3310   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3311   match(ConI);
 3312   op_cost(0);
 3313 
 3314   format %{ %}
 3315   interface(CONST_INTER);
 3316 %}
 3317 
 3318 operand immI_2() %{
 3319   predicate( n->get_int() == 2 );
 3320   match(ConI);
 3321 
 3322   op_cost(0);
 3323   format %{ %}
 3324   interface(CONST_INTER);
 3325 %}
 3326 
 3327 operand immI_3() %{
 3328   predicate( n->get_int() == 3 );
 3329   match(ConI);
 3330 
 3331   op_cost(0);
 3332   format %{ %}
 3333   interface(CONST_INTER);
 3334 %}
 3335 
 3336 operand immI_4()
 3337 %{
 3338   predicate(n->get_int() == 4);
 3339   match(ConI);
 3340 
 3341   op_cost(0);
 3342   format %{ %}
 3343   interface(CONST_INTER);
 3344 %}
 3345 
 3346 operand immI_8()
 3347 %{
 3348   predicate(n->get_int() == 8);
 3349   match(ConI);
 3350 
 3351   op_cost(0);
 3352   format %{ %}
 3353   interface(CONST_INTER);
 3354 %}
 3355 
 3356 // Pointer Immediate
 3357 operand immP() %{
 3358   match(ConP);
 3359 
 3360   op_cost(10);
 3361   format %{ %}
 3362   interface(CONST_INTER);
 3363 %}
 3364 
 3365 // NULL Pointer Immediate
 3366 operand immP0() %{
 3367   predicate( n->get_ptr() == 0 );
 3368   match(ConP);
 3369   op_cost(0);
 3370 
 3371   format %{ %}
 3372   interface(CONST_INTER);
 3373 %}
 3374 
 3375 // Long Immediate
 3376 operand immL() %{
 3377   match(ConL);
 3378 
 3379   op_cost(20);
 3380   format %{ %}
 3381   interface(CONST_INTER);
 3382 %}
 3383 
 3384 // Long Immediate zero
 3385 operand immL0() %{
 3386   predicate( n->get_long() == 0L );
 3387   match(ConL);
 3388   op_cost(0);
 3389 
 3390   format %{ %}
 3391   interface(CONST_INTER);
 3392 %}
 3393 
// Long Immediate minus one
 3395 operand immL_M1() %{
 3396   predicate( n->get_long() == -1L );
 3397   match(ConL);
 3398   op_cost(0);
 3399 
 3400   format %{ %}
 3401   interface(CONST_INTER);
 3402 %}
 3403 
 3404 // Long immediate from 0 to 127.
 3405 // Used for a shorter form of long mul by 10.
 3406 operand immL_127() %{
 3407   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3408   match(ConL);
 3409   op_cost(0);
 3410 
 3411   format %{ %}
 3412   interface(CONST_INTER);
 3413 %}
 3414 
 3415 // Long Immediate: low 32-bit mask
 3416 operand immL_32bits() %{
 3417   predicate(n->get_long() == 0xFFFFFFFFL);
 3418   match(ConL);
 3419   op_cost(0);
 3420 
 3421   format %{ %}
 3422   interface(CONST_INTER);
 3423 %}
 3424 
// Long Immediate: value fits in a signed 32-bit immediate
 3426 operand immL32() %{
 3427   predicate(n->get_long() == (int)(n->get_long()));
 3428   match(ConL);
 3429   op_cost(20);
 3430 
 3431   format %{ %}
 3432   interface(CONST_INTER);
 3433 %}
 3434 
// Double Immediate zero
 3436 operand immDPR0() %{
 3437   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3438   // bug that generates code such that NaNs compare equal to 0.0
 3439   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3440   match(ConD);
 3441 
 3442   op_cost(5);
 3443   format %{ %}
 3444   interface(CONST_INTER);
 3445 %}
 3446 
 3447 // Double Immediate one
 3448 operand immDPR1() %{
 3449   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3450   match(ConD);
 3451 
 3452   op_cost(5);
 3453   format %{ %}
 3454   interface(CONST_INTER);
 3455 %}
 3456 
 3457 // Double Immediate
 3458 operand immDPR() %{
 3459   predicate(UseSSE<=1);
 3460   match(ConD);
 3461 
 3462   op_cost(5);
 3463   format %{ %}
 3464   interface(CONST_INTER);
 3465 %}
 3466 
 3467 operand immD() %{
 3468   predicate(UseSSE>=2);
 3469   match(ConD);
 3470 
 3471   op_cost(5);
 3472   format %{ %}
 3473   interface(CONST_INTER);
 3474 %}
 3475 
 3476 // Double Immediate zero
 3477 operand immD0() %{
 3478   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3479   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3480   // compare equal to -0.0.
 3481   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3482   match(ConD);
 3483 
 3484   format %{ %}
 3485   interface(CONST_INTER);
 3486 %}
 3487 
 3488 // Float Immediate zero
 3489 operand immFPR0() %{
 3490   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3491   match(ConF);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 // Float Immediate one
 3499 operand immFPR1() %{
 3500   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3501   match(ConF);
 3502 
 3503   op_cost(5);
 3504   format %{ %}
 3505   interface(CONST_INTER);
 3506 %}
 3507 
 3508 // Float Immediate
 3509 operand immFPR() %{
 3510   predicate( UseSSE == 0 );
 3511   match(ConF);
 3512 
 3513   op_cost(5);
 3514   format %{ %}
 3515   interface(CONST_INTER);
 3516 %}
 3517 
 3518 // Float Immediate
 3519 operand immF() %{
 3520   predicate(UseSSE >= 1);
 3521   match(ConF);
 3522 
 3523   op_cost(5);
 3524   format %{ %}
 3525   interface(CONST_INTER);
 3526 %}
 3527 
 3528 // Float Immediate zero.  Zero and not -0.0
 3529 operand immF0() %{
 3530   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3531   match(ConF);
 3532 
 3533   op_cost(5);
 3534   format %{ %}
 3535   interface(CONST_INTER);
 3536 %}
 3537 
 3538 // Immediates for special shifts (sign extend)
 3539 
// Shift amounts used for sign extension
 3541 operand immI_16() %{
 3542   predicate( n->get_int() == 16 );
 3543   match(ConI);
 3544 
 3545   format %{ %}
 3546   interface(CONST_INTER);
 3547 %}
 3548 
 3549 operand immI_24() %{
 3550   predicate( n->get_int() == 24 );
 3551   match(ConI);
 3552 
 3553   format %{ %}
 3554   interface(CONST_INTER);
 3555 %}
 3556 
 3557 // Constant for byte-wide masking
 3558 operand immI_255() %{
 3559   predicate( n->get_int() == 255 );
 3560   match(ConI);
 3561 
 3562   format %{ %}
 3563   interface(CONST_INTER);
 3564 %}
 3565 
 3566 // Constant for short-wide masking
 3567 operand immI_65535() %{
 3568   predicate(n->get_int() == 65535);
 3569   match(ConI);
 3570 
 3571   format %{ %}
 3572   interface(CONST_INTER);
 3573 %}
 3574 
 3575 operand kReg()
 3576 %{
 3577   constraint(ALLOC_IN_RC(vectmask_reg));
 3578   match(RegVectMask);
 3579   format %{%}
 3580   interface(REG_INTER);
 3581 %}
 3582 
 3583 operand kReg_K1()
 3584 %{
 3585   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3586   match(RegVectMask);
 3587   format %{%}
 3588   interface(REG_INTER);
 3589 %}
 3590 
 3591 operand kReg_K2()
 3592 %{
 3593   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3594   match(RegVectMask);
 3595   format %{%}
 3596   interface(REG_INTER);
 3597 %}
 3598 
 3599 // Special Registers
 3600 operand kReg_K3()
 3601 %{
 3602   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3603   match(RegVectMask);
 3604   format %{%}
 3605   interface(REG_INTER);
 3606 %}
 3607 
 3608 operand kReg_K4()
 3609 %{
 3610   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3611   match(RegVectMask);
 3612   format %{%}
 3613   interface(REG_INTER);
 3614 %}
 3615 
 3616 operand kReg_K5()
 3617 %{
 3618   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3619   match(RegVectMask);
 3620   format %{%}
 3621   interface(REG_INTER);
 3622 %}
 3623 
 3624 operand kReg_K6()
 3625 %{
 3626   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3627   match(RegVectMask);
 3628   format %{%}
 3629   interface(REG_INTER);
 3630 %}
 3631 
 3632 // Special Registers
 3633 operand kReg_K7()
 3634 %{
 3635   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3636   match(RegVectMask);
 3637   format %{%}
 3638   interface(REG_INTER);
 3639 %}
 3640 
 3641 // Register Operands
 3642 // Integer Register
 3643 operand rRegI() %{
 3644   constraint(ALLOC_IN_RC(int_reg));
 3645   match(RegI);
 3646   match(xRegI);
 3647   match(eAXRegI);
 3648   match(eBXRegI);
 3649   match(eCXRegI);
 3650   match(eDXRegI);
 3651   match(eDIRegI);
 3652   match(eSIRegI);
 3653 
 3654   format %{ %}
 3655   interface(REG_INTER);
 3656 %}
 3657 
 3658 // Subset of Integer Register
 3659 operand xRegI(rRegI reg) %{
 3660   constraint(ALLOC_IN_RC(int_x_reg));
 3661   match(reg);
 3662   match(eAXRegI);
 3663   match(eBXRegI);
 3664   match(eCXRegI);
 3665   match(eDXRegI);
 3666 
 3667   format %{ %}
 3668   interface(REG_INTER);
 3669 %}
 3670 
 3671 // Special Registers
 3672 operand eAXRegI(xRegI reg) %{
 3673   constraint(ALLOC_IN_RC(eax_reg));
 3674   match(reg);
 3675   match(rRegI);
 3676 
 3677   format %{ "EAX" %}
 3678   interface(REG_INTER);
 3679 %}
 3680 
 3681 // Special Registers
 3682 operand eBXRegI(xRegI reg) %{
 3683   constraint(ALLOC_IN_RC(ebx_reg));
 3684   match(reg);
 3685   match(rRegI);
 3686 
 3687   format %{ "EBX" %}
 3688   interface(REG_INTER);
 3689 %}
 3690 
 3691 operand eCXRegI(xRegI reg) %{
 3692   constraint(ALLOC_IN_RC(ecx_reg));
 3693   match(reg);
 3694   match(rRegI);
 3695 
 3696   format %{ "ECX" %}
 3697   interface(REG_INTER);
 3698 %}
 3699 
 3700 operand eDXRegI(xRegI reg) %{
 3701   constraint(ALLOC_IN_RC(edx_reg));
 3702   match(reg);
 3703   match(rRegI);
 3704 
 3705   format %{ "EDX" %}
 3706   interface(REG_INTER);
 3707 %}
 3708 
 3709 operand eDIRegI(xRegI reg) %{
 3710   constraint(ALLOC_IN_RC(edi_reg));
 3711   match(reg);
 3712   match(rRegI);
 3713 
 3714   format %{ "EDI" %}
 3715   interface(REG_INTER);
 3716 %}
 3717 
 3718 operand naxRegI() %{
 3719   constraint(ALLOC_IN_RC(nax_reg));
 3720   match(RegI);
 3721   match(eCXRegI);
 3722   match(eDXRegI);
 3723   match(eSIRegI);
 3724   match(eDIRegI);
 3725 
 3726   format %{ %}
 3727   interface(REG_INTER);
 3728 %}
 3729 
 3730 operand nadxRegI() %{
 3731   constraint(ALLOC_IN_RC(nadx_reg));
 3732   match(RegI);
 3733   match(eBXRegI);
 3734   match(eCXRegI);
 3735   match(eSIRegI);
 3736   match(eDIRegI);
 3737 
 3738   format %{ %}
 3739   interface(REG_INTER);
 3740 %}
 3741 
 3742 operand ncxRegI() %{
 3743   constraint(ALLOC_IN_RC(ncx_reg));
 3744   match(RegI);
 3745   match(eAXRegI);
 3746   match(eDXRegI);
 3747   match(eSIRegI);
 3748   match(eDIRegI);
 3749 
 3750   format %{ %}
 3751   interface(REG_INTER);
 3752 %}
 3753 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg.
 3756 operand eSIRegI(xRegI reg) %{
 3757    constraint(ALLOC_IN_RC(esi_reg));
 3758    match(reg);
 3759    match(rRegI);
 3760 
 3761    format %{ "ESI" %}
 3762    interface(REG_INTER);
 3763 %}
 3764 
 3765 // Pointer Register
 3766 operand anyRegP() %{
 3767   constraint(ALLOC_IN_RC(any_reg));
 3768   match(RegP);
 3769   match(eAXRegP);
 3770   match(eBXRegP);
 3771   match(eCXRegP);
 3772   match(eDIRegP);
 3773   match(eRegP);
 3774 
 3775   format %{ %}
 3776   interface(REG_INTER);
 3777 %}
 3778 
 3779 operand eRegP() %{
 3780   constraint(ALLOC_IN_RC(int_reg));
 3781   match(RegP);
 3782   match(eAXRegP);
 3783   match(eBXRegP);
 3784   match(eCXRegP);
 3785   match(eDIRegP);
 3786 
 3787   format %{ %}
 3788   interface(REG_INTER);
 3789 %}
 3790 
 3791 operand rRegP() %{
 3792   constraint(ALLOC_IN_RC(int_reg));
 3793   match(RegP);
 3794   match(eAXRegP);
 3795   match(eBXRegP);
 3796   match(eCXRegP);
 3797   match(eDIRegP);
 3798 
 3799   format %{ %}
 3800   interface(REG_INTER);
 3801 %}
 3802 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3804 operand eRegP_no_EBP() %{
 3805   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3806   match(RegP);
 3807   match(eAXRegP);
 3808   match(eBXRegP);
 3809   match(eCXRegP);
 3810   match(eDIRegP);
 3811 
 3812   op_cost(100);
 3813   format %{ %}
 3814   interface(REG_INTER);
 3815 %}
 3816 
 3817 operand naxRegP() %{
 3818   constraint(ALLOC_IN_RC(nax_reg));
 3819   match(RegP);
 3820   match(eBXRegP);
 3821   match(eDXRegP);
 3822   match(eCXRegP);
 3823   match(eSIRegP);
 3824   match(eDIRegP);
 3825 
 3826   format %{ %}
 3827   interface(REG_INTER);
 3828 %}
 3829 
 3830 operand nabxRegP() %{
 3831   constraint(ALLOC_IN_RC(nabx_reg));
 3832   match(RegP);
 3833   match(eCXRegP);
 3834   match(eDXRegP);
 3835   match(eSIRegP);
 3836   match(eDIRegP);
 3837 
 3838   format %{ %}
 3839   interface(REG_INTER);
 3840 %}
 3841 
 3842 operand pRegP() %{
 3843   constraint(ALLOC_IN_RC(p_reg));
 3844   match(RegP);
 3845   match(eBXRegP);
 3846   match(eDXRegP);
 3847   match(eSIRegP);
 3848   match(eDIRegP);
 3849 
 3850   format %{ %}
 3851   interface(REG_INTER);
 3852 %}
 3853 
 3854 // Special Registers
 3855 // Return a pointer value
 3856 operand eAXRegP(eRegP reg) %{
 3857   constraint(ALLOC_IN_RC(eax_reg));
 3858   match(reg);
 3859   format %{ "EAX" %}
 3860   interface(REG_INTER);
 3861 %}
 3862 
 3863 // Used in AtomicAdd
 3864 operand eBXRegP(eRegP reg) %{
 3865   constraint(ALLOC_IN_RC(ebx_reg));
 3866   match(reg);
 3867   format %{ "EBX" %}
 3868   interface(REG_INTER);
 3869 %}
 3870 
 3871 // Tail-call (interprocedural jump) to interpreter
 3872 operand eCXRegP(eRegP reg) %{
 3873   constraint(ALLOC_IN_RC(ecx_reg));
 3874   match(reg);
 3875   format %{ "ECX" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 
 3879 operand eDXRegP(eRegP reg) %{
 3880   constraint(ALLOC_IN_RC(edx_reg));
 3881   match(reg);
 3882   format %{ "EDX" %}
 3883   interface(REG_INTER);
 3884 %}
 3885 
 3886 operand eSIRegP(eRegP reg) %{
 3887   constraint(ALLOC_IN_RC(esi_reg));
 3888   match(reg);
 3889   format %{ "ESI" %}
 3890   interface(REG_INTER);
 3891 %}
 3892 
 3893 // Used in rep stosw
 3894 operand eDIRegP(eRegP reg) %{
 3895   constraint(ALLOC_IN_RC(edi_reg));
 3896   match(reg);
 3897   format %{ "EDI" %}
 3898   interface(REG_INTER);
 3899 %}
 3900 
 3901 operand eRegL() %{
 3902   constraint(ALLOC_IN_RC(long_reg));
 3903   match(RegL);
 3904   match(eADXRegL);
 3905 
 3906   format %{ %}
 3907   interface(REG_INTER);
 3908 %}
 3909 
 3910 operand eADXRegL( eRegL reg ) %{
 3911   constraint(ALLOC_IN_RC(eadx_reg));
 3912   match(reg);
 3913 
 3914   format %{ "EDX:EAX" %}
 3915   interface(REG_INTER);
 3916 %}
 3917 
 3918 operand eBCXRegL( eRegL reg ) %{
 3919   constraint(ALLOC_IN_RC(ebcx_reg));
 3920   match(reg);
 3921 
 3922   format %{ "EBX:ECX" %}
 3923   interface(REG_INTER);
 3924 %}
 3925 
 3926 // Special case for integer high multiply
 3927 operand eADXRegL_low_only() %{
 3928   constraint(ALLOC_IN_RC(eadx_reg));
 3929   match(RegL);
 3930 
 3931   format %{ "EAX" %}
 3932   interface(REG_INTER);
 3933 %}
 3934 
 3935 // Flags register, used as output of compare instructions
 3936 operand rFlagsReg() %{
 3937   constraint(ALLOC_IN_RC(int_flags));
 3938   match(RegFlags);
 3939 
 3940   format %{ "EFLAGS" %}
 3941   interface(REG_INTER);
 3942 %}
 3943 
 3944 // Flags register, used as output of compare instructions
 3945 operand eFlagsReg() %{
 3946   constraint(ALLOC_IN_RC(int_flags));
 3947   match(RegFlags);
 3948 
 3949   format %{ "EFLAGS" %}
 3950   interface(REG_INTER);
 3951 %}
 3952 
 3953 // Flags register, used as output of FLOATING POINT compare instructions
 3954 operand eFlagsRegU() %{
 3955   constraint(ALLOC_IN_RC(int_flags));
 3956   match(RegFlags);
 3957 
 3958   format %{ "EFLAGS_U" %}
 3959   interface(REG_INTER);
 3960 %}
 3961 
 3962 operand eFlagsRegUCF() %{
 3963   constraint(ALLOC_IN_RC(int_flags));
 3964   match(RegFlags);
 3965   predicate(false);
 3966 
 3967   format %{ "EFLAGS_U_CF" %}
 3968   interface(REG_INTER);
 3969 %}
 3970 
 3971 // Condition Code Register used by long compare
 3972 operand flagsReg_long_LTGE() %{
 3973   constraint(ALLOC_IN_RC(int_flags));
 3974   match(RegFlags);
 3975   format %{ "FLAGS_LTGE" %}
 3976   interface(REG_INTER);
 3977 %}
 3978 operand flagsReg_long_EQNE() %{
 3979   constraint(ALLOC_IN_RC(int_flags));
 3980   match(RegFlags);
 3981   format %{ "FLAGS_EQNE" %}
 3982   interface(REG_INTER);
 3983 %}
 3984 operand flagsReg_long_LEGT() %{
 3985   constraint(ALLOC_IN_RC(int_flags));
 3986   match(RegFlags);
 3987   format %{ "FLAGS_LEGT" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Condition Code Register used by unsigned long compare
 3992 operand flagsReg_ulong_LTGE() %{
 3993   constraint(ALLOC_IN_RC(int_flags));
 3994   match(RegFlags);
 3995   format %{ "FLAGS_U_LTGE" %}
 3996   interface(REG_INTER);
 3997 %}
 3998 operand flagsReg_ulong_EQNE() %{
 3999   constraint(ALLOC_IN_RC(int_flags));
 4000   match(RegFlags);
 4001   format %{ "FLAGS_U_EQNE" %}
 4002   interface(REG_INTER);
 4003 %}
 4004 operand flagsReg_ulong_LEGT() %{
 4005   constraint(ALLOC_IN_RC(int_flags));
 4006   match(RegFlags);
 4007   format %{ "FLAGS_U_LEGT" %}
 4008   interface(REG_INTER);
 4009 %}
 4010 
 4011 // Float register operands
 4012 operand regDPR() %{
 4013   predicate( UseSSE < 2 );
 4014   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4015   match(RegD);
 4016   match(regDPR1);
 4017   match(regDPR2);
 4018   format %{ %}
 4019   interface(REG_INTER);
 4020 %}
 4021 
 4022 operand regDPR1(regDPR reg) %{
 4023   predicate( UseSSE < 2 );
 4024   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4025   match(reg);
 4026   format %{ "FPR1" %}
 4027   interface(REG_INTER);
 4028 %}
 4029 
 4030 operand regDPR2(regDPR reg) %{
 4031   predicate( UseSSE < 2 );
 4032   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4033   match(reg);
 4034   format %{ "FPR2" %}
 4035   interface(REG_INTER);
 4036 %}
 4037 
 4038 operand regnotDPR1(regDPR reg) %{
 4039   predicate( UseSSE < 2 );
 4040   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4041   match(reg);
 4042   format %{ %}
 4043   interface(REG_INTER);
 4044 %}
 4045 
 4046 // Float register operands
 4047 operand regFPR() %{
 4048   predicate( UseSSE < 2 );
 4049   constraint(ALLOC_IN_RC(fp_flt_reg));
 4050   match(RegF);
 4051   match(regFPR1);
 4052   format %{ %}
 4053   interface(REG_INTER);
 4054 %}
 4055 
 4056 // Float register operands
 4057 operand regFPR1(regFPR reg) %{
 4058   predicate( UseSSE < 2 );
 4059   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4060   match(reg);
 4061   format %{ "FPR1" %}
 4062   interface(REG_INTER);
 4063 %}
 4064 
 4065 // XMM Float register operands
 4066 operand regF() %{
 4067   predicate( UseSSE>=1 );
 4068   constraint(ALLOC_IN_RC(float_reg_legacy));
 4069   match(RegF);
 4070   format %{ %}
 4071   interface(REG_INTER);
 4072 %}
 4073 
 4074 operand legRegF() %{
 4075   predicate( UseSSE>=1 );
 4076   constraint(ALLOC_IN_RC(float_reg_legacy));
 4077   match(RegF);
 4078   format %{ %}
 4079   interface(REG_INTER);
 4080 %}
 4081 
 4082 // Float register operands
 4083 operand vlRegF() %{
 4084    constraint(ALLOC_IN_RC(float_reg_vl));
 4085    match(RegF);
 4086 
 4087    format %{ %}
 4088    interface(REG_INTER);
 4089 %}
 4090 
 4091 // XMM Double register operands
 4092 operand regD() %{
 4093   predicate( UseSSE>=2 );
 4094   constraint(ALLOC_IN_RC(double_reg_legacy));
 4095   match(RegD);
 4096   format %{ %}
 4097   interface(REG_INTER);
 4098 %}
 4099 
 4100 // Double register operands
 4101 operand legRegD() %{
 4102   predicate( UseSSE>=2 );
 4103   constraint(ALLOC_IN_RC(double_reg_legacy));
 4104   match(RegD);
 4105   format %{ %}
 4106   interface(REG_INTER);
 4107 %}
 4108 
 4109 operand vlRegD() %{
 4110    constraint(ALLOC_IN_RC(double_reg_vl));
 4111    match(RegD);
 4112 
 4113    format %{ %}
 4114    interface(REG_INTER);
 4115 %}
 4116 
 4117 //----------Memory Operands----------------------------------------------------
 4118 // Direct Memory Operand
 4119 operand direct(immP addr) %{
 4120   match(addr);
 4121 
 4122   format %{ "[$addr]" %}
 4123   interface(MEMORY_INTER) %{
 4124     base(0xFFFFFFFF);
 4125     index(0x4);
 4126     scale(0x0);
 4127     disp($addr);
 4128   %}
 4129 %}
 4130 
 4131 // Indirect Memory Operand
 4132 operand indirect(eRegP reg) %{
 4133   constraint(ALLOC_IN_RC(int_reg));
 4134   match(reg);
 4135 
 4136   format %{ "[$reg]" %}
 4137   interface(MEMORY_INTER) %{
 4138     base($reg);
 4139     index(0x4);
 4140     scale(0x0);
 4141     disp(0x0);
 4142   %}
 4143 %}
 4144 
 4145 // Indirect Memory Plus Short Offset Operand
 4146 operand indOffset8(eRegP reg, immI8 off) %{
 4147   match(AddP reg off);
 4148 
 4149   format %{ "[$reg + $off]" %}
 4150   interface(MEMORY_INTER) %{
 4151     base($reg);
 4152     index(0x4);
 4153     scale(0x0);
 4154     disp($off);
 4155   %}
 4156 %}
 4157 
 4158 // Indirect Memory Plus Long Offset Operand
 4159 operand indOffset32(eRegP reg, immI off) %{
 4160   match(AddP reg off);
 4161 
 4162   format %{ "[$reg + $off]" %}
 4163   interface(MEMORY_INTER) %{
 4164     base($reg);
 4165     index(0x4);
 4166     scale(0x0);
 4167     disp($off);
 4168   %}
 4169 %}
 4170 
 4171 // Indirect Memory Plus Long Offset Operand
 4172 operand indOffset32X(rRegI reg, immP off) %{
 4173   match(AddP off reg);
 4174 
 4175   format %{ "[$reg + $off]" %}
 4176   interface(MEMORY_INTER) %{
 4177     base($reg);
 4178     index(0x4);
 4179     scale(0x0);
 4180     disp($off);
 4181   %}
 4182 %}
 4183 
 4184 // Indirect Memory Plus Index Register Plus Offset Operand
 4185 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4186   match(AddP (AddP reg ireg) off);
 4187 
 4188   op_cost(10);
 4189   format %{"[$reg + $off + $ireg]" %}
 4190   interface(MEMORY_INTER) %{
 4191     base($reg);
 4192     index($ireg);
 4193     scale(0x0);
 4194     disp($off);
 4195   %}
 4196 %}
 4197 
 4198 // Indirect Memory Plus Index Register Plus Offset Operand
 4199 operand indIndex(eRegP reg, rRegI ireg) %{
 4200   match(AddP reg ireg);
 4201 
 4202   op_cost(10);
 4203   format %{"[$reg + $ireg]" %}
 4204   interface(MEMORY_INTER) %{
 4205     base($reg);
 4206     index($ireg);
 4207     scale(0x0);
 4208     disp(0x0);
 4209   %}
 4210 %}
 4211 
 4212 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4214 // // -------------------------------------------------------------------------
 4215 // // Scaled Memory Operands
 4216 // // Indirect Memory Times Scale Plus Offset Operand
 4217 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4218 //   match(AddP off (LShiftI ireg scale));
 4219 //
 4220 //   op_cost(10);
 4221 //   format %{"[$off + $ireg << $scale]" %}
 4222 //   interface(MEMORY_INTER) %{
 4223 //     base(0x4);
 4224 //     index($ireg);
 4225 //     scale($scale);
 4226 //     disp($off);
 4227 //   %}
 4228 // %}
 4229 
 4230 // Indirect Memory Times Scale Plus Index Register
 4231 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4232   match(AddP reg (LShiftI ireg scale));
 4233 
 4234   op_cost(10);
 4235   format %{"[$reg + $ireg << $scale]" %}
 4236   interface(MEMORY_INTER) %{
 4237     base($reg);
 4238     index($ireg);
 4239     scale($scale);
 4240     disp(0x0);
 4241   %}
 4242 %}
 4243 
 4244 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4245 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4246   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4247 
 4248   op_cost(10);
 4249   format %{"[$reg + $off + $ireg << $scale]" %}
 4250   interface(MEMORY_INTER) %{
 4251     base($reg);
 4252     index($ireg);
 4253     scale($scale);
 4254     disp($off);
 4255   %}
 4256 %}
 4257 
 4258 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4260 // the first word of the long.  If the load-long destination overlaps with
 4261 // registers used in the addressing expression, the 2nd half will be loaded
 4262 // from a clobbered address.  Fix this by requiring that load-long use
 4263 // address registers that do not overlap with the load-long target.
 4264 
 4265 // load-long support
 4266 operand load_long_RegP() %{
 4267   constraint(ALLOC_IN_RC(esi_reg));
 4268   match(RegP);
 4269   match(eSIRegP);
 4270   op_cost(100);
 4271   format %{  %}
 4272   interface(REG_INTER);
 4273 %}
 4274 
 4275 // Indirect Memory Operand Long
 4276 operand load_long_indirect(load_long_RegP reg) %{
 4277   constraint(ALLOC_IN_RC(esi_reg));
 4278   match(reg);
 4279 
 4280   format %{ "[$reg]" %}
 4281   interface(MEMORY_INTER) %{
 4282     base($reg);
 4283     index(0x4);
 4284     scale(0x0);
 4285     disp(0x0);
 4286   %}
 4287 %}
 4288 
 4289 // Indirect Memory Plus Long Offset Operand
 4290 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4291   match(AddP reg off);
 4292 
 4293   format %{ "[$reg + $off]" %}
 4294   interface(MEMORY_INTER) %{
 4295     base($reg);
 4296     index(0x4);
 4297     scale(0x0);
 4298     disp($off);
 4299   %}
 4300 %}
 4301 
 4302 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4303 
 4304 
 4305 //----------Special Memory Operands--------------------------------------------
 4306 // Stack Slot Operand - This operand is used for loading and storing temporary
 4307 //                      values on the stack where a match requires a value to
 4308 //                      flow through memory.
 4309 operand stackSlotP(sRegP reg) %{
 4310   constraint(ALLOC_IN_RC(stack_slots));
 4311   // No match rule because this operand is only generated in matching
 4312   format %{ "[$reg]" %}
 4313   interface(MEMORY_INTER) %{
 4314     base(0x4);   // ESP
 4315     index(0x4);  // No Index
 4316     scale(0x0);  // No Scale
 4317     disp($reg);  // Stack Offset
 4318   %}
 4319 %}
 4320 
 4321 operand stackSlotI(sRegI reg) %{
 4322   constraint(ALLOC_IN_RC(stack_slots));
 4323   // No match rule because this operand is only generated in matching
 4324   format %{ "[$reg]" %}
 4325   interface(MEMORY_INTER) %{
 4326     base(0x4);   // ESP
 4327     index(0x4);  // No Index
 4328     scale(0x0);  // No Scale
 4329     disp($reg);  // Stack Offset
 4330   %}
 4331 %}
 4332 
 4333 operand stackSlotF(sRegF reg) %{
 4334   constraint(ALLOC_IN_RC(stack_slots));
 4335   // No match rule because this operand is only generated in matching
 4336   format %{ "[$reg]" %}
 4337   interface(MEMORY_INTER) %{
 4338     base(0x4);   // ESP
 4339     index(0x4);  // No Index
 4340     scale(0x0);  // No Scale
 4341     disp($reg);  // Stack Offset
 4342   %}
 4343 %}
 4344 
 4345 operand stackSlotD(sRegD reg) %{
 4346   constraint(ALLOC_IN_RC(stack_slots));
 4347   // No match rule because this operand is only generated in matching
 4348   format %{ "[$reg]" %}
 4349   interface(MEMORY_INTER) %{
 4350     base(0x4);   // ESP
 4351     index(0x4);  // No Index
 4352     scale(0x0);  // No Scale
 4353     disp($reg);  // Stack Offset
 4354   %}
 4355 %}
 4356 
 4357 operand stackSlotL(sRegL reg) %{
 4358   constraint(ALLOC_IN_RC(stack_slots));
 4359   // No match rule because this operand is only generated in matching
 4360   format %{ "[$reg]" %}
 4361   interface(MEMORY_INTER) %{
 4362     base(0x4);   // ESP
 4363     index(0x4);  // No Index
 4364     scale(0x0);  // No Scale
 4365     disp($reg);  // Stack Offset
 4366   %}
 4367 %}
 4368 
 4369 //----------Conditional Branch Operands----------------------------------------
 4370 // Comparison Op  - This is the operation of the comparison, and is limited to
 4371 //                  the following set of codes:
 4372 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4373 //
 4374 // Other attributes of the comparison, such as unsignedness, are specified
 4375 // by the comparison instruction that sets a condition code flags register.
 4376 // That result is represented by a flags operand whose subtype is appropriate
 4377 // to the unsignedness (etc.) of the comparison.
 4378 //
 4379 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4380 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4381 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4382 
// Comparison Code
 4384 operand cmpOp() %{
 4385   match(Bool);
 4386 
 4387   format %{ "" %}
 4388   interface(COND_INTER) %{
 4389     equal(0x4, "e");
 4390     not_equal(0x5, "ne");
 4391     less(0xC, "l");
 4392     greater_equal(0xD, "ge");
 4393     less_equal(0xE, "le");
 4394     greater(0xF, "g");
 4395     overflow(0x0, "o");
 4396     no_overflow(0x1, "no");
 4397   %}
 4398 %}
 4399 
 4400 // Comparison Code, unsigned compare.  Used by FP also, with
 4401 // C2 (unordered) turned into GT or LT already.  The other bits
 4402 // C0 and C3 are turned into Carry & Zero flags.
 4403 operand cmpOpU() %{
 4404   match(Bool);
 4405 
 4406   format %{ "" %}
 4407   interface(COND_INTER) %{
 4408     equal(0x4, "e");
 4409     not_equal(0x5, "ne");
 4410     less(0x2, "b");
 4411     greater_equal(0x3, "nb");
 4412     less_equal(0x6, "be");
 4413     greater(0x7, "nbe");
 4414     overflow(0x0, "o");
 4415     no_overflow(0x1, "no");
 4416   %}
 4417 %}
 4418 
 4419 // Floating comparisons that don't require any fixup for the unordered case
 4420 operand cmpOpUCF() %{
 4421   match(Bool);
 4422   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4423             n->as_Bool()->_test._test == BoolTest::ge ||
 4424             n->as_Bool()->_test._test == BoolTest::le ||
 4425             n->as_Bool()->_test._test == BoolTest::gt);
 4426   format %{ "" %}
 4427   interface(COND_INTER) %{
 4428     equal(0x4, "e");
 4429     not_equal(0x5, "ne");
 4430     less(0x2, "b");
 4431     greater_equal(0x3, "nb");
 4432     less_equal(0x6, "be");
 4433     greater(0x7, "nbe");
 4434     overflow(0x0, "o");
 4435     no_overflow(0x1, "no");
 4436   %}
 4437 %}
 4438 
 4439 
 4440 // Floating comparisons that can be fixed up with extra conditional jumps
 4441 operand cmpOpUCF2() %{
 4442   match(Bool);
 4443   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4444             n->as_Bool()->_test._test == BoolTest::eq);
 4445   format %{ "" %}
 4446   interface(COND_INTER) %{
 4447     equal(0x4, "e");
 4448     not_equal(0x5, "ne");
 4449     less(0x2, "b");
 4450     greater_equal(0x3, "nb");
 4451     less_equal(0x6, "be");
 4452     greater(0x7, "nbe");
 4453     overflow(0x0, "o");
 4454     no_overflow(0x1, "no");
 4455   %}
 4456 %}
 4457 
 4458 // Comparison Code for FP conditional move
 4459 operand cmpOp_fcmov() %{
 4460   match(Bool);
 4461 
 4462   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4463             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4464   format %{ "" %}
 4465   interface(COND_INTER) %{
 4466     equal        (0x0C8);
 4467     not_equal    (0x1C8);
 4468     less         (0x0C0);
 4469     greater_equal(0x1C0);
 4470     less_equal   (0x0D0);
 4471     greater      (0x1D0);
 4472     overflow(0x0, "o"); // not really supported by the instruction
 4473     no_overflow(0x1, "no"); // not really supported by the instruction
 4474   %}
 4475 %}
 4476 
 4477 // Comparison Code used in long compares
 4478 operand cmpOp_commute() %{
 4479   match(Bool);
 4480 
 4481   format %{ "" %}
 4482   interface(COND_INTER) %{
 4483     equal(0x4, "e");
 4484     not_equal(0x5, "ne");
 4485     less(0xF, "g");
 4486     greater_equal(0xE, "le");
 4487     less_equal(0xD, "ge");
 4488     greater(0xC, "l");
 4489     overflow(0x0, "o");
 4490     no_overflow(0x1, "no");
 4491   %}
 4492 %}
 4493 
 4494 // Comparison Code used in unsigned long compares
 4495 operand cmpOpU_commute() %{
 4496   match(Bool);
 4497 
 4498   format %{ "" %}
 4499   interface(COND_INTER) %{
 4500     equal(0x4, "e");
 4501     not_equal(0x5, "ne");
 4502     less(0x7, "nbe");
 4503     greater_equal(0x6, "be");
 4504     less_equal(0x3, "nb");
 4505     greater(0x2, "b");
 4506     overflow(0x0, "o");
 4507     no_overflow(0x1, "no");
 4508   %}
 4509 %}
 4510 
 4511 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4513 // instruction definitions by not requiring the AD writer to specify separate
 4514 // instructions for every form of operand when the instruction accepts
 4515 // multiple operand types with the same basic encoding and format.  The classic
 4516 // case of this is memory operands.
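//
// For example (an illustrative sketch; loadExample is a hypothetical name --
// see loadI further down for the real definition), a single instruction
//
//   instruct loadExample(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "MOV    $dst,$mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}
//
// matches the direct, indirect, indexed and scaled addressing forms listed
// in the "memory" opclass below without a separate instruct for each one.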
 4517 
 4518 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4519                indIndex, indIndexScale, indIndexScaleOffset);
 4520 
 4521 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4522 // This means some kind of offset is always required and you cannot use
// an oop as the offset (as is done when working on static globals).
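//
// A minimal sketch of the resulting addressing (this mirrors the long load
// defined further down in this file): the two halves of the long are reached
// through the displacement and the displacement plus 4,
//
//   Address lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
//   Address hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
//
// which is why an oop cannot serve as the offset here.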
 4524 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4525                     indIndex, indIndexScale, indIndexScaleOffset);
 4526 
 4527 
 4528 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 4530 pipeline %{
 4531 
 4532 //----------ATTRIBUTES---------------------------------------------------------
 4533 attributes %{
  variable_size_instructions;        // Variable size instructions
 4535   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4537   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4538   instruction_fetch_units = 1;       // of 16 bytes
 4539 
 4540   // List of nop instructions
 4541   nops( MachNop );
 4542 %}
 4543 
 4544 //----------RESOURCES----------------------------------------------------------
 4545 // Resources are the functional units available to the machine
 4546 
 4547 // Generic P2/P3 pipeline
 4548 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4549 // 3 instructions decoded per cycle.
 4550 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops; only ALU0 handles mul/div instructions.
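//
// For example (reading the pipe classes defined below): an instruction in the
// ialu_reg_mem class occupies the big decoder D0 at stage S0, one of the MEM
// units at S3 and either ALU at S4, while a plain ialu_reg instruction may use
// any of the three decoders and either ALU.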
 4552 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4553            MS0, MS1, MEM = MS0 | MS1,
 4554            BR, FPU,
 4555            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4556 
 4557 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4558 // Pipeline Description specifies the stages in the machine's pipeline
 4559 
 4560 // Generic P2/P3 pipeline
 4561 pipe_desc(S0, S1, S2, S3, S4, S5);
 4562 
 4563 //----------PIPELINE CLASSES---------------------------------------------------
 4564 // Pipeline Classes describe the stages in which input and output are
 4565 // referenced by the hardware pipeline.
 4566 
 4567 // Naming convention: ialu or fpu
 4568 // Then: _reg
 4569 // Then: _reg if there is a 2nd register
 4570 // Then: _long if it's a pair of instructions implementing a long
 4571 // Then: _fat if it requires the big decoder
 4572 //   Or: _mem if it requires the big decoder and a memory unit.
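//
// For example, ialu_reg_mem below is an integer ALU operation with a register
// destination and a memory source (so it needs the big decoder and a memory
// unit), and fpu_reg_reg is an FPU operation on two registers.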
 4573 
 4574 // Integer ALU reg operation
 4575 pipe_class ialu_reg(rRegI dst) %{
 4576     single_instruction;
 4577     dst    : S4(write);
 4578     dst    : S3(read);
 4579     DECODE : S0;        // any decoder
 4580     ALU    : S3;        // any alu
 4581 %}
 4582 
 4583 // Long ALU reg operation
 4584 pipe_class ialu_reg_long(eRegL dst) %{
 4585     instruction_count(2);
 4586     dst    : S4(write);
 4587     dst    : S3(read);
 4588     DECODE : S0(2);     // any 2 decoders
 4589     ALU    : S3(2);     // both alus
 4590 %}
 4591 
 4592 // Integer ALU reg operation using big decoder
 4593 pipe_class ialu_reg_fat(rRegI dst) %{
 4594     single_instruction;
 4595     dst    : S4(write);
 4596     dst    : S3(read);
 4597     D0     : S0;        // big decoder only
 4598     ALU    : S3;        // any alu
 4599 %}
 4600 
 4601 // Long ALU reg operation using big decoder
 4602 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4603     instruction_count(2);
 4604     dst    : S4(write);
 4605     dst    : S3(read);
 4606     D0     : S0(2);     // big decoder only; twice
 4607     ALU    : S3(2);     // any 2 alus
 4608 %}
 4609 
 4610 // Integer ALU reg-reg operation
 4611 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4612     single_instruction;
 4613     dst    : S4(write);
 4614     src    : S3(read);
 4615     DECODE : S0;        // any decoder
 4616     ALU    : S3;        // any alu
 4617 %}
 4618 
 4619 // Long ALU reg-reg operation
 4620 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4621     instruction_count(2);
 4622     dst    : S4(write);
 4623     src    : S3(read);
 4624     DECODE : S0(2);     // any 2 decoders
 4625     ALU    : S3(2);     // both alus
 4626 %}
 4627 
// Integer ALU reg-reg operation using big decoder
 4629 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4630     single_instruction;
 4631     dst    : S4(write);
 4632     src    : S3(read);
 4633     D0     : S0;        // big decoder only
 4634     ALU    : S3;        // any alu
 4635 %}
 4636 
// Long ALU reg-reg operation using big decoder
 4638 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4639     instruction_count(2);
 4640     dst    : S4(write);
 4641     src    : S3(read);
 4642     D0     : S0(2);     // big decoder only; twice
 4643     ALU    : S3(2);     // both alus
 4644 %}
 4645 
 4646 // Integer ALU reg-mem operation
 4647 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4648     single_instruction;
 4649     dst    : S5(write);
 4650     mem    : S3(read);
 4651     D0     : S0;        // big decoder only
 4652     ALU    : S4;        // any alu
 4653     MEM    : S3;        // any mem
 4654 %}
 4655 
 4656 // Long ALU reg-mem operation
 4657 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4658     instruction_count(2);
 4659     dst    : S5(write);
 4660     mem    : S3(read);
 4661     D0     : S0(2);     // big decoder only; twice
 4662     ALU    : S4(2);     // any 2 alus
 4663     MEM    : S3(2);     // both mems
 4664 %}
 4665 
 4666 // Integer mem operation (prefetch)
 4667 pipe_class ialu_mem(memory mem)
 4668 %{
 4669     single_instruction;
 4670     mem    : S3(read);
 4671     D0     : S0;        // big decoder only
 4672     MEM    : S3;        // any mem
 4673 %}
 4674 
 4675 // Integer Store to Memory
 4676 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4677     single_instruction;
 4678     mem    : S3(read);
 4679     src    : S5(read);
 4680     D0     : S0;        // big decoder only
 4681     ALU    : S4;        // any alu
 4682     MEM    : S3;
 4683 %}
 4684 
 4685 // Long Store to Memory
 4686 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4687     instruction_count(2);
 4688     mem    : S3(read);
 4689     src    : S5(read);
 4690     D0     : S0(2);     // big decoder only; twice
 4691     ALU    : S4(2);     // any 2 alus
 4692     MEM    : S3(2);     // Both mems
 4693 %}
 4694 
 4695 // Integer Store to Memory
 4696 pipe_class ialu_mem_imm(memory mem) %{
 4697     single_instruction;
 4698     mem    : S3(read);
 4699     D0     : S0;        // big decoder only
 4700     ALU    : S4;        // any alu
 4701     MEM    : S3;
 4702 %}
 4703 
 4704 // Integer ALU0 reg-reg operation
 4705 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4706     single_instruction;
 4707     dst    : S4(write);
 4708     src    : S3(read);
 4709     D0     : S0;        // Big decoder only
 4710     ALU0   : S3;        // only alu0
 4711 %}
 4712 
 4713 // Integer ALU0 reg-mem operation
 4714 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4715     single_instruction;
 4716     dst    : S5(write);
 4717     mem    : S3(read);
 4718     D0     : S0;        // big decoder only
 4719     ALU0   : S4;        // ALU0 only
 4720     MEM    : S3;        // any mem
 4721 %}
 4722 
 4723 // Integer ALU reg-reg operation
 4724 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4725     single_instruction;
 4726     cr     : S4(write);
 4727     src1   : S3(read);
 4728     src2   : S3(read);
 4729     DECODE : S0;        // any decoder
 4730     ALU    : S3;        // any alu
 4731 %}
 4732 
 4733 // Integer ALU reg-imm operation
 4734 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4735     single_instruction;
 4736     cr     : S4(write);
 4737     src1   : S3(read);
 4738     DECODE : S0;        // any decoder
 4739     ALU    : S3;        // any alu
 4740 %}
 4741 
 4742 // Integer ALU reg-mem operation
 4743 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4744     single_instruction;
 4745     cr     : S4(write);
 4746     src1   : S3(read);
 4747     src2   : S3(read);
 4748     D0     : S0;        // big decoder only
 4749     ALU    : S4;        // any alu
 4750     MEM    : S3;
 4751 %}
 4752 
 4753 // Conditional move reg-reg
 4754 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4755     instruction_count(4);
 4756     y      : S4(read);
 4757     q      : S3(read);
 4758     p      : S3(read);
 4759     DECODE : S0(4);     // any decoder
 4760 %}
 4761 
 4762 // Conditional move reg-reg
 4763 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4764     single_instruction;
 4765     dst    : S4(write);
 4766     src    : S3(read);
 4767     cr     : S3(read);
 4768     DECODE : S0;        // any decoder
 4769 %}
 4770 
 4771 // Conditional move reg-mem
 4772 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4773     single_instruction;
 4774     dst    : S4(write);
 4775     src    : S3(read);
 4776     cr     : S3(read);
 4777     DECODE : S0;        // any decoder
 4778     MEM    : S3;
 4779 %}
 4780 
 4781 // Conditional move reg-reg long
 4782 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4783     single_instruction;
 4784     dst    : S4(write);
 4785     src    : S3(read);
 4786     cr     : S3(read);
 4787     DECODE : S0(2);     // any 2 decoders
 4788 %}
 4789 
 4790 // Conditional move double reg-reg
 4791 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4792     single_instruction;
 4793     dst    : S4(write);
 4794     src    : S3(read);
 4795     cr     : S3(read);
 4796     DECODE : S0;        // any decoder
 4797 %}
 4798 
// Float reg operation
 4800 pipe_class fpu_reg(regDPR dst) %{
 4801     instruction_count(2);
 4802     dst    : S3(read);
 4803     DECODE : S0(2);     // any 2 decoders
 4804     FPU    : S3;
 4805 %}
 4806 
 4807 // Float reg-reg operation
 4808 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4809     instruction_count(2);
 4810     dst    : S4(write);
 4811     src    : S3(read);
 4812     DECODE : S0(2);     // any 2 decoders
 4813     FPU    : S3;
 4814 %}
 4815 
// Float reg-reg-reg operation
 4817 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4818     instruction_count(3);
 4819     dst    : S4(write);
 4820     src1   : S3(read);
 4821     src2   : S3(read);
 4822     DECODE : S0(3);     // any 3 decoders
 4823     FPU    : S3(2);
 4824 %}
 4825 
// Float reg-reg-reg-reg operation
 4827 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4828     instruction_count(4);
 4829     dst    : S4(write);
 4830     src1   : S3(read);
 4831     src2   : S3(read);
 4832     src3   : S3(read);
    DECODE : S0(4);     // 4 decoder slots
 4834     FPU    : S3(2);
 4835 %}
 4836 
// Float reg-mem-reg-reg operation
 4838 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4839     instruction_count(4);
 4840     dst    : S4(write);
 4841     src1   : S3(read);
 4842     src2   : S3(read);
 4843     src3   : S3(read);
 4844     DECODE : S1(3);     // any 3 decoders
 4845     D0     : S0;        // Big decoder only
 4846     FPU    : S3(2);
 4847     MEM    : S3;
 4848 %}
 4849 
 4850 // Float reg-mem operation
 4851 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4852     instruction_count(2);
 4853     dst    : S5(write);
 4854     mem    : S3(read);
 4855     D0     : S0;        // big decoder only
 4856     DECODE : S1;        // any decoder for FPU POP
 4857     FPU    : S4;
 4858     MEM    : S3;        // any mem
 4859 %}
 4860 
// Float reg-reg-mem operation
 4862 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4863     instruction_count(3);
 4864     dst    : S5(write);
 4865     src1   : S3(read);
 4866     mem    : S3(read);
 4867     D0     : S0;        // big decoder only
 4868     DECODE : S1(2);     // any decoder for FPU POP
 4869     FPU    : S4;
 4870     MEM    : S3;        // any mem
 4871 %}
 4872 
 4873 // Float mem-reg operation
 4874 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4875     instruction_count(2);
 4876     src    : S5(read);
 4877     mem    : S3(read);
 4878     DECODE : S0;        // any decoder for FPU PUSH
 4879     D0     : S1;        // big decoder only
 4880     FPU    : S4;
 4881     MEM    : S3;        // any mem
 4882 %}
 4883 
 4884 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4885     instruction_count(3);
 4886     src1   : S3(read);
 4887     src2   : S3(read);
 4888     mem    : S3(read);
 4889     DECODE : S0(2);     // any decoder for FPU PUSH
 4890     D0     : S1;        // big decoder only
 4891     FPU    : S4;
 4892     MEM    : S3;        // any mem
 4893 %}
 4894 
 4895 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4896     instruction_count(3);
 4897     src1   : S3(read);
 4898     src2   : S3(read);
 4899     mem    : S4(read);
 4900     DECODE : S0;        // any decoder for FPU PUSH
 4901     D0     : S0(2);     // big decoder only
 4902     FPU    : S4;
 4903     MEM    : S3(2);     // any mem
 4904 %}
 4905 
 4906 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4907     instruction_count(2);
 4908     src1   : S3(read);
 4909     dst    : S4(read);
 4910     D0     : S0(2);     // big decoder only
 4911     MEM    : S3(2);     // any mem
 4912 %}
 4913 
 4914 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4915     instruction_count(3);
 4916     src1   : S3(read);
 4917     src2   : S3(read);
 4918     dst    : S4(read);
 4919     D0     : S0(3);     // big decoder only
 4920     FPU    : S4;
 4921     MEM    : S3(3);     // any mem
 4922 %}
 4923 
 4924 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4925     instruction_count(3);
 4926     src1   : S4(read);
 4927     mem    : S4(read);
 4928     DECODE : S0;        // any decoder for FPU PUSH
 4929     D0     : S0(2);     // big decoder only
 4930     FPU    : S4;
 4931     MEM    : S3(2);     // any mem
 4932 %}
 4933 
 4934 // Float load constant
 4935 pipe_class fpu_reg_con(regDPR dst) %{
 4936     instruction_count(2);
 4937     dst    : S5(write);
 4938     D0     : S0;        // big decoder only for the load
 4939     DECODE : S1;        // any decoder for FPU POP
 4940     FPU    : S4;
 4941     MEM    : S3;        // any mem
 4942 %}
 4943 
 4944 // Float load constant
 4945 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4946     instruction_count(3);
 4947     dst    : S5(write);
 4948     src    : S3(read);
 4949     D0     : S0;        // big decoder only for the load
 4950     DECODE : S1(2);     // any decoder for FPU POP
 4951     FPU    : S4;
 4952     MEM    : S3;        // any mem
 4953 %}
 4954 
// Unconditional branch
 4956 pipe_class pipe_jmp( label labl ) %{
 4957     single_instruction;
 4958     BR   : S3;
 4959 %}
 4960 
 4961 // Conditional branch
 4962 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4963     single_instruction;
 4964     cr    : S1(read);
 4965     BR    : S3;
 4966 %}
 4967 
 4968 // Allocation idiom
 4969 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4970     instruction_count(1); force_serialization;
 4971     fixed_latency(6);
 4972     heap_ptr : S3(read);
 4973     DECODE   : S0(3);
 4974     D0       : S2;
 4975     MEM      : S3;
 4976     ALU      : S3(2);
 4977     dst      : S5(write);
 4978     BR       : S5;
 4979 %}
 4980 
 4981 // Generic big/slow expanded idiom
 4982 pipe_class pipe_slow(  ) %{
 4983     instruction_count(10); multiple_bundles; force_serialization;
 4984     fixed_latency(100);
 4985     D0  : S0(2);
 4986     MEM : S3(2);
 4987 %}
 4988 
 4989 // The real do-nothing guy
 4990 pipe_class empty( ) %{
 4991     instruction_count(0);
 4992 %}
 4993 
 4994 // Define the class for the Nop node
 4995 define %{
 4996    MachNop = empty;
 4997 %}
 4998 
 4999 %}
 5000 
 5001 //----------INSTRUCTIONS-------------------------------------------------------
 5002 //
 5003 // match      -- States which machine-independent subtree may be replaced
 5004 //               by this instruction.
 5005 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5006 //               selection to identify a minimum cost tree of machine
 5007 //               instructions that matches a tree of machine-independent
 5008 //               instructions.
 5009 // format     -- A string providing the disassembly for this instruction.
 5010 //               The value of an instruction's operand may be inserted
 5011 //               by referring to it with a '$' prefix.
 5012 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5013 //               to within an encode class as $primary, $secondary, and $tertiary
 5014 //               respectively.  The primary opcode is commonly used to
 5015 //               indicate the type of machine instruction, while secondary
 5016 //               and tertiary are often used for prefix options or addressing
 5017 //               modes.
 5018 // ins_encode -- A list of encode classes with parameters. The encode class
 5019 //               name must have been defined in an 'enc_class' specification
 5020 //               in the encode section of the architecture description.
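//
// A commented example of how these pieces fit together.  This is only an
// annotated copy of the pattern used by the register-memory loads below
// (loadExampleP is a hypothetical name), not an additional instruction:
//
//   instruct loadExampleP(eRegP dst, memory mem) %{
//     match(Set dst (LoadP mem));          // replaces a LoadP subtree
//     ins_cost(125);                       // cost used during selection
//     format %{ "MOV    $dst,$mem" %}      // disassembly, $-operands substituted
//     opcode(0x8B);                        // $primary opcode
//     ins_encode( OpcP, RegMem(dst,mem) ); // encode classes from the encode section
//     ins_pipe( ialu_reg_mem );            // pipeline class defined above
//   %}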
 5021 
 5022 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5023 // Load Float
 5024 instruct MoveF2LEG(legRegF dst, regF src) %{
 5025   match(Set dst src);
 5026   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5027   ins_encode %{
 5028     ShouldNotReachHere();
 5029   %}
 5030   ins_pipe( fpu_reg_reg );
 5031 %}
 5032 
 5033 // Load Float
 5034 instruct MoveLEG2F(regF dst, legRegF src) %{
 5035   match(Set dst src);
 5036   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5037   ins_encode %{
 5038     ShouldNotReachHere();
 5039   %}
 5040   ins_pipe( fpu_reg_reg );
 5041 %}
 5042 
 5043 // Load Float
 5044 instruct MoveF2VL(vlRegF dst, regF src) %{
 5045   match(Set dst src);
 5046   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5047   ins_encode %{
 5048     ShouldNotReachHere();
 5049   %}
 5050   ins_pipe( fpu_reg_reg );
 5051 %}
 5052 
 5053 // Load Float
 5054 instruct MoveVL2F(regF dst, vlRegF src) %{
 5055   match(Set dst src);
 5056   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5057   ins_encode %{
 5058     ShouldNotReachHere();
 5059   %}
 5060   ins_pipe( fpu_reg_reg );
 5061 %}
 5062 
 5063 
 5064 
 5065 // Load Double
 5066 instruct MoveD2LEG(legRegD dst, regD src) %{
 5067   match(Set dst src);
 5068   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5069   ins_encode %{
 5070     ShouldNotReachHere();
 5071   %}
 5072   ins_pipe( fpu_reg_reg );
 5073 %}
 5074 
 5075 // Load Double
 5076 instruct MoveLEG2D(regD dst, legRegD src) %{
 5077   match(Set dst src);
 5078   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5079   ins_encode %{
 5080     ShouldNotReachHere();
 5081   %}
 5082   ins_pipe( fpu_reg_reg );
 5083 %}
 5084 
 5085 // Load Double
 5086 instruct MoveD2VL(vlRegD dst, regD src) %{
 5087   match(Set dst src);
 5088   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5089   ins_encode %{
 5090     ShouldNotReachHere();
 5091   %}
 5092   ins_pipe( fpu_reg_reg );
 5093 %}
 5094 
 5095 // Load Double
 5096 instruct MoveVL2D(regD dst, vlRegD src) %{
 5097   match(Set dst src);
 5098   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5099   ins_encode %{
 5100     ShouldNotReachHere();
 5101   %}
 5102   ins_pipe( fpu_reg_reg );
 5103 %}
 5104 
 5105 //----------BSWAP-Instruction--------------------------------------------------
 5106 instruct bytes_reverse_int(rRegI dst) %{
 5107   match(Set dst (ReverseBytesI dst));
 5108 
 5109   format %{ "BSWAP  $dst" %}
 5110   opcode(0x0F, 0xC8);
 5111   ins_encode( OpcP, OpcSReg(dst) );
 5112   ins_pipe( ialu_reg );
 5113 %}
 5114 
 5115 instruct bytes_reverse_long(eRegL dst) %{
 5116   match(Set dst (ReverseBytesL dst));
 5117 
 5118   format %{ "BSWAP  $dst.lo\n\t"
 5119             "BSWAP  $dst.hi\n\t"
 5120             "XCHG   $dst.lo $dst.hi" %}
 5121 
 5122   ins_cost(125);
 5123   ins_encode( bswap_long_bytes(dst) );
 5124   ins_pipe( ialu_reg_reg);
 5125 %}
 5126 
 5127 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5128   match(Set dst (ReverseBytesUS dst));
 5129   effect(KILL cr);
 5130 
 5131   format %{ "BSWAP  $dst\n\t"
 5132             "SHR    $dst,16\n\t" %}
 5133   ins_encode %{
 5134     __ bswapl($dst$$Register);
 5135     __ shrl($dst$$Register, 16);
 5136   %}
 5137   ins_pipe( ialu_reg );
 5138 %}
 5139 
 5140 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5141   match(Set dst (ReverseBytesS dst));
 5142   effect(KILL cr);
 5143 
 5144   format %{ "BSWAP  $dst\n\t"
 5145             "SAR    $dst,16\n\t" %}
 5146   ins_encode %{
 5147     __ bswapl($dst$$Register);
 5148     __ sarl($dst$$Register, 16);
 5149   %}
 5150   ins_pipe( ialu_reg );
 5151 %}
 5152 
 5153 
 5154 //---------- Zeros Count Instructions ------------------------------------------
 5155 
 5156 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5157   predicate(UseCountLeadingZerosInstruction);
 5158   match(Set dst (CountLeadingZerosI src));
 5159   effect(KILL cr);
 5160 
 5161   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5162   ins_encode %{
 5163     __ lzcntl($dst$$Register, $src$$Register);
 5164   %}
 5165   ins_pipe(ialu_reg);
 5166 %}
 5167 
 5168 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5169   predicate(!UseCountLeadingZerosInstruction);
 5170   match(Set dst (CountLeadingZerosI src));
 5171   effect(KILL cr);
 5172 
 5173   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5174             "JNZ    skip\n\t"
 5175             "MOV    $dst, -1\n"
 5176       "skip:\n\t"
 5177             "NEG    $dst\n\t"
 5178             "ADD    $dst, 31" %}
 5179   ins_encode %{
 5180     Register Rdst = $dst$$Register;
 5181     Register Rsrc = $src$$Register;
 5182     Label skip;
 5183     __ bsrl(Rdst, Rsrc);
 5184     __ jccb(Assembler::notZero, skip);
 5185     __ movl(Rdst, -1);
 5186     __ bind(skip);
 5187     __ negl(Rdst);
 5188     __ addl(Rdst, BitsPerInt - 1);
 5189   %}
 5190   ins_pipe(ialu_reg);
 5191 %}
 5192 
 5193 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5194   predicate(UseCountLeadingZerosInstruction);
 5195   match(Set dst (CountLeadingZerosL src));
 5196   effect(TEMP dst, KILL cr);
 5197 
 5198   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5199             "JNC    done\n\t"
 5200             "LZCNT  $dst, $src.lo\n\t"
 5201             "ADD    $dst, 32\n"
 5202       "done:" %}
 5203   ins_encode %{
 5204     Register Rdst = $dst$$Register;
 5205     Register Rsrc = $src$$Register;
 5206     Label done;
 5207     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5208     __ jccb(Assembler::carryClear, done);
 5209     __ lzcntl(Rdst, Rsrc);
 5210     __ addl(Rdst, BitsPerInt);
 5211     __ bind(done);
 5212   %}
 5213   ins_pipe(ialu_reg);
 5214 %}
 5215 
 5216 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5217   predicate(!UseCountLeadingZerosInstruction);
 5218   match(Set dst (CountLeadingZerosL src));
 5219   effect(TEMP dst, KILL cr);
 5220 
 5221   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5222             "JZ     msw_is_zero\n\t"
 5223             "ADD    $dst, 32\n\t"
 5224             "JMP    not_zero\n"
 5225       "msw_is_zero:\n\t"
 5226             "BSR    $dst, $src.lo\n\t"
 5227             "JNZ    not_zero\n\t"
 5228             "MOV    $dst, -1\n"
 5229       "not_zero:\n\t"
 5230             "NEG    $dst\n\t"
 5231             "ADD    $dst, 63\n" %}
 5232  ins_encode %{
 5233     Register Rdst = $dst$$Register;
 5234     Register Rsrc = $src$$Register;
 5235     Label msw_is_zero;
 5236     Label not_zero;
 5237     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5238     __ jccb(Assembler::zero, msw_is_zero);
 5239     __ addl(Rdst, BitsPerInt);
 5240     __ jmpb(not_zero);
 5241     __ bind(msw_is_zero);
 5242     __ bsrl(Rdst, Rsrc);
 5243     __ jccb(Assembler::notZero, not_zero);
 5244     __ movl(Rdst, -1);
 5245     __ bind(not_zero);
 5246     __ negl(Rdst);
 5247     __ addl(Rdst, BitsPerLong - 1);
 5248   %}
 5249   ins_pipe(ialu_reg);
 5250 %}
 5251 
 5252 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5253   predicate(UseCountTrailingZerosInstruction);
 5254   match(Set dst (CountTrailingZerosI src));
 5255   effect(KILL cr);
 5256 
 5257   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5258   ins_encode %{
 5259     __ tzcntl($dst$$Register, $src$$Register);
 5260   %}
 5261   ins_pipe(ialu_reg);
 5262 %}
 5263 
 5264 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5265   predicate(!UseCountTrailingZerosInstruction);
 5266   match(Set dst (CountTrailingZerosI src));
 5267   effect(KILL cr);
 5268 
 5269   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5270             "JNZ    done\n\t"
 5271             "MOV    $dst, 32\n"
 5272       "done:" %}
 5273   ins_encode %{
 5274     Register Rdst = $dst$$Register;
 5275     Label done;
 5276     __ bsfl(Rdst, $src$$Register);
 5277     __ jccb(Assembler::notZero, done);
 5278     __ movl(Rdst, BitsPerInt);
 5279     __ bind(done);
 5280   %}
 5281   ins_pipe(ialu_reg);
 5282 %}
 5283 
 5284 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5285   predicate(UseCountTrailingZerosInstruction);
 5286   match(Set dst (CountTrailingZerosL src));
 5287   effect(TEMP dst, KILL cr);
 5288 
 5289   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5290             "JNC    done\n\t"
 5291             "TZCNT  $dst, $src.hi\n\t"
 5292             "ADD    $dst, 32\n"
 5293             "done:" %}
 5294   ins_encode %{
 5295     Register Rdst = $dst$$Register;
 5296     Register Rsrc = $src$$Register;
 5297     Label done;
 5298     __ tzcntl(Rdst, Rsrc);
 5299     __ jccb(Assembler::carryClear, done);
 5300     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5301     __ addl(Rdst, BitsPerInt);
 5302     __ bind(done);
 5303   %}
 5304   ins_pipe(ialu_reg);
 5305 %}
 5306 
 5307 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5308   predicate(!UseCountTrailingZerosInstruction);
 5309   match(Set dst (CountTrailingZerosL src));
 5310   effect(TEMP dst, KILL cr);
 5311 
 5312   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5313             "JNZ    done\n\t"
 5314             "BSF    $dst, $src.hi\n\t"
 5315             "JNZ    msw_not_zero\n\t"
 5316             "MOV    $dst, 32\n"
 5317       "msw_not_zero:\n\t"
 5318             "ADD    $dst, 32\n"
 5319       "done:" %}
 5320   ins_encode %{
 5321     Register Rdst = $dst$$Register;
 5322     Register Rsrc = $src$$Register;
 5323     Label msw_not_zero;
 5324     Label done;
 5325     __ bsfl(Rdst, Rsrc);
 5326     __ jccb(Assembler::notZero, done);
 5327     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5328     __ jccb(Assembler::notZero, msw_not_zero);
 5329     __ movl(Rdst, BitsPerInt);
 5330     __ bind(msw_not_zero);
 5331     __ addl(Rdst, BitsPerInt);
 5332     __ bind(done);
 5333   %}
 5334   ins_pipe(ialu_reg);
 5335 %}
 5336 
 5337 
 5338 //---------- Population Count Instructions -------------------------------------
 5339 
 5340 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5341   predicate(UsePopCountInstruction);
 5342   match(Set dst (PopCountI src));
 5343   effect(KILL cr);
 5344 
 5345   format %{ "POPCNT $dst, $src" %}
 5346   ins_encode %{
 5347     __ popcntl($dst$$Register, $src$$Register);
 5348   %}
 5349   ins_pipe(ialu_reg);
 5350 %}
 5351 
 5352 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5353   predicate(UsePopCountInstruction);
 5354   match(Set dst (PopCountI (LoadI mem)));
 5355   effect(KILL cr);
 5356 
 5357   format %{ "POPCNT $dst, $mem" %}
 5358   ins_encode %{
 5359     __ popcntl($dst$$Register, $mem$$Address);
 5360   %}
 5361   ins_pipe(ialu_reg);
 5362 %}
 5363 
 5364 // Note: Long.bitCount(long) returns an int.
 5365 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5366   predicate(UsePopCountInstruction);
 5367   match(Set dst (PopCountL src));
 5368   effect(KILL cr, TEMP tmp, TEMP dst);
 5369 
 5370   format %{ "POPCNT $dst, $src.lo\n\t"
 5371             "POPCNT $tmp, $src.hi\n\t"
 5372             "ADD    $dst, $tmp" %}
 5373   ins_encode %{
 5374     __ popcntl($dst$$Register, $src$$Register);
 5375     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5376     __ addl($dst$$Register, $tmp$$Register);
 5377   %}
 5378   ins_pipe(ialu_reg);
 5379 %}
 5380 
 5381 // Note: Long.bitCount(long) returns an int.
 5382 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5383   predicate(UsePopCountInstruction);
 5384   match(Set dst (PopCountL (LoadL mem)));
 5385   effect(KILL cr, TEMP tmp, TEMP dst);
 5386 
 5387   format %{ "POPCNT $dst, $mem\n\t"
 5388             "POPCNT $tmp, $mem+4\n\t"
 5389             "ADD    $dst, $tmp" %}
 5390   ins_encode %{
 5391     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5392     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5393     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5394     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5395     __ addl($dst$$Register, $tmp$$Register);
 5396   %}
 5397   ins_pipe(ialu_reg);
 5398 %}
 5399 
 5400 
 5401 //----------Load/Store/Move Instructions---------------------------------------
 5402 //----------Load Instructions--------------------------------------------------
 5403 // Load Byte (8bit signed)
 5404 instruct loadB(xRegI dst, memory mem) %{
 5405   match(Set dst (LoadB mem));
 5406 
 5407   ins_cost(125);
 5408   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5409 
 5410   ins_encode %{
 5411     __ movsbl($dst$$Register, $mem$$Address);
 5412   %}
 5413 
 5414   ins_pipe(ialu_reg_mem);
 5415 %}
 5416 
 5417 // Load Byte (8bit signed) into Long Register
 5418 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5419   match(Set dst (ConvI2L (LoadB mem)));
 5420   effect(KILL cr);
 5421 
 5422   ins_cost(375);
 5423   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5424             "MOV    $dst.hi,$dst.lo\n\t"
 5425             "SAR    $dst.hi,7" %}
 5426 
 5427   ins_encode %{
 5428     __ movsbl($dst$$Register, $mem$$Address);
 5429     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5431   %}
 5432 
 5433   ins_pipe(ialu_reg_mem);
 5434 %}
 5435 
 5436 // Load Unsigned Byte (8bit UNsigned)
 5437 instruct loadUB(xRegI dst, memory mem) %{
 5438   match(Set dst (LoadUB mem));
 5439 
 5440   ins_cost(125);
 5441   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5442 
 5443   ins_encode %{
 5444     __ movzbl($dst$$Register, $mem$$Address);
 5445   %}
 5446 
 5447   ins_pipe(ialu_reg_mem);
 5448 %}
 5449 
 5450 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5451 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5452   match(Set dst (ConvI2L (LoadUB mem)));
 5453   effect(KILL cr);
 5454 
 5455   ins_cost(250);
 5456   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5457             "XOR    $dst.hi,$dst.hi" %}
 5458 
 5459   ins_encode %{
 5460     Register Rdst = $dst$$Register;
 5461     __ movzbl(Rdst, $mem$$Address);
 5462     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5463   %}
 5464 
 5465   ins_pipe(ialu_reg_mem);
 5466 %}
 5467 
 5468 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5469 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5470   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5471   effect(KILL cr);
 5472 
 5473   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5474             "XOR    $dst.hi,$dst.hi\n\t"
 5475             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5476   ins_encode %{
 5477     Register Rdst = $dst$$Register;
 5478     __ movzbl(Rdst, $mem$$Address);
 5479     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5480     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5481   %}
 5482   ins_pipe(ialu_reg_mem);
 5483 %}
 5484 
 5485 // Load Short (16bit signed)
 5486 instruct loadS(rRegI dst, memory mem) %{
 5487   match(Set dst (LoadS mem));
 5488 
 5489   ins_cost(125);
 5490   format %{ "MOVSX  $dst,$mem\t# short" %}
 5491 
 5492   ins_encode %{
 5493     __ movswl($dst$$Register, $mem$$Address);
 5494   %}
 5495 
 5496   ins_pipe(ialu_reg_mem);
 5497 %}
 5498 
 5499 // Load Short (16 bit signed) to Byte (8 bit signed)
 5500 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5501   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5502 
 5503   ins_cost(125);
 5504   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5505   ins_encode %{
 5506     __ movsbl($dst$$Register, $mem$$Address);
 5507   %}
 5508   ins_pipe(ialu_reg_mem);
 5509 %}
 5510 
 5511 // Load Short (16bit signed) into Long Register
 5512 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5513   match(Set dst (ConvI2L (LoadS mem)));
 5514   effect(KILL cr);
 5515 
 5516   ins_cost(375);
 5517   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5518             "MOV    $dst.hi,$dst.lo\n\t"
 5519             "SAR    $dst.hi,15" %}
 5520 
 5521   ins_encode %{
 5522     __ movswl($dst$$Register, $mem$$Address);
 5523     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5525   %}
 5526 
 5527   ins_pipe(ialu_reg_mem);
 5528 %}
 5529 
 5530 // Load Unsigned Short/Char (16bit unsigned)
 5531 instruct loadUS(rRegI dst, memory mem) %{
 5532   match(Set dst (LoadUS mem));
 5533 
 5534   ins_cost(125);
 5535   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5536 
 5537   ins_encode %{
 5538     __ movzwl($dst$$Register, $mem$$Address);
 5539   %}
 5540 
 5541   ins_pipe(ialu_reg_mem);
 5542 %}
 5543 
 5544 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5545 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5546   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5547 
 5548   ins_cost(125);
 5549   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5550   ins_encode %{
 5551     __ movsbl($dst$$Register, $mem$$Address);
 5552   %}
 5553   ins_pipe(ialu_reg_mem);
 5554 %}
 5555 
 5556 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5557 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5558   match(Set dst (ConvI2L (LoadUS mem)));
 5559   effect(KILL cr);
 5560 
 5561   ins_cost(250);
 5562   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5563             "XOR    $dst.hi,$dst.hi" %}
 5564 
 5565   ins_encode %{
 5566     __ movzwl($dst$$Register, $mem$$Address);
 5567     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5568   %}
 5569 
 5570   ins_pipe(ialu_reg_mem);
 5571 %}
 5572 
 5573 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5574 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5575   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5576   effect(KILL cr);
 5577 
 5578   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5579             "XOR    $dst.hi,$dst.hi" %}
 5580   ins_encode %{
 5581     Register Rdst = $dst$$Register;
 5582     __ movzbl(Rdst, $mem$$Address);
 5583     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5584   %}
 5585   ins_pipe(ialu_reg_mem);
 5586 %}
 5587 
 5588 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5589 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5590   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5591   effect(KILL cr);
 5592 
 5593   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5594             "XOR    $dst.hi,$dst.hi\n\t"
 5595             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5596   ins_encode %{
 5597     Register Rdst = $dst$$Register;
 5598     __ movzwl(Rdst, $mem$$Address);
 5599     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5600     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5601   %}
 5602   ins_pipe(ialu_reg_mem);
 5603 %}
 5604 
 5605 // Load Integer
 5606 instruct loadI(rRegI dst, memory mem) %{
 5607   match(Set dst (LoadI mem));
 5608 
 5609   ins_cost(125);
 5610   format %{ "MOV    $dst,$mem\t# int" %}
 5611 
 5612   ins_encode %{
 5613     __ movl($dst$$Register, $mem$$Address);
 5614   %}
 5615 
 5616   ins_pipe(ialu_reg_mem);
 5617 %}
 5618 
 5619 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5620 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5621   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5622 
 5623   ins_cost(125);
 5624   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5625   ins_encode %{
 5626     __ movsbl($dst$$Register, $mem$$Address);
 5627   %}
 5628   ins_pipe(ialu_reg_mem);
 5629 %}
 5630 
 5631 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5632 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5633   match(Set dst (AndI (LoadI mem) mask));
 5634 
 5635   ins_cost(125);
 5636   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5637   ins_encode %{
 5638     __ movzbl($dst$$Register, $mem$$Address);
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Integer (32 bit signed) to Short (16 bit signed)
 5644 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5645   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5646 
 5647   ins_cost(125);
 5648   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5649   ins_encode %{
 5650     __ movswl($dst$$Register, $mem$$Address);
 5651   %}
 5652   ins_pipe(ialu_reg_mem);
 5653 %}
 5654 
 5655 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5656 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5657   match(Set dst (AndI (LoadI mem) mask));
 5658 
 5659   ins_cost(125);
 5660   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5661   ins_encode %{
 5662     __ movzwl($dst$$Register, $mem$$Address);
 5663   %}
 5664   ins_pipe(ialu_reg_mem);
 5665 %}
 5666 
 5667 // Load Integer into Long Register
 5668 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5669   match(Set dst (ConvI2L (LoadI mem)));
 5670   effect(KILL cr);
 5671 
 5672   ins_cost(375);
 5673   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5674             "MOV    $dst.hi,$dst.lo\n\t"
 5675             "SAR    $dst.hi,31" %}
 5676 
 5677   ins_encode %{
 5678     __ movl($dst$$Register, $mem$$Address);
 5679     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5680     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5681   %}
 5682 
 5683   ins_pipe(ialu_reg_mem);
 5684 %}
 5685 
 5686 // Load Integer with mask 0xFF into Long Register
 5687 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5688   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5689   effect(KILL cr);
 5690 
 5691   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5692             "XOR    $dst.hi,$dst.hi" %}
 5693   ins_encode %{
 5694     Register Rdst = $dst$$Register;
 5695     __ movzbl(Rdst, $mem$$Address);
 5696     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5697   %}
 5698   ins_pipe(ialu_reg_mem);
 5699 %}
 5700 
 5701 // Load Integer with mask 0xFFFF into Long Register
 5702 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5703   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5704   effect(KILL cr);
 5705 
 5706   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5707             "XOR    $dst.hi,$dst.hi" %}
 5708   ins_encode %{
 5709     Register Rdst = $dst$$Register;
 5710     __ movzwl(Rdst, $mem$$Address);
 5711     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5712   %}
 5713   ins_pipe(ialu_reg_mem);
 5714 %}
 5715 
 5716 // Load Integer with 31-bit mask into Long Register
 5717 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5718   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5719   effect(KILL cr);
 5720 
 5721   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5722             "XOR    $dst.hi,$dst.hi\n\t"
 5723             "AND    $dst.lo,$mask" %}
 5724   ins_encode %{
 5725     Register Rdst = $dst$$Register;
 5726     __ movl(Rdst, $mem$$Address);
 5727     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5728     __ andl(Rdst, $mask$$constant);
 5729   %}
 5730   ins_pipe(ialu_reg_mem);
 5731 %}
 5732 
 5733 // Load Unsigned Integer into Long Register
 5734 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5735   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5736   effect(KILL cr);
 5737 
 5738   ins_cost(250);
 5739   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5740             "XOR    $dst.hi,$dst.hi" %}
 5741 
 5742   ins_encode %{
 5743     __ movl($dst$$Register, $mem$$Address);
 5744     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5745   %}
 5746 
 5747   ins_pipe(ialu_reg_mem);
 5748 %}
 5749 
 5750 // Load Long.  Cannot clobber address while loading, so restrict address
 5751 // register to ESI
 5752 instruct loadL(eRegL dst, load_long_memory mem) %{
 5753   predicate(!((LoadLNode*)n)->require_atomic_access());
 5754   match(Set dst (LoadL mem));
 5755 
 5756   ins_cost(250);
 5757   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5758             "MOV    $dst.hi,$mem+4" %}
 5759 
 5760   ins_encode %{
 5761     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5762     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5763     __ movl($dst$$Register, Amemlo);
 5764     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5765   %}
 5766 
 5767   ins_pipe(ialu_reg_long_mem);
 5768 %}
 5769 
 5770 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5771 // then store it down to the stack and reload on the int
 5772 // side.
 5773 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5774   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5775   match(Set dst (LoadL mem));
 5776 
 5777   ins_cost(200);
 5778   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5779             "FISTp  $dst" %}
 5780   ins_encode(enc_loadL_volatile(mem,dst));
 5781   ins_pipe( fpu_reg_mem );
 5782 %}
 5783 
 5784 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5785   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5786   match(Set dst (LoadL mem));
 5787   effect(TEMP tmp);
 5788   ins_cost(180);
 5789   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5790             "MOVSD  $dst,$tmp" %}
 5791   ins_encode %{
 5792     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5793     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5794   %}
 5795   ins_pipe( pipe_slow );
 5796 %}
 5797 
 5798 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5799   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5800   match(Set dst (LoadL mem));
 5801   effect(TEMP tmp);
 5802   ins_cost(160);
 5803   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5804             "MOVD   $dst.lo,$tmp\n\t"
 5805             "PSRLQ  $tmp,32\n\t"
 5806             "MOVD   $dst.hi,$tmp" %}
 5807   ins_encode %{
 5808     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5809     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5810     __ psrlq($tmp$$XMMRegister, 32);
 5811     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5812   %}
 5813   ins_pipe( pipe_slow );
 5814 %}
 5815 
 5816 // Load Range
 5817 instruct loadRange(rRegI dst, memory mem) %{
 5818   match(Set dst (LoadRange mem));
 5819 
 5820   ins_cost(125);
 5821   format %{ "MOV    $dst,$mem" %}
 5822   opcode(0x8B);
 5823   ins_encode( OpcP, RegMem(dst,mem));
 5824   ins_pipe( ialu_reg_mem );
 5825 %}
 5826 
 5827 
 5828 // Load Pointer
 5829 instruct loadP(eRegP dst, memory mem) %{
 5830   match(Set dst (LoadP mem));
 5831 
 5832   ins_cost(125);
 5833   format %{ "MOV    $dst,$mem" %}
 5834   opcode(0x8B);
 5835   ins_encode( OpcP, RegMem(dst,mem));
 5836   ins_pipe( ialu_reg_mem );
 5837 %}
 5838 
 5839 // Load Klass Pointer
 5840 instruct loadKlass(eRegP dst, memory mem) %{
 5841   match(Set dst (LoadKlass mem));
 5842 
 5843   ins_cost(125);
 5844   format %{ "MOV    $dst,$mem" %}
 5845   opcode(0x8B);
 5846   ins_encode( OpcP, RegMem(dst,mem));
 5847   ins_pipe( ialu_reg_mem );
 5848 %}
 5849 
 5850 // Load Double
 5851 instruct loadDPR(regDPR dst, memory mem) %{
 5852   predicate(UseSSE<=1);
 5853   match(Set dst (LoadD mem));
 5854 
 5855   ins_cost(150);
 5856   format %{ "FLD_D  ST,$mem\n\t"
 5857             "FSTP   $dst" %}
 5858   opcode(0xDD);               /* DD /0 */
 5859   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5860               Pop_Reg_DPR(dst) );
 5861   ins_pipe( fpu_reg_mem );
 5862 %}
 5863 
 5864 // Load Double to XMM
 5865 instruct loadD(regD dst, memory mem) %{
 5866   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5867   match(Set dst (LoadD mem));
 5868   ins_cost(145);
 5869   format %{ "MOVSD  $dst,$mem" %}
 5870   ins_encode %{
 5871     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5872   %}
 5873   ins_pipe( pipe_slow );
 5874 %}
 5875 
 5876 instruct loadD_partial(regD dst, memory mem) %{
 5877   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5878   match(Set dst (LoadD mem));
 5879   ins_cost(145);
 5880   format %{ "MOVLPD $dst,$mem" %}
 5881   ins_encode %{
 5882     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5883   %}
 5884   ins_pipe( pipe_slow );
 5885 %}
 5886 
 5887 // Load to XMM register (single-precision floating point)
 5888 // MOVSS instruction
 5889 instruct loadF(regF dst, memory mem) %{
 5890   predicate(UseSSE>=1);
 5891   match(Set dst (LoadF mem));
 5892   ins_cost(145);
 5893   format %{ "MOVSS  $dst,$mem" %}
 5894   ins_encode %{
 5895     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5896   %}
 5897   ins_pipe( pipe_slow );
 5898 %}
 5899 
 5900 // Load Float
 5901 instruct loadFPR(regFPR dst, memory mem) %{
 5902   predicate(UseSSE==0);
 5903   match(Set dst (LoadF mem));
 5904 
 5905   ins_cost(150);
 5906   format %{ "FLD_S  ST,$mem\n\t"
 5907             "FSTP   $dst" %}
 5908   opcode(0xD9);               /* D9 /0 */
 5909   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5910               Pop_Reg_FPR(dst) );
 5911   ins_pipe( fpu_reg_mem );
 5912 %}
 5913 
 5914 // Load Effective Address
 5915 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5916   match(Set dst mem);
 5917 
 5918   ins_cost(110);
 5919   format %{ "LEA    $dst,$mem" %}
 5920   opcode(0x8D);
 5921   ins_encode( OpcP, RegMem(dst,mem));
 5922   ins_pipe( ialu_reg_reg_fat );
 5923 %}
 5924 
 5925 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5926   match(Set dst mem);
 5927 
 5928   ins_cost(110);
 5929   format %{ "LEA    $dst,$mem" %}
 5930   opcode(0x8D);
 5931   ins_encode( OpcP, RegMem(dst,mem));
 5932   ins_pipe( ialu_reg_reg_fat );
 5933 %}
 5934 
 5935 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5936   match(Set dst mem);
 5937 
 5938   ins_cost(110);
 5939   format %{ "LEA    $dst,$mem" %}
 5940   opcode(0x8D);
 5941   ins_encode( OpcP, RegMem(dst,mem));
 5942   ins_pipe( ialu_reg_reg_fat );
 5943 %}
 5944 
 5945 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5946   match(Set dst mem);
 5947 
 5948   ins_cost(110);
 5949   format %{ "LEA    $dst,$mem" %}
 5950   opcode(0x8D);
 5951   ins_encode( OpcP, RegMem(dst,mem));
 5952   ins_pipe( ialu_reg_reg_fat );
 5953 %}
 5954 
 5955 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5956   match(Set dst mem);
 5957 
 5958   ins_cost(110);
 5959   format %{ "LEA    $dst,$mem" %}
 5960   opcode(0x8D);
 5961   ins_encode( OpcP, RegMem(dst,mem));
 5962   ins_pipe( ialu_reg_reg_fat );
 5963 %}
 5964 
 5965 // Load Constant
 5966 instruct loadConI(rRegI dst, immI src) %{
 5967   match(Set dst src);
 5968 
 5969   format %{ "MOV    $dst,$src" %}
 5970   ins_encode( LdImmI(dst, src) );
 5971   ins_pipe( ialu_reg_fat );
 5972 %}
 5973 
 5974 // Load Constant zero
 5975 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5976   match(Set dst src);
 5977   effect(KILL cr);
 5978 
 5979   ins_cost(50);
 5980   format %{ "XOR    $dst,$dst" %}
 5981   opcode(0x33);  /* + rd */
 5982   ins_encode( OpcP, RegReg( dst, dst ) );
 5983   ins_pipe( ialu_reg );
 5984 %}
 5985 
 5986 instruct loadConP(eRegP dst, immP src) %{
 5987   match(Set dst src);
 5988 
 5989   format %{ "MOV    $dst,$src" %}
 5990   opcode(0xB8);  /* + rd */
 5991   ins_encode( LdImmP(dst, src) );
 5992   ins_pipe( ialu_reg_fat );
 5993 %}
 5994 
 5995 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5996   match(Set dst src);
 5997   effect(KILL cr);
 5998   ins_cost(200);
 5999   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6000             "MOV    $dst.hi,$src.hi" %}
 6001   opcode(0xB8);
 6002   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6003   ins_pipe( ialu_reg_long_fat );
 6004 %}
 6005 
 6006 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6007   match(Set dst src);
 6008   effect(KILL cr);
 6009   ins_cost(150);
 6010   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6011             "XOR    $dst.hi,$dst.hi" %}
 6012   opcode(0x33,0x33);
 6013   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6014   ins_pipe( ialu_reg_long );
 6015 %}
 6016 
 6017 // The instruction usage is guarded by predicate in operand immFPR().
 6018 instruct loadConFPR(regFPR dst, immFPR con) %{
 6019   match(Set dst con);
 6020   ins_cost(125);
 6021   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6022             "FSTP   $dst" %}
 6023   ins_encode %{
 6024     __ fld_s($constantaddress($con));
 6025     __ fstp_d($dst$$reg);
 6026   %}
 6027   ins_pipe(fpu_reg_con);
 6028 %}
 6029 
 6030 // The instruction usage is guarded by predicate in operand immFPR0().
 6031 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6032   match(Set dst con);
 6033   ins_cost(125);
 6034   format %{ "FLDZ   ST\n\t"
 6035             "FSTP   $dst" %}
 6036   ins_encode %{
 6037     __ fldz();
 6038     __ fstp_d($dst$$reg);
 6039   %}
 6040   ins_pipe(fpu_reg_con);
 6041 %}
 6042 
 6043 // The instruction usage is guarded by predicate in operand immFPR1().
 6044 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6045   match(Set dst con);
 6046   ins_cost(125);
 6047   format %{ "FLD1   ST\n\t"
 6048             "FSTP   $dst" %}
 6049   ins_encode %{
 6050     __ fld1();
 6051     __ fstp_d($dst$$reg);
 6052   %}
 6053   ins_pipe(fpu_reg_con);
 6054 %}
 6055 
 6056 // The instruction usage is guarded by predicate in operand immF().
 6057 instruct loadConF(regF dst, immF con) %{
 6058   match(Set dst con);
 6059   ins_cost(125);
 6060   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6061   ins_encode %{
 6062     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6063   %}
 6064   ins_pipe(pipe_slow);
 6065 %}
 6066 
 6067 // The instruction usage is guarded by predicate in operand immF0().
 6068 instruct loadConF0(regF dst, immF0 src) %{
 6069   match(Set dst src);
 6070   ins_cost(100);
 6071   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6072   ins_encode %{
 6073     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6074   %}
 6075   ins_pipe(pipe_slow);
 6076 %}
 6077 
 6078 // The instruction usage is guarded by predicate in operand immDPR().
 6079 instruct loadConDPR(regDPR dst, immDPR con) %{
 6080   match(Set dst con);
 6081   ins_cost(125);
 6082 
 6083   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6084             "FSTP   $dst" %}
 6085   ins_encode %{
 6086     __ fld_d($constantaddress($con));
 6087     __ fstp_d($dst$$reg);
 6088   %}
 6089   ins_pipe(fpu_reg_con);
 6090 %}
 6091 
 6092 // The instruction usage is guarded by predicate in operand immDPR0().
 6093 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6094   match(Set dst con);
 6095   ins_cost(125);
 6096 
 6097   format %{ "FLDZ   ST\n\t"
 6098             "FSTP   $dst" %}
 6099   ins_encode %{
 6100     __ fldz();
 6101     __ fstp_d($dst$$reg);
 6102   %}
 6103   ins_pipe(fpu_reg_con);
 6104 %}
 6105 
 6106 // The instruction usage is guarded by predicate in operand immDPR1().
 6107 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6108   match(Set dst con);
 6109   ins_cost(125);
 6110 
 6111   format %{ "FLD1   ST\n\t"
 6112             "FSTP   $dst" %}
 6113   ins_encode %{
 6114     __ fld1();
 6115     __ fstp_d($dst$$reg);
 6116   %}
 6117   ins_pipe(fpu_reg_con);
 6118 %}
 6119 
 6120 // The instruction usage is guarded by predicate in operand immD().
 6121 instruct loadConD(regD dst, immD con) %{
 6122   match(Set dst con);
 6123   ins_cost(125);
 6124   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6125   ins_encode %{
 6126     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6127   %}
 6128   ins_pipe(pipe_slow);
 6129 %}
 6130 
 6131 // The instruction usage is guarded by predicate in operand immD0().
 6132 instruct loadConD0(regD dst, immD0 src) %{
 6133   match(Set dst src);
 6134   ins_cost(100);
 6135   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6136   ins_encode %{
 6137     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6138   %}
 6139   ins_pipe( pipe_slow );
 6140 %}
 6141 
 6142 // Load Stack Slot
 6143 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6144   match(Set dst src);
 6145   ins_cost(125);
 6146 
 6147   format %{ "MOV    $dst,$src" %}
 6148   opcode(0x8B);
 6149   ins_encode( OpcP, RegMem(dst,src));
 6150   ins_pipe( ialu_reg_mem );
 6151 %}
 6152 
 6153 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6154   match(Set dst src);
 6155 
 6156   ins_cost(200);
 6157   format %{ "MOV    $dst,$src.lo\n\t"
 6158             "MOV    $dst+4,$src.hi" %}
 6159   opcode(0x8B, 0x8B);
 6160   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6161   ins_pipe( ialu_mem_long_reg );
 6162 %}
 6163 
 6164 // Load Stack Slot
 6165 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6166   match(Set dst src);
 6167   ins_cost(125);
 6168 
 6169   format %{ "MOV    $dst,$src" %}
 6170   opcode(0x8B);
 6171   ins_encode( OpcP, RegMem(dst,src));
 6172   ins_pipe( ialu_reg_mem );
 6173 %}
 6174 
 6175 // Load Stack Slot
 6176 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6177   match(Set dst src);
 6178   ins_cost(125);
 6179 
 6180   format %{ "FLD_S  $src\n\t"
 6181             "FSTP   $dst" %}
 6182   opcode(0xD9);               /* D9 /0, FLD m32real */
 6183   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6184               Pop_Reg_FPR(dst) );
 6185   ins_pipe( fpu_reg_mem );
 6186 %}
 6187 
 6188 // Load Stack Slot
 6189 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6190   match(Set dst src);
 6191   ins_cost(125);
 6192 
 6193   format %{ "FLD_D  $src\n\t"
 6194             "FSTP   $dst" %}
 6195   opcode(0xDD);               /* DD /0, FLD m64real */
 6196   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6197               Pop_Reg_DPR(dst) );
 6198   ins_pipe( fpu_reg_mem );
 6199 %}
 6200 
 6201 // Prefetch instructions for allocation.
 6202 // Must be safe to execute with invalid address (cannot fault).
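      // The variant is selected by AllocatePrefetchInstr: 0 = PREFETCHNTA,
      // 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW; without SSE (and unless
      // PREFETCHW was requested) the node matches to an empty encoding.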
 6203 
 6204 instruct prefetchAlloc0( memory mem ) %{
 6205   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6206   match(PrefetchAllocation mem);
 6207   ins_cost(0);
 6208   size(0);
 6209   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6210   ins_encode();
 6211   ins_pipe(empty);
 6212 %}
 6213 
 6214 instruct prefetchAlloc( memory mem ) %{
 6215   predicate(AllocatePrefetchInstr==3);
 6216   match( PrefetchAllocation mem );
 6217   ins_cost(100);
 6218 
 6219   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6220   ins_encode %{
 6221     __ prefetchw($mem$$Address);
 6222   %}
 6223   ins_pipe(ialu_mem);
 6224 %}
 6225 
 6226 instruct prefetchAllocNTA( memory mem ) %{
 6227   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6228   match(PrefetchAllocation mem);
 6229   ins_cost(100);
 6230 
 6231   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6232   ins_encode %{
 6233     __ prefetchnta($mem$$Address);
 6234   %}
 6235   ins_pipe(ialu_mem);
 6236 %}
 6237 
 6238 instruct prefetchAllocT0( memory mem ) %{
 6239   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6240   match(PrefetchAllocation mem);
 6241   ins_cost(100);
 6242 
 6243   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6244   ins_encode %{
 6245     __ prefetcht0($mem$$Address);
 6246   %}
 6247   ins_pipe(ialu_mem);
 6248 %}
 6249 
 6250 instruct prefetchAllocT2( memory mem ) %{
 6251   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6252   match(PrefetchAllocation mem);
 6253   ins_cost(100);
 6254 
 6255   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6256   ins_encode %{
 6257     __ prefetcht2($mem$$Address);
 6258   %}
 6259   ins_pipe(ialu_mem);
 6260 %}
 6261 
 6262 //----------Store Instructions-------------------------------------------------
 6263 
 6264 // Store Byte
 6265 instruct storeB(memory mem, xRegI src) %{
 6266   match(Set mem (StoreB mem src));
 6267 
 6268   ins_cost(125);
 6269   format %{ "MOV8   $mem,$src" %}
 6270   opcode(0x88);
 6271   ins_encode( OpcP, RegMem( src, mem ) );
 6272   ins_pipe( ialu_mem_reg );
 6273 %}
 6274 
 6275 // Store Char/Short
 6276 instruct storeC(memory mem, rRegI src) %{
 6277   match(Set mem (StoreC mem src));
 6278 
 6279   ins_cost(125);
 6280   format %{ "MOV16  $mem,$src" %}
 6281   opcode(0x89, 0x66);
 6282   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6283   ins_pipe( ialu_mem_reg );
 6284 %}
 6285 
 6286 // Store Integer
 6287 instruct storeI(memory mem, rRegI src) %{
 6288   match(Set mem (StoreI mem src));
 6289 
 6290   ins_cost(125);
 6291   format %{ "MOV    $mem,$src" %}
 6292   opcode(0x89);
 6293   ins_encode( OpcP, RegMem( src, mem ) );
 6294   ins_pipe( ialu_mem_reg );
 6295 %}
 6296 
 6297 // Store Long
 6298 instruct storeL(long_memory mem, eRegL src) %{
 6299   predicate(!((StoreLNode*)n)->require_atomic_access());
 6300   match(Set mem (StoreL mem src));
 6301 
 6302   ins_cost(200);
 6303   format %{ "MOV    $mem,$src.lo\n\t"
 6304             "MOV    $mem+4,$src.hi" %}
 6305   opcode(0x89, 0x89);
 6306   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6307   ins_pipe( ialu_mem_long_reg );
 6308 %}
 6309 
 6310 // Store Long to Integer
 6311 instruct storeL2I(memory mem, eRegL src) %{
 6312   match(Set mem (StoreI mem (ConvL2I src)));
 6313 
 6314   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6315   ins_encode %{
 6316     __ movl($mem$$Address, $src$$Register);
 6317   %}
 6318   ins_pipe(ialu_mem_reg);
 6319 %}
 6320 
 6321 // Volatile Store Long.  Must be atomic, so move it into
 6322 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6323 // target address before the store (for null-ptr checks)
 6324 // so the memory operand is used twice in the encoding.
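      // The x87 FILD/FISTP pair reads and writes the full 64-bit operand as a single
      // memory access, which is why it can be used for the atomic store here.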
 6325 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6326   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6327   match(Set mem (StoreL mem src));
 6328   effect( KILL cr );
 6329   ins_cost(400);
 6330   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6331             "FILD   $src\n\t"
 6332             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6333   opcode(0x3B);
 6334   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6335   ins_pipe( fpu_reg_mem );
 6336 %}
 6337 
 6338 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6339   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6340   match(Set mem (StoreL mem src));
 6341   effect( TEMP tmp, KILL cr );
 6342   ins_cost(380);
 6343   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6344             "MOVSD  $tmp,$src\n\t"
 6345             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6346   ins_encode %{
 6347     __ cmpl(rax, $mem$$Address);
 6348     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6349     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6350   %}
 6351   ins_pipe( pipe_slow );
 6352 %}
 6353 
 6354 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6355   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6356   match(Set mem (StoreL mem src));
 6357   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6358   ins_cost(360);
 6359   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6360             "MOVD   $tmp,$src.lo\n\t"
 6361             "MOVD   $tmp2,$src.hi\n\t"
 6362             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6363             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6364   ins_encode %{
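          // Probe the address first (implicit null check), then assemble src.hi:src.lo
          // into one XMM register (PUNPCKLDQ interleaves the low dwords of tmp and
          // tmp2) so the value can be written with a single 64-bit MOVSD store.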
 6365     __ cmpl(rax, $mem$$Address);
 6366     __ movdl($tmp$$XMMRegister, $src$$Register);
 6367     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6368     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6369     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6370   %}
 6371   ins_pipe( pipe_slow );
 6372 %}
 6373 
 6374 // Store Pointer; for storing unknown oops and raw pointers
 6375 instruct storeP(memory mem, anyRegP src) %{
 6376   match(Set mem (StoreP mem src));
 6377 
 6378   ins_cost(125);
 6379   format %{ "MOV    $mem,$src" %}
 6380   opcode(0x89);
 6381   ins_encode( OpcP, RegMem( src, mem ) );
 6382   ins_pipe( ialu_mem_reg );
 6383 %}
 6384 
 6385 // Store Integer Immediate
 6386 instruct storeImmI(memory mem, immI src) %{
 6387   match(Set mem (StoreI mem src));
 6388 
 6389   ins_cost(150);
 6390   format %{ "MOV    $mem,$src" %}
 6391   opcode(0xC7);               /* C7 /0 */
 6392   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6393   ins_pipe( ialu_mem_imm );
 6394 %}
 6395 
 6396 // Store Short/Char Immediate
 6397 instruct storeImmI16(memory mem, immI16 src) %{
 6398   predicate(UseStoreImmI16);
 6399   match(Set mem (StoreC mem src));
 6400 
 6401   ins_cost(150);
 6402   format %{ "MOV16  $mem,$src" %}
 6403   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6404   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6405   ins_pipe( ialu_mem_imm );
 6406 %}
 6407 
 6408 // Store Pointer Immediate; null pointers or constant oops that do not
 6409 // need card-mark barriers.
 6410 instruct storeImmP(memory mem, immP src) %{
 6411   match(Set mem (StoreP mem src));
 6412 
 6413   ins_cost(150);
 6414   format %{ "MOV    $mem,$src" %}
 6415   opcode(0xC7);               /* C7 /0 */
 6416   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6417   ins_pipe( ialu_mem_imm );
 6418 %}
 6419 
 6420 // Store Byte Immediate
 6421 instruct storeImmB(memory mem, immI8 src) %{
 6422   match(Set mem (StoreB mem src));
 6423 
 6424   ins_cost(150);
 6425   format %{ "MOV8   $mem,$src" %}
 6426   opcode(0xC6);               /* C6 /0 */
 6427   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6428   ins_pipe( ialu_mem_imm );
 6429 %}
 6430 
 6431 // Store CMS card-mark Immediate
 6432 instruct storeImmCM(memory mem, immI8 src) %{
 6433   match(Set mem (StoreCM mem src));
 6434 
 6435   ins_cost(150);
 6436   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6437   opcode(0xC6);               /* C6 /0 */
 6438   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6439   ins_pipe( ialu_mem_imm );
 6440 %}
 6441 
 6442 // Store Double
 6443 instruct storeDPR( memory mem, regDPR1 src) %{
 6444   predicate(UseSSE<=1);
 6445   match(Set mem (StoreD mem src));
 6446 
 6447   ins_cost(100);
 6448   format %{ "FST_D  $mem,$src" %}
 6449   opcode(0xDD);       /* DD /2 */
 6450   ins_encode( enc_FPR_store(mem,src) );
 6451   ins_pipe( fpu_mem_reg );
 6452 %}
 6453 
 6454 // Store double does rounding on x86
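      // The x87 stack holds values in 80-bit extended format; FST_D narrows to the
      // 64-bit format on the way out, which performs the rounding RoundDouble asks for.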
 6455 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6456   predicate(UseSSE<=1);
 6457   match(Set mem (StoreD mem (RoundDouble src)));
 6458 
 6459   ins_cost(100);
 6460   format %{ "FST_D  $mem,$src\t# round" %}
 6461   opcode(0xDD);       /* DD /2 */
 6462   ins_encode( enc_FPR_store(mem,src) );
 6463   ins_pipe( fpu_mem_reg );
 6464 %}
 6465 
 6466 // Store XMM register to memory (double-precision floating point)
 6467 // MOVSD instruction
 6468 instruct storeD(memory mem, regD src) %{
 6469   predicate(UseSSE>=2);
 6470   match(Set mem (StoreD mem src));
 6471   ins_cost(95);
 6472   format %{ "MOVSD  $mem,$src" %}
 6473   ins_encode %{
 6474     __ movdbl($mem$$Address, $src$$XMMRegister);
 6475   %}
 6476   ins_pipe( pipe_slow );
 6477 %}
 6478 
 6479 // Store XMM register to memory (single-precision floating point)
 6480 // MOVSS instruction
 6481 instruct storeF(memory mem, regF src) %{
 6482   predicate(UseSSE>=1);
 6483   match(Set mem (StoreF mem src));
 6484   ins_cost(95);
 6485   format %{ "MOVSS  $mem,$src" %}
 6486   ins_encode %{
 6487     __ movflt($mem$$Address, $src$$XMMRegister);
 6488   %}
 6489   ins_pipe( pipe_slow );
 6490 %}
 6491 
 6492 
 6493 // Store Float
 6494 instruct storeFPR( memory mem, regFPR1 src) %{
 6495   predicate(UseSSE==0);
 6496   match(Set mem (StoreF mem src));
 6497 
 6498   ins_cost(100);
 6499   format %{ "FST_S  $mem,$src" %}
 6500   opcode(0xD9);       /* D9 /2 */
 6501   ins_encode( enc_FPR_store(mem,src) );
 6502   ins_pipe( fpu_mem_reg );
 6503 %}
 6504 
 6505 // Store Float does rounding on x86
 6506 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6507   predicate(UseSSE==0);
 6508   match(Set mem (StoreF mem (RoundFloat src)));
 6509 
 6510   ins_cost(100);
 6511   format %{ "FST_S  $mem,$src\t# round" %}
 6512   opcode(0xD9);       /* D9 /2 */
 6513   ins_encode( enc_FPR_store(mem,src) );
 6514   ins_pipe( fpu_mem_reg );
 6515 %}
 6516 
 6517 // Store Float does rounding on x86 (the ConvD2F is folded into the 32-bit store)
 6518 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6519   predicate(UseSSE<=1);
 6520   match(Set mem (StoreF mem (ConvD2F src)));
 6521 
 6522   ins_cost(100);
 6523   format %{ "FST_S  $mem,$src\t# D-round" %}
 6524   opcode(0xD9);       /* D9 /2 */
 6525   ins_encode( enc_FPR_store(mem,src) );
 6526   ins_pipe( fpu_mem_reg );
 6527 %}
 6528 
 6529 // Store immediate Float value (faster than a store from an FPU register)
 6530 // The instruction usage is guarded by predicate in operand immFPR().
 6531 instruct storeFPR_imm( memory mem, immFPR src) %{
 6532   match(Set mem (StoreF mem src));
 6533 
 6534   ins_cost(50);
 6535   format %{ "MOV    $mem,$src\t# store float" %}
 6536   opcode(0xC7);               /* C7 /0 */
 6537   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6538   ins_pipe( ialu_mem_imm );
 6539 %}
 6540 
 6541 // Store immediate Float value (faster than a store from an XMM register)
 6542 // The instruction usage is guarded by predicate in operand immF().
 6543 instruct storeF_imm( memory mem, immF src) %{
 6544   match(Set mem (StoreF mem src));
 6545 
 6546   ins_cost(50);
 6547   format %{ "MOV    $mem,$src\t# store float" %}
 6548   opcode(0xC7);               /* C7 /0 */
 6549   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6550   ins_pipe( ialu_mem_imm );
 6551 %}
 6552 
 6553 // Store Integer to stack slot
 6554 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6555   match(Set dst src);
 6556 
 6557   ins_cost(100);
 6558   format %{ "MOV    $dst,$src" %}
 6559   opcode(0x89);
 6560   ins_encode( OpcPRegSS( dst, src ) );
 6561   ins_pipe( ialu_mem_reg );
 6562 %}
 6563 
 6564 // Store Integer to stack slot
 6565 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6566   match(Set dst src);
 6567 
 6568   ins_cost(100);
 6569   format %{ "MOV    $dst,$src" %}
 6570   opcode(0x89);
 6571   ins_encode( OpcPRegSS( dst, src ) );
 6572   ins_pipe( ialu_mem_reg );
 6573 %}
 6574 
 6575 // Store Long to stack slot
 6576 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6577   match(Set dst src);
 6578 
 6579   ins_cost(200);
 6580   format %{ "MOV    $dst,$src.lo\n\t"
 6581             "MOV    $dst+4,$src.hi" %}
 6582   opcode(0x89, 0x89);
 6583   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6584   ins_pipe( ialu_mem_long_reg );
 6585 %}
 6586 
 6587 //----------MemBar Instructions-----------------------------------------------
 6588 // Memory barrier flavors
 6589 
 6590 instruct membar_acquire() %{
 6591   match(MemBarAcquire);
 6592   match(LoadFence);
 6593   ins_cost(400);
 6594 
 6595   size(0);
 6596   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6597   ins_encode();
 6598   ins_pipe(empty);
 6599 %}
 6600 
 6601 instruct membar_acquire_lock() %{
 6602   match(MemBarAcquireLock);
 6603   ins_cost(0);
 6604 
 6605   size(0);
 6606   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6607   ins_encode( );
 6608   ins_pipe(empty);
 6609 %}
 6610 
 6611 instruct membar_release() %{
 6612   match(MemBarRelease);
 6613   match(StoreFence);
 6614   ins_cost(400);
 6615 
 6616   size(0);
 6617   format %{ "MEMBAR-release ! (empty encoding)" %}
 6618   ins_encode( );
 6619   ins_pipe(empty);
 6620 %}
 6621 
 6622 instruct membar_release_lock() %{
 6623   match(MemBarReleaseLock);
 6624   ins_cost(0);
 6625 
 6626   size(0);
 6627   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6628   ins_encode( );
 6629   ins_pipe(empty);
 6630 %}
 6631 
 6632 instruct membar_volatile(eFlagsReg cr) %{
 6633   match(MemBarVolatile);
 6634   effect(KILL cr);
 6635   ins_cost(400);
 6636 
 6637   format %{
 6638     $$template
 6639     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6640   %}
 6641   ins_encode %{
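          // Any LOCKed read-modify-write is a full fence on x86; adding zero to the
          // word at [ESP] gives the required StoreLoad ordering and is typically
          // cheaper than MFENCE.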
 6642     __ membar(Assembler::StoreLoad);
 6643   %}
 6644   ins_pipe(pipe_slow);
 6645 %}
 6646 
 6647 instruct unnecessary_membar_volatile() %{
 6648   match(MemBarVolatile);
 6649   predicate(Matcher::post_store_load_barrier(n));
 6650   ins_cost(0);
 6651 
 6652   size(0);
 6653   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6654   ins_encode( );
 6655   ins_pipe(empty);
 6656 %}
 6657 
 6658 instruct membar_storestore() %{
 6659   match(MemBarStoreStore);
 6660   match(StoreStoreFence);
 6661   ins_cost(0);
 6662 
 6663   size(0);
 6664   format %{ "MEMBAR-storestore (empty encoding)" %}
 6665   ins_encode( );
 6666   ins_pipe(empty);
 6667 %}
 6668 
 6669 //----------Move Instructions--------------------------------------------------
 6670 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6671   match(Set dst (CastX2P src));
 6672   format %{ "# X2P  $dst, $src" %}
 6673   ins_encode( /*empty encoding*/ );
 6674   ins_cost(0);
 6675   ins_pipe(empty);
 6676 %}
 6677 
 6678 instruct castP2X(rRegI dst, eRegP src ) %{
 6679   match(Set dst (CastP2X src));
 6680   ins_cost(50);
 6681   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6682   ins_encode( enc_Copy( dst, src) );
 6683   ins_pipe( ialu_reg_reg );
 6684 %}
 6685 
 6686 //----------Conditional Move---------------------------------------------------
 6687 // Conditional move
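      // CMOVcc is only available on P6 and later processors, so the jmov forms below
      // emulate it with a short branch around a MOV whenever
      // VM_Version::supports_cmov() is false.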
 6688 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6689   predicate(!VM_Version::supports_cmov() );
 6690   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6691   ins_cost(200);
 6692   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6693             "MOV    $dst,$src\n"
 6694       "skip:" %}
 6695   ins_encode %{
 6696     Label Lskip;
 6697     // Invert sense of branch from sense of CMOV
 6698     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6699     __ movl($dst$$Register, $src$$Register);
 6700     __ bind(Lskip);
 6701   %}
 6702   ins_pipe( pipe_cmov_reg );
 6703 %}
 6704 
 6705 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6706   predicate(!VM_Version::supports_cmov() );
 6707   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6708   ins_cost(200);
 6709   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6710             "MOV    $dst,$src\n"
 6711       "skip:" %}
 6712   ins_encode %{
 6713     Label Lskip;
 6714     // Invert sense of branch from sense of CMOV
 6715     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6716     __ movl($dst$$Register, $src$$Register);
 6717     __ bind(Lskip);
 6718   %}
 6719   ins_pipe( pipe_cmov_reg );
 6720 %}
 6721 
 6722 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6723   predicate(VM_Version::supports_cmov() );
 6724   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6725   ins_cost(200);
 6726   format %{ "CMOV$cop $dst,$src" %}
 6727   opcode(0x0F,0x40);
 6728   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6729   ins_pipe( pipe_cmov_reg );
 6730 %}
 6731 
 6732 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6733   predicate(VM_Version::supports_cmov() );
 6734   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6735   ins_cost(200);
 6736   format %{ "CMOV$cop $dst,$src" %}
 6737   opcode(0x0F,0x40);
 6738   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6739   ins_pipe( pipe_cmov_reg );
 6740 %}
 6741 
 6742 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6743   predicate(VM_Version::supports_cmov() );
 6744   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6745   ins_cost(200);
 6746   expand %{
 6747     cmovI_regU(cop, cr, dst, src);
 6748   %}
 6749 %}
 6750 
 6751 // Conditional move
 6752 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6753   predicate(VM_Version::supports_cmov() );
 6754   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6755   ins_cost(250);
 6756   format %{ "CMOV$cop $dst,$src" %}
 6757   opcode(0x0F,0x40);
 6758   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6759   ins_pipe( pipe_cmov_mem );
 6760 %}
 6761 
 6762 // Conditional move
 6763 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6764   predicate(VM_Version::supports_cmov() );
 6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6766   ins_cost(250);
 6767   format %{ "CMOV$cop $dst,$src" %}
 6768   opcode(0x0F,0x40);
 6769   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6770   ins_pipe( pipe_cmov_mem );
 6771 %}
 6772 
 6773 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6774   predicate(VM_Version::supports_cmov() );
 6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6776   ins_cost(250);
 6777   expand %{
 6778     cmovI_memU(cop, cr, dst, src);
 6779   %}
 6780 %}
 6781 
 6782 // Conditional move
 6783 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6784   predicate(VM_Version::supports_cmov() );
 6785   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6786   ins_cost(200);
 6787   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6788   opcode(0x0F,0x40);
 6789   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6790   ins_pipe( pipe_cmov_reg );
 6791 %}
 6792 
 6793 // Conditional move (non-P6 version)
 6794 // Note: a CMoveP is generated for stubs and native wrappers
 6795 //       regardless of whether we are on a P6, so we
 6796 //       emulate a cmov here
 6797 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6798   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6799   ins_cost(300);
 6800   format %{ "Jn$cop   skip\n\t"
 6801           "MOV    $dst,$src\t# pointer\n"
 6802       "skip:" %}
 6803   opcode(0x8b);
 6804   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6805   ins_pipe( pipe_cmov_reg );
 6806 %}
 6807 
 6808 // Conditional move
 6809 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6810   predicate(VM_Version::supports_cmov() );
 6811   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6812   ins_cost(200);
 6813   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6814   opcode(0x0F,0x40);
 6815   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6816   ins_pipe( pipe_cmov_reg );
 6817 %}
 6818 
 6819 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6820   predicate(VM_Version::supports_cmov() );
 6821   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6822   ins_cost(200);
 6823   expand %{
 6824     cmovP_regU(cop, cr, dst, src);
 6825   %}
 6826 %}
 6827 
 6828 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6829 // correctly meets the two pointer arguments; one is an incoming
 6830 // register but the other is a memory operand.  ALSO appears to
 6831 // be buggy with implicit null checks.
 6832 //
 6833 //// Conditional move
 6834 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6835 //  predicate(VM_Version::supports_cmov() );
 6836 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6837 //  ins_cost(250);
 6838 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6839 //  opcode(0x0F,0x40);
 6840 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6841 //  ins_pipe( pipe_cmov_mem );
 6842 //%}
 6843 //
 6844 //// Conditional move
 6845 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6846 //  predicate(VM_Version::supports_cmov() );
 6847 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6848 //  ins_cost(250);
 6849 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6850 //  opcode(0x0F,0x40);
 6851 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6852 //  ins_pipe( pipe_cmov_mem );
 6853 //%}
 6854 
 6855 // Conditional move
 6856 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6857   predicate(UseSSE<=1);
 6858   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6859   ins_cost(200);
 6860   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6861   opcode(0xDA);
 6862   ins_encode( enc_cmov_dpr(cop,src) );
 6863   ins_pipe( pipe_cmovDPR_reg );
 6864 %}
 6865 
 6866 // Conditional move
 6867 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6868   predicate(UseSSE==0);
 6869   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6870   ins_cost(200);
 6871   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6872   opcode(0xDA);
 6873   ins_encode( enc_cmov_dpr(cop,src) );
 6874   ins_pipe( pipe_cmovDPR_reg );
 6875 %}
 6876 
 6877 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
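      // FCMOVcc conditions are based on CF/ZF/PF (the flags FCOMI/FUCOMI produce),
      // so a signed compare has to fall back to the branch-around forms that follow.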
 6878 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6879   predicate(UseSSE<=1);
 6880   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6881   ins_cost(200);
 6882   format %{ "Jn$cop   skip\n\t"
 6883             "MOV    $dst,$src\t# double\n"
 6884       "skip:" %}
 6885   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6886   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6887   ins_pipe( pipe_cmovDPR_reg );
 6888 %}
 6889 
 6890 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6891 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6892   predicate(UseSSE==0);
 6893   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6894   ins_cost(200);
 6895   format %{ "Jn$cop    skip\n\t"
 6896             "MOV    $dst,$src\t# float\n"
 6897       "skip:" %}
 6898   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6899   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6900   ins_pipe( pipe_cmovDPR_reg );
 6901 %}
 6902 
 6903 // No CMOVE with SSE/SSE2
 6904 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6905   predicate (UseSSE>=1);
 6906   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6907   ins_cost(200);
 6908   format %{ "Jn$cop   skip\n\t"
 6909             "MOVSS  $dst,$src\t# float\n"
 6910       "skip:" %}
 6911   ins_encode %{
 6912     Label skip;
 6913     // Invert sense of branch from sense of CMOV
 6914     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6915     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6916     __ bind(skip);
 6917   %}
 6918   ins_pipe( pipe_slow );
 6919 %}
 6920 
 6921 // No CMOVE with SSE/SSE2
 6922 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6923   predicate (UseSSE>=2);
 6924   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6925   ins_cost(200);
 6926   format %{ "Jn$cop   skip\n\t"
 6927             "MOVSD  $dst,$src\t# double\n"
 6928       "skip:" %}
 6929   ins_encode %{
 6930     Label skip;
 6931     // Invert sense of branch from sense of CMOV
 6932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6933     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6934     __ bind(skip);
 6935   %}
 6936   ins_pipe( pipe_slow );
 6937 %}
 6938 
 6939 // unsigned version
 6940 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6941   predicate (UseSSE>=1);
 6942   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6943   ins_cost(200);
 6944   format %{ "Jn$cop   skip\n\t"
 6945             "MOVSS  $dst,$src\t# float\n"
 6946       "skip:" %}
 6947   ins_encode %{
 6948     Label skip;
 6949     // Invert sense of branch from sense of CMOV
 6950     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6951     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6952     __ bind(skip);
 6953   %}
 6954   ins_pipe( pipe_slow );
 6955 %}
 6956 
 6957 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6958   predicate (UseSSE>=1);
 6959   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6960   ins_cost(200);
 6961   expand %{
 6962     fcmovF_regU(cop, cr, dst, src);
 6963   %}
 6964 %}
 6965 
 6966 // unsigned version
 6967 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6968   predicate (UseSSE>=2);
 6969   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6970   ins_cost(200);
 6971   format %{ "Jn$cop   skip\n\t"
 6972             "MOVSD  $dst,$src\t# double\n"
 6973       "skip:" %}
 6974   ins_encode %{
 6975     Label skip;
 6976     // Invert sense of branch from sense of CMOV
 6977     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6978     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6979     __ bind(skip);
 6980   %}
 6981   ins_pipe( pipe_slow );
 6982 %}
 6983 
 6984 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6985   predicate (UseSSE>=2);
 6986   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6987   ins_cost(200);
 6988   expand %{
 6989     fcmovD_regU(cop, cr, dst, src);
 6990   %}
 6991 %}
 6992 
 6993 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6994   predicate(VM_Version::supports_cmov() );
 6995   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6996   ins_cost(200);
 6997   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6998             "CMOV$cop $dst.hi,$src.hi" %}
 6999   opcode(0x0F,0x40);
 7000   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7001   ins_pipe( pipe_cmov_reg_long );
 7002 %}
 7003 
 7004 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7005   predicate(VM_Version::supports_cmov() );
 7006   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7007   ins_cost(200);
 7008   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7009             "CMOV$cop $dst.hi,$src.hi" %}
 7010   opcode(0x0F,0x40);
 7011   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7012   ins_pipe( pipe_cmov_reg_long );
 7013 %}
 7014 
 7015 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7016   predicate(VM_Version::supports_cmov() );
 7017   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7018   ins_cost(200);
 7019   expand %{
 7020     cmovL_regU(cop, cr, dst, src);
 7021   %}
 7022 %}
 7023 
 7024 //----------Arithmetic Instructions--------------------------------------------
 7025 //----------Addition Instructions----------------------------------------------
 7026 
 7027 // Integer Addition Instructions
 7028 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7029   match(Set dst (AddI dst src));
 7030   effect(KILL cr);
 7031 
 7032   size(2);
 7033   format %{ "ADD    $dst,$src" %}
 7034   opcode(0x03);
 7035   ins_encode( OpcP, RegReg( dst, src) );
 7036   ins_pipe( ialu_reg_reg );
 7037 %}
 7038 
 7039 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7040   match(Set dst (AddI dst src));
 7041   effect(KILL cr);
 7042 
 7043   format %{ "ADD    $dst,$src" %}
 7044   opcode(0x81, 0x00); /* /0 id */
 7045   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7046   ins_pipe( ialu_reg );
 7047 %}
 7048 
 7049 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7050   predicate(UseIncDec);
 7051   match(Set dst (AddI dst src));
 7052   effect(KILL cr);
 7053 
 7054   size(1);
 7055   format %{ "INC    $dst" %}
 7056   opcode(0x40); /* INC r32 short form: 0x40 + reg */
 7057   ins_encode( Opc_plus( primary, dst ) );
 7058   ins_pipe( ialu_reg );
 7059 %}
 7060 
 7061 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7062   match(Set dst (AddI src0 src1));
 7063   ins_cost(110);
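        // LEA computes the sum without touching EFLAGS, which is why no eFlagsReg
        // effect is needed here (unlike the ADD forms above).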
 7064 
 7065   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7066   opcode(0x8D); /* 0x8D /r */
 7067   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7068   ins_pipe( ialu_reg_reg );
 7069 %}
 7070 
 7071 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7072   match(Set dst (AddP src0 src1));
 7073   ins_cost(110);
 7074 
 7075   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7076   opcode(0x8D); /* 0x8D /r */
 7077   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7078   ins_pipe( ialu_reg_reg );
 7079 %}
 7080 
 7081 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7082   predicate(UseIncDec);
 7083   match(Set dst (AddI dst src));
 7084   effect(KILL cr);
 7085 
 7086   size(1);
 7087   format %{ "DEC    $dst" %}
 7088   opcode(0x48); /* DEC r32 short form: 0x48 + reg */
 7089   ins_encode( Opc_plus( primary, dst ) );
 7090   ins_pipe( ialu_reg );
 7091 %}
 7092 
 7093 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7094   match(Set dst (AddP dst src));
 7095   effect(KILL cr);
 7096 
 7097   size(2);
 7098   format %{ "ADD    $dst,$src" %}
 7099   opcode(0x03);
 7100   ins_encode( OpcP, RegReg( dst, src) );
 7101   ins_pipe( ialu_reg_reg );
 7102 %}
 7103 
 7104 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7105   match(Set dst (AddP dst src));
 7106   effect(KILL cr);
 7107 
 7108   format %{ "ADD    $dst,$src" %}
 7109   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7110   // ins_encode( RegImm( dst, src) );
 7111   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7112   ins_pipe( ialu_reg );
 7113 %}
 7114 
 7115 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7116   match(Set dst (AddI dst (LoadI src)));
 7117   effect(KILL cr);
 7118 
 7119   ins_cost(125);
 7120   format %{ "ADD    $dst,$src" %}
 7121   opcode(0x03);
 7122   ins_encode( OpcP, RegMem( dst, src) );
 7123   ins_pipe( ialu_reg_mem );
 7124 %}
 7125 
 7126 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7127   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7128   effect(KILL cr);
 7129 
 7130   ins_cost(150);
 7131   format %{ "ADD    $dst,$src" %}
 7132   opcode(0x01);  /* Opcode 01 /r */
 7133   ins_encode( OpcP, RegMem( src, dst ) );
 7134   ins_pipe( ialu_mem_reg );
 7135 %}
 7136 
 7137 // Add Memory with Immediate
 7138 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7139   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7140   effect(KILL cr);
 7141 
 7142   ins_cost(125);
 7143   format %{ "ADD    $dst,$src" %}
 7144   opcode(0x81);               /* Opcode 81 /0 id */
 7145   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7146   ins_pipe( ialu_mem_imm );
 7147 %}
 7148 
 7149 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7150   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7151   effect(KILL cr);
 7152 
 7153   ins_cost(125);
 7154   format %{ "INC    $dst" %}
 7155   opcode(0xFF);               /* Opcode FF /0 */
 7156   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7157   ins_pipe( ialu_mem_imm );
 7158 %}
 7159 
 7160 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7161   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7162   effect(KILL cr);
 7163 
 7164   ins_cost(125);
 7165   format %{ "DEC    $dst" %}
 7166   opcode(0xFF);               /* Opcode FF /1 */
 7167   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7168   ins_pipe( ialu_mem_imm );
 7169 %}
 7170 
 7171 
 7172 instruct checkCastPP( eRegP dst ) %{
 7173   match(Set dst (CheckCastPP dst));
 7174 
 7175   size(0);
 7176   format %{ "#checkcastPP of $dst" %}
 7177   ins_encode( /*empty encoding*/ );
 7178   ins_pipe( empty );
 7179 %}
 7180 
 7181 instruct castPP( eRegP dst ) %{
 7182   match(Set dst (CastPP dst));
 7183   format %{ "#castPP of $dst" %}
 7184   ins_encode( /*empty encoding*/ );
 7185   ins_pipe( empty );
 7186 %}
 7187 
 7188 instruct castII( rRegI dst ) %{
 7189   match(Set dst (CastII dst));
 7190   format %{ "#castII of $dst" %}
 7191   ins_encode( /*empty encoding*/ );
 7192   ins_cost(0);
 7193   ins_pipe( empty );
 7194 %}
 7195 
 7196 instruct castLL( eRegL dst ) %{
 7197   match(Set dst (CastLL dst));
 7198   format %{ "#castLL of $dst" %}
 7199   ins_encode( /*empty encoding*/ );
 7200   ins_cost(0);
 7201   ins_pipe( empty );
 7202 %}
 7203 
 7204 instruct castFF( regF dst ) %{
 7205   predicate(UseSSE >= 1);
 7206   match(Set dst (CastFF dst));
 7207   format %{ "#castFF of $dst" %}
 7208   ins_encode( /*empty encoding*/ );
 7209   ins_cost(0);
 7210   ins_pipe( empty );
 7211 %}
 7212 
 7213 instruct castDD( regD dst ) %{
 7214   predicate(UseSSE >= 2);
 7215   match(Set dst (CastDD dst));
 7216   format %{ "#castDD of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castFF_PR( regFPR dst ) %{
 7223   predicate(UseSSE < 1);
 7224   match(Set dst (CastFF dst));
 7225   format %{ "#castFF of $dst" %}
 7226   ins_encode( /*empty encoding*/ );
 7227   ins_cost(0);
 7228   ins_pipe( empty );
 7229 %}
 7230 
 7231 instruct castDD_PR( regDPR dst ) %{
 7232   predicate(UseSSE < 2);
 7233   match(Set dst (CastDD dst));
 7234   format %{ "#castDD of $dst" %}
 7235   ins_encode( /*empty encoding*/ );
 7236   ins_cost(0);
 7237   ins_pipe( empty );
 7238 %}
 7239 
 7240 // Load-locked - same as a regular pointer load when used with compare-swap
 7241 instruct loadPLocked(eRegP dst, memory mem) %{
 7242   match(Set dst (LoadPLocked mem));
 7243 
 7244   ins_cost(125);
 7245   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7246   opcode(0x8B);
 7247   ins_encode( OpcP, RegMem(dst,mem));
 7248   ins_pipe( ialu_reg_mem );
 7249 %}
 7250 
 7251 // Conditional-store of the updated heap-top.
 7252 // Used during allocation of the shared heap.
 7253 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
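      // CMPXCHG compares EAX with the destination: on a match it stores the new value
      // and sets ZF, otherwise it loads the destination into EAX and clears ZF.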
 7254 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7255   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7256   // EAX is killed if there is contention, but then it's also unused.
 7257   // In the common case of no contention, EAX holds the new oop address.
 7258   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7259   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7260   ins_pipe( pipe_cmpxchg );
 7261 %}
 7262 
 7263 // Conditional-store of an int value.
 7264 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7265 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7266   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7267   effect(KILL oldval);
 7268   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7269   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7270   ins_pipe( pipe_cmpxchg );
 7271 %}
 7272 
 7273 // Conditional-store of a long value.
 7274 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
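      // CMPXCHG8B compares EDX:EAX with the 64-bit destination: on a match it stores
      // ECX:EBX and sets ZF, otherwise it loads the destination into EDX:EAX and
      // clears ZF.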
 7275 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7276   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7277   effect(KILL oldval);
 7278   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7279             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7280             "XCHG   EBX,ECX"
 7281   %}
 7282   ins_encode %{
 7283     // Note: we need to swap rbx and rcx before and after the
 7284     //       cmpxchg8 instruction because the instruction uses
 7285     //       rcx as the high-order word of the new value to store, but
 7286     //       our register encoding uses rbx.
 7287     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7288     __ lock();
 7289     __ cmpxchg8($mem$$Address);
 7290     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7291   %}
 7292   ins_pipe( pipe_cmpxchg );
 7293 %}
 7294 
 7295 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7296 
 7297 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7298   predicate(VM_Version::supports_cx8());
 7299   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7300   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7301   effect(KILL cr, KILL oldval);
 7302   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7303             "MOV    $res,0\n\t"
 7304             "JNE,s  fail\n\t"
 7305             "MOV    $res,1\n"
 7306           "fail:" %}
 7307   ins_encode( enc_cmpxchg8(mem_ptr),
 7308               enc_flags_ne_to_boolean(res) );
 7309   ins_pipe( pipe_cmpxchg );
 7310 %}
 7311 
 7312 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7313   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7314   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7315   effect(KILL cr, KILL oldval);
 7316   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7317             "MOV    $res,0\n\t"
 7318             "JNE,s  fail\n\t"
 7319             "MOV    $res,1\n"
 7320           "fail:" %}
 7321   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7322   ins_pipe( pipe_cmpxchg );
 7323 %}
 7324 
 7325 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7326   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7327   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7328   effect(KILL cr, KILL oldval);
 7329   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7330             "MOV    $res,0\n\t"
 7331             "JNE,s  fail\n\t"
 7332             "MOV    $res,1\n"
 7333           "fail:" %}
 7334   ins_encode( enc_cmpxchgb(mem_ptr),
 7335               enc_flags_ne_to_boolean(res) );
 7336   ins_pipe( pipe_cmpxchg );
 7337 %}
 7338 
 7339 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7340   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7341   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7342   effect(KILL cr, KILL oldval);
 7343   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7344             "MOV    $res,0\n\t"
 7345             "JNE,s  fail\n\t"
 7346             "MOV    $res,1\n"
 7347           "fail:" %}
 7348   ins_encode( enc_cmpxchgw(mem_ptr),
 7349               enc_flags_ne_to_boolean(res) );
 7350   ins_pipe( pipe_cmpxchg );
 7351 %}
 7352 
 7353 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7354   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7355   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7356   effect(KILL cr, KILL oldval);
 7357   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7358             "MOV    $res,0\n\t"
 7359             "JNE,s  fail\n\t"
 7360             "MOV    $res,1\n"
 7361           "fail:" %}
 7362   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7367   predicate(VM_Version::supports_cx8());
 7368   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7369   effect(KILL cr);
 7370   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7371   ins_encode( enc_cmpxchg8(mem_ptr) );
 7372   ins_pipe( pipe_cmpxchg );
 7373 %}
 7374 
 7375 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7376   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7377   effect(KILL cr);
 7378   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7379   ins_encode( enc_cmpxchg(mem_ptr) );
 7380   ins_pipe( pipe_cmpxchg );
 7381 %}
 7382 
 7383 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7384   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7385   effect(KILL cr);
 7386   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7387   ins_encode( enc_cmpxchgb(mem_ptr) );
 7388   ins_pipe( pipe_cmpxchg );
 7389 %}
 7390 
 7391 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7392   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7393   effect(KILL cr);
 7394   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7395   ins_encode( enc_cmpxchgw(mem_ptr) );
 7396   ins_pipe( pipe_cmpxchg );
 7397 %}
 7398 
 7399 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7400   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7401   effect(KILL cr);
 7402   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7403   ins_encode( enc_cmpxchg(mem_ptr) );
 7404   ins_pipe( pipe_cmpxchg );
 7405 %}
 7406 
 7407 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7408   predicate(n->as_LoadStore()->result_not_used());
 7409   match(Set dummy (GetAndAddB mem add));
 7410   effect(KILL cr);
 7411   format %{ "ADDB  [$mem],$add" %}
 7412   ins_encode %{
 7413     __ lock();
 7414     __ addb($mem$$Address, $add$$constant);
 7415   %}
 7416   ins_pipe( pipe_cmpxchg );
 7417 %}
 7418 
 7419 // Important to match to xRegI: only 8-bit regs.
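      // On 32-bit x86 only EAX, EBX, ECX and EDX have byte-addressable low halves
      // (AL/BL/CL/DL), which is why the byte forms use xRegI.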
 7420 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7421   match(Set newval (GetAndAddB mem newval));
 7422   effect(KILL cr);
 7423   format %{ "XADDB  [$mem],$newval" %}
 7424   ins_encode %{
 7425     __ lock();
 7426     __ xaddb($mem$$Address, $newval$$Register);
 7427   %}
 7428   ins_pipe( pipe_cmpxchg );
 7429 %}
 7430 
 7431 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7432   predicate(n->as_LoadStore()->result_not_used());
 7433   match(Set dummy (GetAndAddS mem add));
 7434   effect(KILL cr);
 7435   format %{ "ADDS  [$mem],$add" %}
 7436   ins_encode %{
 7437     __ lock();
 7438     __ addw($mem$$Address, $add$$constant);
 7439   %}
 7440   ins_pipe( pipe_cmpxchg );
 7441 %}
 7442 
 7443 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7444   match(Set newval (GetAndAddS mem newval));
 7445   effect(KILL cr);
 7446   format %{ "XADDS  [$mem],$newval" %}
 7447   ins_encode %{
 7448     __ lock();
 7449     __ xaddw($mem$$Address, $newval$$Register);
 7450   %}
 7451   ins_pipe( pipe_cmpxchg );
 7452 %}
 7453 
 7454 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7455   predicate(n->as_LoadStore()->result_not_used());
 7456   match(Set dummy (GetAndAddI mem add));
 7457   effect(KILL cr);
 7458   format %{ "ADDL  [$mem],$add" %}
 7459   ins_encode %{
 7460     __ lock();
 7461     __ addl($mem$$Address, $add$$constant);
 7462   %}
 7463   ins_pipe( pipe_cmpxchg );
 7464 %}
 7465 
 7466 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7467   match(Set newval (GetAndAddI mem newval));
 7468   effect(KILL cr);
 7469   format %{ "XADDL  [$mem],$newval" %}
 7470   ins_encode %{
 7471     __ lock();
 7472     __ xaddl($mem$$Address, $newval$$Register);
 7473   %}
 7474   ins_pipe( pipe_cmpxchg );
 7475 %}
 7476 
 7477 // Important to match to xRegI: only 8-bit regs.
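      // XCHG with a memory operand asserts LOCK implicitly, so no explicit lock
      // prefix is needed for the GetAndSet forms below.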
 7478 instruct xchgB( memory mem, xRegI newval) %{
 7479   match(Set newval (GetAndSetB mem newval));
 7480   format %{ "XCHGB  $newval,[$mem]" %}
 7481   ins_encode %{
 7482     __ xchgb($newval$$Register, $mem$$Address);
 7483   %}
 7484   ins_pipe( pipe_cmpxchg );
 7485 %}
 7486 
 7487 instruct xchgS( memory mem, rRegI newval) %{
 7488   match(Set newval (GetAndSetS mem newval));
 7489   format %{ "XCHGW  $newval,[$mem]" %}
 7490   ins_encode %{
 7491     __ xchgw($newval$$Register, $mem$$Address);
 7492   %}
 7493   ins_pipe( pipe_cmpxchg );
 7494 %}
 7495 
 7496 instruct xchgI( memory mem, rRegI newval) %{
 7497   match(Set newval (GetAndSetI mem newval));
 7498   format %{ "XCHGL  $newval,[$mem]" %}
 7499   ins_encode %{
 7500     __ xchgl($newval$$Register, $mem$$Address);
 7501   %}
 7502   ins_pipe( pipe_cmpxchg );
 7503 %}
 7504 
 7505 instruct xchgP( memory mem, pRegP newval) %{
 7506   match(Set newval (GetAndSetP mem newval));
 7507   format %{ "XCHGL  $newval,[$mem]" %}
 7508   ins_encode %{
 7509     __ xchgl($newval$$Register, $mem$$Address);
 7510   %}
 7511   ins_pipe( pipe_cmpxchg );
 7512 %}
 7513 
 7514 //----------Subtraction Instructions-------------------------------------------
 7515 
 7516 // Integer Subtraction Instructions
 7517 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7518   match(Set dst (SubI dst src));
 7519   effect(KILL cr);
 7520 
 7521   size(2);
 7522   format %{ "SUB    $dst,$src" %}
 7523   opcode(0x2B);
 7524   ins_encode( OpcP, RegReg( dst, src) );
 7525   ins_pipe( ialu_reg_reg );
 7526 %}
 7527 
 7528 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7529   match(Set dst (SubI dst src));
 7530   effect(KILL cr);
 7531 
 7532   format %{ "SUB    $dst,$src" %}
 7533   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7534   // ins_encode( RegImm( dst, src) );
 7535   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7536   ins_pipe( ialu_reg );
 7537 %}
 7538 
 7539 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7540   match(Set dst (SubI dst (LoadI src)));
 7541   effect(KILL cr);
 7542 
 7543   ins_cost(125);
 7544   format %{ "SUB    $dst,$src" %}
 7545   opcode(0x2B);
 7546   ins_encode( OpcP, RegMem( dst, src) );
 7547   ins_pipe( ialu_reg_mem );
 7548 %}
 7549 
 7550 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7551   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7552   effect(KILL cr);
 7553 
 7554   ins_cost(150);
 7555   format %{ "SUB    $dst,$src" %}
 7556   opcode(0x29);  /* Opcode 29 /r */
 7557   ins_encode( OpcP, RegMem( src, dst ) );
 7558   ins_pipe( ialu_mem_reg );
 7559 %}
 7560 
 7561 // Subtract from a pointer
 7562 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7563   match(Set dst (AddP dst (SubI zero src)));
 7564   effect(KILL cr);
 7565 
 7566   size(2);
 7567   format %{ "SUB    $dst,$src" %}
 7568   opcode(0x2B);
 7569   ins_encode( OpcP, RegReg( dst, src) );
 7570   ins_pipe( ialu_reg_reg );
 7571 %}
 7572 
 7573 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7574   match(Set dst (SubI zero dst));
 7575   effect(KILL cr);
 7576 
 7577   size(2);
 7578   format %{ "NEG    $dst" %}
 7579   opcode(0xF7,0x03);  // Opcode F7 /3
 7580   ins_encode( OpcP, RegOpc( dst ) );
 7581   ins_pipe( ialu_reg );
 7582 %}
 7583 
 7584 //----------Multiplication/Division Instructions-------------------------------
 7585 // Integer Multiplication Instructions
 7586 // Multiply Register
 7587 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7588   match(Set dst (MulI dst src));
 7589   effect(KILL cr);
 7590 
 7591   size(3);
 7592   ins_cost(300);
 7593   format %{ "IMUL   $dst,$src" %}
 7594   opcode(0xAF, 0x0F);
 7595   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7596   ins_pipe( ialu_reg_reg_alu0 );
 7597 %}
 7598 
 7599 // Multiply 32-bit Immediate
 7600 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7601   match(Set dst (MulI src imm));
 7602   effect(KILL cr);
 7603 
 7604   ins_cost(300);
 7605   format %{ "IMUL   $dst,$src,$imm" %}
 7606   opcode(0x69);  /* 69 /r id */
 7607   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7608   ins_pipe( ialu_reg_reg_alu0 );
 7609 %}
 7610 
 7611 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7612   match(Set dst src);
 7613   effect(KILL cr);
 7614 
 7615   // Note that this is artificially increased to make it more expensive than loadConL
 7616   ins_cost(250);
 7617   format %{ "MOV    EAX,$src\t// low word only" %}
 7618   opcode(0xB8);
 7619   ins_encode( LdImmL_Lo(dst, src) );
 7620   ins_pipe( ialu_reg_fat );
 7621 %}
 7622 
 7623 // Multiply by 32-bit Immediate, taking the shifted high order results
 7624 //  (special case for shift by 32)
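      // A shift of exactly 32 means the result is just the high half of the 64-bit
      // product (EDX after the one-operand IMUL), so no SAR is needed. Larger counts
      // are handled by the next variant, which follows the IMUL with a SAR of EDX
      // by cnt-32.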
 7625 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7626   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7627   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7629              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7630   effect(USE src1, KILL cr);
 7631 
 7632   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7633   ins_cost(0*100 + 1*400 - 150);
 7634   format %{ "IMUL   EDX:EAX,$src1" %}
 7635   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7636   ins_pipe( pipe_slow );
 7637 %}
 7638 
 7639 // Multiply by 32-bit Immediate, taking the shifted high order results
 7640 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7641   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7642   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7643              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7644              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7645   effect(USE src1, KILL cr);
 7646 
 7647   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7648   ins_cost(1*100 + 1*400 - 150);
 7649   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7650             "SAR    EDX,$cnt-32" %}
 7651   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7652   ins_pipe( pipe_slow );
 7653 %}
 7654 
 7655 // Multiply Memory 32-bit Immediate
 7656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7657   match(Set dst (MulI (LoadI src) imm));
 7658   effect(KILL cr);
 7659 
 7660   ins_cost(300);
 7661   format %{ "IMUL   $dst,$src,$imm" %}
 7662   opcode(0x69);  /* 69 /r id */
 7663   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7664   ins_pipe( ialu_reg_mem_alu0 );
 7665 %}
 7666 
 7667 // Multiply Memory
 7668 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7669   match(Set dst (MulI dst (LoadI src)));
 7670   effect(KILL cr);
 7671 
 7672   ins_cost(350);
 7673   format %{ "IMUL   $dst,$src" %}
 7674   opcode(0xAF, 0x0F);
 7675   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7676   ins_pipe( ialu_reg_mem_alu0 );
 7677 %}
 7678 
 7679 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7680 %{
 7681   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7682   effect(KILL cr, KILL src2);
 7683 
 7684   expand %{ mulI_eReg(dst, src1, cr);
 7685             mulI_eReg(src2, src3, cr);
 7686             addI_eReg(dst, src2, cr); %}
 7687 %}
 7688 
 7689 // Multiply Register Int to Long
 7690 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7691   // Basic Idea: long = (long)int * (long)int
 7692   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7693   effect(DEF dst, USE src, USE src1, KILL flags);
 7694 
 7695   ins_cost(300);
 7696   format %{ "IMUL   $dst,$src1" %}
 7697 
 7698   ins_encode( long_int_multiply( dst, src1 ) );
 7699   ins_pipe( ialu_reg_reg_alu0 );
 7700 %}
 7701 
 7702 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7703   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7704   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7705   effect(KILL flags);
 7706 
 7707   ins_cost(300);
 7708   format %{ "MUL    $dst,$src1" %}
 7709 
 7710   ins_encode( long_uint_multiply(dst, src1) );
 7711   ins_pipe( ialu_reg_reg_alu0 );
 7712 %}
 7713 
 7714 // Multiply Register Long
 7715 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7716   match(Set dst (MulL dst src));
 7717   effect(KILL cr, TEMP tmp);
 7718   ins_cost(4*100+3*400);
 7719 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7720 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7721   format %{ "MOV    $tmp,$src.lo\n\t"
 7722             "IMUL   $tmp,EDX\n\t"
 7723             "MOV    EDX,$src.hi\n\t"
 7724             "IMUL   EDX,EAX\n\t"
 7725             "ADD    $tmp,EDX\n\t"
 7726             "MUL    EDX:EAX,$src.lo\n\t"
 7727             "ADD    EDX,$tmp" %}
 7728   ins_encode( long_multiply( dst, src, tmp ) );
 7729   ins_pipe( pipe_slow );
 7730 %}
 7731 
 7732 // Multiply Register Long where the left operand's high 32 bits are zero
 7733 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7734   predicate(is_operand_hi32_zero(n->in(1)));
 7735   match(Set dst (MulL dst src));
 7736   effect(KILL cr, TEMP tmp);
 7737   ins_cost(2*100+2*400);
 7738 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7739 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7740   format %{ "MOV    $tmp,$src.hi\n\t"
 7741             "IMUL   $tmp,EAX\n\t"
 7742             "MUL    EDX:EAX,$src.lo\n\t"
 7743             "ADD    EDX,$tmp" %}
 7744   ins_encode %{
 7745     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7746     __ imull($tmp$$Register, rax);
 7747     __ mull($src$$Register);
 7748     __ addl(rdx, $tmp$$Register);
 7749   %}
 7750   ins_pipe( pipe_slow );
 7751 %}
 7752 
 7753 // Multiply Register Long where the right operand's high 32 bits are zero
 7754 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7755   predicate(is_operand_hi32_zero(n->in(2)));
 7756   match(Set dst (MulL dst src));
 7757   effect(KILL cr, TEMP tmp);
 7758   ins_cost(2*100+2*400);
 7759 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7760 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7761   format %{ "MOV    $tmp,$src.lo\n\t"
 7762             "IMUL   $tmp,EDX\n\t"
 7763             "MUL    EDX:EAX,$src.lo\n\t"
 7764             "ADD    EDX,$tmp" %}
 7765   ins_encode %{
 7766     __ movl($tmp$$Register, $src$$Register);
 7767     __ imull($tmp$$Register, rdx);
 7768     __ mull($src$$Register);
 7769     __ addl(rdx, $tmp$$Register);
 7770   %}
 7771   ins_pipe( pipe_slow );
 7772 %}
 7773 
 7774 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7775 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7776   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7777   match(Set dst (MulL dst src));
 7778   effect(KILL cr);
 7779   ins_cost(1*400);
 7780 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7781 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7782   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7783   ins_encode %{
 7784     __ mull($src$$Register);
 7785   %}
 7786   ins_pipe( pipe_slow );
 7787 %}
 7788 
 7789 // Multiply Register Long by small constant
 7790 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7791   match(Set dst (MulL dst src));
 7792   effect(KILL cr, TEMP tmp);
 7793   ins_cost(2*100+2*400);
 7794   size(12);
 7795 // Basic idea: lo(result) = lo(src * EAX)
 7796 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7797   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7798             "MOV    EDX,$src\n\t"
 7799             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7800             "ADD    EDX,$tmp" %}
 7801   ins_encode( long_multiply_con( dst, src, tmp ) );
 7802   ins_pipe( pipe_slow );
 7803 %}
 7804 
 7805 // Integer DIV with Register
 7806 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7807   match(Set rax (DivI rax div));
 7808   effect(KILL rdx, KILL cr);
 7809   size(26);
 7810   ins_cost(30*100+10*100);
 7811   format %{ "CMP    EAX,0x80000000\n\t"
 7812             "JNE,s  normal\n\t"
 7813             "XOR    EDX,EDX\n\t"
 7814             "CMP    ECX,-1\n\t"
 7815             "JE,s   done\n"
 7816     "normal: CDQ\n\t"
 7817             "IDIV   $div\n\t"
 7818     "done:"        %}
 7819   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7820   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7821   ins_pipe( ialu_reg_reg_alu0 );
 7822 %}
 7823 
 7824 // Divide Register Long
 7825 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7826   match(Set dst (DivL src1 src2));
 7827   effect(CALL);
 7828   ins_cost(10000);
 7829   format %{ "PUSH   $src1.hi\n\t"
 7830             "PUSH   $src1.lo\n\t"
 7831             "PUSH   $src2.hi\n\t"
 7832             "PUSH   $src2.lo\n\t"
 7833             "CALL   SharedRuntime::ldiv\n\t"
 7834             "ADD    ESP,16" %}
 7835   ins_encode( long_div(src1,src2) );
 7836   ins_pipe( pipe_slow );
 7837 %}
 7838 
 7839 // Integer DIVMOD with Register, both quotient and mod results
 7840 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7841   match(DivModI rax div);
 7842   effect(KILL cr);
 7843   size(26);
 7844   ins_cost(30*100+10*100);
 7845   format %{ "CMP    EAX,0x80000000\n\t"
 7846             "JNE,s  normal\n\t"
 7847             "XOR    EDX,EDX\n\t"
 7848             "CMP    ECX,-1\n\t"
 7849             "JE,s   done\n"
 7850     "normal: CDQ\n\t"
 7851             "IDIV   $div\n\t"
 7852     "done:"        %}
 7853   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7854   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7855   ins_pipe( pipe_slow );
 7856 %}
 7857 
 7858 // Integer MOD with Register
 7859 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7860   match(Set rdx (ModI rax div));
 7861   effect(KILL rax, KILL cr);
 7862 
 7863   size(26);
 7864   ins_cost(300);
 7865   format %{ "CDQ\n\t"
 7866             "IDIV   $div" %}
 7867   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7868   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7869   ins_pipe( ialu_reg_reg_alu0 );
 7870 %}
 7871 
 7872 // Remainder Register Long
 7873 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7874   match(Set dst (ModL src1 src2));
 7875   effect(CALL);
 7876   ins_cost(10000);
 7877   format %{ "PUSH   $src1.hi\n\t"
 7878             "PUSH   $src1.lo\n\t"
 7879             "PUSH   $src2.hi\n\t"
 7880             "PUSH   $src2.lo\n\t"
 7881             "CALL   SharedRuntime::lrem\n\t"
 7882             "ADD    ESP,16" %}
 7883   ins_encode( long_mod(src1,src2) );
 7884   ins_pipe( pipe_slow );
 7885 %}
 7886 
 7887 // Divide Register Long (no special case since divisor != -1)
 7888 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7889   match(Set dst (DivL dst imm));
 7890   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7891   ins_cost(1000);
 7892   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7893             "XOR    $tmp2,$tmp2\n\t"
 7894             "CMP    $tmp,EDX\n\t"
 7895             "JA,s   fast\n\t"
 7896             "MOV    $tmp2,EAX\n\t"
 7897             "MOV    EAX,EDX\n\t"
 7898             "MOV    EDX,0\n\t"
 7899             "JLE,s  pos\n\t"
 7900             "LNEG   EAX : $tmp2\n\t"
 7901             "DIV    $tmp # unsigned division\n\t"
 7902             "XCHG   EAX,$tmp2\n\t"
 7903             "DIV    $tmp\n\t"
 7904             "LNEG   $tmp2 : EAX\n\t"
 7905             "JMP,s  done\n"
 7906     "pos:\n\t"
 7907             "DIV    $tmp\n\t"
 7908             "XCHG   EAX,$tmp2\n"
 7909     "fast:\n\t"
 7910             "DIV    $tmp\n"
 7911     "done:\n\t"
 7912             "MOV    EDX,$tmp2\n\t"
 7913             "NEG    EDX:EAX # if $imm < 0" %}
 7914   ins_encode %{
 7915     int con = (int)$imm$$constant;
 7916     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7917     int pcon = (con > 0) ? con : -con;
 7918     Label Lfast, Lpos, Ldone;
 7919 
 7920     __ movl($tmp$$Register, pcon);
 7921     __ xorl($tmp2$$Register,$tmp2$$Register);
 7922     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7923     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7924 
 7925     __ movl($tmp2$$Register, $dst$$Register); // save
 7926     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7927     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7928     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7929 
 7930     // Negative dividend.
 7931     // convert value to positive to use unsigned division
 7932     __ lneg($dst$$Register, $tmp2$$Register);
 7933     __ divl($tmp$$Register);
 7934     __ xchgl($dst$$Register, $tmp2$$Register);
 7935     __ divl($tmp$$Register);
 7936     // revert result back to negative
 7937     __ lneg($tmp2$$Register, $dst$$Register);
 7938     __ jmpb(Ldone);
 7939 
 7940     __ bind(Lpos);
 7941     __ divl($tmp$$Register); // Use unsigned division
 7942     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7944 
 7945     __ bind(Lfast);
 7946     // fast path: src is positive
 7947     __ divl($tmp$$Register); // Use unsigned division
 7948 
 7949     __ bind(Ldone);
 7950     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7951     if (con < 0) {
 7952       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7953     }
 7954   %}
 7955   ins_pipe( pipe_slow );
 7956 %}
 7957 
// Remainder Register Long (remainder fits into 32 bits)
 7959 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7960   match(Set dst (ModL dst imm));
 7961   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7962   ins_cost(1000);
 7963   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7964             "CMP    $tmp,EDX\n\t"
 7965             "JA,s   fast\n\t"
 7966             "MOV    $tmp2,EAX\n\t"
 7967             "MOV    EAX,EDX\n\t"
 7968             "MOV    EDX,0\n\t"
 7969             "JLE,s  pos\n\t"
 7970             "LNEG   EAX : $tmp2\n\t"
 7971             "DIV    $tmp # unsigned division\n\t"
 7972             "MOV    EAX,$tmp2\n\t"
 7973             "DIV    $tmp\n\t"
 7974             "NEG    EDX\n\t"
 7975             "JMP,s  done\n"
 7976     "pos:\n\t"
 7977             "DIV    $tmp\n\t"
 7978             "MOV    EAX,$tmp2\n"
 7979     "fast:\n\t"
 7980             "DIV    $tmp\n"
 7981     "done:\n\t"
 7982             "MOV    EAX,EDX\n\t"
 7983             "SAR    EDX,31\n\t" %}
 7984   ins_encode %{
 7985     int con = (int)$imm$$constant;
 7986     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7987     int pcon = (con > 0) ? con : -con;
 7988     Label  Lfast, Lpos, Ldone;
 7989 
 7990     __ movl($tmp$$Register, pcon);
 7991     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7992     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7993 
 7994     __ movl($tmp2$$Register, $dst$$Register); // save
 7995     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7996     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7997     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7998 
 7999     // Negative dividend.
 8000     // convert value to positive to use unsigned division
 8001     __ lneg($dst$$Register, $tmp2$$Register);
 8002     __ divl($tmp$$Register);
 8003     __ movl($dst$$Register, $tmp2$$Register);
 8004     __ divl($tmp$$Register);
 8005     // revert remainder back to negative
 8006     __ negl(HIGH_FROM_LOW($dst$$Register));
 8007     __ jmpb(Ldone);
 8008 
 8009     __ bind(Lpos);
 8010     __ divl($tmp$$Register);
 8011     __ movl($dst$$Register, $tmp2$$Register);
 8012 
 8013     __ bind(Lfast);
 8014     // fast path: src is positive
 8015     __ divl($tmp$$Register);
 8016 
 8017     __ bind(Ldone);
 8018     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8019     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8020 
 8021   %}
 8022   ins_pipe( pipe_slow );
 8023 %}
 8024 
 8025 // Integer Shift Instructions
 8026 // Shift Left by one
 8027 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8028   match(Set dst (LShiftI dst shift));
 8029   effect(KILL cr);
 8030 
 8031   size(2);
 8032   format %{ "SHL    $dst,$shift" %}
 8033   opcode(0xD1, 0x4);  /* D1 /4 */
 8034   ins_encode( OpcP, RegOpc( dst ) );
 8035   ins_pipe( ialu_reg );
 8036 %}
 8037 
 8038 // Shift Left by 8-bit immediate
 8039 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8040   match(Set dst (LShiftI dst shift));
 8041   effect(KILL cr);
 8042 
 8043   size(3);
 8044   format %{ "SHL    $dst,$shift" %}
 8045   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8046   ins_encode( RegOpcImm( dst, shift) );
 8047   ins_pipe( ialu_reg );
 8048 %}
 8049 
 8050 // Shift Left by variable
 8051 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8052   match(Set dst (LShiftI dst shift));
 8053   effect(KILL cr);
 8054 
 8055   size(2);
 8056   format %{ "SHL    $dst,$shift" %}
 8057   opcode(0xD3, 0x4);  /* D3 /4 */
 8058   ins_encode( OpcP, RegOpc( dst ) );
 8059   ins_pipe( ialu_reg_reg );
 8060 %}
 8061 
 8062 // Arithmetic shift right by one
 8063 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8064   match(Set dst (RShiftI dst shift));
 8065   effect(KILL cr);
 8066 
 8067   size(2);
 8068   format %{ "SAR    $dst,$shift" %}
 8069   opcode(0xD1, 0x7);  /* D1 /7 */
 8070   ins_encode( OpcP, RegOpc( dst ) );
 8071   ins_pipe( ialu_reg );
 8072 %}
 8073 
 8074 // Arithmetic shift right by one
 8075 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8076   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8077   effect(KILL cr);
 8078   format %{ "SAR    $dst,$shift" %}
 8079   opcode(0xD1, 0x7);  /* D1 /7 */
 8080   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8081   ins_pipe( ialu_mem_imm );
 8082 %}
 8083 
 8084 // Arithmetic Shift Right by 8-bit immediate
 8085 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8086   match(Set dst (RShiftI dst shift));
 8087   effect(KILL cr);
 8088 
 8089   size(3);
 8090   format %{ "SAR    $dst,$shift" %}
 8091   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8092   ins_encode( RegOpcImm( dst, shift ) );
 8093   ins_pipe( ialu_mem_imm );
 8094 %}
 8095 
 8096 // Arithmetic Shift Right by 8-bit immediate
 8097 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8098   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8099   effect(KILL cr);
 8100 
 8101   format %{ "SAR    $dst,$shift" %}
 8102   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8103   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8104   ins_pipe( ialu_mem_imm );
 8105 %}
 8106 
 8107 // Arithmetic Shift Right by variable
 8108 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8109   match(Set dst (RShiftI dst shift));
 8110   effect(KILL cr);
 8111 
 8112   size(2);
 8113   format %{ "SAR    $dst,$shift" %}
 8114   opcode(0xD3, 0x7);  /* D3 /7 */
 8115   ins_encode( OpcP, RegOpc( dst ) );
 8116   ins_pipe( ialu_reg_reg );
 8117 %}
 8118 
 8119 // Logical shift right by one
 8120 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8121   match(Set dst (URShiftI dst shift));
 8122   effect(KILL cr);
 8123 
 8124   size(2);
 8125   format %{ "SHR    $dst,$shift" %}
 8126   opcode(0xD1, 0x5);  /* D1 /5 */
 8127   ins_encode( OpcP, RegOpc( dst ) );
 8128   ins_pipe( ialu_reg );
 8129 %}
 8130 
 8131 // Logical Shift Right by 8-bit immediate
 8132 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8133   match(Set dst (URShiftI dst shift));
 8134   effect(KILL cr);
 8135 
 8136   size(3);
 8137   format %{ "SHR    $dst,$shift" %}
 8138   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8139   ins_encode( RegOpcImm( dst, shift) );
 8140   ins_pipe( ialu_reg );
 8141 %}
 8142 
 8143 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode (sign-extend a byte).
 8146 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8147   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8148 
 8149   size(3);
 8150   format %{ "MOVSX  $dst,$src :8" %}
 8151   ins_encode %{
 8152     __ movsbl($dst$$Register, $src$$Register);
 8153   %}
 8154   ins_pipe(ialu_reg_reg);
 8155 %}
 8156 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode (sign-extend a short).
 8159 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8160   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8161 
 8162   size(3);
 8163   format %{ "MOVSX  $dst,$src :16" %}
 8164   ins_encode %{
 8165     __ movswl($dst$$Register, $src$$Register);
 8166   %}
 8167   ins_pipe(ialu_reg_reg);
 8168 %}
 8169 
 8170 
 8171 // Logical Shift Right by variable
 8172 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8173   match(Set dst (URShiftI dst shift));
 8174   effect(KILL cr);
 8175 
 8176   size(2);
 8177   format %{ "SHR    $dst,$shift" %}
 8178   opcode(0xD3, 0x5);  /* D3 /5 */
 8179   ins_encode( OpcP, RegOpc( dst ) );
 8180   ins_pipe( ialu_reg_reg );
 8181 %}
 8182 
 8183 
 8184 //----------Logical Instructions-----------------------------------------------
 8185 //----------Integer Logical Instructions---------------------------------------
 8186 // And Instructions
 8187 // And Register with Register
 8188 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8189   match(Set dst (AndI dst src));
 8190   effect(KILL cr);
 8191 
 8192   size(2);
 8193   format %{ "AND    $dst,$src" %}
 8194   opcode(0x23);
 8195   ins_encode( OpcP, RegReg( dst, src) );
 8196   ins_pipe( ialu_reg_reg );
 8197 %}
 8198 
 8199 // And Register with Immediate
 8200 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8201   match(Set dst (AndI dst src));
 8202   effect(KILL cr);
 8203 
 8204   format %{ "AND    $dst,$src" %}
 8205   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8206   // ins_encode( RegImm( dst, src) );
 8207   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8208   ins_pipe( ialu_reg );
 8209 %}
 8210 
 8211 // And Register with Memory
 8212 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8213   match(Set dst (AndI dst (LoadI src)));
 8214   effect(KILL cr);
 8215 
 8216   ins_cost(125);
 8217   format %{ "AND    $dst,$src" %}
 8218   opcode(0x23);
 8219   ins_encode( OpcP, RegMem( dst, src) );
 8220   ins_pipe( ialu_reg_mem );
 8221 %}
 8222 
 8223 // And Memory with Register
 8224 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8226   effect(KILL cr);
 8227 
 8228   ins_cost(150);
 8229   format %{ "AND    $dst,$src" %}
 8230   opcode(0x21);  /* Opcode 21 /r */
 8231   ins_encode( OpcP, RegMem( src, dst ) );
 8232   ins_pipe( ialu_mem_reg );
 8233 %}
 8234 
 8235 // And Memory with Immediate
 8236 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8237   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8238   effect(KILL cr);
 8239 
 8240   ins_cost(125);
 8241   format %{ "AND    $dst,$src" %}
 8242   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8243   // ins_encode( MemImm( dst, src) );
 8244   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8245   ins_pipe( ialu_mem_imm );
 8246 %}
 8247 
 8248 // BMI1 instructions
 8249 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8250   match(Set dst (AndI (XorI src1 minus_1) src2));
 8251   predicate(UseBMI1Instructions);
 8252   effect(KILL cr);
 8253 
 8254   format %{ "ANDNL  $dst, $src1, $src2" %}
 8255 
 8256   ins_encode %{
 8257     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8258   %}
 8259   ins_pipe(ialu_reg);
 8260 %}
 8261 
 8262 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8263   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8264   predicate(UseBMI1Instructions);
 8265   effect(KILL cr);
 8266 
 8267   ins_cost(125);
 8268   format %{ "ANDNL  $dst, $src1, $src2" %}
 8269 
 8270   ins_encode %{
 8271     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8272   %}
 8273   ins_pipe(ialu_reg_mem);
 8274 %}
 8275 
 8276 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8277   match(Set dst (AndI (SubI imm_zero src) src));
 8278   predicate(UseBMI1Instructions);
 8279   effect(KILL cr);
 8280 
 8281   format %{ "BLSIL  $dst, $src" %}
 8282 
 8283   ins_encode %{
 8284     __ blsil($dst$$Register, $src$$Register);
 8285   %}
 8286   ins_pipe(ialu_reg);
 8287 %}
 8288 
 8289 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8290   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8291   predicate(UseBMI1Instructions);
 8292   effect(KILL cr);
 8293 
 8294   ins_cost(125);
 8295   format %{ "BLSIL  $dst, $src" %}
 8296 
 8297   ins_encode %{
 8298     __ blsil($dst$$Register, $src$$Address);
 8299   %}
 8300   ins_pipe(ialu_reg_mem);
 8301 %}
 8302 
 8303 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8304 %{
 8305   match(Set dst (XorI (AddI src minus_1) src));
 8306   predicate(UseBMI1Instructions);
 8307   effect(KILL cr);
 8308 
 8309   format %{ "BLSMSKL $dst, $src" %}
 8310 
 8311   ins_encode %{
 8312     __ blsmskl($dst$$Register, $src$$Register);
 8313   %}
 8314 
 8315   ins_pipe(ialu_reg);
 8316 %}
 8317 
 8318 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8319 %{
 8320   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8321   predicate(UseBMI1Instructions);
 8322   effect(KILL cr);
 8323 
 8324   ins_cost(125);
 8325   format %{ "BLSMSKL $dst, $src" %}
 8326 
 8327   ins_encode %{
 8328     __ blsmskl($dst$$Register, $src$$Address);
 8329   %}
 8330 
 8331   ins_pipe(ialu_reg_mem);
 8332 %}
 8333 
 8334 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8335 %{
 8336   match(Set dst (AndI (AddI src minus_1) src) );
 8337   predicate(UseBMI1Instructions);
 8338   effect(KILL cr);
 8339 
 8340   format %{ "BLSRL  $dst, $src" %}
 8341 
 8342   ins_encode %{
 8343     __ blsrl($dst$$Register, $src$$Register);
 8344   %}
 8345 
 8346   ins_pipe(ialu_reg);
 8347 %}
 8348 
 8349 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8350 %{
 8351   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8352   predicate(UseBMI1Instructions);
 8353   effect(KILL cr);
 8354 
 8355   ins_cost(125);
 8356   format %{ "BLSRL  $dst, $src" %}
 8357 
 8358   ins_encode %{
 8359     __ blsrl($dst$$Register, $src$$Address);
 8360   %}
 8361 
 8362   ins_pipe(ialu_reg_mem);
 8363 %}
 8364 
 8365 // Or Instructions
 8366 // Or Register with Register
 8367 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8368   match(Set dst (OrI dst src));
 8369   effect(KILL cr);
 8370 
 8371   size(2);
 8372   format %{ "OR     $dst,$src" %}
 8373   opcode(0x0B);
 8374   ins_encode( OpcP, RegReg( dst, src) );
 8375   ins_pipe( ialu_reg_reg );
 8376 %}
 8377 
 8378 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8379   match(Set dst (OrI dst (CastP2X src)));
 8380   effect(KILL cr);
 8381 
 8382   size(2);
 8383   format %{ "OR     $dst,$src" %}
 8384   opcode(0x0B);
 8385   ins_encode( OpcP, RegReg( dst, src) );
 8386   ins_pipe( ialu_reg_reg );
 8387 %}
 8388 
 8389 
 8390 // Or Register with Immediate
 8391 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8392   match(Set dst (OrI dst src));
 8393   effect(KILL cr);
 8394 
 8395   format %{ "OR     $dst,$src" %}
 8396   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8397   // ins_encode( RegImm( dst, src) );
 8398   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8399   ins_pipe( ialu_reg );
 8400 %}
 8401 
 8402 // Or Register with Memory
 8403 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8404   match(Set dst (OrI dst (LoadI src)));
 8405   effect(KILL cr);
 8406 
 8407   ins_cost(125);
 8408   format %{ "OR     $dst,$src" %}
 8409   opcode(0x0B);
 8410   ins_encode( OpcP, RegMem( dst, src) );
 8411   ins_pipe( ialu_reg_mem );
 8412 %}
 8413 
 8414 // Or Memory with Register
 8415 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8416   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8417   effect(KILL cr);
 8418 
 8419   ins_cost(150);
 8420   format %{ "OR     $dst,$src" %}
 8421   opcode(0x09);  /* Opcode 09 /r */
 8422   ins_encode( OpcP, RegMem( src, dst ) );
 8423   ins_pipe( ialu_mem_reg );
 8424 %}
 8425 
 8426 // Or Memory with Immediate
 8427 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8428   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8429   effect(KILL cr);
 8430 
 8431   ins_cost(125);
 8432   format %{ "OR     $dst,$src" %}
 8433   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8434   // ins_encode( MemImm( dst, src) );
 8435   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8436   ins_pipe( ialu_mem_imm );
 8437 %}
 8438 
 8439 // ROL/ROR
 8440 // ROL expand
 8441 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8442   effect(USE_DEF dst, USE shift, KILL cr);
 8443 
 8444   format %{ "ROL    $dst, $shift" %}
 8445   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8446   ins_encode( OpcP, RegOpc( dst ));
 8447   ins_pipe( ialu_reg );
 8448 %}
 8449 
 8450 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8451   effect(USE_DEF dst, USE shift, KILL cr);
 8452 
 8453   format %{ "ROL    $dst, $shift" %}
 8454   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8455   ins_encode( RegOpcImm(dst, shift) );
 8456   ins_pipe(ialu_reg);
 8457 %}
 8458 
 8459 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8460   effect(USE_DEF dst, USE shift, KILL cr);
 8461 
 8462   format %{ "ROL    $dst, $shift" %}
 8463   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8464   ins_encode(OpcP, RegOpc(dst));
 8465   ins_pipe( ialu_reg_reg );
 8466 %}
 8467 // end of ROL expand
 8468 
 8469 // ROL 32bit by one once
 8470 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8471   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8472 
 8473   expand %{
 8474     rolI_eReg_imm1(dst, lshift, cr);
 8475   %}
 8476 %}
 8477 
 8478 // ROL 32bit var by imm8 once
 8479 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8480   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8481   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8482 
 8483   expand %{
 8484     rolI_eReg_imm8(dst, lshift, cr);
 8485   %}
 8486 %}
 8487 
 8488 // ROL 32bit var by var once
 8489 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8490   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8491 
 8492   expand %{
 8493     rolI_eReg_CL(dst, shift, cr);
 8494   %}
 8495 %}
 8496 
 8497 // ROL 32bit var by var once
 8498 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8499   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8500 
 8501   expand %{
 8502     rolI_eReg_CL(dst, shift, cr);
 8503   %}
 8504 %}
 8505 
 8506 // ROR expand
 8507 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8508   effect(USE_DEF dst, USE shift, KILL cr);
 8509 
 8510   format %{ "ROR    $dst, $shift" %}
 8511   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8512   ins_encode( OpcP, RegOpc( dst ) );
 8513   ins_pipe( ialu_reg );
 8514 %}
 8515 
 8516 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8517   effect (USE_DEF dst, USE shift, KILL cr);
 8518 
 8519   format %{ "ROR    $dst, $shift" %}
 8520   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8521   ins_encode( RegOpcImm(dst, shift) );
 8522   ins_pipe( ialu_reg );
 8523 %}
 8524 
 8525 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8526   effect(USE_DEF dst, USE shift, KILL cr);
 8527 
 8528   format %{ "ROR    $dst, $shift" %}
 8529   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8530   ins_encode(OpcP, RegOpc(dst));
 8531   ins_pipe( ialu_reg_reg );
 8532 %}
 8533 // end of ROR expand
 8534 
// ROR 32bit by one once
 8536 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8537   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8538 
 8539   expand %{
 8540     rorI_eReg_imm1(dst, rshift, cr);
 8541   %}
 8542 %}
 8543 
 8544 // ROR 32bit by immI8 once
 8545 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8546   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8547   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8548 
 8549   expand %{
 8550     rorI_eReg_imm8(dst, rshift, cr);
 8551   %}
 8552 %}
 8553 
 8554 // ROR 32bit var by var once
 8555 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8556   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8557 
 8558   expand %{
 8559     rorI_eReg_CL(dst, shift, cr);
 8560   %}
 8561 %}
 8562 
 8563 // ROR 32bit var by var once
 8564 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8565   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8566 
 8567   expand %{
 8568     rorI_eReg_CL(dst, shift, cr);
 8569   %}
 8570 %}
 8571 
 8572 // Xor Instructions
 8573 // Xor Register with Register
 8574 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8575   match(Set dst (XorI dst src));
 8576   effect(KILL cr);
 8577 
 8578   size(2);
 8579   format %{ "XOR    $dst,$src" %}
 8580   opcode(0x33);
 8581   ins_encode( OpcP, RegReg( dst, src) );
 8582   ins_pipe( ialu_reg_reg );
 8583 %}
 8584 
 8585 // Xor Register with Immediate -1
 8586 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8587   match(Set dst (XorI dst imm));
 8588 
 8589   size(2);
 8590   format %{ "NOT    $dst" %}
 8591   ins_encode %{
 8592      __ notl($dst$$Register);
 8593   %}
 8594   ins_pipe( ialu_reg );
 8595 %}
 8596 
 8597 // Xor Register with Immediate
 8598 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8599   match(Set dst (XorI dst src));
 8600   effect(KILL cr);
 8601 
 8602   format %{ "XOR    $dst,$src" %}
 8603   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8604   // ins_encode( RegImm( dst, src) );
 8605   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8606   ins_pipe( ialu_reg );
 8607 %}
 8608 
 8609 // Xor Register with Memory
 8610 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8611   match(Set dst (XorI dst (LoadI src)));
 8612   effect(KILL cr);
 8613 
 8614   ins_cost(125);
 8615   format %{ "XOR    $dst,$src" %}
 8616   opcode(0x33);
 8617   ins_encode( OpcP, RegMem(dst, src) );
 8618   ins_pipe( ialu_reg_mem );
 8619 %}
 8620 
 8621 // Xor Memory with Register
 8622 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8623   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8624   effect(KILL cr);
 8625 
 8626   ins_cost(150);
 8627   format %{ "XOR    $dst,$src" %}
 8628   opcode(0x31);  /* Opcode 31 /r */
 8629   ins_encode( OpcP, RegMem( src, dst ) );
 8630   ins_pipe( ialu_mem_reg );
 8631 %}
 8632 
 8633 // Xor Memory with Immediate
 8634 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8635   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8636   effect(KILL cr);
 8637 
 8638   ins_cost(125);
 8639   format %{ "XOR    $dst,$src" %}
 8640   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8641   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8642   ins_pipe( ialu_mem_imm );
 8643 %}
 8644 
 8645 //----------Convert Int to Boolean---------------------------------------------
 8646 
 8647 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8648   effect( DEF dst, USE src );
 8649   format %{ "MOV    $dst,$src" %}
 8650   ins_encode( enc_Copy( dst, src) );
 8651   ins_pipe( ialu_reg_reg );
 8652 %}
 8653 
 8654 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8655   effect( USE_DEF dst, USE src, KILL cr );
 8656 
 8657   size(4);
 8658   format %{ "NEG    $dst\n\t"
 8659             "ADC    $dst,$src" %}
 8660   ins_encode( neg_reg(dst),
 8661               OpcRegReg(0x13,dst,src) );
 8662   ins_pipe( ialu_reg_reg_long );
 8663 %}
 8664 
 8665 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8666   match(Set dst (Conv2B src));
 8667 
 8668   expand %{
 8669     movI_nocopy(dst,src);
 8670     ci2b(dst,src,cr);
 8671   %}
 8672 %}
 8673 
 8674 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8675   effect( DEF dst, USE src );
 8676   format %{ "MOV    $dst,$src" %}
 8677   ins_encode( enc_Copy( dst, src) );
 8678   ins_pipe( ialu_reg_reg );
 8679 %}
 8680 
 8681 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8682   effect( USE_DEF dst, USE src, KILL cr );
 8683   format %{ "NEG    $dst\n\t"
 8684             "ADC    $dst,$src" %}
 8685   ins_encode( neg_reg(dst),
 8686               OpcRegReg(0x13,dst,src) );
 8687   ins_pipe( ialu_reg_reg_long );
 8688 %}
 8689 
 8690 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8691   match(Set dst (Conv2B src));
 8692 
 8693   expand %{
 8694     movP_nocopy(dst,src);
 8695     cp2b(dst,src,cr);
 8696   %}
 8697 %}
 8698 
 8699 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8700   match(Set dst (CmpLTMask p q));
 8701   effect(KILL cr);
 8702   ins_cost(400);
 8703 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
 8705   format %{ "XOR    $dst,$dst\n\t"
 8706             "CMP    $p,$q\n\t"
 8707             "SETlt  $dst\n\t"
 8708             "NEG    $dst" %}
 8709   ins_encode %{
 8710     Register Rp = $p$$Register;
 8711     Register Rq = $q$$Register;
 8712     Register Rd = $dst$$Register;
 8713     Label done;
 8714     __ xorl(Rd, Rd);
 8715     __ cmpl(Rp, Rq);
 8716     __ setb(Assembler::less, Rd);
 8717     __ negl(Rd);
 8718   %}
 8719 
 8720   ins_pipe(pipe_slow);
 8721 %}
 8722 
 8723 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8724   match(Set dst (CmpLTMask dst zero));
 8725   effect(DEF dst, KILL cr);
 8726   ins_cost(100);
 8727 
 8728   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8729   ins_encode %{
 8730   __ sarl($dst$$Register, 31);
 8731   %}
 8732   ins_pipe(ialu_reg);
 8733 %}
 8734 
 8735 /* better to save a register than avoid a branch */
 8736 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8737   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8738   effect(KILL cr);
 8739   ins_cost(400);
 8740   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8741             "JGE    done\n\t"
 8742             "ADD    $p,$y\n"
 8743             "done:  " %}
 8744   ins_encode %{
 8745     Register Rp = $p$$Register;
 8746     Register Rq = $q$$Register;
 8747     Register Ry = $y$$Register;
 8748     Label done;
 8749     __ subl(Rp, Rq);
 8750     __ jccb(Assembler::greaterEqual, done);
 8751     __ addl(Rp, Ry);
 8752     __ bind(done);
 8753   %}
 8754 
 8755   ins_pipe(pipe_cmplt);
 8756 %}
 8757 
 8758 /* better to save a register than avoid a branch */
 8759 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8760   match(Set y (AndI (CmpLTMask p q) y));
 8761   effect(KILL cr);
 8762 
 8763   ins_cost(300);
 8764 
 8765   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8766             "JLT      done\n\t"
 8767             "XORL     $y, $y\n"
 8768             "done:  " %}
 8769   ins_encode %{
 8770     Register Rp = $p$$Register;
 8771     Register Rq = $q$$Register;
 8772     Register Ry = $y$$Register;
 8773     Label done;
 8774     __ cmpl(Rp, Rq);
 8775     __ jccb(Assembler::less, done);
 8776     __ xorl(Ry, Ry);
 8777     __ bind(done);
 8778   %}
 8779 
 8780   ins_pipe(pipe_cmplt);
 8781 %}
 8782 
 8783 /* If I enable this, I encourage spilling in the inner loop of compress.
 8784 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8785   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8786 */
 8787 //----------Overflow Math Instructions-----------------------------------------
 8788 
 8789 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8790 %{
 8791   match(Set cr (OverflowAddI op1 op2));
 8792   effect(DEF cr, USE_KILL op1, USE op2);
 8793 
 8794   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8795 
 8796   ins_encode %{
 8797     __ addl($op1$$Register, $op2$$Register);
 8798   %}
 8799   ins_pipe(ialu_reg_reg);
 8800 %}
 8801 
 8802 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8803 %{
 8804   match(Set cr (OverflowAddI op1 op2));
 8805   effect(DEF cr, USE_KILL op1, USE op2);
 8806 
 8807   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8808 
 8809   ins_encode %{
 8810     __ addl($op1$$Register, $op2$$constant);
 8811   %}
 8812   ins_pipe(ialu_reg_reg);
 8813 %}
 8814 
 8815 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8816 %{
 8817   match(Set cr (OverflowSubI op1 op2));
 8818 
 8819   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8820   ins_encode %{
 8821     __ cmpl($op1$$Register, $op2$$Register);
 8822   %}
 8823   ins_pipe(ialu_reg_reg);
 8824 %}
 8825 
 8826 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8827 %{
 8828   match(Set cr (OverflowSubI op1 op2));
 8829 
 8830   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8831   ins_encode %{
 8832     __ cmpl($op1$$Register, $op2$$constant);
 8833   %}
 8834   ins_pipe(ialu_reg_reg);
 8835 %}
 8836 
 8837 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8838 %{
 8839   match(Set cr (OverflowSubI zero op2));
 8840   effect(DEF cr, USE_KILL op2);
 8841 
 8842   format %{ "NEG    $op2\t# overflow check int" %}
 8843   ins_encode %{
 8844     __ negl($op2$$Register);
 8845   %}
 8846   ins_pipe(ialu_reg_reg);
 8847 %}
 8848 
 8849 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8850 %{
 8851   match(Set cr (OverflowMulI op1 op2));
 8852   effect(DEF cr, USE_KILL op1, USE op2);
 8853 
 8854   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8855   ins_encode %{
 8856     __ imull($op1$$Register, $op2$$Register);
 8857   %}
 8858   ins_pipe(ialu_reg_reg_alu0);
 8859 %}
 8860 
 8861 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8862 %{
 8863   match(Set cr (OverflowMulI op1 op2));
 8864   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8865 
 8866   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8867   ins_encode %{
 8868     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8869   %}
 8870   ins_pipe(ialu_reg_reg_alu0);
 8871 %}
 8872 
 8873 // Integer Absolute Instructions
 8874 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8875 %{
 8876   match(Set dst (AbsI src));
 8877   effect(TEMP dst, TEMP tmp, KILL cr);
 8878   format %{ "movl $tmp, $src\n\t"
 8879             "sarl $tmp, 31\n\t"
 8880             "movl $dst, $src\n\t"
 8881             "xorl $dst, $tmp\n\t"
 8882             "subl $dst, $tmp\n"
 8883           %}
 8884   ins_encode %{
 8885     __ movl($tmp$$Register, $src$$Register);
 8886     __ sarl($tmp$$Register, 31);
 8887     __ movl($dst$$Register, $src$$Register);
 8888     __ xorl($dst$$Register, $tmp$$Register);
 8889     __ subl($dst$$Register, $tmp$$Register);
 8890   %}
 8891 
 8892   ins_pipe(ialu_reg_reg);
 8893 %}
 8894 
 8895 //----------Long Instructions------------------------------------------------
 8896 // Add Long Register with Register
 8897 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8898   match(Set dst (AddL dst src));
 8899   effect(KILL cr);
 8900   ins_cost(200);
 8901   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8902             "ADC    $dst.hi,$src.hi" %}
 8903   opcode(0x03, 0x13);
 8904   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8905   ins_pipe( ialu_reg_reg_long );
 8906 %}
 8907 
 8908 // Add Long Register with Immediate
 8909 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8910   match(Set dst (AddL dst src));
 8911   effect(KILL cr);
 8912   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8913             "ADC    $dst.hi,$src.hi" %}
 8914   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8915   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8916   ins_pipe( ialu_reg_long );
 8917 %}
 8918 
 8919 // Add Long Register with Memory
 8920 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8921   match(Set dst (AddL dst (LoadL mem)));
 8922   effect(KILL cr);
 8923   ins_cost(125);
 8924   format %{ "ADD    $dst.lo,$mem\n\t"
 8925             "ADC    $dst.hi,$mem+4" %}
 8926   opcode(0x03, 0x13);
 8927   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8928   ins_pipe( ialu_reg_long_mem );
 8929 %}
 8930 
 8931 // Subtract Long Register with Register.
 8932 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8933   match(Set dst (SubL dst src));
 8934   effect(KILL cr);
 8935   ins_cost(200);
 8936   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8937             "SBB    $dst.hi,$src.hi" %}
 8938   opcode(0x2B, 0x1B);
 8939   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8940   ins_pipe( ialu_reg_reg_long );
 8941 %}
 8942 
 8943 // Subtract Long Register with Immediate
 8944 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8945   match(Set dst (SubL dst src));
 8946   effect(KILL cr);
 8947   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8948             "SBB    $dst.hi,$src.hi" %}
 8949   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8950   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8951   ins_pipe( ialu_reg_long );
 8952 %}
 8953 
 8954 // Subtract Long Register with Memory
 8955 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8956   match(Set dst (SubL dst (LoadL mem)));
 8957   effect(KILL cr);
 8958   ins_cost(125);
 8959   format %{ "SUB    $dst.lo,$mem\n\t"
 8960             "SBB    $dst.hi,$mem+4" %}
 8961   opcode(0x2B, 0x1B);
 8962   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8963   ins_pipe( ialu_reg_long_mem );
 8964 %}
 8965 
 8966 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8967   match(Set dst (SubL zero dst));
 8968   effect(KILL cr);
 8969   ins_cost(300);
 8970   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8971   ins_encode( neg_long(dst) );
 8972   ins_pipe( ialu_reg_reg_long );
 8973 %}
 8974 
 8975 // And Long Register with Register
 8976 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8977   match(Set dst (AndL dst src));
 8978   effect(KILL cr);
 8979   format %{ "AND    $dst.lo,$src.lo\n\t"
 8980             "AND    $dst.hi,$src.hi" %}
 8981   opcode(0x23,0x23);
 8982   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8983   ins_pipe( ialu_reg_reg_long );
 8984 %}
 8985 
 8986 // And Long Register with Immediate
 8987 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8988   match(Set dst (AndL dst src));
 8989   effect(KILL cr);
 8990   format %{ "AND    $dst.lo,$src.lo\n\t"
 8991             "AND    $dst.hi,$src.hi" %}
 8992   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8993   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8994   ins_pipe( ialu_reg_long );
 8995 %}
 8996 
 8997 // And Long Register with Memory
 8998 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8999   match(Set dst (AndL dst (LoadL mem)));
 9000   effect(KILL cr);
 9001   ins_cost(125);
 9002   format %{ "AND    $dst.lo,$mem\n\t"
 9003             "AND    $dst.hi,$mem+4" %}
 9004   opcode(0x23, 0x23);
 9005   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9006   ins_pipe( ialu_reg_long_mem );
 9007 %}
 9008 
 9009 // BMI1 instructions
 9010 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 9011   match(Set dst (AndL (XorL src1 minus_1) src2));
 9012   predicate(UseBMI1Instructions);
 9013   effect(KILL cr, TEMP dst);
 9014 
 9015   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9016             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9017          %}
 9018 
 9019   ins_encode %{
 9020     Register Rdst = $dst$$Register;
 9021     Register Rsrc1 = $src1$$Register;
 9022     Register Rsrc2 = $src2$$Register;
 9023     __ andnl(Rdst, Rsrc1, Rsrc2);
 9024     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9025   %}
 9026   ins_pipe(ialu_reg_reg_long);
 9027 %}
 9028 
 9029 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9030   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9031   predicate(UseBMI1Instructions);
 9032   effect(KILL cr, TEMP dst);
 9033 
 9034   ins_cost(125);
 9035   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9036             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9037          %}
 9038 
 9039   ins_encode %{
 9040     Register Rdst = $dst$$Register;
 9041     Register Rsrc1 = $src1$$Register;
 9042     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9043 
 9044     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9045     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9046   %}
 9047   ins_pipe(ialu_reg_mem);
 9048 %}
 9049 
 9050 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9051   match(Set dst (AndL (SubL imm_zero src) src));
 9052   predicate(UseBMI1Instructions);
 9053   effect(KILL cr, TEMP dst);
 9054 
 9055   format %{ "MOVL   $dst.hi, 0\n\t"
 9056             "BLSIL  $dst.lo, $src.lo\n\t"
 9057             "JNZ    done\n\t"
 9058             "BLSIL  $dst.hi, $src.hi\n"
 9059             "done:"
 9060          %}
 9061 
 9062   ins_encode %{
 9063     Label done;
 9064     Register Rdst = $dst$$Register;
 9065     Register Rsrc = $src$$Register;
 9066     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9067     __ blsil(Rdst, Rsrc);
 9068     __ jccb(Assembler::notZero, done);
 9069     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9070     __ bind(done);
 9071   %}
 9072   ins_pipe(ialu_reg);
 9073 %}
 9074 
 9075 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9076   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9077   predicate(UseBMI1Instructions);
 9078   effect(KILL cr, TEMP dst);
 9079 
 9080   ins_cost(125);
 9081   format %{ "MOVL   $dst.hi, 0\n\t"
 9082             "BLSIL  $dst.lo, $src\n\t"
 9083             "JNZ    done\n\t"
 9084             "BLSIL  $dst.hi, $src+4\n"
 9085             "done:"
 9086          %}
 9087 
 9088   ins_encode %{
 9089     Label done;
 9090     Register Rdst = $dst$$Register;
 9091     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9092 
 9093     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9094     __ blsil(Rdst, $src$$Address);
 9095     __ jccb(Assembler::notZero, done);
 9096     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9097     __ bind(done);
 9098   %}
 9099   ins_pipe(ialu_reg_mem);
 9100 %}
 9101 
 9102 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9103 %{
 9104   match(Set dst (XorL (AddL src minus_1) src));
 9105   predicate(UseBMI1Instructions);
 9106   effect(KILL cr, TEMP dst);
 9107 
 9108   format %{ "MOVL    $dst.hi, 0\n\t"
 9109             "BLSMSKL $dst.lo, $src.lo\n\t"
 9110             "JNC     done\n\t"
 9111             "BLSMSKL $dst.hi, $src.hi\n"
 9112             "done:"
 9113          %}
 9114 
 9115   ins_encode %{
 9116     Label done;
 9117     Register Rdst = $dst$$Register;
 9118     Register Rsrc = $src$$Register;
 9119     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9120     __ blsmskl(Rdst, Rsrc);
 9121     __ jccb(Assembler::carryClear, done);
 9122     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9123     __ bind(done);
 9124   %}
 9125 
 9126   ins_pipe(ialu_reg);
 9127 %}
 9128 
 9129 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9130 %{
 9131   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9132   predicate(UseBMI1Instructions);
 9133   effect(KILL cr, TEMP dst);
 9134 
 9135   ins_cost(125);
 9136   format %{ "MOVL    $dst.hi, 0\n\t"
 9137             "BLSMSKL $dst.lo, $src\n\t"
 9138             "JNC     done\n\t"
 9139             "BLSMSKL $dst.hi, $src+4\n"
 9140             "done:"
 9141          %}
 9142 
 9143   ins_encode %{
 9144     Label done;
 9145     Register Rdst = $dst$$Register;
 9146     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9147 
 9148     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9149     __ blsmskl(Rdst, $src$$Address);
 9150     __ jccb(Assembler::carryClear, done);
 9151     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9152     __ bind(done);
 9153   %}
 9154 
 9155   ins_pipe(ialu_reg_mem);
 9156 %}
 9157 
 9158 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9159 %{
 9160   match(Set dst (AndL (AddL src minus_1) src) );
 9161   predicate(UseBMI1Instructions);
 9162   effect(KILL cr, TEMP dst);
 9163 
 9164   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9165             "BLSRL  $dst.lo, $src.lo\n\t"
 9166             "JNC    done\n\t"
 9167             "BLSRL  $dst.hi, $src.hi\n"
 9168             "done:"
 9169   %}
 9170 
 9171   ins_encode %{
 9172     Label done;
 9173     Register Rdst = $dst$$Register;
 9174     Register Rsrc = $src$$Register;
 9175     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9176     __ blsrl(Rdst, Rsrc);
 9177     __ jccb(Assembler::carryClear, done);
 9178     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9179     __ bind(done);
 9180   %}
 9181 
 9182   ins_pipe(ialu_reg);
 9183 %}
 9184 
 9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9186 %{
 9187   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9188   predicate(UseBMI1Instructions);
 9189   effect(KILL cr, TEMP dst);
 9190 
 9191   ins_cost(125);
 9192   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9193             "BLSRL  $dst.lo, $src\n\t"
 9194             "JNC    done\n\t"
 9195             "BLSRL  $dst.hi, $src+4\n"
 9196             "done:"
 9197   %}
 9198 
 9199   ins_encode %{
 9200     Label done;
 9201     Register Rdst = $dst$$Register;
 9202     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9203     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9204     __ blsrl(Rdst, $src$$Address);
 9205     __ jccb(Assembler::carryClear, done);
 9206     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9207     __ bind(done);
 9208   %}
 9209 
 9210   ins_pipe(ialu_reg_mem);
 9211 %}
 9212 
 9213 // Or Long Register with Register
 9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9215   match(Set dst (OrL dst src));
 9216   effect(KILL cr);
 9217   format %{ "OR     $dst.lo,$src.lo\n\t"
 9218             "OR     $dst.hi,$src.hi" %}
 9219   opcode(0x0B,0x0B);
 9220   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9221   ins_pipe( ialu_reg_reg_long );
 9222 %}
 9223 
 9224 // Or Long Register with Immediate
 9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9226   match(Set dst (OrL dst src));
 9227   effect(KILL cr);
 9228   format %{ "OR     $dst.lo,$src.lo\n\t"
 9229             "OR     $dst.hi,$src.hi" %}
 9230   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9231   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9232   ins_pipe( ialu_reg_long );
 9233 %}
 9234 
 9235 // Or Long Register with Memory
 9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9237   match(Set dst (OrL dst (LoadL mem)));
 9238   effect(KILL cr);
 9239   ins_cost(125);
 9240   format %{ "OR     $dst.lo,$mem\n\t"
 9241             "OR     $dst.hi,$mem+4" %}
 9242   opcode(0x0B,0x0B);
 9243   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9244   ins_pipe( ialu_reg_long_mem );
 9245 %}
 9246 
 9247 // Xor Long Register with Register
 9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9249   match(Set dst (XorL dst src));
 9250   effect(KILL cr);
 9251   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9252             "XOR    $dst.hi,$src.hi" %}
 9253   opcode(0x33,0x33);
 9254   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9255   ins_pipe( ialu_reg_reg_long );
 9256 %}
 9257 
 9258 // Xor Long Register with Immediate -1
 9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9260   match(Set dst (XorL dst imm));
 9261   format %{ "NOT    $dst.lo\n\t"
 9262             "NOT    $dst.hi" %}
 9263   ins_encode %{
 9264      __ notl($dst$$Register);
 9265      __ notl(HIGH_FROM_LOW($dst$$Register));
 9266   %}
 9267   ins_pipe( ialu_reg_long );
 9268 %}
 9269 
 9270 // Xor Long Register with Immediate
 9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9272   match(Set dst (XorL dst src));
 9273   effect(KILL cr);
 9274   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9275             "XOR    $dst.hi,$src.hi" %}
 9276   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9277   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9278   ins_pipe( ialu_reg_long );
 9279 %}
 9280 
 9281 // Xor Long Register with Memory
 9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9283   match(Set dst (XorL dst (LoadL mem)));
 9284   effect(KILL cr);
 9285   ins_cost(125);
 9286   format %{ "XOR    $dst.lo,$mem\n\t"
 9287             "XOR    $dst.hi,$mem+4" %}
 9288   opcode(0x33,0x33);
 9289   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9290   ins_pipe( ialu_reg_long_mem );
 9291 %}
 9292 
 9293 // Shift Left Long by 1
 9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9295   predicate(UseNewLongLShift);
 9296   match(Set dst (LShiftL dst cnt));
 9297   effect(KILL cr);
 9298   ins_cost(100);
 9299   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9300             "ADC    $dst.hi,$dst.hi" %}
 9301   ins_encode %{
 9302     __ addl($dst$$Register,$dst$$Register);
 9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9304   %}
 9305   ins_pipe( ialu_reg_long );
 9306 %}
 9307 
 9308 // Shift Left Long by 2
 9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9310   predicate(UseNewLongLShift);
 9311   match(Set dst (LShiftL dst cnt));
 9312   effect(KILL cr);
 9313   ins_cost(100);
 9314   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9315             "ADC    $dst.hi,$dst.hi\n\t"
 9316             "ADD    $dst.lo,$dst.lo\n\t"
 9317             "ADC    $dst.hi,$dst.hi" %}
 9318   ins_encode %{
 9319     __ addl($dst$$Register,$dst$$Register);
 9320     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9321     __ addl($dst$$Register,$dst$$Register);
 9322     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9323   %}
 9324   ins_pipe( ialu_reg_long );
 9325 %}
 9326 
 9327 // Shift Left Long by 3
 9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9329   predicate(UseNewLongLShift);
 9330   match(Set dst (LShiftL dst cnt));
 9331   effect(KILL cr);
 9332   ins_cost(100);
 9333   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9334             "ADC    $dst.hi,$dst.hi\n\t"
 9335             "ADD    $dst.lo,$dst.lo\n\t"
 9336             "ADC    $dst.hi,$dst.hi\n\t"
 9337             "ADD    $dst.lo,$dst.lo\n\t"
 9338             "ADC    $dst.hi,$dst.hi" %}
 9339   ins_encode %{
 9340     __ addl($dst$$Register,$dst$$Register);
 9341     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9342     __ addl($dst$$Register,$dst$$Register);
 9343     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9344     __ addl($dst$$Register,$dst$$Register);
 9345     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9346   %}
 9347   ins_pipe( ialu_reg_long );
 9348 %}
 9349 
 9350 // Shift Left Long by 1-31
 9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9352   match(Set dst (LShiftL dst cnt));
 9353   effect(KILL cr);
 9354   ins_cost(200);
 9355   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9356             "SHL    $dst.lo,$cnt" %}
 9357   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9358   ins_encode( move_long_small_shift(dst,cnt) );
 9359   ins_pipe( ialu_reg_long );
 9360 %}
 9361 
 9362 // Shift Left Long by 32-63
 9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9364   match(Set dst (LShiftL dst cnt));
 9365   effect(KILL cr);
 9366   ins_cost(300);
 9367   format %{ "MOV    $dst.hi,$dst.lo\n"
 9368           "\tSHL    $dst.hi,$cnt-32\n"
 9369           "\tXOR    $dst.lo,$dst.lo" %}
 9370   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9371   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9372   ins_pipe( ialu_reg_long );
 9373 %}
 9374 
 9375 // Shift Left Long by variable
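// (The hardware masks 32-bit shift counts to 5 bits, i.e. count mod 32, so
//  SHLD/SHL alone cannot shift by 32..63.  The TEST $shift,32 below takes the
//  move-high-and-clear path first whenever bit 5 of the count is set; the
//  variable right-shift rules further down use the same trick.)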
 9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9377   match(Set dst (LShiftL dst shift));
 9378   effect(KILL cr);
 9379   ins_cost(500+200);
 9380   size(17);
 9381   format %{ "TEST   $shift,32\n\t"
 9382             "JEQ,s  small\n\t"
 9383             "MOV    $dst.hi,$dst.lo\n\t"
 9384             "XOR    $dst.lo,$dst.lo\n"
 9385     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9386             "SHL    $dst.lo,$shift" %}
 9387   ins_encode( shift_left_long( dst, shift ) );
 9388   ins_pipe( pipe_slow );
 9389 %}
 9390 
 9391 // Shift Right Long by 1-31
 9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9393   match(Set dst (URShiftL dst cnt));
 9394   effect(KILL cr);
 9395   ins_cost(200);
 9396   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9397             "SHR    $dst.hi,$cnt" %}
 9398   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9399   ins_encode( move_long_small_shift(dst,cnt) );
 9400   ins_pipe( ialu_reg_long );
 9401 %}
 9402 
 9403 // Shift Right Long by 32-63
 9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9405   match(Set dst (URShiftL dst cnt));
 9406   effect(KILL cr);
 9407   ins_cost(300);
 9408   format %{ "MOV    $dst.lo,$dst.hi\n"
 9409           "\tSHR    $dst.lo,$cnt-32\n"
 9410           "\tXOR    $dst.hi,$dst.hi" %}
 9411   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9412   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9413   ins_pipe( ialu_reg_long );
 9414 %}
 9415 
 9416 // Shift Right Long by variable
 9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9418   match(Set dst (URShiftL dst shift));
 9419   effect(KILL cr);
 9420   ins_cost(600);
 9421   size(17);
 9422   format %{ "TEST   $shift,32\n\t"
 9423             "JEQ,s  small\n\t"
 9424             "MOV    $dst.lo,$dst.hi\n\t"
 9425             "XOR    $dst.hi,$dst.hi\n"
 9426     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9427             "SHR    $dst.hi,$shift" %}
 9428   ins_encode( shift_right_long( dst, shift ) );
 9429   ins_pipe( pipe_slow );
 9430 %}
 9431 
 9432 // Shift Right Long by 1-31
 9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9434   match(Set dst (RShiftL dst cnt));
 9435   effect(KILL cr);
 9436   ins_cost(200);
 9437   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9438             "SAR    $dst.hi,$cnt" %}
 9439   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9440   ins_encode( move_long_small_shift(dst,cnt) );
 9441   ins_pipe( ialu_reg_long );
 9442 %}
 9443 
 9444 // Shift Right Long by 32-63
 9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9446   match(Set dst (RShiftL dst cnt));
 9447   effect(KILL cr);
 9448   ins_cost(300);
 9449   format %{ "MOV    $dst.lo,$dst.hi\n"
 9450           "\tSAR    $dst.lo,$cnt-32\n"
 9451           "\tSAR    $dst.hi,31" %}
 9452   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9453   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9454   ins_pipe( ialu_reg_long );
 9455 %}
 9456 
 9457 // Shift Right arithmetic Long by variable
 9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9459   match(Set dst (RShiftL dst shift));
 9460   effect(KILL cr);
 9461   ins_cost(600);
 9462   size(18);
 9463   format %{ "TEST   $shift,32\n\t"
 9464             "JEQ,s  small\n\t"
 9465             "MOV    $dst.lo,$dst.hi\n\t"
 9466             "SAR    $dst.hi,31\n"
 9467     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9468             "SAR    $dst.hi,$shift" %}
 9469   ins_encode( shift_right_arith_long( dst, shift ) );
 9470   ins_pipe( pipe_slow );
 9471 %}
 9472 
 9473 
 9474 //----------Double Instructions------------------------------------------------
 9475 // Double Math
 9476 
 9477 // Compare & branch
 9478 
 9479 // P6 version of float compare, sets condition codes in EFLAGS
 9480 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9481   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9482   match(Set cr (CmpD src1 src2));
 9483   effect(KILL rax);
 9484   ins_cost(150);
 9485   format %{ "FLD    $src1\n\t"
 9486             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9487             "JNP    exit\n\t"
 9488             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9489             "SAHF\n"
 9490      "exit:\tNOP               // avoid branch to branch" %}
 9491   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9492   ins_encode( Push_Reg_DPR(src1),
 9493               OpcP, RegOpc(src2),
 9494               cmpF_P6_fixup );
 9495   ins_pipe( pipe_slow );
 9496 %}
 9497 
 9498 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9499   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9500   match(Set cr (CmpD src1 src2));
 9501   ins_cost(150);
 9502   format %{ "FLD    $src1\n\t"
 9503             "FUCOMIP ST,$src2  // P6 instruction" %}
 9504   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9505   ins_encode( Push_Reg_DPR(src1),
 9506               OpcP, RegOpc(src2));
 9507   ins_pipe( pipe_slow );
 9508 %}
 9509 
 9510 // Compare & branch
 9511 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9512   predicate(UseSSE<=1);
 9513   match(Set cr (CmpD src1 src2));
 9514   effect(KILL rax);
 9515   ins_cost(200);
 9516   format %{ "FLD    $src1\n\t"
 9517             "FCOMp  $src2\n\t"
 9518             "FNSTSW AX\n\t"
 9519             "TEST   AX,0x400\n\t"
 9520             "JZ,s   flags\n\t"
 9521             "MOV    AH,1\t# unordered treat as LT\n"
 9522     "flags:\tSAHF" %}
 9523   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9524   ins_encode( Push_Reg_DPR(src1),
 9525               OpcP, RegOpc(src2),
 9526               fpu_flags);
 9527   ins_pipe( pipe_slow );
 9528 %}
 9529 
 9530 // Compare vs zero into -1,0,1
 9531 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9532   predicate(UseSSE<=1);
 9533   match(Set dst (CmpD3 src1 zero));
 9534   effect(KILL cr, KILL rax);
 9535   ins_cost(280);
 9536   format %{ "FTSTD  $dst,$src1" %}
 9537   opcode(0xE4, 0xD9);
 9538   ins_encode( Push_Reg_DPR(src1),
 9539               OpcS, OpcP, PopFPU,
 9540               CmpF_Result(dst));
 9541   ins_pipe( pipe_slow );
 9542 %}
 9543 
 9544 // Compare into -1,0,1
 9545 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9546   predicate(UseSSE<=1);
 9547   match(Set dst (CmpD3 src1 src2));
 9548   effect(KILL cr, KILL rax);
 9549   ins_cost(300);
 9550   format %{ "FCMPD  $dst,$src1,$src2" %}
 9551   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9552   ins_encode( Push_Reg_DPR(src1),
 9553               OpcP, RegOpc(src2),
 9554               CmpF_Result(dst));
 9555   ins_pipe( pipe_slow );
 9556 %}
 9557 
 9558 // float compare and set condition codes in EFLAGS by XMM regs
 9559 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9560   predicate(UseSSE>=2);
 9561   match(Set cr (CmpD src1 src2));
 9562   ins_cost(145);
 9563   format %{ "UCOMISD $src1,$src2\n\t"
 9564             "JNP,s   exit\n\t"
 9565             "PUSHF\t# saw NaN, set CF\n\t"
 9566             "AND     [rsp], #0xffffff2b\n\t"
 9567             "POPF\n"
 9568     "exit:" %}
 9569   ins_encode %{
 9570     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9571     emit_cmpfp_fixup(_masm);
 9572   %}
 9573   ins_pipe( pipe_slow );
 9574 %}
 9575 
 9576 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9577   predicate(UseSSE>=2);
 9578   match(Set cr (CmpD src1 src2));
 9579   ins_cost(100);
 9580   format %{ "UCOMISD $src1,$src2" %}
 9581   ins_encode %{
 9582     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9583   %}
 9584   ins_pipe( pipe_slow );
 9585 %}
 9586 
 9587 // float compare and set condition codes in EFLAGS by XMM regs
 9588 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9589   predicate(UseSSE>=2);
 9590   match(Set cr (CmpD src1 (LoadD src2)));
 9591   ins_cost(145);
 9592   format %{ "UCOMISD $src1,$src2\n\t"
 9593             "JNP,s   exit\n\t"
 9594             "PUSHF\t# saw NaN, set CF\n\t"
 9595             "AND     [rsp], #0xffffff2b\n\t"
 9596             "POPF\n"
 9597     "exit:" %}
 9598   ins_encode %{
 9599     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9600     emit_cmpfp_fixup(_masm);
 9601   %}
 9602   ins_pipe( pipe_slow );
 9603 %}
 9604 
 9605 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9606   predicate(UseSSE>=2);
 9607   match(Set cr (CmpD src1 (LoadD src2)));
 9608   ins_cost(100);
 9609   format %{ "UCOMISD $src1,$src2" %}
 9610   ins_encode %{
 9611     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9612   %}
 9613   ins_pipe( pipe_slow );
 9614 %}
 9615 
 9616 // Compare into -1,0,1 in XMM
 9617 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9618   predicate(UseSSE>=2);
 9619   match(Set dst (CmpD3 src1 src2));
 9620   effect(KILL cr);
 9621   ins_cost(255);
 9622   format %{ "UCOMISD $src1, $src2\n\t"
 9623             "MOV     $dst, #-1\n\t"
 9624             "JP,s    done\n\t"
 9625             "JB,s    done\n\t"
 9626             "SETNE   $dst\n\t"
 9627             "MOVZB   $dst, $dst\n"
 9628     "done:" %}
 9629   ins_encode %{
 9630     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9631     emit_cmpfp3(_masm, $dst$$Register);
 9632   %}
 9633   ins_pipe( pipe_slow );
 9634 %}
 9635 
 9636 // Compare into -1,0,1 in XMM and memory
 9637 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9638   predicate(UseSSE>=2);
 9639   match(Set dst (CmpD3 src1 (LoadD src2)));
 9640   effect(KILL cr);
 9641   ins_cost(275);
 9642   format %{ "UCOMISD $src1, $src2\n\t"
 9643             "MOV     $dst, #-1\n\t"
 9644             "JP,s    done\n\t"
 9645             "JB,s    done\n\t"
 9646             "SETNE   $dst\n\t"
 9647             "MOVZB   $dst, $dst\n"
 9648     "done:" %}
 9649   ins_encode %{
 9650     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9651     emit_cmpfp3(_masm, $dst$$Register);
 9652   %}
 9653   ins_pipe( pipe_slow );
 9654 %}
 9655 
 9656 
 9657 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9658   predicate (UseSSE <=1);
 9659   match(Set dst (SubD dst src));
 9660 
 9661   format %{ "FLD    $src\n\t"
 9662             "DSUBp  $dst,ST" %}
 9663   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9664   ins_cost(150);
 9665   ins_encode( Push_Reg_DPR(src),
 9666               OpcP, RegOpc(dst) );
 9667   ins_pipe( fpu_reg_reg );
 9668 %}
 9669 
 9670 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9671   predicate (UseSSE <=1);
 9672   match(Set dst (RoundDouble (SubD src1 src2)));
 9673   ins_cost(250);
 9674 
 9675   format %{ "FLD    $src2\n\t"
 9676             "DSUB   ST,$src1\n\t"
 9677             "FSTP_D $dst\t# D-round" %}
 9678   opcode(0xD8, 0x5);
 9679   ins_encode( Push_Reg_DPR(src2),
 9680               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9681   ins_pipe( fpu_mem_reg_reg );
 9682 %}
 9683 
 9684 
 9685 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9686   predicate (UseSSE <=1);
 9687   match(Set dst (SubD dst (LoadD src)));
 9688   ins_cost(150);
 9689 
 9690   format %{ "FLD    $src\n\t"
 9691             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9693   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9694               OpcP, RegOpc(dst) );
 9695   ins_pipe( fpu_reg_mem );
 9696 %}
 9697 
 9698 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9699   predicate (UseSSE<=1);
 9700   match(Set dst (AbsD src));
 9701   ins_cost(100);
 9702   format %{ "FABS" %}
 9703   opcode(0xE1, 0xD9);
 9704   ins_encode( OpcS, OpcP );
 9705   ins_pipe( fpu_reg_reg );
 9706 %}
 9707 
 9708 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9709   predicate(UseSSE<=1);
 9710   match(Set dst (NegD src));
 9711   ins_cost(100);
 9712   format %{ "FCHS" %}
 9713   opcode(0xE0, 0xD9);
 9714   ins_encode( OpcS, OpcP );
 9715   ins_pipe( fpu_reg_reg );
 9716 %}
 9717 
 9718 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9719   predicate(UseSSE<=1);
 9720   match(Set dst (AddD dst src));
 9721   format %{ "FLD    $src\n\t"
 9722             "DADD   $dst,ST" %}
 9723   size(4);
 9724   ins_cost(150);
 9725   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9726   ins_encode( Push_Reg_DPR(src),
 9727               OpcP, RegOpc(dst) );
 9728   ins_pipe( fpu_reg_reg );
 9729 %}
 9730 
 9731 
 9732 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9733   predicate(UseSSE<=1);
 9734   match(Set dst (RoundDouble (AddD src1 src2)));
 9735   ins_cost(250);
 9736 
 9737   format %{ "FLD    $src2\n\t"
 9738             "DADD   ST,$src1\n\t"
 9739             "FSTP_D $dst\t# D-round" %}
 9740   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9741   ins_encode( Push_Reg_DPR(src2),
 9742               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9743   ins_pipe( fpu_mem_reg_reg );
 9744 %}
 9745 
 9746 
 9747 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9748   predicate(UseSSE<=1);
 9749   match(Set dst (AddD dst (LoadD src)));
 9750   ins_cost(150);
 9751 
 9752   format %{ "FLD    $src\n\t"
 9753             "DADDp  $dst,ST" %}
 9754   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9755   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9756               OpcP, RegOpc(dst) );
 9757   ins_pipe( fpu_reg_mem );
 9758 %}
 9759 
 9760 // add-to-memory
 9761 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9762   predicate(UseSSE<=1);
 9763   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9764   ins_cost(150);
 9765 
 9766   format %{ "FLD_D  $dst\n\t"
 9767             "DADD   ST,$src\n\t"
 9768             "FST_D  $dst" %}
 9769   opcode(0xDD, 0x0);
 9770   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9771               Opcode(0xD8), RegOpc(src),
 9772               set_instruction_start,
 9773               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9774   ins_pipe( fpu_reg_mem );
 9775 %}
 9776 
 9777 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9778   predicate(UseSSE<=1);
 9779   match(Set dst (AddD dst con));
 9780   ins_cost(125);
 9781   format %{ "FLD1\n\t"
 9782             "DADDp  $dst,ST" %}
 9783   ins_encode %{
 9784     __ fld1();
 9785     __ faddp($dst$$reg);
 9786   %}
 9787   ins_pipe(fpu_reg);
 9788 %}
 9789 
 9790 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9791   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9792   match(Set dst (AddD dst con));
 9793   ins_cost(200);
 9794   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9795             "DADDp  $dst,ST" %}
 9796   ins_encode %{
 9797     __ fld_d($constantaddress($con));
 9798     __ faddp($dst$$reg);
 9799   %}
 9800   ins_pipe(fpu_reg_mem);
 9801 %}
 9802 
 9803 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9804   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9805   match(Set dst (RoundDouble (AddD src con)));
 9806   ins_cost(200);
 9807   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9808             "DADD   ST,$src\n\t"
 9809             "FSTP_D $dst\t# D-round" %}
 9810   ins_encode %{
 9811     __ fld_d($constantaddress($con));
 9812     __ fadd($src$$reg);
 9813     __ fstp_d(Address(rsp, $dst$$disp));
 9814   %}
 9815   ins_pipe(fpu_mem_reg_con);
 9816 %}
 9817 
 9818 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9819   predicate(UseSSE<=1);
 9820   match(Set dst (MulD dst src));
 9821   format %{ "FLD    $src\n\t"
 9822             "DMULp  $dst,ST" %}
 9823   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9824   ins_cost(150);
 9825   ins_encode( Push_Reg_DPR(src),
 9826               OpcP, RegOpc(dst) );
 9827   ins_pipe( fpu_reg_reg );
 9828 %}
 9829 
// The strict FP instruction biases the argument before the multiply, then
// biases the result, to avoid double rounding of subnormals.
 9832 //
 9833 // scale arg1 by multiplying arg1 by 2^(-15360)
 9834 // load arg2
 9835 // multiply scaled arg1 by arg2
 9836 // rescale product by 2^(15360)
 9837 //
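// Conceptually (a sketch, not the literal encoding below):
//
//   dst = ((dst * 2^(-15360)) * src) * 2^(15360)
//
// The down-scale forces a product that would be a double subnormal to be
// rounded once, at its reduced (denormal) precision; the up-scale restores
// the magnitude, and since both scale factors are exact powers of two a
// normal result is unchanged.
//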
 9838 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9839   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9840   match(Set dst (MulD dst src));
 9841   ins_cost(1);   // Select this instruction for all FP double multiplies
 9842 
 9843   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9844             "DMULp  $dst,ST\n\t"
 9845             "FLD    $src\n\t"
 9846             "DMULp  $dst,ST\n\t"
 9847             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9848             "DMULp  $dst,ST\n\t" %}
 9849   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9850   ins_encode( strictfp_bias1(dst),
 9851               Push_Reg_DPR(src),
 9852               OpcP, RegOpc(dst),
 9853               strictfp_bias2(dst) );
 9854   ins_pipe( fpu_reg_reg );
 9855 %}
 9856 
 9857 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9858   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9859   match(Set dst (MulD dst con));
 9860   ins_cost(200);
 9861   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9862             "DMULp  $dst,ST" %}
 9863   ins_encode %{
 9864     __ fld_d($constantaddress($con));
 9865     __ fmulp($dst$$reg);
 9866   %}
 9867   ins_pipe(fpu_reg_mem);
 9868 %}
 9869 
 9870 
 9871 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9872   predicate( UseSSE<=1 );
 9873   match(Set dst (MulD dst (LoadD src)));
 9874   ins_cost(200);
 9875   format %{ "FLD_D  $src\n\t"
 9876             "DMULp  $dst,ST" %}
 9877   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9878   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9879               OpcP, RegOpc(dst) );
 9880   ins_pipe( fpu_reg_mem );
 9881 %}
 9882 
 9883 //
 9884 // Cisc-alternate to reg-reg multiply
 9885 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9886   predicate( UseSSE<=1 );
 9887   match(Set dst (MulD src (LoadD mem)));
 9888   ins_cost(250);
 9889   format %{ "FLD_D  $mem\n\t"
 9890             "DMUL   ST,$src\n\t"
 9891             "FSTP_D $dst" %}
 9892   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9893   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9894               OpcReg_FPR(src),
 9895               Pop_Reg_DPR(dst) );
 9896   ins_pipe( fpu_reg_reg_mem );
 9897 %}
 9898 
 9899 
 9900 // MACRO3 -- addDPR a mulDPR
 9901 // This instruction is a '2-address' instruction in that the result goes
 9902 // back to src2.  This eliminates a move from the macro; possibly the
 9903 // register allocator will have to add it back (and maybe not).
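// For example (roughly, and only when the product has no other uses), a Java
// expression of the form
//   acc = acc + a * b;
// matches this single rule instead of a separate MulD followed by an AddD,
// with 'acc' playing the role of src2.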
 9904 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9905   predicate( UseSSE<=1 );
 9906   match(Set src2 (AddD (MulD src0 src1) src2));
 9907   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9908             "DMUL   ST,$src1\n\t"
 9909             "DADDp  $src2,ST" %}
 9910   ins_cost(250);
 9911   opcode(0xDD); /* LoadD DD /0 */
 9912   ins_encode( Push_Reg_FPR(src0),
 9913               FMul_ST_reg(src1),
 9914               FAddP_reg_ST(src2) );
 9915   ins_pipe( fpu_reg_reg_reg );
 9916 %}
 9917 
 9918 
 9919 // MACRO3 -- subDPR a mulDPR
 9920 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9921   predicate( UseSSE<=1 );
 9922   match(Set src2 (SubD (MulD src0 src1) src2));
 9923   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9924             "DMUL   ST,$src1\n\t"
 9925             "DSUBRp $src2,ST" %}
 9926   ins_cost(250);
 9927   ins_encode( Push_Reg_FPR(src0),
 9928               FMul_ST_reg(src1),
 9929               Opcode(0xDE), Opc_plus(0xE0,src2));
 9930   ins_pipe( fpu_reg_reg_reg );
 9931 %}
 9932 
 9933 
 9934 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9935   predicate( UseSSE<=1 );
 9936   match(Set dst (DivD dst src));
 9937 
 9938   format %{ "FLD    $src\n\t"
 9939             "FDIVp  $dst,ST" %}
 9940   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9941   ins_cost(150);
 9942   ins_encode( Push_Reg_DPR(src),
 9943               OpcP, RegOpc(dst) );
 9944   ins_pipe( fpu_reg_reg );
 9945 %}
 9946 
// The strict FP instruction biases the argument before the division, then
// biases the result, to avoid double rounding of subnormals.
 9949 //
 9950 // scale dividend by multiplying dividend by 2^(-15360)
 9951 // load divisor
 9952 // divide scaled dividend by divisor
 9953 // rescale quotient by 2^(15360)
 9954 //
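// Conceptually (a sketch, mirroring the multiply case above):
//
//   dst = ((dst * 2^(-15360)) / src) * 2^(15360)
//
// so a quotient that would be a double subnormal is rounded only once.
//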
 9955 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9960 
 9961   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9962             "DMULp  $dst,ST\n\t"
 9963             "FLD    $src\n\t"
 9964             "FDIVp  $dst,ST\n\t"
 9965             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9966             "DMULp  $dst,ST\n\t" %}
 9967   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9968   ins_encode( strictfp_bias1(dst),
 9969               Push_Reg_DPR(src),
 9970               OpcP, RegOpc(dst),
 9971               strictfp_bias2(dst) );
 9972   ins_pipe( fpu_reg_reg );
 9973 %}
 9974 
 9975 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9976   predicate(UseSSE<=1);
 9977   match(Set dst (ModD dst src));
 9978   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9979 
 9980   format %{ "DMOD   $dst,$src" %}
 9981   ins_cost(250);
 9982   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9983               emitModDPR(),
 9984               Push_Result_Mod_DPR(src),
 9985               Pop_Reg_DPR(dst));
 9986   ins_pipe( pipe_slow );
 9987 %}
 9988 
 9989 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9990   predicate(UseSSE>=2);
 9991   match(Set dst (ModD src0 src1));
 9992   effect(KILL rax, KILL cr);
 9993 
 9994   format %{ "SUB    ESP,8\t # DMOD\n"
 9995           "\tMOVSD  [ESP+0],$src1\n"
 9996           "\tFLD_D  [ESP+0]\n"
 9997           "\tMOVSD  [ESP+0],$src0\n"
 9998           "\tFLD_D  [ESP+0]\n"
 9999      "loop:\tFPREM\n"
10000           "\tFWAIT\n"
10001           "\tFNSTSW AX\n"
10002           "\tSAHF\n"
10003           "\tJP     loop\n"
10004           "\tFSTP_D [ESP+0]\n"
10005           "\tMOVSD  $dst,[ESP+0]\n"
10006           "\tADD    ESP,8\n"
10007           "\tFSTP   ST0\t # Restore FPU Stack"
10008     %}
10009   ins_cost(250);
10010   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10015   predicate (UseSSE<=1);
10016   match(Set dst(AtanD dst src));
10017   format %{ "DATA   $dst,$src" %}
10018   opcode(0xD9, 0xF3);
10019   ins_encode( Push_Reg_DPR(src),
10020               OpcP, OpcS, RegOpc(dst) );
10021   ins_pipe( pipe_slow );
10022 %}
10023 
10024 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10025   predicate (UseSSE>=2);
10026   match(Set dst(AtanD dst src));
10027   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10028   format %{ "DATA   $dst,$src" %}
10029   opcode(0xD9, 0xF3);
10030   ins_encode( Push_SrcD(src),
10031               OpcP, OpcS, Push_ResultD(dst) );
10032   ins_pipe( pipe_slow );
10033 %}
10034 
10035 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10036   predicate (UseSSE<=1);
10037   match(Set dst (SqrtD src));
10038   format %{ "DSQRT  $dst,$src" %}
10039   opcode(0xFA, 0xD9);
10040   ins_encode( Push_Reg_DPR(src),
10041               OpcS, OpcP, Pop_Reg_DPR(dst) );
10042   ins_pipe( pipe_slow );
10043 %}
10044 
10045 //-------------Float Instructions-------------------------------
10046 // Float Math
10047 
10048 // Code for float compare:
10049 //     fcompp();
10050 //     fwait(); fnstsw_ax();
10051 //     sahf();
10052 //     movl(dst, unordered_result);
10053 //     jcc(Assembler::parity, exit);
10054 //     movl(dst, less_result);
10055 //     jcc(Assembler::below, exit);
10056 //     movl(dst, equal_result);
10057 //     jcc(Assembler::equal, exit);
10058 //     movl(dst, greater_result);
10059 //   exit:
10060 
10061 // P6 version of float compare, sets condition codes in EFLAGS
10062 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10063   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10064   match(Set cr (CmpF src1 src2));
10065   effect(KILL rax);
10066   ins_cost(150);
10067   format %{ "FLD    $src1\n\t"
10068             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10069             "JNP    exit\n\t"
10070             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10071             "SAHF\n"
10072      "exit:\tNOP               // avoid branch to branch" %}
10073   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10074   ins_encode( Push_Reg_DPR(src1),
10075               OpcP, RegOpc(src2),
10076               cmpF_P6_fixup );
10077   ins_pipe( pipe_slow );
10078 %}
10079 
10080 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10081   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10082   match(Set cr (CmpF src1 src2));
10083   ins_cost(100);
10084   format %{ "FLD    $src1\n\t"
10085             "FUCOMIP ST,$src2  // P6 instruction" %}
10086   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10087   ins_encode( Push_Reg_DPR(src1),
10088               OpcP, RegOpc(src2));
10089   ins_pipe( pipe_slow );
10090 %}
10091 
10092 
10093 // Compare & branch
10094 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10095   predicate(UseSSE == 0);
10096   match(Set cr (CmpF src1 src2));
10097   effect(KILL rax);
10098   ins_cost(200);
10099   format %{ "FLD    $src1\n\t"
10100             "FCOMp  $src2\n\t"
10101             "FNSTSW AX\n\t"
10102             "TEST   AX,0x400\n\t"
10103             "JZ,s   flags\n\t"
10104             "MOV    AH,1\t# unordered treat as LT\n"
10105     "flags:\tSAHF" %}
10106   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10107   ins_encode( Push_Reg_DPR(src1),
10108               OpcP, RegOpc(src2),
10109               fpu_flags);
10110   ins_pipe( pipe_slow );
10111 %}
10112 
10113 // Compare vs zero into -1,0,1
10114 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10115   predicate(UseSSE == 0);
10116   match(Set dst (CmpF3 src1 zero));
10117   effect(KILL cr, KILL rax);
10118   ins_cost(280);
10119   format %{ "FTSTF  $dst,$src1" %}
10120   opcode(0xE4, 0xD9);
10121   ins_encode( Push_Reg_DPR(src1),
10122               OpcS, OpcP, PopFPU,
10123               CmpF_Result(dst));
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 // Compare into -1,0,1
10128 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10129   predicate(UseSSE == 0);
10130   match(Set dst (CmpF3 src1 src2));
10131   effect(KILL cr, KILL rax);
10132   ins_cost(300);
10133   format %{ "FCMPF  $dst,$src1,$src2" %}
10134   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10135   ins_encode( Push_Reg_DPR(src1),
10136               OpcP, RegOpc(src2),
10137               CmpF_Result(dst));
10138   ins_pipe( pipe_slow );
10139 %}
10140 
10141 // float compare and set condition codes in EFLAGS by XMM regs
10142 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10143   predicate(UseSSE>=1);
10144   match(Set cr (CmpF src1 src2));
10145   ins_cost(145);
10146   format %{ "UCOMISS $src1,$src2\n\t"
10147             "JNP,s   exit\n\t"
10148             "PUSHF\t# saw NaN, set CF\n\t"
10149             "AND     [rsp], #0xffffff2b\n\t"
10150             "POPF\n"
10151     "exit:" %}
10152   ins_encode %{
10153     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10154     emit_cmpfp_fixup(_masm);
10155   %}
10156   ins_pipe( pipe_slow );
10157 %}
10158 
10159 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10160   predicate(UseSSE>=1);
10161   match(Set cr (CmpF src1 src2));
10162   ins_cost(100);
10163   format %{ "UCOMISS $src1,$src2" %}
10164   ins_encode %{
10165     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10166   %}
10167   ins_pipe( pipe_slow );
10168 %}
10169 
10170 // float compare and set condition codes in EFLAGS by XMM regs
10171 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10172   predicate(UseSSE>=1);
10173   match(Set cr (CmpF src1 (LoadF src2)));
10174   ins_cost(165);
10175   format %{ "UCOMISS $src1,$src2\n\t"
10176             "JNP,s   exit\n\t"
10177             "PUSHF\t# saw NaN, set CF\n\t"
10178             "AND     [rsp], #0xffffff2b\n\t"
10179             "POPF\n"
10180     "exit:" %}
10181   ins_encode %{
10182     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10183     emit_cmpfp_fixup(_masm);
10184   %}
10185   ins_pipe( pipe_slow );
10186 %}
10187 
10188 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10189   predicate(UseSSE>=1);
10190   match(Set cr (CmpF src1 (LoadF src2)));
10191   ins_cost(100);
10192   format %{ "UCOMISS $src1,$src2" %}
10193   ins_encode %{
10194     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10195   %}
10196   ins_pipe( pipe_slow );
10197 %}
10198 
10199 // Compare into -1,0,1 in XMM
10200 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10201   predicate(UseSSE>=1);
10202   match(Set dst (CmpF3 src1 src2));
10203   effect(KILL cr);
10204   ins_cost(255);
10205   format %{ "UCOMISS $src1, $src2\n\t"
10206             "MOV     $dst, #-1\n\t"
10207             "JP,s    done\n\t"
10208             "JB,s    done\n\t"
10209             "SETNE   $dst\n\t"
10210             "MOVZB   $dst, $dst\n"
10211     "done:" %}
10212   ins_encode %{
10213     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10214     emit_cmpfp3(_masm, $dst$$Register);
10215   %}
10216   ins_pipe( pipe_slow );
10217 %}
10218 
10219 // Compare into -1,0,1 in XMM and memory
10220 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10221   predicate(UseSSE>=1);
10222   match(Set dst (CmpF3 src1 (LoadF src2)));
10223   effect(KILL cr);
10224   ins_cost(275);
10225   format %{ "UCOMISS $src1, $src2\n\t"
10226             "MOV     $dst, #-1\n\t"
10227             "JP,s    done\n\t"
10228             "JB,s    done\n\t"
10229             "SETNE   $dst\n\t"
10230             "MOVZB   $dst, $dst\n"
10231     "done:" %}
10232   ins_encode %{
10233     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10234     emit_cmpfp3(_masm, $dst$$Register);
10235   %}
10236   ins_pipe( pipe_slow );
10237 %}
10238 
10239 // Spill to obtain 24-bit precision
10240 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10241   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10242   match(Set dst (SubF src1 src2));
10243 
10244   format %{ "FSUB   $dst,$src1 - $src2" %}
10245   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10246   ins_encode( Push_Reg_FPR(src1),
10247               OpcReg_FPR(src2),
10248               Pop_Mem_FPR(dst) );
10249   ins_pipe( fpu_mem_reg_reg );
10250 %}
10251 //
10252 // This instruction does not round to 24-bits
10253 instruct subFPR_reg(regFPR dst, regFPR src) %{
10254   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10255   match(Set dst (SubF dst src));
10256 
10257   format %{ "FSUB   $dst,$src" %}
10258   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10259   ins_encode( Push_Reg_FPR(src),
10260               OpcP, RegOpc(dst) );
10261   ins_pipe( fpu_reg_reg );
10262 %}
10263 
10264 // Spill to obtain 24-bit precision
10265 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10266   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10267   match(Set dst (AddF src1 src2));
10268 
10269   format %{ "FADD   $dst,$src1,$src2" %}
10270   opcode(0xD8, 0x0); /* D8 C0+i */
10271   ins_encode( Push_Reg_FPR(src2),
10272               OpcReg_FPR(src1),
10273               Pop_Mem_FPR(dst) );
10274   ins_pipe( fpu_mem_reg_reg );
10275 %}
10276 //
10277 // This instruction does not round to 24-bits
10278 instruct addFPR_reg(regFPR dst, regFPR src) %{
10279   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10280   match(Set dst (AddF dst src));
10281 
10282   format %{ "FLD    $src\n\t"
10283             "FADDp  $dst,ST" %}
10284   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10285   ins_encode( Push_Reg_FPR(src),
10286               OpcP, RegOpc(dst) );
10287   ins_pipe( fpu_reg_reg );
10288 %}
10289 
10290 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10291   predicate(UseSSE==0);
10292   match(Set dst (AbsF src));
10293   ins_cost(100);
10294   format %{ "FABS" %}
10295   opcode(0xE1, 0xD9);
10296   ins_encode( OpcS, OpcP );
10297   ins_pipe( fpu_reg_reg );
10298 %}
10299 
10300 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10301   predicate(UseSSE==0);
10302   match(Set dst (NegF src));
10303   ins_cost(100);
10304   format %{ "FCHS" %}
10305   opcode(0xE0, 0xD9);
10306   ins_encode( OpcS, OpcP );
10307   ins_pipe( fpu_reg_reg );
10308 %}
10309 
10310 // Cisc-alternate to addFPR_reg
10311 // Spill to obtain 24-bit precision
10312 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10313   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10314   match(Set dst (AddF src1 (LoadF src2)));
10315 
10316   format %{ "FLD    $src2\n\t"
10317             "FADD   ST,$src1\n\t"
10318             "FSTP_S $dst" %}
10319   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10320   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10321               OpcReg_FPR(src1),
10322               Pop_Mem_FPR(dst) );
10323   ins_pipe( fpu_mem_reg_mem );
10324 %}
10325 //
10326 // Cisc-alternate to addFPR_reg
10327 // This instruction does not round to 24-bits
10328 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10329   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10330   match(Set dst (AddF dst (LoadF src)));
10331 
10332   format %{ "FADD   $dst,$src" %}
10333   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10334   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10335               OpcP, RegOpc(dst) );
10336   ins_pipe( fpu_reg_mem );
10337 %}
10338 
// The following two instructions are for the _222_mpegaudio benchmark
10340 // Spill to obtain 24-bit precision
10341 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10342   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10343   match(Set dst (AddF src1 src2));
10344 
10345   format %{ "FADD   $dst,$src1,$src2" %}
10346   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10347   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10348               OpcReg_FPR(src2),
10349               Pop_Mem_FPR(dst) );
10350   ins_pipe( fpu_mem_reg_mem );
10351 %}
10352 
10353 // Cisc-spill variant
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src1 (LoadF src2)));
10358 
10359   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10360   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10361   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10362               set_instruction_start,
10363               OpcP, RMopc_Mem(secondary,src1),
10364               Pop_Mem_FPR(dst) );
10365   ins_pipe( fpu_mem_mem_mem );
10366 %}
10367 
10368 // Spill to obtain 24-bit precision
10369 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10370   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10371   match(Set dst (AddF src1 src2));
10372 
10373   format %{ "FADD   $dst,$src1,$src2" %}
10374   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10375   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10376               set_instruction_start,
10377               OpcP, RMopc_Mem(secondary,src1),
10378               Pop_Mem_FPR(dst) );
10379   ins_pipe( fpu_mem_mem_mem );
10380 %}
10381 
10382 
10383 // Spill to obtain 24-bit precision
10384 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10385   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10386   match(Set dst (AddF src con));
10387   format %{ "FLD    $src\n\t"
10388             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10389             "FSTP_S $dst"  %}
10390   ins_encode %{
10391     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10392     __ fadd_s($constantaddress($con));
10393     __ fstp_s(Address(rsp, $dst$$disp));
10394   %}
10395   ins_pipe(fpu_mem_reg_con);
10396 %}
10397 //
10398 // This instruction does not round to 24-bits
10399 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10400   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10401   match(Set dst (AddF src con));
10402   format %{ "FLD    $src\n\t"
10403             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10404             "FSTP   $dst"  %}
10405   ins_encode %{
10406     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10407     __ fadd_s($constantaddress($con));
10408     __ fstp_d($dst$$reg);
10409   %}
10410   ins_pipe(fpu_reg_reg_con);
10411 %}
10412 
10413 // Spill to obtain 24-bit precision
10414 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10415   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10416   match(Set dst (MulF src1 src2));
10417 
10418   format %{ "FLD    $src1\n\t"
10419             "FMUL   $src2\n\t"
10420             "FSTP_S $dst"  %}
10421   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10422   ins_encode( Push_Reg_FPR(src1),
10423               OpcReg_FPR(src2),
10424               Pop_Mem_FPR(dst) );
10425   ins_pipe( fpu_mem_reg_reg );
10426 %}
10427 //
10428 // This instruction does not round to 24-bits
10429 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10430   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10431   match(Set dst (MulF src1 src2));
10432 
10433   format %{ "FLD    $src1\n\t"
10434             "FMUL   $src2\n\t"
10435             "FSTP_S $dst"  %}
10436   opcode(0xD8, 0x1); /* D8 C8+i */
10437   ins_encode( Push_Reg_FPR(src2),
10438               OpcReg_FPR(src1),
10439               Pop_Reg_FPR(dst) );
10440   ins_pipe( fpu_reg_reg_reg );
10441 %}
10442 
10443 
10444 // Spill to obtain 24-bit precision
10445 // Cisc-alternate to reg-reg multiply
10446 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10447   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10448   match(Set dst (MulF src1 (LoadF src2)));
10449 
10450   format %{ "FLD_S  $src2\n\t"
10451             "FMUL   $src1\n\t"
10452             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10454   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10455               OpcReg_FPR(src1),
10456               Pop_Mem_FPR(dst) );
10457   ins_pipe( fpu_mem_reg_mem );
10458 %}
10459 //
10460 // This instruction does not round to 24-bits
10461 // Cisc-alternate to reg-reg multiply
10462 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10463   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10464   match(Set dst (MulF src1 (LoadF src2)));
10465 
10466   format %{ "FMUL   $dst,$src1,$src2" %}
10467   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10468   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10469               OpcReg_FPR(src1),
10470               Pop_Reg_FPR(dst) );
10471   ins_pipe( fpu_reg_reg_mem );
10472 %}
10473 
10474 // Spill to obtain 24-bit precision
10475 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10476   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10477   match(Set dst (MulF src1 src2));
10478 
10479   format %{ "FMUL   $dst,$src1,$src2" %}
10480   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10481   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10482               set_instruction_start,
10483               OpcP, RMopc_Mem(secondary,src1),
10484               Pop_Mem_FPR(dst) );
10485   ins_pipe( fpu_mem_mem_mem );
10486 %}
10487 
10488 // Spill to obtain 24-bit precision
10489 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10490   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF src con));
10492 
10493   format %{ "FLD    $src\n\t"
10494             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10495             "FSTP_S $dst"  %}
10496   ins_encode %{
10497     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10498     __ fmul_s($constantaddress($con));
10499     __ fstp_s(Address(rsp, $dst$$disp));
10500   %}
10501   ins_pipe(fpu_mem_reg_con);
10502 %}
10503 //
10504 // This instruction does not round to 24-bits
10505 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set dst (MulF src con));
10508 
10509   format %{ "FLD    $src\n\t"
10510             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10511             "FSTP   $dst"  %}
10512   ins_encode %{
10513     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10514     __ fmul_s($constantaddress($con));
10515     __ fstp_d($dst$$reg);
10516   %}
10517   ins_pipe(fpu_reg_reg_con);
10518 %}
10519 
10520 
10521 //
10522 // MACRO1 -- subsume unshared load into mulFPR
10523 // This instruction does not round to 24-bits
10524 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10525   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10526   match(Set dst (MulF (LoadF mem1) src));
10527 
10528   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10529             "FMUL   ST,$src\n\t"
10530             "FSTP   $dst" %}
10531   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10532   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10533               OpcReg_FPR(src),
10534               Pop_Reg_FPR(dst) );
10535   ins_pipe( fpu_reg_reg_mem );
10536 %}
10537 //
10538 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10539 // This instruction does not round to 24-bits
10540 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10541   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10542   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10543   ins_cost(95);
10544 
10545   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10546             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10547             "FADD   ST,$src2\n\t"
10548             "FSTP   $dst" %}
10549   opcode(0xD9); /* LoadF D9 /0 */
10550   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10551               FMul_ST_reg(src1),
10552               FAdd_ST_reg(src2),
10553               Pop_Reg_FPR(dst) );
10554   ins_pipe( fpu_reg_mem_reg_reg );
10555 %}
10556 
10557 // MACRO3 -- addFPR a mulFPR
10558 // This instruction does not round to 24-bits.  It is a '2-address'
10559 // instruction in that the result goes back to src2.  This eliminates
10560 // a move from the macro; possibly the register allocator will have
10561 // to add it back (and maybe not).
10562 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10563   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10564   match(Set src2 (AddF (MulF src0 src1) src2));
10565 
10566   format %{ "FLD    $src0     ===MACRO3===\n\t"
10567             "FMUL   ST,$src1\n\t"
10568             "FADDP  $src2,ST" %}
10569   opcode(0xD9); /* LoadF D9 /0 */
10570   ins_encode( Push_Reg_FPR(src0),
10571               FMul_ST_reg(src1),
10572               FAddP_reg_ST(src2) );
10573   ins_pipe( fpu_reg_reg_reg );
10574 %}
10575 
10576 // MACRO4 -- divFPR subFPR
10577 // This instruction does not round to 24-bits
10578 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10579   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10580   match(Set dst (DivF (SubF src2 src1) src3));
10581 
10582   format %{ "FLD    $src2   ===MACRO4===\n\t"
10583             "FSUB   ST,$src1\n\t"
10584             "FDIV   ST,$src3\n\t"
10585             "FSTP  $dst" %}
10586   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10587   ins_encode( Push_Reg_FPR(src2),
10588               subFPR_divFPR_encode(src1,src3),
10589               Pop_Reg_FPR(dst) );
10590   ins_pipe( fpu_reg_reg_reg_reg );
10591 %}
10592 
10593 // Spill to obtain 24-bit precision
10594 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10595   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10596   match(Set dst (DivF src1 src2));
10597 
10598   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10600   ins_encode( Push_Reg_FPR(src1),
10601               OpcReg_FPR(src2),
10602               Pop_Mem_FPR(dst) );
10603   ins_pipe( fpu_mem_reg_reg );
10604 %}
10605 //
10606 // This instruction does not round to 24-bits
10607 instruct divFPR_reg(regFPR dst, regFPR src) %{
10608   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10609   match(Set dst (DivF dst src));
10610 
10611   format %{ "FDIV   $dst,$src" %}
10612   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10613   ins_encode( Push_Reg_FPR(src),
10614               OpcP, RegOpc(dst) );
10615   ins_pipe( fpu_reg_reg );
10616 %}
10617 
10618 
10619 // Spill to obtain 24-bit precision
10620 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10621   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10622   match(Set dst (ModF src1 src2));
10623   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10624 
10625   format %{ "FMOD   $dst,$src1,$src2" %}
10626   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10627               emitModDPR(),
10628               Push_Result_Mod_DPR(src2),
10629               Pop_Mem_FPR(dst));
10630   ins_pipe( pipe_slow );
10631 %}
10632 //
10633 // This instruction does not round to 24-bits
10634 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10635   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10636   match(Set dst (ModF dst src));
10637   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10638 
10639   format %{ "FMOD   $dst,$src" %}
10640   ins_encode(Push_Reg_Mod_DPR(dst, src),
10641               emitModDPR(),
10642               Push_Result_Mod_DPR(src),
10643               Pop_Reg_FPR(dst));
10644   ins_pipe( pipe_slow );
10645 %}
10646 
10647 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10648   predicate(UseSSE>=1);
10649   match(Set dst (ModF src0 src1));
10650   effect(KILL rax, KILL cr);
10651   format %{ "SUB    ESP,4\t # FMOD\n"
10652           "\tMOVSS  [ESP+0],$src1\n"
10653           "\tFLD_S  [ESP+0]\n"
10654           "\tMOVSS  [ESP+0],$src0\n"
10655           "\tFLD_S  [ESP+0]\n"
10656      "loop:\tFPREM\n"
10657           "\tFWAIT\n"
10658           "\tFNSTSW AX\n"
10659           "\tSAHF\n"
10660           "\tJP     loop\n"
10661           "\tFSTP_S [ESP+0]\n"
10662           "\tMOVSS  $dst,[ESP+0]\n"
10663           "\tADD    ESP,4\n"
10664           "\tFSTP   ST0\t # Restore FPU Stack"
10665     %}
10666   ins_cost(250);
10667   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 
10672 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep them that way!
10674 
10675 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10676   predicate(UseSSE==0);
10677   match(Set dst (RoundFloat src));
10678   ins_cost(125);
10679   format %{ "FST_S  $dst,$src\t# F-round" %}
10680   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10681   ins_pipe( fpu_mem_reg );
10682 %}
10683 
10684 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10685   predicate(UseSSE<=1);
10686   match(Set dst (RoundDouble src));
10687   ins_cost(125);
10688   format %{ "FST_D  $dst,$src\t# D-round" %}
10689   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10690   ins_pipe( fpu_mem_reg );
10691 %}
10692 
// Force rounding to 24-bit precision and 8-bit exponent
10694 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10695   predicate(UseSSE==0);
10696   match(Set dst (ConvD2F src));
10697   format %{ "FST_S  $dst,$src\t# F-round" %}
10698   expand %{
10699     roundFloat_mem_reg(dst,src);
10700   %}
10701 %}
10702 
// Force rounding to 24-bit precision and 8-bit exponent
10704 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10705   predicate(UseSSE==1);
10706   match(Set dst (ConvD2F src));
10707   effect( KILL cr );
10708   format %{ "SUB    ESP,4\n\t"
10709             "FST_S  [ESP],$src\t# F-round\n\t"
10710             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10712   ins_encode %{
10713     __ subptr(rsp, 4);
10714     if ($src$$reg != FPR1L_enc) {
10715       __ fld_s($src$$reg-1);
10716       __ fstp_s(Address(rsp, 0));
10717     } else {
10718       __ fst_s(Address(rsp, 0));
10719     }
10720     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10721     __ addptr(rsp, 4);
10722   %}
10723   ins_pipe( pipe_slow );
10724 %}
10725 
10726 // Force rounding double precision to single precision
10727 instruct convD2F_reg(regF dst, regD src) %{
10728   predicate(UseSSE>=2);
10729   match(Set dst (ConvD2F src));
10730   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10731   ins_encode %{
10732     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10733   %}
10734   ins_pipe( pipe_slow );
10735 %}
10736 
10737 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10738   predicate(UseSSE==0);
10739   match(Set dst (ConvF2D src));
10740   format %{ "FST_S  $dst,$src\t# D-round" %}
10741   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10742   ins_pipe( fpu_reg_reg );
10743 %}
10744 
10745 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10746   predicate(UseSSE==1);
10747   match(Set dst (ConvF2D src));
10748   format %{ "FST_D  $dst,$src\t# D-round" %}
10749   expand %{
10750     roundDouble_mem_reg(dst,src);
10751   %}
10752 %}
10753 
10754 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10755   predicate(UseSSE==1);
10756   match(Set dst (ConvF2D src));
10757   effect( KILL cr );
10758   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10760             "FLD_S  [ESP]\n\t"
10761             "ADD    ESP,4\n\t"
10762             "FSTP   $dst\t# D-round" %}
10763   ins_encode %{
10764     __ subptr(rsp, 4);
10765     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10766     __ fld_s(Address(rsp, 0));
10767     __ addptr(rsp, 4);
10768     __ fstp_d($dst$$reg);
10769   %}
10770   ins_pipe( pipe_slow );
10771 %}
10772 
10773 instruct convF2D_reg(regD dst, regF src) %{
10774   predicate(UseSSE>=2);
10775   match(Set dst (ConvF2D src));
10776   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10777   ins_encode %{
10778     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10779   %}
10780   ins_pipe( pipe_slow );
10781 %}
10782 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10784 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10785   predicate(UseSSE<=1);
10786   match(Set dst (ConvD2I src));
10787   effect( KILL tmp, KILL cr );
10788   format %{ "FLD    $src\t# Convert double to int \n\t"
10789             "FLDCW  trunc mode\n\t"
10790             "SUB    ESP,4\n\t"
10791             "FISTp  [ESP + #0]\n\t"
10792             "FLDCW  std/24-bit mode\n\t"
10793             "POP    EAX\n\t"
10794             "CMP    EAX,0x80000000\n\t"
10795             "JNE,s  fast\n\t"
10796             "FLD_D  $src\n\t"
10797             "CALL   d2i_wrapper\n"
10798       "fast:" %}
10799   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10800   ins_pipe( pipe_slow );
10801 %}
10802 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10804 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10805   predicate(UseSSE>=2);
10806   match(Set dst (ConvD2I src));
10807   effect( KILL tmp, KILL cr );
10808   format %{ "CVTTSD2SI $dst, $src\n\t"
10809             "CMP    $dst,0x80000000\n\t"
10810             "JNE,s  fast\n\t"
10811             "SUB    ESP, 8\n\t"
10812             "MOVSD  [ESP], $src\n\t"
10813             "FLD_D  [ESP]\n\t"
10814             "ADD    ESP, 8\n\t"
10815             "CALL   d2i_wrapper\n"
10816       "fast:" %}
10817   ins_encode %{
10818     Label fast;
10819     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10820     __ cmpl($dst$$Register, 0x80000000);
10821     __ jccb(Assembler::notEqual, fast);
10822     __ subptr(rsp, 8);
10823     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10824     __ fld_d(Address(rsp, 0));
10825     __ addptr(rsp, 8);
10826     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10827     __ bind(fast);
10828   %}
10829   ins_pipe( pipe_slow );
10830 %}
10831 
10832 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10833   predicate(UseSSE<=1);
10834   match(Set dst (ConvD2L src));
10835   effect( KILL cr );
10836   format %{ "FLD    $src\t# Convert double to long\n\t"
10837             "FLDCW  trunc mode\n\t"
10838             "SUB    ESP,8\n\t"
10839             "FISTp  [ESP + #0]\n\t"
10840             "FLDCW  std/24-bit mode\n\t"
10841             "POP    EAX\n\t"
10842             "POP    EDX\n\t"
10843             "CMP    EDX,0x80000000\n\t"
10844             "JNE,s  fast\n\t"
10845             "TEST   EAX,EAX\n\t"
10846             "JNE,s  fast\n\t"
10847             "FLD    $src\n\t"
10848             "CALL   d2l_wrapper\n"
10849       "fast:" %}
10850   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10851   ins_pipe( pipe_slow );
10852 %}
10853 
10854 // XMM lacks a float/double->long conversion, so use the old FPU stack.
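// (CVTTSD2SI with a 64-bit destination needs REX.W and is therefore only
//  available in 64-bit mode, so here the value goes through memory and is
//  converted with FISTP instead.)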
10855 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10856   predicate (UseSSE>=2);
10857   match(Set dst (ConvD2L src));
10858   effect( KILL cr );
10859   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10860             "MOVSD  [ESP],$src\n\t"
10861             "FLD_D  [ESP]\n\t"
10862             "FLDCW  trunc mode\n\t"
10863             "FISTp  [ESP + #0]\n\t"
10864             "FLDCW  std/24-bit mode\n\t"
10865             "POP    EAX\n\t"
10866             "POP    EDX\n\t"
10867             "CMP    EDX,0x80000000\n\t"
10868             "JNE,s  fast\n\t"
10869             "TEST   EAX,EAX\n\t"
10870             "JNE,s  fast\n\t"
10871             "SUB    ESP,8\n\t"
10872             "MOVSD  [ESP],$src\n\t"
10873             "FLD_D  [ESP]\n\t"
10874             "ADD    ESP,8\n\t"
10875             "CALL   d2l_wrapper\n"
10876       "fast:" %}
10877   ins_encode %{
10878     Label fast;
10879     __ subptr(rsp, 8);
10880     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10881     __ fld_d(Address(rsp, 0));
10882     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10883     __ fistp_d(Address(rsp, 0));
10884     // Restore the rounding mode, mask the exception
10885     if (Compile::current()->in_24_bit_fp_mode()) {
10886       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10887     } else {
10888       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10889     }
10890     // Load the converted long, adjust CPU stack
10891     __ pop(rax);
10892     __ pop(rdx);
10893     __ cmpl(rdx, 0x80000000);
10894     __ jccb(Assembler::notEqual, fast);
10895     __ testl(rax, rax);
10896     __ jccb(Assembler::notEqual, fast);
10897     __ subptr(rsp, 8);
10898     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10899     __ fld_d(Address(rsp, 0));
10900     __ addptr(rsp, 8);
10901     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10902     __ bind(fast);
10903   %}
10904   ins_pipe( pipe_slow );
10905 %}
10906 
10907 // Convert a float to an int.  Java semantics require careful handling
10908 // of the corner cases.  So we set the rounding mode to 'zero' (truncate),
10909 // store the value down as an int, and reset the rounding mode to
10910 // 'nearest'.  The hardware stores a sentinel value if the conversion
10911 // overflowed or the input was a NaN; we check for this and go to the
10912 // slow path if needed.
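// For reference, the Java narrowing conversion (JLS 5.1.3) requires: NaN
// converts to 0, values too large for an int saturate to Integer.MAX_VALUE,
// values too small saturate to Integer.MIN_VALUE, and everything else
// truncates toward zero.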
10913 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10914   predicate(UseSSE==0);
10915   match(Set dst (ConvF2I src));
10916   effect( KILL tmp, KILL cr );
10917   format %{ "FLD    $src\t# Convert float to int \n\t"
10918             "FLDCW  trunc mode\n\t"
10919             "SUB    ESP,4\n\t"
10920             "FISTp  [ESP + #0]\n\t"
10921             "FLDCW  std/24-bit mode\n\t"
10922             "POP    EAX\n\t"
10923             "CMP    EAX,0x80000000\n\t"
10924             "JNE,s  fast\n\t"
10925             "FLD    $src\n\t"
10926             "CALL   d2i_wrapper\n"
10927       "fast:" %}
10928   // DPR2I_encoding works for FPR2I
10929   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10930   ins_pipe( pipe_slow );
10931 %}
10932 
10933 // Convert a float in xmm to an int reg.
10934 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10935   predicate(UseSSE>=1);
10936   match(Set dst (ConvF2I src));
10937   effect( KILL tmp, KILL cr );
10938   format %{ "CVTTSS2SI $dst, $src\n\t"
10939             "CMP    $dst,0x80000000\n\t"
10940             "JNE,s  fast\n\t"
10941             "SUB    ESP, 4\n\t"
10942             "MOVSS  [ESP], $src\n\t"
10943             "FLD    [ESP]\n\t"
10944             "ADD    ESP, 4\n\t"
10945             "CALL   d2i_wrapper\n"
10946       "fast:" %}
10947   ins_encode %{
10948     Label fast;
10949     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10950     __ cmpl($dst$$Register, 0x80000000);
10951     __ jccb(Assembler::notEqual, fast);
10952     __ subptr(rsp, 4);
10953     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10954     __ fld_s(Address(rsp, 0));
10955     __ addptr(rsp, 4);
10956     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10957     __ bind(fast);
10958   %}
10959   ins_pipe( pipe_slow );
10960 %}
10961 
10962 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10963   predicate(UseSSE==0);
10964   match(Set dst (ConvF2L src));
10965   effect( KILL cr );
10966   format %{ "FLD    $src\t# Convert float to long\n\t"
10967             "FLDCW  trunc mode\n\t"
10968             "SUB    ESP,8\n\t"
10969             "FISTp  [ESP + #0]\n\t"
10970             "FLDCW  std/24-bit mode\n\t"
10971             "POP    EAX\n\t"
10972             "POP    EDX\n\t"
10973             "CMP    EDX,0x80000000\n\t"
10974             "JNE,s  fast\n\t"
10975             "TEST   EAX,EAX\n\t"
10976             "JNE,s  fast\n\t"
10977             "FLD    $src\n\t"
10978             "CALL   d2l_wrapper\n"
10979       "fast:" %}
10980   // DPR2L_encoding works for FPR2L
10981   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10982   ins_pipe( pipe_slow );
10983 %}
10984 
10985 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10986 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10987   predicate (UseSSE>=1);
10988   match(Set dst (ConvF2L src));
10989   effect( KILL cr );
10990   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10991             "MOVSS  [ESP],$src\n\t"
10992             "FLD_S  [ESP]\n\t"
10993             "FLDCW  trunc mode\n\t"
10994             "FISTp  [ESP + #0]\n\t"
10995             "FLDCW  std/24-bit mode\n\t"
10996             "POP    EAX\n\t"
10997             "POP    EDX\n\t"
10998             "CMP    EDX,0x80000000\n\t"
10999             "JNE,s  fast\n\t"
11000             "TEST   EAX,EAX\n\t"
11001             "JNE,s  fast\n\t"
11002             "SUB    ESP,4\t# Convert float to long\n\t"
11003             "MOVSS  [ESP],$src\n\t"
11004             "FLD_S  [ESP]\n\t"
11005             "ADD    ESP,4\n\t"
11006             "CALL   d2l_wrapper\n"
11007       "fast:" %}
11008   ins_encode %{
11009     Label fast;
11010     __ subptr(rsp, 8);
11011     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11012     __ fld_s(Address(rsp, 0));
11013     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11014     __ fistp_d(Address(rsp, 0));
11015     // Restore the rounding mode, mask the exception
11016     if (Compile::current()->in_24_bit_fp_mode()) {
11017       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11018     } else {
11019       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11020     }
11021     // Load the converted long, adjust CPU stack
11022     __ pop(rax);
11023     __ pop(rdx);
11024     __ cmpl(rdx, 0x80000000);
11025     __ jccb(Assembler::notEqual, fast);
11026     __ testl(rax, rax);
11027     __ jccb(Assembler::notEqual, fast);
11028     __ subptr(rsp, 4);
11029     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11030     __ fld_s(Address(rsp, 0));
11031     __ addptr(rsp, 4);
11032     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11033     __ bind(fast);
11034   %}
11035   ins_pipe( pipe_slow );
11036 %}
11037 
11038 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11039   predicate( UseSSE<=1 );
11040   match(Set dst (ConvI2D src));
11041   format %{ "FILD   $src\n\t"
11042             "FSTP   $dst" %}
11043   opcode(0xDB, 0x0);  /* DB /0 */
11044   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11045   ins_pipe( fpu_reg_mem );
11046 %}
11047 
11048 instruct convI2D_reg(regD dst, rRegI src) %{
11049   predicate( UseSSE>=2 && !UseXmmI2D );
11050   match(Set dst (ConvI2D src));
11051   format %{ "CVTSI2SD $dst,$src" %}
11052   ins_encode %{
11053     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11054   %}
11055   ins_pipe( pipe_slow );
11056 %}
11057 
11058 instruct convI2D_mem(regD dst, memory mem) %{
11059   predicate( UseSSE>=2 );
11060   match(Set dst (ConvI2D (LoadI mem)));
11061   format %{ "CVTSI2SD $dst,$mem" %}
11062   ins_encode %{
11063     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11064   %}
11065   ins_pipe( pipe_slow );
11066 %}
11067 
11068 instruct convXI2D_reg(regD dst, rRegI src)
11069 %{
11070   predicate( UseSSE>=2 && UseXmmI2D );
11071   match(Set dst (ConvI2D src));
11072 
11073   format %{ "MOVD  $dst,$src\n\t"
11074             "CVTDQ2PD $dst,$dst\t# i2d" %}
11075   ins_encode %{
11076     __ movdl($dst$$XMMRegister, $src$$Register);
11077     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11078   %}
11079   ins_pipe(pipe_slow); // XXX
11080 %}
11081 
11082 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11083   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11084   match(Set dst (ConvI2D (LoadI mem)));
11085   format %{ "FILD   $mem\n\t"
11086             "FSTP   $dst" %}
11087   opcode(0xDB);      /* DB /0 */
11088   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11089               Pop_Reg_DPR(dst));
11090   ins_pipe( fpu_reg_mem );
11091 %}
11092 
11093 // Convert a byte to a float; no rounding step needed.
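// A value masked to the 0..255 range is exactly representable in single
// precision, so no explicit rounding pass is required even in 24-bit mode.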
11094 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11095   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11096   match(Set dst (ConvI2F src));
11097   format %{ "FILD   $src\n\t"
11098             "FSTP   $dst" %}
11099 
11100   opcode(0xDB, 0x0);  /* DB /0 */
11101   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11102   ins_pipe( fpu_reg_mem );
11103 %}
11104 
11105 // In 24-bit mode, force exponent rounding by storing back out
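// FILD loads the int exactly (precision control does not apply to loads), so
// the conversion is rounded to single precision only at the FSTP_S store to
// the 32-bit stack slot.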
11106 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11107   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11108   match(Set dst (ConvI2F src));
11109   ins_cost(200);
11110   format %{ "FILD   $src\n\t"
11111             "FSTP_S $dst" %}
11112   opcode(0xDB, 0x0);  /* DB /0 */
11113   ins_encode( Push_Mem_I(src),
11114               Pop_Mem_FPR(dst));
11115   ins_pipe( fpu_mem_mem );
11116 %}
11117 
11118 // In 24-bit mode, force exponent rounding by storing back out
11119 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11120   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11121   match(Set dst (ConvI2F (LoadI mem)));
11122   ins_cost(200);
11123   format %{ "FILD   $mem\n\t"
11124             "FSTP_S $dst" %}
11125   opcode(0xDB);  /* DB /0 */
11126   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11127               Pop_Mem_FPR(dst));
11128   ins_pipe( fpu_mem_mem );
11129 %}
11130 
11131 // This instruction does not round to 24 bits
11132 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11133   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134   match(Set dst (ConvI2F src));
11135   format %{ "FILD   $src\n\t"
11136             "FSTP   $dst" %}
11137   opcode(0xDB, 0x0);  /* DB /0 */
11138   ins_encode( Push_Mem_I(src),
11139               Pop_Reg_FPR(dst));
11140   ins_pipe( fpu_reg_mem );
11141 %}
11142 
11143 // This instruction does not round to 24 bits
11144 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11145   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11146   match(Set dst (ConvI2F (LoadI mem)));
11147   format %{ "FILD   $mem\n\t"
11148             "FSTP   $dst" %}
11149   opcode(0xDB);      /* DB /0 */
11150   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11151               Pop_Reg_FPR(dst));
11152   ins_pipe( fpu_reg_mem );
11153 %}
11154 
11155 // Convert an int to a float in xmm; no rounding step needed.
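// CVTSI2SS produces a correctly rounded single-precision result directly, so
// no store/reload pass is needed to force rounding, unlike the x87 paths above.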
11156 instruct convI2F_reg(regF dst, rRegI src) %{
11157   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11158   match(Set dst (ConvI2F src));
11159   format %{ "CVTSI2SS $dst, $src" %}
11160   ins_encode %{
11161     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11162   %}
11163   ins_pipe( pipe_slow );
11164 %}
11165 
11166 instruct convXI2F_reg(regF dst, rRegI src)
11167 %{
11168   predicate( UseSSE>=2 && UseXmmI2F );
11169   match(Set dst (ConvI2F src));
11170 
11171   format %{ "MOVD  $dst,$src\n\t"
11172             "CVTDQ2PS $dst,$dst\t# i2f" %}
11173   ins_encode %{
11174     __ movdl($dst$$XMMRegister, $src$$Register);
11175     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11176   %}
11177   ins_pipe(pipe_slow); // XXX
11178 %}
11179 
11180 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11181   match(Set dst (ConvI2L src));
11182   effect(KILL cr);
11183   ins_cost(375);
11184   format %{ "MOV    $dst.lo,$src\n\t"
11185             "MOV    $dst.hi,$src\n\t"
11186             "SAR    $dst.hi,31" %}
11187   ins_encode(convert_int_long(dst,src));
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 // Zero-extend convert int to long
11192 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11193   match(Set dst (AndL (ConvI2L src) mask) );
11194   effect( KILL flags );
11195   ins_cost(250);
11196   format %{ "MOV    $dst.lo,$src\n\t"
11197             "XOR    $dst.hi,$dst.hi" %}
11198   opcode(0x33); // XOR
11199   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11200   ins_pipe( ialu_reg_reg_long );
11201 %}
11202 
11203 // Zero-extend long
11204 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11205   match(Set dst (AndL src mask) );
11206   effect( KILL flags );
11207   ins_cost(250);
11208   format %{ "MOV    $dst.lo,$src.lo\n\t"
11209             "XOR    $dst.hi,$dst.hi\n\t" %}
11210   opcode(0x33); // XOR
11211   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11212   ins_pipe( ialu_reg_reg_long );
11213 %}
11214 
11215 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11216   predicate (UseSSE<=1);
11217   match(Set dst (ConvL2D src));
11218   effect( KILL cr );
11219   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11220             "PUSH   $src.lo\n\t"
11221             "FILD   ST,[ESP + #0]\n\t"
11222             "ADD    ESP,8\n\t"
11223             "FSTP_D $dst\t# D-round" %}
11224   opcode(0xDF, 0x5);  /* DF /5 */
11225   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11226   ins_pipe( pipe_slow );
11227 %}
11228 
11229 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11230   predicate (UseSSE>=2);
11231   match(Set dst (ConvL2D src));
11232   effect( KILL cr );
11233   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11234             "PUSH   $src.lo\n\t"
11235             "FILD_D [ESP]\n\t"
11236             "FSTP_D [ESP]\n\t"
11237             "MOVSD  $dst,[ESP]\n\t"
11238             "ADD    ESP,8" %}
11239   opcode(0xDF, 0x5);  /* DF /5 */
11240   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11241   ins_pipe( pipe_slow );
11242 %}
11243 
11244 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11245   predicate (UseSSE>=1);
11246   match(Set dst (ConvL2F src));
11247   effect( KILL cr );
11248   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11249             "PUSH   $src.lo\n\t"
11250             "FILD_D [ESP]\n\t"
11251             "FSTP_S [ESP]\n\t"
11252             "MOVSS  $dst,[ESP]\n\t"
11253             "ADD    ESP,8" %}
11254   opcode(0xDF, 0x5);  /* DF /5 */
11255   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11256   ins_pipe( pipe_slow );
11257 %}
11258 
11259 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11260   match(Set dst (ConvL2F src));
11261   effect( KILL cr );
11262   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11263             "PUSH   $src.lo\n\t"
11264             "FILD   ST,[ESP + #0]\n\t"
11265             "ADD    ESP,8\n\t"
11266             "FSTP_S $dst\t# F-round" %}
11267   opcode(0xDF, 0x5);  /* DF /5 */
11268   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11269   ins_pipe( pipe_slow );
11270 %}
11271 
11272 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11273   match(Set dst (ConvL2I src));
11274   effect( DEF dst, USE src );
11275   format %{ "MOV    $dst,$src.lo" %}
11276   ins_encode(enc_CopyL_Lo(dst,src));
11277   ins_pipe( ialu_reg_reg );
11278 %}
11279 
11280 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283   ins_cost(100);
11284   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11285   ins_encode %{
11286     __ movl($dst$$Register, Address(rsp, $src$$disp));
11287   %}
11288   ins_pipe( ialu_reg_mem );
11289 %}
11290 
11291 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11292   predicate(UseSSE==0);
11293   match(Set dst (MoveF2I src));
11294   effect( DEF dst, USE src );
11295 
11296   ins_cost(125);
11297   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11298   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11299   ins_pipe( fpu_mem_reg );
11300 %}
11301 
11302 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11303   predicate(UseSSE>=1);
11304   match(Set dst (MoveF2I src));
11305   effect( DEF dst, USE src );
11306 
11307   ins_cost(95);
11308   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11309   ins_encode %{
11310     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11316   predicate(UseSSE>=2);
11317   match(Set dst (MoveF2I src));
11318   effect( DEF dst, USE src );
11319   ins_cost(85);
11320   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11321   ins_encode %{
11322     __ movdl($dst$$Register, $src$$XMMRegister);
11323   %}
11324   ins_pipe( pipe_slow );
11325 %}
11326 
11327 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11328   match(Set dst (MoveI2F src));
11329   effect( DEF dst, USE src );
11330 
11331   ins_cost(100);
11332   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11333   ins_encode %{
11334     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11335   %}
11336   ins_pipe( ialu_mem_reg );
11337 %}
11338 
11339 
11340 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11341   predicate(UseSSE==0);
11342   match(Set dst (MoveI2F src));
11343   effect(DEF dst, USE src);
11344 
11345   ins_cost(125);
11346   format %{ "FLD_S  $src\n\t"
11347             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11348   opcode(0xD9);               /* D9 /0, FLD m32real */
11349   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11350               Pop_Reg_FPR(dst) );
11351   ins_pipe( fpu_reg_mem );
11352 %}
11353 
11354 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11355   predicate(UseSSE>=1);
11356   match(Set dst (MoveI2F src));
11357   effect( DEF dst, USE src );
11358 
11359   ins_cost(95);
11360   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11361   ins_encode %{
11362     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11363   %}
11364   ins_pipe( pipe_slow );
11365 %}
11366 
11367 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11368   predicate(UseSSE>=2);
11369   match(Set dst (MoveI2F src));
11370   effect( DEF dst, USE src );
11371 
11372   ins_cost(85);
11373   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11374   ins_encode %{
11375     __ movdl($dst$$XMMRegister, $src$$Register);
11376   %}
11377   ins_pipe( pipe_slow );
11378 %}
11379 
11380 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(250);
11385   format %{ "MOV    $dst.lo,$src\n\t"
11386             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11387   opcode(0x8B, 0x8B);
11388   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11389   ins_pipe( ialu_mem_long_reg );
11390 %}
11391 
11392 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11393   predicate(UseSSE<=1);
11394   match(Set dst (MoveD2L src));
11395   effect(DEF dst, USE src);
11396 
11397   ins_cost(125);
11398   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11399   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11400   ins_pipe( fpu_mem_reg );
11401 %}
11402 
11403 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11404   predicate(UseSSE>=2);
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src);
11407   ins_cost(95);
11408   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11409   ins_encode %{
11410     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11411   %}
11412   ins_pipe( pipe_slow );
11413 %}
11414 
11415 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11416   predicate(UseSSE>=2);
11417   match(Set dst (MoveD2L src));
11418   effect(DEF dst, USE src, TEMP tmp);
11419   ins_cost(85);
11420   format %{ "MOVD   $dst.lo,$src\n\t"
11421             "PSHUFLW $tmp,$src,0x4E\n\t"
11422             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11423   ins_encode %{
11424     __ movdl($dst$$Register, $src$$XMMRegister);
11425     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11426     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11427   %}
11428   ins_pipe( pipe_slow );
11429 %}
11430 
11431 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11432   match(Set dst (MoveL2D src));
11433   effect(DEF dst, USE src);
11434 
11435   ins_cost(200);
11436   format %{ "MOV    $dst,$src.lo\n\t"
11437             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11438   opcode(0x89, 0x89);
11439   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11440   ins_pipe( ialu_mem_long_reg );
11441 %}
11442 
11443 
11444 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11445   predicate(UseSSE<=1);
11446   match(Set dst (MoveL2D src));
11447   effect(DEF dst, USE src);
11448   ins_cost(125);
11449 
11450   format %{ "FLD_D  $src\n\t"
11451             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11452   opcode(0xDD);               /* DD /0, FLD m64real */
11453   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11454               Pop_Reg_DPR(dst) );
11455   ins_pipe( fpu_reg_mem );
11456 %}
11457 
11458 
11459 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11460   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11461   match(Set dst (MoveL2D src));
11462   effect(DEF dst, USE src);
11463 
11464   ins_cost(95);
11465   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11466   ins_encode %{
11467     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11473   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11474   match(Set dst (MoveL2D src));
11475   effect(DEF dst, USE src);
11476 
11477   ins_cost(95);
11478   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11479   ins_encode %{
11480     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11481   %}
11482   ins_pipe( pipe_slow );
11483 %}
11484 
11485 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11486   predicate(UseSSE>=2);
11487   match(Set dst (MoveL2D src));
11488   effect(TEMP dst, USE src, TEMP tmp);
11489   ins_cost(85);
11490   format %{ "MOVD   $dst,$src.lo\n\t"
11491             "MOVD   $tmp,$src.hi\n\t"
11492             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11493   ins_encode %{
11494     __ movdl($dst$$XMMRegister, $src$$Register);
11495     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11496     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11497   %}
11498   ins_pipe( pipe_slow );
11499 %}
11500 
11501 
11502 // =======================================================================
11503 // fast clearing of an array
11504 // Small ClearArray non-AVX512.
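// Counts up to InitArrayShortSize are cleared with a simple inline store loop;
// larger counts fall into clear_mem()'s bulk path, which picks between
// REP STOSB (UseFastStosb), an XMM/YMM zeroing loop (UseXMMForObjInit) and
// plain REP STOS, as sketched in the format string below.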
11505 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11506   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11507   match(Set dummy (ClearArray cnt base));
11508   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11509 
11510   format %{ $$template
11511     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11512     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11513     $$emit$$"JG     LARGE\n\t"
11514     $$emit$$"SHL    ECX, 1\n\t"
11515     $$emit$$"DEC    ECX\n\t"
11516     $$emit$$"JS     DONE\t# Zero length\n\t"
11517     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11518     $$emit$$"DEC    ECX\n\t"
11519     $$emit$$"JGE    LOOP\n\t"
11520     $$emit$$"JMP    DONE\n\t"
11521     $$emit$$"# LARGE:\n\t"
11522     if (UseFastStosb) {
11523        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11524        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11525     } else if (UseXMMForObjInit) {
11526        $$emit$$"MOV     RDI,RAX\n\t"
11527        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11528        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11529        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11530        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11531        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11532        $$emit$$"ADD     0x40,RAX\n\t"
11533        $$emit$$"# L_zero_64_bytes:\n\t"
11534        $$emit$$"SUB     0x8,RCX\n\t"
11535        $$emit$$"JGE     L_loop\n\t"
11536        $$emit$$"ADD     0x4,RCX\n\t"
11537        $$emit$$"JL      L_tail\n\t"
11538        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11539        $$emit$$"ADD     0x20,RAX\n\t"
11540        $$emit$$"SUB     0x4,RCX\n\t"
11541        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11542        $$emit$$"ADD     0x4,RCX\n\t"
11543        $$emit$$"JLE     L_end\n\t"
11544        $$emit$$"DEC     RCX\n\t"
11545        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11546        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11547        $$emit$$"ADD     0x8,RAX\n\t"
11548        $$emit$$"DEC     RCX\n\t"
11549        $$emit$$"JGE     L_sloop\n\t"
11550        $$emit$$"# L_end:\n\t"
11551     } else {
11552        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11553        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11554     }
11555     $$emit$$"# DONE"
11556   %}
11557   ins_encode %{
11558     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11559                  $tmp$$XMMRegister, false, knoreg);
11560   %}
11561   ins_pipe( pipe_slow );
11562 %}
11563 
11564 // Small ClearArray AVX512 non-constant length.
11565 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11566   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11567   match(Set dummy (ClearArray cnt base));
11568   ins_cost(125);
11569   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11570 
11571   format %{ $$template
11572     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11573     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11574     $$emit$$"JG     LARGE\n\t"
11575     $$emit$$"SHL    ECX, 1\n\t"
11576     $$emit$$"DEC    ECX\n\t"
11577     $$emit$$"JS     DONE\t# Zero length\n\t"
11578     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11579     $$emit$$"DEC    ECX\n\t"
11580     $$emit$$"JGE    LOOP\n\t"
11581     $$emit$$"JMP    DONE\n\t"
11582     $$emit$$"# LARGE:\n\t"
11583     if (UseFastStosb) {
11584        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11585        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11586     } else if (UseXMMForObjInit) {
11587        $$emit$$"MOV     RDI,RAX\n\t"
11588        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11589        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11590        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11591        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11592        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11593        $$emit$$"ADD     0x40,RAX\n\t"
11594        $$emit$$"# L_zero_64_bytes:\n\t"
11595        $$emit$$"SUB     0x8,RCX\n\t"
11596        $$emit$$"JGE     L_loop\n\t"
11597        $$emit$$"ADD     0x4,RCX\n\t"
11598        $$emit$$"JL      L_tail\n\t"
11599        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11600        $$emit$$"ADD     0x20,RAX\n\t"
11601        $$emit$$"SUB     0x4,RCX\n\t"
11602        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11603        $$emit$$"ADD     0x4,RCX\n\t"
11604        $$emit$$"JLE     L_end\n\t"
11605        $$emit$$"DEC     RCX\n\t"
11606        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11607        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11608        $$emit$$"ADD     0x8,RAX\n\t"
11609        $$emit$$"DEC     RCX\n\t"
11610        $$emit$$"JGE     L_sloop\n\t"
11611        $$emit$$"# L_end:\n\t"
11612     } else {
11613        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11614        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11615     }
11616     $$emit$$"# DONE"
11617   %}
11618   ins_encode %{
11619     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11620                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11621   %}
11622   ins_pipe( pipe_slow );
11623 %}
11624 
11625 // Large ClearArray non-AVX512.
11626 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11627   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11628   match(Set dummy (ClearArray cnt base));
11629   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11630   format %{ $$template
11631     if (UseFastStosb) {
11632        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11633        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11634        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11635     } else if (UseXMMForObjInit) {
11636        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11637        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11638        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11639        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11640        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11641        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11642        $$emit$$"ADD     0x40,RAX\n\t"
11643        $$emit$$"# L_zero_64_bytes:\n\t"
11644        $$emit$$"SUB     0x8,RCX\n\t"
11645        $$emit$$"JGE     L_loop\n\t"
11646        $$emit$$"ADD     0x4,RCX\n\t"
11647        $$emit$$"JL      L_tail\n\t"
11648        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11649        $$emit$$"ADD     0x20,RAX\n\t"
11650        $$emit$$"SUB     0x4,RCX\n\t"
11651        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11652        $$emit$$"ADD     0x4,RCX\n\t"
11653        $$emit$$"JLE     L_end\n\t"
11654        $$emit$$"DEC     RCX\n\t"
11655        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11656        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11657        $$emit$$"ADD     0x8,RAX\n\t"
11658        $$emit$$"DEC     RCX\n\t"
11659        $$emit$$"JGE     L_sloop\n\t"
11660        $$emit$$"# L_end:\n\t"
11661     } else {
11662        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11663        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11664        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11665     }
11666     $$emit$$"# DONE"
11667   %}
11668   ins_encode %{
11669     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11670                  $tmp$$XMMRegister, true, knoreg);
11671   %}
11672   ins_pipe( pipe_slow );
11673 %}
11674 
11675 // Large ClearArray AVX512.
11676 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11677   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11678   match(Set dummy (ClearArray cnt base));
11679   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11680   format %{ $$template
11681     if (UseFastStosb) {
11682        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11683        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11684        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11685     } else if (UseXMMForObjInit) {
11686        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11687        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11688        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11689        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11690        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11691        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11692        $$emit$$"ADD     0x40,RAX\n\t"
11693        $$emit$$"# L_zero_64_bytes:\n\t"
11694        $$emit$$"SUB     0x8,RCX\n\t"
11695        $$emit$$"JGE     L_loop\n\t"
11696        $$emit$$"ADD     0x4,RCX\n\t"
11697        $$emit$$"JL      L_tail\n\t"
11698        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11699        $$emit$$"ADD     0x20,RAX\n\t"
11700        $$emit$$"SUB     0x4,RCX\n\t"
11701        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11702        $$emit$$"ADD     0x4,RCX\n\t"
11703        $$emit$$"JLE     L_end\n\t"
11704        $$emit$$"DEC     RCX\n\t"
11705        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11706        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11707        $$emit$$"ADD     0x8,RAX\n\t"
11708        $$emit$$"DEC     RCX\n\t"
11709        $$emit$$"JGE     L_sloop\n\t"
11710        $$emit$$"# L_end:\n\t"
11711     } else {
11712        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11713        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11714        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11715     }
11716     $$emit$$"# DONE"
11717   %}
11718   ins_encode %{
11719     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11720                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11721   %}
11722   ins_pipe( pipe_slow );
11723 %}
11724 
11725 // Small ClearArray AVX512 constant length.
11726 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11727 %{
11728   predicate(!((ClearArrayNode*)n)->is_large() &&
11729                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11730   match(Set dummy (ClearArray cnt base));
11731   ins_cost(100);
11732   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11733   format %{ "clear_mem_imm $base,$cnt\n\t" %}
11734   ins_encode %{
11735    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11736   %}
11737   ins_pipe(pipe_slow);
11738 %}
11739 
11740 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11741                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11742   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11743   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11744   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11745 
11746   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11747   ins_encode %{
11748     __ string_compare($str1$$Register, $str2$$Register,
11749                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11750                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11751   %}
11752   ins_pipe( pipe_slow );
11753 %}
11754 
11755 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11756                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11757   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11758   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11759   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11760 
11761   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11762   ins_encode %{
11763     __ string_compare($str1$$Register, $str2$$Register,
11764                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11765                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11766   %}
11767   ins_pipe( pipe_slow );
11768 %}
11769 
11770 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11771                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11772   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11773   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11774   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11775 
11776   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11777   ins_encode %{
11778     __ string_compare($str1$$Register, $str2$$Register,
11779                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11780                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11781   %}
11782   ins_pipe( pipe_slow );
11783 %}
11784 
11785 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11786                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11787   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11788   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11789   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11790 
11791   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11792   ins_encode %{
11793     __ string_compare($str1$$Register, $str2$$Register,
11794                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11795                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11796   %}
11797   ins_pipe( pipe_slow );
11798 %}
11799 
11800 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11801                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11802   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11803   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11804   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11805 
11806   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11807   ins_encode %{
11808     __ string_compare($str1$$Register, $str2$$Register,
11809                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11810                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11811   %}
11812   ins_pipe( pipe_slow );
11813 %}
11814 
11815 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11816                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11817   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11818   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11819   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11820 
11821   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11822   ins_encode %{
11823     __ string_compare($str1$$Register, $str2$$Register,
11824                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11825                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11826   %}
11827   ins_pipe( pipe_slow );
11828 %}
11829 
11830 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11831                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11832   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11833   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11834   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11835 
11836   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11837   ins_encode %{
11838     __ string_compare($str2$$Register, $str1$$Register,
11839                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11840                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11841   %}
11842   ins_pipe( pipe_slow );
11843 %}
11844 
11845 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11846                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11847   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11848   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11849   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11850 
11851   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11852   ins_encode %{
11853     __ string_compare($str2$$Register, $str1$$Register,
11854                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11855                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11856   %}
11857   ins_pipe( pipe_slow );
11858 %}
11859 
11860 // fast string equals
11861 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11862                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11863   predicate(!VM_Version::supports_avx512vlbw());
11864   match(Set result (StrEquals (Binary str1 str2) cnt));
11865   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11866 
11867   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11868   ins_encode %{
11869     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11870                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11871                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11872   %}
11873 
11874   ins_pipe( pipe_slow );
11875 %}
11876 
11877 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11878                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11879   predicate(VM_Version::supports_avx512vlbw());
11880   match(Set result (StrEquals (Binary str1 str2) cnt));
11881   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11882 
11883   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11884   ins_encode %{
11885     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11886                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11887                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11888   %}
11889 
11890   ins_pipe( pipe_slow );
11891 %}
11892 
11893 
11894 // fast search of substring with known size.
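// The SSE4.2 PCMPESTRI-based helpers compare one 128-bit XMM chunk at a time,
// which is where the constant-size thresholds below come from: 16 Latin-1
// bytes or 8 UTF-16 chars fill a single XMM register, so such constant
// patterns need not be staged through the stack.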
11895 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11896                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11897   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11898   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11899   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11900 
11901   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11902   ins_encode %{
11903     int icnt2 = (int)$int_cnt2$$constant;
11904     if (icnt2 >= 16) {
11905       // IndexOf for constant substrings with size >= 16 elements
11906       // which don't need to be loaded through the stack.
11907       __ string_indexofC8($str1$$Register, $str2$$Register,
11908                           $cnt1$$Register, $cnt2$$Register,
11909                           icnt2, $result$$Register,
11910                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11911     } else {
11912       // Small strings are loaded through the stack if they cross a page boundary.
11913       __ string_indexof($str1$$Register, $str2$$Register,
11914                         $cnt1$$Register, $cnt2$$Register,
11915                         icnt2, $result$$Register,
11916                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11917     }
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
11922 // fast search of substring with known size.
11923 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11924                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11925   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11926   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11927   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11928 
11929   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11930   ins_encode %{
11931     int icnt2 = (int)$int_cnt2$$constant;
11932     if (icnt2 >= 8) {
11933       // IndexOf for constant substrings with size >= 8 elements
11934       // which don't need to be loaded through the stack.
11935       __ string_indexofC8($str1$$Register, $str2$$Register,
11936                           $cnt1$$Register, $cnt2$$Register,
11937                           icnt2, $result$$Register,
11938                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11939     } else {
11940       // Small strings are loaded through the stack if they cross a page boundary.
11941       __ string_indexof($str1$$Register, $str2$$Register,
11942                         $cnt1$$Register, $cnt2$$Register,
11943                         icnt2, $result$$Register,
11944                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11945     }
11946   %}
11947   ins_pipe( pipe_slow );
11948 %}
11949 
11950 // fast search of substring with known size.
11951 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11952                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11953   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11954   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11955   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11956 
11957   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11958   ins_encode %{
11959     int icnt2 = (int)$int_cnt2$$constant;
11960     if (icnt2 >= 8) {
11961       // IndexOf for constant substrings with size >= 8 elements
11962       // which don't need to be loaded through the stack.
11963       __ string_indexofC8($str1$$Register, $str2$$Register,
11964                           $cnt1$$Register, $cnt2$$Register,
11965                           icnt2, $result$$Register,
11966                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11967     } else {
11968       // Small strings are loaded through the stack if they cross a page boundary.
11969       __ string_indexof($str1$$Register, $str2$$Register,
11970                         $cnt1$$Register, $cnt2$$Register,
11971                         icnt2, $result$$Register,
11972                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11973     }
11974   %}
11975   ins_pipe( pipe_slow );
11976 %}
11977 
11978 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11979                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11980   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11981   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11982   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11983 
11984   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11985   ins_encode %{
11986     __ string_indexof($str1$$Register, $str2$$Register,
11987                       $cnt1$$Register, $cnt2$$Register,
11988                       (-1), $result$$Register,
11989                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11990   %}
11991   ins_pipe( pipe_slow );
11992 %}
11993 
11994 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11995                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11996   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11997   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11998   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11999 
12000   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12001   ins_encode %{
12002     __ string_indexof($str1$$Register, $str2$$Register,
12003                       $cnt1$$Register, $cnt2$$Register,
12004                       (-1), $result$$Register,
12005                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12006   %}
12007   ins_pipe( pipe_slow );
12008 %}
12009 
12010 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12011                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12012   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12013   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12014   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12015 
12016   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12017   ins_encode %{
12018     __ string_indexof($str1$$Register, $str2$$Register,
12019                       $cnt1$$Register, $cnt2$$Register,
12020                       (-1), $result$$Register,
12021                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12022   %}
12023   ins_pipe( pipe_slow );
12024 %}
12025 
12026 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12027                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12028   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12029   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12030   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12031   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12032   ins_encode %{
12033     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12034                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12035   %}
12036   ins_pipe( pipe_slow );
12037 %}
12038 
12039 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12040                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12041   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12042   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12043   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12044   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12045   ins_encode %{
12046     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12047                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
12052 
12053 // fast array equals
12054 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12055                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12056 %{
12057   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12058   match(Set result (AryEq ary1 ary2));
12059   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12060   //ins_cost(300);
12061 
12062   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12063   ins_encode %{
12064     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12065                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12066                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12067   %}
12068   ins_pipe( pipe_slow );
12069 %}
12070 
12071 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12072                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12073 %{
12074   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12075   match(Set result (AryEq ary1 ary2));
12076   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12077   //ins_cost(300);
12078 
12079   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12080   ins_encode %{
12081     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12082                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12083                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12084   %}
12085   ins_pipe( pipe_slow );
12086 %}
12087 
12088 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12089                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12090 %{
12091   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12092   match(Set result (AryEq ary1 ary2));
12093   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12094   //ins_cost(300);
12095 
12096   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12097   ins_encode %{
12098     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12099                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12100                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12101   %}
12102   ins_pipe( pipe_slow );
12103 %}
12104 
12105 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12106                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12107 %{
12108   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12109   match(Set result (AryEq ary1 ary2));
12110   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12111   //ins_cost(300);
12112 
12113   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12114   ins_encode %{
12115     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12116                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12117                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12118   %}
12119   ins_pipe( pipe_slow );
12120 %}
12121 
12122 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12123                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12124 %{
12125   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12126   match(Set result (CountPositives ary1 len));
12127   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12128 
12129   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12130   ins_encode %{
12131     __ count_positives($ary1$$Register, $len$$Register,
12132                        $result$$Register, $tmp3$$Register,
12133                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12134   %}
12135   ins_pipe( pipe_slow );
12136 %}
12137 
12138 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12139                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12140 %{
12141   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12142   match(Set result (CountPositives ary1 len));
12143   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12144 
12145   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12146   ins_encode %{
12147     __ count_positives($ary1$$Register, $len$$Register,
12148                        $result$$Register, $tmp3$$Register,
12149                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12150   %}
12151   ins_pipe( pipe_slow );
12152 %}
12153 
12154 
12155 // fast char[] to byte[] compression
12156 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12157                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12158   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12159   match(Set result (StrCompressedCopy src (Binary dst len)));
12160   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12161 
12162   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12163   ins_encode %{
12164     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12165                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12166                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12167                            knoreg, knoreg);
12168   %}
12169   ins_pipe( pipe_slow );
12170 %}
12171 
12172 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12173                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12174   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12175   match(Set result (StrCompressedCopy src (Binary dst len)));
12176   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12177 
12178   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12179   ins_encode %{
12180     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12181                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12182                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12183                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12184   %}
12185   ins_pipe( pipe_slow );
12186 %}
12187 
12188 // fast byte[] to char[] inflation
12189 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12190                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12191   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12192   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12193   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12194 
12195   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12196   ins_encode %{
12197     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12198                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12199   %}
12200   ins_pipe( pipe_slow );
12201 %}
12202 
12203 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12204                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12205   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12206   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12207   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12208 
12209   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12210   ins_encode %{
12211     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12212                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12213   %}
12214   ins_pipe( pipe_slow );
12215 %}
12216 
12217 // encode char[] to byte[] in ISO_8859_1
12218 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12219                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12220                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12221   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12222   match(Set result (EncodeISOArray src (Binary dst len)));
12223   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12224 
12225   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12226   ins_encode %{
12227     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12228                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12229                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12230   %}
12231   ins_pipe( pipe_slow );
12232 %}
12233 
12234 // encode char[] to byte[] in ASCII
12235 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12236                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12237                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12238   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12239   match(Set result (EncodeISOArray src (Binary dst len)));
12240   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12241 
12242   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12243   ins_encode %{
12244     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12245                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12246                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12247   %}
12248   ins_pipe( pipe_slow );
12249 %}
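// A minimal scalar sketch of what the two encoders above compute (conceptual
// only -- the real work is done by the vectorized encode_iso_array code in the
// macro assembler; the helper below is purely illustrative):
//
//   static int encode(const jchar* src, jbyte* dst, int len, bool ascii) {
//     const int limit = ascii ? 0x7F : 0xFF;   // ASCII vs ISO-8859-1 range
//     for (int i = 0; i < len; i++) {
//       if (src[i] > limit) return i;          // stop at the first char that
//                                              // does not fit the target range
//       dst[i] = (jbyte)src[i];
//     }
//     return len;                              // result: chars encoded
//   }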
12250 
12251 //----------Control Flow Instructions------------------------------------------
12252 // Signed compare Instructions
12253 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12254   match(Set cr (CmpI op1 op2));
12255   effect( DEF cr, USE op1, USE op2 );
12256   format %{ "CMP    $op1,$op2" %}
12257   opcode(0x3B);  /* Opcode 3B /r */
12258   ins_encode( OpcP, RegReg( op1, op2) );
12259   ins_pipe( ialu_cr_reg_reg );
12260 %}
12261 
12262 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12263   match(Set cr (CmpI op1 op2));
12264   effect( DEF cr, USE op1 );
12265   format %{ "CMP    $op1,$op2" %}
12266   opcode(0x81,0x07);  /* Opcode 81 /7 */
12267   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12268   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12269   ins_pipe( ialu_cr_reg_imm );
12270 %}
12271 
12272 // Cisc-spilled version of cmpI_eReg
12273 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12274   match(Set cr (CmpI op1 (LoadI op2)));
12275 
12276   format %{ "CMP    $op1,$op2" %}
12277   ins_cost(500);
12278   opcode(0x3B);  /* Opcode 3B /r */
12279   ins_encode( OpcP, RegMem( op1, op2) );
12280   ins_pipe( ialu_cr_reg_mem );
12281 %}
12282 
12283 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12284   match(Set cr (CmpI src zero));
12285   effect( DEF cr, USE src );
12286 
12287   format %{ "TEST   $src,$src" %}
12288   opcode(0x85);
12289   ins_encode( OpcP, RegReg( src, src ) );
12290   ins_pipe( ialu_cr_reg_imm );
12291 %}
12292 
12293 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12294   match(Set cr (CmpI (AndI src con) zero));
12295 
12296   format %{ "TEST   $src,$con" %}
12297   opcode(0xF7,0x00);
12298   ins_encode( OpcP, RegOpc(src), Con32(con) );
12299   ins_pipe( ialu_cr_reg_imm );
12300 %}
12301 
12302 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12303   match(Set cr (CmpI (AndI src mem) zero));
12304 
12305   format %{ "TEST   $src,$mem" %}
12306   opcode(0x85);
12307   ins_encode( OpcP, RegMem( src, mem ) );
12308   ins_pipe( ialu_cr_reg_mem );
12309 %}
12310 
12311 // Unsigned compare Instructions; really, same as signed except they
12312 // produce an eFlagsRegU instead of eFlagsReg.
12313 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12314   match(Set cr (CmpU op1 op2));
12315 
12316   format %{ "CMPu   $op1,$op2" %}
12317   opcode(0x3B);  /* Opcode 3B /r */
12318   ins_encode( OpcP, RegReg( op1, op2) );
12319   ins_pipe( ialu_cr_reg_reg );
12320 %}
12321 
12322 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12323   match(Set cr (CmpU op1 op2));
12324 
12325   format %{ "CMPu   $op1,$op2" %}
12326   opcode(0x81,0x07);  /* Opcode 81 /7 */
12327   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12328   ins_pipe( ialu_cr_reg_imm );
12329 %}
12330 
// Cisc-spilled version of cmpU_eReg
12332 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12333   match(Set cr (CmpU op1 (LoadI op2)));
12334 
12335   format %{ "CMPu   $op1,$op2" %}
12336   ins_cost(500);
12337   opcode(0x3B);  /* Opcode 3B /r */
12338   ins_encode( OpcP, RegMem( op1, op2) );
12339   ins_pipe( ialu_cr_reg_mem );
12340 %}
12341 
12342 // // Cisc-spilled version of cmpU_eReg
12343 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12344 //  match(Set cr (CmpU (LoadI op1) op2));
12345 //
12346 //  format %{ "CMPu   $op1,$op2" %}
12347 //  ins_cost(500);
12348 //  opcode(0x39);  /* Opcode 39 /r */
12349 //  ins_encode( OpcP, RegMem( op1, op2) );
12350 //%}
12351 
12352 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12353   match(Set cr (CmpU src zero));
12354 
12355   format %{ "TESTu  $src,$src" %}
12356   opcode(0x85);
12357   ins_encode( OpcP, RegReg( src, src ) );
12358   ins_pipe( ialu_cr_reg_imm );
12359 %}
12360 
12361 // Unsigned pointer compare Instructions
12362 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12363   match(Set cr (CmpP op1 op2));
12364 
12365   format %{ "CMPu   $op1,$op2" %}
12366   opcode(0x3B);  /* Opcode 3B /r */
12367   ins_encode( OpcP, RegReg( op1, op2) );
12368   ins_pipe( ialu_cr_reg_reg );
12369 %}
12370 
12371 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12372   match(Set cr (CmpP op1 op2));
12373 
12374   format %{ "CMPu   $op1,$op2" %}
12375   opcode(0x81,0x07);  /* Opcode 81 /7 */
12376   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12377   ins_pipe( ialu_cr_reg_imm );
12378 %}
12379 
// Cisc-spilled version of cmpP_eReg
12381 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12382   match(Set cr (CmpP op1 (LoadP op2)));
12383 
12384   format %{ "CMPu   $op1,$op2" %}
12385   ins_cost(500);
12386   opcode(0x3B);  /* Opcode 3B /r */
12387   ins_encode( OpcP, RegMem( op1, op2) );
12388   ins_pipe( ialu_cr_reg_mem );
12389 %}
12390 
12391 // // Cisc-spilled version of cmpP_eReg
12392 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12393 //  match(Set cr (CmpP (LoadP op1) op2));
12394 //
12395 //  format %{ "CMPu   $op1,$op2" %}
12396 //  ins_cost(500);
12397 //  opcode(0x39);  /* Opcode 39 /r */
12398 //  ins_encode( OpcP, RegMem( op1, op2) );
12399 //%}
12400 
12401 // Compare raw pointer (used in out-of-heap check).
12402 // Only works because non-oop pointers must be raw pointers
12403 // and raw pointers have no anti-dependencies.
12404 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12405   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12406   match(Set cr (CmpP op1 (LoadP op2)));
12407 
12408   format %{ "CMPu   $op1,$op2" %}
12409   opcode(0x3B);  /* Opcode 3B /r */
12410   ins_encode( OpcP, RegMem( op1, op2) );
12411   ins_pipe( ialu_cr_reg_mem );
12412 %}
12413 
12414 //
12415 // This will generate a signed flags result. This should be ok
12416 // since any compare to a zero should be eq/neq.
12417 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12418   match(Set cr (CmpP src zero));
12419 
12420   format %{ "TEST   $src,$src" %}
12421   opcode(0x85);
12422   ins_encode( OpcP, RegReg( src, src ) );
12423   ins_pipe( ialu_cr_reg_imm );
12424 %}
12425 
12426 // Cisc-spilled version of testP_reg
12427 // This will generate a signed flags result. This should be ok
12428 // since any compare to a zero should be eq/neq.
12429 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12430   match(Set cr (CmpP (LoadP op) zero));
12431 
12432   format %{ "TEST   $op,0xFFFFFFFF" %}
12433   ins_cost(500);
12434   opcode(0xF7);               /* Opcode F7 /0 */
12435   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12436   ins_pipe( ialu_cr_reg_imm );
12437 %}
12438 
12439 // Yanked all unsigned pointer compare operations.
12440 // Pointer compares are done with CmpP which is already unsigned.
12441 
12442 //----------Max and Min--------------------------------------------------------
12443 // Min Instructions
12444 ////
12445 //   *** Min and Max using the conditional move are slower than the
12446 //   *** branch version on a Pentium III.
12447 // // Conditional move for min
12448 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12449 //  effect( USE_DEF op2, USE op1, USE cr );
12450 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12451 //  opcode(0x4C,0x0F);
12452 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12453 //  ins_pipe( pipe_cmov_reg );
12454 //%}
12455 //
12456 //// Min Register with Register (P6 version)
12457 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12458 //  predicate(VM_Version::supports_cmov() );
12459 //  match(Set op2 (MinI op1 op2));
12460 //  ins_cost(200);
12461 //  expand %{
12462 //    eFlagsReg cr;
12463 //    compI_eReg(cr,op1,op2);
12464 //    cmovI_reg_lt(op2,op1,cr);
12465 //  %}
12466 //%}
12467 
12468 // Min Register with Register (generic version)
12469 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12470   match(Set dst (MinI dst src));
12471   effect(KILL flags);
12472   ins_cost(300);
12473 
12474   format %{ "MIN    $dst,$src" %}
12475   opcode(0xCC);
12476   ins_encode( min_enc(dst,src) );
12477   ins_pipe( pipe_slow );
12478 %}
12479 
12480 // Max Register with Register
12481 //   *** Min and Max using the conditional move are slower than the
12482 //   *** branch version on a Pentium III.
12483 // // Conditional move for max
12484 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12485 //  effect( USE_DEF op2, USE op1, USE cr );
12486 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12487 //  opcode(0x4F,0x0F);
12488 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12489 //  ins_pipe( pipe_cmov_reg );
12490 //%}
12491 //
12492 // // Max Register with Register (P6 version)
12493 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12494 //  predicate(VM_Version::supports_cmov() );
12495 //  match(Set op2 (MaxI op1 op2));
12496 //  ins_cost(200);
12497 //  expand %{
12498 //    eFlagsReg cr;
12499 //    compI_eReg(cr,op1,op2);
12500 //    cmovI_reg_gt(op2,op1,cr);
12501 //  %}
12502 //%}
12503 
12504 // Max Register with Register (generic version)
12505 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12506   match(Set dst (MaxI dst src));
12507   effect(KILL flags);
12508   ins_cost(300);
12509 
12510   format %{ "MAX    $dst,$src" %}
12511   opcode(0xCC);
12512   ins_encode( max_enc(dst,src) );
12513   ins_pipe( pipe_slow );
12514 %}
12515 
12516 // ============================================================================
// Counted-loop limit node, which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
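// A worked example of the formula in the format string below (illustration
// only): with $init = 0, $limit = 10 and $stride = 3 the exact limit is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 0 + 3 * 4 = 12,
// i.e. the first stride-multiple at or beyond the original limit, which is the
// value the trip counter holds when the loop exits.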
12520 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12521   match(Set limit (LoopLimit (Binary init limit) stride));
12522   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12523   ins_cost(300);
12524 
12525   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12526   ins_encode %{
12527     int strd = (int)$stride$$constant;
12528     assert(strd != 1 && strd != -1, "sanity");
12529     int m1 = (strd > 0) ? 1 : -1;
12530     // Convert limit to long (EAX:EDX)
12531     __ cdql();
12532     // Convert init to long (init:tmp)
12533     __ movl($tmp$$Register, $init$$Register);
12534     __ sarl($tmp$$Register, 31);
12535     // $limit - $init
12536     __ subl($limit$$Register, $init$$Register);
12537     __ sbbl($limit_hi$$Register, $tmp$$Register);
12538     // + ($stride - 1)
12539     if (strd > 0) {
12540       __ addl($limit$$Register, (strd - 1));
12541       __ adcl($limit_hi$$Register, 0);
12542       __ movl($tmp$$Register, strd);
12543     } else {
12544       __ addl($limit$$Register, (strd + 1));
12545       __ adcl($limit_hi$$Register, -1);
12546       __ lneg($limit_hi$$Register, $limit$$Register);
12547       __ movl($tmp$$Register, -strd);
12548     }
    // signed division: (EAX:EDX) / pos_stride
12550     __ idivl($tmp$$Register);
12551     if (strd < 0) {
12552       // restore sign
12553       __ negl($tmp$$Register);
12554     }
12555     // (EAX) * stride
12556     __ mull($tmp$$Register);
12557     // + init (ignore upper bits)
12558     __ addl($limit$$Register, $init$$Register);
12559   %}
12560   ins_pipe( pipe_slow );
12561 %}
12562 
12563 // ============================================================================
12564 // Branch Instructions
12565 // Jump Table
12566 instruct jumpXtnd(rRegI switch_val) %{
12567   match(Jump switch_val);
12568   ins_cost(350);
12569   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12570   ins_encode %{
12571     // Jump to Address(table_base + switch_reg)
12572     Address index(noreg, $switch_val$$Register, Address::times_1);
12573     __ jump(ArrayAddress($constantaddress, index));
12574   %}
12575   ins_pipe(pipe_jmp);
12576 %}
12577 
12578 // Jump Direct - Label defines a relative address from JMP+1
12579 instruct jmpDir(label labl) %{
12580   match(Goto);
12581   effect(USE labl);
12582 
12583   ins_cost(300);
12584   format %{ "JMP    $labl" %}
12585   size(5);
12586   ins_encode %{
12587     Label* L = $labl$$label;
12588     __ jmp(*L, false); // Always long jump
12589   %}
12590   ins_pipe( pipe_jmp );
12591 %}
12592 
12593 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12594 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12595   match(If cop cr);
12596   effect(USE labl);
12597 
12598   ins_cost(300);
12599   format %{ "J$cop    $labl" %}
12600   size(6);
12601   ins_encode %{
12602     Label* L = $labl$$label;
12603     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12604   %}
12605   ins_pipe( pipe_jcc );
12606 %}
12607 
12608 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12609 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12610   predicate(!n->has_vector_mask_set());
12611   match(CountedLoopEnd cop cr);
12612   effect(USE labl);
12613 
12614   ins_cost(300);
12615   format %{ "J$cop    $labl\t# Loop end" %}
12616   size(6);
12617   ins_encode %{
12618     Label* L = $labl$$label;
12619     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12620   %}
12621   ins_pipe( pipe_jcc );
12622 %}
12623 
12624 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12625 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12626   predicate(!n->has_vector_mask_set());
12627   match(CountedLoopEnd cop cmp);
12628   effect(USE labl);
12629 
12630   ins_cost(300);
12631   format %{ "J$cop,u  $labl\t# Loop end" %}
12632   size(6);
12633   ins_encode %{
12634     Label* L = $labl$$label;
12635     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12636   %}
12637   ins_pipe( pipe_jcc );
12638 %}
12639 
12640 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12641   predicate(!n->has_vector_mask_set());
12642   match(CountedLoopEnd cop cmp);
12643   effect(USE labl);
12644 
12645   ins_cost(200);
12646   format %{ "J$cop,u  $labl\t# Loop end" %}
12647   size(6);
12648   ins_encode %{
12649     Label* L = $labl$$label;
12650     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12651   %}
12652   ins_pipe( pipe_jcc );
12653 %}
12654 
12655 // mask version
12656 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12659 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12660   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12661   match(CountedLoopEnd cop cr);
12662   effect(USE labl, TEMP ktmp);
12663 
12664   ins_cost(400);
12665   format %{ "J$cop    $labl\t# Loop end\n\t"
12666             "restorevectmask \t# vector mask restore for loops" %}
12667   size(10);
12668   ins_encode %{
12669     Label* L = $labl$$label;
12670     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12671     __ restorevectmask($ktmp$$KRegister);
12672   %}
12673   ins_pipe( pipe_jcc );
12674 %}
12675 
12676 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12679 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12680   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12681   match(CountedLoopEnd cop cmp);
12682   effect(USE labl, TEMP ktmp);
12683 
12684   ins_cost(400);
12685   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12686             "restorevectmask \t# vector mask restore for loops" %}
12687   size(10);
12688   ins_encode %{
12689     Label* L = $labl$$label;
12690     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12691     __ restorevectmask($ktmp$$KRegister);
12692   %}
12693   ins_pipe( pipe_jcc );
12694 %}
12695 
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12698 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12699   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12700   match(CountedLoopEnd cop cmp);
12701   effect(USE labl, TEMP ktmp);
12702 
12703   ins_cost(300);
12704   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12705             "restorevectmask \t# vector mask restore for loops" %}
12706   size(10);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710     __ restorevectmask($ktmp$$KRegister);
12711   %}
12712   ins_pipe( pipe_jcc );
12713 %}
12714 
12715 // Jump Direct Conditional - using unsigned comparison
12716 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12717   match(If cop cmp);
12718   effect(USE labl);
12719 
12720   ins_cost(300);
12721   format %{ "J$cop,u  $labl" %}
12722   size(6);
12723   ins_encode %{
12724     Label* L = $labl$$label;
12725     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12726   %}
12727   ins_pipe(pipe_jcc);
12728 %}
12729 
12730 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12731   match(If cop cmp);
12732   effect(USE labl);
12733 
12734   ins_cost(200);
12735   format %{ "J$cop,u  $labl" %}
12736   size(6);
12737   ins_encode %{
12738     Label* L = $labl$$label;
12739     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12740   %}
12741   ins_pipe(pipe_jcc);
12742 %}
12743 
12744 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12745   match(If cop cmp);
12746   effect(USE labl);
12747 
12748   ins_cost(200);
12749   format %{ $$template
12750     if ($cop$$cmpcode == Assembler::notEqual) {
12751       $$emit$$"JP,u   $labl\n\t"
12752       $$emit$$"J$cop,u   $labl"
12753     } else {
12754       $$emit$$"JP,u   done\n\t"
12755       $$emit$$"J$cop,u   $labl\n\t"
12756       $$emit$$"done:"
12757     }
12758   %}
12759   ins_encode %{
12760     Label* l = $labl$$label;
12761     if ($cop$$cmpcode == Assembler::notEqual) {
12762       __ jcc(Assembler::parity, *l, false);
12763       __ jcc(Assembler::notEqual, *l, false);
12764     } else if ($cop$$cmpcode == Assembler::equal) {
12765       Label done;
12766       __ jccb(Assembler::parity, done);
12767       __ jcc(Assembler::equal, *l, false);
12768       __ bind(done);
12769     } else {
12770        ShouldNotReachHere();
12771     }
12772   %}
12773   ins_pipe(pipe_jcc);
12774 %}
12775 
12776 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
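// Roughly the equivalent logic, as a C-like sketch for orientation only (the
// accessor names here are illustrative; the authoritative code is the
// enc_PartialSubtypeCheck encoding used below):
//
//   Klass** s = sub->secondary_supers_data();   // hypothetical accessor
//   int     n = sub->secondary_supers_length(); // hypothetical accessor
//   for (int i = 0; i < n; i++) {
//     if (s[i] == super) {
//       sub->set_secondary_super_cache(super);  // remember the hit
//       return 0;                               // zero / Z flag  => hit
//     }
//   }
//   return not_zero;                            // NZ             => miss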
12781 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12782   match(Set result (PartialSubtypeCheck sub super));
12783   effect( KILL rcx, KILL cr );
12784 
12785   ins_cost(1100);  // slightly larger than the next version
12786   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12787             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12788             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12789             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12790             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12791             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12792             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12793      "miss:\t" %}
12794 
12795   opcode(0x1); // Force a XOR of EDI
12796   ins_encode( enc_PartialSubtypeCheck() );
12797   ins_pipe( pipe_slow );
12798 %}
12799 
12800 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12801   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12802   effect( KILL rcx, KILL result );
12803 
12804   ins_cost(1000);
12805   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12806             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12807             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12808             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12809             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12810             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12811      "miss:\t" %}
12812 
12813   opcode(0x0);  // No need to XOR EDI
12814   ins_encode( enc_PartialSubtypeCheck() );
12815   ins_pipe( pipe_slow );
12816 %}
12817 
12818 // ============================================================================
12819 // Branch Instructions -- short offset versions
12820 //
12821 // These instructions are used to replace jumps of a long offset (the default
12822 // match) with jumps of a shorter offset.  These instructions are all tagged
12823 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12824 // match rules in general matching.  Instead, the ADLC generates a conversion
12825 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short form may be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
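// For example, the long conditional branches above use the 6-byte
// 0x0F 0x8x rel32 encoding (size(6)), while their *_short twins below use the
// 2-byte 0x7x rel8 encoding via jccb (size(2)); likewise JMP rel32 (5 bytes)
// shrinks to JMP rel8 (2 bytes).  The replacement is only legal when the
// target lies within the signed 8-bit displacement, which is what the
// short-branch offset check verifies.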
12829 
12830 // Jump Direct - Label defines a relative address from JMP+1
12831 instruct jmpDir_short(label labl) %{
12832   match(Goto);
12833   effect(USE labl);
12834 
12835   ins_cost(300);
12836   format %{ "JMP,s  $labl" %}
12837   size(2);
12838   ins_encode %{
12839     Label* L = $labl$$label;
12840     __ jmpb(*L);
12841   %}
12842   ins_pipe( pipe_jmp );
12843   ins_short_branch(1);
12844 %}
12845 
12846 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12847 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12848   match(If cop cr);
12849   effect(USE labl);
12850 
12851   ins_cost(300);
12852   format %{ "J$cop,s  $labl" %}
12853   size(2);
12854   ins_encode %{
12855     Label* L = $labl$$label;
12856     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12857   %}
12858   ins_pipe( pipe_jcc );
12859   ins_short_branch(1);
12860 %}
12861 
12862 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12863 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12864   match(CountedLoopEnd cop cr);
12865   effect(USE labl);
12866 
12867   ins_cost(300);
12868   format %{ "J$cop,s  $labl\t# Loop end" %}
12869   size(2);
12870   ins_encode %{
12871     Label* L = $labl$$label;
12872     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12873   %}
12874   ins_pipe( pipe_jcc );
12875   ins_short_branch(1);
12876 %}
12877 
12878 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12879 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12880   match(CountedLoopEnd cop cmp);
12881   effect(USE labl);
12882 
12883   ins_cost(300);
12884   format %{ "J$cop,us $labl\t# Loop end" %}
12885   size(2);
12886   ins_encode %{
12887     Label* L = $labl$$label;
12888     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12889   %}
12890   ins_pipe( pipe_jcc );
12891   ins_short_branch(1);
12892 %}
12893 
12894 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12895   match(CountedLoopEnd cop cmp);
12896   effect(USE labl);
12897 
12898   ins_cost(300);
12899   format %{ "J$cop,us $labl\t# Loop end" %}
12900   size(2);
12901   ins_encode %{
12902     Label* L = $labl$$label;
12903     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12904   %}
12905   ins_pipe( pipe_jcc );
12906   ins_short_branch(1);
12907 %}
12908 
12909 // Jump Direct Conditional - using unsigned comparison
12910 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12911   match(If cop cmp);
12912   effect(USE labl);
12913 
12914   ins_cost(300);
12915   format %{ "J$cop,us $labl" %}
12916   size(2);
12917   ins_encode %{
12918     Label* L = $labl$$label;
12919     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12920   %}
12921   ins_pipe( pipe_jcc );
12922   ins_short_branch(1);
12923 %}
12924 
12925 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12926   match(If cop cmp);
12927   effect(USE labl);
12928 
12929   ins_cost(300);
12930   format %{ "J$cop,us $labl" %}
12931   size(2);
12932   ins_encode %{
12933     Label* L = $labl$$label;
12934     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12935   %}
12936   ins_pipe( pipe_jcc );
12937   ins_short_branch(1);
12938 %}
12939 
12940 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12941   match(If cop cmp);
12942   effect(USE labl);
12943 
12944   ins_cost(300);
12945   format %{ $$template
12946     if ($cop$$cmpcode == Assembler::notEqual) {
12947       $$emit$$"JP,u,s   $labl\n\t"
12948       $$emit$$"J$cop,u,s   $labl"
12949     } else {
12950       $$emit$$"JP,u,s   done\n\t"
12951       $$emit$$"J$cop,u,s  $labl\n\t"
12952       $$emit$$"done:"
12953     }
12954   %}
12955   size(4);
12956   ins_encode %{
12957     Label* l = $labl$$label;
12958     if ($cop$$cmpcode == Assembler::notEqual) {
12959       __ jccb(Assembler::parity, *l);
12960       __ jccb(Assembler::notEqual, *l);
12961     } else if ($cop$$cmpcode == Assembler::equal) {
12962       Label done;
12963       __ jccb(Assembler::parity, done);
12964       __ jccb(Assembler::equal, *l);
12965       __ bind(done);
12966     } else {
12967        ShouldNotReachHere();
12968     }
12969   %}
12970   ins_pipe(pipe_jcc);
12971   ins_short_branch(1);
12972 %}
12973 
12974 // ============================================================================
12975 // Long Compare
12976 //
12977 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12978 // is tricky.  The flavor of compare used depends on whether we are testing
12979 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12980 // The GE test is the negated LT test.  The LE test can be had by commuting
12981 // the operands (yielding a GE test) and then negating; negate again for the
12982 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12983 // NE test is negated from that.
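// As a concrete illustration (a sketch of the reg-reg LT/GE flavor defined
// further below, mirroring its format string):
//
//   CMP  src1.lo, src2.lo     // produce the borrow out of the low halves
//   MOV  tmp, src1.hi
//   SBB  tmp, src2.hi         // hi1 - hi2 - borrow: the sign/overflow flags
//                             // now reflect the full 64-bit signed compare,
//                             // so JL / JGE test src1 < src2 / src1 >= src2.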
12984 
12985 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12986 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12987 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12988 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12989 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12990 // foo match ends up with the wrong leaf.  One fix is to not match both
12991 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12992 // both forms beat the trinary form of long-compare and both are very useful
12993 // on Intel which has so few registers.
12994 
12995 // Manifest a CmpL result in an integer register.  Very painful.
12996 // This is the test to avoid.
12997 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12998   match(Set dst (CmpL3 src1 src2));
12999   effect( KILL flags );
13000   ins_cost(1000);
13001   format %{ "XOR    $dst,$dst\n\t"
13002             "CMP    $src1.hi,$src2.hi\n\t"
13003             "JLT,s  m_one\n\t"
13004             "JGT,s  p_one\n\t"
13005             "CMP    $src1.lo,$src2.lo\n\t"
13006             "JB,s   m_one\n\t"
13007             "JEQ,s  done\n"
13008     "p_one:\tINC    $dst\n\t"
13009             "JMP,s  done\n"
13010     "m_one:\tDEC    $dst\n"
13011      "done:" %}
13012   ins_encode %{
13013     Label p_one, m_one, done;
13014     __ xorptr($dst$$Register, $dst$$Register);
13015     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13016     __ jccb(Assembler::less,    m_one);
13017     __ jccb(Assembler::greater, p_one);
13018     __ cmpl($src1$$Register, $src2$$Register);
13019     __ jccb(Assembler::below,   m_one);
13020     __ jccb(Assembler::equal,   done);
13021     __ bind(p_one);
13022     __ incrementl($dst$$Register);
13023     __ jmpb(done);
13024     __ bind(m_one);
13025     __ decrementl($dst$$Register);
13026     __ bind(done);
13027   %}
13028   ins_pipe( pipe_slow );
13029 %}
13030 
13031 //======
13032 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13033 // compares.  Can be used for LE or GT compares by reversing arguments.
13034 // NOT GOOD FOR EQ/NE tests.
13035 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13036   match( Set flags (CmpL src zero ));
13037   ins_cost(100);
13038   format %{ "TEST   $src.hi,$src.hi" %}
13039   opcode(0x85);
13040   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13041   ins_pipe( ialu_cr_reg_reg );
13042 %}
13043 
13044 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13045 // compares.  Can be used for LE or GT compares by reversing arguments.
13046 // NOT GOOD FOR EQ/NE tests.
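// Why these flags are useless for EQ/NE (illustration): with
// src1 = 0x0000000000000005 and src2 = 0x0000000000000003, CMP on the low
// halves leaves no borrow, and SBB on the identical high halves then yields
// zero, setting ZF even though the longs differ.  Only the sign/overflow
// based LT/GE conditions are meaningful after this sequence.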
13047 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13048   match( Set flags (CmpL src1 src2 ));
13049   effect( TEMP tmp );
13050   ins_cost(300);
13051   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13052             "MOV    $tmp,$src1.hi\n\t"
13053             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13054   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13055   ins_pipe( ialu_cr_reg_reg );
13056 %}
13057 
// Long compares reg < zero/reg OR reg >= zero/reg.
13059 // Just a wrapper for a normal branch, plus the predicate test.
13060 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13061   match(If cmp flags);
13062   effect(USE labl);
13063   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13064   expand %{
13065     jmpCon(cmp,flags,labl);    // JLT or JGE...
13066   %}
13067 %}
13068 
13069 //======
13070 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13071 // compares.  Can be used for LE or GT compares by reversing arguments.
13072 // NOT GOOD FOR EQ/NE tests.
13073 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13074   match(Set flags (CmpUL src zero));
13075   ins_cost(100);
13076   format %{ "TEST   $src.hi,$src.hi" %}
13077   opcode(0x85);
13078   ins_encode(OpcP, RegReg_Hi2(src, src));
13079   ins_pipe(ialu_cr_reg_reg);
13080 %}
13081 
13082 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13083 // compares.  Can be used for LE or GT compares by reversing arguments.
13084 // NOT GOOD FOR EQ/NE tests.
13085 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13086   match(Set flags (CmpUL src1 src2));
13087   effect(TEMP tmp);
13088   ins_cost(300);
13089   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13090             "MOV    $tmp,$src1.hi\n\t"
13091             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13092   ins_encode(long_cmp_flags2(src1, src2, tmp));
13093   ins_pipe(ialu_cr_reg_reg);
13094 %}
13095 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13097 // Just a wrapper for a normal branch, plus the predicate test.
13098 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13099   match(If cmp flags);
13100   effect(USE labl);
13101   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13102   expand %{
13103     jmpCon(cmp, flags, labl);    // JLT or JGE...
13104   %}
13105 %}
13106 
13107 // Compare 2 longs and CMOVE longs.
13108 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13109   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13110   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13111   ins_cost(400);
13112   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13113             "CMOV$cmp $dst.hi,$src.hi" %}
13114   opcode(0x0F,0x40);
13115   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13116   ins_pipe( pipe_cmov_reg_long );
13117 %}
13118 
13119 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13120   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13121   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13122   ins_cost(500);
13123   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13124             "CMOV$cmp $dst.hi,$src.hi" %}
13125   opcode(0x0F,0x40);
13126   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13127   ins_pipe( pipe_cmov_reg_long );
13128 %}
13129 
13130 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13131   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13133   ins_cost(400);
13134   expand %{
13135     cmovLL_reg_LTGE(cmp, flags, dst, src);
13136   %}
13137 %}
13138 
13139 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13140   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13141   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13142   ins_cost(500);
13143   expand %{
13144     cmovLL_mem_LTGE(cmp, flags, dst, src);
13145   %}
13146 %}
13147 
13148 // Compare 2 longs and CMOVE ints.
13149 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13150   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13151   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13152   ins_cost(200);
13153   format %{ "CMOV$cmp $dst,$src" %}
13154   opcode(0x0F,0x40);
13155   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13156   ins_pipe( pipe_cmov_reg );
13157 %}
13158 
13159 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13160   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13161   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13162   ins_cost(250);
13163   format %{ "CMOV$cmp $dst,$src" %}
13164   opcode(0x0F,0x40);
13165   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13166   ins_pipe( pipe_cmov_mem );
13167 %}
13168 
13169 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13170   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13171   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13172   ins_cost(200);
13173   expand %{
13174     cmovII_reg_LTGE(cmp, flags, dst, src);
13175   %}
13176 %}
13177 
13178 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13179   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13180   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13181   ins_cost(250);
13182   expand %{
13183     cmovII_mem_LTGE(cmp, flags, dst, src);
13184   %}
13185 %}
13186 
13187 // Compare 2 longs and CMOVE ptrs.
13188 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13189   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13190   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13191   ins_cost(200);
13192   format %{ "CMOV$cmp $dst,$src" %}
13193   opcode(0x0F,0x40);
13194   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13195   ins_pipe( pipe_cmov_reg );
13196 %}
13197 
13198 // Compare 2 unsigned longs and CMOVE ptrs.
13199 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13200   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13201   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13202   ins_cost(200);
13203   expand %{
13204     cmovPP_reg_LTGE(cmp,flags,dst,src);
13205   %}
13206 %}
13207 
13208 // Compare 2 longs and CMOVE doubles
13209 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13211   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13212   ins_cost(200);
13213   expand %{
13214     fcmovDPR_regS(cmp,flags,dst,src);
13215   %}
13216 %}
13217 
13218 // Compare 2 longs and CMOVE doubles
13219 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13221   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13222   ins_cost(200);
13223   expand %{
13224     fcmovD_regS(cmp,flags,dst,src);
13225   %}
13226 %}
13227 
13228 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13230   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13231   ins_cost(200);
13232   expand %{
13233     fcmovFPR_regS(cmp,flags,dst,src);
13234   %}
13235 %}
13236 
13237 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13239   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13240   ins_cost(200);
13241   expand %{
13242     fcmovF_regS(cmp,flags,dst,src);
13243   %}
13244 %}
13245 
13246 //======
13247 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13248 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13249   match( Set flags (CmpL src zero ));
13250   effect(TEMP tmp);
13251   ins_cost(200);
13252   format %{ "MOV    $tmp,$src.lo\n\t"
13253             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13254   ins_encode( long_cmp_flags0( src, tmp ) );
13255   ins_pipe( ialu_reg_reg_long );
13256 %}
13257 
13258 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13259 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13260   match( Set flags (CmpL src1 src2 ));
13261   ins_cost(200+300);
13262   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13263             "JNE,s  skip\n\t"
13264             "CMP    $src1.hi,$src2.hi\n\t"
13265      "skip:\t" %}
13266   ins_encode( long_cmp_flags1( src1, src2 ) );
13267   ins_pipe( ialu_cr_reg_reg );
13268 %}
13269 
13270 // Long compare reg == zero/reg OR reg != zero/reg
13271 // Just a wrapper for a normal branch, plus the predicate test.
13272 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13273   match(If cmp flags);
13274   effect(USE labl);
13275   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13276   expand %{
13277     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13278   %}
13279 %}
13280 
13281 //======
13282 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13283 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13284   match(Set flags (CmpUL src zero));
13285   effect(TEMP tmp);
13286   ins_cost(200);
13287   format %{ "MOV    $tmp,$src.lo\n\t"
13288             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13289   ins_encode(long_cmp_flags0(src, tmp));
13290   ins_pipe(ialu_reg_reg_long);
13291 %}
13292 
13293 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13294 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13295   match(Set flags (CmpUL src1 src2));
13296   ins_cost(200+300);
13297   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13298             "JNE,s  skip\n\t"
13299             "CMP    $src1.hi,$src2.hi\n\t"
13300      "skip:\t" %}
13301   ins_encode(long_cmp_flags1(src1, src2));
13302   ins_pipe(ialu_cr_reg_reg);
13303 %}
13304 
13305 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13306 // Just a wrapper for a normal branch, plus the predicate test.
13307 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13308   match(If cmp flags);
13309   effect(USE labl);
13310   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13311   expand %{
13312     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13313   %}
13314 %}
13315 
13316 // Compare 2 longs and CMOVE longs.
13317 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13318   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13319   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13320   ins_cost(400);
13321   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13322             "CMOV$cmp $dst.hi,$src.hi" %}
13323   opcode(0x0F,0x40);
13324   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13325   ins_pipe( pipe_cmov_reg_long );
13326 %}
13327 
13328 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13329   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13330   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13331   ins_cost(500);
13332   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13333             "CMOV$cmp $dst.hi,$src.hi" %}
13334   opcode(0x0F,0x40);
13335   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13336   ins_pipe( pipe_cmov_reg_long );
13337 %}
13338 
13339 // Compare 2 longs and CMOVE ints.
13340 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13341   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13342   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13343   ins_cost(200);
13344   format %{ "CMOV$cmp $dst,$src" %}
13345   opcode(0x0F,0x40);
13346   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13347   ins_pipe( pipe_cmov_reg );
13348 %}
13349 
13350 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13351   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13352   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13353   ins_cost(250);
13354   format %{ "CMOV$cmp $dst,$src" %}
13355   opcode(0x0F,0x40);
13356   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13357   ins_pipe( pipe_cmov_mem );
13358 %}
13359 
13360 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13361   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13362   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13363   ins_cost(200);
13364   expand %{
13365     cmovII_reg_EQNE(cmp, flags, dst, src);
13366   %}
13367 %}
13368 
13369 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13370   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13371   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13372   ins_cost(250);
13373   expand %{
13374     cmovII_mem_EQNE(cmp, flags, dst, src);
13375   %}
13376 %}
13377 
13378 // Compare 2 longs and CMOVE ptrs.
13379 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13380   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13381   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13382   ins_cost(200);
13383   format %{ "CMOV$cmp $dst,$src" %}
13384   opcode(0x0F,0x40);
13385   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13386   ins_pipe( pipe_cmov_reg );
13387 %}
13388 
13389 // Compare 2 unsigned longs and CMOVE ptrs.
13390 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13391   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13392   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13393   ins_cost(200);
13394   expand %{
13395     cmovPP_reg_EQNE(cmp,flags,dst,src);
13396   %}
13397 %}
13398 
13399 // Compare 2 longs and CMOVE doubles
13400 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13402   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13403   ins_cost(200);
13404   expand %{
13405     fcmovDPR_regS(cmp,flags,dst,src);
13406   %}
13407 %}
13408 
13409 // Compare 2 longs and CMOVE doubles
13410 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13412   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13413   ins_cost(200);
13414   expand %{
13415     fcmovD_regS(cmp,flags,dst,src);
13416   %}
13417 %}
13418 
13419 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13421   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13422   ins_cost(200);
13423   expand %{
13424     fcmovFPR_regS(cmp,flags,dst,src);
13425   %}
13426 %}
13427 
13428 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13430   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13431   ins_cost(200);
13432   expand %{
13433     fcmovF_regS(cmp,flags,dst,src);
13434   %}
13435 %}
13436 
13437 //======
13438 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13439 // Same as cmpL_reg_flags_LEGT except must negate src
13440 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13441   match( Set flags (CmpL src zero ));
13442   effect( TEMP tmp );
13443   ins_cost(300);
13444   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13445             "CMP    $tmp,$src.lo\n\t"
13446             "SBB    $tmp,$src.hi\n\t" %}
13447   ins_encode( long_cmp_flags3(src, tmp) );
13448   ins_pipe( ialu_reg_reg_long );
13449 %}
13450 
13451 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13452 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13453 // requires a commuted test to get the same result.
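// For example (sketch): src1 <= src2 is the same test as src2 >= src1, so the
// flags computed here on the swapped operands are branched on with the
// commuted condition, which is what the cmpOp_commute operand supplies.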
13454 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13455   match( Set flags (CmpL src1 src2 ));
13456   effect( TEMP tmp );
13457   ins_cost(300);
13458   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13459             "MOV    $tmp,$src2.hi\n\t"
13460             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13461   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13462   ins_pipe( ialu_cr_reg_reg );
13463 %}
13464 
// Long compares reg < zero/reg OR reg >= zero/reg.
13466 // Just a wrapper for a normal branch, plus the predicate test
13467 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13468   match(If cmp flags);
13469   effect(USE labl);
13470   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13471   ins_cost(300);
13472   expand %{
13473     jmpCon(cmp,flags,labl);    // JGT or JLE...
13474   %}
13475 %}
13476 
13477 //======
13478 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13479 // Same as cmpUL_reg_flags_LEGT except must negate src
13480 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13481   match(Set flags (CmpUL src zero));
13482   effect(TEMP tmp);
13483   ins_cost(300);
13484   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13485             "CMP    $tmp,$src.lo\n\t"
13486             "SBB    $tmp,$src.hi\n\t" %}
13487   ins_encode(long_cmp_flags3(src, tmp));
13488   ins_pipe(ialu_reg_reg_long);
13489 %}
13490 
13491 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13492 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13493 // requires a commuted test to get the same result.
13494 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13495   match(Set flags (CmpUL src1 src2));
13496   effect(TEMP tmp);
13497   ins_cost(300);
13498   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13499             "MOV    $tmp,$src2.hi\n\t"
13500             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13501   ins_encode(long_cmp_flags2( src2, src1, tmp));
13502   ins_pipe(ialu_cr_reg_reg);
13503 %}
13504 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13506 // Just a wrapper for a normal branch, plus the predicate test
13507 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13508   match(If cmp flags);
13509   effect(USE labl);
13510   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13511   ins_cost(300);
13512   expand %{
13513     jmpCon(cmp, flags, labl);    // JGT or JLE...
13514   %}
13515 %}
13516 
13517 // Compare 2 longs and CMOVE longs.
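// Both CMOVcc instructions in the rules below test the same flags, and CMOV
// itself does not modify flags, so the lo and hi halves move together or not
// at all.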
13518 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13519   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13520   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13521   ins_cost(400);
13522   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13523             "CMOV$cmp $dst.hi,$src.hi" %}
13524   opcode(0x0F,0x40);
13525   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13526   ins_pipe( pipe_cmov_reg_long );
13527 %}
13528 
13529 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13530   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13531   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13532   ins_cost(500);
13533   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13534             "CMOV$cmp $dst.hi,$src.hi+4" %}
13535   opcode(0x0F,0x40);
13536   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13537   ins_pipe( pipe_cmov_reg_long );
13538 %}
13539 
13540 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13541   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13542   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13543   ins_cost(400);
13544   expand %{
13545     cmovLL_reg_LEGT(cmp, flags, dst, src);
13546   %}
13547 %}
13548 
13549 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13550   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13551   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13552   ins_cost(500);
13553   expand %{
13554     cmovLL_mem_LEGT(cmp, flags, dst, src);
13555   %}
13556 %}
13557 
13558 // Compare 2 longs and CMOVE ints.
13559 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13560   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13561   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13562   ins_cost(200);
13563   format %{ "CMOV$cmp $dst,$src" %}
13564   opcode(0x0F,0x40);
13565   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13566   ins_pipe( pipe_cmov_reg );
13567 %}
13568 
13569 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13570   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13571   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13572   ins_cost(250);
13573   format %{ "CMOV$cmp $dst,$src" %}
13574   opcode(0x0F,0x40);
13575   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13576   ins_pipe( pipe_cmov_mem );
13577 %}
13578 
13579 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13580   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13581   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13582   ins_cost(200);
13583   expand %{
13584     cmovII_reg_LEGT(cmp, flags, dst, src);
13585   %}
13586 %}
13587 
13588 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13589   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13590   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13591   ins_cost(250);
13592   expand %{
13593     cmovII_mem_LEGT(cmp, flags, dst, src);
13594   %}
13595 %}
13596 
13597 // Compare 2 longs and CMOVE ptrs.
13598 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13599   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13600   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13601   ins_cost(200);
13602   format %{ "CMOV$cmp $dst,$src" %}
13603   opcode(0x0F,0x40);
13604   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13605   ins_pipe( pipe_cmov_reg );
13606 %}
13607 
13608 // Compare 2 unsigned longs and CMOVE ptrs.
13609 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13610   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13611   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13612   ins_cost(200);
13613   expand %{
13614     cmovPP_reg_LEGT(cmp,flags,dst,src);
13615   %}
13616 %}
13617 
13618 // Compare 2 longs and CMOVE doubles
13619 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13621   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13622   ins_cost(200);
13623   expand %{
13624     fcmovDPR_regS(cmp,flags,dst,src);
13625   %}
13626 %}
13627 
13628 // Compare 2 longs and CMOVE doubles
13629 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13631   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13632   ins_cost(200);
13633   expand %{
13634     fcmovD_regS(cmp,flags,dst,src);
13635   %}
13636 %}
13637 
13638 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13640   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13641   ins_cost(200);
13642   expand %{
13643     fcmovFPR_regS(cmp,flags,dst,src);
13644   %}
13645 %}
13646 
13647 
13648 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13650   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13651   ins_cost(200);
13652   expand %{
13653     fcmovF_regS(cmp,flags,dst,src);
13654   %}
13655 %}
13656 
13657 
13658 // ============================================================================
13659 // Procedure Call/Return Instructions
13660 // Call Java Static Instruction
13661 // Note: If this code changes, the corresponding ret_addr_offset() and
13662 //       compute_padding() functions will have to be adjusted.
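// (Rough rationale: ret_addr_offset() reports where the return address lands
// relative to the start of the call, while compute_padding() and the
// ins_alignment(4) below keep the 32-bit call displacement aligned so the
// call site can be patched atomically.)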
13663 instruct CallStaticJavaDirect(method meth) %{
13664   match(CallStaticJava);
13665   effect(USE meth);
13666 
13667   ins_cost(300);
13668   format %{ "CALL,static " %}
13669   opcode(0xE8); /* E8 cd */
13670   ins_encode( pre_call_resets,
13671               Java_Static_Call( meth ),
13672               call_epilog,
13673               post_call_FPU );
13674   ins_pipe( pipe_slow );
13675   ins_alignment(4);
13676 %}
13677 
13678 // Call Java Dynamic Instruction
13679 // Note: If this code changes, the corresponding ret_addr_offset() and
13680 //       compute_padding() functions will have to be adjusted.
13681 instruct CallDynamicJavaDirect(method meth) %{
13682   match(CallDynamicJava);
13683   effect(USE meth);
13684 
13685   ins_cost(300);
13686   format %{ "MOV    EAX,(oop)-1\n\t"
13687             "CALL,dynamic" %}
13688   opcode(0xE8); /* E8 cd */
13689   ins_encode( pre_call_resets,
13690               Java_Dynamic_Call( meth ),
13691               call_epilog,
13692               post_call_FPU );
13693   ins_pipe( pipe_slow );
13694   ins_alignment(4);
13695 %}
13696 
13697 // Call Runtime Instruction
13698 instruct CallRuntimeDirect(method meth) %{
13699   match(CallRuntime );
13700   effect(USE meth);
13701 
13702   ins_cost(300);
13703   format %{ "CALL,runtime " %}
13704   opcode(0xE8); /* E8 cd */
13705   // Use FFREEs to clear entries in float stack
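  // (Rationale, assumed from the calling convention: the x87 register stack
  // must be empty when entering runtime/C code, so any live entries are
  // freed before the call.)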
13706   ins_encode( pre_call_resets,
13707               FFree_Float_Stack_All,
13708               Java_To_Runtime( meth ),
13709               post_call_FPU );
13710   ins_pipe( pipe_slow );
13711 %}
13712 
13713 // Call runtime without safepoint
13714 instruct CallLeafDirect(method meth) %{
13715   match(CallLeaf);
13716   effect(USE meth);
13717 
13718   ins_cost(300);
13719   format %{ "CALL_LEAF,runtime " %}
13720   opcode(0xE8); /* E8 cd */
13721   ins_encode( pre_call_resets,
13722               FFree_Float_Stack_All,
13723               Java_To_Runtime( meth ),
13724               Verify_FPU_For_Leaf, post_call_FPU );
13725   ins_pipe( pipe_slow );
13726 %}
13727 
13728 instruct CallLeafNoFPDirect(method meth) %{
13729   match(CallLeafNoFP);
13730   effect(USE meth);
13731 
13732   ins_cost(300);
13733   format %{ "CALL_LEAF_NOFP,runtime " %}
13734   opcode(0xE8); /* E8 cd */
13735   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13736   ins_pipe( pipe_slow );
13737 %}
13738 
13739 
13740 // Return Instruction
13741 // Remove the return address & jump to it.
13742 instruct Ret() %{
13743   match(Return);
13744   format %{ "RET" %}
13745   opcode(0xC3);
13746   ins_encode(OpcP);
13747   ins_pipe( pipe_jmp );
13748 %}
13749 
13750 // Tail Call; Jump from runtime stub to Java code.
13751 // Also known as an 'interprocedural jump'.
13752 // Target of jump will eventually return to caller.
13753 // TailJump below removes the return address.
13754 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13755   match(TailCall jump_target method_ptr);
13756   ins_cost(300);
13757   format %{ "JMP    $jump_target \t# EBX holds method" %}
13758   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13759   ins_encode( OpcP, RegOpc(jump_target) );
13760   ins_pipe( pipe_jmp );
13761 %}
13762 
13763 
13764 // Tail Jump; remove the return address; jump to target.
13765 // TailCall above leaves the return address around.
13766 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13767   match( TailJump jump_target ex_oop );
13768   ins_cost(300);
13769   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13770             "JMP    $jump_target " %}
13771   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13772   ins_encode( enc_pop_rdx,
13773               OpcP, RegOpc(jump_target) );
13774   ins_pipe( pipe_jmp );
13775 %}
13776 
13777 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
13779 // just prior to jumping to this handler.  No code emitted.
13780 instruct CreateException( eAXRegP ex_oop )
13781 %{
13782   match(Set ex_oop (CreateEx));
13783 
13784   size(0);
13785   // use the following format syntax
13786   format %{ "# exception oop is in EAX; no code emitted" %}
13787   ins_encode();
13788   ins_pipe( empty );
13789 %}
13790 
13791 
13792 // Rethrow exception:
13793 // The exception oop will come in the first argument position.
13794 // Then JUMP (not call) to the rethrow stub code.
13795 instruct RethrowException()
13796 %{
13797   match(Rethrow);
13798 
13799   // use the following format syntax
13800   format %{ "JMP    rethrow_stub" %}
13801   ins_encode(enc_rethrow);
13802   ins_pipe( pipe_jmp );
13803 %}
13804 
13805 // inlined locking and unlocking
13806 
13807 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13808   predicate(Compile::current()->use_rtm());
13809   match(Set cr (FastLock object box));
13810   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13811   ins_cost(300);
13812   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13813   ins_encode %{
13814     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13815                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13816                  _rtm_counters, _stack_rtm_counters,
13817                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13818                  true, ra_->C->profile_rtm());
13819   %}
13820   ins_pipe(pipe_slow);
13821 %}
13822 
13823 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13824   predicate(!Compile::current()->use_rtm());
13825   match(Set cr (FastLock object box));
13826   effect(TEMP tmp, TEMP scr, USE_KILL box);
13827   ins_cost(300);
13828   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13829   ins_encode %{
13830     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13831                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13832   %}
13833   ins_pipe(pipe_slow);
13834 %}
13835 
13836 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13837   match(Set cr (FastUnlock object box));
13838   effect(TEMP tmp, USE_KILL box);
13839   ins_cost(300);
13840   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13841   ins_encode %{
13842     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13843   %}
13844   ins_pipe(pipe_slow);
13845 %}
13846 
13847 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13848   predicate(Matcher::vector_length(n) <= 32);
13849   match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13851   ins_encode %{
13852     int mask_len = Matcher::vector_length(this);
13853     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13854   %}
13855   ins_pipe( pipe_slow );
13856 %}
13857 
13858 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13859   predicate(Matcher::vector_length(n) > 32);
13860   match(Set dst (MaskAll src));
13861   effect(TEMP ktmp);
13862   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13863   ins_encode %{
13864     int mask_len = Matcher::vector_length(this);
13865     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13866   %}
13867   ins_pipe( pipe_slow );
13868 %}
13869 
13870 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13871   predicate(Matcher::vector_length(n) > 32);
13872   match(Set dst (MaskAll src));
13873   effect(TEMP ktmp);
13874   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13875   ins_encode %{
13876     int mask_len = Matcher::vector_length(this);
13877     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13878   %}
13879   ins_pipe( pipe_slow );
13880 %}
13881 
13882 // ============================================================================
13883 // Safepoint Instruction
13884 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13885   match(SafePoint poll);
13886   effect(KILL cr, USE poll);
13887 
13888   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13889   ins_cost(125);
13890   // EBP would need size(3)
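  // (TEST EAX,[base] encodes as opcode 0x85 plus one ModRM byte when the base
  // can use mod=00; EBP cannot (mod=00, r/m=101 selects disp32) and would
  // need a zero disp8, hence 3 bytes.)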
13891   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13892   ins_encode %{
13893     __ relocate(relocInfo::poll_type);
13894     address pre_pc = __ pc();
13895     __ testl(rax, Address($poll$$Register, 0));
13896     address post_pc = __ pc();
13897     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13898   %}
13899   ins_pipe(ialu_reg_mem);
13900 %}
13901 
13902 
13903 // ============================================================================
13904 // This name is KNOWN by the ADLC and cannot be changed.
13905 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13906 // for this guy.
13907 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13908   match(Set dst (ThreadLocal));
13909   effect(DEF dst, KILL cr);
13910 
13911   format %{ "MOV    $dst, Thread::current()" %}
13912   ins_encode %{
13913     Register dstReg = as_Register($dst$$reg);
13914     __ get_thread(dstReg);
13915   %}
13916   ins_pipe( ialu_reg_fat );
13917 %}
13918 
13919 
13920 
13921 //----------PEEPHOLE RULES-----------------------------------------------------
13922 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13924 //
13925 // peepmatch ( root_instr_name [preceding_instruction]* );
13926 //
13927 // peepconstraint %{
13928 // (instruction_number.operand_name relational_op instruction_number.operand_name
13929 //  [, ...] );
13930 // // instruction numbers are zero-based using left to right order in peepmatch
13931 //
13932 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13933 // // provide an instruction_number.operand_name for each operand that appears
13934 // // in the replacement instruction's match rule
13935 //
13936 // ---------VM FLAGS---------------------------------------------------------
13937 //
13938 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13939 //
13940 // Each peephole rule is given an identifying number starting with zero and
13941 // increasing by one in the order seen by the parser.  An individual peephole
13942 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13943 // on the command-line.
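// For example (flags as described above; availability may depend on the
// build flavor):
//   -XX:-OptoPeephole      disables all peephole rules
//   -XX:OptoPeepholeAt=3   enables only peephole rule number 3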
13944 //
13945 // ---------CURRENT LIMITATIONS----------------------------------------------
13946 //
13947 // Only match adjacent instructions in same basic block
13948 // Only equality constraints
13949 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13950 // Only one replacement instruction
13951 //
13952 // ---------EXAMPLE----------------------------------------------------------
13953 //
13954 // // pertinent parts of existing instructions in architecture description
13955 // instruct movI(rRegI dst, rRegI src) %{
13956 //   match(Set dst (CopyI src));
13957 // %}
13958 //
13959 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13960 //   match(Set dst (AddI dst src));
13961 //   effect(KILL cr);
13962 // %}
13963 //
13964 // // Change (inc mov) to lea
13965 // peephole %{
//   // increment preceded by register-register move
13967 //   peepmatch ( incI_eReg movI );
13968 //   // require that the destination register of the increment
13969 //   // match the destination register of the move
13970 //   peepconstraint ( 0.dst == 1.dst );
13971 //   // construct a replacement instruction that sets
13972 //   // the destination to ( move's source register + one )
13973 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13974 // %}
13975 //
13976 // Implementation no longer uses movX instructions since
13977 // machine-independent system no longer uses CopyX nodes.
13978 //
13979 // peephole %{
13980 //   peepmatch ( incI_eReg movI );
13981 //   peepconstraint ( 0.dst == 1.dst );
13982 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13983 // %}
13984 //
13985 // peephole %{
13986 //   peepmatch ( decI_eReg movI );
13987 //   peepconstraint ( 0.dst == 1.dst );
13988 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13989 // %}
13990 //
13991 // peephole %{
13992 //   peepmatch ( addI_eReg_imm movI );
13993 //   peepconstraint ( 0.dst == 1.dst );
13994 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13995 // %}
13996 //
13997 // peephole %{
13998 //   peepmatch ( addP_eReg_imm movP );
13999 //   peepconstraint ( 0.dst == 1.dst );
14000 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14001 // %}
14002 
14003 // // Change load of spilled value to only a spill
14004 // instruct storeI(memory mem, rRegI src) %{
14005 //   match(Set mem (StoreI mem src));
14006 // %}
14007 //
14008 // instruct loadI(rRegI dst, memory mem) %{
14009 //   match(Set dst (LoadI mem));
14010 // %}
14011 //
14012 peephole %{
14013   peepmatch ( loadI storeI );
14014   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14015   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14016 %}
14017 
14018 //----------SMARTSPILL RULES---------------------------------------------------
14019 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.