1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
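// For example, in the reg_defs below EAX is given encoding 0 and ECX encoding 1,
// matching the register numbers used in x86 ModRM/SIB bytes.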
   61 
   62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but SOE was
// turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of emission of
// assembly for a machnode. During emission the FPU stack is pushed, making FPR1 == st(1)
// temporarily. However, at any safepoint the stack will not have this element, so
// FPR1 == st(0) from the oopMap viewpoint. This same numbering weirdness forces the
// instruction encoding to play games with the register encode to correct for the 0/1
// issue. See MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
  224 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
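// This works because the long pairs used here (EDX:EAX, EBX:ECX, EDI:EBP) are chosen so
// that the x86 encoding of the high half is the low half's encoding plus 2
// (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).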
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1 register.
    // Until that is fixed, the RA should not allocate the K1 register; this prevents
    // accidental corruption of the value held in K1.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
// Buffer for 128-bit masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
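// ANDing a float/double with the corresponding signmask constant clears the sign bit
// (AbsF/AbsD), while XORing with the signflip constant toggles it (NegF/NegD).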
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
// !!!!! Special hack to get all types of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
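//       For a static Java call the 5 bytes are the CALL rel32 instruction itself; the
//       dynamic (inline cache) call adds the 5-byte MOV of the IC constant in front of
//       it (see compute_padding below).  Both also account for any FLDCW/VZEROUPPER
//       reset emitted before the call via pre_call_resets_size().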
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
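// For example, if the displacement would otherwise start at offset 7 and the required
// alignment is 4, then align_up(7, 4) - 7 = 1 byte of padding is emitted so that the
// 4-byte displacement begins at offset 8.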
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
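// ModRM and SIB bytes share the same 2-3-3 bit layout: f1 (mod or scale) in bits 7..6,
// f2 (reg or index) in bits 5..3, and f3 (r/m or base) in bits 2..0.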
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
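// Emit the ModRM byte (plus SIB byte and displacement, as needed) for a register/memory
// operand: mod 0x0 means no displacement (not usable with an EBP base), mod 0x1 an 8-bit
// displacement, and mod 0x2 a 32-bit displacement.  A SIB byte is emitted whenever an
// index register is present or the base is ESP.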
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
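// Materialize a three-way compare result in dst from the flags left by a preceding
// comiss/ucomiss-style compare: -1 for 'less than' or unordered (NaN), 0 for 'equal',
// and 1 for 'greater than'.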
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  631 
  632   C->output()->set_frame_complete(cbuf.insts_size());
  633 
  634   if (C->has_mach_constant_base_node()) {
  635     // NOTE: We set the table base offset here because users might be
  636     // emitted before MachConstantBaseNode.
  637     ConstantTable& constant_table = C->output()->constant_table();
  638     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  639   }
  640 }
  641 
  642 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  643   return MachNode::size(ra_); // too many variables; just compute it the hard way
  644 }
  645 
  646 int MachPrologNode::reloc() const {
  647   return 0; // a large enough number
  648 }
  649 
  650 //=============================================================================
  651 #ifndef PRODUCT
  652 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  653   Compile *C = ra_->C;
  654   int framesize = C->output()->frame_size_in_bytes();
  655   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  657   framesize -= 2*wordSize;
  658 
  659   if (C->max_vector_size() > 16) {
  660     st->print("VZEROUPPER");
  661     st->cr(); st->print("\t");
  662   }
  663   if (C->in_24_bit_fp_mode()) {
  664     st->print("FLDCW  standard control word");
  665     st->cr(); st->print("\t");
  666   }
  667   if (framesize) {
  668     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  669     st->cr(); st->print("\t");
  670   }
  671   st->print_cr("POPL   EBP"); st->print("\t");
  672   if (do_polling() && C->is_method_compilation()) {
  673     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  674               "JA       #safepoint_stub\t"
  675               "# Safepoint: poll for GC");
  676   }
  677 }
  678 #endif
  679 
  680 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  681   Compile *C = ra_->C;
  682   MacroAssembler _masm(&cbuf);
  683 
  684   if (C->max_vector_size() > 16) {
  685     // Clear upper bits of YMM registers when current compiled code uses
  686     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  687     _masm.vzeroupper();
  688   }
  689   // If method set FPU control word, restore to standard control word
  690   if (C->in_24_bit_fp_mode()) {
  691     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  692   }
  693 
  694   int framesize = C->output()->frame_size_in_bytes();
  695   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  697   framesize -= 2*wordSize;
  698 
  699   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  700 
  701   if (framesize >= 128) {
  702     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  703     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  704     emit_d32(cbuf, framesize);
  705   } else if (framesize) {
  706     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  707     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  708     emit_d8(cbuf, framesize);
  709   }
  710 
  711   emit_opcode(cbuf, 0x58 | EBP_enc);
  712 
  713   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  714     __ reserved_stack_check();
  715   }
  716 
  717   if (do_polling() && C->is_method_compilation()) {
  718     Register thread = as_Register(EBX_enc);
  719     MacroAssembler masm(&cbuf);
  720     __ get_thread(thread);
  721     Label dummy_label;
  722     Label* code_stub = &dummy_label;
  723     if (!C->output()->in_scratch_emit_size()) {
  724       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  725     }
  726     __ relocate(relocInfo::poll_return_type);
  727     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  728   }
  729 }
  730 
  731 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  732   return MachNode::size(ra_); // too many variables; just compute it
  733                               // the hard way
  734 }
  735 
  736 int MachEpilogNode::reloc() const {
  737   return 0; // a large enough number
  738 }
  739 
  740 const Pipeline * MachEpilogNode::pipeline() const {
  741   return MachNode::pipeline_class();
  742 }
  743 
  744 //=============================================================================
  745 
  746 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  747 static enum RC rc_class( OptoReg::Name reg ) {
  748 
  749   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  750   if (OptoReg::is_stack(reg)) return rc_stack;
  751 
  752   VMReg r = OptoReg::as_VMReg(reg);
  753   if (r->is_Register()) return rc_int;
  754   if (r->is_FloatRegister()) {
  755     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  756     return rc_float;
  757   }
  758   if (r->is_KRegister()) return rc_kreg;
  759   assert(r->is_XMMRegister(), "must be");
  760   return rc_xmm;
  761 }
  762 
  763 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  764                         int opcode, const char *op_str, int size, outputStream* st ) {
  765   if( cbuf ) {
  766     emit_opcode  (*cbuf, opcode );
  767     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  768 #ifndef PRODUCT
  769   } else if( !do_size ) {
  770     if( size != 0 ) st->print("\n\t");
  771     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  772       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  773       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  774     } else { // FLD, FST, PUSH, POP
  775       st->print("%s [ESP + #%d]",op_str,offset);
  776     }
  777 #endif
  778   }
  779   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  780   return size+3+offset_size;
  781 }
  782 
  783 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  784 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  785                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  786   int in_size_in_bits = Assembler::EVEX_32bit;
  787   int evex_encoding = 0;
  788   if (reg_lo+1 == reg_hi) {
  789     in_size_in_bits = Assembler::EVEX_64bit;
  790     evex_encoding = Assembler::VEX_W;
  791   }
  792   if (cbuf) {
  793     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory
    //                          operations, since it maps more cases to a single-byte displacement.
  796     _masm.set_managed();
  797     if (reg_lo+1 == reg_hi) { // double move?
  798       if (is_load) {
  799         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  800       } else {
  801         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  802       }
  803     } else {
  804       if (is_load) {
  805         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  806       } else {
  807         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  808       }
  809     }
  810 #ifndef PRODUCT
  811   } else if (!do_size) {
  812     if (size != 0) st->print("\n\t");
  813     if (reg_lo+1 == reg_hi) { // double move?
  814       if (is_load) st->print("%s %s,[ESP + #%d]",
  815                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  816                               Matcher::regName[reg_lo], offset);
  817       else         st->print("MOVSD  [ESP + #%d],%s",
  818                               offset, Matcher::regName[reg_lo]);
  819     } else {
  820       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  821                               Matcher::regName[reg_lo], offset);
  822       else         st->print("MOVSS  [ESP + #%d],%s",
  823                               offset, Matcher::regName[reg_lo]);
  824     }
  825 #endif
  826   }
  827   bool is_single_byte = false;
  828   if ((UseAVX > 2) && (offset != 0)) {
  829     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  830   }
  831   int offset_size = 0;
  832   if (UseAVX > 2 ) {
  833     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  834   } else {
  835     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  836   }
  837   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  838   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  839   return size+5+offset_size;
  840 }
  841 
  842 
  843 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  844                             int src_hi, int dst_hi, int size, outputStream* st ) {
  845   if (cbuf) {
  846     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  848     _masm.set_managed();
  849     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  851                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  852     } else {
  853       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  854                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  855     }
  856 #ifndef PRODUCT
  857   } else if (!do_size) {
  858     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  860       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  861         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  862       } else {
  863         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  864       }
  865     } else {
  866       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  867         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  868       } else {
  869         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  870       }
  871     }
  872 #endif
  873   }
  874   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  875   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  876   int sz = (UseAVX > 2) ? 6 : 4;
  877   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  878       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  879   return size + sz;
  880 }
  881 
  882 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  883                             int src_hi, int dst_hi, int size, outputStream* st ) {
  884   // 32-bit
  885   if (cbuf) {
  886     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  888     _masm.set_managed();
  889     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  890              as_Register(Matcher::_regEncode[src_lo]));
  891 #ifndef PRODUCT
  892   } else if (!do_size) {
  893     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  894 #endif
  895   }
  896   return (UseAVX> 2) ? 6 : 4;
  897 }
  898 
  899 
  900 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  901                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  902   // 32-bit
  903   if (cbuf) {
  904     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  906     _masm.set_managed();
  907     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  908              as_XMMRegister(Matcher::_regEncode[src_lo]));
  909 #ifndef PRODUCT
  910   } else if (!do_size) {
  911     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  912 #endif
  913   }
  914   return (UseAVX> 2) ? 6 : 4;
  915 }
  916 
  917 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  918   if( cbuf ) {
  919     emit_opcode(*cbuf, 0x8B );
  920     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  921 #ifndef PRODUCT
  922   } else if( !do_size ) {
  923     if( size != 0 ) st->print("\n\t");
  924     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  925 #endif
  926   }
  927   return size+2;
  928 }
  929 
  930 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  931                                  int offset, int size, outputStream* st ) {
  932   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  933     if( cbuf ) {
  934       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  935       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  936 #ifndef PRODUCT
  937     } else if( !do_size ) {
  938       if( size != 0 ) st->print("\n\t");
  939       st->print("FLD    %s",Matcher::regName[src_lo]);
  940 #endif
  941     }
  942     size += 2;
  943   }
  944 
  945   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  946   const char *op_str;
  947   int op;
  948   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  949     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  950     op = 0xDD;
  951   } else {                   // 32-bit store
  952     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  953     op = 0xD9;
  954     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  955   }
  956 
  957   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  958 }
  959 
  960 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  961 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  962                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  963 
  964 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  965                             int stack_offset, int reg, uint ireg, outputStream* st);
  966 
  967 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  968                                      int dst_offset, uint ireg, outputStream* st) {
  969   if (cbuf) {
  970     MacroAssembler _masm(cbuf);
  971     switch (ireg) {
  972     case Op_VecS:
  973       __ pushl(Address(rsp, src_offset));
  974       __ popl (Address(rsp, dst_offset));
  975       break;
  976     case Op_VecD:
  977       __ pushl(Address(rsp, src_offset));
  978       __ popl (Address(rsp, dst_offset));
  979       __ pushl(Address(rsp, src_offset+4));
  980       __ popl (Address(rsp, dst_offset+4));
  981       break;
  982     case Op_VecX:
  983       __ movdqu(Address(rsp, -16), xmm0);
  984       __ movdqu(xmm0, Address(rsp, src_offset));
  985       __ movdqu(Address(rsp, dst_offset), xmm0);
  986       __ movdqu(xmm0, Address(rsp, -16));
  987       break;
  988     case Op_VecY:
  989       __ vmovdqu(Address(rsp, -32), xmm0);
  990       __ vmovdqu(xmm0, Address(rsp, src_offset));
  991       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  992       __ vmovdqu(xmm0, Address(rsp, -32));
  993       break;
  994     case Op_VecZ:
  995       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  996       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  997       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  998       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  999       break;
 1000     default:
 1001       ShouldNotReachHere();
 1002     }
 1003 #ifndef PRODUCT
 1004   } else {
 1005     switch (ireg) {
 1006     case Op_VecS:
 1007       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1008                 "popl    [rsp + #%d]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecD:
 1012       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1016                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1017       break;
 1018      case Op_VecX:
 1019       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1020                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1021                 "movdqu  [rsp + #%d], xmm0\n\t"
 1022                 "movdqu  xmm0, [rsp - #16]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     case Op_VecY:
 1026       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1027                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1028                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1029                 "vmovdqu xmm0, [rsp - #32]",
 1030                 src_offset, dst_offset);
 1031       break;
 1032     case Op_VecZ:
 1033       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1034                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1035                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1036                 "vmovdqu xmm0, [rsp - #64]",
 1037                 src_offset, dst_offset);
 1038       break;
 1039     default:
 1040       ShouldNotReachHere();
 1041     }
 1042 #endif
 1043   }
 1044 }
 1045 
 1046 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1047   // Get registers to move
 1048   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1049   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1050   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1051   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1052 
 1053   enum RC src_second_rc = rc_class(src_second);
 1054   enum RC src_first_rc = rc_class(src_first);
 1055   enum RC dst_second_rc = rc_class(dst_second);
 1056   enum RC dst_first_rc = rc_class(dst_first);
 1057 
 1058   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1059 
 1060   // Generate spill code!
 1061   int size = 0;
 1062 
 1063   if( src_first == dst_first && src_second == dst_second )
 1064     return size;            // Self copy, no move
 1065 
 1066   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1067     uint ireg = ideal_reg();
 1068     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1069     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1070     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1071     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1072       // mem -> mem
 1073       int src_offset = ra_->reg2offset(src_first);
 1074       int dst_offset = ra_->reg2offset(dst_first);
 1075       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1076     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1077       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1078     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1079       int stack_offset = ra_->reg2offset(dst_first);
 1080       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1081     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1082       int stack_offset = ra_->reg2offset(src_first);
 1083       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1084     } else {
 1085       ShouldNotReachHere();
 1086     }
 1087     return 0;
 1088   }
 1089 
 1090   // --------------------------------------
 1091   // Check for mem-mem move.  push/pop to move.
 1092   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1093     if( src_second == dst_first ) { // overlapping stack copy ranges
 1094       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1095       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1096       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1097       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1098     }
 1099     // move low bits
 1100     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1101     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1102     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1103       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1104       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1105     }
 1106     return size;
 1107   }
 1108 
 1109   // --------------------------------------
 1110   // Check for integer reg-reg copy
 1111   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1112     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1113 
 1114   // Check for integer store
 1115   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1116     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1117 
 1118   // Check for integer load
 1119   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1120     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1121 
 1122   // Check for integer reg-xmm reg copy
 1123   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1124     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1125             "no 64 bit integer-float reg moves" );
 1126     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1127   }
 1128   // --------------------------------------
 1129   // Check for float reg-reg copy
 1130   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1131     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1132             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1133     if( cbuf ) {
 1134 
 1135       // Note the mucking with the register encode to compensate for the 0/1
 1136       // indexing issue mentioned in a comment in the reg_def sections
 1137       // for FPR registers many lines above here.
 1138 
 1139       if( src_first != FPR1L_num ) {
 1140         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1141         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1142         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1143         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1144      } else {
 1145         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1146         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1147      }
 1148 #ifndef PRODUCT
 1149     } else if( !do_size ) {
 1150       if( size != 0 ) st->print("\n\t");
 1151       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1152       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1153 #endif
 1154     }
 1155     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1156   }
 1157 
 1158   // Check for float store
 1159   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1160     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1161   }
 1162 
 1163   // Check for float load
 1164   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1165     int offset = ra_->reg2offset(src_first);
 1166     const char *op_str;
 1167     int op;
 1168     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1169       op_str = "FLD_D";
 1170       op = 0xDD;
 1171     } else {                   // 32-bit load
 1172       op_str = "FLD_S";
 1173       op = 0xD9;
 1174       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1175     }
 1176     if( cbuf ) {
 1177       emit_opcode  (*cbuf, op );
 1178       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1179       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1180       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1181 #ifndef PRODUCT
 1182     } else if( !do_size ) {
 1183       if( size != 0 ) st->print("\n\t");
 1184       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1185 #endif
 1186     }
 1187     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1188     return size + 3+offset_size+2;
 1189   }
 1190 
 1191   // Check for xmm reg-reg copy
 1192   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1193     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1194             (src_first+1 == src_second && dst_first+1 == dst_second),
 1195             "no non-adjacent float-moves" );
 1196     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1197   }
 1198 
 1199   // Check for xmm reg-integer reg copy
 1200   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1201     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1202             "no 64 bit float-integer reg moves" );
 1203     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1204   }
 1205 
 1206   // Check for xmm store
 1207   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1208     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1209   }
 1210 
 1211   // Check for float xmm load
 1212   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1213     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1214   }
 1215 
 1216   // Copy from float reg to xmm reg
 1217   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1218     // copy to the top of stack from floating point reg
 1219     // and use LEA to preserve flags
 1220     if( cbuf ) {
 1221       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1222       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1223       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1224       emit_d8(*cbuf,0xF8);
 1225 #ifndef PRODUCT
 1226     } else if( !do_size ) {
 1227       if( size != 0 ) st->print("\n\t");
 1228       st->print("LEA    ESP,[ESP-8]");
 1229 #endif
 1230     }
 1231     size += 4;
 1232 
 1233     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1234 
 1235     // Copy from the temp memory to the xmm reg.
 1236     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1237 
 1238     if( cbuf ) {
 1239       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1240       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1241       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1242       emit_d8(*cbuf,0x08);
 1243 #ifndef PRODUCT
 1244     } else if( !do_size ) {
 1245       if( size != 0 ) st->print("\n\t");
 1246       st->print("LEA    ESP,[ESP+8]");
 1247 #endif
 1248     }
 1249     size += 4;
 1250     return size;
 1251   }
 1252 
 1253   // AVX-512 opmask specific spilling.
 1254   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1255     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1256     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1257     MacroAssembler _masm(cbuf);
 1258     int offset = ra_->reg2offset(src_first);
 1259     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1260     return 0;
 1261   }
 1262 
 1263   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1264     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1265     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1266     MacroAssembler _masm(cbuf);
 1267     int offset = ra_->reg2offset(dst_first);
 1268     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1269     return 0;
 1270   }
 1271 
 1272   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1273     Unimplemented();
 1274     return 0;
 1275   }
 1276 
 1277   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1278     Unimplemented();
 1279     return 0;
 1280   }
 1281 
 1282   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1283     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1284     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1285     MacroAssembler _masm(cbuf);
 1286     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1287     return 0;
 1288   }
 1289 
 1290   assert( size > 0, "missed a case" );
 1291 
 1292   // --------------------------------------------------------------------
 1293   // Check for second bits still needing moving.
 1294   if( src_second == dst_second )
 1295     return size;               // Self copy; no move
 1296   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1297 
 1298   // Check for second word int-int move
 1299   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1300     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1301 
 1302   // Check for second word integer store
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1304     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1305 
 1306   // Check for second word integer load
 1307   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1309 
 1310   Unimplemented();
 1311   return 0; // Mute compiler
 1312 }
 1313 
 1314 #ifndef PRODUCT
 1315 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1316   implementation( NULL, ra_, false, st );
 1317 }
 1318 #endif
 1319 
 1320 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1321   implementation( &cbuf, ra_, false, NULL );
 1322 }
 1323 
 1324 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1325   return MachNode::size(ra_);
 1326 }
 1327 
 1328 
 1329 //=============================================================================
 1330 #ifndef PRODUCT
 1331 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1332   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1333   int reg = ra_->get_reg_first(this);
 1334   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1335 }
 1336 #endif
 1337 
 1338 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_encode(this);
 1341   if( offset >= 128 ) {
 1342     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1343     emit_rm(cbuf, 0x2, reg, 0x04);
 1344     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1345     emit_d32(cbuf, offset);
 1346   }
 1347   else {
 1348     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1349     emit_rm(cbuf, 0x1, reg, 0x04);
 1350     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1351     emit_d8(cbuf, offset);
 1352   }
 1353 }
 1354 
 1355 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1356   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
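  // LEA reg,[ESP+offset] encodes as opcode + ModRM + SIB + disp32 (7 bytes)
  // for offsets >= 128, or opcode + ModRM + SIB + disp8 (4 bytes) otherwise,
  // matching the two forms emitted above.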
 1357   if( offset >= 128 ) {
 1358     return 7;
 1359   }
 1360   else {
 1361     return 4;
 1362   }
 1363 }
 1364 
 1365 //=============================================================================
 1366 #ifndef PRODUCT
 1367 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1368   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1369   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1370   st->print_cr("\tNOP");
 1371   st->print_cr("\tNOP");
 1372   if( !OptoBreakpoint )
 1373     st->print_cr("\tNOP");
 1374 }
 1375 #endif
 1376 
 1377 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1378   MacroAssembler masm(&cbuf);
 1379 #ifdef ASSERT
 1380   uint insts_size = cbuf.insts_size();
 1381 #endif
 1382   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1383   masm.jump_cc(Assembler::notEqual,
 1384                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1387   int nops_cnt = 2;
 1388   if( !OptoBreakpoint ) // Leave space for int3
 1389      nops_cnt += 1;
 1390   masm.nop(nops_cnt);
 1391 
 1392   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1393 }
 1394 
 1395 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
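  // CMP EAX,[ECX+4] is 3 bytes and JNE rel32 to the IC miss stub is 6 bytes,
  // plus 2 or 3 NOPs (see emit() above).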
 1396   return OptoBreakpoint ? 11 : 12;
 1397 }
 1398 
 1399 
 1400 //=============================================================================
 1401 
 1402 // Vector calling convention not supported.
 1403 const bool Matcher::supports_vector_calling_convention() {
 1404   return false;
 1405 }
 1406 
 1407 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1408   Unimplemented();
 1409   return OptoRegPair(0, 0);
 1410 }
 1411 
 1412 // Is this branch offset short enough that a short branch can be used?
 1413 //
 1414 // NOTE: If the platform does not provide any short branch variants, then
 1415 //       this method should return false for offset 0.
 1416 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
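  // For example, a 2-byte short branch whose target lies 100 bytes past the
  // branch address is passed offset == 100; the encoded rel8 displacement is
  // then 100 - 2 = 98, which fits in [-128, 127].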
 1420   offset -= br_size;
 1421 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 1424   if (rule == jmpConUCF2_rule)
 1425     return (-126 <= offset && offset <= 125);
 1426   return (-128 <= offset && offset <= 127);
 1427 }
 1428 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1433 bool Matcher::can_be_java_arg( int reg ) {
 1434   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1435   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1436   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1437   return false;
 1438 }
 1439 
 1440 bool Matcher::is_spillable_arg( int reg ) {
 1441   return can_be_java_arg(reg);
 1442 }
 1443 
 1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating a negative
  // divisor always yields a correct positive 32-bit value).
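  // For example, divisor values such as 7 or -10 pass this check,
  // while 0x100000000LL or min_jint do not.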
 1450   return VM_Version::has_fast_idiv() &&
 1451          (divisor == (int)divisor && divisor != min_jint);
 1452 }
 1453 
 1454 // Register for DIVI projection of divmodI
 1455 RegMask Matcher::divI_proj_mask() {
 1456   return EAX_REG_mask();
 1457 }
 1458 
 1459 // Register for MODI projection of divmodI
 1460 RegMask Matcher::modI_proj_mask() {
 1461   return EDX_REG_mask();
 1462 }
 1463 
 1464 // Register for DIVL projection of divmodL
 1465 RegMask Matcher::divL_proj_mask() {
 1466   ShouldNotReachHere();
 1467   return RegMask();
 1468 }
 1469 
 1470 // Register for MODL projection of divmodL
 1471 RegMask Matcher::modL_proj_mask() {
 1472   ShouldNotReachHere();
 1473   return RegMask();
 1474 }
 1475 
 1476 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1477   return NO_REG_mask();
 1478 }
 1479 
// Returns true if the high 32 bits of the value are known to be zero.
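// For example, (x & 0xFFFFFFFFL) qualifies, as does the constant 0x12345678L.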
 1481 bool is_operand_hi32_zero(Node* n) {
 1482   int opc = n->Opcode();
 1483   if (opc == Op_AndL) {
 1484     Node* o2 = n->in(2);
 1485     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1486       return true;
 1487     }
 1488   }
 1489   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1490     return true;
 1491   }
 1492   return false;
 1493 }
 1494 
 1495 %}
 1496 
 1497 //----------ENCODING BLOCK-----------------------------------------------------
 1498 // This block specifies the encoding classes used by the compiler to output
 1499 // byte streams.  Encoding classes generate functions which are called by
 1500 // Machine Instruction Nodes in order to generate the bit encoding of the
 1501 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1503 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1504 // operand to generate a function which returns its register number when
 1505 // queried.   CONST_INTER causes an operand to generate a function which
 1506 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1507 // operand to generate four functions which return the Base Register, the
 1508 // Index Register, the Scale Value, and the Offset Value of the operand when
 1509 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1511 // associated with each basic boolean condition for a conditional instruction.
 1512 // Instructions specify two basic values for encoding.  They use the
 1513 // ins_encode keyword to specify their encoding class (which must be one of
 1514 // the class names specified in the encoding block), and they use the
 1515 // opcode keyword to specify, in order, their primary, secondary, and
 1516 // tertiary opcode.  Only the opcode sections which a particular instruction
 1517 // needs for encoding need to be specified.
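//
// As an illustrative sketch only (the operand and pipeline names below are
// representative, not tied to any particular definition in this file), an
// instruction using these keywords looks like:
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);                           // primary opcode
//     ins_encode( OpcP, RegReg( dst, src) );  // encoding classes defined below
//     ins_pipe( ialu_reg_reg );
//   %}
//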
 1518 encode %{
 1519   // Build emit functions for each basic byte or larger field in the intel
 1520   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1521   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in order,
  // so that the adlc can build the emit functions automagically
 1525 
 1526   // Emit primary opcode
 1527   enc_class OpcP %{
 1528     emit_opcode(cbuf, $primary);
 1529   %}
 1530 
 1531   // Emit secondary opcode
 1532   enc_class OpcS %{
 1533     emit_opcode(cbuf, $secondary);
 1534   %}
 1535 
 1536   // Emit opcode directly
 1537   enc_class Opcode(immI d8) %{
 1538     emit_opcode(cbuf, $d8$$constant);
 1539   %}
 1540 
 1541   enc_class SizePrefix %{
 1542     emit_opcode(cbuf,0x66);
 1543   %}
 1544 
 1545   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1546     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1547   %}
 1548 
 1549   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1550     emit_opcode(cbuf,$opcode$$constant);
 1551     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1552   %}
 1553 
 1554   enc_class mov_r32_imm0( rRegI dst ) %{
 1555     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1556     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1557   %}
 1558 
 1559   enc_class cdq_enc %{
 1560     // Full implementation of Java idiv and irem; checks for
 1561     // special case as described in JVM spec., p.243 & p.271.
 1562     //
 1563     //         normal case                           special case
 1564     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
 1572     //
 1573     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1574     //  0F 85 0B 00 00 00    jne         normal_case
 1575     //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,-1
 1577     //  0F 84 03 00 00 00    je          done
 1578     //                  normal_case:
 1579     //  99                   cdq
 1580     //  F7 F9                idiv        rax,ecx
 1581     //                  done:
 1582     //
 1583     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1584     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1585     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1586     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1587     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1588     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1589     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1
 1591     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1592     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1594     // normal_case:
 1595     emit_opcode(cbuf,0x99);                                         // cdq
 1596     // idiv (note: must be emitted by the user of this rule)
 1597     // normal:
 1598   %}
 1599 
 1600   // Dense encoding for older common ops
 1601   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1602     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1603   %}
 1604 
 1605 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1607   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1608     // Check for 8-bit immediate, and set sign extend bit in opcode
 1609     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1610       emit_opcode(cbuf, $primary | 0x02);
 1611     }
 1612     else {                          // If 32-bit immediate
 1613       emit_opcode(cbuf, $primary);
 1614     }
 1615   %}
 1616 
 1617   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1618     // Emit primary opcode and set sign-extend bit
 1619     // Check for 8-bit immediate, and set sign extend bit in opcode
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1622     else {                          // If 32-bit immediate
 1623       emit_opcode(cbuf, $primary);
 1624     }
 1625     // Emit r/m byte with secondary opcode, after primary opcode.
 1626     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1627   %}
 1628 
 1629   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1630     // Check for 8-bit immediate, and set sign extend bit in opcode
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       $$$emit8$imm$$constant;
 1633     }
 1634     else {                          // If 32-bit immediate
 1635       // Output immediate
 1636       $$$emit32$imm$$constant;
 1637     }
 1638   %}
 1639 
 1640   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1641     // Emit primary opcode and set sign-extend bit
 1642     // Check for 8-bit immediate, and set sign extend bit in opcode
 1643     int con = (int)$imm$$constant; // Throw away top bits
 1644     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1645     // Emit r/m byte with secondary opcode, after primary opcode.
 1646     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1647     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1648     else                               emit_d32(cbuf,con);
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1652     // Emit primary opcode and set sign-extend bit
 1653     // Check for 8-bit immediate, and set sign extend bit in opcode
 1654     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1655     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with tertiary opcode, after primary opcode.
 1657     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1659     else                               emit_d32(cbuf,con);
 1660   %}
 1661 
 1662   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1663     emit_cc(cbuf, $secondary, $dst$$reg );
 1664   %}
 1665 
 1666   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1667     int destlo = $dst$$reg;
 1668     int desthi = HIGH_FROM_LOW(destlo);
 1669     // bswap lo
 1670     emit_opcode(cbuf, 0x0F);
 1671     emit_cc(cbuf, 0xC8, destlo);
 1672     // bswap hi
 1673     emit_opcode(cbuf, 0x0F);
 1674     emit_cc(cbuf, 0xC8, desthi);
 1675     // xchg lo and hi
 1676     emit_opcode(cbuf, 0x87);
 1677     emit_rm(cbuf, 0x3, destlo, desthi);
 1678   %}
 1679 
 1680   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1681     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1682   %}
 1683 
 1684   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1685     $$$emit8$primary;
 1686     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1687   %}
 1688 
 1689   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1690     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1691     emit_d8(cbuf, op >> 8 );
 1692     emit_d8(cbuf, op & 255);
 1693   %}
 1694 
 1695   // emulate a CMOV with a conditional branch around a MOV
 1696   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1697     // Invert sense of branch from sense of CMOV
 1698     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1699     emit_d8( cbuf, $brOffs$$constant );
 1700   %}
 1701 
 1702   enc_class enc_PartialSubtypeCheck( ) %{
 1703     Register Redi = as_Register(EDI_enc); // result register
 1704     Register Reax = as_Register(EAX_enc); // super class
 1705     Register Recx = as_Register(ECX_enc); // killed
 1706     Register Resi = as_Register(ESI_enc); // sub class
 1707     Label miss;
 1708 
 1709     MacroAssembler _masm(&cbuf);
 1710     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1711                                      NULL, &miss,
 1712                                      /*set_cond_codes:*/ true);
 1713     if ($primary) {
 1714       __ xorptr(Redi, Redi);
 1715     }
 1716     __ bind(miss);
 1717   %}
 1718 
 1719   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1720     MacroAssembler masm(&cbuf);
 1721     int start = masm.offset();
 1722     if (UseSSE >= 2) {
 1723       if (VerifyFPU) {
 1724         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1725       }
 1726     } else {
 1727       // External c_calling_convention expects the FPU stack to be 'clean'.
 1728       // Compiled code leaves it dirty.  Do cleanup now.
 1729       masm.empty_FPU_stack();
 1730     }
 1731     if (sizeof_FFree_Float_Stack_All == -1) {
 1732       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1733     } else {
 1734       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1735     }
 1736   %}
 1737 
 1738   enc_class Verify_FPU_For_Leaf %{
 1739     if( VerifyFPU ) {
 1740       MacroAssembler masm(&cbuf);
 1741       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1742     }
 1743   %}
 1744 
 1745   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1746     // This is the instruction starting address for relocation info.
 1747     cbuf.set_insts_mark();
 1748     $$$emit8$primary;
 1749     // CALL directly to the runtime
 1750     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1751                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1752 
 1753     if (UseSSE >= 2) {
 1754       MacroAssembler _masm(&cbuf);
 1755       BasicType rt = tf()->return_type();
 1756 
 1757       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1758         // A C runtime call where the return value is unused.  In SSE2+
 1759         // mode the result needs to be removed from the FPU stack.  It's
 1760         // likely that this function call could be removed by the
 1761         // optimizer if the C function is a pure function.
 1762         __ ffree(0);
 1763       } else if (rt == T_FLOAT) {
 1764         __ lea(rsp, Address(rsp, -4));
 1765         __ fstp_s(Address(rsp, 0));
 1766         __ movflt(xmm0, Address(rsp, 0));
 1767         __ lea(rsp, Address(rsp,  4));
 1768       } else if (rt == T_DOUBLE) {
 1769         __ lea(rsp, Address(rsp, -8));
 1770         __ fstp_d(Address(rsp, 0));
 1771         __ movdbl(xmm0, Address(rsp, 0));
 1772         __ lea(rsp, Address(rsp,  8));
 1773       }
 1774     }
 1775   %}
 1776 
 1777   enc_class pre_call_resets %{
    // If the method sets the FPU control word, restore the standard control word before the call
 1779     debug_only(int off0 = cbuf.insts_size());
 1780     if (ra_->C->in_24_bit_fp_mode()) {
 1781       MacroAssembler _masm(&cbuf);
 1782       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1783     }
 1784     // Clear upper bits of YMM registers when current compiled code uses
 1785     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1786     MacroAssembler _masm(&cbuf);
 1787     __ vzeroupper();
 1788     debug_only(int off1 = cbuf.insts_size());
 1789     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1790   %}
 1791 
 1792   enc_class post_call_FPU %{
    // If the method sets the FPU control word, reload the 24-bit control word here after the call
 1794     if (Compile::current()->in_24_bit_fp_mode()) {
 1795       MacroAssembler masm(&cbuf);
 1796       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1797     }
 1798   %}
 1799 
 1800   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1801     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1802     // who we intended to call.
 1803     cbuf.set_insts_mark();
 1804     $$$emit8$primary;
 1805 
 1806     if (!_method) {
 1807       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1808                      runtime_call_Relocation::spec(),
 1809                      RELOC_IMM32);
 1810     } else {
 1811       int method_index = resolved_method_index(cbuf);
 1812       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1813                                                   : static_call_Relocation::spec(method_index);
 1814       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1815                      rspec, RELOC_DISP32);
 1816       // Emit stubs for static call.
 1817       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1818       if (stub == NULL) {
 1819         ciEnv::current()->record_failure("CodeCache is full");
 1820         return;
 1821       }
 1822     }
 1823   %}
 1824 
 1825   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1826     MacroAssembler _masm(&cbuf);
 1827     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1828   %}
 1829 
 1830   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1831     int disp = in_bytes(Method::from_compiled_offset());
 1832     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1833 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1835     cbuf.set_insts_mark();
 1836     $$$emit8$primary;
 1837     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1838     emit_d8(cbuf, disp);             // Displacement
 1839 
 1840   %}
 1841 
 1842 //   Following encoding is no longer used, but may be restored if calling
 1843 //   convention changes significantly.
 1844 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1845 //
 1846 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1847 //     // int ic_reg     = Matcher::inline_cache_reg();
 1848 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1849 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1850 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1851 //
 1852 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1853 //     // // so we load it immediately before the call
 1854 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1855 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1856 //
 1857 //     // xor rbp,ebp
 1858 //     emit_opcode(cbuf, 0x33);
 1859 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1860 //
 1861 //     // CALL to interpreter.
 1862 //     cbuf.set_insts_mark();
 1863 //     $$$emit8$primary;
 1864 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1865 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1866 //   %}
 1867 
 1868   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1869     $$$emit8$primary;
 1870     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1871     $$$emit8$shift$$constant;
 1872   %}
 1873 
 1874   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1875     // Load immediate does not have a zero or sign extended version
 1876     // for 8-bit immediates
 1877     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1878     $$$emit32$src$$constant;
 1879   %}
 1880 
 1881   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1882     // Load immediate does not have a zero or sign extended version
 1883     // for 8-bit immediates
 1884     emit_opcode(cbuf, $primary + $dst$$reg);
 1885     $$$emit32$src$$constant;
 1886   %}
 1887 
 1888   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1889     // Load immediate does not have a zero or sign extended version
 1890     // for 8-bit immediates
 1891     int dst_enc = $dst$$reg;
 1892     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1893     if (src_con == 0) {
 1894       // xor dst, dst
 1895       emit_opcode(cbuf, 0x33);
 1896       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1897     } else {
 1898       emit_opcode(cbuf, $primary + dst_enc);
 1899       emit_d32(cbuf, src_con);
 1900     }
 1901   %}
 1902 
 1903   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1904     // Load immediate does not have a zero or sign extended version
 1905     // for 8-bit immediates
 1906     int dst_enc = $dst$$reg + 2;
 1907     int src_con = ((julong)($src$$constant)) >> 32;
 1908     if (src_con == 0) {
 1909       // xor dst, dst
 1910       emit_opcode(cbuf, 0x33);
 1911       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1912     } else {
 1913       emit_opcode(cbuf, $primary + dst_enc);
 1914       emit_d32(cbuf, src_con);
 1915     }
 1916   %}
 1917 
 1918 
 1919   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1920   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1921     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1922   %}
 1923 
 1924   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1925     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1926   %}
 1927 
 1928   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1929     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1930   %}
 1931 
 1932   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1933     $$$emit8$primary;
 1934     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1935   %}
 1936 
 1937   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1938     $$$emit8$secondary;
 1939     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1940   %}
 1941 
 1942   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1943     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1944   %}
 1945 
 1946   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1947     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1948   %}
 1949 
 1950   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1951     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1952   %}
 1953 
 1954   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1955     // Output immediate
 1956     $$$emit32$src$$constant;
 1957   %}
 1958 
 1959   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1960     // Output Float immediate bits
 1961     jfloat jf = $src$$constant;
 1962     int    jf_as_bits = jint_cast( jf );
 1963     emit_d32(cbuf, jf_as_bits);
 1964   %}
 1965 
 1966   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1967     // Output Float immediate bits
 1968     jfloat jf = $src$$constant;
 1969     int    jf_as_bits = jint_cast( jf );
 1970     emit_d32(cbuf, jf_as_bits);
 1971   %}
 1972 
 1973   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1974     // Output immediate
 1975     $$$emit16$src$$constant;
 1976   %}
 1977 
 1978   enc_class Con_d32(immI src) %{
 1979     emit_d32(cbuf,$src$$constant);
 1980   %}
 1981 
 1982   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1983     // Output immediate memory reference
 1984     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1985     emit_d32(cbuf, 0x00);
 1986   %}
 1987 
 1988   enc_class lock_prefix( ) %{
 1989     emit_opcode(cbuf,0xF0);         // [Lock]
 1990   %}
 1991 
 1992   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 1997   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 1998 
 1999     // XCHG  rbx,ecx
 2000     emit_opcode(cbuf,0x87);
 2001     emit_opcode(cbuf,0xD9);
 2002     // [Lock]
 2003     emit_opcode(cbuf,0xF0);
 2004     // CMPXCHG8 [Eptr]
 2005     emit_opcode(cbuf,0x0F);
 2006     emit_opcode(cbuf,0xC7);
 2007     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2008     // XCHG  rbx,ecx
 2009     emit_opcode(cbuf,0x87);
 2010     emit_opcode(cbuf,0xD9);
 2011   %}
 2012 
 2013   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2014     // [Lock]
 2015     emit_opcode(cbuf,0xF0);
 2016 
 2017     // CMPXCHG [Eptr]
 2018     emit_opcode(cbuf,0x0F);
 2019     emit_opcode(cbuf,0xB1);
 2020     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2021   %}
 2022 
 2023   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2024     // [Lock]
 2025     emit_opcode(cbuf,0xF0);
 2026 
 2027     // CMPXCHGB [Eptr]
 2028     emit_opcode(cbuf,0x0F);
 2029     emit_opcode(cbuf,0xB0);
 2030     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2031   %}
 2032 
 2033   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2034     // [Lock]
 2035     emit_opcode(cbuf,0xF0);
 2036 
    // Operand-size prefix for 16-bit operands
 2038     emit_opcode(cbuf, 0x66);
 2039 
 2040     // CMPXCHGW [Eptr]
 2041     emit_opcode(cbuf,0x0F);
 2042     emit_opcode(cbuf,0xB1);
 2043     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2044   %}
 2045 
 2046   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2047     int res_encoding = $res$$reg;
 2048 
 2049     // MOV  res,0
 2050     emit_opcode( cbuf, 0xB8 + res_encoding);
 2051     emit_d32( cbuf, 0 );
 2052     // JNE,s  fail
 2053     emit_opcode(cbuf,0x75);
 2054     emit_d8(cbuf, 5 );
 2055     // MOV  res,1
 2056     emit_opcode( cbuf, 0xB8 + res_encoding);
 2057     emit_d32( cbuf, 1 );
 2058     // fail:
 2059   %}
 2060 
 2061   enc_class set_instruction_start( ) %{
 2062     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2063   %}
 2064 
 2065   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2066     int reg_encoding = $ereg$$reg;
 2067     int base  = $mem$$base;
 2068     int index = $mem$$index;
 2069     int scale = $mem$$scale;
 2070     int displace = $mem$$disp;
 2071     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2072     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2073   %}
 2074 
 2075   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2076     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2077     int base  = $mem$$base;
 2078     int index = $mem$$index;
 2079     int scale = $mem$$scale;
 2080     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2081     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2082     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2083   %}
 2084 
 2085   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2086     int r1, r2;
 2087     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2088     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2089     emit_opcode(cbuf,0x0F);
 2090     emit_opcode(cbuf,$tertiary);
 2091     emit_rm(cbuf, 0x3, r1, r2);
 2092     emit_d8(cbuf,$cnt$$constant);
 2093     emit_d8(cbuf,$primary);
 2094     emit_rm(cbuf, 0x3, $secondary, r1);
 2095     emit_d8(cbuf,$cnt$$constant);
 2096   %}
 2097 
 2098   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2099     emit_opcode( cbuf, 0x8B ); // Move
 2100     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2101     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2102       emit_d8(cbuf,$primary);
 2103       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2104       emit_d8(cbuf,$cnt$$constant-32);
 2105     }
 2106     emit_d8(cbuf,$primary);
 2107     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2108     emit_d8(cbuf,31);
 2109   %}
 2110 
 2111   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2112     int r1, r2;
 2113     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2114     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2115 
 2116     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2117     emit_rm(cbuf, 0x3, r1, r2);
 2118     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2119       emit_opcode(cbuf,$primary);
 2120       emit_rm(cbuf, 0x3, $secondary, r1);
 2121       emit_d8(cbuf,$cnt$$constant-32);
 2122     }
 2123     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2124     emit_rm(cbuf, 0x3, r2, r2);
 2125   %}
 2126 
 2127   // Clone of RegMem but accepts an extra parameter to access each
 2128   // half of a double in memory; it never needs relocation info.
 2129   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2130     emit_opcode(cbuf,$opcode$$constant);
 2131     int reg_encoding = $rm_reg$$reg;
 2132     int base     = $mem$$base;
 2133     int index    = $mem$$index;
 2134     int scale    = $mem$$scale;
 2135     int displace = $mem$$disp + $disp_for_half$$constant;
 2136     relocInfo::relocType disp_reloc = relocInfo::none;
 2137     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2138   %}
 2139 
 2140   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2141   //
 2142   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2143   // and it never needs relocation information.
 2144   // Frequently used to move data between FPU's Stack Top and memory.
 2145   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2146     int rm_byte_opcode = $rm_opcode$$constant;
 2147     int base     = $mem$$base;
 2148     int index    = $mem$$index;
 2149     int scale    = $mem$$scale;
 2150     int displace = $mem$$disp;
 2151     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2152     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2153   %}
 2154 
 2155   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2156     int rm_byte_opcode = $rm_opcode$$constant;
 2157     int base     = $mem$$base;
 2158     int index    = $mem$$index;
 2159     int scale    = $mem$$scale;
 2160     int displace = $mem$$disp;
 2161     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2162     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2163   %}
 2164 
 2165   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2166     int reg_encoding = $dst$$reg;
 2167     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2168     int index        = 0x04;            // 0x04 indicates no index
 2169     int scale        = 0x00;            // 0x00 indicates no scale
 2170     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2171     relocInfo::relocType disp_reloc = relocInfo::none;
 2172     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2173   %}
 2174 
 2175   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2176     // Compare dst,src
 2177     emit_opcode(cbuf,0x3B);
 2178     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2179     // jmp dst < src around move
 2180     emit_opcode(cbuf,0x7C);
 2181     emit_d8(cbuf,2);
 2182     // move dst,src
 2183     emit_opcode(cbuf,0x8B);
 2184     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2185   %}
 2186 
 2187   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2188     // Compare dst,src
 2189     emit_opcode(cbuf,0x3B);
 2190     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2191     // jmp dst > src around move
 2192     emit_opcode(cbuf,0x7F);
 2193     emit_d8(cbuf,2);
 2194     // move dst,src
 2195     emit_opcode(cbuf,0x8B);
 2196     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2197   %}
 2198 
 2199   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2200     // If src is FPR1, we can just FST to store it.
 2201     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2202     int reg_encoding = 0x2; // Just store
 2203     int base  = $mem$$base;
 2204     int index = $mem$$index;
 2205     int scale = $mem$$scale;
 2206     int displace = $mem$$disp;
 2207     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2208     if( $src$$reg != FPR1L_enc ) {
 2209       reg_encoding = 0x3;  // Store & pop
 2210       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2211       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2212     }
 2213     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2214     emit_opcode(cbuf,$primary);
 2215     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2216   %}
 2217 
 2218   enc_class neg_reg(rRegI dst) %{
 2219     // NEG $dst
 2220     emit_opcode(cbuf,0xF7);
 2221     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2222   %}
 2223 
 2224   enc_class setLT_reg(eCXRegI dst) %{
 2225     // SETLT $dst
 2226     emit_opcode(cbuf,0x0F);
 2227     emit_opcode(cbuf,0x9C);
 2228     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2229   %}
 2230 
 2231   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2232     int tmpReg = $tmp$$reg;
 2233 
 2234     // SUB $p,$q
 2235     emit_opcode(cbuf,0x2B);
 2236     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2237     // SBB $tmp,$tmp
 2238     emit_opcode(cbuf,0x1B);
 2239     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2240     // AND $tmp,$y
 2241     emit_opcode(cbuf,0x23);
 2242     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2243     // ADD $p,$tmp
 2244     emit_opcode(cbuf,0x03);
 2245     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2246   %}
 2247 
 2248   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2249     // TEST shift,32
 2250     emit_opcode(cbuf,0xF7);
 2251     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2252     emit_d32(cbuf,0x20);
 2253     // JEQ,s small
 2254     emit_opcode(cbuf, 0x74);
 2255     emit_d8(cbuf, 0x04);
 2256     // MOV    $dst.hi,$dst.lo
 2257     emit_opcode( cbuf, 0x8B );
 2258     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2259     // CLR    $dst.lo
 2260     emit_opcode(cbuf, 0x33);
 2261     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2262 // small:
 2263     // SHLD   $dst.hi,$dst.lo,$shift
 2264     emit_opcode(cbuf,0x0F);
 2265     emit_opcode(cbuf,0xA5);
 2266     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2267     // SHL    $dst.lo,$shift"
 2268     emit_opcode(cbuf,0xD3);
 2269     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2270   %}
 2271 
 2272   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2273     // TEST shift,32
 2274     emit_opcode(cbuf,0xF7);
 2275     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2276     emit_d32(cbuf,0x20);
 2277     // JEQ,s small
 2278     emit_opcode(cbuf, 0x74);
 2279     emit_d8(cbuf, 0x04);
 2280     // MOV    $dst.lo,$dst.hi
 2281     emit_opcode( cbuf, 0x8B );
 2282     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2283     // CLR    $dst.hi
 2284     emit_opcode(cbuf, 0x33);
 2285     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2286 // small:
 2287     // SHRD   $dst.lo,$dst.hi,$shift
 2288     emit_opcode(cbuf,0x0F);
 2289     emit_opcode(cbuf,0xAD);
 2290     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2291     // SHR    $dst.hi,$shift"
 2292     emit_opcode(cbuf,0xD3);
 2293     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2294   %}
 2295 
 2296   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2297     // TEST shift,32
 2298     emit_opcode(cbuf,0xF7);
 2299     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2300     emit_d32(cbuf,0x20);
 2301     // JEQ,s small
 2302     emit_opcode(cbuf, 0x74);
 2303     emit_d8(cbuf, 0x05);
 2304     // MOV    $dst.lo,$dst.hi
 2305     emit_opcode( cbuf, 0x8B );
 2306     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2307     // SAR    $dst.hi,31
 2308     emit_opcode(cbuf, 0xC1);
 2309     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2310     emit_d8(cbuf, 0x1F );
 2311 // small:
 2312     // SHRD   $dst.lo,$dst.hi,$shift
 2313     emit_opcode(cbuf,0x0F);
 2314     emit_opcode(cbuf,0xAD);
 2315     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
 2316     // SAR    $dst.hi,$shift"
 2317     emit_opcode(cbuf,0xD3);
 2318     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2319   %}
 2320 
 2321 
 2322   // ----------------- Encodings for floating point unit -----------------
 2323   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2324   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2325     $$$emit8$primary;
 2326     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2327   %}
 2328 
 2329   // Pop argument in FPR0 with FSTP ST(0)
 2330   enc_class PopFPU() %{
 2331     emit_opcode( cbuf, 0xDD );
 2332     emit_d8( cbuf, 0xD8 );
 2333   %}
 2334 
 2335   // !!!!! equivalent to Pop_Reg_F
 2336   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2337     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2338     emit_d8( cbuf, 0xD8+$dst$$reg );
 2339   %}
 2340 
 2341   enc_class Push_Reg_DPR( regDPR dst ) %{
 2342     emit_opcode( cbuf, 0xD9 );
 2343     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2344   %}
 2345 
 2346   enc_class strictfp_bias1( regDPR dst ) %{
 2347     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2348     emit_opcode( cbuf, 0x2D );
 2349     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2350     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2351     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2352   %}
 2353 
 2354   enc_class strictfp_bias2( regDPR dst ) %{
 2355     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2356     emit_opcode( cbuf, 0x2D );
 2357     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2358     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2359     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2360   %}
 2361 
 2362   // Special case for moving an integer register to a stack slot.
 2363   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2364     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2365   %}
 2366 
 2367   // Special case for moving a register to a stack slot.
 2368   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2369     // Opcode already emitted
 2370     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2371     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2372     emit_d32(cbuf, $dst$$disp);   // Displacement
 2373   %}
 2374 
 2375   // Push the integer in stackSlot 'src' onto FP-stack
 2376   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2377     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2378   %}
 2379 
 2380   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2381   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2382     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2383   %}
 2384 
 2385   // Same as Pop_Mem_F except for opcode
 2386   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2387   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2388     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2389   %}
 2390 
 2391   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2392     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2393     emit_d8( cbuf, 0xD8+$dst$$reg );
 2394   %}
 2395 
 2396   enc_class Push_Reg_FPR( regFPR dst ) %{
 2397     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2398     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2399   %}
 2400 
 2401   // Push FPU's float to a stack-slot, and pop FPU-stack
 2402   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2403     int pop = 0x02;
 2404     if ($src$$reg != FPR1L_enc) {
 2405       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2406       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2407       pop = 0x03;
 2408     }
 2409     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2410   %}
 2411 
 2412   // Push FPU's double to a stack-slot, and pop FPU-stack
 2413   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2414     int pop = 0x02;
 2415     if ($src$$reg != FPR1L_enc) {
 2416       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2417       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2418       pop = 0x03;
 2419     }
 2420     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2421   %}
 2422 
 2423   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2424   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2425     int pop = 0xD0 - 1; // -1 since we skip FLD
 2426     if ($src$$reg != FPR1L_enc) {
 2427       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2428       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2429       pop = 0xD8;
 2430     }
 2431     emit_opcode( cbuf, 0xDD );
 2432     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2433   %}
 2434 
 2435 
 2436   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2437     // load dst in FPR0
 2438     emit_opcode( cbuf, 0xD9 );
 2439     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2440     if ($src$$reg != FPR1L_enc) {
 2441       // fincstp
 2442       emit_opcode (cbuf, 0xD9);
 2443       emit_opcode (cbuf, 0xF7);
 2444       // swap src with FPR1:
 2445       // FXCH FPR1 with src
 2446       emit_opcode(cbuf, 0xD9);
 2447       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2448       // fdecstp
 2449       emit_opcode (cbuf, 0xD9);
 2450       emit_opcode (cbuf, 0xF6);
 2451     }
 2452   %}
 2453 
 2454   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2455     MacroAssembler _masm(&cbuf);
 2456     __ subptr(rsp, 8);
 2457     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2458     __ fld_d(Address(rsp, 0));
 2459     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2460     __ fld_d(Address(rsp, 0));
 2461   %}
 2462 
 2463   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2464     MacroAssembler _masm(&cbuf);
 2465     __ subptr(rsp, 4);
 2466     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2467     __ fld_s(Address(rsp, 0));
 2468     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2469     __ fld_s(Address(rsp, 0));
 2470   %}
 2471 
 2472   enc_class Push_ResultD(regD dst) %{
 2473     MacroAssembler _masm(&cbuf);
 2474     __ fstp_d(Address(rsp, 0));
 2475     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2476     __ addptr(rsp, 8);
 2477   %}
 2478 
 2479   enc_class Push_ResultF(regF dst, immI d8) %{
 2480     MacroAssembler _masm(&cbuf);
 2481     __ fstp_s(Address(rsp, 0));
 2482     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2483     __ addptr(rsp, $d8$$constant);
 2484   %}
 2485 
 2486   enc_class Push_SrcD(regD src) %{
 2487     MacroAssembler _masm(&cbuf);
 2488     __ subptr(rsp, 8);
 2489     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2490     __ fld_d(Address(rsp, 0));
 2491   %}
 2492 
 2493   enc_class push_stack_temp_qword() %{
 2494     MacroAssembler _masm(&cbuf);
 2495     __ subptr(rsp, 8);
 2496   %}
 2497 
 2498   enc_class pop_stack_temp_qword() %{
 2499     MacroAssembler _masm(&cbuf);
 2500     __ addptr(rsp, 8);
 2501   %}
 2502 
 2503   enc_class push_xmm_to_fpr1(regD src) %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2506     __ fld_d(Address(rsp, 0));
 2507   %}
 2508 
 2509   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2510     if ($src$$reg != FPR1L_enc) {
 2511       // fincstp
 2512       emit_opcode (cbuf, 0xD9);
 2513       emit_opcode (cbuf, 0xF7);
 2514       // FXCH FPR1 with src
 2515       emit_opcode(cbuf, 0xD9);
 2516       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2517       // fdecstp
 2518       emit_opcode (cbuf, 0xD9);
 2519       emit_opcode (cbuf, 0xF6);
 2520     }
 2521     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2522     // // FSTP   FPR$dst$$reg
 2523     // emit_opcode( cbuf, 0xDD );
 2524     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2525   %}
 2526 
 2527   enc_class fnstsw_sahf_skip_parity() %{
 2528     // fnstsw ax
 2529     emit_opcode( cbuf, 0xDF );
 2530     emit_opcode( cbuf, 0xE0 );
 2531     // sahf
 2532     emit_opcode( cbuf, 0x9E );
 2533     // jnp  ::skip
 2534     emit_opcode( cbuf, 0x7B );
 2535     emit_opcode( cbuf, 0x05 );
 2536   %}
 2537 
 2538   enc_class emitModDPR() %{
 2539     // fprem must be iterative
 2540     // :: loop
 2541     // fprem
 2542     emit_opcode( cbuf, 0xD9 );
 2543     emit_opcode( cbuf, 0xF8 );
 2544     // wait
 2545     emit_opcode( cbuf, 0x9b );
 2546     // fnstsw ax
 2547     emit_opcode( cbuf, 0xDF );
 2548     emit_opcode( cbuf, 0xE0 );
 2549     // sahf
 2550     emit_opcode( cbuf, 0x9E );
 2551     // jp  ::loop
 2552     emit_opcode( cbuf, 0x0F );
 2553     emit_opcode( cbuf, 0x8A );
 2554     emit_opcode( cbuf, 0xF4 );
 2555     emit_opcode( cbuf, 0xFF );
 2556     emit_opcode( cbuf, 0xFF );
 2557     emit_opcode( cbuf, 0xFF );
 2558   %}
 2559 
 2560   enc_class fpu_flags() %{
 2561     // fnstsw_ax
 2562     emit_opcode( cbuf, 0xDF);
 2563     emit_opcode( cbuf, 0xE0);
 2564     // test ax,0x0400
 2565     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2566     emit_opcode( cbuf, 0xA9 );
 2567     emit_d16   ( cbuf, 0x0400 );
 2568     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2569     // // test rax,0x0400
 2570     // emit_opcode( cbuf, 0xA9 );
 2571     // emit_d32   ( cbuf, 0x00000400 );
 2572     //
 2573     // jz exit (no unordered comparison)
 2574     emit_opcode( cbuf, 0x74 );
 2575     emit_d8    ( cbuf, 0x02 );
 2576     // mov ah,1 - treat as LT case (set carry flag)
 2577     emit_opcode( cbuf, 0xB4 );
 2578     emit_d8    ( cbuf, 0x01 );
 2579     // sahf
 2580     emit_opcode( cbuf, 0x9E);
 2581   %}
 2582 
 2583   enc_class cmpF_P6_fixup() %{
 2584     // Fixup the integer flags in case comparison involved a NaN
 2585     //
 2586     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2587     emit_opcode( cbuf, 0x7B );
 2588     emit_d8    ( cbuf, 0x03 );
 2589     // MOV AH,1 - treat as LT case (set carry flag)
 2590     emit_opcode( cbuf, 0xB4 );
 2591     emit_d8    ( cbuf, 0x01 );
 2592     // SAHF
 2593     emit_opcode( cbuf, 0x9E);
 2594     // NOP     // target for branch to avoid branch to branch
 2595     emit_opcode( cbuf, 0x90);
 2596   %}
 2597 
 2598 //     fnstsw_ax();
 2599 //     sahf();
 2600 //     movl(dst, nan_result);
 2601 //     jcc(Assembler::parity, exit);
 2602 //     movl(dst, less_result);
 2603 //     jcc(Assembler::below, exit);
 2604 //     movl(dst, equal_result);
 2605 //     jcc(Assembler::equal, exit);
 2606 //     movl(dst, greater_result);
 2607 
 2608 // less_result     =  1;
 2609 // greater_result  = -1;
 2610 // equal_result    = 0;
 2611 // nan_result      = -1;
 2612 
 2613   enc_class CmpF_Result(rRegI dst) %{
 2614     // fnstsw_ax();
 2615     emit_opcode( cbuf, 0xDF);
 2616     emit_opcode( cbuf, 0xE0);
 2617     // sahf
 2618     emit_opcode( cbuf, 0x9E);
 2619     // movl(dst, nan_result);
 2620     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2621     emit_d32( cbuf, -1 );
 2622     // jcc(Assembler::parity, exit);
 2623     emit_opcode( cbuf, 0x7A );
 2624     emit_d8    ( cbuf, 0x13 );
 2625     // movl(dst, less_result);
 2626     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2627     emit_d32( cbuf, -1 );
 2628     // jcc(Assembler::below, exit);
 2629     emit_opcode( cbuf, 0x72 );
 2630     emit_d8    ( cbuf, 0x0C );
 2631     // movl(dst, equal_result);
 2632     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2633     emit_d32( cbuf, 0 );
 2634     // jcc(Assembler::equal, exit);
 2635     emit_opcode( cbuf, 0x74 );
 2636     emit_d8    ( cbuf, 0x05 );
 2637     // movl(dst, greater_result);
 2638     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2639     emit_d32( cbuf, 1 );
 2640   %}
 2641 
 2642 
 2643   // Compare the longs and set flags
 2644   // BROKEN!  Do Not use as-is
 2645   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2646     // CMP    $src1.hi,$src2.hi
 2647     emit_opcode( cbuf, 0x3B );
 2648     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2649     // JNE,s  done
 2650     emit_opcode(cbuf,0x75);
 2651     emit_d8(cbuf, 2 );
 2652     // CMP    $src1.lo,$src2.lo
 2653     emit_opcode( cbuf, 0x3B );
 2654     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2655 // done:
 2656   %}
 2657 
 2658   enc_class convert_int_long( regL dst, rRegI src ) %{
 2659     // mov $dst.lo,$src
 2660     int dst_encoding = $dst$$reg;
 2661     int src_encoding = $src$$reg;
 2662     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2663     // mov $dst.hi,$src
 2664     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2665     // sar $dst.hi,31
 2666     emit_opcode( cbuf, 0xC1 );
 2667     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2668     emit_d8(cbuf, 0x1F );
 2669   %}
 2670 
 2671   enc_class convert_long_double( eRegL src ) %{
 2672     // push $src.hi
 2673     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2674     // push $src.lo
 2675     emit_opcode(cbuf, 0x50+$src$$reg  );
 2676     // fild 64-bits at [SP]
 2677     emit_opcode(cbuf,0xdf);
 2678     emit_d8(cbuf, 0x6C);
 2679     emit_d8(cbuf, 0x24);
 2680     emit_d8(cbuf, 0x00);
 2681     // pop stack
 2682     emit_opcode(cbuf, 0x83); // add  SP, #8
 2683     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2684     emit_d8(cbuf, 0x8);
 2685   %}
 2686 
 2687   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2688     // IMUL   EDX:EAX,$src1
 2689     emit_opcode( cbuf, 0xF7 );
 2690     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2691     // SAR    EDX,$cnt-32
 2692     int shift_count = ((int)$cnt$$constant) - 32;
 2693     if (shift_count > 0) {
 2694       emit_opcode(cbuf, 0xC1);
 2695       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2696       emit_d8(cbuf, shift_count);
 2697     }
 2698   %}
 2699 
 2700   // This version does not have the trailing ADD ESP,8 to pop the operands
 2701   enc_class convert_long_double2( eRegL src ) %{
 2702     // push $src.hi
 2703     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2704     // push $src.lo
 2705     emit_opcode(cbuf, 0x50+$src$$reg  );
 2706     // fild 64-bits at [SP]
 2707     emit_opcode(cbuf,0xdf);
 2708     emit_d8(cbuf, 0x6C);
 2709     emit_d8(cbuf, 0x24);
 2710     emit_d8(cbuf, 0x00);
 2711   %}
 2712 
 2713   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2714     // Basic idea: long = (long)int * (long)int
 2715     // IMUL EDX:EAX, src
 2716     emit_opcode( cbuf, 0xF7 );
 2717     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2718   %}
 2719 
 2720   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2721     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2722     // MUL EDX:EAX, src
 2723     emit_opcode( cbuf, 0xF7 );
 2724     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2725   %}
 2726 
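        // The 64x64->64 multiply below relies on the identity sketched here in
        // C-like pseudocode (illustration only, not generated code):
        //   uint32_t x_lo, x_hi;                   // $dst (EDX:EAX on entry)
        //   uint32_t y_lo, y_hi;                   // $src
        //   uint64_t p  = (uint64_t)x_lo * y_lo;               // MUL  EDX:EAX,$src.lo
        //   uint32_t hi = (uint32_t)(p >> 32)
        //               + x_lo * y_hi + x_hi * y_lo;           // two IMULs + ADDs
        //   // result = ((uint64_t)hi << 32) | (uint32_t)p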
 2727   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2728     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2729     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2730     // MOV    $tmp,$src.lo
 2731     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2732     // IMUL   $tmp,EDX
 2733     emit_opcode( cbuf, 0x0F );
 2734     emit_opcode( cbuf, 0xAF );
 2735     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2736     // MOV    EDX,$src.hi
 2737     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2738     // IMUL   EDX,EAX
 2739     emit_opcode( cbuf, 0x0F );
 2740     emit_opcode( cbuf, 0xAF );
 2741     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2742     // ADD    $tmp,EDX
 2743     emit_opcode( cbuf, 0x03 );
 2744     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2745     // MUL   EDX:EAX,$src.lo
 2746     emit_opcode( cbuf, 0xF7 );
 2747     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2748     // ADD    EDX,$tmp
 2749     emit_opcode( cbuf, 0x03 );
 2750     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2751   %}
 2752 
 2753   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2754     // Basic idea: lo(result) = lo(src * y_lo)
 2755     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2756     // IMUL   $tmp,EDX,$src
 2757     emit_opcode( cbuf, 0x6B );
 2758     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2759     emit_d8( cbuf, (int)$src$$constant );
 2760     // MOV    EDX,$src
 2761     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2762     emit_d32( cbuf, (int)$src$$constant );
 2763     // MUL   EDX:EAX,EDX
 2764     emit_opcode( cbuf, 0xF7 );
 2765     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2766     // ADD    EDX,$tmp
 2767     emit_opcode( cbuf, 0x03 );
 2768     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2769   %}
 2770 
 2771   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2772     // PUSH src1.hi
 2773     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2774     // PUSH src1.lo
 2775     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2776     // PUSH src2.hi
 2777     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2778     // PUSH src2.lo
 2779     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2780     // CALL directly to the runtime
 2781     cbuf.set_insts_mark();
 2782     emit_opcode(cbuf,0xE8);       // Call into runtime
 2783     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2784     // Restore stack
 2785     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2786     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2787     emit_d8(cbuf, 4*4);
 2788   %}
 2789 
 2790   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2791     // PUSH src1.hi
 2792     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2793     // PUSH src1.lo
 2794     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2795     // PUSH src2.hi
 2796     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2797     // PUSH src2.lo
 2798     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2799     // CALL directly to the runtime
 2800     cbuf.set_insts_mark();
 2801     emit_opcode(cbuf,0xE8);       // Call into runtime
 2802     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2803     // Restore stack
 2804     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2805     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2806     emit_d8(cbuf, 4*4);
 2807   %}
 2808 
 2809   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2810     // MOV   $tmp,$src.lo
 2811     emit_opcode(cbuf, 0x8B);
 2812     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2813     // OR    $tmp,$src.hi
 2814     emit_opcode(cbuf, 0x0B);
 2815     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2816   %}
 2817 
 2818   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2819     // CMP    $src1.lo,$src2.lo
 2820     emit_opcode( cbuf, 0x3B );
 2821     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2822     // JNE,s  skip
 2823     emit_cc(cbuf, 0x70, 0x5);
 2824     emit_d8(cbuf,2);
 2825     // CMP    $src1.hi,$src2.hi
 2826     emit_opcode( cbuf, 0x3B );
 2827     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2828   %}
 2829 
 2830   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2831     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2832     emit_opcode( cbuf, 0x3B );
 2833     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2834     // MOV    $tmp,$src1.hi
 2835     emit_opcode( cbuf, 0x8B );
 2836     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2837     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2838     emit_opcode( cbuf, 0x1B );
 2839     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2840   %}
 2841 
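        // Compare a long against zero by computing the flags of (0 - src):
        // the CMP sets the borrow from the low word, and the SBB folds $src.hi
        // plus that borrow into the final flags.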
 2842   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2843     // XOR    $tmp,$tmp
 2844     emit_opcode(cbuf,0x33);  // XOR
 2845     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2846     // CMP    $tmp,$src.lo
 2847     emit_opcode( cbuf, 0x3B );
 2848     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2849     // SBB    $tmp,$src.hi
 2850     emit_opcode( cbuf, 0x1B );
 2851     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2852   %}
 2853 
 2854  // Sniff, sniff... smells like Gnu Superoptimizer
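        // 64-bit two's-complement negate: NEG lo leaves the borrow set when
        // lo != 0, and the trailing SBB hi,0 folds that borrow into the
        // already-negated high word, giving dst = -(hi:lo).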
 2855   enc_class neg_long( eRegL dst ) %{
 2856     emit_opcode(cbuf,0xF7);    // NEG hi
 2857     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2858     emit_opcode(cbuf,0xF7);    // NEG lo
 2859     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2860     emit_opcode(cbuf,0x83);    // SBB hi,0
 2861     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2862     emit_d8    (cbuf,0 );
 2863   %}
 2864 
 2865   enc_class enc_pop_rdx() %{
 2866     emit_opcode(cbuf,0x5A);
 2867   %}
 2868 
 2869   enc_class enc_rethrow() %{
 2870     cbuf.set_insts_mark();
 2871     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2872     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2873                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2874   %}
 2875 
 2876 
 2877   // Convert a double to an int.  Java semantics require we do complex
 2878   // manipulations in the corner cases.  So we set the rounding mode to
 2879   // 'zero', store the darned double down as an int, and reset the
 2880   // rounding mode to 'nearest'.  If the hardware produces its special
 2881   // 'invalid' value (0x80000000), a runtime stub patches up the correct result.
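        // Roughly the sequence the encoding below produces (a sketch; control
        // word addresses and the stub are shown symbolically):
        //   FLDCW  [trunc_cw]        ; round toward zero
        //   SUB    ESP,4
        //   FISTP  dword [ESP]       ; store the double as an int, pop FPU stack
        //   FLDCW  [std_cw]          ; restore std/24-bit mode
        //   POP    EAX
        //   CMP    EAX,0x80000000    ; Intel's "invalid" int result?
        //   JNE    done
        //   FLD    ST(i)             ; reload $src for the slow path
        //   CALL   StubRoutines::x86::d2i_wrapper()
        // done: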
 2882   enc_class DPR2I_encoding( regDPR src ) %{
 2883     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2884     // exceptions here, so that a NaN or other corner-case value will
 2885     // throw an exception (but normal values get converted at full speed).
 2886     // However, I2C adapters and other float-stack manglers leave pending
 2887     // invalid-op exceptions hanging.  We would have to clear them before
 2888     // enabling them and that is more expensive than just testing for the
 2889     // invalid value Intel stores down in the corner cases.
 2890     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2891     emit_opcode(cbuf,0x2D);
 2892     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2893     // Allocate a word
 2894     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2895     emit_opcode(cbuf,0xEC);
 2896     emit_d8(cbuf,0x04);
 2897     // Encoding assumes a double has been pushed into FPR0.
 2898     // Store down the double as an int, popping the FPU stack
 2899     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2900     emit_opcode(cbuf,0x1C);
 2901     emit_d8(cbuf,0x24);
 2902     // Restore the rounding mode; mask the exception
 2903     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2904     emit_opcode(cbuf,0x2D);
 2905     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2906         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2907         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2908 
 2909     // Load the converted int; adjust CPU stack
 2910     emit_opcode(cbuf,0x58);       // POP EAX
 2911     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2912     emit_d32   (cbuf,0x80000000); //         0x80000000
 2913     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2914     emit_d8    (cbuf,0x07);       // Size of slow_call
 2915     // Push src onto stack slow-path
 2916     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2917     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2918     // CALL directly to the runtime
 2919     cbuf.set_insts_mark();
 2920     emit_opcode(cbuf,0xE8);       // Call into runtime
 2921     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2922     // Carry on here...
 2923   %}
 2924 
 2925   enc_class DPR2L_encoding( regDPR src ) %{
 2926     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2927     emit_opcode(cbuf,0x2D);
 2928     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2929     // Allocate two words (8 bytes)
 2930     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2931     emit_opcode(cbuf,0xEC);
 2932     emit_d8(cbuf,0x08);
 2933     // Encoding assumes a double has been pushed into FPR0.
 2934     // Store down the double as a long, popping the FPU stack
 2935     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2936     emit_opcode(cbuf,0x3C);
 2937     emit_d8(cbuf,0x24);
 2938     // Restore the rounding mode; mask the exception
 2939     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2940     emit_opcode(cbuf,0x2D);
 2941     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2942         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2943         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2944 
 2945     // Load the converted long; adjust CPU stack
 2946     emit_opcode(cbuf,0x58);       // POP EAX
 2947     emit_opcode(cbuf,0x5A);       // POP EDX
 2948     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2949     emit_d8    (cbuf,0xFA);       // rdx
 2950     emit_d32   (cbuf,0x80000000); //         0x80000000
 2951     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2952     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2953     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2954     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2955     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2956     emit_d8    (cbuf,0x07);       // Size of slow_call
 2957     // Push src onto stack slow-path
 2958     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2959     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2960     // CALL directly to the runtime
 2961     cbuf.set_insts_mark();
 2962     emit_opcode(cbuf,0xE8);       // Call into runtime
 2963     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2964     // Carry on here...
 2965   %}
 2966 
 2967   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2968     // Operand was loaded from memory into fp ST (stack top)
 2969     // FMUL   ST,$src  /* D8 C8+i */
 2970     emit_opcode(cbuf, 0xD8);
 2971     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2972   %}
 2973 
 2974   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2975     // FADD   ST,src2  /* D8 C0+i */
 2976     emit_opcode(cbuf, 0xD8);
 2977     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2978     //could use FADDP  src2,fpST  /* DE C0+i */
 2979   %}
 2980 
 2981   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2982     // FADDP  src2,ST  /* DE C0+i */
 2983     emit_opcode(cbuf, 0xDE);
 2984     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2985   %}
 2986 
 2987   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2988     // Operand has been loaded into fp ST (stack top)
 2989       // FSUB   ST,$src1
 2990       emit_opcode(cbuf, 0xD8);
 2991       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 2992 
 2993       // FDIV
 2994       emit_opcode(cbuf, 0xD8);
 2995       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 2996   %}
 2997 
 2998   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 2999     // Operand was loaded from memory into fp ST (stack top)
 3000     // FADD   ST,$src  /* D8 C0+i */
 3001     emit_opcode(cbuf, 0xD8);
 3002     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3003 
 3004     // FMUL  ST,src2  /* D8 C8+i */
 3005     emit_opcode(cbuf, 0xD8);
 3006     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3007   %}
 3008 
 3009 
 3010   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3011     // Operand was loaded from memory into fp ST (stack top)
 3012     // FADD   ST,$src  /* D8 C0+i */
 3013     emit_opcode(cbuf, 0xD8);
 3014     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3015 
 3016     // FMULP  src2,ST  /* DE C8+i */
 3017     emit_opcode(cbuf, 0xDE);
 3018     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3019   %}
 3020 
 3021   // Atomically load the volatile long
 3022   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3023     emit_opcode(cbuf,0xDF);
 3024     int rm_byte_opcode = 0x05;
 3025     int base     = $mem$$base;
 3026     int index    = $mem$$index;
 3027     int scale    = $mem$$scale;
 3028     int displace = $mem$$disp;
 3029     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3030     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3031     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3032   %}
 3033 
 3034   // Volatile Store Long.  Must be atomic, so move it into
 3035   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3036   // target address before the store (for null-ptr checks)
 3037   // so the memory operand is used twice in the encoding.
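        // Roughly what the encoding below emits (a sketch; src_disp stands for
        // the source stack-slot displacement):
        //   FILD   qword [ESP+src_disp]   ; DF /5 : lift the long onto the FP stack
        //   FISTP  qword [$mem]           ; DF /7 : one 64-bit store to the target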
 3038   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3039     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3040     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3041     emit_opcode(cbuf,0xDF);
 3042     int rm_byte_opcode = 0x07;
 3043     int base     = $mem$$base;
 3044     int index    = $mem$$index;
 3045     int scale    = $mem$$scale;
 3046     int displace = $mem$$disp;
 3047     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3048     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3049   %}
 3050 
 3051 %}
 3052 
 3053 
 3054 //----------FRAME--------------------------------------------------------------
 3055 // Definition of frame structure and management information.
 3056 //
 3057 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3058 //                             |   (to get allocators register number
 3059 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3060 //  r   CALLER     |        |
 3061 //  o     |        +--------+      pad to even-align allocators stack-slot
 3062 //  w     V        |  pad0  |        numbers; owned by CALLER
 3063 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3064 //  h     ^        |   in   |  5
 3065 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3066 //  |     |        |        |  3
 3067 //  |     |        +--------+
 3068 //  V     |        | old out|      Empty on Intel, window on Sparc
 3069 //        |    old |preserve|      Must be even aligned.
 3070 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3071 //        |        |   in   |  3   area for Intel ret address
 3072 //     Owned by    |preserve|      Empty on Sparc.
 3073 //       SELF      +--------+
 3074 //        |        |  pad2  |  2   pad to align old SP
 3075 //        |        +--------+  1
 3076 //        |        | locks  |  0
 3077 //        |        +--------+----> OptoReg::stack0(), even aligned
 3078 //        |        |  pad1  | 11   pad to align new SP
 3079 //        |        +--------+
 3080 //        |        |        | 10
 3081 //        |        | spills |  9   spills
 3082 //        V        |        |  8   (pad0 slot for callee)
 3083 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3084 //        ^        |  out   |  7
 3085 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3086 //     Owned by    +--------+
 3087 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3088 //        |    new |preserve|      Must be even-aligned.
 3089 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3090 //        |        |        |
 3091 //
 3092 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3093 //         known from SELF's arguments and the Java calling convention.
 3094 //         Region 6-7 is determined per call site.
 3095 // Note 2: If the calling convention leaves holes in the incoming argument
 3096 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3097 //         are owned by the CALLEE.  Holes should not be necessary in the
 3098 //         incoming area, as the Java calling convention is completely under
 3099 //         the control of the AD file.  Doubles can be sorted and packed to
 3100 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3101 //         varargs C calling conventions.
 3102 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3103 //         even aligned with pad0 as needed.
 3104 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3105 //         region 6-11 is even aligned; it may be padded out more so that
 3106 //         the region from SP to FP meets the minimum stack alignment.
 3107 
 3108 frame %{
 3109   // These three registers define part of the calling convention
 3110   // between compiled code and the interpreter.
 3111   inline_cache_reg(EAX);                // Inline Cache Register
 3112 
 3113   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3114   cisc_spilling_operand_name(indOffset32);
 3115 
 3116   // Number of stack slots consumed by locking an object
 3117   sync_stack_slots(1);
 3118 
 3119   // Compiled code's Frame Pointer
 3120   frame_pointer(ESP);
 3121   // Interpreter stores its frame pointer in a register which is
 3122   // stored to the stack by I2CAdaptors.
 3123   // I2CAdaptors convert from interpreted java to compiled java.
 3124   interpreter_frame_pointer(EBP);
 3125 
 3126   // Stack alignment requirement
 3127   // Alignment size in bytes (128-bit -> 16 bytes)
 3128   stack_alignment(StackAlignmentInBytes);
 3129 
 3130   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3131   // for calls to C.  Supports the var-args backing area for register parms.
 3132   varargs_C_out_slots_killed(0);
 3133 
 3134   // The after-PROLOG location of the return address.  Location of
 3135   // return address specifies a type (REG or STACK) and a number
 3136   // representing the register number (i.e. - use a register name) or
 3137   // stack slot.
 3138   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3139   // Otherwise, it is above the locks and verification slot and alignment word
 3140   return_addr(STACK - 1 +
 3141               align_up((Compile::current()->in_preserve_stack_slots() +
 3142                         Compile::current()->fixed_slots()),
 3143                        stack_alignment_in_slots()));
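        // e.g. (hypothetical values): if in_preserve_stack_slots() == 3 and
        // fixed_slots() == 1 with stack_alignment_in_slots() == 4, then
        // align_up(3 + 1, 4) == 4 and the return address sits at STACK - 1 + 4,
        // i.e. stack slot 3.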
 3144 
 3145   // Location of C & interpreter return values
 3146   c_return_value %{
 3147     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3148     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3149     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3150 
 3151     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3152     // that C functions return float and double results in XMM0.
 3153     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3154       return OptoRegPair(XMM0b_num,XMM0_num);
 3155     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3156       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3157 
 3158     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3159   %}
 3160 
 3161   // Location of return values
 3162   return_value %{
 3163     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3164     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3165     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3166     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3167       return OptoRegPair(XMM0b_num,XMM0_num);
 3168     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3169       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3170     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3171   %}
 3172 
 3173 %}
 3174 
 3175 //----------ATTRIBUTES---------------------------------------------------------
 3176 //----------Operand Attributes-------------------------------------------------
 3177 op_attrib op_cost(0);        // Required cost attribute
 3178 
 3179 //----------Instruction Attributes---------------------------------------------
 3180 ins_attrib ins_cost(100);       // Required cost attribute
 3181 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3182 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3183                                 // non-matching short branch variant of some
 3184                                 // long branch?
 3185 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3186                                 // specifies the alignment that some part of the instruction (not
 3187                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3188                                 // function must be provided for the instruction
 3189 
 3190 //----------OPERANDS-----------------------------------------------------------
 3191 // Operand definitions must precede instruction definitions for correct parsing
 3192 // in the ADLC because operands constitute user defined types which are used in
 3193 // instruction definitions.
 3194 
 3195 //----------Simple Operands----------------------------------------------------
 3196 // Immediate Operands
 3197 // Integer Immediate
 3198 operand immI() %{
 3199   match(ConI);
 3200 
 3201   op_cost(10);
 3202   format %{ %}
 3203   interface(CONST_INTER);
 3204 %}
 3205 
 3206 // Constant for test vs zero
 3207 operand immI_0() %{
 3208   predicate(n->get_int() == 0);
 3209   match(ConI);
 3210 
 3211   op_cost(0);
 3212   format %{ %}
 3213   interface(CONST_INTER);
 3214 %}
 3215 
 3216 // Constant for increment
 3217 operand immI_1() %{
 3218   predicate(n->get_int() == 1);
 3219   match(ConI);
 3220 
 3221   op_cost(0);
 3222   format %{ %}
 3223   interface(CONST_INTER);
 3224 %}
 3225 
 3226 // Constant for decrement
 3227 operand immI_M1() %{
 3228   predicate(n->get_int() == -1);
 3229   match(ConI);
 3230 
 3231   op_cost(0);
 3232   format %{ %}
 3233   interface(CONST_INTER);
 3234 %}
 3235 
 3236 // Valid scale values for addressing modes
 3237 operand immI2() %{
 3238   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3239   match(ConI);
 3240 
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 operand immI8() %{
 3246   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3247   match(ConI);
 3248 
 3249   op_cost(5);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 operand immU8() %{
 3255   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3256   match(ConI);
 3257 
 3258   op_cost(5);
 3259   format %{ %}
 3260   interface(CONST_INTER);
 3261 %}
 3262 
 3263 operand immI16() %{
 3264   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3265   match(ConI);
 3266 
 3267   op_cost(10);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Int Immediate non-negative
 3273 operand immU31()
 3274 %{
 3275   predicate(n->get_int() >= 0);
 3276   match(ConI);
 3277 
 3278   op_cost(0);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 // Constant for long shifts
 3284 operand immI_32() %{
 3285   predicate( n->get_int() == 32 );
 3286   match(ConI);
 3287 
 3288   op_cost(0);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 operand immI_1_31() %{
 3294   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3295   match(ConI);
 3296 
 3297   op_cost(0);
 3298   format %{ %}
 3299   interface(CONST_INTER);
 3300 %}
 3301 
 3302 operand immI_32_63() %{
 3303   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3304   match(ConI);
 3305   op_cost(0);
 3306 
 3307   format %{ %}
 3308   interface(CONST_INTER);
 3309 %}
 3310 
 3311 operand immI_2() %{
 3312   predicate( n->get_int() == 2 );
 3313   match(ConI);
 3314 
 3315   op_cost(0);
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 operand immI_3() %{
 3321   predicate( n->get_int() == 3 );
 3322   match(ConI);
 3323 
 3324   op_cost(0);
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 operand immI_4()
 3330 %{
 3331   predicate(n->get_int() == 4);
 3332   match(ConI);
 3333 
 3334   op_cost(0);
 3335   format %{ %}
 3336   interface(CONST_INTER);
 3337 %}
 3338 
 3339 operand immI_8()
 3340 %{
 3341   predicate(n->get_int() == 8);
 3342   match(ConI);
 3343 
 3344   op_cost(0);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 // Pointer Immediate
 3350 operand immP() %{
 3351   match(ConP);
 3352 
 3353   op_cost(10);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 // NULL Pointer Immediate
 3359 operand immP0() %{
 3360   predicate( n->get_ptr() == 0 );
 3361   match(ConP);
 3362   op_cost(0);
 3363 
 3364   format %{ %}
 3365   interface(CONST_INTER);
 3366 %}
 3367 
 3368 // Long Immediate
 3369 operand immL() %{
 3370   match(ConL);
 3371 
 3372   op_cost(20);
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 // Long Immediate zero
 3378 operand immL0() %{
 3379   predicate( n->get_long() == 0L );
 3380   match(ConL);
 3381   op_cost(0);
 3382 
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Long Immediate minus-one
 3388 operand immL_M1() %{
 3389   predicate( n->get_long() == -1L );
 3390   match(ConL);
 3391   op_cost(0);
 3392 
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long immediate from 0 to 127.
 3398 // Used for a shorter form of long mul by 10.
 3399 operand immL_127() %{
 3400   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3401   match(ConL);
 3402   op_cost(0);
 3403 
 3404   format %{ %}
 3405   interface(CONST_INTER);
 3406 %}
 3407 
 3408 // Long Immediate: low 32-bit mask
 3409 operand immL_32bits() %{
 3410   predicate(n->get_long() == 0xFFFFFFFFL);
 3411   match(ConL);
 3412   op_cost(0);
 3413 
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long Immediate: 32-bit signed value
 3419 operand immL32() %{
 3420   predicate(n->get_long() == (int)(n->get_long()));
 3421   match(ConL);
 3422   op_cost(20);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
 3428 // Double Immediate zero
 3429 operand immDPR0() %{
 3430   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3431   // bug that generates code such that NaNs compare equal to 0.0
 3432   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3433   match(ConD);
 3434 
 3435   op_cost(5);
 3436   format %{ %}
 3437   interface(CONST_INTER);
 3438 %}
 3439 
 3440 // Double Immediate one
 3441 operand immDPR1() %{
 3442   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3443   match(ConD);
 3444 
 3445   op_cost(5);
 3446   format %{ %}
 3447   interface(CONST_INTER);
 3448 %}
 3449 
 3450 // Double Immediate
 3451 operand immDPR() %{
 3452   predicate(UseSSE<=1);
 3453   match(ConD);
 3454 
 3455   op_cost(5);
 3456   format %{ %}
 3457   interface(CONST_INTER);
 3458 %}
 3459 
 3460 operand immD() %{
 3461   predicate(UseSSE>=2);
 3462   match(ConD);
 3463 
 3464   op_cost(5);
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
 3469 // Double Immediate zero
 3470 operand immD0() %{
 3471   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3472   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3473   // compare equal to -0.0.
 3474   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3475   match(ConD);
 3476 
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 // Float Immediate zero
 3482 operand immFPR0() %{
 3483   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3484   match(ConF);
 3485 
 3486   op_cost(5);
 3487   format %{ %}
 3488   interface(CONST_INTER);
 3489 %}
 3490 
 3491 // Float Immediate one
 3492 operand immFPR1() %{
 3493   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3494   match(ConF);
 3495 
 3496   op_cost(5);
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 // Float Immediate
 3502 operand immFPR() %{
 3503   predicate( UseSSE == 0 );
 3504   match(ConF);
 3505 
 3506   op_cost(5);
 3507   format %{ %}
 3508   interface(CONST_INTER);
 3509 %}
 3510 
 3511 // Float Immediate
 3512 operand immF() %{
 3513   predicate(UseSSE >= 1);
 3514   match(ConF);
 3515 
 3516   op_cost(5);
 3517   format %{ %}
 3518   interface(CONST_INTER);
 3519 %}
 3520 
 3521 // Float Immediate zero.  Zero and not -0.0
 3522 operand immF0() %{
 3523   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3524   match(ConF);
 3525 
 3526   op_cost(5);
 3527   format %{ %}
 3528   interface(CONST_INTER);
 3529 %}
 3530 
 3531 // Immediates for special shifts (sign extend)
 3532 
 3533 // Constants for sign-extend shifts
 3534 operand immI_16() %{
 3535   predicate( n->get_int() == 16 );
 3536   match(ConI);
 3537 
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 operand immI_24() %{
 3543   predicate( n->get_int() == 24 );
 3544   match(ConI);
 3545 
 3546   format %{ %}
 3547   interface(CONST_INTER);
 3548 %}
 3549 
 3550 // Constant for byte-wide masking
 3551 operand immI_255() %{
 3552   predicate( n->get_int() == 255 );
 3553   match(ConI);
 3554 
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Constant for short-wide masking
 3560 operand immI_65535() %{
 3561   predicate(n->get_int() == 65535);
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 operand kReg()
 3569 %{
 3570   constraint(ALLOC_IN_RC(vectmask_reg));
 3571   match(RegVectMask);
 3572   format %{%}
 3573   interface(REG_INTER);
 3574 %}
 3575 
 3576 operand kReg_K1()
 3577 %{
 3578   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3579   match(RegVectMask);
 3580   format %{%}
 3581   interface(REG_INTER);
 3582 %}
 3583 
 3584 operand kReg_K2()
 3585 %{
 3586   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3587   match(RegVectMask);
 3588   format %{%}
 3589   interface(REG_INTER);
 3590 %}
 3591 
 3592 // Special Registers
 3593 operand kReg_K3()
 3594 %{
 3595   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3596   match(RegVectMask);
 3597   format %{%}
 3598   interface(REG_INTER);
 3599 %}
 3600 
 3601 operand kReg_K4()
 3602 %{
 3603   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3604   match(RegVectMask);
 3605   format %{%}
 3606   interface(REG_INTER);
 3607 %}
 3608 
 3609 operand kReg_K5()
 3610 %{
 3611   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3612   match(RegVectMask);
 3613   format %{%}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand kReg_K6()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 // Special Registers
 3626 operand kReg_K7()
 3627 %{
 3628   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3629   match(RegVectMask);
 3630   format %{%}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 // Register Operands
 3635 // Integer Register
 3636 operand rRegI() %{
 3637   constraint(ALLOC_IN_RC(int_reg));
 3638   match(RegI);
 3639   match(xRegI);
 3640   match(eAXRegI);
 3641   match(eBXRegI);
 3642   match(eCXRegI);
 3643   match(eDXRegI);
 3644   match(eDIRegI);
 3645   match(eSIRegI);
 3646 
 3647   format %{ %}
 3648   interface(REG_INTER);
 3649 %}
 3650 
 3651 // Subset of Integer Register
 3652 operand xRegI(rRegI reg) %{
 3653   constraint(ALLOC_IN_RC(int_x_reg));
 3654   match(reg);
 3655   match(eAXRegI);
 3656   match(eBXRegI);
 3657   match(eCXRegI);
 3658   match(eDXRegI);
 3659 
 3660   format %{ %}
 3661   interface(REG_INTER);
 3662 %}
 3663 
 3664 // Special Registers
 3665 operand eAXRegI(xRegI reg) %{
 3666   constraint(ALLOC_IN_RC(eax_reg));
 3667   match(reg);
 3668   match(rRegI);
 3669 
 3670   format %{ "EAX" %}
 3671   interface(REG_INTER);
 3672 %}
 3673 
 3674 // Special Registers
 3675 operand eBXRegI(xRegI reg) %{
 3676   constraint(ALLOC_IN_RC(ebx_reg));
 3677   match(reg);
 3678   match(rRegI);
 3679 
 3680   format %{ "EBX" %}
 3681   interface(REG_INTER);
 3682 %}
 3683 
 3684 operand eCXRegI(xRegI reg) %{
 3685   constraint(ALLOC_IN_RC(ecx_reg));
 3686   match(reg);
 3687   match(rRegI);
 3688 
 3689   format %{ "ECX" %}
 3690   interface(REG_INTER);
 3691 %}
 3692 
 3693 operand eDXRegI(xRegI reg) %{
 3694   constraint(ALLOC_IN_RC(edx_reg));
 3695   match(reg);
 3696   match(rRegI);
 3697 
 3698   format %{ "EDX" %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand eDIRegI(xRegI reg) %{
 3703   constraint(ALLOC_IN_RC(edi_reg));
 3704   match(reg);
 3705   match(rRegI);
 3706 
 3707   format %{ "EDI" %}
 3708   interface(REG_INTER);
 3709 %}
 3710 
 3711 operand naxRegI() %{
 3712   constraint(ALLOC_IN_RC(nax_reg));
 3713   match(RegI);
 3714   match(eCXRegI);
 3715   match(eDXRegI);
 3716   match(eSIRegI);
 3717   match(eDIRegI);
 3718 
 3719   format %{ %}
 3720   interface(REG_INTER);
 3721 %}
 3722 
 3723 operand nadxRegI() %{
 3724   constraint(ALLOC_IN_RC(nadx_reg));
 3725   match(RegI);
 3726   match(eBXRegI);
 3727   match(eCXRegI);
 3728   match(eSIRegI);
 3729   match(eDIRegI);
 3730 
 3731   format %{ %}
 3732   interface(REG_INTER);
 3733 %}
 3734 
 3735 operand ncxRegI() %{
 3736   constraint(ALLOC_IN_RC(ncx_reg));
 3737   match(RegI);
 3738   match(eAXRegI);
 3739   match(eDXRegI);
 3740   match(eSIRegI);
 3741   match(eDIRegI);
 3742 
 3743   format %{ %}
 3744   interface(REG_INTER);
 3745 %}
 3746 
 3747 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3748 //
 3749 operand eSIRegI(xRegI reg) %{
 3750    constraint(ALLOC_IN_RC(esi_reg));
 3751    match(reg);
 3752    match(rRegI);
 3753 
 3754    format %{ "ESI" %}
 3755    interface(REG_INTER);
 3756 %}
 3757 
 3758 // Pointer Register
 3759 operand anyRegP() %{
 3760   constraint(ALLOC_IN_RC(any_reg));
 3761   match(RegP);
 3762   match(eAXRegP);
 3763   match(eBXRegP);
 3764   match(eCXRegP);
 3765   match(eDIRegP);
 3766   match(eRegP);
 3767 
 3768   format %{ %}
 3769   interface(REG_INTER);
 3770 %}
 3771 
 3772 operand eRegP() %{
 3773   constraint(ALLOC_IN_RC(int_reg));
 3774   match(RegP);
 3775   match(eAXRegP);
 3776   match(eBXRegP);
 3777   match(eCXRegP);
 3778   match(eDIRegP);
 3779 
 3780   format %{ %}
 3781   interface(REG_INTER);
 3782 %}
 3783 
 3784 operand rRegP() %{
 3785   constraint(ALLOC_IN_RC(int_reg));
 3786   match(RegP);
 3787   match(eAXRegP);
 3788   match(eBXRegP);
 3789   match(eCXRegP);
 3790   match(eDIRegP);
 3791 
 3792   format %{ %}
 3793   interface(REG_INTER);
 3794 %}
 3795 
 3796 // On Windows 95, EBP is not safe to use for implicit null tests.
 3797 operand eRegP_no_EBP() %{
 3798   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3799   match(RegP);
 3800   match(eAXRegP);
 3801   match(eBXRegP);
 3802   match(eCXRegP);
 3803   match(eDIRegP);
 3804 
 3805   op_cost(100);
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand naxRegP() %{
 3811   constraint(ALLOC_IN_RC(nax_reg));
 3812   match(RegP);
 3813   match(eBXRegP);
 3814   match(eDXRegP);
 3815   match(eCXRegP);
 3816   match(eSIRegP);
 3817   match(eDIRegP);
 3818 
 3819   format %{ %}
 3820   interface(REG_INTER);
 3821 %}
 3822 
 3823 operand nabxRegP() %{
 3824   constraint(ALLOC_IN_RC(nabx_reg));
 3825   match(RegP);
 3826   match(eCXRegP);
 3827   match(eDXRegP);
 3828   match(eSIRegP);
 3829   match(eDIRegP);
 3830 
 3831   format %{ %}
 3832   interface(REG_INTER);
 3833 %}
 3834 
 3835 operand pRegP() %{
 3836   constraint(ALLOC_IN_RC(p_reg));
 3837   match(RegP);
 3838   match(eBXRegP);
 3839   match(eDXRegP);
 3840   match(eSIRegP);
 3841   match(eDIRegP);
 3842 
 3843   format %{ %}
 3844   interface(REG_INTER);
 3845 %}
 3846 
 3847 // Special Registers
 3848 // Return a pointer value
 3849 operand eAXRegP(eRegP reg) %{
 3850   constraint(ALLOC_IN_RC(eax_reg));
 3851   match(reg);
 3852   format %{ "EAX" %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 // Used in AtomicAdd
 3857 operand eBXRegP(eRegP reg) %{
 3858   constraint(ALLOC_IN_RC(ebx_reg));
 3859   match(reg);
 3860   format %{ "EBX" %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 // Tail-call (interprocedural jump) to interpreter
 3865 operand eCXRegP(eRegP reg) %{
 3866   constraint(ALLOC_IN_RC(ecx_reg));
 3867   match(reg);
 3868   format %{ "ECX" %}
 3869   interface(REG_INTER);
 3870 %}
 3871 
 3872 operand eDXRegP(eRegP reg) %{
 3873   constraint(ALLOC_IN_RC(edx_reg));
 3874   match(reg);
 3875   format %{ "EDX" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 
 3879 operand eSIRegP(eRegP reg) %{
 3880   constraint(ALLOC_IN_RC(esi_reg));
 3881   match(reg);
 3882   format %{ "ESI" %}
 3883   interface(REG_INTER);
 3884 %}
 3885 
 3886 // Used in rep stosw
 3887 operand eDIRegP(eRegP reg) %{
 3888   constraint(ALLOC_IN_RC(edi_reg));
 3889   match(reg);
 3890   format %{ "EDI" %}
 3891   interface(REG_INTER);
 3892 %}
 3893 
 3894 operand eRegL() %{
 3895   constraint(ALLOC_IN_RC(long_reg));
 3896   match(RegL);
 3897   match(eADXRegL);
 3898 
 3899   format %{ %}
 3900   interface(REG_INTER);
 3901 %}
 3902 
 3903 operand eADXRegL( eRegL reg ) %{
 3904   constraint(ALLOC_IN_RC(eadx_reg));
 3905   match(reg);
 3906 
 3907   format %{ "EDX:EAX" %}
 3908   interface(REG_INTER);
 3909 %}
 3910 
 3911 operand eBCXRegL( eRegL reg ) %{
 3912   constraint(ALLOC_IN_RC(ebcx_reg));
 3913   match(reg);
 3914 
 3915   format %{ "EBX:ECX" %}
 3916   interface(REG_INTER);
 3917 %}
 3918 
 3919 // Special case for integer high multiply
 3920 operand eADXRegL_low_only() %{
 3921   constraint(ALLOC_IN_RC(eadx_reg));
 3922   match(RegL);
 3923 
 3924   format %{ "EAX" %}
 3925   interface(REG_INTER);
 3926 %}
 3927 
 3928 // Flags register, used as output of compare instructions
 3929 operand rFlagsReg() %{
 3930   constraint(ALLOC_IN_RC(int_flags));
 3931   match(RegFlags);
 3932 
 3933   format %{ "EFLAGS" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 // Flags register, used as output of compare instructions
 3938 operand eFlagsReg() %{
 3939   constraint(ALLOC_IN_RC(int_flags));
 3940   match(RegFlags);
 3941 
 3942   format %{ "EFLAGS" %}
 3943   interface(REG_INTER);
 3944 %}
 3945 
 3946 // Flags register, used as output of FLOATING POINT compare instructions
 3947 operand eFlagsRegU() %{
 3948   constraint(ALLOC_IN_RC(int_flags));
 3949   match(RegFlags);
 3950 
 3951   format %{ "EFLAGS_U" %}
 3952   interface(REG_INTER);
 3953 %}
 3954 
 3955 operand eFlagsRegUCF() %{
 3956   constraint(ALLOC_IN_RC(int_flags));
 3957   match(RegFlags);
 3958   predicate(false);
 3959 
 3960   format %{ "EFLAGS_U_CF" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 // Condition Code Register used by long compare
 3965 operand flagsReg_long_LTGE() %{
 3966   constraint(ALLOC_IN_RC(int_flags));
 3967   match(RegFlags);
 3968   format %{ "FLAGS_LTGE" %}
 3969   interface(REG_INTER);
 3970 %}
 3971 operand flagsReg_long_EQNE() %{
 3972   constraint(ALLOC_IN_RC(int_flags));
 3973   match(RegFlags);
 3974   format %{ "FLAGS_EQNE" %}
 3975   interface(REG_INTER);
 3976 %}
 3977 operand flagsReg_long_LEGT() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980   format %{ "FLAGS_LEGT" %}
 3981   interface(REG_INTER);
 3982 %}
 3983 
 3984 // Condition Code Register used by unsigned long compare
 3985 operand flagsReg_ulong_LTGE() %{
 3986   constraint(ALLOC_IN_RC(int_flags));
 3987   match(RegFlags);
 3988   format %{ "FLAGS_U_LTGE" %}
 3989   interface(REG_INTER);
 3990 %}
 3991 operand flagsReg_ulong_EQNE() %{
 3992   constraint(ALLOC_IN_RC(int_flags));
 3993   match(RegFlags);
 3994   format %{ "FLAGS_U_EQNE" %}
 3995   interface(REG_INTER);
 3996 %}
 3997 operand flagsReg_ulong_LEGT() %{
 3998   constraint(ALLOC_IN_RC(int_flags));
 3999   match(RegFlags);
 4000   format %{ "FLAGS_U_LEGT" %}
 4001   interface(REG_INTER);
 4002 %}
 4003 
 4004 // Float register operands
 4005 operand regDPR() %{
 4006   predicate( UseSSE < 2 );
 4007   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4008   match(RegD);
 4009   match(regDPR1);
 4010   match(regDPR2);
 4011   format %{ %}
 4012   interface(REG_INTER);
 4013 %}
 4014 
 4015 operand regDPR1(regDPR reg) %{
 4016   predicate( UseSSE < 2 );
 4017   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4018   match(reg);
 4019   format %{ "FPR1" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 
 4023 operand regDPR2(regDPR reg) %{
 4024   predicate( UseSSE < 2 );
 4025   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4026   match(reg);
 4027   format %{ "FPR2" %}
 4028   interface(REG_INTER);
 4029 %}
 4030 
 4031 operand regnotDPR1(regDPR reg) %{
 4032   predicate( UseSSE < 2 );
 4033   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4034   match(reg);
 4035   format %{ %}
 4036   interface(REG_INTER);
 4037 %}
 4038 
 4039 // Float register operands
 4040 operand regFPR() %{
 4041   predicate( UseSSE < 2 );
 4042   constraint(ALLOC_IN_RC(fp_flt_reg));
 4043   match(RegF);
 4044   match(regFPR1);
 4045   format %{ %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 // Float register operands
 4050 operand regFPR1(regFPR reg) %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4053   match(reg);
 4054   format %{ "FPR1" %}
 4055   interface(REG_INTER);
 4056 %}
 4057 
 4058 // XMM Float register operands
 4059 operand regF() %{
 4060   predicate( UseSSE>=1 );
 4061   constraint(ALLOC_IN_RC(float_reg_legacy));
 4062   match(RegF);
 4063   format %{ %}
 4064   interface(REG_INTER);
 4065 %}
 4066 
 4067 operand legRegF() %{
 4068   predicate( UseSSE>=1 );
 4069   constraint(ALLOC_IN_RC(float_reg_legacy));
 4070   match(RegF);
 4071   format %{ %}
 4072   interface(REG_INTER);
 4073 %}
 4074 
 4075 // Float register operands
 4076 operand vlRegF() %{
 4077    constraint(ALLOC_IN_RC(float_reg_vl));
 4078    match(RegF);
 4079 
 4080    format %{ %}
 4081    interface(REG_INTER);
 4082 %}
 4083 
 4084 // XMM Double register operands
 4085 operand regD() %{
 4086   predicate( UseSSE>=2 );
 4087   constraint(ALLOC_IN_RC(double_reg_legacy));
 4088   match(RegD);
 4089   format %{ %}
 4090   interface(REG_INTER);
 4091 %}
 4092 
 4093 // Double register operands
 4094 operand legRegD() %{
 4095   predicate( UseSSE>=2 );
 4096   constraint(ALLOC_IN_RC(double_reg_legacy));
 4097   match(RegD);
 4098   format %{ %}
 4099   interface(REG_INTER);
 4100 %}
 4101 
 4102 operand vlRegD() %{
 4103    constraint(ALLOC_IN_RC(double_reg_vl));
 4104    match(RegD);
 4105 
 4106    format %{ %}
 4107    interface(REG_INTER);
 4108 %}
 4109 
 4110 //----------Memory Operands----------------------------------------------------
 4111 // Direct Memory Operand
 4112 operand direct(immP addr) %{
 4113   match(addr);
 4114 
 4115   format %{ "[$addr]" %}
 4116   interface(MEMORY_INTER) %{
 4117     base(0xFFFFFFFF);
 4118     index(0x4);
 4119     scale(0x0);
 4120     disp($addr);
 4121   %}
 4122 %}
 4123 
 4124 // Indirect Memory Operand
 4125 operand indirect(eRegP reg) %{
 4126   constraint(ALLOC_IN_RC(int_reg));
 4127   match(reg);
 4128 
 4129   format %{ "[$reg]" %}
 4130   interface(MEMORY_INTER) %{
 4131     base($reg);
 4132     index(0x4);
 4133     scale(0x0);
 4134     disp(0x0);
 4135   %}
 4136 %}
 4137 
 4138 // Indirect Memory Plus Short Offset Operand
 4139 operand indOffset8(eRegP reg, immI8 off) %{
 4140   match(AddP reg off);
 4141 
 4142   format %{ "[$reg + $off]" %}
 4143   interface(MEMORY_INTER) %{
 4144     base($reg);
 4145     index(0x4);
 4146     scale(0x0);
 4147     disp($off);
 4148   %}
 4149 %}
 4150 
 4151 // Indirect Memory Plus Long Offset Operand
 4152 operand indOffset32(eRegP reg, immI off) %{
 4153   match(AddP reg off);
 4154 
 4155   format %{ "[$reg + $off]" %}
 4156   interface(MEMORY_INTER) %{
 4157     base($reg);
 4158     index(0x4);
 4159     scale(0x0);
 4160     disp($off);
 4161   %}
 4162 %}
 4163 
 4164 // Indirect Memory Plus Long Offset Operand
 4165 operand indOffset32X(rRegI reg, immP off) %{
 4166   match(AddP off reg);
 4167 
 4168   format %{ "[$reg + $off]" %}
 4169   interface(MEMORY_INTER) %{
 4170     base($reg);
 4171     index(0x4);
 4172     scale(0x0);
 4173     disp($off);
 4174   %}
 4175 %}
 4176 
 4177 // Indirect Memory Plus Index Register Plus Offset Operand
 4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4179   match(AddP (AddP reg ireg) off);
 4180 
 4181   op_cost(10);
 4182   format %{"[$reg + $off + $ireg]" %}
 4183   interface(MEMORY_INTER) %{
 4184     base($reg);
 4185     index($ireg);
 4186     scale(0x0);
 4187     disp($off);
 4188   %}
 4189 %}
 4190 
 4191 // Indirect Memory Plus Index Register Plus Offset Operand
 4192 operand indIndex(eRegP reg, rRegI ireg) %{
 4193   match(AddP reg ireg);
 4194 
 4195   op_cost(10);
 4196   format %{"[$reg + $ireg]" %}
 4197   interface(MEMORY_INTER) %{
 4198     base($reg);
 4199     index($ireg);
 4200     scale(0x0);
 4201     disp(0x0);
 4202   %}
 4203 %}
 4204 
 4205 // // -------------------------------------------------------------------------
 4206 // // 486 architecture doesn't support "scale * index + offset" without a base
 4207 // // -------------------------------------------------------------------------
 4208 // // Scaled Memory Operands
 4209 // // Indirect Memory Times Scale Plus Offset Operand
 4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4211 //   match(AddP off (LShiftI ireg scale));
 4212 //
 4213 //   op_cost(10);
 4214 //   format %{"[$off + $ireg << $scale]" %}
 4215 //   interface(MEMORY_INTER) %{
 4216 //     base(0x4);
 4217 //     index($ireg);
 4218 //     scale($scale);
 4219 //     disp($off);
 4220 //   %}
 4221 // %}
 4222 
 4223 // Indirect Memory Times Scale Plus Index Register
 4224 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4225   match(AddP reg (LShiftI ireg scale));
 4226 
 4227   op_cost(10);
 4228   format %{"[$reg + $ireg << $scale]" %}
 4229   interface(MEMORY_INTER) %{
 4230     base($reg);
 4231     index($ireg);
 4232     scale($scale);
 4233     disp(0x0);
 4234   %}
 4235 %}
 4236 
 4237 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4238 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4239   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4240 
 4241   op_cost(10);
 4242   format %{"[$reg + $off + $ireg << $scale]" %}
 4243   interface(MEMORY_INTER) %{
 4244     base($reg);
 4245     index($ireg);
 4246     scale($scale);
 4247     disp($off);
 4248   %}
 4249 %}
 4250 
 4251 //----------Load Long Memory Operands------------------------------------------
 4252 // The load-long idiom will use its address expression again after loading
 4253 // the first word of the long.  If the load-long destination overlaps with
 4254 // registers used in the addressing expression, the 2nd half will be loaded
 4255 // from a clobbered address.  Fix this by requiring that load-long use
 4256 // address registers that do not overlap with the load-long target.
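      // A sketch of the hazard being avoided (register choice is illustrative):
      //   MOV EAX,[EAX]      ; load the low word -- the base register is clobbered
      //   MOV EDX,[EAX+4]    ; WRONG: the high word's address is now garbage
      // Constraining the base to ESI below keeps the address live across both loads.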
 4257 
 4258 // load-long support
 4259 operand load_long_RegP() %{
 4260   constraint(ALLOC_IN_RC(esi_reg));
 4261   match(RegP);
 4262   match(eSIRegP);
 4263   op_cost(100);
 4264   format %{  %}
 4265   interface(REG_INTER);
 4266 %}
 4267 
 4268 // Indirect Memory Operand Long
 4269 operand load_long_indirect(load_long_RegP reg) %{
 4270   constraint(ALLOC_IN_RC(esi_reg));
 4271   match(reg);
 4272 
 4273   format %{ "[$reg]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index(0x4);
 4277     scale(0x0);
 4278     disp(0x0);
 4279   %}
 4280 %}
 4281 
 4282 // Indirect Memory Plus Long Offset Operand
 4283 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4284   match(AddP reg off);
 4285 
 4286   format %{ "[$reg + $off]" %}
 4287   interface(MEMORY_INTER) %{
 4288     base($reg);
 4289     index(0x4);
 4290     scale(0x0);
 4291     disp($off);
 4292   %}
 4293 %}
 4294 
 4295 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4296 
 4297 
 4298 //----------Special Memory Operands--------------------------------------------
 4299 // Stack Slot Operand - This operand is used for loading and storing temporary
 4300 //                      values on the stack where a match requires a value to
 4301 //                      flow through memory.
 4302 operand stackSlotP(sRegP reg) %{
 4303   constraint(ALLOC_IN_RC(stack_slots));
 4304   // No match rule because this operand is only generated in matching
 4305   format %{ "[$reg]" %}
 4306   interface(MEMORY_INTER) %{
 4307     base(0x4);   // ESP
 4308     index(0x4);  // No Index
 4309     scale(0x0);  // No Scale
 4310     disp($reg);  // Stack Offset
 4311   %}
 4312 %}
 4313 
 4314 operand stackSlotI(sRegI reg) %{
 4315   constraint(ALLOC_IN_RC(stack_slots));
 4316   // No match rule because this operand is only generated in matching
 4317   format %{ "[$reg]" %}
 4318   interface(MEMORY_INTER) %{
 4319     base(0x4);   // ESP
 4320     index(0x4);  // No Index
 4321     scale(0x0);  // No Scale
 4322     disp($reg);  // Stack Offset
 4323   %}
 4324 %}
 4325 
 4326 operand stackSlotF(sRegF reg) %{
 4327   constraint(ALLOC_IN_RC(stack_slots));
 4328   // No match rule because this operand is only generated in matching
 4329   format %{ "[$reg]" %}
 4330   interface(MEMORY_INTER) %{
 4331     base(0x4);   // ESP
 4332     index(0x4);  // No Index
 4333     scale(0x0);  // No Scale
 4334     disp($reg);  // Stack Offset
 4335   %}
 4336 %}
 4337 
 4338 operand stackSlotD(sRegD reg) %{
 4339   constraint(ALLOC_IN_RC(stack_slots));
 4340   // No match rule because this operand is only generated in matching
 4341   format %{ "[$reg]" %}
 4342   interface(MEMORY_INTER) %{
 4343     base(0x4);   // ESP
 4344     index(0x4);  // No Index
 4345     scale(0x0);  // No Scale
 4346     disp($reg);  // Stack Offset
 4347   %}
 4348 %}
 4349 
 4350 operand stackSlotL(sRegL reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 //----------Conditional Branch Operands----------------------------------------
 4363 // Comparison Op  - This is the operation of the comparison, and is limited to
 4364 //                  the following set of codes:
 4365 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4366 //
 4367 // Other attributes of the comparison, such as unsignedness, are specified
 4368 // by the comparison instruction that sets a condition code flags register.
 4369 // That result is represented by a flags operand whose subtype is appropriate
 4370 // to the unsignedness (etc.) of the comparison.
 4371 //
 4372 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4373 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4374 // by matching a specific subtype of Bool operand below, such as cmpOpU.
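      // For example, a Jcc is emitted by OR'ing the condition code below into an
      // opcode base: not_equal (0x5) yields 0x70|0x5 = 0x75 (JNE rel8), or
      // 0x0F 0x80|0x5 = 0x0F 0x85 for the rel32 form.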
 4375 
 4376 // Comparison Code
 4377 operand cmpOp() %{
 4378   match(Bool);
 4379 
 4380   format %{ "" %}
 4381   interface(COND_INTER) %{
 4382     equal(0x4, "e");
 4383     not_equal(0x5, "ne");
 4384     less(0xC, "l");
 4385     greater_equal(0xD, "ge");
 4386     less_equal(0xE, "le");
 4387     greater(0xF, "g");
 4388     overflow(0x0, "o");
 4389     no_overflow(0x1, "no");
 4390   %}
 4391 %}
 4392 
 4393 // Comparison Code, unsigned compare.  Used by FP also, with
 4394 // C2 (unordered) turned into GT or LT already.  The other bits
 4395 // C0 and C3 are turned into Carry & Zero flags.
 4396 operand cmpOpU() %{
 4397   match(Bool);
 4398 
 4399   format %{ "" %}
 4400   interface(COND_INTER) %{
 4401     equal(0x4, "e");
 4402     not_equal(0x5, "ne");
 4403     less(0x2, "b");
 4404     greater_equal(0x3, "nb");
 4405     less_equal(0x6, "be");
 4406     greater(0x7, "nbe");
 4407     overflow(0x0, "o");
 4408     no_overflow(0x1, "no");
 4409   %}
 4410 %}
 4411 
 4412 // Floating comparisons that don't require any fixup for the unordered case
 4413 operand cmpOpUCF() %{
 4414   match(Bool);
 4415   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4416             n->as_Bool()->_test._test == BoolTest::ge ||
 4417             n->as_Bool()->_test._test == BoolTest::le ||
 4418             n->as_Bool()->_test._test == BoolTest::gt);
 4419   format %{ "" %}
 4420   interface(COND_INTER) %{
 4421     equal(0x4, "e");
 4422     not_equal(0x5, "ne");
 4423     less(0x2, "b");
 4424     greater_equal(0x3, "nb");
 4425     less_equal(0x6, "be");
 4426     greater(0x7, "nbe");
 4427     overflow(0x0, "o");
 4428     no_overflow(0x1, "no");
 4429   %}
 4430 %}
 4431 
 4432 
 4433 // Floating comparisons that can be fixed up with extra conditional jumps
 4434 operand cmpOpUCF2() %{
 4435   match(Bool);
 4436   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4437             n->as_Bool()->_test._test == BoolTest::eq);
 4438   format %{ "" %}
 4439   interface(COND_INTER) %{
 4440     equal(0x4, "e");
 4441     not_equal(0x5, "ne");
 4442     less(0x2, "b");
 4443     greater_equal(0x3, "nb");
 4444     less_equal(0x6, "be");
 4445     greater(0x7, "nbe");
 4446     overflow(0x0, "o");
 4447     no_overflow(0x1, "no");
 4448   %}
 4449 %}
 4450 
 4451 // Comparison Code for FP conditional move
 4452 operand cmpOp_fcmov() %{
 4453   match(Bool);
 4454 
 4455   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4456             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4457   format %{ "" %}
 4458   interface(COND_INTER) %{
 4459     equal        (0x0C8);
 4460     not_equal    (0x1C8);
 4461     less         (0x0C0);
 4462     greater_equal(0x1C0);
 4463     less_equal   (0x0D0);
 4464     greater      (0x1D0);
 4465     overflow(0x0, "o"); // not really supported by the instruction
 4466     no_overflow(0x1, "no"); // not really supported by the instruction
 4467   %}
 4468 %}
 4469 
 4470 // Comparison Code used in long compares
 4471 operand cmpOp_commute() %{
 4472   match(Bool);
 4473 
 4474   format %{ "" %}
 4475   interface(COND_INTER) %{
 4476     equal(0x4, "e");
 4477     not_equal(0x5, "ne");
 4478     less(0xF, "g");
 4479     greater_equal(0xE, "le");
 4480     less_equal(0xD, "ge");
 4481     greater(0xC, "l");
 4482     overflow(0x0, "o");
 4483     no_overflow(0x1, "no");
 4484   %}
 4485 %}
 4486 
 4487 // Comparison Code used in unsigned long compares
 4488 operand cmpOpU_commute() %{
 4489   match(Bool);
 4490 
 4491   format %{ "" %}
 4492   interface(COND_INTER) %{
 4493     equal(0x4, "e");
 4494     not_equal(0x5, "ne");
 4495     less(0x7, "nbe");
 4496     greater_equal(0x6, "be");
 4497     less_equal(0x3, "nb");
 4498     greater(0x2, "b");
 4499     overflow(0x0, "o");
 4500     no_overflow(0x1, "no");
 4501   %}
 4502 %}
 4503 
 4504 //----------OPERAND CLASSES----------------------------------------------------
 4505 // Operand Classes are groups of operands that are used to simplify
 4506 // instruction definitions by not requiring the AD writer to specify separate
 4507 // instructions for every form of operand when the instruction accepts
 4508 // multiple operand types with the same basic encoding and format.  The classic
 4509 // case of this is memory operands.
 4510 
 4511 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4512                indIndex, indIndexScale, indIndexScaleOffset);
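      // For example, a single load definition written against this class, such
      // as loadI(rRegI dst, memory mem) in the Load Instructions section below,
      // covers every addressing form listed above without needing a separate
      // instruct per form.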
 4513 
 4514 // Long memory operations are encoded as 2 instructions, the second using a
 4515 // +4 offset.  This means some kind of offset is always required, so an oop
 4516 // cannot be used as the offset (as is done when working on static globals).
 4517 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4518                     indIndex, indIndexScale, indIndexScaleOffset);
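      // For example, loadL below builds its two 32-bit moves from the same
      // base/index/scale with displacements $disp and $disp+4, which is why an
      // oop cannot serve as the displacement here.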
 4519 
 4520 
 4521 //----------PIPELINE-----------------------------------------------------------
 4522 // Rules which define the behavior of the target architecture's pipeline.
 4523 pipeline %{
 4524 
 4525 //----------ATTRIBUTES---------------------------------------------------------
 4526 attributes %{
 4527   variable_size_instructions;        // Variable-size instructions
 4528   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4529   instruction_unit_size = 1;         // An instruction is 1 byte long
 4530   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4531   instruction_fetch_units = 1;       // of 16 bytes
 4532 
 4533   // List of nop instructions
 4534   nops( MachNop );
 4535 %}
 4536 
 4537 //----------RESOURCES----------------------------------------------------------
 4538 // Resources are the functional units available to the machine
 4539 
 4540 // Generic P2/P3 pipeline
 4541 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4542 // 3 instructions decoded per cycle.
 4543 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4544 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4545 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4546            MS0, MS1, MEM = MS0 | MS1,
 4547            BR, FPU,
 4548            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4549 
 4550 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4551 // Pipeline Description specifies the stages in the machine's pipeline
 4552 
 4553 // Generic P2/P3 pipeline
 4554 pipe_desc(S0, S1, S2, S3, S4, S5);
 4555 
 4556 //----------PIPELINE CLASSES---------------------------------------------------
 4557 // Pipeline Classes describe the stages in which input and output are
 4558 // referenced by the hardware pipeline.
 4559 
 4560 // Naming convention: ialu or fpu
 4561 // Then: _reg
 4562 // Then: _reg if there is a 2nd register
 4563 // Then: _long if it's a pair of instructions implementing a long
 4564 // Then: _fat if it requires the big decoder
 4565 //   Or: _mem if it requires the big decoder and a memory unit.
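      // Reading ialu_reg_mem below by this convention: an integer ALU operation
      // ("ialu") with a register destination ("_reg") and a memory source
      // ("_mem"), which therefore needs the big decoder D0 and a MEM unit.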
 4566 
 4567 // Integer ALU reg operation
 4568 pipe_class ialu_reg(rRegI dst) %{
 4569     single_instruction;
 4570     dst    : S4(write);
 4571     dst    : S3(read);
 4572     DECODE : S0;        // any decoder
 4573     ALU    : S3;        // any alu
 4574 %}
 4575 
 4576 // Long ALU reg operation
 4577 pipe_class ialu_reg_long(eRegL dst) %{
 4578     instruction_count(2);
 4579     dst    : S4(write);
 4580     dst    : S3(read);
 4581     DECODE : S0(2);     // any 2 decoders
 4582     ALU    : S3(2);     // both alus
 4583 %}
 4584 
 4585 // Integer ALU reg operation using big decoder
 4586 pipe_class ialu_reg_fat(rRegI dst) %{
 4587     single_instruction;
 4588     dst    : S4(write);
 4589     dst    : S3(read);
 4590     D0     : S0;        // big decoder only
 4591     ALU    : S3;        // any alu
 4592 %}
 4593 
 4594 // Long ALU reg operation using big decoder
 4595 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4596     instruction_count(2);
 4597     dst    : S4(write);
 4598     dst    : S3(read);
 4599     D0     : S0(2);     // big decoder only; twice
 4600     ALU    : S3(2);     // any 2 alus
 4601 %}
 4602 
 4603 // Integer ALU reg-reg operation
 4604 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4605     single_instruction;
 4606     dst    : S4(write);
 4607     src    : S3(read);
 4608     DECODE : S0;        // any decoder
 4609     ALU    : S3;        // any alu
 4610 %}
 4611 
 4612 // Long ALU reg-reg operation
 4613 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0(2);     // any 2 decoders
 4618     ALU    : S3(2);     // both alus
 4619 %}
 4620 
 4621 // Integer ALU reg-reg operation
 4622 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4623     single_instruction;
 4624     dst    : S4(write);
 4625     src    : S3(read);
 4626     D0     : S0;        // big decoder only
 4627     ALU    : S3;        // any alu
 4628 %}
 4629 
 4630 // Long ALU reg-reg operation
 4631 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4632     instruction_count(2);
 4633     dst    : S4(write);
 4634     src    : S3(read);
 4635     D0     : S0(2);     // big decoder only; twice
 4636     ALU    : S3(2);     // both alus
 4637 %}
 4638 
 4639 // Integer ALU reg-mem operation
 4640 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4641     single_instruction;
 4642     dst    : S5(write);
 4643     mem    : S3(read);
 4644     D0     : S0;        // big decoder only
 4645     ALU    : S4;        // any alu
 4646     MEM    : S3;        // any mem
 4647 %}
 4648 
 4649 // Long ALU reg-mem operation
 4650 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4651     instruction_count(2);
 4652     dst    : S5(write);
 4653     mem    : S3(read);
 4654     D0     : S0(2);     // big decoder only; twice
 4655     ALU    : S4(2);     // any 2 alus
 4656     MEM    : S3(2);     // both mems
 4657 %}
 4658 
 4659 // Integer mem operation (prefetch)
 4660 pipe_class ialu_mem(memory mem)
 4661 %{
 4662     single_instruction;
 4663     mem    : S3(read);
 4664     D0     : S0;        // big decoder only
 4665     MEM    : S3;        // any mem
 4666 %}
 4667 
 4668 // Integer Store to Memory
 4669 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4670     single_instruction;
 4671     mem    : S3(read);
 4672     src    : S5(read);
 4673     D0     : S0;        // big decoder only
 4674     ALU    : S4;        // any alu
 4675     MEM    : S3;
 4676 %}
 4677 
 4678 // Long Store to Memory
 4679 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4680     instruction_count(2);
 4681     mem    : S3(read);
 4682     src    : S5(read);
 4683     D0     : S0(2);     // big decoder only; twice
 4684     ALU    : S4(2);     // any 2 alus
 4685     MEM    : S3(2);     // Both mems
 4686 %}
 4687 
 4688 // Integer Store to Memory
 4689 pipe_class ialu_mem_imm(memory mem) %{
 4690     single_instruction;
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     ALU    : S4;        // any alu
 4694     MEM    : S3;
 4695 %}
 4696 
 4697 // Integer ALU0 reg-reg operation
 4698 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4699     single_instruction;
 4700     dst    : S4(write);
 4701     src    : S3(read);
 4702     D0     : S0;        // Big decoder only
 4703     ALU0   : S3;        // only alu0
 4704 %}
 4705 
 4706 // Integer ALU0 reg-mem operation
 4707 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4708     single_instruction;
 4709     dst    : S5(write);
 4710     mem    : S3(read);
 4711     D0     : S0;        // big decoder only
 4712     ALU0   : S4;        // ALU0 only
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 // Integer ALU reg-reg operation
 4717 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4718     single_instruction;
 4719     cr     : S4(write);
 4720     src1   : S3(read);
 4721     src2   : S3(read);
 4722     DECODE : S0;        // any decoder
 4723     ALU    : S3;        // any alu
 4724 %}
 4725 
 4726 // Integer ALU reg-imm operation
 4727 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4728     single_instruction;
 4729     cr     : S4(write);
 4730     src1   : S3(read);
 4731     DECODE : S0;        // any decoder
 4732     ALU    : S3;        // any alu
 4733 %}
 4734 
 4735 // Integer ALU reg-mem operation
 4736 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4737     single_instruction;
 4738     cr     : S4(write);
 4739     src1   : S3(read);
 4740     src2   : S3(read);
 4741     D0     : S0;        // big decoder only
 4742     ALU    : S4;        // any alu
 4743     MEM    : S3;
 4744 %}
 4745 
 4746 // Conditional move reg-reg
 4747 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4748     instruction_count(4);
 4749     y      : S4(read);
 4750     q      : S3(read);
 4751     p      : S3(read);
 4752     DECODE : S0(4);     // any decoder
 4753 %}
 4754 
 4755 // Conditional move reg-reg
 4756 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4757     single_instruction;
 4758     dst    : S4(write);
 4759     src    : S3(read);
 4760     cr     : S3(read);
 4761     DECODE : S0;        // any decoder
 4762 %}
 4763 
 4764 // Conditional move reg-mem
 4765 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4766     single_instruction;
 4767     dst    : S4(write);
 4768     src    : S3(read);
 4769     cr     : S3(read);
 4770     DECODE : S0;        // any decoder
 4771     MEM    : S3;
 4772 %}
 4773 
 4774 // Conditional move reg-reg long
 4775 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4776     single_instruction;
 4777     dst    : S4(write);
 4778     src    : S3(read);
 4779     cr     : S3(read);
 4780     DECODE : S0(2);     // any 2 decoders
 4781 %}
 4782 
 4783 // Conditional move double reg-reg
 4784 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4785     single_instruction;
 4786     dst    : S4(write);
 4787     src    : S3(read);
 4788     cr     : S3(read);
 4789     DECODE : S0;        // any decoder
 4790 %}
 4791 
 4792 // Float reg-reg operation
 4793 pipe_class fpu_reg(regDPR dst) %{
 4794     instruction_count(2);
 4795     dst    : S3(read);
 4796     DECODE : S0(2);     // any 2 decoders
 4797     FPU    : S3;
 4798 %}
 4799 
 4800 // Float reg-reg operation
 4801 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4802     instruction_count(2);
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     DECODE : S0(2);     // any 2 decoders
 4806     FPU    : S3;
 4807 %}
 4808 
 4809 // Float reg-reg operation
 4810 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4811     instruction_count(3);
 4812     dst    : S4(write);
 4813     src1   : S3(read);
 4814     src2   : S3(read);
 4815     DECODE : S0(3);     // any 3 decoders
 4816     FPU    : S3(2);
 4817 %}
 4818 
 4819 // Float reg-reg operation
 4820 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4821     instruction_count(4);
 4822     dst    : S4(write);
 4823     src1   : S3(read);
 4824     src2   : S3(read);
 4825     src3   : S3(read);
 4826     DECODE : S0(4);     // 4 decode slots
 4827     FPU    : S3(2);
 4828 %}
 4829 
 4830 // Float reg-reg operation
 4831 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4832     instruction_count(4);
 4833     dst    : S4(write);
 4834     src1   : S3(read);
 4835     src2   : S3(read);
 4836     src3   : S3(read);
 4837     DECODE : S1(3);     // any 3 decoders
 4838     D0     : S0;        // Big decoder only
 4839     FPU    : S3(2);
 4840     MEM    : S3;
 4841 %}
 4842 
 4843 // Float reg-mem operation
 4844 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4845     instruction_count(2);
 4846     dst    : S5(write);
 4847     mem    : S3(read);
 4848     D0     : S0;        // big decoder only
 4849     DECODE : S1;        // any decoder for FPU POP
 4850     FPU    : S4;
 4851     MEM    : S3;        // any mem
 4852 %}
 4853 
 4854 // Float reg-mem operation
 4855 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4856     instruction_count(3);
 4857     dst    : S5(write);
 4858     src1   : S3(read);
 4859     mem    : S3(read);
 4860     D0     : S0;        // big decoder only
 4861     DECODE : S1(2);     // any decoder for FPU POP
 4862     FPU    : S4;
 4863     MEM    : S3;        // any mem
 4864 %}
 4865 
 4866 // Float mem-reg operation
 4867 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4868     instruction_count(2);
 4869     src    : S5(read);
 4870     mem    : S3(read);
 4871     DECODE : S0;        // any decoder for FPU PUSH
 4872     D0     : S1;        // big decoder only
 4873     FPU    : S4;
 4874     MEM    : S3;        // any mem
 4875 %}
 4876 
 4877 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4878     instruction_count(3);
 4879     src1   : S3(read);
 4880     src2   : S3(read);
 4881     mem    : S3(read);
 4882     DECODE : S0(2);     // any decoder for FPU PUSH
 4883     D0     : S1;        // big decoder only
 4884     FPU    : S4;
 4885     MEM    : S3;        // any mem
 4886 %}
 4887 
 4888 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4889     instruction_count(3);
 4890     src1   : S3(read);
 4891     src2   : S3(read);
 4892     mem    : S4(read);
 4893     DECODE : S0;        // any decoder for FPU PUSH
 4894     D0     : S0(2);     // big decoder only
 4895     FPU    : S4;
 4896     MEM    : S3(2);     // any mem
 4897 %}
 4898 
 4899 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4900     instruction_count(2);
 4901     src1   : S3(read);
 4902     dst    : S4(read);
 4903     D0     : S0(2);     // big decoder only
 4904     MEM    : S3(2);     // any mem
 4905 %}
 4906 
 4907 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4908     instruction_count(3);
 4909     src1   : S3(read);
 4910     src2   : S3(read);
 4911     dst    : S4(read);
 4912     D0     : S0(3);     // big decoder only
 4913     FPU    : S4;
 4914     MEM    : S3(3);     // any mem
 4915 %}
 4916 
 4917 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4918     instruction_count(3);
 4919     src1   : S4(read);
 4920     mem    : S4(read);
 4921     DECODE : S0;        // any decoder for FPU PUSH
 4922     D0     : S0(2);     // big decoder only
 4923     FPU    : S4;
 4924     MEM    : S3(2);     // any mem
 4925 %}
 4926 
 4927 // Float load constant
 4928 pipe_class fpu_reg_con(regDPR dst) %{
 4929     instruction_count(2);
 4930     dst    : S5(write);
 4931     D0     : S0;        // big decoder only for the load
 4932     DECODE : S1;        // any decoder for FPU POP
 4933     FPU    : S4;
 4934     MEM    : S3;        // any mem
 4935 %}
 4936 
 4937 // Float load constant
 4938 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4939     instruction_count(3);
 4940     dst    : S5(write);
 4941     src    : S3(read);
 4942     D0     : S0;        // big decoder only for the load
 4943     DECODE : S1(2);     // any decoder for FPU POP
 4944     FPU    : S4;
 4945     MEM    : S3;        // any mem
 4946 %}
 4947 
 4948 // Unconditional branch
 4949 pipe_class pipe_jmp( label labl ) %{
 4950     single_instruction;
 4951     BR   : S3;
 4952 %}
 4953 
 4954 // Conditional branch
 4955 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4956     single_instruction;
 4957     cr    : S1(read);
 4958     BR    : S3;
 4959 %}
 4960 
 4961 // Allocation idiom
 4962 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4963     instruction_count(1); force_serialization;
 4964     fixed_latency(6);
 4965     heap_ptr : S3(read);
 4966     DECODE   : S0(3);
 4967     D0       : S2;
 4968     MEM      : S3;
 4969     ALU      : S3(2);
 4970     dst      : S5(write);
 4971     BR       : S5;
 4972 %}
 4973 
 4974 // Generic big/slow expanded idiom
 4975 pipe_class pipe_slow(  ) %{
 4976     instruction_count(10); multiple_bundles; force_serialization;
 4977     fixed_latency(100);
 4978     D0  : S0(2);
 4979     MEM : S3(2);
 4980 %}
 4981 
 4982 // The real do-nothing guy
 4983 pipe_class empty( ) %{
 4984     instruction_count(0);
 4985 %}
 4986 
 4987 // Define the class for the Nop node
 4988 define %{
 4989    MachNop = empty;
 4990 %}
 4991 
 4992 %}
 4993 
 4994 //----------INSTRUCTIONS-------------------------------------------------------
 4995 //
 4996 // match      -- States which machine-independent subtree may be replaced
 4997 //               by this instruction.
 4998 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4999 //               selection to identify a minimum cost tree of machine
 5000 //               instructions that matches a tree of machine-independent
 5001 //               instructions.
 5002 // format     -- A string providing the disassembly for this instruction.
 5003 //               The value of an instruction's operand may be inserted
 5004 //               by referring to it with a '$' prefix.
 5005 // opcode     -- Up to three instruction opcodes may be provided.  These are referred
 5006 //               to within an encode class as $primary, $secondary, and $tertiary
 5007 //               respectively.  The primary opcode is commonly used to
 5008 //               indicate the type of machine instruction, while secondary
 5009 //               and tertiary are often used for prefix options or addressing
 5010 //               modes.
 5011 // ins_encode -- A list of encode classes with parameters. The encode class
 5012 //               name must have been defined in an 'enc_class' specification
 5013 //               in the encode section of the architecture description.
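      //
      // As a worked example of how these pieces fit together, the first BSWAP
      // instruct below matches (Set dst (ReverseBytesI dst)), supplies the two
      // opcode bytes 0x0F and 0xC8 as $primary and $secondary, and emits them
      // through the OpcP and OpcSReg encode classes; many later instructs use
      // an ins_encode %{ ... %} block that calls the assembler directly instead.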
 5014 
 5015 //----------BSWAP-Instruction--------------------------------------------------
 5016 instruct bytes_reverse_int(rRegI dst) %{
 5017   match(Set dst (ReverseBytesI dst));
 5018 
 5019   format %{ "BSWAP  $dst" %}
 5020   opcode(0x0F, 0xC8);
 5021   ins_encode( OpcP, OpcSReg(dst) );
 5022   ins_pipe( ialu_reg );
 5023 %}
 5024 
 5025 instruct bytes_reverse_long(eRegL dst) %{
 5026   match(Set dst (ReverseBytesL dst));
 5027 
 5028   format %{ "BSWAP  $dst.lo\n\t"
 5029             "BSWAP  $dst.hi\n\t"
 5030             "XCHG   $dst.lo $dst.hi" %}
 5031 
 5032   ins_cost(125);
 5033   ins_encode( bswap_long_bytes(dst) );
 5034   ins_pipe( ialu_reg_reg);
 5035 %}
 5036 
 5037 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5038   match(Set dst (ReverseBytesUS dst));
 5039   effect(KILL cr);
 5040 
 5041   format %{ "BSWAP  $dst\n\t"
 5042             "SHR    $dst,16\n\t" %}
 5043   ins_encode %{
 5044     __ bswapl($dst$$Register);
 5045     __ shrl($dst$$Register, 16);
 5046   %}
 5047   ins_pipe( ialu_reg );
 5048 %}
 5049 
 5050 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5051   match(Set dst (ReverseBytesS dst));
 5052   effect(KILL cr);
 5053 
 5054   format %{ "BSWAP  $dst\n\t"
 5055             "SAR    $dst,16\n\t" %}
 5056   ins_encode %{
 5057     __ bswapl($dst$$Register);
 5058     __ sarl($dst$$Register, 16);
 5059   %}
 5060   ins_pipe( ialu_reg );
 5061 %}
 5062 
 5063 
 5064 //---------- Zeros Count Instructions ------------------------------------------
 5065 
 5066 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5067   predicate(UseCountLeadingZerosInstruction);
 5068   match(Set dst (CountLeadingZerosI src));
 5069   effect(KILL cr);
 5070 
 5071   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5072   ins_encode %{
 5073     __ lzcntl($dst$$Register, $src$$Register);
 5074   %}
 5075   ins_pipe(ialu_reg);
 5076 %}
 5077 
 5078 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5079   predicate(!UseCountLeadingZerosInstruction);
 5080   match(Set dst (CountLeadingZerosI src));
 5081   effect(KILL cr);
 5082 
 5083   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5084             "JNZ    skip\n\t"
 5085             "MOV    $dst, -1\n"
 5086       "skip:\n\t"
 5087             "NEG    $dst\n\t"
 5088             "ADD    $dst, 31" %}
 5089   ins_encode %{
 5090     Register Rdst = $dst$$Register;
 5091     Register Rsrc = $src$$Register;
 5092     Label skip;
 5093     __ bsrl(Rdst, Rsrc);
 5094     __ jccb(Assembler::notZero, skip);
 5095     __ movl(Rdst, -1);
 5096     __ bind(skip);
 5097     __ negl(Rdst);
 5098     __ addl(Rdst, BitsPerInt - 1);
 5099   %}
 5100   ins_pipe(ialu_reg);
 5101 %}
 5102 
 5103 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5104   predicate(UseCountLeadingZerosInstruction);
 5105   match(Set dst (CountLeadingZerosL src));
 5106   effect(TEMP dst, KILL cr);
 5107 
 5108   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5109             "JNC    done\n\t"
 5110             "LZCNT  $dst, $src.lo\n\t"
 5111             "ADD    $dst, 32\n"
 5112       "done:" %}
 5113   ins_encode %{
 5114     Register Rdst = $dst$$Register;
 5115     Register Rsrc = $src$$Register;
 5116     Label done;
 5117     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5118     __ jccb(Assembler::carryClear, done);
 5119     __ lzcntl(Rdst, Rsrc);
 5120     __ addl(Rdst, BitsPerInt);
 5121     __ bind(done);
 5122   %}
 5123   ins_pipe(ialu_reg);
 5124 %}
 5125 
 5126 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5127   predicate(!UseCountLeadingZerosInstruction);
 5128   match(Set dst (CountLeadingZerosL src));
 5129   effect(TEMP dst, KILL cr);
 5130 
 5131   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5132             "JZ     msw_is_zero\n\t"
 5133             "ADD    $dst, 32\n\t"
 5134             "JMP    not_zero\n"
 5135       "msw_is_zero:\n\t"
 5136             "BSR    $dst, $src.lo\n\t"
 5137             "JNZ    not_zero\n\t"
 5138             "MOV    $dst, -1\n"
 5139       "not_zero:\n\t"
 5140             "NEG    $dst\n\t"
 5141             "ADD    $dst, 63\n" %}
 5142  ins_encode %{
 5143     Register Rdst = $dst$$Register;
 5144     Register Rsrc = $src$$Register;
 5145     Label msw_is_zero;
 5146     Label not_zero;
 5147     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5148     __ jccb(Assembler::zero, msw_is_zero);
 5149     __ addl(Rdst, BitsPerInt);
 5150     __ jmpb(not_zero);
 5151     __ bind(msw_is_zero);
 5152     __ bsrl(Rdst, Rsrc);
 5153     __ jccb(Assembler::notZero, not_zero);
 5154     __ movl(Rdst, -1);
 5155     __ bind(not_zero);
 5156     __ negl(Rdst);
 5157     __ addl(Rdst, BitsPerLong - 1);
 5158   %}
 5159   ins_pipe(ialu_reg);
 5160 %}
 5161 
 5162 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5163   predicate(UseCountTrailingZerosInstruction);
 5164   match(Set dst (CountTrailingZerosI src));
 5165   effect(KILL cr);
 5166 
 5167   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5168   ins_encode %{
 5169     __ tzcntl($dst$$Register, $src$$Register);
 5170   %}
 5171   ins_pipe(ialu_reg);
 5172 %}
 5173 
 5174 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5175   predicate(!UseCountTrailingZerosInstruction);
 5176   match(Set dst (CountTrailingZerosI src));
 5177   effect(KILL cr);
 5178 
 5179   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5180             "JNZ    done\n\t"
 5181             "MOV    $dst, 32\n"
 5182       "done:" %}
 5183   ins_encode %{
 5184     Register Rdst = $dst$$Register;
 5185     Label done;
 5186     __ bsfl(Rdst, $src$$Register);
 5187     __ jccb(Assembler::notZero, done);
 5188     __ movl(Rdst, BitsPerInt);
 5189     __ bind(done);
 5190   %}
 5191   ins_pipe(ialu_reg);
 5192 %}
 5193 
 5194 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5195   predicate(UseCountTrailingZerosInstruction);
 5196   match(Set dst (CountTrailingZerosL src));
 5197   effect(TEMP dst, KILL cr);
 5198 
 5199   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5200             "JNC    done\n\t"
 5201             "TZCNT  $dst, $src.hi\n\t"
 5202             "ADD    $dst, 32\n"
 5203             "done:" %}
 5204   ins_encode %{
 5205     Register Rdst = $dst$$Register;
 5206     Register Rsrc = $src$$Register;
 5207     Label done;
 5208     __ tzcntl(Rdst, Rsrc);
 5209     __ jccb(Assembler::carryClear, done);
 5210     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5211     __ addl(Rdst, BitsPerInt);
 5212     __ bind(done);
 5213   %}
 5214   ins_pipe(ialu_reg);
 5215 %}
 5216 
 5217 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5218   predicate(!UseCountTrailingZerosInstruction);
 5219   match(Set dst (CountTrailingZerosL src));
 5220   effect(TEMP dst, KILL cr);
 5221 
 5222   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5223             "JNZ    done\n\t"
 5224             "BSF    $dst, $src.hi\n\t"
 5225             "JNZ    msw_not_zero\n\t"
 5226             "MOV    $dst, 32\n"
 5227       "msw_not_zero:\n\t"
 5228             "ADD    $dst, 32\n"
 5229       "done:" %}
 5230   ins_encode %{
 5231     Register Rdst = $dst$$Register;
 5232     Register Rsrc = $src$$Register;
 5233     Label msw_not_zero;
 5234     Label done;
 5235     __ bsfl(Rdst, Rsrc);
 5236     __ jccb(Assembler::notZero, done);
 5237     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5238     __ jccb(Assembler::notZero, msw_not_zero);
 5239     __ movl(Rdst, BitsPerInt);
 5240     __ bind(msw_not_zero);
 5241     __ addl(Rdst, BitsPerInt);
 5242     __ bind(done);
 5243   %}
 5244   ins_pipe(ialu_reg);
 5245 %}
 5246 
 5247 
 5248 //---------- Population Count Instructions -------------------------------------
 5249 
 5250 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5251   predicate(UsePopCountInstruction);
 5252   match(Set dst (PopCountI src));
 5253   effect(KILL cr);
 5254 
 5255   format %{ "POPCNT $dst, $src" %}
 5256   ins_encode %{
 5257     __ popcntl($dst$$Register, $src$$Register);
 5258   %}
 5259   ins_pipe(ialu_reg);
 5260 %}
 5261 
 5262 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5263   predicate(UsePopCountInstruction);
 5264   match(Set dst (PopCountI (LoadI mem)));
 5265   effect(KILL cr);
 5266 
 5267   format %{ "POPCNT $dst, $mem" %}
 5268   ins_encode %{
 5269     __ popcntl($dst$$Register, $mem$$Address);
 5270   %}
 5271   ins_pipe(ialu_reg);
 5272 %}
 5273 
 5274 // Note: Long.bitCount(long) returns an int.
 5275 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5276   predicate(UsePopCountInstruction);
 5277   match(Set dst (PopCountL src));
 5278   effect(KILL cr, TEMP tmp, TEMP dst);
 5279 
 5280   format %{ "POPCNT $dst, $src.lo\n\t"
 5281             "POPCNT $tmp, $src.hi\n\t"
 5282             "ADD    $dst, $tmp" %}
 5283   ins_encode %{
 5284     __ popcntl($dst$$Register, $src$$Register);
 5285     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5286     __ addl($dst$$Register, $tmp$$Register);
 5287   %}
 5288   ins_pipe(ialu_reg);
 5289 %}
 5290 
 5291 // Note: Long.bitCount(long) returns an int.
 5292 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5293   predicate(UsePopCountInstruction);
 5294   match(Set dst (PopCountL (LoadL mem)));
 5295   effect(KILL cr, TEMP tmp, TEMP dst);
 5296 
 5297   format %{ "POPCNT $dst, $mem\n\t"
 5298             "POPCNT $tmp, $mem+4\n\t"
 5299             "ADD    $dst, $tmp" %}
 5300   ins_encode %{
 5301     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5302     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5303     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5304     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5305     __ addl($dst$$Register, $tmp$$Register);
 5306   %}
 5307   ins_pipe(ialu_reg);
 5308 %}
 5309 
 5310 
 5311 //----------Load/Store/Move Instructions---------------------------------------
 5312 //----------Load Instructions--------------------------------------------------
 5313 // Load Byte (8bit signed)
 5314 instruct loadB(xRegI dst, memory mem) %{
 5315   match(Set dst (LoadB mem));
 5316 
 5317   ins_cost(125);
 5318   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5319 
 5320   ins_encode %{
 5321     __ movsbl($dst$$Register, $mem$$Address);
 5322   %}
 5323 
 5324   ins_pipe(ialu_reg_mem);
 5325 %}
 5326 
 5327 // Load Byte (8bit signed) into Long Register
 5328 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5329   match(Set dst (ConvI2L (LoadB mem)));
 5330   effect(KILL cr);
 5331 
 5332   ins_cost(375);
 5333   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5334             "MOV    $dst.hi,$dst.lo\n\t"
 5335             "SAR    $dst.hi,7" %}
 5336 
 5337   ins_encode %{
 5338     __ movsbl($dst$$Register, $mem$$Address);
 5339     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5340     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 MSBs are already sign-extended.
 5341   %}
 5342 
 5343   ins_pipe(ialu_reg_mem);
 5344 %}
 5345 
 5346 // Load Unsigned Byte (8bit UNsigned)
 5347 instruct loadUB(xRegI dst, memory mem) %{
 5348   match(Set dst (LoadUB mem));
 5349 
 5350   ins_cost(125);
 5351   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5352 
 5353   ins_encode %{
 5354     __ movzbl($dst$$Register, $mem$$Address);
 5355   %}
 5356 
 5357   ins_pipe(ialu_reg_mem);
 5358 %}
 5359 
 5360 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5361 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5362   match(Set dst (ConvI2L (LoadUB mem)));
 5363   effect(KILL cr);
 5364 
 5365   ins_cost(250);
 5366   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5367             "XOR    $dst.hi,$dst.hi" %}
 5368 
 5369   ins_encode %{
 5370     Register Rdst = $dst$$Register;
 5371     __ movzbl(Rdst, $mem$$Address);
 5372     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5373   %}
 5374 
 5375   ins_pipe(ialu_reg_mem);
 5376 %}
 5377 
 5378 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5379 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5380   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5384             "XOR    $dst.hi,$dst.hi\n\t"
 5385             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5386   ins_encode %{
 5387     Register Rdst = $dst$$Register;
 5388     __ movzbl(Rdst, $mem$$Address);
 5389     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5390     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5391   %}
 5392   ins_pipe(ialu_reg_mem);
 5393 %}
 5394 
 5395 // Load Short (16bit signed)
 5396 instruct loadS(rRegI dst, memory mem) %{
 5397   match(Set dst (LoadS mem));
 5398 
 5399   ins_cost(125);
 5400   format %{ "MOVSX  $dst,$mem\t# short" %}
 5401 
 5402   ins_encode %{
 5403     __ movswl($dst$$Register, $mem$$Address);
 5404   %}
 5405 
 5406   ins_pipe(ialu_reg_mem);
 5407 %}
 5408 
 5409 // Load Short (16 bit signed) to Byte (8 bit signed)
 5410 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5411   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5412 
 5413   ins_cost(125);
 5414   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5415   ins_encode %{
 5416     __ movsbl($dst$$Register, $mem$$Address);
 5417   %}
 5418   ins_pipe(ialu_reg_mem);
 5419 %}
 5420 
 5421 // Load Short (16bit signed) into Long Register
 5422 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5423   match(Set dst (ConvI2L (LoadS mem)));
 5424   effect(KILL cr);
 5425 
 5426   ins_cost(375);
 5427   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5428             "MOV    $dst.hi,$dst.lo\n\t"
 5429             "SAR    $dst.hi,15" %}
 5430 
 5431   ins_encode %{
 5432     __ movswl($dst$$Register, $mem$$Address);
 5433     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5434     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 MSBs are already sign-extended.
 5435   %}
 5436 
 5437   ins_pipe(ialu_reg_mem);
 5438 %}
 5439 
 5440 // Load Unsigned Short/Char (16bit unsigned)
 5441 instruct loadUS(rRegI dst, memory mem) %{
 5442   match(Set dst (LoadUS mem));
 5443 
 5444   ins_cost(125);
 5445   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5446 
 5447   ins_encode %{
 5448     __ movzwl($dst$$Register, $mem$$Address);
 5449   %}
 5450 
 5451   ins_pipe(ialu_reg_mem);
 5452 %}
 5453 
 5454 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5455 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5456   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5457 
 5458   ins_cost(125);
 5459   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5460   ins_encode %{
 5461     __ movsbl($dst$$Register, $mem$$Address);
 5462   %}
 5463   ins_pipe(ialu_reg_mem);
 5464 %}
 5465 
 5466 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5467 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5468   match(Set dst (ConvI2L (LoadUS mem)));
 5469   effect(KILL cr);
 5470 
 5471   ins_cost(250);
 5472   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5473             "XOR    $dst.hi,$dst.hi" %}
 5474 
 5475   ins_encode %{
 5476     __ movzwl($dst$$Register, $mem$$Address);
 5477     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5478   %}
 5479 
 5480   ins_pipe(ialu_reg_mem);
 5481 %}
 5482 
 5483 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5484 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5485   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5486   effect(KILL cr);
 5487 
 5488   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5489             "XOR    $dst.hi,$dst.hi" %}
 5490   ins_encode %{
 5491     Register Rdst = $dst$$Register;
 5492     __ movzbl(Rdst, $mem$$Address);
 5493     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5494   %}
 5495   ins_pipe(ialu_reg_mem);
 5496 %}
 5497 
 5498 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5499 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5500   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5501   effect(KILL cr);
 5502 
 5503   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5504             "XOR    $dst.hi,$dst.hi\n\t"
 5505             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5506   ins_encode %{
 5507     Register Rdst = $dst$$Register;
 5508     __ movzwl(Rdst, $mem$$Address);
 5509     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5510     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5511   %}
 5512   ins_pipe(ialu_reg_mem);
 5513 %}
 5514 
 5515 // Load Integer
 5516 instruct loadI(rRegI dst, memory mem) %{
 5517   match(Set dst (LoadI mem));
 5518 
 5519   ins_cost(125);
 5520   format %{ "MOV    $dst,$mem\t# int" %}
 5521 
 5522   ins_encode %{
 5523     __ movl($dst$$Register, $mem$$Address);
 5524   %}
 5525 
 5526   ins_pipe(ialu_reg_mem);
 5527 %}
 5528 
 5529 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5530 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5531   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5532 
 5533   ins_cost(125);
 5534   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5535   ins_encode %{
 5536     __ movsbl($dst$$Register, $mem$$Address);
 5537   %}
 5538   ins_pipe(ialu_reg_mem);
 5539 %}
 5540 
 5541 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5542 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5543   match(Set dst (AndI (LoadI mem) mask));
 5544 
 5545   ins_cost(125);
 5546   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5547   ins_encode %{
 5548     __ movzbl($dst$$Register, $mem$$Address);
 5549   %}
 5550   ins_pipe(ialu_reg_mem);
 5551 %}
 5552 
 5553 // Load Integer (32 bit signed) to Short (16 bit signed)
 5554 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5555   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5556 
 5557   ins_cost(125);
 5558   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5559   ins_encode %{
 5560     __ movswl($dst$$Register, $mem$$Address);
 5561   %}
 5562   ins_pipe(ialu_reg_mem);
 5563 %}
 5564 
 5565 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5566 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5567   match(Set dst (AndI (LoadI mem) mask));
 5568 
 5569   ins_cost(125);
 5570   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5571   ins_encode %{
 5572     __ movzwl($dst$$Register, $mem$$Address);
 5573   %}
 5574   ins_pipe(ialu_reg_mem);
 5575 %}
 5576 
 5577 // Load Integer into Long Register
 5578 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5579   match(Set dst (ConvI2L (LoadI mem)));
 5580   effect(KILL cr);
 5581 
 5582   ins_cost(375);
 5583   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5584             "MOV    $dst.hi,$dst.lo\n\t"
 5585             "SAR    $dst.hi,31" %}
 5586 
 5587   ins_encode %{
 5588     __ movl($dst$$Register, $mem$$Address);
 5589     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5590     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5591   %}
 5592 
 5593   ins_pipe(ialu_reg_mem);
 5594 %}
 5595 
 5596 // Load Integer with mask 0xFF into Long Register
 5597 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5598   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5599   effect(KILL cr);
 5600 
 5601   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5602             "XOR    $dst.hi,$dst.hi" %}
 5603   ins_encode %{
 5604     Register Rdst = $dst$$Register;
 5605     __ movzbl(Rdst, $mem$$Address);
 5606     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5607   %}
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Integer with mask 0xFFFF into Long Register
 5612 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movzwl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Integer with 31-bit mask into Long Register
 5627 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5628   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5629   effect(KILL cr);
 5630 
 5631   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5632             "XOR    $dst.hi,$dst.hi\n\t"
 5633             "AND    $dst.lo,$mask" %}
 5634   ins_encode %{
 5635     Register Rdst = $dst$$Register;
 5636     __ movl(Rdst, $mem$$Address);
 5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5638     __ andl(Rdst, $mask$$constant);
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Unsigned Integer into Long Register
 5644 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5645   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5646   effect(KILL cr);
 5647 
 5648   ins_cost(250);
 5649   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5650             "XOR    $dst.hi,$dst.hi" %}
 5651 
 5652   ins_encode %{
 5653     __ movl($dst$$Register, $mem$$Address);
 5654     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5655   %}
 5656 
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Long.  Cannot clobber address while loading, so restrict address
 5661 // register to ESI
 5662 instruct loadL(eRegL dst, load_long_memory mem) %{
 5663   predicate(!((LoadLNode*)n)->require_atomic_access());
 5664   match(Set dst (LoadL mem));
 5665 
 5666   ins_cost(250);
 5667   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5668             "MOV    $dst.hi,$mem+4" %}
 5669 
 5670   ins_encode %{
 5671     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5672     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5673     __ movl($dst$$Register, Amemlo);
 5674     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5675   %}
 5676 
 5677   ins_pipe(ialu_reg_long_mem);
 5678 %}
 5679 
 5680 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5681 // then store it down to the stack and reload on the int
 5682 // side.
 5683 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5684   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5685   match(Set dst (LoadL mem));
 5686 
 5687   ins_cost(200);
 5688   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5689             "FISTp  $dst" %}
 5690   ins_encode(enc_loadL_volatile(mem,dst));
 5691   ins_pipe( fpu_reg_mem );
 5692 %}
 5693 
 5694 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5695   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5696   match(Set dst (LoadL mem));
 5697   effect(TEMP tmp);
 5698   ins_cost(180);
 5699   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5700             "MOVSD  $dst,$tmp" %}
 5701   ins_encode %{
 5702     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5703     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5704   %}
 5705   ins_pipe( pipe_slow );
 5706 %}
 5707 
 5708 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5709   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5710   match(Set dst (LoadL mem));
 5711   effect(TEMP tmp);
 5712   ins_cost(160);
 5713   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5714             "MOVD   $dst.lo,$tmp\n\t"
 5715             "PSRLQ  $tmp,32\n\t"
 5716             "MOVD   $dst.hi,$tmp" %}
 5717   ins_encode %{
 5718     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5719     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5720     __ psrlq($tmp$$XMMRegister, 32);
 5721     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5722   %}
 5723   ins_pipe( pipe_slow );
 5724 %}
 5725 
 5726 // Load Range
 5727 instruct loadRange(rRegI dst, memory mem) %{
 5728   match(Set dst (LoadRange mem));
 5729 
 5730   ins_cost(125);
 5731   format %{ "MOV    $dst,$mem" %}
 5732   opcode(0x8B);
 5733   ins_encode( OpcP, RegMem(dst,mem));
 5734   ins_pipe( ialu_reg_mem );
 5735 %}
 5736 
 5737 
 5738 // Load Pointer
 5739 instruct loadP(eRegP dst, memory mem) %{
 5740   match(Set dst (LoadP mem));
 5741 
 5742   ins_cost(125);
 5743   format %{ "MOV    $dst,$mem" %}
 5744   opcode(0x8B);
 5745   ins_encode( OpcP, RegMem(dst,mem));
 5746   ins_pipe( ialu_reg_mem );
 5747 %}
 5748 
 5749 // Load Klass Pointer
 5750 instruct loadKlass(eRegP dst, memory mem) %{
 5751   match(Set dst (LoadKlass mem));
 5752 
 5753   ins_cost(125);
 5754   format %{ "MOV    $dst,$mem" %}
 5755   opcode(0x8B);
 5756   ins_encode( OpcP, RegMem(dst,mem));
 5757   ins_pipe( ialu_reg_mem );
 5758 %}
 5759 
 5760 // Load Float
 5761 instruct MoveF2LEG(legRegF dst, regF src) %{
 5762   match(Set dst src);
 5763   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5764   ins_encode %{
 5765     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5766   %}
 5767   ins_pipe( fpu_reg_reg );
 5768 %}
 5769 
 5770 // Load Float
 5771 instruct MoveLEG2F(regF dst, legRegF src) %{
 5772   match(Set dst src);
 5773   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5774   ins_encode %{
 5775     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5776   %}
 5777   ins_pipe( fpu_reg_reg );
 5778 %}
 5779 
 5780 // Load Double
 5781 instruct MoveD2LEG(legRegD dst, regD src) %{
 5782   match(Set dst src);
 5783   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5784   ins_encode %{
 5785     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5786   %}
 5787   ins_pipe( fpu_reg_reg );
 5788 %}
 5789 
 5790 // Load Double
 5791 instruct MoveLEG2D(regD dst, legRegD src) %{
 5792   match(Set dst src);
 5793   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5794   ins_encode %{
 5795     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5796   %}
 5797   ins_pipe( fpu_reg_reg );
 5798 %}
 5799 
 5800 // Load Double
 5801 instruct loadDPR(regDPR dst, memory mem) %{
 5802   predicate(UseSSE<=1);
 5803   match(Set dst (LoadD mem));
 5804 
 5805   ins_cost(150);
 5806   format %{ "FLD_D  ST,$mem\n\t"
 5807             "FSTP   $dst" %}
 5808   opcode(0xDD);               /* DD /0 */
 5809   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5810               Pop_Reg_DPR(dst) );
 5811   ins_pipe( fpu_reg_mem );
 5812 %}
 5813 
 5814 // Load Double to XMM
 5815 instruct loadD(regD dst, memory mem) %{
 5816   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5817   match(Set dst (LoadD mem));
 5818   ins_cost(145);
 5819   format %{ "MOVSD  $dst,$mem" %}
 5820   ins_encode %{
 5821     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5822   %}
 5823   ins_pipe( pipe_slow );
 5824 %}
 5825 
 5826 instruct loadD_partial(regD dst, memory mem) %{
 5827   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5828   match(Set dst (LoadD mem));
 5829   ins_cost(145);
 5830   format %{ "MOVLPD $dst,$mem" %}
 5831   ins_encode %{
 5832     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5833   %}
 5834   ins_pipe( pipe_slow );
 5835 %}
 5836 
 5837 // Load to XMM register (single-precision floating point)
 5838 // MOVSS instruction
 5839 instruct loadF(regF dst, memory mem) %{
 5840   predicate(UseSSE>=1);
 5841   match(Set dst (LoadF mem));
 5842   ins_cost(145);
 5843   format %{ "MOVSS  $dst,$mem" %}
 5844   ins_encode %{
 5845     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5846   %}
 5847   ins_pipe( pipe_slow );
 5848 %}
 5849 
 5850 // Load Float
 5851 instruct loadFPR(regFPR dst, memory mem) %{
 5852   predicate(UseSSE==0);
 5853   match(Set dst (LoadF mem));
 5854 
 5855   ins_cost(150);
 5856   format %{ "FLD_S  ST,$mem\n\t"
 5857             "FSTP   $dst" %}
 5858   opcode(0xD9);               /* D9 /0 */
 5859   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5860               Pop_Reg_FPR(dst) );
 5861   ins_pipe( fpu_reg_mem );
 5862 %}
 5863 
 5864 // Load Effective Address
 5865 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5866   match(Set dst mem);
 5867 
 5868   ins_cost(110);
 5869   format %{ "LEA    $dst,$mem" %}
 5870   opcode(0x8D);
 5871   ins_encode( OpcP, RegMem(dst,mem));
 5872   ins_pipe( ialu_reg_reg_fat );
 5873 %}
 5874 
 5875 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5876   match(Set dst mem);
 5877 
 5878   ins_cost(110);
 5879   format %{ "LEA    $dst,$mem" %}
 5880   opcode(0x8D);
 5881   ins_encode( OpcP, RegMem(dst,mem));
 5882   ins_pipe( ialu_reg_reg_fat );
 5883 %}
 5884 
 5885 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5886   match(Set dst mem);
 5887 
 5888   ins_cost(110);
 5889   format %{ "LEA    $dst,$mem" %}
 5890   opcode(0x8D);
 5891   ins_encode( OpcP, RegMem(dst,mem));
 5892   ins_pipe( ialu_reg_reg_fat );
 5893 %}
 5894 
 5895 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5896   match(Set dst mem);
 5897 
 5898   ins_cost(110);
 5899   format %{ "LEA    $dst,$mem" %}
 5900   opcode(0x8D);
 5901   ins_encode( OpcP, RegMem(dst,mem));
 5902   ins_pipe( ialu_reg_reg_fat );
 5903 %}
 5904 
 5905 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5906   match(Set dst mem);
 5907 
 5908   ins_cost(110);
 5909   format %{ "LEA    $dst,$mem" %}
 5910   opcode(0x8D);
 5911   ins_encode( OpcP, RegMem(dst,mem));
 5912   ins_pipe( ialu_reg_reg_fat );
 5913 %}
 5914 
 5915 // Load Constant
 5916 instruct loadConI(rRegI dst, immI src) %{
 5917   match(Set dst src);
 5918 
 5919   format %{ "MOV    $dst,$src" %}
 5920   ins_encode( LdImmI(dst, src) );
 5921   ins_pipe( ialu_reg_fat );
 5922 %}
 5923 
 5924 // Load Constant zero
 5925 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5926   match(Set dst src);
 5927   effect(KILL cr);
 5928 
 5929   ins_cost(50);
 5930   format %{ "XOR    $dst,$dst" %}
 5931   opcode(0x33);  /* + rd */
 5932   ins_encode( OpcP, RegReg( dst, dst ) );
 5933   ins_pipe( ialu_reg );
 5934 %}
 5935 
 5936 instruct loadConP(eRegP dst, immP src) %{
 5937   match(Set dst src);
 5938 
 5939   format %{ "MOV    $dst,$src" %}
 5940   opcode(0xB8);  /* + rd */
 5941   ins_encode( LdImmP(dst, src) );
 5942   ins_pipe( ialu_reg_fat );
 5943 %}
 5944 
 5945 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5946   match(Set dst src);
 5947   effect(KILL cr);
 5948   ins_cost(200);
 5949   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5950             "MOV    $dst.hi,$src.hi" %}
 5951   opcode(0xB8);
 5952   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5953   ins_pipe( ialu_reg_long_fat );
 5954 %}
 5955 
 5956 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 5957   match(Set dst src);
 5958   effect(KILL cr);
 5959   ins_cost(150);
 5960   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5961             "XOR    $dst.hi,$dst.hi" %}
 5962   opcode(0x33,0x33);
 5963   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5964   ins_pipe( ialu_reg_long );
 5965 %}
 5966 
 5967 // The instruction usage is guarded by predicate in operand immFPR().
 5968 instruct loadConFPR(regFPR dst, immFPR con) %{
 5969   match(Set dst con);
 5970   ins_cost(125);
 5971   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5972             "FSTP   $dst" %}
 5973   ins_encode %{
 5974     __ fld_s($constantaddress($con));
 5975     __ fstp_d($dst$$reg);
 5976   %}
 5977   ins_pipe(fpu_reg_con);
 5978 %}
 5979 
 5980 // The instruction usage is guarded by predicate in operand immFPR0().
 5981 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5982   match(Set dst con);
 5983   ins_cost(125);
 5984   format %{ "FLDZ   ST\n\t"
 5985             "FSTP   $dst" %}
 5986   ins_encode %{
 5987     __ fldz();
 5988     __ fstp_d($dst$$reg);
 5989   %}
 5990   ins_pipe(fpu_reg_con);
 5991 %}
 5992 
 5993 // The instruction usage is guarded by predicate in operand immFPR1().
 5994 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5995   match(Set dst con);
 5996   ins_cost(125);
 5997   format %{ "FLD1   ST\n\t"
 5998             "FSTP   $dst" %}
 5999   ins_encode %{
 6000     __ fld1();
 6001     __ fstp_d($dst$$reg);
 6002   %}
 6003   ins_pipe(fpu_reg_con);
 6004 %}
 6005 
 6006 // The instruction usage is guarded by predicate in operand immF().
 6007 instruct loadConF(regF dst, immF con) %{
 6008   match(Set dst con);
 6009   ins_cost(125);
 6010   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6011   ins_encode %{
 6012     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6013   %}
 6014   ins_pipe(pipe_slow);
 6015 %}
 6016 
 6017 // The instruction usage is guarded by predicate in operand immF0().
 6018 instruct loadConF0(regF dst, immF0 src) %{
 6019   match(Set dst src);
 6020   ins_cost(100);
 6021   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6022   ins_encode %{
 6023     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6024   %}
 6025   ins_pipe(pipe_slow);
 6026 %}
 6027 
 6028 // The instruction usage is guarded by predicate in operand immDPR().
 6029 instruct loadConDPR(regDPR dst, immDPR con) %{
 6030   match(Set dst con);
 6031   ins_cost(125);
 6032 
 6033   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6034             "FSTP   $dst" %}
 6035   ins_encode %{
 6036     __ fld_d($constantaddress($con));
 6037     __ fstp_d($dst$$reg);
 6038   %}
 6039   ins_pipe(fpu_reg_con);
 6040 %}
 6041 
 6042 // The instruction usage is guarded by predicate in operand immDPR0().
 6043 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6044   match(Set dst con);
 6045   ins_cost(125);
 6046 
 6047   format %{ "FLDZ   ST\n\t"
 6048             "FSTP   $dst" %}
 6049   ins_encode %{
 6050     __ fldz();
 6051     __ fstp_d($dst$$reg);
 6052   %}
 6053   ins_pipe(fpu_reg_con);
 6054 %}
 6055 
 6056 // The instruction usage is guarded by predicate in operand immDPR1().
 6057 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6058   match(Set dst con);
 6059   ins_cost(125);
 6060 
 6061   format %{ "FLD1   ST\n\t"
 6062             "FSTP   $dst" %}
 6063   ins_encode %{
 6064     __ fld1();
 6065     __ fstp_d($dst$$reg);
 6066   %}
 6067   ins_pipe(fpu_reg_con);
 6068 %}
 6069 
 6070 // The instruction usage is guarded by predicate in operand immD().
 6071 instruct loadConD(regD dst, immD con) %{
 6072   match(Set dst con);
 6073   ins_cost(125);
 6074   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6075   ins_encode %{
 6076     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6077   %}
 6078   ins_pipe(pipe_slow);
 6079 %}
 6080 
 6081 // The instruction usage is guarded by predicate in operand immD0().
 6082 instruct loadConD0(regD dst, immD0 src) %{
 6083   match(Set dst src);
 6084   ins_cost(100);
 6085   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6086   ins_encode %{
 6087     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6088   %}
 6089   ins_pipe( pipe_slow );
 6090 %}
 6091 
 6092 // Load Stack Slot
 6093 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6094   match(Set dst src);
 6095   ins_cost(125);
 6096 
 6097   format %{ "MOV    $dst,$src" %}
 6098   opcode(0x8B);
 6099   ins_encode( OpcP, RegMem(dst,src));
 6100   ins_pipe( ialu_reg_mem );
 6101 %}
 6102 
 6103 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6104   match(Set dst src);
 6105 
 6106   ins_cost(200);
 6107   format %{ "MOV    $dst,$src.lo\n\t"
 6108             "MOV    $dst+4,$src.hi" %}
 6109   opcode(0x8B, 0x8B);
 6110   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6111   ins_pipe( ialu_mem_long_reg );
 6112 %}
 6113 
 6114 // Load Stack Slot
 6115 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6116   match(Set dst src);
 6117   ins_cost(125);
 6118 
 6119   format %{ "MOV    $dst,$src" %}
 6120   opcode(0x8B);
 6121   ins_encode( OpcP, RegMem(dst,src));
 6122   ins_pipe( ialu_reg_mem );
 6123 %}
 6124 
 6125 // Load Stack Slot
 6126 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6127   match(Set dst src);
 6128   ins_cost(125);
 6129 
 6130   format %{ "FLD_S  $src\n\t"
 6131             "FSTP   $dst" %}
 6132   opcode(0xD9);               /* D9 /0, FLD m32real */
 6133   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6134               Pop_Reg_FPR(dst) );
 6135   ins_pipe( fpu_reg_mem );
 6136 %}
 6137 
 6138 // Load Stack Slot
 6139 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6140   match(Set dst src);
 6141   ins_cost(125);
 6142 
 6143   format %{ "FLD_D  $src\n\t"
 6144             "FSTP   $dst" %}
 6145   opcode(0xDD);               /* DD /0, FLD m64real */
 6146   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6147               Pop_Reg_DPR(dst) );
 6148   ins_pipe( fpu_reg_mem );
 6149 %}
 6150 
 6151 // Prefetch instructions for allocation.
 6152 // Must be safe to execute with invalid address (cannot fault).
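      // Flavor selection: AllocatePrefetchInstr==3 emits PREFETCHW; with UseSSE>=1,
      // values 0/1/2 emit PREFETCHNTA/PREFETCHT0/PREFETCHT2; with UseSSE==0 and
      // AllocatePrefetchInstr!=3 the prefetch degenerates to an empty encoding.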
 6153 
 6154 instruct prefetchAlloc0( memory mem ) %{
 6155   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6156   match(PrefetchAllocation mem);
 6157   ins_cost(0);
 6158   size(0);
 6159   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6160   ins_encode();
 6161   ins_pipe(empty);
 6162 %}
 6163 
 6164 instruct prefetchAlloc( memory mem ) %{
 6165   predicate(AllocatePrefetchInstr==3);
 6166   match( PrefetchAllocation mem );
 6167   ins_cost(100);
 6168 
 6169   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6170   ins_encode %{
 6171     __ prefetchw($mem$$Address);
 6172   %}
 6173   ins_pipe(ialu_mem);
 6174 %}
 6175 
 6176 instruct prefetchAllocNTA( memory mem ) %{
 6177   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6178   match(PrefetchAllocation mem);
 6179   ins_cost(100);
 6180 
 6181   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6182   ins_encode %{
 6183     __ prefetchnta($mem$$Address);
 6184   %}
 6185   ins_pipe(ialu_mem);
 6186 %}
 6187 
 6188 instruct prefetchAllocT0( memory mem ) %{
 6189   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6190   match(PrefetchAllocation mem);
 6191   ins_cost(100);
 6192 
 6193   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6194   ins_encode %{
 6195     __ prefetcht0($mem$$Address);
 6196   %}
 6197   ins_pipe(ialu_mem);
 6198 %}
 6199 
 6200 instruct prefetchAllocT2( memory mem ) %{
 6201   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6202   match(PrefetchAllocation mem);
 6203   ins_cost(100);
 6204 
 6205   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6206   ins_encode %{
 6207     __ prefetcht2($mem$$Address);
 6208   %}
 6209   ins_pipe(ialu_mem);
 6210 %}
 6211 
 6212 //----------Store Instructions-------------------------------------------------
 6213 
 6214 // Store Byte
 6215 instruct storeB(memory mem, xRegI src) %{
 6216   match(Set mem (StoreB mem src));
 6217 
 6218   ins_cost(125);
 6219   format %{ "MOV8   $mem,$src" %}
 6220   opcode(0x88);
 6221   ins_encode( OpcP, RegMem( src, mem ) );
 6222   ins_pipe( ialu_mem_reg );
 6223 %}
 6224 
 6225 // Store Char/Short
 6226 instruct storeC(memory mem, rRegI src) %{
 6227   match(Set mem (StoreC mem src));
 6228 
 6229   ins_cost(125);
 6230   format %{ "MOV16  $mem,$src" %}
 6231   opcode(0x89, 0x66);
 6232   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6233   ins_pipe( ialu_mem_reg );
 6234 %}
 6235 
 6236 // Store Integer
 6237 instruct storeI(memory mem, rRegI src) %{
 6238   match(Set mem (StoreI mem src));
 6239 
 6240   ins_cost(125);
 6241   format %{ "MOV    $mem,$src" %}
 6242   opcode(0x89);
 6243   ins_encode( OpcP, RegMem( src, mem ) );
 6244   ins_pipe( ialu_mem_reg );
 6245 %}
 6246 
 6247 // Store Long
 6248 instruct storeL(long_memory mem, eRegL src) %{
 6249   predicate(!((StoreLNode*)n)->require_atomic_access());
 6250   match(Set mem (StoreL mem src));
 6251 
 6252   ins_cost(200);
 6253   format %{ "MOV    $mem,$src.lo\n\t"
 6254             "MOV    $mem+4,$src.hi" %}
 6255   opcode(0x89, 0x89);
 6256   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6257   ins_pipe( ialu_mem_long_reg );
 6258 %}
 6259 
 6260 // Store Long to Integer
 6261 instruct storeL2I(memory mem, eRegL src) %{
 6262   match(Set mem (StoreI mem (ConvL2I src)));
 6263 
 6264   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6265   ins_encode %{
 6266     __ movl($mem$$Address, $src$$Register);
 6267   %}
 6268   ins_pipe(ialu_mem_reg);
 6269 %}
 6270 
 6271 // Volatile Store Long.  Must be atomic, so move it into
 6272 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6273 // target address before the store (for null-ptr checks)
 6274 // so the memory operand is used twice in the encoding.
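      // With UseSSE>=2 the storeLX variants that follow get the same atomicity
      // from a single 64-bit MOVSD through an XMM temporary instead of the x87
      // FILD/FISTP pair.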
 6275 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6276   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6277   match(Set mem (StoreL mem src));
 6278   effect( KILL cr );
 6279   ins_cost(400);
 6280   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6281             "FILD   $src\n\t"
 6282             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6283   opcode(0x3B);
 6284   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6285   ins_pipe( fpu_reg_mem );
 6286 %}
 6287 
 6288 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6289   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6290   match(Set mem (StoreL mem src));
 6291   effect( TEMP tmp, KILL cr );
 6292   ins_cost(380);
 6293   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6294             "MOVSD  $tmp,$src\n\t"
 6295             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6296   ins_encode %{
 6297     __ cmpl(rax, $mem$$Address);
 6298     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6299     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6300   %}
 6301   ins_pipe( pipe_slow );
 6302 %}
 6303 
 6304 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6305   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6306   match(Set mem (StoreL mem src));
 6307   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6308   ins_cost(360);
 6309   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6310             "MOVD   $tmp,$src.lo\n\t"
 6311             "MOVD   $tmp2,$src.hi\n\t"
 6312             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6313             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6314   ins_encode %{
 6315     __ cmpl(rax, $mem$$Address);
 6316     __ movdl($tmp$$XMMRegister, $src$$Register);
 6317     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6318     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6319     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6320   %}
 6321   ins_pipe( pipe_slow );
 6322 %}
 6323 
 6324 // Store Pointer; for storing unknown oops and raw pointers
 6325 instruct storeP(memory mem, anyRegP src) %{
 6326   match(Set mem (StoreP mem src));
 6327 
 6328   ins_cost(125);
 6329   format %{ "MOV    $mem,$src" %}
 6330   opcode(0x89);
 6331   ins_encode( OpcP, RegMem( src, mem ) );
 6332   ins_pipe( ialu_mem_reg );
 6333 %}
 6334 
 6335 // Store Integer Immediate
 6336 instruct storeImmI(memory mem, immI src) %{
 6337   match(Set mem (StoreI mem src));
 6338 
 6339   ins_cost(150);
 6340   format %{ "MOV    $mem,$src" %}
 6341   opcode(0xC7);               /* C7 /0 */
 6342   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6343   ins_pipe( ialu_mem_imm );
 6344 %}
 6345 
 6346 // Store Short/Char Immediate
 6347 instruct storeImmI16(memory mem, immI16 src) %{
 6348   predicate(UseStoreImmI16);
 6349   match(Set mem (StoreC mem src));
 6350 
 6351   ins_cost(150);
 6352   format %{ "MOV16  $mem,$src" %}
 6353   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6354   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6355   ins_pipe( ialu_mem_imm );
 6356 %}
 6357 
 6358 // Store Pointer Immediate; null pointers or constant oops that do not
 6359 // need card-mark barriers.
 6360 instruct storeImmP(memory mem, immP src) %{
 6361   match(Set mem (StoreP mem src));
 6362 
 6363   ins_cost(150);
 6364   format %{ "MOV    $mem,$src" %}
 6365   opcode(0xC7);               /* C7 /0 */
 6366   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6367   ins_pipe( ialu_mem_imm );
 6368 %}
 6369 
 6370 // Store Byte Immediate
 6371 instruct storeImmB(memory mem, immI8 src) %{
 6372   match(Set mem (StoreB mem src));
 6373 
 6374   ins_cost(150);
 6375   format %{ "MOV8   $mem,$src" %}
 6376   opcode(0xC6);               /* C6 /0 */
 6377   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6378   ins_pipe( ialu_mem_imm );
 6379 %}
 6380 
 6381 // Store CMS card-mark Immediate
 6382 instruct storeImmCM(memory mem, immI8 src) %{
 6383   match(Set mem (StoreCM mem src));
 6384 
 6385   ins_cost(150);
 6386   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6387   opcode(0xC6);               /* C6 /0 */
 6388   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6389   ins_pipe( ialu_mem_imm );
 6390 %}
 6391 
 6392 // Store Double
 6393 instruct storeDPR( memory mem, regDPR1 src) %{
 6394   predicate(UseSSE<=1);
 6395   match(Set mem (StoreD mem src));
 6396 
 6397   ins_cost(100);
 6398   format %{ "FST_D  $mem,$src" %}
 6399   opcode(0xDD);       /* DD /2 */
 6400   ins_encode( enc_FPR_store(mem,src) );
 6401   ins_pipe( fpu_mem_reg );
 6402 %}
 6403 
 6404 // Store Double does rounding on x86
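      // FST_D to a 64-bit memory slot narrows the 80-bit x87 value to double
      // precision, which is why the RoundDouble node can be folded into the store.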
 6405 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6406   predicate(UseSSE<=1);
 6407   match(Set mem (StoreD mem (RoundDouble src)));
 6408 
 6409   ins_cost(100);
 6410   format %{ "FST_D  $mem,$src\t# round" %}
 6411   opcode(0xDD);       /* DD /2 */
 6412   ins_encode( enc_FPR_store(mem,src) );
 6413   ins_pipe( fpu_mem_reg );
 6414 %}
 6415 
 6416 // Store XMM register to memory (double-precision floating point)
 6417 // MOVSD instruction
 6418 instruct storeD(memory mem, regD src) %{
 6419   predicate(UseSSE>=2);
 6420   match(Set mem (StoreD mem src));
 6421   ins_cost(95);
 6422   format %{ "MOVSD  $mem,$src" %}
 6423   ins_encode %{
 6424     __ movdbl($mem$$Address, $src$$XMMRegister);
 6425   %}
 6426   ins_pipe( pipe_slow );
 6427 %}
 6428 
 6429 // Move Double (regD to vlRegD)
 6430 instruct MoveD2VL(vlRegD dst, regD src) %{
 6431   match(Set dst src);
 6432   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6433   ins_encode %{
 6434     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6435   %}
 6436   ins_pipe( fpu_reg_reg );
 6437 %}
 6438 
 6439 // Move Double (vlRegD to regD)
 6440 instruct MoveVL2D(regD dst, vlRegD src) %{
 6441   match(Set dst src);
 6442   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6443   ins_encode %{
 6444     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6445   %}
 6446   ins_pipe( fpu_reg_reg );
 6447 %}
 6448 
 6449 // Store XMM register to memory (single-precision floating point)
 6450 // MOVSS instruction
 6451 instruct storeF(memory mem, regF src) %{
 6452   predicate(UseSSE>=1);
 6453   match(Set mem (StoreF mem src));
 6454   ins_cost(95);
 6455   format %{ "MOVSS  $mem,$src" %}
 6456   ins_encode %{
 6457     __ movflt($mem$$Address, $src$$XMMRegister);
 6458   %}
 6459   ins_pipe( pipe_slow );
 6460 %}
 6461 
 6462 // Move Float (regF to vlRegF)
 6463 instruct MoveF2VL(vlRegF dst, regF src) %{
 6464   match(Set dst src);
 6465   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6466   ins_encode %{
 6467     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6468   %}
 6469   ins_pipe( fpu_reg_reg );
 6470 %}
 6471 
 6472 // Move Float (vlRegF to regF)
 6473 instruct MoveVL2F(regF dst, vlRegF src) %{
 6474   match(Set dst src);
 6475   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6476   ins_encode %{
 6477     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6478   %}
 6479   ins_pipe( fpu_reg_reg );
 6480 %}
 6481 
 6482 // Store Float
 6483 instruct storeFPR( memory mem, regFPR1 src) %{
 6484   predicate(UseSSE==0);
 6485   match(Set mem (StoreF mem src));
 6486 
 6487   ins_cost(100);
 6488   format %{ "FST_S  $mem,$src" %}
 6489   opcode(0xD9);       /* D9 /2 */
 6490   ins_encode( enc_FPR_store(mem,src) );
 6491   ins_pipe( fpu_mem_reg );
 6492 %}
 6493 
 6494 // Store Float does rounding on x86
 6495 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6496   predicate(UseSSE==0);
 6497   match(Set mem (StoreF mem (RoundFloat src)));
 6498 
 6499   ins_cost(100);
 6500   format %{ "FST_S  $mem,$src\t# round" %}
 6501   opcode(0xD9);       /* D9 /2 */
 6502   ins_encode( enc_FPR_store(mem,src) );
 6503   ins_pipe( fpu_mem_reg );
 6504 %}
 6505 
 6506 // Store Float from Double register does rounding on x86 (ConvD2F folded into the store)
 6507 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6508   predicate(UseSSE<=1);
 6509   match(Set mem (StoreF mem (ConvD2F src)));
 6510 
 6511   ins_cost(100);
 6512   format %{ "FST_S  $mem,$src\t# D-round" %}
 6513   opcode(0xD9);       /* D9 /2 */
 6514   ins_encode( enc_FPR_store(mem,src) );
 6515   ins_pipe( fpu_mem_reg );
 6516 %}
 6517 
 6518 // Store immediate Float value (it is faster than store from FPU register)
 6519 // The instruction usage is guarded by predicate in operand immFPR().
 6520 instruct storeFPR_imm( memory mem, immFPR src) %{
 6521   match(Set mem (StoreF mem src));
 6522 
 6523   ins_cost(50);
 6524   format %{ "MOV    $mem,$src\t# store float" %}
 6525   opcode(0xC7);               /* C7 /0 */
 6526   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6527   ins_pipe( ialu_mem_imm );
 6528 %}
 6529 
 6530 // Store immediate Float value (it is faster than store from XMM register)
 6531 // The instruction usage is guarded by predicate in operand immF().
 6532 instruct storeF_imm( memory mem, immF src) %{
 6533   match(Set mem (StoreF mem src));
 6534 
 6535   ins_cost(50);
 6536   format %{ "MOV    $mem,$src\t# store float" %}
 6537   opcode(0xC7);               /* C7 /0 */
 6538   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6539   ins_pipe( ialu_mem_imm );
 6540 %}
 6541 
 6542 // Store Integer to stack slot
 6543 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6544   match(Set dst src);
 6545 
 6546   ins_cost(100);
 6547   format %{ "MOV    $dst,$src" %}
 6548   opcode(0x89);
 6549   ins_encode( OpcPRegSS( dst, src ) );
 6550   ins_pipe( ialu_mem_reg );
 6551 %}
 6552 
 6553 // Store Pointer to stack slot
 6554 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6555   match(Set dst src);
 6556 
 6557   ins_cost(100);
 6558   format %{ "MOV    $dst,$src" %}
 6559   opcode(0x89);
 6560   ins_encode( OpcPRegSS( dst, src ) );
 6561   ins_pipe( ialu_mem_reg );
 6562 %}
 6563 
 6564 // Store Long to stack slot
 6565 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6566   match(Set dst src);
 6567 
 6568   ins_cost(200);
 6569   format %{ "MOV    $dst,$src.lo\n\t"
 6570             "MOV    $dst+4,$src.hi" %}
 6571   opcode(0x89, 0x89);
 6572   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6573   ins_pipe( ialu_mem_long_reg );
 6574 %}
 6575 
 6576 //----------MemBar Instructions-----------------------------------------------
 6577 // Memory barrier flavors
 6578 
 6579 instruct membar_acquire() %{
 6580   match(MemBarAcquire);
 6581   match(LoadFence);
 6582   ins_cost(400);
 6583 
 6584   size(0);
 6585   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6586   ins_encode();
 6587   ins_pipe(empty);
 6588 %}
 6589 
 6590 instruct membar_acquire_lock() %{
 6591   match(MemBarAcquireLock);
 6592   ins_cost(0);
 6593 
 6594   size(0);
 6595   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6596   ins_encode( );
 6597   ins_pipe(empty);
 6598 %}
 6599 
 6600 instruct membar_release() %{
 6601   match(MemBarRelease);
 6602   match(StoreFence);
 6603   ins_cost(400);
 6604 
 6605   size(0);
 6606   format %{ "MEMBAR-release ! (empty encoding)" %}
 6607   ins_encode( );
 6608   ins_pipe(empty);
 6609 %}
 6610 
 6611 instruct membar_release_lock() %{
 6612   match(MemBarReleaseLock);
 6613   ins_cost(0);
 6614 
 6615   size(0);
 6616   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6617   ins_encode( );
 6618   ins_pipe(empty);
 6619 %}
 6620 
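      // A locked ADD of zero to the top-of-stack word provides the StoreLoad
      // barrier; it is typically cheaper than MFENCE and clobbers only the
      // flags, hence the KILL cr effect below.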
 6621 instruct membar_volatile(eFlagsReg cr) %{
 6622   match(MemBarVolatile);
 6623   effect(KILL cr);
 6624   ins_cost(400);
 6625 
 6626   format %{
 6627     $$template
 6628     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6629   %}
 6630   ins_encode %{
 6631     __ membar(Assembler::StoreLoad);
 6632   %}
 6633   ins_pipe(pipe_slow);
 6634 %}
 6635 
 6636 instruct unnecessary_membar_volatile() %{
 6637   match(MemBarVolatile);
 6638   predicate(Matcher::post_store_load_barrier(n));
 6639   ins_cost(0);
 6640 
 6641   size(0);
 6642   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6643   ins_encode( );
 6644   ins_pipe(empty);
 6645 %}
 6646 
 6647 instruct membar_storestore() %{
 6648   match(MemBarStoreStore);
 6649   ins_cost(0);
 6650 
 6651   size(0);
 6652   format %{ "MEMBAR-storestore (empty encoding)" %}
 6653   ins_encode( );
 6654   ins_pipe(empty);
 6655 %}
 6656 
 6657 //----------Move Instructions--------------------------------------------------
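      // castX2P emits no code: both operands are constrained to EAX, so the
      // int-to-pointer cast is purely a register-allocation identity.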
 6658 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6659   match(Set dst (CastX2P src));
 6660   format %{ "# X2P  $dst, $src" %}
 6661   ins_encode( /*empty encoding*/ );
 6662   ins_cost(0);
 6663   ins_pipe(empty);
 6664 %}
 6665 
 6666 instruct castP2X(rRegI dst, eRegP src ) %{
 6667   match(Set dst (CastP2X src));
 6668   ins_cost(50);
 6669   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6670   ins_encode( enc_Copy( dst, src) );
 6671   ins_pipe( ialu_reg_reg );
 6672 %}
 6673 
 6674 //----------Conditional Move---------------------------------------------------
 6675 // Conditional move
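      // On CPUs without CMOV the move is emulated by branching around a MOV with
      // the opposite condition; XOR-ing the low bit of the condition code gives
      // that negation because x86 encodes each condition and its complement as
      // an adjacent pair (e.g. 0x4 = equal, 0x5 = not-equal).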
 6676 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6677   predicate(!VM_Version::supports_cmov() );
 6678   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6679   ins_cost(200);
 6680   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6681             "MOV    $dst,$src\n"
 6682       "skip:" %}
 6683   ins_encode %{
 6684     Label Lskip;
 6685     // Invert sense of branch from sense of CMOV
 6686     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6687     __ movl($dst$$Register, $src$$Register);
 6688     __ bind(Lskip);
 6689   %}
 6690   ins_pipe( pipe_cmov_reg );
 6691 %}
 6692 
 6693 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6694   predicate(!VM_Version::supports_cmov() );
 6695   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6696   ins_cost(200);
 6697   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6698             "MOV    $dst,$src\n"
 6699       "skip:" %}
 6700   ins_encode %{
 6701     Label Lskip;
 6702     // Invert sense of branch from sense of CMOV
 6703     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6704     __ movl($dst$$Register, $src$$Register);
 6705     __ bind(Lskip);
 6706   %}
 6707   ins_pipe( pipe_cmov_reg );
 6708 %}
 6709 
 6710 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6711   predicate(VM_Version::supports_cmov() );
 6712   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6713   ins_cost(200);
 6714   format %{ "CMOV$cop $dst,$src" %}
 6715   opcode(0x0F,0x40);
 6716   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6717   ins_pipe( pipe_cmov_reg );
 6718 %}
 6719 
 6720 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6721   predicate(VM_Version::supports_cmov() );
 6722   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6723   ins_cost(200);
 6724   format %{ "CMOV$cop $dst,$src" %}
 6725   opcode(0x0F,0x40);
 6726   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6727   ins_pipe( pipe_cmov_reg );
 6728 %}
 6729 
 6730 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6731   predicate(VM_Version::supports_cmov() );
 6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6733   ins_cost(200);
 6734   expand %{
 6735     cmovI_regU(cop, cr, dst, src);
 6736   %}
 6737 %}
 6738 
 6739 // Conditional move
 6740 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6741   predicate(VM_Version::supports_cmov() );
 6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6743   ins_cost(250);
 6744   format %{ "CMOV$cop $dst,$src" %}
 6745   opcode(0x0F,0x40);
 6746   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6747   ins_pipe( pipe_cmov_mem );
 6748 %}
 6749 
 6750 // Conditional move
 6751 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6752   predicate(VM_Version::supports_cmov() );
 6753   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6754   ins_cost(250);
 6755   format %{ "CMOV$cop $dst,$src" %}
 6756   opcode(0x0F,0x40);
 6757   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6758   ins_pipe( pipe_cmov_mem );
 6759 %}
 6760 
 6761 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6762   predicate(VM_Version::supports_cmov() );
 6763   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6764   ins_cost(250);
 6765   expand %{
 6766     cmovI_memU(cop, cr, dst, src);
 6767   %}
 6768 %}
 6769 
 6770 // Conditional move
 6771 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6772   predicate(VM_Version::supports_cmov() );
 6773   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6774   ins_cost(200);
 6775   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6776   opcode(0x0F,0x40);
 6777   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6778   ins_pipe( pipe_cmov_reg );
 6779 %}
 6780 
 6781 // Conditional move (non-P6 version)
 6782 // Note: a CMoveP is generated for stubs and native wrappers
 6783 //       regardless of whether we are on a P6, so we
 6784 //       emulate a cmov here.
 6785 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6786   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6787   ins_cost(300);
 6788   format %{ "Jn$cop   skip\n\t"
 6789           "MOV    $dst,$src\t# pointer\n"
 6790       "skip:" %}
 6791   opcode(0x8b);
 6792   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6793   ins_pipe( pipe_cmov_reg );
 6794 %}
 6795 
 6796 // Conditional move
 6797 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6798   predicate(VM_Version::supports_cmov() );
 6799   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6800   ins_cost(200);
 6801   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6802   opcode(0x0F,0x40);
 6803   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6804   ins_pipe( pipe_cmov_reg );
 6805 %}
 6806 
 6807 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6808   predicate(VM_Version::supports_cmov() );
 6809   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6810   ins_cost(200);
 6811   expand %{
 6812     cmovP_regU(cop, cr, dst, src);
 6813   %}
 6814 %}
 6815 
 6816 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6817 // correctly meets the two pointer arguments; one is an incoming
 6818 // register but the other is a memory operand.  ALSO appears to
 6819 // be buggy with implicit null checks.
 6820 //
 6821 //// Conditional move
 6822 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6823 //  predicate(VM_Version::supports_cmov() );
 6824 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6825 //  ins_cost(250);
 6826 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6827 //  opcode(0x0F,0x40);
 6828 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6829 //  ins_pipe( pipe_cmov_mem );
 6830 //%}
 6831 //
 6832 //// Conditional move
 6833 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6834 //  predicate(VM_Version::supports_cmov() );
 6835 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6836 //  ins_cost(250);
 6837 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6838 //  opcode(0x0F,0x40);
 6839 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6840 //  ins_pipe( pipe_cmov_mem );
 6841 //%}
 6842 
 6843 // Conditional move
 6844 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6845   predicate(UseSSE<=1);
 6846   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6847   ins_cost(200);
 6848   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6849   opcode(0xDA);
 6850   ins_encode( enc_cmov_dpr(cop,src) );
 6851   ins_pipe( pipe_cmovDPR_reg );
 6852 %}
 6853 
 6854 // Conditional move
 6855 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6856   predicate(UseSSE==0);
 6857   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6858   ins_cost(200);
 6859   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6860   opcode(0xDA);
 6861   ins_encode( enc_cmov_dpr(cop,src) );
 6862   ins_pipe( pipe_cmovDPR_reg );
 6863 %}
 6864 
 6865 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6866 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6867   predicate(UseSSE<=1);
 6868   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6869   ins_cost(200);
 6870   format %{ "Jn$cop   skip\n\t"
 6871             "MOV    $dst,$src\t# double\n"
 6872       "skip:" %}
 6873   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6874   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6875   ins_pipe( pipe_cmovDPR_reg );
 6876 %}
 6877 
 6878 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6879 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6880   predicate(UseSSE==0);
 6881   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6882   ins_cost(200);
 6883   format %{ "Jn$cop    skip\n\t"
 6884             "MOV    $dst,$src\t# float\n"
 6885       "skip:" %}
 6886   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6887   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6888   ins_pipe( pipe_cmovDPR_reg );
 6889 %}
 6890 
 6891 // No CMOV for XMM registers with SSE/SSE2; emulate with a branch around a move
 6892 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6893   predicate (UseSSE>=1);
 6894   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6895   ins_cost(200);
 6896   format %{ "Jn$cop   skip\n\t"
 6897             "MOVSS  $dst,$src\t# float\n"
 6898       "skip:" %}
 6899   ins_encode %{
 6900     Label skip;
 6901     // Invert sense of branch from sense of CMOV
 6902     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6903     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6904     __ bind(skip);
 6905   %}
 6906   ins_pipe( pipe_slow );
 6907 %}
 6908 
 6909 // No CMOV for XMM registers with SSE/SSE2; emulate with a branch around a move
 6910 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6911   predicate (UseSSE>=2);
 6912   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6913   ins_cost(200);
 6914   format %{ "Jn$cop   skip\n\t"
 6915             "MOVSD  $dst,$src\t# double\n"
 6916       "skip:" %}
 6917   ins_encode %{
 6918     Label skip;
 6919     // Invert sense of branch from sense of CMOV
 6920     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6921     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6922     __ bind(skip);
 6923   %}
 6924   ins_pipe( pipe_slow );
 6925 %}
 6926 
 6927 // unsigned version
 6928 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6929   predicate (UseSSE>=1);
 6930   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6931   ins_cost(200);
 6932   format %{ "Jn$cop   skip\n\t"
 6933             "MOVSS  $dst,$src\t# float\n"
 6934       "skip:" %}
 6935   ins_encode %{
 6936     Label skip;
 6937     // Invert sense of branch from sense of CMOV
 6938     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6939     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6940     __ bind(skip);
 6941   %}
 6942   ins_pipe( pipe_slow );
 6943 %}
 6944 
 6945 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6946   predicate (UseSSE>=1);
 6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   expand %{
 6950     fcmovF_regU(cop, cr, dst, src);
 6951   %}
 6952 %}
 6953 
 6954 // unsigned version
 6955 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6956   predicate (UseSSE>=2);
 6957   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6958   ins_cost(200);
 6959   format %{ "Jn$cop   skip\n\t"
 6960             "MOVSD  $dst,$src\t# double\n"
 6961       "skip:" %}
 6962   ins_encode %{
 6963     Label skip;
 6964     // Invert sense of branch from sense of CMOV
 6965     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6966     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6967     __ bind(skip);
 6968   %}
 6969   ins_pipe( pipe_slow );
 6970 %}
 6971 
 6972 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6973   predicate (UseSSE>=2);
 6974   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6975   ins_cost(200);
 6976   expand %{
 6977     fcmovD_regU(cop, cr, dst, src);
 6978   %}
 6979 %}
 6980 
 6981 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6982   predicate(VM_Version::supports_cmov() );
 6983   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6986             "CMOV$cop $dst.hi,$src.hi" %}
 6987   opcode(0x0F,0x40);
 6988   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6989   ins_pipe( pipe_cmov_reg_long );
 6990 %}
 6991 
 6992 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 6993   predicate(VM_Version::supports_cmov() );
 6994   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6995   ins_cost(200);
 6996   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6997             "CMOV$cop $dst.hi,$src.hi" %}
 6998   opcode(0x0F,0x40);
 6999   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7000   ins_pipe( pipe_cmov_reg_long );
 7001 %}
 7002 
 7003 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7004   predicate(VM_Version::supports_cmov() );
 7005   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7006   ins_cost(200);
 7007   expand %{
 7008     cmovL_regU(cop, cr, dst, src);
 7009   %}
 7010 %}
 7011 
 7012 //----------Arithmetic Instructions--------------------------------------------
 7013 //----------Addition Instructions----------------------------------------------
 7014 
 7015 // Integer Addition Instructions
 7016 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7017   match(Set dst (AddI dst src));
 7018   effect(KILL cr);
 7019 
 7020   size(2);
 7021   format %{ "ADD    $dst,$src" %}
 7022   opcode(0x03);
 7023   ins_encode( OpcP, RegReg( dst, src) );
 7024   ins_pipe( ialu_reg_reg );
 7025 %}
 7026 
 7027 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7028   match(Set dst (AddI dst src));
 7029   effect(KILL cr);
 7030 
 7031   format %{ "ADD    $dst,$src" %}
 7032   opcode(0x81, 0x00); /* /0 id */
 7033   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7034   ins_pipe( ialu_reg );
 7035 %}
 7036 
 7037 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7038   predicate(UseIncDec);
 7039   match(Set dst (AddI dst src));
 7040   effect(KILL cr);
 7041 
 7042   size(1);
 7043   format %{ "INC    $dst" %}
 7044   opcode(0x40); /*  */
 7045   ins_encode( Opc_plus( primary, dst ) );
 7046   ins_pipe( ialu_reg );
 7047 %}
 7048 
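      // Add via LEA: the three-operand form leaves the source register intact
      // and does not modify the flags, so no KILL cr effect is needed.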
 7049 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7050   match(Set dst (AddI src0 src1));
 7051   ins_cost(110);
 7052 
 7053   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7054   opcode(0x8D); /* 0x8D /r */
 7055   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7056   ins_pipe( ialu_reg_reg );
 7057 %}
 7058 
 7059 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7060   match(Set dst (AddP src0 src1));
 7061   ins_cost(110);
 7062 
 7063   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7064   opcode(0x8D); /* 0x8D /r */
 7065   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7066   ins_pipe( ialu_reg_reg );
 7067 %}
 7068 
 7069 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7070   predicate(UseIncDec);
 7071   match(Set dst (AddI dst src));
 7072   effect(KILL cr);
 7073 
 7074   size(1);
 7075   format %{ "DEC    $dst" %}
 7076   opcode(0x48); /*  */
 7077   ins_encode( Opc_plus( primary, dst ) );
 7078   ins_pipe( ialu_reg );
 7079 %}
 7080 
 7081 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7082   match(Set dst (AddP dst src));
 7083   effect(KILL cr);
 7084 
 7085   size(2);
 7086   format %{ "ADD    $dst,$src" %}
 7087   opcode(0x03);
 7088   ins_encode( OpcP, RegReg( dst, src) );
 7089   ins_pipe( ialu_reg_reg );
 7090 %}
 7091 
 7092 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7093   match(Set dst (AddP dst src));
 7094   effect(KILL cr);
 7095 
 7096   format %{ "ADD    $dst,$src" %}
 7097   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7098   // ins_encode( RegImm( dst, src) );
 7099   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7100   ins_pipe( ialu_reg );
 7101 %}
 7102 
 7103 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7104   match(Set dst (AddI dst (LoadI src)));
 7105   effect(KILL cr);
 7106 
 7107   ins_cost(125);
 7108   format %{ "ADD    $dst,$src" %}
 7109   opcode(0x03);
 7110   ins_encode( OpcP, RegMem( dst, src) );
 7111   ins_pipe( ialu_reg_mem );
 7112 %}
 7113 
 7114 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7115   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7116   effect(KILL cr);
 7117 
 7118   ins_cost(150);
 7119   format %{ "ADD    $dst,$src" %}
 7120   opcode(0x01);  /* Opcode 01 /r */
 7121   ins_encode( OpcP, RegMem( src, dst ) );
 7122   ins_pipe( ialu_mem_reg );
 7123 %}
 7124 
 7125 // Add Memory with Immediate
 7126 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7127   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7128   effect(KILL cr);
 7129 
 7130   ins_cost(125);
 7131   format %{ "ADD    $dst,$src" %}
 7132   opcode(0x81);               /* Opcode 81 /0 id */
 7133   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7134   ins_pipe( ialu_mem_imm );
 7135 %}
 7136 
 7137 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7138   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7139   effect(KILL cr);
 7140 
 7141   ins_cost(125);
 7142   format %{ "INC    $dst" %}
 7143   opcode(0xFF);               /* Opcode FF /0 */
 7144   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7145   ins_pipe( ialu_mem_imm );
 7146 %}
 7147 
 7148 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7149   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7150   effect(KILL cr);
 7151 
 7152   ins_cost(125);
 7153   format %{ "DEC    $dst" %}
 7154   opcode(0xFF);               /* Opcode FF /1 */
 7155   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7156   ins_pipe( ialu_mem_imm );
 7157 %}
 7158 
 7159 
 7160 instruct checkCastPP( eRegP dst ) %{
 7161   match(Set dst (CheckCastPP dst));
 7162 
 7163   size(0);
 7164   format %{ "#checkcastPP of $dst" %}
 7165   ins_encode( /*empty encoding*/ );
 7166   ins_pipe( empty );
 7167 %}
 7168 
 7169 instruct castPP( eRegP dst ) %{
 7170   match(Set dst (CastPP dst));
 7171   format %{ "#castPP of $dst" %}
 7172   ins_encode( /*empty encoding*/ );
 7173   ins_pipe( empty );
 7174 %}
 7175 
 7176 instruct castII( rRegI dst ) %{
 7177   match(Set dst (CastII dst));
 7178   format %{ "#castII of $dst" %}
 7179   ins_encode( /*empty encoding*/ );
 7180   ins_cost(0);
 7181   ins_pipe( empty );
 7182 %}
 7183 
 7184 instruct castLL( eRegL dst ) %{
 7185   match(Set dst (CastLL dst));
 7186   format %{ "#castLL of $dst" %}
 7187   ins_encode( /*empty encoding*/ );
 7188   ins_cost(0);
 7189   ins_pipe( empty );
 7190 %}
 7191 
 7192 instruct castFF( regF dst ) %{
 7193   predicate(UseSSE >= 1);
 7194   match(Set dst (CastFF dst));
 7195   format %{ "#castFF of $dst" %}
 7196   ins_encode( /*empty encoding*/ );
 7197   ins_cost(0);
 7198   ins_pipe( empty );
 7199 %}
 7200 
 7201 instruct castDD( regD dst ) %{
 7202   predicate(UseSSE >= 2);
 7203   match(Set dst (CastDD dst));
 7204   format %{ "#castDD of $dst" %}
 7205   ins_encode( /*empty encoding*/ );
 7206   ins_cost(0);
 7207   ins_pipe( empty );
 7208 %}
 7209 
 7210 instruct castFF_PR( regFPR dst ) %{
 7211   predicate(UseSSE < 1);
 7212   match(Set dst (CastFF dst));
 7213   format %{ "#castFF of $dst" %}
 7214   ins_encode( /*empty encoding*/ );
 7215   ins_cost(0);
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castDD_PR( regDPR dst ) %{
 7220   predicate(UseSSE < 2);
 7221   match(Set dst (CastDD dst));
 7222   format %{ "#castDD of $dst" %}
 7223   ins_encode( /*empty encoding*/ );
 7224   ins_cost(0);
 7225   ins_pipe( empty );
 7226 %}
 7227 
 7228 // Load-locked - same as a regular pointer load when used with compare-swap
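      // x86 has no load-linked/store-conditional; LoadPLocked is an ordinary MOV
      // and the paired StorePConditional below supplies the atomicity via
      // LOCK CMPXCHG against EAX.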
 7229 instruct loadPLocked(eRegP dst, memory mem) %{
 7230   match(Set dst (LoadPLocked mem));
 7231 
 7232   ins_cost(125);
 7233   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7234   opcode(0x8B);
 7235   ins_encode( OpcP, RegMem(dst,mem));
 7236   ins_pipe( ialu_reg_mem );
 7237 %}
 7238 
 7239 // Conditional-store of the updated heap-top.
 7240 // Used during allocation of the shared heap.
 7241 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
 7242 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7243   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7244   // EAX is killed if there is contention, but then it's also unused.
 7245   // In the common case of no contention, EAX holds the new oop address.
 7246   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7247   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7248   ins_pipe( pipe_cmpxchg );
 7249 %}
 7250 
 7251 // Conditional-store of an int value.
 7252 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7253 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7254   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7255   effect(KILL oldval);
 7256   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7257   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7258   ins_pipe( pipe_cmpxchg );
 7259 %}
 7260 
 7261 // Conditional-store of a long value.
 7262 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7263 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7264   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7265   effect(KILL oldval);
 7266   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7267             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7268             "XCHG   EBX,ECX"
 7269   %}
 7270   ins_encode %{
 7271     // Note: we need to swap rbx, and rcx before and after the
 7272     //       cmpxchg8 instruction because the instruction uses
 7273     //       rcx as the high order word of the new value to store but
 7274     //       our register encoding uses rbx.
 7275     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7276     __ lock();
 7277     __ cmpxchg8($mem$$Address);
 7278     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7279   %}
 7280   ins_pipe( pipe_cmpxchg );
 7281 %}
 7282 
 7283 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7284 
 7285 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7286   predicate(VM_Version::supports_cx8());
 7287   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7288   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7289   effect(KILL cr, KILL oldval);
 7290   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7291             "MOV    $res,0\n\t"
 7292             "JNE,s  fail\n\t"
 7293             "MOV    $res,1\n"
 7294           "fail:" %}
 7295   ins_encode( enc_cmpxchg8(mem_ptr),
 7296               enc_flags_ne_to_boolean(res) );
 7297   ins_pipe( pipe_cmpxchg );
 7298 %}
 7299 
 7300 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7301   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7302   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7303   effect(KILL cr, KILL oldval);
 7304   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7305             "MOV    $res,0\n\t"
 7306             "JNE,s  fail\n\t"
 7307             "MOV    $res,1\n"
 7308           "fail:" %}
 7309   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7310   ins_pipe( pipe_cmpxchg );
 7311 %}
 7312 
 7313 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7314   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7315   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7316   effect(KILL cr, KILL oldval);
 7317   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7318             "MOV    $res,0\n\t"
 7319             "JNE,s  fail\n\t"
 7320             "MOV    $res,1\n"
 7321           "fail:" %}
 7322   ins_encode( enc_cmpxchgb(mem_ptr),
 7323               enc_flags_ne_to_boolean(res) );
 7324   ins_pipe( pipe_cmpxchg );
 7325 %}
 7326 
 7327 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7328   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7329   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7330   effect(KILL cr, KILL oldval);
 7331   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7332             "MOV    $res,0\n\t"
 7333             "JNE,s  fail\n\t"
 7334             "MOV    $res,1\n"
 7335           "fail:" %}
 7336   ins_encode( enc_cmpxchgw(mem_ptr),
 7337               enc_flags_ne_to_boolean(res) );
 7338   ins_pipe( pipe_cmpxchg );
 7339 %}
 7340 
 7341 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7342   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7343   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7344   effect(KILL cr, KILL oldval);
 7345   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7346             "MOV    $res,0\n\t"
 7347             "JNE,s  fail\n\t"
 7348             "MOV    $res,1\n"
 7349           "fail:" %}
 7350   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7351   ins_pipe( pipe_cmpxchg );
 7352 %}
 7353 
 7354 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7355   predicate(VM_Version::supports_cx8());
 7356   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7357   effect(KILL cr);
 7358   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7359   ins_encode( enc_cmpxchg8(mem_ptr) );
 7360   ins_pipe( pipe_cmpxchg );
 7361 %}
 7362 
 7363 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7364   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7365   effect(KILL cr);
 7366   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7367   ins_encode( enc_cmpxchg(mem_ptr) );
 7368   ins_pipe( pipe_cmpxchg );
 7369 %}
 7370 
 7371 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7372   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7373   effect(KILL cr);
 7374   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7375   ins_encode( enc_cmpxchgb(mem_ptr) );
 7376   ins_pipe( pipe_cmpxchg );
 7377 %}
 7378 
 7379 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7380   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7381   effect(KILL cr);
 7382   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7383   ins_encode( enc_cmpxchgw(mem_ptr) );
 7384   ins_pipe( pipe_cmpxchg );
 7385 %}
 7386 
 7387 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7388   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7389   effect(KILL cr);
 7390   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7391   ins_encode( enc_cmpxchg(mem_ptr) );
 7392   ins_pipe( pipe_cmpxchg );
 7393 %}
 7394 
 7395 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7396   predicate(n->as_LoadStore()->result_not_used());
 7397   match(Set dummy (GetAndAddB mem add));
 7398   effect(KILL cr);
 7399   format %{ "ADDB  [$mem],$add" %}
 7400   ins_encode %{
 7401     __ lock();
 7402     __ addb($mem$$Address, $add$$constant);
 7403   %}
 7404   ins_pipe( pipe_cmpxchg );
 7405 %}
 7406 
 7407 // Important to match to xRegI: only 8-bit regs.
 7408 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7409   match(Set newval (GetAndAddB mem newval));
 7410   effect(KILL cr);
 7411   format %{ "XADDB  [$mem],$newval" %}
 7412   ins_encode %{
 7413     __ lock();
 7414     __ xaddb($mem$$Address, $newval$$Register);
 7415   %}
 7416   ins_pipe( pipe_cmpxchg );
 7417 %}
 7418 
 7419 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7420   predicate(n->as_LoadStore()->result_not_used());
 7421   match(Set dummy (GetAndAddS mem add));
 7422   effect(KILL cr);
 7423   format %{ "ADDS  [$mem],$add" %}
 7424   ins_encode %{
 7425     __ lock();
 7426     __ addw($mem$$Address, $add$$constant);
 7427   %}
 7428   ins_pipe( pipe_cmpxchg );
 7429 %}
 7430 
 7431 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7432   match(Set newval (GetAndAddS mem newval));
 7433   effect(KILL cr);
 7434   format %{ "XADDS  [$mem],$newval" %}
 7435   ins_encode %{
 7436     __ lock();
 7437     __ xaddw($mem$$Address, $newval$$Register);
 7438   %}
 7439   ins_pipe( pipe_cmpxchg );
 7440 %}
 7441 
 7442 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7443   predicate(n->as_LoadStore()->result_not_used());
 7444   match(Set dummy (GetAndAddI mem add));
 7445   effect(KILL cr);
 7446   format %{ "ADDL  [$mem],$add" %}
 7447   ins_encode %{
 7448     __ lock();
 7449     __ addl($mem$$Address, $add$$constant);
 7450   %}
 7451   ins_pipe( pipe_cmpxchg );
 7452 %}
 7453 
 7454 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7455   match(Set newval (GetAndAddI mem newval));
 7456   effect(KILL cr);
 7457   format %{ "XADDL  [$mem],$newval" %}
 7458   ins_encode %{
 7459     __ lock();
 7460     __ xaddl($mem$$Address, $newval$$Register);
 7461   %}
 7462   ins_pipe( pipe_cmpxchg );
 7463 %}
 7464 
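      // XCHG with a memory operand is implicitly locked, so the GetAndSet
      // variants below need no explicit lock() and, unlike XADD, do not
      // modify the flags.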
 7465 // Important to match to xRegI: only 8-bit regs.
 7466 instruct xchgB( memory mem, xRegI newval) %{
 7467   match(Set newval (GetAndSetB mem newval));
 7468   format %{ "XCHGB  $newval,[$mem]" %}
 7469   ins_encode %{
 7470     __ xchgb($newval$$Register, $mem$$Address);
 7471   %}
 7472   ins_pipe( pipe_cmpxchg );
 7473 %}
 7474 
 7475 instruct xchgS( memory mem, rRegI newval) %{
 7476   match(Set newval (GetAndSetS mem newval));
 7477   format %{ "XCHGW  $newval,[$mem]" %}
 7478   ins_encode %{
 7479     __ xchgw($newval$$Register, $mem$$Address);
 7480   %}
 7481   ins_pipe( pipe_cmpxchg );
 7482 %}
 7483 
 7484 instruct xchgI( memory mem, rRegI newval) %{
 7485   match(Set newval (GetAndSetI mem newval));
 7486   format %{ "XCHGL  $newval,[$mem]" %}
 7487   ins_encode %{
 7488     __ xchgl($newval$$Register, $mem$$Address);
 7489   %}
 7490   ins_pipe( pipe_cmpxchg );
 7491 %}
 7492 
 7493 instruct xchgP( memory mem, pRegP newval) %{
 7494   match(Set newval (GetAndSetP mem newval));
 7495   format %{ "XCHGL  $newval,[$mem]" %}
 7496   ins_encode %{
 7497     __ xchgl($newval$$Register, $mem$$Address);
 7498   %}
 7499   ins_pipe( pipe_cmpxchg );
 7500 %}
 7501 
 7502 //----------Subtraction Instructions-------------------------------------------
 7503 
 7504 // Integer Subtraction Instructions
 7505 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7506   match(Set dst (SubI dst src));
 7507   effect(KILL cr);
 7508 
 7509   size(2);
 7510   format %{ "SUB    $dst,$src" %}
 7511   opcode(0x2B);
 7512   ins_encode( OpcP, RegReg( dst, src) );
 7513   ins_pipe( ialu_reg_reg );
 7514 %}
 7515 
 7516 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7517   match(Set dst (SubI dst src));
 7518   effect(KILL cr);
 7519 
 7520   format %{ "SUB    $dst,$src" %}
 7521   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7522   // ins_encode( RegImm( dst, src) );
 7523   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7524   ins_pipe( ialu_reg );
 7525 %}
 7526 
 7527 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7528   match(Set dst (SubI dst (LoadI src)));
 7529   effect(KILL cr);
 7530 
 7531   ins_cost(125);
 7532   format %{ "SUB    $dst,$src" %}
 7533   opcode(0x2B);
 7534   ins_encode( OpcP, RegMem( dst, src) );
 7535   ins_pipe( ialu_reg_mem );
 7536 %}
 7537 
 7538 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7539   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7540   effect(KILL cr);
 7541 
 7542   ins_cost(150);
 7543   format %{ "SUB    $dst,$src" %}
 7544   opcode(0x29);  /* Opcode 29 /r */
 7545   ins_encode( OpcP, RegMem( src, dst ) );
 7546   ins_pipe( ialu_mem_reg );
 7547 %}
 7548 
 7549 // Subtract from a pointer
 7550 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7551   match(Set dst (AddP dst (SubI zero src)));
 7552   effect(KILL cr);
 7553 
 7554   size(2);
 7555   format %{ "SUB    $dst,$src" %}
 7556   opcode(0x2B);
 7557   ins_encode( OpcP, RegReg( dst, src) );
 7558   ins_pipe( ialu_reg_reg );
 7559 %}
 7560 
 7561 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7562   match(Set dst (SubI zero dst));
 7563   effect(KILL cr);
 7564 
 7565   size(2);
 7566   format %{ "NEG    $dst" %}
 7567   opcode(0xF7,0x03);  // Opcode F7 /3
 7568   ins_encode( OpcP, RegOpc( dst ) );
 7569   ins_pipe( ialu_reg );
 7570 %}
 7571 
 7572 //----------Multiplication/Division Instructions-------------------------------
 7573 // Integer Multiplication Instructions
 7574 // Multiply Register
 7575 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7576   match(Set dst (MulI dst src));
 7577   effect(KILL cr);
 7578 
 7579   size(3);
 7580   ins_cost(300);
 7581   format %{ "IMUL   $dst,$src" %}
 7582   opcode(0xAF, 0x0F);
 7583   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7584   ins_pipe( ialu_reg_reg_alu0 );
 7585 %}
 7586 
 7587 // Multiply 32-bit Immediate
 7588 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7589   match(Set dst (MulI src imm));
 7590   effect(KILL cr);
 7591 
 7592   ins_cost(300);
 7593   format %{ "IMUL   $dst,$src,$imm" %}
 7594   opcode(0x69);  /* 69 /r id */
 7595   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7596   ins_pipe( ialu_reg_reg_alu0 );
 7597 %}
 7598 
 7599 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7600   match(Set dst src);
 7601   effect(KILL cr);
 7602 
 7603   // Note that this is artificially increased to make it more expensive than loadConL
 7604   ins_cost(250);
 7605   format %{ "MOV    EAX,$src\t// low word only" %}
 7606   opcode(0xB8);
 7607   ins_encode( LdImmL_Lo(dst, src) );
 7608   ins_pipe( ialu_reg_fat );
 7609 %}
 7610 
 7611 // Multiply by 32-bit Immediate, taking the shifted high order results
 7612 //  (special case for shift by 32)
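      // The matched tree corresponds roughly to the Java expression
      //   (int)(((long)x * K) >> 32)
      // where K is a constant that fits in 32 bits (see the predicate below).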
 7613 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7614   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7615   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7616              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7617              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7618   effect(USE src1, KILL cr);
 7619 
 7620   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7621   ins_cost(0*100 + 1*400 - 150);
 7622   format %{ "IMUL   EDX:EAX,$src1" %}
 7623   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7624   ins_pipe( pipe_slow );
 7625 %}
 7626 
 7627 // Multiply by 32-bit Immediate, taking the shifted high order results
 7628 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7629   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7630   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7631              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7632              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7633   effect(USE src1, KILL cr);
 7634 
 7635   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7636   ins_cost(1*100 + 1*400 - 150);
 7637   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7638             "SAR    EDX,$cnt-32" %}
 7639   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7640   ins_pipe( pipe_slow );
 7641 %}
 7642 
 7643 // Multiply Memory 32-bit Immediate
 7644 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7645   match(Set dst (MulI (LoadI src) imm));
 7646   effect(KILL cr);
 7647 
 7648   ins_cost(300);
 7649   format %{ "IMUL   $dst,$src,$imm" %}
 7650   opcode(0x69);  /* 69 /r id */
 7651   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7652   ins_pipe( ialu_reg_mem_alu0 );
 7653 %}
 7654 
 7655 // Multiply Memory
 7656 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7657   match(Set dst (MulI dst (LoadI src)));
 7658   effect(KILL cr);
 7659 
 7660   ins_cost(350);
 7661   format %{ "IMUL   $dst,$src" %}
 7662   opcode(0xAF, 0x0F);
 7663   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7664   ins_pipe( ialu_reg_mem_alu0 );
 7665 %}
 7666 
 7667 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7668 %{
 7669   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7670   effect(KILL cr, KILL src2);
 7671 
 7672   expand %{ mulI_eReg(dst, src1, cr);
 7673            mulI_eReg(src2, src3, cr);
 7674            addI_eReg(dst, src2, cr); %}
 7675 %}
 7676 
 7677 // Multiply Register Int to Long
 7678 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7679   // Basic Idea: long = (long)int * (long)int
 7680   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7681   effect(DEF dst, USE src, USE src1, KILL flags);
 7682 
 7683   ins_cost(300);
 7684   format %{ "IMUL   $dst,$src1" %}
 7685 
 7686   ins_encode( long_int_multiply( dst, src1 ) );
 7687   ins_pipe( ialu_reg_reg_alu0 );
 7688 %}
 7689 
 7690 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7691   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7692   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7693   effect(KILL flags);
 7694 
 7695   ins_cost(300);
 7696   format %{ "MUL    $dst,$src1" %}
 7697 
 7698   ins_encode( long_uint_multiply(dst, src1) );
 7699   ins_pipe( ialu_reg_reg_alu0 );
 7700 %}
 7701 
 7702 // Multiply Register Long
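      // Writing x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
      //   x*y = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_hi*y_hi*2^64
      // The 2^64 term falls outside a 64-bit result, which gives the lo/hi
      // recipe used by the encodings below.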
 7703 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7704   match(Set dst (MulL dst src));
 7705   effect(KILL cr, TEMP tmp);
 7706   ins_cost(4*100+3*400);
 7707 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7708 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7709   format %{ "MOV    $tmp,$src.lo\n\t"
 7710             "IMUL   $tmp,EDX\n\t"
 7711             "MOV    EDX,$src.hi\n\t"
 7712             "IMUL   EDX,EAX\n\t"
 7713             "ADD    $tmp,EDX\n\t"
 7714             "MUL    EDX:EAX,$src.lo\n\t"
 7715             "ADD    EDX,$tmp" %}
 7716   ins_encode( long_multiply( dst, src, tmp ) );
 7717   ins_pipe( pipe_slow );
 7718 %}
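
      // A minimal C++ sketch of the decomposition above (illustration only; mul64 is a
      // hypothetical helper, needs <cstdint>):
      //   uint64_t mul64(uint64_t x, uint64_t y) {
      //     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
      //     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
      //     uint64_t p  = (uint64_t)x_lo * y_lo;                 // MUL  -> EDX:EAX
      //     uint32_t hi = (uint32_t)(p >> 32)
      //                 + x_hi * y_lo + x_lo * y_hi;             // the two IMULs + ADDs
      //     return ((uint64_t)hi << 32) | (uint32_t)p;
      //   }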
 7719 
 7720 // Multiply Register Long where the left operand's high 32 bits are zero
 7721 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7722   predicate(is_operand_hi32_zero(n->in(1)));
 7723   match(Set dst (MulL dst src));
 7724   effect(KILL cr, TEMP tmp);
 7725   ins_cost(2*100+2*400);
 7726 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7727 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7728   format %{ "MOV    $tmp,$src.hi\n\t"
 7729             "IMUL   $tmp,EAX\n\t"
 7730             "MUL    EDX:EAX,$src.lo\n\t"
 7731             "ADD    EDX,$tmp" %}
 7732   ins_encode %{
 7733     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7734     __ imull($tmp$$Register, rax);
 7735     __ mull($src$$Register);
 7736     __ addl(rdx, $tmp$$Register);
 7737   %}
 7738   ins_pipe( pipe_slow );
 7739 %}
 7740 
 7741 // Multiply Register Long where the right operand's high 32 bits are zero
 7742 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7743   predicate(is_operand_hi32_zero(n->in(2)));
 7744   match(Set dst (MulL dst src));
 7745   effect(KILL cr, TEMP tmp);
 7746   ins_cost(2*100+2*400);
 7747 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7748 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7749   format %{ "MOV    $tmp,$src.lo\n\t"
 7750             "IMUL   $tmp,EDX\n\t"
 7751             "MUL    EDX:EAX,$src.lo\n\t"
 7752             "ADD    EDX,$tmp" %}
 7753   ins_encode %{
 7754     __ movl($tmp$$Register, $src$$Register);
 7755     __ imull($tmp$$Register, rdx);
 7756     __ mull($src$$Register);
 7757     __ addl(rdx, $tmp$$Register);
 7758   %}
 7759   ins_pipe( pipe_slow );
 7760 %}
 7761 
 7762 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7763 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7764   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7765   match(Set dst (MulL dst src));
 7766   effect(KILL cr);
 7767   ins_cost(1*400);
 7768 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7769 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7770   format %{ "MUL    EDX:EAX,$src.lo" %}
 7771   ins_encode %{
 7772     __ mull($src$$Register);
 7773   %}
 7774   ins_pipe( pipe_slow );
 7775 %}
 7776 
 7777 // Multiply Register Long by small constant
 7778 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7779   match(Set dst (MulL dst src));
 7780   effect(KILL cr, TEMP tmp);
 7781   ins_cost(2*100+2*400);
 7782   size(12);
 7783 // Basic idea: lo(result) = lo(src * EAX)
 7784 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7785   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7786             "MOV    EDX,$src\n\t"
 7787             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7788             "ADD    EDX,$tmp" %}
 7789   ins_encode( long_multiply_con( dst, src, tmp ) );
 7790   ins_pipe( pipe_slow );
 7791 %}
 7792 
 7793 // Integer DIV with Register
 7794 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7795   match(Set rax (DivI rax div));
 7796   effect(KILL rdx, KILL cr);
 7797   size(26);
 7798   ins_cost(30*100+10*100);
 7799   format %{ "CMP    EAX,0x80000000\n\t"
 7800             "JNE,s  normal\n\t"
 7801             "XOR    EDX,EDX\n\t"
 7802             "CMP    ECX,-1\n\t"
 7803             "JE,s   done\n"
 7804     "normal: CDQ\n\t"
 7805             "IDIV   $div\n\t"
 7806     "done:"        %}
 7807   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7808   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7809   ins_pipe( ialu_reg_reg_alu0 );
 7810 %}
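
      // Why the guard above exists: IDIV raises a hardware divide error for
      // min_jint / -1 (the true quotient 2^31 does not fit in 32 bits), while Java
      // defines the result as min_jint with remainder 0.  A C++ sketch of the intended
      // behaviour (hypothetical helper, illustration only):
      //   int32_t java_idiv(int32_t x, int32_t y) {
      //     if (x == INT32_MIN && y == -1) return x;   // guarded path: skip IDIV, EDX already zeroed
      //     return x / y;                              // normal path: CDQ + IDIV
      //   }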
 7811 
 7812 // Divide Register Long
 7813 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7814   match(Set dst (DivL src1 src2));
 7815   effect(CALL);
 7816   ins_cost(10000);
 7817   format %{ "PUSH   $src1.hi\n\t"
 7818             "PUSH   $src1.lo\n\t"
 7819             "PUSH   $src2.hi\n\t"
 7820             "PUSH   $src2.lo\n\t"
 7821             "CALL   SharedRuntime::ldiv\n\t"
 7822             "ADD    ESP,16" %}
 7823   ins_encode( long_div(src1,src2) );
 7824   ins_pipe( pipe_slow );
 7825 %}
 7826 
 7827 // Integer DIVMOD with Register, both quotient and mod results
 7828 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7829   match(DivModI rax div);
 7830   effect(KILL cr);
 7831   size(26);
 7832   ins_cost(30*100+10*100);
 7833   format %{ "CMP    EAX,0x80000000\n\t"
 7834             "JNE,s  normal\n\t"
 7835             "XOR    EDX,EDX\n\t"
 7836             "CMP    ECX,-1\n\t"
 7837             "JE,s   done\n"
 7838     "normal: CDQ\n\t"
 7839             "IDIV   $div\n\t"
 7840     "done:"        %}
 7841   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7842   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7843   ins_pipe( pipe_slow );
 7844 %}
 7845 
 7846 // Integer MOD with Register
 7847 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7848   match(Set rdx (ModI rax div));
 7849   effect(KILL rax, KILL cr);
 7850 
 7851   size(26);
 7852   ins_cost(300);
 7853   format %{ "CDQ\n\t"
 7854             "IDIV   $div" %}
 7855   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7856   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7857   ins_pipe( ialu_reg_reg_alu0 );
 7858 %}
 7859 
 7860 // Remainder Register Long
 7861 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7862   match(Set dst (ModL src1 src2));
 7863   effect(CALL);
 7864   ins_cost(10000);
 7865   format %{ "PUSH   $src1.hi\n\t"
 7866             "PUSH   $src1.lo\n\t"
 7867             "PUSH   $src2.hi\n\t"
 7868             "PUSH   $src2.lo\n\t"
 7869             "CALL   SharedRuntime::lrem\n\t"
 7870             "ADD    ESP,16" %}
 7871   ins_encode( long_mod(src1,src2) );
 7872   ins_pipe( pipe_slow );
 7873 %}
 7874 
 7875 // Divide Register Long (no special case since divisor != -1)
 7876 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7877   match(Set dst (DivL dst imm));
 7878   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7879   ins_cost(1000);
 7880   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7881             "XOR    $tmp2,$tmp2\n\t"
 7882             "CMP    $tmp,EDX\n\t"
 7883             "JA,s   fast\n\t"
 7884             "MOV    $tmp2,EAX\n\t"
 7885             "MOV    EAX,EDX\n\t"
 7886             "MOV    EDX,0\n\t"
 7887             "JLE,s  pos\n\t"
 7888             "LNEG   EAX : $tmp2\n\t"
 7889             "DIV    $tmp # unsigned division\n\t"
 7890             "XCHG   EAX,$tmp2\n\t"
 7891             "DIV    $tmp\n\t"
 7892             "LNEG   $tmp2 : EAX\n\t"
 7893             "JMP,s  done\n"
 7894     "pos:\n\t"
 7895             "DIV    $tmp\n\t"
 7896             "XCHG   EAX,$tmp2\n"
 7897     "fast:\n\t"
 7898             "DIV    $tmp\n"
 7899     "done:\n\t"
 7900             "MOV    EDX,$tmp2\n\t"
 7901             "NEG    EDX:EAX # if $imm < 0" %}
 7902   ins_encode %{
 7903     int con = (int)$imm$$constant;
 7904     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7905     int pcon = (con > 0) ? con : -con;
 7906     Label Lfast, Lpos, Ldone;
 7907 
 7908     __ movl($tmp$$Register, pcon);
 7909     __ xorl($tmp2$$Register,$tmp2$$Register);
 7910     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7911     __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7912 
 7913     __ movl($tmp2$$Register, $dst$$Register); // save
 7914     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7915     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7916     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7917 
 7918     // Negative dividend.
 7919     // convert value to positive to use unsigned division
 7920     __ lneg($dst$$Register, $tmp2$$Register);
 7921     __ divl($tmp$$Register);
 7922     __ xchgl($dst$$Register, $tmp2$$Register);
 7923     __ divl($tmp$$Register);
 7924     // revert result back to negative
 7925     __ lneg($tmp2$$Register, $dst$$Register);
 7926     __ jmpb(Ldone);
 7927 
 7928     __ bind(Lpos);
 7929     __ divl($tmp$$Register); // Use unsigned division
 7930     __ xchgl($dst$$Register, $tmp2$$Register);
 7931     // Fall through for final divide, tmp2 has the 32-bit hi result
 7932 
 7933     __ bind(Lfast);
 7934     // fast path: src is positive
 7935     __ divl($tmp$$Register); // Use unsigned division
 7936 
 7937     __ bind(Ldone);
 7938     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7939     if (con < 0) {
 7940       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7941     }
 7942   %}
 7943   ins_pipe( pipe_slow );
 7944 %}
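
      // The encoding above divides the 64-bit magnitude by abs($imm) with at most two
      // 32-bit unsigned DIVs.  A C++ sketch of the core step (hypothetical helper,
      // illustration only, assumes the divisor d fits in 32 bits):
      //   void udiv64_by_32(uint32_t n_hi, uint32_t n_lo, uint32_t d,
      //                     uint32_t* q_hi, uint32_t* q_lo) {
      //     if (n_hi < d) {                                              // "fast": quotient fits in 32 bits
      //       *q_hi = 0;
      //       *q_lo = (uint32_t)(((uint64_t)n_hi << 32 | n_lo) / d);
      //     } else {                                                     // schoolbook: high word first
      //       *q_hi = n_hi / d;
      //       *q_lo = (uint32_t)(((uint64_t)(n_hi % d) << 32 | n_lo) / d);
      //     }
      //   }
      // Signs are handled separately: a negative dividend is negated before the divides
      // and the quotient negated afterwards, and a negative $imm negates the final result.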
 7945 
 7946 // Remainder Register Long (remainder fits into 32 bits)
 7947 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7948   match(Set dst (ModL dst imm));
 7949   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7950   ins_cost(1000);
 7951   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7952             "CMP    $tmp,EDX\n\t"
 7953             "JA,s   fast\n\t"
 7954             "MOV    $tmp2,EAX\n\t"
 7955             "MOV    EAX,EDX\n\t"
 7956             "MOV    EDX,0\n\t"
 7957             "JLE,s  pos\n\t"
 7958             "LNEG   EAX : $tmp2\n\t"
 7959             "DIV    $tmp # unsigned division\n\t"
 7960             "MOV    EAX,$tmp2\n\t"
 7961             "DIV    $tmp\n\t"
 7962             "NEG    EDX\n\t"
 7963             "JMP,s  done\n"
 7964     "pos:\n\t"
 7965             "DIV    $tmp\n\t"
 7966             "MOV    EAX,$tmp2\n"
 7967     "fast:\n\t"
 7968             "DIV    $tmp\n"
 7969     "done:\n\t"
 7970             "MOV    EAX,EDX\n\t"
 7971             "SAR    EDX,31" %}
 7972   ins_encode %{
 7973     int con = (int)$imm$$constant;
 7974     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7975     int pcon = (con > 0) ? con : -con;
 7976     Label  Lfast, Lpos, Ldone;
 7977 
 7978     __ movl($tmp$$Register, pcon);
 7979     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7980     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7981 
 7982     __ movl($tmp2$$Register, $dst$$Register); // save
 7983     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7984     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7985     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7986 
 7987     // Negative dividend.
 7988     // convert value to positive to use unsigned division
 7989     __ lneg($dst$$Register, $tmp2$$Register);
 7990     __ divl($tmp$$Register);
 7991     __ movl($dst$$Register, $tmp2$$Register);
 7992     __ divl($tmp$$Register);
 7993     // revert remainder back to negative
 7994     __ negl(HIGH_FROM_LOW($dst$$Register));
 7995     __ jmpb(Ldone);
 7996 
 7997     __ bind(Lpos);
 7998     __ divl($tmp$$Register);
 7999     __ movl($dst$$Register, $tmp2$$Register);
 8000 
 8001     __ bind(Lfast);
 8002     // fast path: src is positive
 8003     __ divl($tmp$$Register);
 8004 
 8005     __ bind(Ldone);
 8006     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8007     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8008 
 8009   %}
 8010   ins_pipe( pipe_slow );
 8011 %}
 8012 
 8013 // Integer Shift Instructions
 8014 // Shift Left by one
 8015 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8016   match(Set dst (LShiftI dst shift));
 8017   effect(KILL cr);
 8018 
 8019   size(2);
 8020   format %{ "SHL    $dst,$shift" %}
 8021   opcode(0xD1, 0x4);  /* D1 /4 */
 8022   ins_encode( OpcP, RegOpc( dst ) );
 8023   ins_pipe( ialu_reg );
 8024 %}
 8025 
 8026 // Shift Left by 8-bit immediate
 8027 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8028   match(Set dst (LShiftI dst shift));
 8029   effect(KILL cr);
 8030 
 8031   size(3);
 8032   format %{ "SHL    $dst,$shift" %}
 8033   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8034   ins_encode( RegOpcImm( dst, shift) );
 8035   ins_pipe( ialu_reg );
 8036 %}
 8037 
 8038 // Shift Left by variable
 8039 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8040   match(Set dst (LShiftI dst shift));
 8041   effect(KILL cr);
 8042 
 8043   size(2);
 8044   format %{ "SHL    $dst,$shift" %}
 8045   opcode(0xD3, 0x4);  /* D3 /4 */
 8046   ins_encode( OpcP, RegOpc( dst ) );
 8047   ins_pipe( ialu_reg_reg );
 8048 %}
 8049 
 8050 // Arithmetic shift right by one
 8051 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8052   match(Set dst (RShiftI dst shift));
 8053   effect(KILL cr);
 8054 
 8055   size(2);
 8056   format %{ "SAR    $dst,$shift" %}
 8057   opcode(0xD1, 0x7);  /* D1 /7 */
 8058   ins_encode( OpcP, RegOpc( dst ) );
 8059   ins_pipe( ialu_reg );
 8060 %}
 8061 
 8062 // Arithmetic shift right memory by one
 8063 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8064   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8065   effect(KILL cr);
 8066   format %{ "SAR    $dst,$shift" %}
 8067   opcode(0xD1, 0x7);  /* D1 /7 */
 8068   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8069   ins_pipe( ialu_mem_imm );
 8070 %}
 8071 
 8072 // Arithmetic Shift Right by 8-bit immediate
 8073 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8074   match(Set dst (RShiftI dst shift));
 8075   effect(KILL cr);
 8076 
 8077   size(3);
 8078   format %{ "SAR    $dst,$shift" %}
 8079   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8080   ins_encode( RegOpcImm( dst, shift ) );
 8081   ins_pipe( ialu_mem_imm );
 8082 %}
 8083 
 8084 // Arithmetic Shift Right memory by 8-bit immediate
 8085 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8086   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8087   effect(KILL cr);
 8088 
 8089   format %{ "SAR    $dst,$shift" %}
 8090   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8091   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8092   ins_pipe( ialu_mem_imm );
 8093 %}
 8094 
 8095 // Arithmetic Shift Right by variable
 8096 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8097   match(Set dst (RShiftI dst shift));
 8098   effect(KILL cr);
 8099 
 8100   size(2);
 8101   format %{ "SAR    $dst,$shift" %}
 8102   opcode(0xD3, 0x7);  /* D3 /7 */
 8103   ins_encode( OpcP, RegOpc( dst ) );
 8104   ins_pipe( ialu_reg_reg );
 8105 %}
 8106 
 8107 // Logical shift right by one
 8108 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8109   match(Set dst (URShiftI dst shift));
 8110   effect(KILL cr);
 8111 
 8112   size(2);
 8113   format %{ "SHR    $dst,$shift" %}
 8114   opcode(0xD1, 0x5);  /* D1 /5 */
 8115   ins_encode( OpcP, RegOpc( dst ) );
 8116   ins_pipe( ialu_reg );
 8117 %}
 8118 
 8119 // Logical Shift Right by 8-bit immediate
 8120 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8121   match(Set dst (URShiftI dst shift));
 8122   effect(KILL cr);
 8123 
 8124   size(3);
 8125   format %{ "SHR    $dst,$shift" %}
 8126   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8127   ins_encode( RegOpcImm( dst, shift) );
 8128   ins_pipe( ialu_reg );
 8129 %}
 8130 
 8131 
 8132 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8133 // This idiom is used by the compiler for the i2b bytecode.
 8134 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8135   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8136 
 8137   size(3);
 8138   format %{ "MOVSX  $dst,$src :8" %}
 8139   ins_encode %{
 8140     __ movsbl($dst$$Register, $src$$Register);
 8141   %}
 8142   ins_pipe(ialu_reg_reg);
 8143 %}
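
      // Worked example: (x << 24) >> 24 keeps only the low byte and sign-extends it,
      // which is exactly MOVSX r8->r32.  C++ sketch (illustration only):
      //   int32_t i2b(int32_t x) { return (int8_t)(uint8_t)x; }   // e.g. 0x000000FF -> -1
      // The i2s rule below is the analogous 16-bit case (MOVSX r16->r32).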
 8144 
 8145 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8146 // This idiom is used by the compiler for the i2s bytecode.
 8147 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8148   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8149 
 8150   size(3);
 8151   format %{ "MOVSX  $dst,$src :16" %}
 8152   ins_encode %{
 8153     __ movswl($dst$$Register, $src$$Register);
 8154   %}
 8155   ins_pipe(ialu_reg_reg);
 8156 %}
 8157 
 8158 
 8159 // Logical Shift Right by variable
 8160 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8161   match(Set dst (URShiftI dst shift));
 8162   effect(KILL cr);
 8163 
 8164   size(2);
 8165   format %{ "SHR    $dst,$shift" %}
 8166   opcode(0xD3, 0x5);  /* D3 /5 */
 8167   ins_encode( OpcP, RegOpc( dst ) );
 8168   ins_pipe( ialu_reg_reg );
 8169 %}
 8170 
 8171 
 8172 //----------Logical Instructions-----------------------------------------------
 8173 //----------Integer Logical Instructions---------------------------------------
 8174 // And Instructions
 8175 // And Register with Register
 8176 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8177   match(Set dst (AndI dst src));
 8178   effect(KILL cr);
 8179 
 8180   size(2);
 8181   format %{ "AND    $dst,$src" %}
 8182   opcode(0x23);
 8183   ins_encode( OpcP, RegReg( dst, src) );
 8184   ins_pipe( ialu_reg_reg );
 8185 %}
 8186 
 8187 // And Register with Immediate
 8188 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8189   match(Set dst (AndI dst src));
 8190   effect(KILL cr);
 8191 
 8192   format %{ "AND    $dst,$src" %}
 8193   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8194   // ins_encode( RegImm( dst, src) );
 8195   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8196   ins_pipe( ialu_reg );
 8197 %}
 8198 
 8199 // And Register with Memory
 8200 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8201   match(Set dst (AndI dst (LoadI src)));
 8202   effect(KILL cr);
 8203 
 8204   ins_cost(125);
 8205   format %{ "AND    $dst,$src" %}
 8206   opcode(0x23);
 8207   ins_encode( OpcP, RegMem( dst, src) );
 8208   ins_pipe( ialu_reg_mem );
 8209 %}
 8210 
 8211 // And Memory with Register
 8212 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8213   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8214   effect(KILL cr);
 8215 
 8216   ins_cost(150);
 8217   format %{ "AND    $dst,$src" %}
 8218   opcode(0x21);  /* Opcode 21 /r */
 8219   ins_encode( OpcP, RegMem( src, dst ) );
 8220   ins_pipe( ialu_mem_reg );
 8221 %}
 8222 
 8223 // And Memory with Immediate
 8224 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8226   effect(KILL cr);
 8227 
 8228   ins_cost(125);
 8229   format %{ "AND    $dst,$src" %}
 8230   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8231   // ins_encode( MemImm( dst, src) );
 8232   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8233   ins_pipe( ialu_mem_imm );
 8234 %}
 8235 
 8236 // BMI1 instructions
 8237 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8238   match(Set dst (AndI (XorI src1 minus_1) src2));
 8239   predicate(UseBMI1Instructions);
 8240   effect(KILL cr);
 8241 
 8242   format %{ "ANDNL  $dst, $src1, $src2" %}
 8243 
 8244   ins_encode %{
 8245     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8246   %}
 8247   ins_pipe(ialu_reg);
 8248 %}
 8249 
 8250 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8251   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8252   predicate(UseBMI1Instructions);
 8253   effect(KILL cr);
 8254 
 8255   ins_cost(125);
 8256   format %{ "ANDNL  $dst, $src1, $src2" %}
 8257 
 8258   ins_encode %{
 8259     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8260   %}
 8261   ins_pipe(ialu_reg_mem);
 8262 %}
 8263 
 8264 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8265   match(Set dst (AndI (SubI imm_zero src) src));
 8266   predicate(UseBMI1Instructions);
 8267   effect(KILL cr);
 8268 
 8269   format %{ "BLSIL  $dst, $src" %}
 8270 
 8271   ins_encode %{
 8272     __ blsil($dst$$Register, $src$$Register);
 8273   %}
 8274   ins_pipe(ialu_reg);
 8275 %}
 8276 
 8277 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8278   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8279   predicate(UseBMI1Instructions);
 8280   effect(KILL cr);
 8281 
 8282   ins_cost(125);
 8283   format %{ "BLSIL  $dst, $src" %}
 8284 
 8285   ins_encode %{
 8286     __ blsil($dst$$Register, $src$$Address);
 8287   %}
 8288   ins_pipe(ialu_reg_mem);
 8289 %}
 8290 
 8291 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8292 %{
 8293   match(Set dst (XorI (AddI src minus_1) src));
 8294   predicate(UseBMI1Instructions);
 8295   effect(KILL cr);
 8296 
 8297   format %{ "BLSMSKL $dst, $src" %}
 8298 
 8299   ins_encode %{
 8300     __ blsmskl($dst$$Register, $src$$Register);
 8301   %}
 8302 
 8303   ins_pipe(ialu_reg);
 8304 %}
 8305 
 8306 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8307 %{
 8308   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8309   predicate(UseBMI1Instructions);
 8310   effect(KILL cr);
 8311 
 8312   ins_cost(125);
 8313   format %{ "BLSMSKL $dst, $src" %}
 8314 
 8315   ins_encode %{
 8316     __ blsmskl($dst$$Register, $src$$Address);
 8317   %}
 8318 
 8319   ins_pipe(ialu_reg_mem);
 8320 %}
 8321 
 8322 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8323 %{
 8324   match(Set dst (AndI (AddI src minus_1) src) );
 8325   predicate(UseBMI1Instructions);
 8326   effect(KILL cr);
 8327 
 8328   format %{ "BLSRL  $dst, $src" %}
 8329 
 8330   ins_encode %{
 8331     __ blsrl($dst$$Register, $src$$Register);
 8332   %}
 8333 
 8334   ins_pipe(ialu_reg);
 8335 %}
 8336 
 8337 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8338 %{
 8339   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8340   predicate(UseBMI1Instructions);
 8341   effect(KILL cr);
 8342 
 8343   ins_cost(125);
 8344   format %{ "BLSRL  $dst, $src" %}
 8345 
 8346   ins_encode %{
 8347     __ blsrl($dst$$Register, $src$$Address);
 8348   %}
 8349 
 8350   ins_pipe(ialu_reg_mem);
 8351 %}
 8352 
 8353 // Or Instructions
 8354 // Or Register with Register
 8355 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8356   match(Set dst (OrI dst src));
 8357   effect(KILL cr);
 8358 
 8359   size(2);
 8360   format %{ "OR     $dst,$src" %}
 8361   opcode(0x0B);
 8362   ins_encode( OpcP, RegReg( dst, src) );
 8363   ins_pipe( ialu_reg_reg );
 8364 %}
 8365 
 8366 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8367   match(Set dst (OrI dst (CastP2X src)));
 8368   effect(KILL cr);
 8369 
 8370   size(2);
 8371   format %{ "OR     $dst,$src" %}
 8372   opcode(0x0B);
 8373   ins_encode( OpcP, RegReg( dst, src) );
 8374   ins_pipe( ialu_reg_reg );
 8375 %}
 8376 
 8377 
 8378 // Or Register with Immediate
 8379 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8380   match(Set dst (OrI dst src));
 8381   effect(KILL cr);
 8382 
 8383   format %{ "OR     $dst,$src" %}
 8384   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8385   // ins_encode( RegImm( dst, src) );
 8386   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8387   ins_pipe( ialu_reg );
 8388 %}
 8389 
 8390 // Or Register with Memory
 8391 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8392   match(Set dst (OrI dst (LoadI src)));
 8393   effect(KILL cr);
 8394 
 8395   ins_cost(125);
 8396   format %{ "OR     $dst,$src" %}
 8397   opcode(0x0B);
 8398   ins_encode( OpcP, RegMem( dst, src) );
 8399   ins_pipe( ialu_reg_mem );
 8400 %}
 8401 
 8402 // Or Memory with Register
 8403 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8404   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8405   effect(KILL cr);
 8406 
 8407   ins_cost(150);
 8408   format %{ "OR     $dst,$src" %}
 8409   opcode(0x09);  /* Opcode 09 /r */
 8410   ins_encode( OpcP, RegMem( src, dst ) );
 8411   ins_pipe( ialu_mem_reg );
 8412 %}
 8413 
 8414 // Or Memory with Immediate
 8415 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8416   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8417   effect(KILL cr);
 8418 
 8419   ins_cost(125);
 8420   format %{ "OR     $dst,$src" %}
 8421   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8422   // ins_encode( MemImm( dst, src) );
 8423   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8424   ins_pipe( ialu_mem_imm );
 8425 %}
 8426 
 8427 // ROL/ROR
 8428 // ROL expand
 8429 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8430   effect(USE_DEF dst, USE shift, KILL cr);
 8431 
 8432   format %{ "ROL    $dst, $shift" %}
 8433   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8434   ins_encode( OpcP, RegOpc( dst ));
 8435   ins_pipe( ialu_reg );
 8436 %}
 8437 
 8438 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8439   effect(USE_DEF dst, USE shift, KILL cr);
 8440 
 8441   format %{ "ROL    $dst, $shift" %}
 8442   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8443   ins_encode( RegOpcImm(dst, shift) );
 8444   ins_pipe(ialu_reg);
 8445 %}
 8446 
 8447 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8448   effect(USE_DEF dst, USE shift, KILL cr);
 8449 
 8450   format %{ "ROL    $dst, $shift" %}
 8451   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8452   ins_encode(OpcP, RegOpc(dst));
 8453   ins_pipe( ialu_reg_reg );
 8454 %}
 8455 // end of ROL expand
 8456 
 8457 // ROL 32bit by one once
 8458 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8459   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8460 
 8461   expand %{
 8462     rolI_eReg_imm1(dst, lshift, cr);
 8463   %}
 8464 %}
 8465 
 8466 // ROL 32bit var by imm8 once
 8467 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8468   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8469   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8470 
 8471   expand %{
 8472     rolI_eReg_imm8(dst, lshift, cr);
 8473   %}
 8474 %}
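
      // Worked example for the predicate above: the rotate is only matched when the two
      // shift counts cancel mod 32, e.g. (x << 3) | (x >>> 29) is ROL x,3 because
      // (3 + 29) & 0x1f == 0.  C++ sketch of the identity (illustration only):
      //   uint32_t rotl(uint32_t x, unsigned s) { return (x << s) | (x >> (32 - s)); }   // s in 1..31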
 8475 
 8476 // ROL 32bit var by var once
 8477 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8478   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8479 
 8480   expand %{
 8481     rolI_eReg_CL(dst, shift, cr);
 8482   %}
 8483 %}
 8484 
 8485 // ROL 32bit var by var once
 8486 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8487   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8488 
 8489   expand %{
 8490     rolI_eReg_CL(dst, shift, cr);
 8491   %}
 8492 %}
 8493 
 8494 // ROR expand
 8495 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8496   effect(USE_DEF dst, USE shift, KILL cr);
 8497 
 8498   format %{ "ROR    $dst, $shift" %}
 8499   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8500   ins_encode( OpcP, RegOpc( dst ) );
 8501   ins_pipe( ialu_reg );
 8502 %}
 8503 
 8504 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8505   effect (USE_DEF dst, USE shift, KILL cr);
 8506 
 8507   format %{ "ROR    $dst, $shift" %}
 8508   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8509   ins_encode( RegOpcImm(dst, shift) );
 8510   ins_pipe( ialu_reg );
 8511 %}
 8512 
 8513 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8514   effect(USE_DEF dst, USE shift, KILL cr);
 8515 
 8516   format %{ "ROR    $dst, $shift" %}
 8517   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8518   ins_encode(OpcP, RegOpc(dst));
 8519   ins_pipe( ialu_reg_reg );
 8520 %}
 8521 // end of ROR expand
 8522 
 8523 // ROR right once
 8524 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8525   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8526 
 8527   expand %{
 8528     rorI_eReg_imm1(dst, rshift, cr);
 8529   %}
 8530 %}
 8531 
 8532 // ROR 32bit by immI8 once
 8533 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8534   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8535   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8536 
 8537   expand %{
 8538     rorI_eReg_imm8(dst, rshift, cr);
 8539   %}
 8540 %}
 8541 
 8542 // ROR 32bit var by var once
 8543 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8544   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8545 
 8546   expand %{
 8547     rorI_eReg_CL(dst, shift, cr);
 8548   %}
 8549 %}
 8550 
 8551 // ROR 32bit var by var once
 8552 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8553   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8554 
 8555   expand %{
 8556     rorI_eReg_CL(dst, shift, cr);
 8557   %}
 8558 %}
 8559 
 8560 // Xor Instructions
 8561 // Xor Register with Register
 8562 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8563   match(Set dst (XorI dst src));
 8564   effect(KILL cr);
 8565 
 8566   size(2);
 8567   format %{ "XOR    $dst,$src" %}
 8568   opcode(0x33);
 8569   ins_encode( OpcP, RegReg( dst, src) );
 8570   ins_pipe( ialu_reg_reg );
 8571 %}
 8572 
 8573 // Xor Register with Immediate -1
 8574 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8575   match(Set dst (XorI dst imm));
 8576 
 8577   size(2);
 8578   format %{ "NOT    $dst" %}
 8579   ins_encode %{
 8580      __ notl($dst$$Register);
 8581   %}
 8582   ins_pipe( ialu_reg );
 8583 %}
 8584 
 8585 // Xor Register with Immediate
 8586 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8587   match(Set dst (XorI dst src));
 8588   effect(KILL cr);
 8589 
 8590   format %{ "XOR    $dst,$src" %}
 8591   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8592   // ins_encode( RegImm( dst, src) );
 8593   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8594   ins_pipe( ialu_reg );
 8595 %}
 8596 
 8597 // Xor Register with Memory
 8598 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8599   match(Set dst (XorI dst (LoadI src)));
 8600   effect(KILL cr);
 8601 
 8602   ins_cost(125);
 8603   format %{ "XOR    $dst,$src" %}
 8604   opcode(0x33);
 8605   ins_encode( OpcP, RegMem(dst, src) );
 8606   ins_pipe( ialu_reg_mem );
 8607 %}
 8608 
 8609 // Xor Memory with Register
 8610 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8611   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8612   effect(KILL cr);
 8613 
 8614   ins_cost(150);
 8615   format %{ "XOR    $dst,$src" %}
 8616   opcode(0x31);  /* Opcode 31 /r */
 8617   ins_encode( OpcP, RegMem( src, dst ) );
 8618   ins_pipe( ialu_mem_reg );
 8619 %}
 8620 
 8621 // Xor Memory with Immediate
 8622 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8623   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8624   effect(KILL cr);
 8625 
 8626   ins_cost(125);
 8627   format %{ "XOR    $dst,$src" %}
 8628   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8629   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8630   ins_pipe( ialu_mem_imm );
 8631 %}
 8632 
 8633 //----------Convert Int to Boolean---------------------------------------------
 8634 
 8635 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8636   effect( DEF dst, USE src );
 8637   format %{ "MOV    $dst,$src" %}
 8638   ins_encode( enc_Copy( dst, src) );
 8639   ins_pipe( ialu_reg_reg );
 8640 %}
 8641 
 8642 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8643   effect( USE_DEF dst, USE src, KILL cr );
 8644 
 8645   size(4);
 8646   format %{ "NEG    $dst\n\t"
 8647             "ADC    $dst,$src" %}
 8648   ins_encode( neg_reg(dst),
 8649               OpcRegReg(0x13,dst,src) );
 8650   ins_pipe( ialu_reg_reg_long );
 8651 %}
 8652 
 8653 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8654   match(Set dst (Conv2B src));
 8655 
 8656   expand %{
 8657     movI_nocopy(dst,src);
 8658     ci2b(dst,src,cr);
 8659   %}
 8660 %}
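
      // How the NEG/ADC pair computes (src != 0): after the copy, dst == src.  NEG dst
      // sets CF exactly when src != 0 and leaves dst == -src; ADC dst,src then yields
      // (-src) + src + CF == CF, i.e. 1 for non-zero src and 0 otherwise.  C++ sketch of
      // the result (illustration only):
      //   int32_t conv2b(int32_t v) { return v != 0; }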
 8661 
 8662 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8663   effect( DEF dst, USE src );
 8664   format %{ "MOV    $dst,$src" %}
 8665   ins_encode( enc_Copy( dst, src) );
 8666   ins_pipe( ialu_reg_reg );
 8667 %}
 8668 
 8669 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8670   effect( USE_DEF dst, USE src, KILL cr );
 8671   format %{ "NEG    $dst\n\t"
 8672             "ADC    $dst,$src" %}
 8673   ins_encode( neg_reg(dst),
 8674               OpcRegReg(0x13,dst,src) );
 8675   ins_pipe( ialu_reg_reg_long );
 8676 %}
 8677 
 8678 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8679   match(Set dst (Conv2B src));
 8680 
 8681   expand %{
 8682     movP_nocopy(dst,src);
 8683     cp2b(dst,src,cr);
 8684   %}
 8685 %}
 8686 
 8687 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8688   match(Set dst (CmpLTMask p q));
 8689   effect(KILL cr);
 8690   ins_cost(400);
 8691 
 8692   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8693   format %{ "XOR    $dst,$dst\n\t"
 8694             "CMP    $p,$q\n\t"
 8695             "SETlt  $dst\n\t"
 8696             "NEG    $dst" %}
 8697   ins_encode %{
 8698     Register Rp = $p$$Register;
 8699     Register Rq = $q$$Register;
 8700     Register Rd = $dst$$Register;
 8701     Label done;
 8702     __ xorl(Rd, Rd);
 8703     __ cmpl(Rp, Rq);
 8704     __ setb(Assembler::less, Rd);
 8705     __ negl(Rd);
 8706   %}
 8707 
 8708   ins_pipe(pipe_slow);
 8709 %}
 8710 
 8711 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8712   match(Set dst (CmpLTMask dst zero));
 8713   effect(DEF dst, KILL cr);
 8714   ins_cost(100);
 8715 
 8716   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8717   ins_encode %{
 8718   __ sarl($dst$$Register, 31);
 8719   %}
 8720   ins_pipe(ialu_reg);
 8721 %}
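
      // Worked example for cmpLTMask0: an arithmetic shift by 31 smears the sign bit,
      // turning "dst < 0" into an all-ones mask.  C++ sketch (illustration only, assumes
      // arithmetic >> on signed values as on this target):
      //   int32_t lt_zero_mask(int32_t x) { return x >> 31; }   // -5 -> 0xFFFFFFFF, 7 -> 0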
 8722 
 8723 /* better to save a register than avoid a branch */
 8724 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8725   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8726   effect(KILL cr);
 8727   ins_cost(400);
 8728   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8729             "JGE    done\n\t"
 8730             "ADD    $p,$y\n"
 8731             "done:  " %}
 8732   ins_encode %{
 8733     Register Rp = $p$$Register;
 8734     Register Rq = $q$$Register;
 8735     Register Ry = $y$$Register;
 8736     Label done;
 8737     __ subl(Rp, Rq);
 8738     __ jccb(Assembler::greaterEqual, done);
 8739     __ addl(Rp, Ry);
 8740     __ bind(done);
 8741   %}
 8742 
 8743   ins_pipe(pipe_cmplt);
 8744 %}
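
      // What the rule above computes: p-q, plus y only when p < q, i.e. the masked add
      // (CmpLTMask(p,q) & y) + (p - q) folded into SUB/JGE/ADD so no mask register is
      // needed.  C++ sketch (hypothetical helper, illustration only):
      //   int32_t cadd_cmplt(int32_t p, int32_t q, int32_t y) {
      //     int32_t r = p - q;
      //     return (p < q) ? r + y : r;
      //   }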
 8745 
 8746 /* better to save a register than avoid a branch */
 8747 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8748   match(Set y (AndI (CmpLTMask p q) y));
 8749   effect(KILL cr);
 8750 
 8751   ins_cost(300);
 8752 
 8753   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8754             "JLT      done\n\t"
 8755             "XORL     $y, $y\n"
 8756             "done:  " %}
 8757   ins_encode %{
 8758     Register Rp = $p$$Register;
 8759     Register Rq = $q$$Register;
 8760     Register Ry = $y$$Register;
 8761     Label done;
 8762     __ cmpl(Rp, Rq);
 8763     __ jccb(Assembler::less, done);
 8764     __ xorl(Ry, Ry);
 8765     __ bind(done);
 8766   %}
 8767 
 8768   ins_pipe(pipe_cmplt);
 8769 %}
 8770 
 8771 /* If I enable this, I encourage spilling in the inner loop of compress.
 8772 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8773   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8774 */
 8775 //----------Overflow Math Instructions-----------------------------------------
 8776 
 8777 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8778 %{
 8779   match(Set cr (OverflowAddI op1 op2));
 8780   effect(DEF cr, USE_KILL op1, USE op2);
 8781 
 8782   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8783 
 8784   ins_encode %{
 8785     __ addl($op1$$Register, $op2$$Register);
 8786   %}
 8787   ins_pipe(ialu_reg_reg);
 8788 %}
 8789 
 8790 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8791 %{
 8792   match(Set cr (OverflowAddI op1 op2));
 8793   effect(DEF cr, USE_KILL op1, USE op2);
 8794 
 8795   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8796 
 8797   ins_encode %{
 8798     __ addl($op1$$Register, $op2$$constant);
 8799   %}
 8800   ins_pipe(ialu_reg_reg);
 8801 %}
 8802 
 8803 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8804 %{
 8805   match(Set cr (OverflowSubI op1 op2));
 8806 
 8807   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8808   ins_encode %{
 8809     __ cmpl($op1$$Register, $op2$$Register);
 8810   %}
 8811   ins_pipe(ialu_reg_reg);
 8812 %}
 8813 
 8814 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8815 %{
 8816   match(Set cr (OverflowSubI op1 op2));
 8817 
 8818   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8819   ins_encode %{
 8820     __ cmpl($op1$$Register, $op2$$constant);
 8821   %}
 8822   ins_pipe(ialu_reg_reg);
 8823 %}
 8824 
 8825 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8826 %{
 8827   match(Set cr (OverflowSubI zero op2));
 8828   effect(DEF cr, USE_KILL op2);
 8829 
 8830   format %{ "NEG    $op2\t# overflow check int" %}
 8831   ins_encode %{
 8832     __ negl($op2$$Register);
 8833   %}
 8834   ins_pipe(ialu_reg_reg);
 8835 %}
 8836 
 8837 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8838 %{
 8839   match(Set cr (OverflowMulI op1 op2));
 8840   effect(DEF cr, USE_KILL op1, USE op2);
 8841 
 8842   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8843   ins_encode %{
 8844     __ imull($op1$$Register, $op2$$Register);
 8845   %}
 8846   ins_pipe(ialu_reg_reg_alu0);
 8847 %}
 8848 
 8849 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8850 %{
 8851   match(Set cr (OverflowMulI op1 op2));
 8852   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8853 
 8854   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8855   ins_encode %{
 8856     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8857   %}
 8858   ins_pipe(ialu_reg_reg_alu0);
 8859 %}
 8860 
 8861 // Integer Absolute Instructions
 8862 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8863 %{
 8864   match(Set dst (AbsI src));
 8865   effect(TEMP dst, TEMP tmp, KILL cr);
 8866   format %{ "movl $tmp, $src\n\t"
 8867             "sarl $tmp, 31\n\t"
 8868             "movl $dst, $src\n\t"
 8869             "xorl $dst, $tmp\n\t"
 8870             "subl $dst, $tmp"
 8871           %}
 8872   ins_encode %{
 8873     __ movl($tmp$$Register, $src$$Register);
 8874     __ sarl($tmp$$Register, 31);
 8875     __ movl($dst$$Register, $src$$Register);
 8876     __ xorl($dst$$Register, $tmp$$Register);
 8877     __ subl($dst$$Register, $tmp$$Register);
 8878   %}
 8879 
 8880   ins_pipe(ialu_reg_reg);
 8881 %}
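
      // The sequence above is the classic branch-free abs: mask = x >> 31 (0 or -1), and
      // (x ^ mask) - mask negates x exactly when it is negative.  C++ sketch
      // (illustration only; like Java, abs(min_jint) wraps back to min_jint):
      //   int32_t abs_branchless(int32_t x) {
      //     int32_t m = x >> 31;
      //     return (x ^ m) - m;
      //   }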
 8882 
 8883 //----------Long Instructions------------------------------------------------
 8884 // Add Long Register with Register
 8885 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8886   match(Set dst (AddL dst src));
 8887   effect(KILL cr);
 8888   ins_cost(200);
 8889   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8890             "ADC    $dst.hi,$src.hi" %}
 8891   opcode(0x03, 0x13);
 8892   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8893   ins_pipe( ialu_reg_reg_long );
 8894 %}
 8895 
 8896 // Add Long Register with Immediate
 8897 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8898   match(Set dst (AddL dst src));
 8899   effect(KILL cr);
 8900   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8901             "ADC    $dst.hi,$src.hi" %}
 8902   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8903   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8904   ins_pipe( ialu_reg_long );
 8905 %}
 8906 
 8907 // Add Long Register with Memory
 8908 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8909   match(Set dst (AddL dst (LoadL mem)));
 8910   effect(KILL cr);
 8911   ins_cost(125);
 8912   format %{ "ADD    $dst.lo,$mem\n\t"
 8913             "ADC    $dst.hi,$mem+4" %}
 8914   opcode(0x03, 0x13);
 8915   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8916   ins_pipe( ialu_reg_long_mem );
 8917 %}
 8918 
 8919 // Subtract Long Register with Register.
 8920 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8921   match(Set dst (SubL dst src));
 8922   effect(KILL cr);
 8923   ins_cost(200);
 8924   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8925             "SBB    $dst.hi,$src.hi" %}
 8926   opcode(0x2B, 0x1B);
 8927   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8928   ins_pipe( ialu_reg_reg_long );
 8929 %}
 8930 
 8931 // Subtract Long Register with Immediate
 8932 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8933   match(Set dst (SubL dst src));
 8934   effect(KILL cr);
 8935   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8936             "SBB    $dst.hi,$src.hi" %}
 8937   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8938   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8939   ins_pipe( ialu_reg_long );
 8940 %}
 8941 
 8942 // Subtract Long Register with Memory
 8943 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8944   match(Set dst (SubL dst (LoadL mem)));
 8945   effect(KILL cr);
 8946   ins_cost(125);
 8947   format %{ "SUB    $dst.lo,$mem\n\t"
 8948             "SBB    $dst.hi,$mem+4" %}
 8949   opcode(0x2B, 0x1B);
 8950   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8951   ins_pipe( ialu_reg_long_mem );
 8952 %}
 8953 
 8954 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8955   match(Set dst (SubL zero dst));
 8956   effect(KILL cr);
 8957   ins_cost(300);
 8958   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8959   ins_encode( neg_long(dst) );
 8960   ins_pipe( ialu_reg_reg_long );
 8961 %}
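
      // The NEG/NEG/SBB sequence above is 64-bit two's-complement negation done 32 bits
      // at a time: negate both halves, then subtract the borrow out of the low half from
      // the high half.  C++ sketch (hypothetical helper, illustration only):
      //   uint64_t neg64(uint32_t lo, uint32_t hi) {
      //     uint32_t nlo = 0u - lo;
      //     uint32_t nhi = (0u - hi) - (lo != 0 ? 1u : 0u);   // the SBB $dst.hi,0
      //     return ((uint64_t)nhi << 32) | nlo;
      //   }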
 8962 
 8963 // And Long Register with Register
 8964 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8965   match(Set dst (AndL dst src));
 8966   effect(KILL cr);
 8967   format %{ "AND    $dst.lo,$src.lo\n\t"
 8968             "AND    $dst.hi,$src.hi" %}
 8969   opcode(0x23,0x23);
 8970   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8971   ins_pipe( ialu_reg_reg_long );
 8972 %}
 8973 
 8974 // And Long Register with Immediate
 8975 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8976   match(Set dst (AndL dst src));
 8977   effect(KILL cr);
 8978   format %{ "AND    $dst.lo,$src.lo\n\t"
 8979             "AND    $dst.hi,$src.hi" %}
 8980   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8981   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8982   ins_pipe( ialu_reg_long );
 8983 %}
 8984 
 8985 // And Long Register with Memory
 8986 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8987   match(Set dst (AndL dst (LoadL mem)));
 8988   effect(KILL cr);
 8989   ins_cost(125);
 8990   format %{ "AND    $dst.lo,$mem\n\t"
 8991             "AND    $dst.hi,$mem+4" %}
 8992   opcode(0x23, 0x23);
 8993   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8994   ins_pipe( ialu_reg_long_mem );
 8995 %}
 8996 
 8997 // BMI1 instructions
 8998 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8999   match(Set dst (AndL (XorL src1 minus_1) src2));
 9000   predicate(UseBMI1Instructions);
 9001   effect(KILL cr, TEMP dst);
 9002 
 9003   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9004             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9005          %}
 9006 
 9007   ins_encode %{
 9008     Register Rdst = $dst$$Register;
 9009     Register Rsrc1 = $src1$$Register;
 9010     Register Rsrc2 = $src2$$Register;
 9011     __ andnl(Rdst, Rsrc1, Rsrc2);
 9012     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9013   %}
 9014   ins_pipe(ialu_reg_reg_long);
 9015 %}
 9016 
 9017 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9018   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9019   predicate(UseBMI1Instructions);
 9020   effect(KILL cr, TEMP dst);
 9021 
 9022   ins_cost(125);
 9023   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9024             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9025          %}
 9026 
 9027   ins_encode %{
 9028     Register Rdst = $dst$$Register;
 9029     Register Rsrc1 = $src1$$Register;
 9030     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9031 
 9032     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9033     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9034   %}
 9035   ins_pipe(ialu_reg_mem);
 9036 %}
 9037 
 9038 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9039   match(Set dst (AndL (SubL imm_zero src) src));
 9040   predicate(UseBMI1Instructions);
 9041   effect(KILL cr, TEMP dst);
 9042 
 9043   format %{ "MOVL   $dst.hi, 0\n\t"
 9044             "BLSIL  $dst.lo, $src.lo\n\t"
 9045             "JNZ    done\n\t"
 9046             "BLSIL  $dst.hi, $src.hi\n"
 9047             "done:"
 9048          %}
 9049 
 9050   ins_encode %{
 9051     Label done;
 9052     Register Rdst = $dst$$Register;
 9053     Register Rsrc = $src$$Register;
 9054     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9055     __ blsil(Rdst, Rsrc);
 9056     __ jccb(Assembler::notZero, done);
 9057     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9058     __ bind(done);
 9059   %}
 9060   ins_pipe(ialu_reg);
 9061 %}
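
      // The paired BLSILs above extract the lowest set bit of a 64-bit value 32 bits at
      // a time: if the low word supplies the bit (ZF clear after the first BLSIL) the
      // high half of the result stays 0, otherwise the bit comes from the high word.
      // C++ sketch of the overall effect (illustration only):
      //   uint64_t lowest_set_bit(uint64_t x) { return x & (~x + 1); }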
 9062 
 9063 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9064   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9065   predicate(UseBMI1Instructions);
 9066   effect(KILL cr, TEMP dst);
 9067 
 9068   ins_cost(125);
 9069   format %{ "MOVL   $dst.hi, 0\n\t"
 9070             "BLSIL  $dst.lo, $src\n\t"
 9071             "JNZ    done\n\t"
 9072             "BLSIL  $dst.hi, $src+4\n"
 9073             "done:"
 9074          %}
 9075 
 9076   ins_encode %{
 9077     Label done;
 9078     Register Rdst = $dst$$Register;
 9079     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9080 
 9081     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9082     __ blsil(Rdst, $src$$Address);
 9083     __ jccb(Assembler::notZero, done);
 9084     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9085     __ bind(done);
 9086   %}
 9087   ins_pipe(ialu_reg_mem);
 9088 %}
 9089 
 9090 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9091 %{
 9092   match(Set dst (XorL (AddL src minus_1) src));
 9093   predicate(UseBMI1Instructions);
 9094   effect(KILL cr, TEMP dst);
 9095 
 9096   format %{ "MOVL    $dst.hi, 0\n\t"
 9097             "BLSMSKL $dst.lo, $src.lo\n\t"
 9098             "JNC     done\n\t"
 9099             "BLSMSKL $dst.hi, $src.hi\n"
 9100             "done:"
 9101          %}
 9102 
 9103   ins_encode %{
 9104     Label done;
 9105     Register Rdst = $dst$$Register;
 9106     Register Rsrc = $src$$Register;
 9107     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9108     __ blsmskl(Rdst, Rsrc);
 9109     __ jccb(Assembler::carryClear, done);
 9110     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9111     __ bind(done);
 9112   %}
 9113 
 9114   ins_pipe(ialu_reg);
 9115 %}
 9116 
 9117 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9118 %{
 9119   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9120   predicate(UseBMI1Instructions);
 9121   effect(KILL cr, TEMP dst);
 9122 
 9123   ins_cost(125);
 9124   format %{ "MOVL    $dst.hi, 0\n\t"
 9125             "BLSMSKL $dst.lo, $src\n\t"
 9126             "JNC     done\n\t"
 9127             "BLSMSKL $dst.hi, $src+4\n"
 9128             "done:"
 9129          %}
 9130 
 9131   ins_encode %{
 9132     Label done;
 9133     Register Rdst = $dst$$Register;
 9134     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9135 
 9136     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9137     __ blsmskl(Rdst, $src$$Address);
 9138     __ jccb(Assembler::carryClear, done);
 9139     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9140     __ bind(done);
 9141   %}
 9142 
 9143   ins_pipe(ialu_reg_mem);
 9144 %}
 9145 
 9146 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9147 %{
 9148   match(Set dst (AndL (AddL src minus_1) src) );
 9149   predicate(UseBMI1Instructions);
 9150   effect(KILL cr, TEMP dst);
 9151 
 9152   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9153             "BLSRL  $dst.lo, $src.lo\n\t"
 9154             "JNC    done\n\t"
 9155             "BLSRL  $dst.hi, $src.hi\n"
 9156             "done:"
 9157   %}
 9158 
 9159   ins_encode %{
 9160     Label done;
 9161     Register Rdst = $dst$$Register;
 9162     Register Rsrc = $src$$Register;
 9163     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9164     __ blsrl(Rdst, Rsrc);
 9165     __ jccb(Assembler::carryClear, done);
 9166     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9167     __ bind(done);
 9168   %}
 9169 
 9170   ins_pipe(ialu_reg);
 9171 %}
 9172 
 9173 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9174 %{
 9175   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9176   predicate(UseBMI1Instructions);
 9177   effect(KILL cr, TEMP dst);
 9178 
 9179   ins_cost(125);
 9180   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9181             "BLSRL  $dst.lo, $src\n\t"
 9182             "JNC    done\n\t"
 9183             "BLSRL  $dst.hi, $src+4\n"
 9184             "done:"
 9185   %}
 9186 
 9187   ins_encode %{
 9188     Label done;
 9189     Register Rdst = $dst$$Register;
 9190     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9191     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9192     __ blsrl(Rdst, $src$$Address);
 9193     __ jccb(Assembler::carryClear, done);
 9194     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9195     __ bind(done);
 9196   %}
 9197 
 9198   ins_pipe(ialu_reg_mem);
 9199 %}
 9200 
 9201 // Or Long Register with Register
 9202 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9203   match(Set dst (OrL dst src));
 9204   effect(KILL cr);
 9205   format %{ "OR     $dst.lo,$src.lo\n\t"
 9206             "OR     $dst.hi,$src.hi" %}
 9207   opcode(0x0B,0x0B);
 9208   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9209   ins_pipe( ialu_reg_reg_long );
 9210 %}
 9211 
 9212 // Or Long Register with Immediate
 9213 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9214   match(Set dst (OrL dst src));
 9215   effect(KILL cr);
 9216   format %{ "OR     $dst.lo,$src.lo\n\t"
 9217             "OR     $dst.hi,$src.hi" %}
 9218   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9219   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9220   ins_pipe( ialu_reg_long );
 9221 %}
 9222 
 9223 // Or Long Register with Memory
 9224 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9225   match(Set dst (OrL dst (LoadL mem)));
 9226   effect(KILL cr);
 9227   ins_cost(125);
 9228   format %{ "OR     $dst.lo,$mem\n\t"
 9229             "OR     $dst.hi,$mem+4" %}
 9230   opcode(0x0B,0x0B);
 9231   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9232   ins_pipe( ialu_reg_long_mem );
 9233 %}
 9234 
 9235 // Xor Long Register with Register
 9236 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9237   match(Set dst (XorL dst src));
 9238   effect(KILL cr);
 9239   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9240             "XOR    $dst.hi,$src.hi" %}
 9241   opcode(0x33,0x33);
 9242   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9243   ins_pipe( ialu_reg_reg_long );
 9244 %}
 9245 
 9246 // Xor Long Register with Immediate -1
 9247 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9248   match(Set dst (XorL dst imm));
 9249   format %{ "NOT    $dst.lo\n\t"
 9250             "NOT    $dst.hi" %}
 9251   ins_encode %{
 9252      __ notl($dst$$Register);
 9253      __ notl(HIGH_FROM_LOW($dst$$Register));
 9254   %}
 9255   ins_pipe( ialu_reg_long );
 9256 %}
 9257 
 9258 // Xor Long Register with Immediate
 9259 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9260   match(Set dst (XorL dst src));
 9261   effect(KILL cr);
 9262   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9263             "XOR    $dst.hi,$src.hi" %}
 9264   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9265   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9266   ins_pipe( ialu_reg_long );
 9267 %}
 9268 
 9269 // Xor Long Register with Memory
 9270 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9271   match(Set dst (XorL dst (LoadL mem)));
 9272   effect(KILL cr);
 9273   ins_cost(125);
 9274   format %{ "XOR    $dst.lo,$mem\n\t"
 9275             "XOR    $dst.hi,$mem+4" %}
 9276   opcode(0x33,0x33);
 9277   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9278   ins_pipe( ialu_reg_long_mem );
 9279 %}
 9280 
 9281 // Shift Left Long by 1
 9282 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9283   predicate(UseNewLongLShift);
 9284   match(Set dst (LShiftL dst cnt));
 9285   effect(KILL cr);
 9286   ins_cost(100);
 9287   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9288             "ADC    $dst.hi,$dst.hi" %}
 9289   ins_encode %{
 9290     __ addl($dst$$Register,$dst$$Register);
 9291     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9292   %}
 9293   ins_pipe( ialu_reg_long );
 9294 %}
 9295 
 9296 // Shift Left Long by 2
 9297 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9298   predicate(UseNewLongLShift);
 9299   match(Set dst (LShiftL dst cnt));
 9300   effect(KILL cr);
 9301   ins_cost(100);
 9302   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi\n\t"
 9304             "ADD    $dst.lo,$dst.lo\n\t"
 9305             "ADC    $dst.hi,$dst.hi" %}
 9306   ins_encode %{
 9307     __ addl($dst$$Register,$dst$$Register);
 9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9309     __ addl($dst$$Register,$dst$$Register);
 9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9311   %}
 9312   ins_pipe( ialu_reg_long );
 9313 %}
 9314 
 9315 // Shift Left Long by 3
 9316 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9317   predicate(UseNewLongLShift);
 9318   match(Set dst (LShiftL dst cnt));
 9319   effect(KILL cr);
 9320   ins_cost(100);
 9321   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9322             "ADC    $dst.hi,$dst.hi\n\t"
 9323             "ADD    $dst.lo,$dst.lo\n\t"
 9324             "ADC    $dst.hi,$dst.hi\n\t"
 9325             "ADD    $dst.lo,$dst.lo\n\t"
 9326             "ADC    $dst.hi,$dst.hi" %}
 9327   ins_encode %{
 9328     __ addl($dst$$Register,$dst$$Register);
 9329     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9330     __ addl($dst$$Register,$dst$$Register);
 9331     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9332     __ addl($dst$$Register,$dst$$Register);
 9333     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9334   %}
 9335   ins_pipe( ialu_reg_long );
 9336 %}
 9337 
 9338 // Shift Left Long by 1-31
 9339 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9340   match(Set dst (LShiftL dst cnt));
 9341   effect(KILL cr);
 9342   ins_cost(200);
 9343   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9344             "SHL    $dst.lo,$cnt" %}
 9345   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9346   ins_encode( move_long_small_shift(dst,cnt) );
 9347   ins_pipe( ialu_reg_long );
 9348 %}
 9349 
 9350 // Shift Left Long by 32-63
 9351 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9352   match(Set dst (LShiftL dst cnt));
 9353   effect(KILL cr);
 9354   ins_cost(300);
 9355   format %{ "MOV    $dst.hi,$dst.lo\n"
 9356           "\tSHL    $dst.hi,$cnt-32\n"
 9357           "\tXOR    $dst.lo,$dst.lo" %}
 9358   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9359   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9360   ins_pipe( ialu_reg_long );
 9361 %}
 9362 
 9363 // Shift Left Long by variable
 9364 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9365   match(Set dst (LShiftL dst shift));
 9366   effect(KILL cr);
 9367   ins_cost(500+200);
 9368   size(17);
 9369   format %{ "TEST   $shift,32\n\t"
 9370             "JEQ,s  small\n\t"
 9371             "MOV    $dst.hi,$dst.lo\n\t"
 9372             "XOR    $dst.lo,$dst.lo\n"
 9373     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9374             "SHL    $dst.lo,$shift" %}
 9375   ins_encode( shift_left_long( dst, shift ) );
 9376   ins_pipe( pipe_slow );
 9377 %}
 9378 
 9379 // Shift Right Long by 1-31
 9380 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9381   match(Set dst (URShiftL dst cnt));
 9382   effect(KILL cr);
 9383   ins_cost(200);
 9384   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9385             "SHR    $dst.hi,$cnt" %}
 9386   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9387   ins_encode( move_long_small_shift(dst,cnt) );
 9388   ins_pipe( ialu_reg_long );
 9389 %}
 9390 
 9391 // Shift Right Long by 32-63
 9392 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9393   match(Set dst (URShiftL dst cnt));
 9394   effect(KILL cr);
 9395   ins_cost(300);
 9396   format %{ "MOV    $dst.lo,$dst.hi\n"
 9397           "\tSHR    $dst.lo,$cnt-32\n"
 9398           "\tXOR    $dst.hi,$dst.hi" %}
 9399   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9400   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9401   ins_pipe( ialu_reg_long );
 9402 %}
 9403 
 9404 // Shift Right Long by variable
 9405 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9406   match(Set dst (URShiftL dst shift));
 9407   effect(KILL cr);
 9408   ins_cost(600);
 9409   size(17);
 9410   format %{ "TEST   $shift,32\n\t"
 9411             "JEQ,s  small\n\t"
 9412             "MOV    $dst.lo,$dst.hi\n\t"
 9413             "XOR    $dst.hi,$dst.hi\n"
 9414     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9415             "SHR    $dst.hi,$shift" %}
 9416   ins_encode( shift_right_long( dst, shift ) );
 9417   ins_pipe( pipe_slow );
 9418 %}
 9419 
// Shift Right arithmetic Long by 1-31
 9421 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9422   match(Set dst (RShiftL dst cnt));
 9423   effect(KILL cr);
 9424   ins_cost(200);
 9425   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9426             "SAR    $dst.hi,$cnt" %}
 9427   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9428   ins_encode( move_long_small_shift(dst,cnt) );
 9429   ins_pipe( ialu_reg_long );
 9430 %}
 9431 
// Shift Right arithmetic Long by 32-63
 9433 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9434   match(Set dst (RShiftL dst cnt));
 9435   effect(KILL cr);
 9436   ins_cost(300);
 9437   format %{ "MOV    $dst.lo,$dst.hi\n"
 9438           "\tSAR    $dst.lo,$cnt-32\n"
 9439           "\tSAR    $dst.hi,31" %}
 9440   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9441   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9442   ins_pipe( ialu_reg_long );
 9443 %}
 9444 
 9445 // Shift Right arithmetic Long by variable
 9446 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9447   match(Set dst (RShiftL dst shift));
 9448   effect(KILL cr);
 9449   ins_cost(600);
 9450   size(18);
 9451   format %{ "TEST   $shift,32\n\t"
 9452             "JEQ,s  small\n\t"
 9453             "MOV    $dst.lo,$dst.hi\n\t"
 9454             "SAR    $dst.hi,31\n"
 9455     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9456             "SAR    $dst.hi,$shift" %}
 9457   ins_encode( shift_right_arith_long( dst, shift ) );
 9458   ins_pipe( pipe_slow );
 9459 %}
 9460 
 9461 
 9462 //----------Double Instructions------------------------------------------------
 9463 // Double Math
 9464 
 9465 // Compare & branch
 9466 
// P6 version of double compare, sets condition codes in EFLAGS
 9468 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9469   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9470   match(Set cr (CmpD src1 src2));
 9471   effect(KILL rax);
 9472   ins_cost(150);
 9473   format %{ "FLD    $src1\n\t"
 9474             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9475             "JNP    exit\n\t"
 9476             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9477             "SAHF\n"
 9478      "exit:\tNOP               // avoid branch to branch" %}
 9479   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9480   ins_encode( Push_Reg_DPR(src1),
 9481               OpcP, RegOpc(src2),
 9482               cmpF_P6_fixup );
 9483   ins_pipe( pipe_slow );
 9484 %}
 9485 
 9486 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9487   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9488   match(Set cr (CmpD src1 src2));
 9489   ins_cost(150);
 9490   format %{ "FLD    $src1\n\t"
 9491             "FUCOMIP ST,$src2  // P6 instruction" %}
 9492   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9493   ins_encode( Push_Reg_DPR(src1),
 9494               OpcP, RegOpc(src2));
 9495   ins_pipe( pipe_slow );
 9496 %}
 9497 
 9498 // Compare & branch
 9499 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9500   predicate(UseSSE<=1);
 9501   match(Set cr (CmpD src1 src2));
 9502   effect(KILL rax);
 9503   ins_cost(200);
 9504   format %{ "FLD    $src1\n\t"
 9505             "FCOMp  $src2\n\t"
 9506             "FNSTSW AX\n\t"
 9507             "TEST   AX,0x400\n\t"
 9508             "JZ,s   flags\n\t"
 9509             "MOV    AH,1\t# unordered treat as LT\n"
 9510     "flags:\tSAHF" %}
 9511   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9512   ins_encode( Push_Reg_DPR(src1),
 9513               OpcP, RegOpc(src2),
 9514               fpu_flags);
 9515   ins_pipe( pipe_slow );
 9516 %}
 9517 
 9518 // Compare vs zero into -1,0,1
 9519 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9520   predicate(UseSSE<=1);
 9521   match(Set dst (CmpD3 src1 zero));
 9522   effect(KILL cr, KILL rax);
 9523   ins_cost(280);
 9524   format %{ "FTSTD  $dst,$src1" %}
 9525   opcode(0xE4, 0xD9);
 9526   ins_encode( Push_Reg_DPR(src1),
 9527               OpcS, OpcP, PopFPU,
 9528               CmpF_Result(dst));
 9529   ins_pipe( pipe_slow );
 9530 %}
 9531 
 9532 // Compare into -1,0,1
 9533 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9534   predicate(UseSSE<=1);
 9535   match(Set dst (CmpD3 src1 src2));
 9536   effect(KILL cr, KILL rax);
 9537   ins_cost(300);
 9538   format %{ "FCMPD  $dst,$src1,$src2" %}
 9539   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9540   ins_encode( Push_Reg_DPR(src1),
 9541               OpcP, RegOpc(src2),
 9542               CmpF_Result(dst));
 9543   ins_pipe( pipe_slow );
 9544 %}
 9545 
// double compare and set condition codes in EFLAGS by XMM regs
 9547 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9548   predicate(UseSSE>=2);
 9549   match(Set cr (CmpD src1 src2));
 9550   ins_cost(145);
 9551   format %{ "UCOMISD $src1,$src2\n\t"
 9552             "JNP,s   exit\n\t"
 9553             "PUSHF\t# saw NaN, set CF\n\t"
 9554             "AND     [rsp], #0xffffff2b\n\t"
 9555             "POPF\n"
 9556     "exit:" %}
 9557   ins_encode %{
 9558     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9559     emit_cmpfp_fixup(_masm);
 9560   %}
 9561   ins_pipe( pipe_slow );
 9562 %}
 9563 
 9564 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9565   predicate(UseSSE>=2);
 9566   match(Set cr (CmpD src1 src2));
 9567   ins_cost(100);
 9568   format %{ "UCOMISD $src1,$src2" %}
 9569   ins_encode %{
 9570     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9571   %}
 9572   ins_pipe( pipe_slow );
 9573 %}
 9574 
// double compare and set condition codes in EFLAGS by XMM regs
 9576 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9577   predicate(UseSSE>=2);
 9578   match(Set cr (CmpD src1 (LoadD src2)));
 9579   ins_cost(145);
 9580   format %{ "UCOMISD $src1,$src2\n\t"
 9581             "JNP,s   exit\n\t"
 9582             "PUSHF\t# saw NaN, set CF\n\t"
 9583             "AND     [rsp], #0xffffff2b\n\t"
 9584             "POPF\n"
 9585     "exit:" %}
 9586   ins_encode %{
 9587     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9588     emit_cmpfp_fixup(_masm);
 9589   %}
 9590   ins_pipe( pipe_slow );
 9591 %}
 9592 
 9593 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9594   predicate(UseSSE>=2);
 9595   match(Set cr (CmpD src1 (LoadD src2)));
 9596   ins_cost(100);
 9597   format %{ "UCOMISD $src1,$src2" %}
 9598   ins_encode %{
 9599     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9600   %}
 9601   ins_pipe( pipe_slow );
 9602 %}
 9603 
 9604 // Compare into -1,0,1 in XMM
 9605 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9606   predicate(UseSSE>=2);
 9607   match(Set dst (CmpD3 src1 src2));
 9608   effect(KILL cr);
 9609   ins_cost(255);
 9610   format %{ "UCOMISD $src1, $src2\n\t"
 9611             "MOV     $dst, #-1\n\t"
 9612             "JP,s    done\n\t"
 9613             "JB,s    done\n\t"
 9614             "SETNE   $dst\n\t"
 9615             "MOVZB   $dst, $dst\n"
 9616     "done:" %}
 9617   ins_encode %{
 9618     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9619     emit_cmpfp3(_masm, $dst$$Register);
 9620   %}
 9621   ins_pipe( pipe_slow );
 9622 %}
 9623 
 9624 // Compare into -1,0,1 in XMM and memory
 9625 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9626   predicate(UseSSE>=2);
 9627   match(Set dst (CmpD3 src1 (LoadD src2)));
 9628   effect(KILL cr);
 9629   ins_cost(275);
 9630   format %{ "UCOMISD $src1, $src2\n\t"
 9631             "MOV     $dst, #-1\n\t"
 9632             "JP,s    done\n\t"
 9633             "JB,s    done\n\t"
 9634             "SETNE   $dst\n\t"
 9635             "MOVZB   $dst, $dst\n"
 9636     "done:" %}
 9637   ins_encode %{
 9638     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9639     emit_cmpfp3(_masm, $dst$$Register);
 9640   %}
 9641   ins_pipe( pipe_slow );
 9642 %}
 9643 
 9644 
 9645 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9646   predicate (UseSSE <=1);
 9647   match(Set dst (SubD dst src));
 9648 
 9649   format %{ "FLD    $src\n\t"
 9650             "DSUBp  $dst,ST" %}
 9651   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9652   ins_cost(150);
 9653   ins_encode( Push_Reg_DPR(src),
 9654               OpcP, RegOpc(dst) );
 9655   ins_pipe( fpu_reg_reg );
 9656 %}
 9657 
 9658 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9659   predicate (UseSSE <=1);
 9660   match(Set dst (RoundDouble (SubD src1 src2)));
 9661   ins_cost(250);
 9662 
 9663   format %{ "FLD    $src2\n\t"
 9664             "DSUB   ST,$src1\n\t"
 9665             "FSTP_D $dst\t# D-round" %}
 9666   opcode(0xD8, 0x5);
 9667   ins_encode( Push_Reg_DPR(src2),
 9668               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9669   ins_pipe( fpu_mem_reg_reg );
 9670 %}
 9671 
 9672 
 9673 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9674   predicate (UseSSE <=1);
 9675   match(Set dst (SubD dst (LoadD src)));
 9676   ins_cost(150);
 9677 
 9678   format %{ "FLD    $src\n\t"
 9679             "DSUBp  $dst,ST" %}
 9680   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9681   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9682               OpcP, RegOpc(dst) );
 9683   ins_pipe( fpu_reg_mem );
 9684 %}
 9685 
 9686 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9687   predicate (UseSSE<=1);
 9688   match(Set dst (AbsD src));
 9689   ins_cost(100);
 9690   format %{ "FABS" %}
 9691   opcode(0xE1, 0xD9);
 9692   ins_encode( OpcS, OpcP );
 9693   ins_pipe( fpu_reg_reg );
 9694 %}
 9695 
 9696 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9697   predicate(UseSSE<=1);
 9698   match(Set dst (NegD src));
 9699   ins_cost(100);
 9700   format %{ "FCHS" %}
 9701   opcode(0xE0, 0xD9);
 9702   ins_encode( OpcS, OpcP );
 9703   ins_pipe( fpu_reg_reg );
 9704 %}
 9705 
 9706 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9707   predicate(UseSSE<=1);
 9708   match(Set dst (AddD dst src));
 9709   format %{ "FLD    $src\n\t"
 9710             "DADD   $dst,ST" %}
 9711   size(4);
 9712   ins_cost(150);
 9713   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9714   ins_encode( Push_Reg_DPR(src),
 9715               OpcP, RegOpc(dst) );
 9716   ins_pipe( fpu_reg_reg );
 9717 %}
 9718 
 9719 
 9720 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9721   predicate(UseSSE<=1);
 9722   match(Set dst (RoundDouble (AddD src1 src2)));
 9723   ins_cost(250);
 9724 
 9725   format %{ "FLD    $src2\n\t"
 9726             "DADD   ST,$src1\n\t"
 9727             "FSTP_D $dst\t# D-round" %}
 9728   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9729   ins_encode( Push_Reg_DPR(src2),
 9730               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9731   ins_pipe( fpu_mem_reg_reg );
 9732 %}
 9733 
 9734 
 9735 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9736   predicate(UseSSE<=1);
 9737   match(Set dst (AddD dst (LoadD src)));
 9738   ins_cost(150);
 9739 
 9740   format %{ "FLD    $src\n\t"
 9741             "DADDp  $dst,ST" %}
 9742   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9743   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9744               OpcP, RegOpc(dst) );
 9745   ins_pipe( fpu_reg_mem );
 9746 %}
 9747 
 9748 // add-to-memory
 9749 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9750   predicate(UseSSE<=1);
 9751   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9752   ins_cost(150);
 9753 
 9754   format %{ "FLD_D  $dst\n\t"
 9755             "DADD   ST,$src\n\t"
 9756             "FST_D  $dst" %}
 9757   opcode(0xDD, 0x0);
 9758   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9759               Opcode(0xD8), RegOpc(src),
 9760               set_instruction_start,
 9761               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9762   ins_pipe( fpu_reg_mem );
 9763 %}
 9764 
 9765 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9766   predicate(UseSSE<=1);
 9767   match(Set dst (AddD dst con));
 9768   ins_cost(125);
 9769   format %{ "FLD1\n\t"
 9770             "DADDp  $dst,ST" %}
 9771   ins_encode %{
 9772     __ fld1();
 9773     __ faddp($dst$$reg);
 9774   %}
 9775   ins_pipe(fpu_reg);
 9776 %}
 9777 
 9778 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9779   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9780   match(Set dst (AddD dst con));
 9781   ins_cost(200);
 9782   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9783             "DADDp  $dst,ST" %}
 9784   ins_encode %{
 9785     __ fld_d($constantaddress($con));
 9786     __ faddp($dst$$reg);
 9787   %}
 9788   ins_pipe(fpu_reg_mem);
 9789 %}
 9790 
 9791 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9792   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9793   match(Set dst (RoundDouble (AddD src con)));
 9794   ins_cost(200);
 9795   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9796             "DADD   ST,$src\n\t"
 9797             "FSTP_D $dst\t# D-round" %}
 9798   ins_encode %{
 9799     __ fld_d($constantaddress($con));
 9800     __ fadd($src$$reg);
 9801     __ fstp_d(Address(rsp, $dst$$disp));
 9802   %}
 9803   ins_pipe(fpu_mem_reg_con);
 9804 %}
 9805 
 9806 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9807   predicate(UseSSE<=1);
 9808   match(Set dst (MulD dst src));
 9809   format %{ "FLD    $src\n\t"
 9810             "DMULp  $dst,ST" %}
 9811   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9812   ins_cost(150);
 9813   ins_encode( Push_Reg_DPR(src),
 9814               OpcP, RegOpc(dst) );
 9815   ins_pipe( fpu_reg_reg );
 9816 %}
 9817 
 9818 // Strict FP instruction biases argument before multiply then
 9819 // biases result to avoid double rounding of subnormals.
 9820 //
 9821 // scale arg1 by multiplying arg1 by 2^(-15360)
 9822 // load arg2
 9823 // multiply scaled arg1 by arg2
 9824 // rescale product by 2^(15360)
 9825 //
 9826 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9827   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9828   match(Set dst (MulD dst src));
 9829   ins_cost(1);   // Select this instruction for all FP double multiplies
 9830 
 9831   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9832             "DMULp  $dst,ST\n\t"
 9833             "FLD    $src\n\t"
 9834             "DMULp  $dst,ST\n\t"
 9835             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9836             "DMULp  $dst,ST\n\t" %}
 9837   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9838   ins_encode( strictfp_bias1(dst),
 9839               Push_Reg_DPR(src),
 9840               OpcP, RegOpc(dst),
 9841               strictfp_bias2(dst) );
 9842   ins_pipe( fpu_reg_reg );
 9843 %}
 9844 
 9845 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9846   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9847   match(Set dst (MulD dst con));
 9848   ins_cost(200);
 9849   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9850             "DMULp  $dst,ST" %}
 9851   ins_encode %{
 9852     __ fld_d($constantaddress($con));
 9853     __ fmulp($dst$$reg);
 9854   %}
 9855   ins_pipe(fpu_reg_mem);
 9856 %}
 9857 
 9858 
 9859 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9860   predicate( UseSSE<=1 );
 9861   match(Set dst (MulD dst (LoadD src)));
 9862   ins_cost(200);
 9863   format %{ "FLD_D  $src\n\t"
 9864             "DMULp  $dst,ST" %}
 9865   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9866   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9867               OpcP, RegOpc(dst) );
 9868   ins_pipe( fpu_reg_mem );
 9869 %}
 9870 
 9871 //
 9872 // Cisc-alternate to reg-reg multiply
 9873 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9874   predicate( UseSSE<=1 );
 9875   match(Set dst (MulD src (LoadD mem)));
 9876   ins_cost(250);
 9877   format %{ "FLD_D  $mem\n\t"
 9878             "DMUL   ST,$src\n\t"
 9879             "FSTP_D $dst" %}
 9880   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9881   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9882               OpcReg_FPR(src),
 9883               Pop_Reg_DPR(dst) );
 9884   ins_pipe( fpu_reg_reg_mem );
 9885 %}
 9886 
 9887 
 9888 // MACRO3 -- addDPR a mulDPR
 9889 // This instruction is a '2-address' instruction in that the result goes
 9890 // back to src2.  This eliminates a move from the macro; possibly the
 9891 // register allocator will have to add it back (and maybe not).
 9892 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9893   predicate( UseSSE<=1 );
 9894   match(Set src2 (AddD (MulD src0 src1) src2));
 9895   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9896             "DMUL   ST,$src1\n\t"
 9897             "DADDp  $src2,ST" %}
 9898   ins_cost(250);
 9899   opcode(0xDD); /* LoadD DD /0 */
 9900   ins_encode( Push_Reg_FPR(src0),
 9901               FMul_ST_reg(src1),
 9902               FAddP_reg_ST(src2) );
 9903   ins_pipe( fpu_reg_reg_reg );
 9904 %}
 9905 
 9906 
 9907 // MACRO3 -- subDPR a mulDPR
 9908 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9909   predicate( UseSSE<=1 );
 9910   match(Set src2 (SubD (MulD src0 src1) src2));
 9911   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9912             "DMUL   ST,$src1\n\t"
 9913             "DSUBRp $src2,ST" %}
 9914   ins_cost(250);
 9915   ins_encode( Push_Reg_FPR(src0),
 9916               FMul_ST_reg(src1),
 9917               Opcode(0xDE), Opc_plus(0xE0,src2));
 9918   ins_pipe( fpu_reg_reg_reg );
 9919 %}
 9920 
 9921 
 9922 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9923   predicate( UseSSE<=1 );
 9924   match(Set dst (DivD dst src));
 9925 
 9926   format %{ "FLD    $src\n\t"
 9927             "FDIVp  $dst,ST" %}
 9928   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9929   ins_cost(150);
 9930   ins_encode( Push_Reg_DPR(src),
 9931               OpcP, RegOpc(dst) );
 9932   ins_pipe( fpu_reg_reg );
 9933 %}
 9934 
 9935 // Strict FP instruction biases argument before division then
 9936 // biases result, to avoid double rounding of subnormals.
 9937 //
 9938 // scale dividend by multiplying dividend by 2^(-15360)
 9939 // load divisor
 9940 // divide scaled dividend by divisor
 9941 // rescale quotient by 2^(15360)
 9942 //
 9943 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divisions
 9948 
 9949   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9950             "DMULp  $dst,ST\n\t"
 9951             "FLD    $src\n\t"
 9952             "FDIVp  $dst,ST\n\t"
 9953             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9954             "DMULp  $dst,ST\n\t" %}
 9955   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9956   ins_encode( strictfp_bias1(dst),
 9957               Push_Reg_DPR(src),
 9958               OpcP, RegOpc(dst),
 9959               strictfp_bias2(dst) );
 9960   ins_pipe( fpu_reg_reg );
 9961 %}
 9962 
 9963 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9964   predicate(UseSSE<=1);
 9965   match(Set dst (ModD dst src));
 9966   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9967 
 9968   format %{ "DMOD   $dst,$src" %}
 9969   ins_cost(250);
 9970   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9971               emitModDPR(),
 9972               Push_Result_Mod_DPR(src),
 9973               Pop_Reg_DPR(dst));
 9974   ins_pipe( pipe_slow );
 9975 %}
 9976 
 9977 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9978   predicate(UseSSE>=2);
 9979   match(Set dst (ModD src0 src1));
 9980   effect(KILL rax, KILL cr);
 9981 
 9982   format %{ "SUB    ESP,8\t # DMOD\n"
 9983           "\tMOVSD  [ESP+0],$src1\n"
 9984           "\tFLD_D  [ESP+0]\n"
 9985           "\tMOVSD  [ESP+0],$src0\n"
 9986           "\tFLD_D  [ESP+0]\n"
 9987      "loop:\tFPREM\n"
 9988           "\tFWAIT\n"
 9989           "\tFNSTSW AX\n"
 9990           "\tSAHF\n"
 9991           "\tJP     loop\n"
 9992           "\tFSTP_D [ESP+0]\n"
 9993           "\tMOVSD  $dst,[ESP+0]\n"
 9994           "\tADD    ESP,8\n"
 9995           "\tFSTP   ST0\t # Restore FPU Stack"
 9996     %}
 9997   ins_cost(250);
 9998   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9999   ins_pipe( pipe_slow );
10000 %}
10001 
10002 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10003   predicate (UseSSE<=1);
10004   match(Set dst(AtanD dst src));
10005   format %{ "DATA   $dst,$src" %}
10006   opcode(0xD9, 0xF3);
10007   ins_encode( Push_Reg_DPR(src),
10008               OpcP, OpcS, RegOpc(dst) );
10009   ins_pipe( pipe_slow );
10010 %}
10011 
10012 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10013   predicate (UseSSE>=2);
10014   match(Set dst(AtanD dst src));
10015   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10016   format %{ "DATA   $dst,$src" %}
10017   opcode(0xD9, 0xF3);
10018   ins_encode( Push_SrcD(src),
10019               OpcP, OpcS, Push_ResultD(dst) );
10020   ins_pipe( pipe_slow );
10021 %}
10022 
10023 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10024   predicate (UseSSE<=1);
10025   match(Set dst (SqrtD src));
10026   format %{ "DSQRT  $dst,$src" %}
10027   opcode(0xFA, 0xD9);
10028   ins_encode( Push_Reg_DPR(src),
10029               OpcS, OpcP, Pop_Reg_DPR(dst) );
10030   ins_pipe( pipe_slow );
10031 %}
10032 
10033 //-------------Float Instructions-------------------------------
10034 // Float Math
10035 
10036 // Code for float compare:
10037 //     fcompp();
10038 //     fwait(); fnstsw_ax();
10039 //     sahf();
10040 //     movl(dst, unordered_result);
10041 //     jcc(Assembler::parity, exit);
10042 //     movl(dst, less_result);
10043 //     jcc(Assembler::below, exit);
10044 //     movl(dst, equal_result);
10045 //     jcc(Assembler::equal, exit);
10046 //     movl(dst, greater_result);
10047 //   exit:
10048 
10049 // P6 version of float compare, sets condition codes in EFLAGS
10050 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10051   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10052   match(Set cr (CmpF src1 src2));
10053   effect(KILL rax);
10054   ins_cost(150);
10055   format %{ "FLD    $src1\n\t"
10056             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10057             "JNP    exit\n\t"
10058             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10059             "SAHF\n"
10060      "exit:\tNOP               // avoid branch to branch" %}
10061   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10062   ins_encode( Push_Reg_DPR(src1),
10063               OpcP, RegOpc(src2),
10064               cmpF_P6_fixup );
10065   ins_pipe( pipe_slow );
10066 %}
10067 
10068 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10069   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10070   match(Set cr (CmpF src1 src2));
10071   ins_cost(100);
10072   format %{ "FLD    $src1\n\t"
10073             "FUCOMIP ST,$src2  // P6 instruction" %}
10074   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10075   ins_encode( Push_Reg_DPR(src1),
10076               OpcP, RegOpc(src2));
10077   ins_pipe( pipe_slow );
10078 %}
10079 
10080 
10081 // Compare & branch
10082 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10083   predicate(UseSSE == 0);
10084   match(Set cr (CmpF src1 src2));
10085   effect(KILL rax);
10086   ins_cost(200);
10087   format %{ "FLD    $src1\n\t"
10088             "FCOMp  $src2\n\t"
10089             "FNSTSW AX\n\t"
10090             "TEST   AX,0x400\n\t"
10091             "JZ,s   flags\n\t"
10092             "MOV    AH,1\t# unordered treat as LT\n"
10093     "flags:\tSAHF" %}
10094   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10095   ins_encode( Push_Reg_DPR(src1),
10096               OpcP, RegOpc(src2),
10097               fpu_flags);
10098   ins_pipe( pipe_slow );
10099 %}
10100 
10101 // Compare vs zero into -1,0,1
10102 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10103   predicate(UseSSE == 0);
10104   match(Set dst (CmpF3 src1 zero));
10105   effect(KILL cr, KILL rax);
10106   ins_cost(280);
10107   format %{ "FTSTF  $dst,$src1" %}
10108   opcode(0xE4, 0xD9);
10109   ins_encode( Push_Reg_DPR(src1),
10110               OpcS, OpcP, PopFPU,
10111               CmpF_Result(dst));
10112   ins_pipe( pipe_slow );
10113 %}
10114 
10115 // Compare into -1,0,1
10116 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10117   predicate(UseSSE == 0);
10118   match(Set dst (CmpF3 src1 src2));
10119   effect(KILL cr, KILL rax);
10120   ins_cost(300);
10121   format %{ "FCMPF  $dst,$src1,$src2" %}
10122   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10123   ins_encode( Push_Reg_DPR(src1),
10124               OpcP, RegOpc(src2),
10125               CmpF_Result(dst));
10126   ins_pipe( pipe_slow );
10127 %}
10128 
10129 // float compare and set condition codes in EFLAGS by XMM regs
10130 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10131   predicate(UseSSE>=1);
10132   match(Set cr (CmpF src1 src2));
10133   ins_cost(145);
10134   format %{ "UCOMISS $src1,$src2\n\t"
10135             "JNP,s   exit\n\t"
10136             "PUSHF\t# saw NaN, set CF\n\t"
10137             "AND     [rsp], #0xffffff2b\n\t"
10138             "POPF\n"
10139     "exit:" %}
10140   ins_encode %{
10141     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10142     emit_cmpfp_fixup(_masm);
10143   %}
10144   ins_pipe( pipe_slow );
10145 %}
10146 
10147 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10148   predicate(UseSSE>=1);
10149   match(Set cr (CmpF src1 src2));
10150   ins_cost(100);
10151   format %{ "UCOMISS $src1,$src2" %}
10152   ins_encode %{
10153     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10154   %}
10155   ins_pipe( pipe_slow );
10156 %}
10157 
10158 // float compare and set condition codes in EFLAGS by XMM regs
10159 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10160   predicate(UseSSE>=1);
10161   match(Set cr (CmpF src1 (LoadF src2)));
10162   ins_cost(165);
10163   format %{ "UCOMISS $src1,$src2\n\t"
10164             "JNP,s   exit\n\t"
10165             "PUSHF\t# saw NaN, set CF\n\t"
10166             "AND     [rsp], #0xffffff2b\n\t"
10167             "POPF\n"
10168     "exit:" %}
10169   ins_encode %{
10170     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10171     emit_cmpfp_fixup(_masm);
10172   %}
10173   ins_pipe( pipe_slow );
10174 %}
10175 
10176 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10177   predicate(UseSSE>=1);
10178   match(Set cr (CmpF src1 (LoadF src2)));
10179   ins_cost(100);
10180   format %{ "UCOMISS $src1,$src2" %}
10181   ins_encode %{
10182     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10183   %}
10184   ins_pipe( pipe_slow );
10185 %}
10186 
10187 // Compare into -1,0,1 in XMM
10188 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10189   predicate(UseSSE>=1);
10190   match(Set dst (CmpF3 src1 src2));
10191   effect(KILL cr);
10192   ins_cost(255);
10193   format %{ "UCOMISS $src1, $src2\n\t"
10194             "MOV     $dst, #-1\n\t"
10195             "JP,s    done\n\t"
10196             "JB,s    done\n\t"
10197             "SETNE   $dst\n\t"
10198             "MOVZB   $dst, $dst\n"
10199     "done:" %}
10200   ins_encode %{
10201     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10202     emit_cmpfp3(_masm, $dst$$Register);
10203   %}
10204   ins_pipe( pipe_slow );
10205 %}
10206 
10207 // Compare into -1,0,1 in XMM and memory
10208 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10209   predicate(UseSSE>=1);
10210   match(Set dst (CmpF3 src1 (LoadF src2)));
10211   effect(KILL cr);
10212   ins_cost(275);
10213   format %{ "UCOMISS $src1, $src2\n\t"
10214             "MOV     $dst, #-1\n\t"
10215             "JP,s    done\n\t"
10216             "JB,s    done\n\t"
10217             "SETNE   $dst\n\t"
10218             "MOVZB   $dst, $dst\n"
10219     "done:" %}
10220   ins_encode %{
10221     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10222     emit_cmpfp3(_masm, $dst$$Register);
10223   %}
10224   ins_pipe( pipe_slow );
10225 %}
10226 
10227 // Spill to obtain 24-bit precision
10228 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10229   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10230   match(Set dst (SubF src1 src2));
10231 
10232   format %{ "FSUB   $dst,$src1 - $src2" %}
10233   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10234   ins_encode( Push_Reg_FPR(src1),
10235               OpcReg_FPR(src2),
10236               Pop_Mem_FPR(dst) );
10237   ins_pipe( fpu_mem_reg_reg );
10238 %}
10239 //
10240 // This instruction does not round to 24-bits
10241 instruct subFPR_reg(regFPR dst, regFPR src) %{
10242   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10243   match(Set dst (SubF dst src));
10244 
10245   format %{ "FSUB   $dst,$src" %}
10246   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10247   ins_encode( Push_Reg_FPR(src),
10248               OpcP, RegOpc(dst) );
10249   ins_pipe( fpu_reg_reg );
10250 %}
10251 
10252 // Spill to obtain 24-bit precision
10253 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10254   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10255   match(Set dst (AddF src1 src2));
10256 
10257   format %{ "FADD   $dst,$src1,$src2" %}
10258   opcode(0xD8, 0x0); /* D8 C0+i */
10259   ins_encode( Push_Reg_FPR(src2),
10260               OpcReg_FPR(src1),
10261               Pop_Mem_FPR(dst) );
10262   ins_pipe( fpu_mem_reg_reg );
10263 %}
10264 //
10265 // This instruction does not round to 24-bits
10266 instruct addFPR_reg(regFPR dst, regFPR src) %{
10267   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10268   match(Set dst (AddF dst src));
10269 
10270   format %{ "FLD    $src\n\t"
10271             "FADDp  $dst,ST" %}
10272   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10273   ins_encode( Push_Reg_FPR(src),
10274               OpcP, RegOpc(dst) );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10279   predicate(UseSSE==0);
10280   match(Set dst (AbsF src));
10281   ins_cost(100);
10282   format %{ "FABS" %}
10283   opcode(0xE1, 0xD9);
10284   ins_encode( OpcS, OpcP );
10285   ins_pipe( fpu_reg_reg );
10286 %}
10287 
10288 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10289   predicate(UseSSE==0);
10290   match(Set dst (NegF src));
10291   ins_cost(100);
10292   format %{ "FCHS" %}
10293   opcode(0xE0, 0xD9);
10294   ins_encode( OpcS, OpcP );
10295   ins_pipe( fpu_reg_reg );
10296 %}
10297 
10298 // Cisc-alternate to addFPR_reg
10299 // Spill to obtain 24-bit precision
10300 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10301   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10302   match(Set dst (AddF src1 (LoadF src2)));
10303 
10304   format %{ "FLD    $src2\n\t"
10305             "FADD   ST,$src1\n\t"
10306             "FSTP_S $dst" %}
10307   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10308   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10309               OpcReg_FPR(src1),
10310               Pop_Mem_FPR(dst) );
10311   ins_pipe( fpu_mem_reg_mem );
10312 %}
10313 //
10314 // Cisc-alternate to addFPR_reg
10315 // This instruction does not round to 24-bits
10316 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10317   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10318   match(Set dst (AddF dst (LoadF src)));
10319 
10320   format %{ "FADD   $dst,$src" %}
10321   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10322   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10323               OpcP, RegOpc(dst) );
10324   ins_pipe( fpu_reg_mem );
10325 %}
10326 
// Following two instructions for _222_mpegaudio
10328 // Spill to obtain 24-bit precision
10329 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10330   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10331   match(Set dst (AddF src1 src2));
10332 
10333   format %{ "FADD   $dst,$src1,$src2" %}
10334   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10335   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10336               OpcReg_FPR(src2),
10337               Pop_Mem_FPR(dst) );
10338   ins_pipe( fpu_mem_reg_mem );
10339 %}
10340 
10341 // Cisc-spill variant
10342 // Spill to obtain 24-bit precision
10343 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10344   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10345   match(Set dst (AddF src1 (LoadF src2)));
10346 
10347   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10348   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10349   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10350               set_instruction_start,
10351               OpcP, RMopc_Mem(secondary,src1),
10352               Pop_Mem_FPR(dst) );
10353   ins_pipe( fpu_mem_mem_mem );
10354 %}
10355 
10356 // Spill to obtain 24-bit precision
10357 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10358   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359   match(Set dst (AddF src1 src2));
10360 
10361   format %{ "FADD   $dst,$src1,$src2" %}
10362   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10363   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10364               set_instruction_start,
10365               OpcP, RMopc_Mem(secondary,src1),
10366               Pop_Mem_FPR(dst) );
10367   ins_pipe( fpu_mem_mem_mem );
10368 %}
10369 
10370 
10371 // Spill to obtain 24-bit precision
10372 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10373   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10374   match(Set dst (AddF src con));
10375   format %{ "FLD    $src\n\t"
10376             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10377             "FSTP_S $dst"  %}
10378   ins_encode %{
10379     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10380     __ fadd_s($constantaddress($con));
10381     __ fstp_s(Address(rsp, $dst$$disp));
10382   %}
10383   ins_pipe(fpu_mem_reg_con);
10384 %}
10385 //
10386 // This instruction does not round to 24-bits
10387 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10388   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10389   match(Set dst (AddF src con));
10390   format %{ "FLD    $src\n\t"
10391             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10392             "FSTP   $dst"  %}
10393   ins_encode %{
10394     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10395     __ fadd_s($constantaddress($con));
10396     __ fstp_d($dst$$reg);
10397   %}
10398   ins_pipe(fpu_reg_reg_con);
10399 %}
10400 
10401 // Spill to obtain 24-bit precision
10402 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10403   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10404   match(Set dst (MulF src1 src2));
10405 
10406   format %{ "FLD    $src1\n\t"
10407             "FMUL   $src2\n\t"
10408             "FSTP_S $dst"  %}
10409   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10410   ins_encode( Push_Reg_FPR(src1),
10411               OpcReg_FPR(src2),
10412               Pop_Mem_FPR(dst) );
10413   ins_pipe( fpu_mem_reg_reg );
10414 %}
10415 //
10416 // This instruction does not round to 24-bits
10417 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10418   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10419   match(Set dst (MulF src1 src2));
10420 
10421   format %{ "FLD    $src1\n\t"
10422             "FMUL   $src2\n\t"
10423             "FSTP_S $dst"  %}
10424   opcode(0xD8, 0x1); /* D8 C8+i */
10425   ins_encode( Push_Reg_FPR(src2),
10426               OpcReg_FPR(src1),
10427               Pop_Reg_FPR(dst) );
10428   ins_pipe( fpu_reg_reg_reg );
10429 %}
10430 
10431 
10432 // Spill to obtain 24-bit precision
10433 // Cisc-alternate to reg-reg multiply
10434 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10435   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10436   match(Set dst (MulF src1 (LoadF src2)));
10437 
10438   format %{ "FLD_S  $src2\n\t"
10439             "FMUL   $src1\n\t"
10440             "FSTP_S $dst"  %}
10441   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10442   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10443               OpcReg_FPR(src1),
10444               Pop_Mem_FPR(dst) );
10445   ins_pipe( fpu_mem_reg_mem );
10446 %}
10447 //
10448 // This instruction does not round to 24-bits
10449 // Cisc-alternate to reg-reg multiply
10450 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10451   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10452   match(Set dst (MulF src1 (LoadF src2)));
10453 
10454   format %{ "FMUL   $dst,$src1,$src2" %}
10455   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10456   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10457               OpcReg_FPR(src1),
10458               Pop_Reg_FPR(dst) );
10459   ins_pipe( fpu_reg_reg_mem );
10460 %}
10461 
10462 // Spill to obtain 24-bit precision
10463 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10464   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10465   match(Set dst (MulF src1 src2));
10466 
10467   format %{ "FMUL   $dst,$src1,$src2" %}
10468   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10469   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10470               set_instruction_start,
10471               OpcP, RMopc_Mem(secondary,src1),
10472               Pop_Mem_FPR(dst) );
10473   ins_pipe( fpu_mem_mem_mem );
10474 %}
10475 
10476 // Spill to obtain 24-bit precision
10477 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10478   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10479   match(Set dst (MulF src con));
10480 
10481   format %{ "FLD    $src\n\t"
10482             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10483             "FSTP_S $dst"  %}
10484   ins_encode %{
10485     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10486     __ fmul_s($constantaddress($con));
10487     __ fstp_s(Address(rsp, $dst$$disp));
10488   %}
10489   ins_pipe(fpu_mem_reg_con);
10490 %}
10491 //
10492 // This instruction does not round to 24-bits
10493 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10494   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10495   match(Set dst (MulF src con));
10496 
10497   format %{ "FLD    $src\n\t"
10498             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10499             "FSTP   $dst"  %}
10500   ins_encode %{
10501     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10502     __ fmul_s($constantaddress($con));
10503     __ fstp_d($dst$$reg);
10504   %}
10505   ins_pipe(fpu_reg_reg_con);
10506 %}
10507 
10508 
10509 //
10510 // MACRO1 -- subsume unshared load into mulFPR
10511 // This instruction does not round to 24-bits
10512 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10513   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10514   match(Set dst (MulF (LoadF mem1) src));
10515 
10516   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10517             "FMUL   ST,$src\n\t"
10518             "FSTP   $dst" %}
10519   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10520   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10521               OpcReg_FPR(src),
10522               Pop_Reg_FPR(dst) );
10523   ins_pipe( fpu_reg_reg_mem );
10524 %}
10525 //
10526 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10527 // This instruction does not round to 24-bits
10528 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10529   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10530   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10531   ins_cost(95);
10532 
10533   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10534             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10535             "FADD   ST,$src2\n\t"
10536             "FSTP   $dst" %}
10537   opcode(0xD9); /* LoadF D9 /0 */
10538   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10539               FMul_ST_reg(src1),
10540               FAdd_ST_reg(src2),
10541               Pop_Reg_FPR(dst) );
10542   ins_pipe( fpu_reg_mem_reg_reg );
10543 %}
10544 
10545 // MACRO3 -- addFPR a mulFPR
10546 // This instruction does not round to 24-bits.  It is a '2-address'
10547 // instruction in that the result goes back to src2.  This eliminates
10548 // a move from the macro; possibly the register allocator will have
10549 // to add it back (and maybe not).
10550 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10551   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10552   match(Set src2 (AddF (MulF src0 src1) src2));
10553 
10554   format %{ "FLD    $src0     ===MACRO3===\n\t"
10555             "FMUL   ST,$src1\n\t"
10556             "FADDP  $src2,ST" %}
10557   opcode(0xD9); /* LoadF D9 /0 */
10558   ins_encode( Push_Reg_FPR(src0),
10559               FMul_ST_reg(src1),
10560               FAddP_reg_ST(src2) );
10561   ins_pipe( fpu_reg_reg_reg );
10562 %}
10563 
10564 // MACRO4 -- divFPR subFPR
10565 // This instruction does not round to 24-bits
10566 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10567   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10568   match(Set dst (DivF (SubF src2 src1) src3));
10569 
10570   format %{ "FLD    $src2   ===MACRO4===\n\t"
10571             "FSUB   ST,$src1\n\t"
10572             "FDIV   ST,$src3\n\t"
10573             "FSTP  $dst" %}
10574   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10575   ins_encode( Push_Reg_FPR(src2),
10576               subFPR_divFPR_encode(src1,src3),
10577               Pop_Reg_FPR(dst) );
10578   ins_pipe( fpu_reg_reg_reg_reg );
10579 %}
10580 
10581 // Spill to obtain 24-bit precision
10582 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10583   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10584   match(Set dst (DivF src1 src2));
10585 
10586   format %{ "FDIV   $dst,$src1,$src2" %}
10587   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10588   ins_encode( Push_Reg_FPR(src1),
10589               OpcReg_FPR(src2),
10590               Pop_Mem_FPR(dst) );
10591   ins_pipe( fpu_mem_reg_reg );
10592 %}
10593 //
10594 // This instruction does not round to 24-bits
10595 instruct divFPR_reg(regFPR dst, regFPR src) %{
10596   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10597   match(Set dst (DivF dst src));
10598 
10599   format %{ "FDIV   $dst,$src" %}
10600   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10601   ins_encode( Push_Reg_FPR(src),
10602               OpcP, RegOpc(dst) );
10603   ins_pipe( fpu_reg_reg );
10604 %}
10605 
10606 
10607 // Spill to obtain 24-bit precision
10608 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10609   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10610   match(Set dst (ModF src1 src2));
10611   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10612 
10613   format %{ "FMOD   $dst,$src1,$src2" %}
10614   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10615               emitModDPR(),
10616               Push_Result_Mod_DPR(src2),
10617               Pop_Mem_FPR(dst));
10618   ins_pipe( pipe_slow );
10619 %}
10620 //
10621 // This instruction does not round to 24-bits
10622 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10623   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10624   match(Set dst (ModF dst src));
10625   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10626 
10627   format %{ "FMOD   $dst,$src" %}
10628   ins_encode(Push_Reg_Mod_DPR(dst, src),
10629               emitModDPR(),
10630               Push_Result_Mod_DPR(src),
10631               Pop_Reg_FPR(dst));
10632   ins_pipe( pipe_slow );
10633 %}
10634 
10635 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10636   predicate(UseSSE>=1);
10637   match(Set dst (ModF src0 src1));
10638   effect(KILL rax, KILL cr);
10639   format %{ "SUB    ESP,4\t # FMOD\n"
10640           "\tMOVSS  [ESP+0],$src1\n"
10641           "\tFLD_S  [ESP+0]\n"
10642           "\tMOVSS  [ESP+0],$src0\n"
10643           "\tFLD_S  [ESP+0]\n"
10644      "loop:\tFPREM\n"
10645           "\tFWAIT\n"
10646           "\tFNSTSW AX\n"
10647           "\tSAHF\n"
10648           "\tJP     loop\n"
10649           "\tFSTP_S [ESP+0]\n"
10650           "\tMOVSS  $dst,[ESP+0]\n"
10651           "\tADD    ESP,4\n"
10652           "\tFSTP   ST0\t # Restore FPU Stack"
10653     %}
10654   ins_cost(250);
10655   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10656   ins_pipe( pipe_slow );
10657 %}
10658 
10659 
10660 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10662 
10663 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10664   predicate(UseSSE==0);
10665   match(Set dst (RoundFloat src));
10666   ins_cost(125);
10667   format %{ "FST_S  $dst,$src\t# F-round" %}
10668   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10669   ins_pipe( fpu_mem_reg );
10670 %}
10671 
10672 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10673   predicate(UseSSE<=1);
10674   match(Set dst (RoundDouble src));
10675   ins_cost(125);
10676   format %{ "FST_D  $dst,$src\t# D-round" %}
10677   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10678   ins_pipe( fpu_mem_reg );
10679 %}
10680 
// Force rounding to 24-bit precision and 8-bit exponent
10682 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10683   predicate(UseSSE==0);
10684   match(Set dst (ConvD2F src));
10685   format %{ "FST_S  $dst,$src\t# F-round" %}
10686   expand %{
10687     roundFloat_mem_reg(dst,src);
10688   %}
10689 %}
10690 
// Force rounding to 24-bit precision and 8-bit exponent
10692 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10693   predicate(UseSSE==1);
10694   match(Set dst (ConvD2F src));
10695   effect( KILL cr );
10696   format %{ "SUB    ESP,4\n\t"
10697             "FST_S  [ESP],$src\t# F-round\n\t"
10698             "MOVSS  $dst,[ESP]\n\t"
10699             "ADD ESP,4" %}
10700   ins_encode %{
10701     __ subptr(rsp, 4);
10702     if ($src$$reg != FPR1L_enc) {
10703       __ fld_s($src$$reg-1);
10704       __ fstp_s(Address(rsp, 0));
10705     } else {
10706       __ fst_s(Address(rsp, 0));
10707     }
10708     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10709     __ addptr(rsp, 4);
10710   %}
10711   ins_pipe( pipe_slow );
10712 %}
10713 
10714 // Force rounding double precision to single precision
10715 instruct convD2F_reg(regF dst, regD src) %{
10716   predicate(UseSSE>=2);
10717   match(Set dst (ConvD2F src));
10718   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10719   ins_encode %{
10720     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10721   %}
10722   ins_pipe( pipe_slow );
10723 %}
10724 
10725 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10726   predicate(UseSSE==0);
10727   match(Set dst (ConvF2D src));
10728   format %{ "FST_S  $dst,$src\t# D-round" %}
10729   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10730   ins_pipe( fpu_reg_reg );
10731 %}
10732 
10733 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10734   predicate(UseSSE==1);
10735   match(Set dst (ConvF2D src));
10736   format %{ "FST_D  $dst,$src\t# D-round" %}
10737   expand %{
10738     roundDouble_mem_reg(dst,src);
10739   %}
10740 %}
10741 
10742 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10743   predicate(UseSSE==1);
10744   match(Set dst (ConvF2D src));
10745   effect( KILL cr );
10746   format %{ "SUB    ESP,4\n\t"
10747             "MOVSS  [ESP] $src\n\t"
10748             "FLD_S  [ESP]\n\t"
10749             "ADD    ESP,4\n\t"
10750             "FSTP   $dst\t# D-round" %}
10751   ins_encode %{
10752     __ subptr(rsp, 4);
10753     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10754     __ fld_s(Address(rsp, 0));
10755     __ addptr(rsp, 4);
10756     __ fstp_d($dst$$reg);
10757   %}
10758   ins_pipe( pipe_slow );
10759 %}
10760 
10761 instruct convF2D_reg(regD dst, regF src) %{
10762   predicate(UseSSE>=2);
10763   match(Set dst (ConvF2D src));
10764   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10765   ins_encode %{
10766     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10767   %}
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10772 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10773   predicate(UseSSE<=1);
10774   match(Set dst (ConvD2I src));
10775   effect( KILL tmp, KILL cr );
10776   format %{ "FLD    $src\t# Convert double to int \n\t"
10777             "FLDCW  trunc mode\n\t"
10778             "SUB    ESP,4\n\t"
10779             "FISTp  [ESP + #0]\n\t"
10780             "FLDCW  std/24-bit mode\n\t"
10781             "POP    EAX\n\t"
10782             "CMP    EAX,0x80000000\n\t"
10783             "JNE,s  fast\n\t"
10784             "FLD_D  $src\n\t"
10785             "CALL   d2i_wrapper\n"
10786       "fast:" %}
10787   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10788   ins_pipe( pipe_slow );
10789 %}
10790 
10791 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10792 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10793   predicate(UseSSE>=2);
10794   match(Set dst (ConvD2I src));
10795   effect( KILL tmp, KILL cr );
10796   format %{ "CVTTSD2SI $dst, $src\n\t"
10797             "CMP    $dst,0x80000000\n\t"
10798             "JNE,s  fast\n\t"
10799             "SUB    ESP, 8\n\t"
10800             "MOVSD  [ESP], $src\n\t"
10801             "FLD_D  [ESP]\n\t"
10802             "ADD    ESP, 8\n\t"
10803             "CALL   d2i_wrapper\n"
10804       "fast:" %}
10805   ins_encode %{
10806     Label fast;
10807     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10808     __ cmpl($dst$$Register, 0x80000000);
10809     __ jccb(Assembler::notEqual, fast);
10810     __ subptr(rsp, 8);
10811     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10812     __ fld_d(Address(rsp, 0));
10813     __ addptr(rsp, 8);
10814     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10815     __ bind(fast);
10816   %}
10817   ins_pipe( pipe_slow );
10818 %}
10819 
10820 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10821   predicate(UseSSE<=1);
10822   match(Set dst (ConvD2L src));
10823   effect( KILL cr );
10824   format %{ "FLD    $src\t# Convert double to long\n\t"
10825             "FLDCW  trunc mode\n\t"
10826             "SUB    ESP,8\n\t"
10827             "FISTp  [ESP + #0]\n\t"
10828             "FLDCW  std/24-bit mode\n\t"
10829             "POP    EAX\n\t"
10830             "POP    EDX\n\t"
10831             "CMP    EDX,0x80000000\n\t"
10832             "JNE,s  fast\n\t"
10833             "TEST   EAX,EAX\n\t"
10834             "JNE,s  fast\n\t"
10835             "FLD    $src\n\t"
10836             "CALL   d2l_wrapper\n"
10837       "fast:" %}
10838   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10839   ins_pipe( pipe_slow );
10840 %}
10841 
10842 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10843 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10844   predicate (UseSSE>=2);
10845   match(Set dst (ConvD2L src));
10846   effect( KILL cr );
10847   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10848             "MOVSD  [ESP],$src\n\t"
10849             "FLD_D  [ESP]\n\t"
10850             "FLDCW  trunc mode\n\t"
10851             "FISTp  [ESP + #0]\n\t"
10852             "FLDCW  std/24-bit mode\n\t"
10853             "POP    EAX\n\t"
10854             "POP    EDX\n\t"
10855             "CMP    EDX,0x80000000\n\t"
10856             "JNE,s  fast\n\t"
10857             "TEST   EAX,EAX\n\t"
10858             "JNE,s  fast\n\t"
10859             "SUB    ESP,8\n\t"
10860             "MOVSD  [ESP],$src\n\t"
10861             "FLD_D  [ESP]\n\t"
10862             "ADD    ESP,8\n\t"
10863             "CALL   d2l_wrapper\n"
10864       "fast:" %}
10865   ins_encode %{
10866     Label fast;
10867     __ subptr(rsp, 8);
10868     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10869     __ fld_d(Address(rsp, 0));
10870     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10871     __ fistp_d(Address(rsp, 0));
10872     // Restore the rounding mode, mask the exception
10873     if (Compile::current()->in_24_bit_fp_mode()) {
10874       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10875     } else {
10876       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10877     }
10878     // Load the converted long, adjust CPU stack
10879     __ pop(rax);
10880     __ pop(rdx);
10881     __ cmpl(rdx, 0x80000000);
10882     __ jccb(Assembler::notEqual, fast);
10883     __ testl(rax, rax);
10884     __ jccb(Assembler::notEqual, fast);
10885     __ subptr(rsp, 8);
10886     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10887     __ fld_d(Address(rsp, 0));
10888     __ addptr(rsp, 8);
10889     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10890     __ bind(fast);
10891   %}
10892   ins_pipe( pipe_slow );
10893 %}
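// Note on the ConvD2L rules above: after the truncating FIST store, the only
// result that needs further checking is 0x8000000000000000 (EDX==0x80000000
// and EAX==0), which the FPU also produces for NaNs and out-of-range inputs;
// in that case d2l_wrapper recomputes the result with Java semantics
// ((long)NaN == 0L, overflow clamps to Long.MIN_VALUE / Long.MAX_VALUE).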
10894 
10895 // Convert a float to an int.  Java semantics require we do complex
10896 // manglations in the corner cases.  So we set the rounding mode to
10897 // 'zero', store the darned float down as an int, and reset the
10898 // rounding mode to 'nearest'.  The hardware stores a flag value down
10899 // if we would overflow or converted a NAN; we check for this and
10900 // go the slow path if needed.
10901 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10902   predicate(UseSSE==0);
10903   match(Set dst (ConvF2I src));
10904   effect( KILL tmp, KILL cr );
10905   format %{ "FLD    $src\t# Convert float to int \n\t"
10906             "FLDCW  trunc mode\n\t"
10907             "SUB    ESP,4\n\t"
10908             "FISTp  [ESP + #0]\n\t"
10909             "FLDCW  std/24-bit mode\n\t"
10910             "POP    EAX\n\t"
10911             "CMP    EAX,0x80000000\n\t"
10912             "JNE,s  fast\n\t"
10913             "FLD    $src\n\t"
10914             "CALL   d2i_wrapper\n"
10915       "fast:" %}
10916   // DPR2I_encoding works for FPR2I
10917   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10918   ins_pipe( pipe_slow );
10919 %}
10920 
10921 // Convert a float in xmm to an int reg.
10922 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10923   predicate(UseSSE>=1);
10924   match(Set dst (ConvF2I src));
10925   effect( KILL tmp, KILL cr );
10926   format %{ "CVTTSS2SI $dst, $src\n\t"
10927             "CMP    $dst,0x80000000\n\t"
10928             "JNE,s  fast\n\t"
10929             "SUB    ESP, 4\n\t"
10930             "MOVSS  [ESP], $src\n\t"
10931             "FLD    [ESP]\n\t"
10932             "ADD    ESP, 4\n\t"
10933             "CALL   d2i_wrapper\n"
10934       "fast:" %}
10935   ins_encode %{
10936     Label fast;
10937     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10938     __ cmpl($dst$$Register, 0x80000000);
10939     __ jccb(Assembler::notEqual, fast);
10940     __ subptr(rsp, 4);
10941     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10942     __ fld_s(Address(rsp, 0));
10943     __ addptr(rsp, 4);
10944     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10945     __ bind(fast);
10946   %}
10947   ins_pipe( pipe_slow );
10948 %}
10949 
10950 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10951   predicate(UseSSE==0);
10952   match(Set dst (ConvF2L src));
10953   effect( KILL cr );
10954   format %{ "FLD    $src\t# Convert float to long\n\t"
10955             "FLDCW  trunc mode\n\t"
10956             "SUB    ESP,8\n\t"
10957             "FISTp  [ESP + #0]\n\t"
10958             "FLDCW  std/24-bit mode\n\t"
10959             "POP    EAX\n\t"
10960             "POP    EDX\n\t"
10961             "CMP    EDX,0x80000000\n\t"
10962             "JNE,s  fast\n\t"
10963             "TEST   EAX,EAX\n\t"
10964             "JNE,s  fast\n\t"
10965             "FLD    $src\n\t"
10966             "CALL   d2l_wrapper\n"
10967       "fast:" %}
10968   // DPR2L_encoding works for FPR2L
10969   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10970   ins_pipe( pipe_slow );
10971 %}
10972 
10973 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10974 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10975   predicate (UseSSE>=1);
10976   match(Set dst (ConvF2L src));
10977   effect( KILL cr );
10978   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10979             "MOVSS  [ESP],$src\n\t"
10980             "FLD_S  [ESP]\n\t"
10981             "FLDCW  trunc mode\n\t"
10982             "FISTp  [ESP + #0]\n\t"
10983             "FLDCW  std/24-bit mode\n\t"
10984             "POP    EAX\n\t"
10985             "POP    EDX\n\t"
10986             "CMP    EDX,0x80000000\n\t"
10987             "JNE,s  fast\n\t"
10988             "TEST   EAX,EAX\n\t"
10989             "JNE,s  fast\n\t"
10990             "SUB    ESP,4\t# Convert float to long\n\t"
10991             "MOVSS  [ESP],$src\n\t"
10992             "FLD_S  [ESP]\n\t"
10993             "ADD    ESP,4\n\t"
10994             "CALL   d2l_wrapper\n"
10995       "fast:" %}
10996   ins_encode %{
10997     Label fast;
10998     __ subptr(rsp, 8);
10999     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11000     __ fld_s(Address(rsp, 0));
11001     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11002     __ fistp_d(Address(rsp, 0));
11003     // Restore the rounding mode, mask the exception
11004     if (Compile::current()->in_24_bit_fp_mode()) {
11005       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11006     } else {
11007       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11008     }
11009     // Load the converted long, adjust CPU stack
11010     __ pop(rax);
11011     __ pop(rdx);
11012     __ cmpl(rdx, 0x80000000);
11013     __ jccb(Assembler::notEqual, fast);
11014     __ testl(rax, rax);
11015     __ jccb(Assembler::notEqual, fast);
11016     __ subptr(rsp, 4);
11017     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11018     __ fld_s(Address(rsp, 0));
11019     __ addptr(rsp, 4);
11020     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11021     __ bind(fast);
11022   %}
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11027   predicate( UseSSE<=1 );
11028   match(Set dst (ConvI2D src));
11029   format %{ "FILD   $src\n\t"
11030             "FSTP   $dst" %}
11031   opcode(0xDB, 0x0);  /* DB /0 */
11032   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 instruct convI2D_reg(regD dst, rRegI src) %{
11037   predicate( UseSSE>=2 && !UseXmmI2D );
11038   match(Set dst (ConvI2D src));
11039   format %{ "CVTSI2SD $dst,$src" %}
11040   ins_encode %{
11041     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11042   %}
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convI2D_mem(regD dst, memory mem) %{
11047   predicate( UseSSE>=2 );
11048   match(Set dst (ConvI2D (LoadI mem)));
11049   format %{ "CVTSI2SD $dst,$mem" %}
11050   ins_encode %{
11051     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11052   %}
11053   ins_pipe( pipe_slow );
11054 %}
11055 
11056 instruct convXI2D_reg(regD dst, rRegI src)
11057 %{
11058   predicate( UseSSE>=2 && UseXmmI2D );
11059   match(Set dst (ConvI2D src));
11060 
11061   format %{ "MOVD  $dst,$src\n\t"
11062             "CVTDQ2PD $dst,$dst\t# i2d" %}
11063   ins_encode %{
11064     __ movdl($dst$$XMMRegister, $src$$Register);
11065     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11066   %}
11067   ins_pipe(pipe_slow); // XXX
11068 %}
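// The MOVD + CVTDQ2PD form above (and the analogous MOVD + CVTDQ2PS i2f form
// further below, selected by UseXmmI2D / UseXmmI2F) performs the same
// conversion as CVTSI2SD/CVTSI2SS but writes the whole destination register,
// avoiding the false dependency CVTSI2Sx has on the previous contents of
// $dst on some CPUs.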
11069 
11070 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11071   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11072   match(Set dst (ConvI2D (LoadI mem)));
11073   format %{ "FILD   $mem\n\t"
11074             "FSTP   $dst" %}
11075   opcode(0xDB);      /* DB /0 */
11076   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11077               Pop_Reg_DPR(dst));
11078   ins_pipe( fpu_reg_mem );
11079 %}
11080 
11081 // Convert a byte-range int (the input is (AndI x 255)) to a float; no rounding step needed.
11082 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11083   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11084   match(Set dst (ConvI2F src));
11085   format %{ "FILD   $src\n\t"
11086             "FSTP   $dst" %}
11087 
11088   opcode(0xDB, 0x0);  /* DB /0 */
11089   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11090   ins_pipe( fpu_reg_mem );
11091 %}
11092 
11093 // In 24-bit mode, force exponent rounding by storing back out
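// (An int with more than 24 significant bits is not exact as a float, e.g.
// (float)16777217 must round to 16777216.0f; the store to a 32-bit stack slot
// is what forces that rounding here.)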
11094 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11095   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11096   match(Set dst (ConvI2F src));
11097   ins_cost(200);
11098   format %{ "FILD   $src\n\t"
11099             "FSTP_S $dst" %}
11100   opcode(0xDB, 0x0);  /* DB /0 */
11101   ins_encode( Push_Mem_I(src),
11102               Pop_Mem_FPR(dst));
11103   ins_pipe( fpu_mem_mem );
11104 %}
11105 
11106 // In 24-bit mode, force exponent rounding by storing back out
11107 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11108   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11109   match(Set dst (ConvI2F (LoadI mem)));
11110   ins_cost(200);
11111   format %{ "FILD   $mem\n\t"
11112             "FSTP_S $dst" %}
11113   opcode(0xDB);  /* DB /0 */
11114   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11115               Pop_Mem_FPR(dst));
11116   ins_pipe( fpu_mem_mem );
11117 %}
11118 
11119 // This instruction does not round to 24 bits
11120 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   format %{ "FILD   $src\n\t"
11124             "FSTP   $dst" %}
11125   opcode(0xDB, 0x0);  /* DB /0 */
11126   ins_encode( Push_Mem_I(src),
11127               Pop_Reg_FPR(dst));
11128   ins_pipe( fpu_reg_mem );
11129 %}
11130 
11131 // This instruction does not round to 24 bits
11132 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11133   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134   match(Set dst (ConvI2F (LoadI mem)));
11135   format %{ "FILD   $mem\n\t"
11136             "FSTP   $dst" %}
11137   opcode(0xDB);      /* DB /0 */
11138   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11139               Pop_Reg_FPR(dst));
11140   ins_pipe( fpu_reg_mem );
11141 %}
11142 
11143 // Convert an int to a float in xmm; no rounding step needed.
11144 instruct convI2F_reg(regF dst, rRegI src) %{
11145   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11146   match(Set dst (ConvI2F src));
11147   format %{ "CVTSI2SS $dst, $src" %}
11148   ins_encode %{
11149     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11150   %}
11151   ins_pipe( pipe_slow );
11152 %}
11153 
11154 instruct convXI2F_reg(regF dst, rRegI src)
11155 %{
11156   predicate( UseSSE>=2 && UseXmmI2F );
11157   match(Set dst (ConvI2F src));
11158 
11159   format %{ "MOVD  $dst,$src\n\t"
11160             "CVTDQ2PS $dst,$dst\t# i2f" %}
11161   ins_encode %{
11162     __ movdl($dst$$XMMRegister, $src$$Register);
11163     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11164   %}
11165   ins_pipe(pipe_slow); // XXX
11166 %}
11167 
11168 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11169   match(Set dst (ConvI2L src));
11170   effect(KILL cr);
11171   ins_cost(375);
11172   format %{ "MOV    $dst.lo,$src\n\t"
11173             "MOV    $dst.hi,$src\n\t"
11174             "SAR    $dst.hi,31" %}
11175   ins_encode(convert_int_long(dst,src));
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 // Zero-extend convert int to long
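// Matches the (AndL (ConvI2L x) 0xFFFFFFFF) shape produced by Java code such
// as "x & 0xFFFFFFFFL", so the high word can simply be zeroed instead of
// sign-extended with SAR.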
11180 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11181   match(Set dst (AndL (ConvI2L src) mask) );
11182   effect( KILL flags );
11183   ins_cost(250);
11184   format %{ "MOV    $dst.lo,$src\n\t"
11185             "XOR    $dst.hi,$dst.hi" %}
11186   opcode(0x33); // XOR
11187   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 // Zero-extend long
11192 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11193   match(Set dst (AndL src mask) );
11194   effect( KILL flags );
11195   ins_cost(250);
11196   format %{ "MOV    $dst.lo,$src.lo\n\t"
11197             "XOR    $dst.hi,$dst.hi" %}
11198   opcode(0x33); // XOR
11199   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11200   ins_pipe( ialu_reg_reg_long );
11201 %}
11202 
11203 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11204   predicate (UseSSE<=1);
11205   match(Set dst (ConvL2D src));
11206   effect( KILL cr );
11207   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11208             "PUSH   $src.lo\n\t"
11209             "FILD   ST,[ESP + #0]\n\t"
11210             "ADD    ESP,8\n\t"
11211             "FSTP_D $dst\t# D-round" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=2);
11219   match(Set dst (ConvL2D src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_D [ESP]\n\t"
11225             "MOVSD  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11233   predicate (UseSSE>=1);
11234   match(Set dst (ConvL2F src));
11235   effect( KILL cr );
11236   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11237             "PUSH   $src.lo\n\t"
11238             "FILD_D [ESP]\n\t"
11239             "FSTP_S [ESP]\n\t"
11240             "MOVSS  $dst,[ESP]\n\t"
11241             "ADD    ESP,8" %}
11242   opcode(0xDF, 0x5);  /* DF /5 */
11243   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11244   ins_pipe( pipe_slow );
11245 %}
11246 
11247 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11248   match(Set dst (ConvL2F src));
11249   effect( KILL cr );
11250   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11251             "PUSH   $src.lo\n\t"
11252             "FILD   ST,[ESP + #0]\n\t"
11253             "ADD    ESP,8\n\t"
11254             "FSTP_S $dst\t# F-round" %}
11255   opcode(0xDF, 0x5);  /* DF /5 */
11256   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11257   ins_pipe( pipe_slow );
11258 %}
11259 
11260 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11261   match(Set dst (ConvL2I src));
11262   effect( DEF dst, USE src );
11263   format %{ "MOV    $dst,$src.lo" %}
11264   ins_encode(enc_CopyL_Lo(dst,src));
11265   ins_pipe( ialu_reg_reg );
11266 %}
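// The MoveF2I/MoveI2F and MoveD2L/MoveL2D rules below reinterpret raw bits
// between the FP and integer worlds (the patterns behind intrinsics such as
// Float.floatToRawIntBits / Double.doubleToRawLongBits and their inverses);
// which variant matches depends on UseSSE and on whether the value lives in
// a register or a stack slot.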
11267 
11268 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11269   match(Set dst (MoveF2I src));
11270   effect( DEF dst, USE src );
11271   ins_cost(100);
11272   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11273   ins_encode %{
11274     __ movl($dst$$Register, Address(rsp, $src$$disp));
11275   %}
11276   ins_pipe( ialu_reg_mem );
11277 %}
11278 
11279 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11280   predicate(UseSSE==0);
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283 
11284   ins_cost(125);
11285   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11286   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11287   ins_pipe( fpu_mem_reg );
11288 %}
11289 
11290 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11291   predicate(UseSSE>=1);
11292   match(Set dst (MoveF2I src));
11293   effect( DEF dst, USE src );
11294 
11295   ins_cost(95);
11296   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11297   ins_encode %{
11298     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11304   predicate(UseSSE>=2);
11305   match(Set dst (MoveF2I src));
11306   effect( DEF dst, USE src );
11307   ins_cost(85);
11308   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11309   ins_encode %{
11310     __ movdl($dst$$Register, $src$$XMMRegister);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(100);
11320   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11321   ins_encode %{
11322     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11323   %}
11324   ins_pipe( ialu_mem_reg );
11325 %}
11326 
11327 
11328 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11329   predicate(UseSSE==0);
11330   match(Set dst (MoveI2F src));
11331   effect(DEF dst, USE src);
11332 
11333   ins_cost(125);
11334   format %{ "FLD_S  $src\n\t"
11335             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11336   opcode(0xD9);               /* D9 /0, FLD m32real */
11337   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11338               Pop_Reg_FPR(dst) );
11339   ins_pipe( fpu_reg_mem );
11340 %}
11341 
11342 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11343   predicate(UseSSE>=1);
11344   match(Set dst (MoveI2F src));
11345   effect( DEF dst, USE src );
11346 
11347   ins_cost(95);
11348   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11349   ins_encode %{
11350     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11356   predicate(UseSSE>=2);
11357   match(Set dst (MoveI2F src));
11358   effect( DEF dst, USE src );
11359 
11360   ins_cost(85);
11361   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11362   ins_encode %{
11363     __ movdl($dst$$XMMRegister, $src$$Register);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11369   match(Set dst (MoveD2L src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(250);
11373   format %{ "MOV    $dst.lo,$src\n\t"
11374             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11375   opcode(0x8B, 0x8B);
11376   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11377   ins_pipe( ialu_mem_long_reg );
11378 %}
11379 
11380 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11381   predicate(UseSSE<=1);
11382   match(Set dst (MoveD2L src));
11383   effect(DEF dst, USE src);
11384 
11385   ins_cost(125);
11386   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11387   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11388   ins_pipe( fpu_mem_reg );
11389 %}
11390 
11391 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveD2L src));
11394   effect(DEF dst, USE src);
11395   ins_cost(95);
11396   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11397   ins_encode %{
11398     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11404   predicate(UseSSE>=2);
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src, TEMP tmp);
11407   ins_cost(85);
11408   format %{ "MOVD   $dst.lo,$src\n\t"
11409             "PSHUFLW $tmp,$src,0x4E\n\t"
11410             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11411   ins_encode %{
11412     __ movdl($dst$$Register, $src$$XMMRegister);
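    // PSHUFLW with selector 0x4E swaps the two 32-bit halves of the low
    // quadword, leaving the high half of the double in the low dword of $tmp
    // for the following MOVD.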
11413     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11414     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11420   match(Set dst (MoveL2D src));
11421   effect(DEF dst, USE src);
11422 
11423   ins_cost(200);
11424   format %{ "MOV    $dst,$src.lo\n\t"
11425             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11426   opcode(0x89, 0x89);
11427   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11428   ins_pipe( ialu_mem_long_reg );
11429 %}
11430 
11431 
11432 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11433   predicate(UseSSE<=1);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436   ins_cost(125);
11437 
11438   format %{ "FLD_D  $src\n\t"
11439             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11440   opcode(0xDD);               /* DD /0, FLD m64real */
11441   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11442               Pop_Reg_DPR(dst) );
11443   ins_pipe( fpu_reg_mem );
11444 %}
11445 
11446 
11447 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11448   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11449   match(Set dst (MoveL2D src));
11450   effect(DEF dst, USE src);
11451 
11452   ins_cost(95);
11453   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11454   ins_encode %{
11455     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11461   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11462   match(Set dst (MoveL2D src));
11463   effect(DEF dst, USE src);
11464 
11465   ins_cost(95);
11466   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11467   ins_encode %{
11468     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11474   predicate(UseSSE>=2);
11475   match(Set dst (MoveL2D src));
11476   effect(TEMP dst, USE src, TEMP tmp);
11477   ins_cost(85);
11478   format %{ "MOVD   $dst,$src.lo\n\t"
11479             "MOVD   $tmp,$src.hi\n\t"
11480             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11481   ins_encode %{
11482     __ movdl($dst$$XMMRegister, $src$$Register);
11483     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11484     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11485   %}
11486   ins_pipe( pipe_slow );
11487 %}
11488 
11489 
11490 // =======================================================================
11491 // fast clearing of an array
11492 // Small ClearArray non-AVX512.
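// The count is in 8-byte doublewords (hence SHL by 3 to get bytes, or by 1 to
// get 4-byte words for REP STOS).  The large path picks REP STOSB when
// UseFastStosb is set, a YMM zeroing loop when UseXMMForObjInit is set, and
// plain REP STOS otherwise; the AVX-512 and large-array variants below follow
// the same structure.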
11493 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11494   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11495   match(Set dummy (ClearArray cnt base));
11496   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11497 
11498   format %{ $$template
11499     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11500     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11501     $$emit$$"JG     LARGE\n\t"
11502     $$emit$$"SHL    ECX, 1\n\t"
11503     $$emit$$"DEC    ECX\n\t"
11504     $$emit$$"JS     DONE\t# Zero length\n\t"
11505     $$emit$$"MOV    (EDI,ECX,4),EAX\t# LOOP\n\t"
11506     $$emit$$"DEC    ECX\n\t"
11507     $$emit$$"JGE    LOOP\n\t"
11508     $$emit$$"JMP    DONE\n\t"
11509     $$emit$$"# LARGE:\n\t"
11510     if (UseFastStosb) {
11511        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11512        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11513     } else if (UseXMMForObjInit) {
11514        $$emit$$"MOV     RDI,RAX\n\t"
11515        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11516        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11517        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11518        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11519        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11520        $$emit$$"ADD     0x40,RAX\n\t"
11521        $$emit$$"# L_zero_64_bytes:\n\t"
11522        $$emit$$"SUB     0x8,RCX\n\t"
11523        $$emit$$"JGE     L_loop\n\t"
11524        $$emit$$"ADD     0x4,RCX\n\t"
11525        $$emit$$"JL      L_tail\n\t"
11526        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11527        $$emit$$"ADD     0x20,RAX\n\t"
11528        $$emit$$"SUB     0x4,RCX\n\t"
11529        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11530        $$emit$$"ADD     0x4,RCX\n\t"
11531        $$emit$$"JLE     L_end\n\t"
11532        $$emit$$"DEC     RCX\n\t"
11533        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11534        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11535        $$emit$$"ADD     0x8,RAX\n\t"
11536        $$emit$$"DEC     RCX\n\t"
11537        $$emit$$"JGE     L_sloop\n\t"
11538        $$emit$$"# L_end:\n\t"
11539     } else {
11540        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11541        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11542     }
11543     $$emit$$"# DONE"
11544   %}
11545   ins_encode %{
11546     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11547                  $tmp$$XMMRegister, false, knoreg);
11548   %}
11549   ins_pipe( pipe_slow );
11550 %}
11551 
11552 // Small ClearArray AVX512 non-constant length.
11553 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11554   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11555   match(Set dummy (ClearArray cnt base));
11556   ins_cost(125);
11557   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11558 
11559   format %{ $$template
11560     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11561     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11562     $$emit$$"JG     LARGE\n\t"
11563     $$emit$$"SHL    ECX, 1\n\t"
11564     $$emit$$"DEC    ECX\n\t"
11565     $$emit$$"JS     DONE\t# Zero length\n\t"
11566     $$emit$$"MOV    (EDI,ECX,4),EAX\t# LOOP\n\t"
11567     $$emit$$"DEC    ECX\n\t"
11568     $$emit$$"JGE    LOOP\n\t"
11569     $$emit$$"JMP    DONE\n\t"
11570     $$emit$$"# LARGE:\n\t"
11571     if (UseFastStosb) {
11572        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11573        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11574     } else if (UseXMMForObjInit) {
11575        $$emit$$"MOV     RDI,RAX\n\t"
11576        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11577        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11578        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11579        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11580        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11581        $$emit$$"ADD     0x40,RAX\n\t"
11582        $$emit$$"# L_zero_64_bytes:\n\t"
11583        $$emit$$"SUB     0x8,RCX\n\t"
11584        $$emit$$"JGE     L_loop\n\t"
11585        $$emit$$"ADD     0x4,RCX\n\t"
11586        $$emit$$"JL      L_tail\n\t"
11587        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11588        $$emit$$"ADD     0x20,RAX\n\t"
11589        $$emit$$"SUB     0x4,RCX\n\t"
11590        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11591        $$emit$$"ADD     0x4,RCX\n\t"
11592        $$emit$$"JLE     L_end\n\t"
11593        $$emit$$"DEC     RCX\n\t"
11594        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11595        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11596        $$emit$$"ADD     0x8,RAX\n\t"
11597        $$emit$$"DEC     RCX\n\t"
11598        $$emit$$"JGE     L_sloop\n\t"
11599        $$emit$$"# L_end:\n\t"
11600     } else {
11601        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11602        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11603     }
11604     $$emit$$"# DONE"
11605   %}
11606   ins_encode %{
11607     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11608                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11609   %}
11610   ins_pipe( pipe_slow );
11611 %}
11612 
11613 // Large ClearArray non-AVX512.
11614 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11615   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11616   match(Set dummy (ClearArray cnt base));
11617   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11618   format %{ $$template
11619     if (UseFastStosb) {
11620        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11621        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11622        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11623     } else if (UseXMMForObjInit) {
11624        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11625        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11626        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11627        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11628        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11629        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11630        $$emit$$"ADD     0x40,RAX\n\t"
11631        $$emit$$"# L_zero_64_bytes:\n\t"
11632        $$emit$$"SUB     0x8,RCX\n\t"
11633        $$emit$$"JGE     L_loop\n\t"
11634        $$emit$$"ADD     0x4,RCX\n\t"
11635        $$emit$$"JL      L_tail\n\t"
11636        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11637        $$emit$$"ADD     0x20,RAX\n\t"
11638        $$emit$$"SUB     0x4,RCX\n\t"
11639        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11640        $$emit$$"ADD     0x4,RCX\n\t"
11641        $$emit$$"JLE     L_end\n\t"
11642        $$emit$$"DEC     RCX\n\t"
11643        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11644        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11645        $$emit$$"ADD     0x8,RAX\n\t"
11646        $$emit$$"DEC     RCX\n\t"
11647        $$emit$$"JGE     L_sloop\n\t"
11648        $$emit$$"# L_end:\n\t"
11649     } else {
11650        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11651        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11652        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11653     }
11654     $$emit$$"# DONE"
11655   %}
11656   ins_encode %{
11657     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11658                  $tmp$$XMMRegister, true, knoreg);
11659   %}
11660   ins_pipe( pipe_slow );
11661 %}
11662 
11663 // Large ClearArray AVX512.
11664 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11665   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11666   match(Set dummy (ClearArray cnt base));
11667   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11668   format %{ $$template
11669     if (UseFastStosb) {
11670        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11671        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11672        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11673     } else if (UseXMMForObjInit) {
11674        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11675        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11676        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11677        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11678        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11679        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11680        $$emit$$"ADD     0x40,RAX\n\t"
11681        $$emit$$"# L_zero_64_bytes:\n\t"
11682        $$emit$$"SUB     0x8,RCX\n\t"
11683        $$emit$$"JGE     L_loop\n\t"
11684        $$emit$$"ADD     0x4,RCX\n\t"
11685        $$emit$$"JL      L_tail\n\t"
11686        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11687        $$emit$$"ADD     0x20,RAX\n\t"
11688        $$emit$$"SUB     0x4,RCX\n\t"
11689        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11690        $$emit$$"ADD     0x4,RCX\n\t"
11691        $$emit$$"JLE     L_end\n\t"
11692        $$emit$$"DEC     RCX\n\t"
11693        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11694        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11695        $$emit$$"ADD     0x8,RAX\n\t"
11696        $$emit$$"DEC     RCX\n\t"
11697        $$emit$$"JGE     L_sloop\n\t"
11698        $$emit$$"# L_end:\n\t"
11699     } else {
11700        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11701        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11702        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11703     }
11704     $$emit$$"# DONE"
11705   %}
11706   ins_encode %{
11707     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11708                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11709   %}
11710   ins_pipe( pipe_slow );
11711 %}
11712 
11713 // Small ClearArray AVX512 constant length.
11714 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11715 %{
11716   predicate(!((ClearArrayNode*)n)->is_large() &&
11717                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11718   match(Set dummy (ClearArray cnt base));
11719   ins_cost(100);
11720   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11721   format %{ "clear_mem_imm $base,$cnt" %}
11722   ins_encode %{
11723    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11724   %}
11725   ins_pipe(pipe_slow);
11726 %}
11727 
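// String compare rules.  The LL/UU/LU/UL suffixes name the encodings of the
// two operands (L = Latin-1 bytes, U = UTF-16 chars); the _evex variants are
// selected when AVX-512 VL/BW is available and take an opmask temporary.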
11728 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11729                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11730   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11731   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11732   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11733 
11734   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11735   ins_encode %{
11736     __ string_compare($str1$$Register, $str2$$Register,
11737                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11738                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11739   %}
11740   ins_pipe( pipe_slow );
11741 %}
11742 
11743 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11744                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11745   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11746   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11747   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11748 
11749   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11750   ins_encode %{
11751     __ string_compare($str1$$Register, $str2$$Register,
11752                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11753                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11754   %}
11755   ins_pipe( pipe_slow );
11756 %}
11757 
11758 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11759                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11760   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11761   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11762   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11763 
11764   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11765   ins_encode %{
11766     __ string_compare($str1$$Register, $str2$$Register,
11767                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11768                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11769   %}
11770   ins_pipe( pipe_slow );
11771 %}
11772 
11773 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11774                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11775   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11776   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11777   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11778 
11779   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11780   ins_encode %{
11781     __ string_compare($str1$$Register, $str2$$Register,
11782                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11783                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11784   %}
11785   ins_pipe( pipe_slow );
11786 %}
11787 
11788 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11789                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11790   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11791   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11792   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11793 
11794   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11795   ins_encode %{
11796     __ string_compare($str1$$Register, $str2$$Register,
11797                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11798                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11799   %}
11800   ins_pipe( pipe_slow );
11801 %}
11802 
11803 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11804                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11805   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11806   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11807   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11808 
11809   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11810   ins_encode %{
11811     __ string_compare($str1$$Register, $str2$$Register,
11812                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11813                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11819                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11820   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11821   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11822   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11823 
11824   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11825   ins_encode %{
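    // Note the swapped operand order (str2/cnt2 first): for the UL encoding
    // str1 is bound to ESI and str2 to EDI, so passing them swapped keeps the
    // masm routine's (EDI, ESI, ECX, EDX) register convention used by the
    // other variants.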
11826     __ string_compare($str2$$Register, $str1$$Register,
11827                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11828                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11829   %}
11830   ins_pipe( pipe_slow );
11831 %}
11832 
11833 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11834                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11835   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11836   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11837   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11838 
11839   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11840   ins_encode %{
11841     __ string_compare($str2$$Register, $str1$$Register,
11842                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11843                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11844   %}
11845   ins_pipe( pipe_slow );
11846 %}
11847 
11848 // fast string equals
11849 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11850                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11851   predicate(!VM_Version::supports_avx512vlbw());
11852   match(Set result (StrEquals (Binary str1 str2) cnt));
11853   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11854 
11855   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11856   ins_encode %{
11857     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11858                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11859                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11860   %}
11861 
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11866                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11867   predicate(VM_Version::supports_avx512vlbw());
11868   match(Set result (StrEquals (Binary str1 str2) cnt));
11869   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11870 
11871   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11872   ins_encode %{
11873     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11874                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11875                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11876   %}
11877 
11878   ins_pipe( pipe_slow );
11879 %}
11880 
11881 
11882 // fast search of substring with known size.
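// When the constant needle is large enough to fill a 16-byte XMM register of
// comparison data (>= 16 elements for LL, >= 8 for UU/UL), string_indexofC8
// is used and the needle never has to be staged through the stack; shorter
// constants fall back to the general string_indexof path.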
11883 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11884                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11885   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11886   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11887   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11888 
11889   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11890   ins_encode %{
11891     int icnt2 = (int)$int_cnt2$$constant;
11892     if (icnt2 >= 16) {
11893       // IndexOf for constant substrings with size >= 16 elements
11894       // which don't need to be loaded through the stack.
11895       __ string_indexofC8($str1$$Register, $str2$$Register,
11896                           $cnt1$$Register, $cnt2$$Register,
11897                           icnt2, $result$$Register,
11898                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11899     } else {
11900       // Small strings are loaded through the stack if they cross a page boundary.
11901       __ string_indexof($str1$$Register, $str2$$Register,
11902                         $cnt1$$Register, $cnt2$$Register,
11903                         icnt2, $result$$Register,
11904                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11905     }
11906   %}
11907   ins_pipe( pipe_slow );
11908 %}
11909 
11910 // fast search of substring with known size.
11911 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11912                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11913   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11914   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11915   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11916 
11917   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11918   ins_encode %{
11919     int icnt2 = (int)$int_cnt2$$constant;
11920     if (icnt2 >= 8) {
11921       // IndexOf for constant substrings with size >= 8 elements
11922       // which don't need to be loaded through the stack.
11923       __ string_indexofC8($str1$$Register, $str2$$Register,
11924                           $cnt1$$Register, $cnt2$$Register,
11925                           icnt2, $result$$Register,
11926                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11927     } else {
11928       // Small strings are loaded through the stack if they cross a page boundary.
11929       __ string_indexof($str1$$Register, $str2$$Register,
11930                         $cnt1$$Register, $cnt2$$Register,
11931                         icnt2, $result$$Register,
11932                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11933     }
11934   %}
11935   ins_pipe( pipe_slow );
11936 %}
11937 
11938 // fast search of substring with known size.
11939 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11940                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11941   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11942   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11943   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11944 
11945   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11946   ins_encode %{
11947     int icnt2 = (int)$int_cnt2$$constant;
11948     if (icnt2 >= 8) {
11949       // IndexOf for constant substrings with size >= 8 elements
11950       // which don't need to be loaded through the stack.
11951       __ string_indexofC8($str1$$Register, $str2$$Register,
11952                           $cnt1$$Register, $cnt2$$Register,
11953                           icnt2, $result$$Register,
11954                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11955     } else {
11956       // Small strings are loaded through the stack if they cross a page boundary.
11957       __ string_indexof($str1$$Register, $str2$$Register,
11958                         $cnt1$$Register, $cnt2$$Register,
11959                         icnt2, $result$$Register,
11960                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11961     }
11962   %}
11963   ins_pipe( pipe_slow );
11964 %}
11965 
11966 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11967                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11968   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11969   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11970   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11971 
11972   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11973   ins_encode %{
11974     __ string_indexof($str1$$Register, $str2$$Register,
11975                       $cnt1$$Register, $cnt2$$Register,
11976                       (-1), $result$$Register,
11977                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11978   %}
11979   ins_pipe( pipe_slow );
11980 %}
11981 
11982 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11983                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11984   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11985   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11986   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11987 
11988   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11989   ins_encode %{
11990     __ string_indexof($str1$$Register, $str2$$Register,
11991                       $cnt1$$Register, $cnt2$$Register,
11992                       (-1), $result$$Register,
11993                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
11997 
11998 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11999                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12000   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12001   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12002   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12003 
12004   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12005   ins_encode %{
12006     __ string_indexof($str1$$Register, $str2$$Register,
12007                       $cnt1$$Register, $cnt2$$Register,
12008                       (-1), $result$$Register,
12009                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12010   %}
12011   ins_pipe( pipe_slow );
12012 %}
12013 
12014 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12015                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12016   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12017   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12018   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12019   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12020   ins_encode %{
12021     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12022                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12023   %}
12024   ins_pipe( pipe_slow );
12025 %}
12026 
12027 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12028                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12029   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12030   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12031   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12032   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12033   ins_encode %{
12034     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12035                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12036   %}
12037   ins_pipe( pipe_slow );
12038 %}
12039 
12040 
12041 // fast array equals
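// AryEq LL compares byte[] contents and UU compares char[]; the boolean
// passed to arrays_equals selects the element width, and the _evex forms take
// an AVX-512 opmask temporary.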
12042 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12043                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12044 %{
12045   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12046   match(Set result (AryEq ary1 ary2));
12047   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12048   //ins_cost(300);
12049 
12050   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12051   ins_encode %{
12052     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12053                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12054                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12055   %}
12056   ins_pipe( pipe_slow );
12057 %}
12058 
12059 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12060                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12061 %{
12062   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12063   match(Set result (AryEq ary1 ary2));
12064   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12065   //ins_cost(300);
12066 
12067   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12068   ins_encode %{
12069     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12070                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12071                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12072   %}
12073   ins_pipe( pipe_slow );
12074 %}
12075 
12076 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12077                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12078 %{
12079   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12080   match(Set result (AryEq ary1 ary2));
12081   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12082   //ins_cost(300);
12083 
12084   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12085   ins_encode %{
12086     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12087                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12088                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12089   %}
12090   ins_pipe( pipe_slow );
12091 %}
12092 
12093 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12094                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12095 %{
12096   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12097   match(Set result (AryEq ary1 ary2));
12098   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12099   //ins_cost(300);
12100 
12101   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12102   ins_encode %{
12103     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12104                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12105                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12106   %}
12107   ins_pipe( pipe_slow );
12108 %}
12109 
12110 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12111                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12112 %{
12113   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12114   match(Set result (HasNegatives ary1 len));
12115   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12116 
12117   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12118   ins_encode %{
12119     __ has_negatives($ary1$$Register, $len$$Register,
12120                      $result$$Register, $tmp3$$Register,
12121                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12122   %}
12123   ins_pipe( pipe_slow );
12124 %}
12125 
12126 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12127                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12128 %{
12129   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12130   match(Set result (HasNegatives ary1 len));
12131   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12132 
12133   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12134   ins_encode %{
12135     __ has_negatives($ary1$$Register, $len$$Register,
12136                      $result$$Register, $tmp3$$Register,
12137                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12138   %}
12139   ins_pipe( pipe_slow );
12140 %}
12141 
12142 
12143 // fast char[] to byte[] compression
12144 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12145                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12146   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12147   match(Set result (StrCompressedCopy src (Binary dst len)));
12148   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12149 
12150   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12151   ins_encode %{
12152     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12153                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12154                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12155                            knoreg, knoreg);
12156   %}
12157   ins_pipe( pipe_slow );
12158 %}
12159 
12160 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12161                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12162   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12163   match(Set result (StrCompressedCopy src (Binary dst len)));
12164   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12165 
12166   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12167   ins_encode %{
12168     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12169                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12170                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12171                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12172   %}
12173   ins_pipe( pipe_slow );
12174 %}
12175 
12176 // fast byte[] to char[] inflation
12177 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12178                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12179   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12180   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12181   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12182 
12183   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12184   ins_encode %{
12185     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12186                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12187   %}
12188   ins_pipe( pipe_slow );
12189 %}
12190 
12191 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12192                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12193   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12194   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12195   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12196 
12197   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12198   ins_encode %{
12199     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12200                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12201   %}
12202   ins_pipe( pipe_slow );
12203 %}
12204 
12205 // encode char[] to byte[] in ISO_8859_1
12206 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12207                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12208                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12209   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12210   match(Set result (EncodeISOArray src (Binary dst len)));
12211   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12212 
12213   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12214   ins_encode %{
12215     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12216                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12217                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12218   %}
12219   ins_pipe( pipe_slow );
12220 %}
12221 
12222 // encode char[] to byte[] in ASCII
12223 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12224                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12225                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12226   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12227   match(Set result (EncodeISOArray src (Binary dst len)));
12228   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12229 
12230   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12231   ins_encode %{
12232     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12233                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12234                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12235   %}
12236   ins_pipe( pipe_slow );
12237 %}
12238 
12239 //----------Control Flow Instructions------------------------------------------
12240 // Signed compare Instructions
12241 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12242   match(Set cr (CmpI op1 op2));
12243   effect( DEF cr, USE op1, USE op2 );
12244   format %{ "CMP    $op1,$op2" %}
12245   opcode(0x3B);  /* Opcode 3B /r */
12246   ins_encode( OpcP, RegReg( op1, op2) );
12247   ins_pipe( ialu_cr_reg_reg );
12248 %}
12249 
12250 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12251   match(Set cr (CmpI op1 op2));
12252   effect( DEF cr, USE op1 );
12253   format %{ "CMP    $op1,$op2" %}
12254   opcode(0x81,0x07);  /* Opcode 81 /7 */
12255   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12256   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12257   ins_pipe( ialu_cr_reg_imm );
12258 %}
12259 
12260 // Cisc-spilled version of cmpI_eReg
12261 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12262   match(Set cr (CmpI op1 (LoadI op2)));
12263 
12264   format %{ "CMP    $op1,$op2" %}
12265   ins_cost(500);
12266   opcode(0x3B);  /* Opcode 3B /r */
12267   ins_encode( OpcP, RegMem( op1, op2) );
12268   ins_pipe( ialu_cr_reg_mem );
12269 %}
12270 
12271 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12272   match(Set cr (CmpI src zero));
12273   effect( DEF cr, USE src );
12274 
12275   format %{ "TEST   $src,$src" %}
12276   opcode(0x85);
12277   ins_encode( OpcP, RegReg( src, src ) );
12278   ins_pipe( ialu_cr_reg_imm );
12279 %}
12280 
12281 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12282   match(Set cr (CmpI (AndI src con) zero));
12283 
12284   format %{ "TEST   $src,$con" %}
12285   opcode(0xF7,0x00);
12286   ins_encode( OpcP, RegOpc(src), Con32(con) );
12287   ins_pipe( ialu_cr_reg_imm );
12288 %}
12289 
12290 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12291   match(Set cr (CmpI (AndI src mem) zero));
12292 
12293   format %{ "TEST   $src,$mem" %}
12294   opcode(0x85);
12295   ins_encode( OpcP, RegMem( src, mem ) );
12296   ins_pipe( ialu_cr_reg_mem );
12297 %}
12298 
12299 // Unsigned compare Instructions; really, same as signed except they
12300 // produce an eFlagsRegU instead of eFlagsReg.
12301 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12302   match(Set cr (CmpU op1 op2));
12303 
12304   format %{ "CMPu   $op1,$op2" %}
12305   opcode(0x3B);  /* Opcode 3B /r */
12306   ins_encode( OpcP, RegReg( op1, op2) );
12307   ins_pipe( ialu_cr_reg_reg );
12308 %}
12309 
12310 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12311   match(Set cr (CmpU op1 op2));
12312 
12313   format %{ "CMPu   $op1,$op2" %}
12314   opcode(0x81,0x07);  /* Opcode 81 /7 */
12315   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12316   ins_pipe( ialu_cr_reg_imm );
12317 %}
12318 
12319 // Cisc-spilled version of cmpU_eReg
12320 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12321   match(Set cr (CmpU op1 (LoadI op2)));
12322 
12323   format %{ "CMPu   $op1,$op2" %}
12324   ins_cost(500);
12325   opcode(0x3B);  /* Opcode 3B /r */
12326   ins_encode( OpcP, RegMem( op1, op2) );
12327   ins_pipe( ialu_cr_reg_mem );
12328 %}
12329 
12330 // // Cisc-spilled version of cmpU_eReg
12331 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12332 //  match(Set cr (CmpU (LoadI op1) op2));
12333 //
12334 //  format %{ "CMPu   $op1,$op2" %}
12335 //  ins_cost(500);
12336 //  opcode(0x39);  /* Opcode 39 /r */
12337 //  ins_encode( OpcP, RegMem( op1, op2) );
12338 //%}
12339 
12340 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12341   match(Set cr (CmpU src zero));
12342 
12343   format %{ "TESTu  $src,$src" %}
12344   opcode(0x85);
12345   ins_encode( OpcP, RegReg( src, src ) );
12346   ins_pipe( ialu_cr_reg_imm );
12347 %}
12348 
12349 // Unsigned pointer compare Instructions
12350 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12351   match(Set cr (CmpP op1 op2));
12352 
12353   format %{ "CMPu   $op1,$op2" %}
12354   opcode(0x3B);  /* Opcode 3B /r */
12355   ins_encode( OpcP, RegReg( op1, op2) );
12356   ins_pipe( ialu_cr_reg_reg );
12357 %}
12358 
12359 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12360   match(Set cr (CmpP op1 op2));
12361 
12362   format %{ "CMPu   $op1,$op2" %}
12363   opcode(0x81,0x07);  /* Opcode 81 /7 */
12364   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12365   ins_pipe( ialu_cr_reg_imm );
12366 %}
12367 
12368 // Cisc-spilled version of cmpP_eReg
12369 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12370   match(Set cr (CmpP op1 (LoadP op2)));
12371 
12372   format %{ "CMPu   $op1,$op2" %}
12373   ins_cost(500);
12374   opcode(0x3B);  /* Opcode 3B /r */
12375   ins_encode( OpcP, RegMem( op1, op2) );
12376   ins_pipe( ialu_cr_reg_mem );
12377 %}
12378 
12379 // // Cisc-spilled version of cmpP_eReg
12380 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12381 //  match(Set cr (CmpP (LoadP op1) op2));
12382 //
12383 //  format %{ "CMPu   $op1,$op2" %}
12384 //  ins_cost(500);
12385 //  opcode(0x39);  /* Opcode 39 /r */
12386 //  ins_encode( OpcP, RegMem( op1, op2) );
12387 //%}
12388 
12389 // Compare raw pointer (used in out-of-heap check).
12390 // Only works because non-oop pointers must be raw pointers
12391 // and raw pointers have no anti-dependencies.
12392 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12393   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12394   match(Set cr (CmpP op1 (LoadP op2)));
12395 
12396   format %{ "CMPu   $op1,$op2" %}
12397   opcode(0x3B);  /* Opcode 3B /r */
12398   ins_encode( OpcP, RegMem( op1, op2) );
12399   ins_pipe( ialu_cr_reg_mem );
12400 %}
12401 
12402 //
12403 // This will generate a signed flags result. This should be ok
12404 // since any compare to a zero should be eq/neq.
12405 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12406   match(Set cr (CmpP src zero));
12407 
12408   format %{ "TEST   $src,$src" %}
12409   opcode(0x85);
12410   ins_encode( OpcP, RegReg( src, src ) );
12411   ins_pipe( ialu_cr_reg_imm );
12412 %}
12413 
12414 // Cisc-spilled version of testP_reg
12415 // This will generate a signed flags result. This should be ok
12416 // since any compare to a zero should be eq/neq.
12417 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12418   match(Set cr (CmpP (LoadP op) zero));
12419 
12420   format %{ "TEST   $op,0xFFFFFFFF" %}
12421   ins_cost(500);
12422   opcode(0xF7);               /* Opcode F7 /0 */
12423   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12424   ins_pipe( ialu_cr_reg_imm );
12425 %}
12426 
12427 // Yanked all unsigned pointer compare operations.
12428 // Pointer compares are done with CmpP which is already unsigned.
12429 
12430 //----------Max and Min--------------------------------------------------------
12431 // Min Instructions
12432 ////
12433 //   *** Min and Max using the conditional move are slower than the
12434 //   *** branch version on a Pentium III.
12435 // // Conditional move for min
12436 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12437 //  effect( USE_DEF op2, USE op1, USE cr );
12438 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12439 //  opcode(0x4C,0x0F);
12440 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12441 //  ins_pipe( pipe_cmov_reg );
12442 //%}
12443 //
12444 //// Min Register with Register (P6 version)
12445 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12446 //  predicate(VM_Version::supports_cmov() );
12447 //  match(Set op2 (MinI op1 op2));
12448 //  ins_cost(200);
12449 //  expand %{
12450 //    eFlagsReg cr;
12451 //    compI_eReg(cr,op1,op2);
12452 //    cmovI_reg_lt(op2,op1,cr);
12453 //  %}
12454 //%}
12455 
12456 // Min Register with Register (generic version)
12457 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12458   match(Set dst (MinI dst src));
12459   effect(KILL flags);
12460   ins_cost(300);
12461 
12462   format %{ "MIN    $dst,$src" %}
12463   opcode(0xCC);
12464   ins_encode( min_enc(dst,src) );
12465   ins_pipe( pipe_slow );
12466 %}
12467 
12468 // Max Register with Register
12469 //   *** Min and Max using the conditional move are slower than the
12470 //   *** branch version on a Pentium III.
12471 // // Conditional move for max
12472 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12473 //  effect( USE_DEF op2, USE op1, USE cr );
12474 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12475 //  opcode(0x4F,0x0F);
12476 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12477 //  ins_pipe( pipe_cmov_reg );
12478 //%}
12479 //
12480 // // Max Register with Register (P6 version)
12481 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12482 //  predicate(VM_Version::supports_cmov() );
12483 //  match(Set op2 (MaxI op1 op2));
12484 //  ins_cost(200);
12485 //  expand %{
12486 //    eFlagsReg cr;
12487 //    compI_eReg(cr,op1,op2);
12488 //    cmovI_reg_gt(op2,op1,cr);
12489 //  %}
12490 //%}
12491 
12492 // Max Register with Register (generic version)
12493 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12494   match(Set dst (MaxI dst src));
12495   effect(KILL flags);
12496   ins_cost(300);
12497 
12498   format %{ "MAX    $dst,$src" %}
12499   opcode(0xCC);
12500   ins_encode( max_enc(dst,src) );
12501   ins_pipe( pipe_slow );
12502 %}
12503 
12504 // ============================================================================
12505 // Counted Loop limit node which represents exact final iterator value.
12506 // Note: the resulting value should fit into the integer range since
12507 // counted loops have a limit check on overflow.
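      //
      // Illustrative example only (not part of the rule below): for init=0,
      // limit=10, stride=3 the expression
      //   $init + $stride*(($limit - $init + $stride - 1)/$stride)
      // gives 0 + 3*((10 - 0 + 3 - 1)/3) = 0 + 3*4 = 12, the exact value the
      // iterator holds when the loop exits (it visits 0, 3, 6, 9, then 12 >= 10).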
12508 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12509   match(Set limit (LoopLimit (Binary init limit) stride));
12510   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12511   ins_cost(300);
12512 
12513   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12514   ins_encode %{
12515     int strd = (int)$stride$$constant;
12516     assert(strd != 1 && strd != -1, "sanity");
12517     int m1 = (strd > 0) ? 1 : -1;
12518     // Convert limit to long (EAX:EDX)
12519     __ cdql();
12520     // Convert init to long (init:tmp)
12521     __ movl($tmp$$Register, $init$$Register);
12522     __ sarl($tmp$$Register, 31);
12523     // $limit - $init
12524     __ subl($limit$$Register, $init$$Register);
12525     __ sbbl($limit_hi$$Register, $tmp$$Register);
12526     // + ($stride - 1)
12527     if (strd > 0) {
12528       __ addl($limit$$Register, (strd - 1));
12529       __ adcl($limit_hi$$Register, 0);
12530       __ movl($tmp$$Register, strd);
12531     } else {
12532       __ addl($limit$$Register, (strd + 1));
12533       __ adcl($limit_hi$$Register, -1);
12534       __ lneg($limit_hi$$Register, $limit$$Register);
12535       __ movl($tmp$$Register, -strd);
12536     }
12537     // signed division: (EAX:EDX) / pos_stride
12538     __ idivl($tmp$$Register);
12539     if (strd < 0) {
12540       // restore sign
12541       __ negl($tmp$$Register);
12542     }
12543     // (EAX) * stride
12544     __ mull($tmp$$Register);
12545     // + init (ignore upper bits)
12546     __ addl($limit$$Register, $init$$Register);
12547   %}
12548   ins_pipe( pipe_slow );
12549 %}
12550 
12551 // ============================================================================
12552 // Branch Instructions
12553 // Jump Table
12554 instruct jumpXtnd(rRegI switch_val) %{
12555   match(Jump switch_val);
12556   ins_cost(350);
12557   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12558   ins_encode %{
12559     // Jump to Address(table_base + switch_reg)
12560     Address index(noreg, $switch_val$$Register, Address::times_1);
12561     __ jump(ArrayAddress($constantaddress, index));
12562   %}
12563   ins_pipe(pipe_jmp);
12564 %}
12565 
12566 // Jump Direct - Label defines a relative address from JMP+1
12567 instruct jmpDir(label labl) %{
12568   match(Goto);
12569   effect(USE labl);
12570 
12571   ins_cost(300);
12572   format %{ "JMP    $labl" %}
12573   size(5);
12574   ins_encode %{
12575     Label* L = $labl$$label;
12576     __ jmp(*L, false); // Always long jump
12577   %}
12578   ins_pipe( pipe_jmp );
12579 %}
12580 
12581 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12582 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12583   match(If cop cr);
12584   effect(USE labl);
12585 
12586   ins_cost(300);
12587   format %{ "J$cop    $labl" %}
12588   size(6);
12589   ins_encode %{
12590     Label* L = $labl$$label;
12591     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12592   %}
12593   ins_pipe( pipe_jcc );
12594 %}
12595 
12596 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12597 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12598   predicate(!n->has_vector_mask_set());
12599   match(CountedLoopEnd cop cr);
12600   effect(USE labl);
12601 
12602   ins_cost(300);
12603   format %{ "J$cop    $labl\t# Loop end" %}
12604   size(6);
12605   ins_encode %{
12606     Label* L = $labl$$label;
12607     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12608   %}
12609   ins_pipe( pipe_jcc );
12610 %}
12611 
12612 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12613 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12614   predicate(!n->has_vector_mask_set());
12615   match(CountedLoopEnd cop cmp);
12616   effect(USE labl);
12617 
12618   ins_cost(300);
12619   format %{ "J$cop,u  $labl\t# Loop end" %}
12620   size(6);
12621   ins_encode %{
12622     Label* L = $labl$$label;
12623     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12624   %}
12625   ins_pipe( pipe_jcc );
12626 %}
12627 
12628 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12629   predicate(!n->has_vector_mask_set());
12630   match(CountedLoopEnd cop cmp);
12631   effect(USE labl);
12632 
12633   ins_cost(200);
12634   format %{ "J$cop,u  $labl\t# Loop end" %}
12635   size(6);
12636   ins_encode %{
12637     Label* L = $labl$$label;
12638     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12639   %}
12640   ins_pipe( pipe_jcc );
12641 %}
12642 
12643 // mask version
12644 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12645 // The bounded mask operand used in the following pattern is needed for
12646 // post-loop multiversioning.
12647 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12648   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12649   match(CountedLoopEnd cop cr);
12650   effect(USE labl, TEMP ktmp);
12651 
12652   ins_cost(400);
12653   format %{ "J$cop    $labl\t# Loop end\n\t"
12654             "restorevectmask \t# vector mask restore for loops" %}
12655   size(10);
12656   ins_encode %{
12657     Label* L = $labl$$label;
12658     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12659     __ restorevectmask($ktmp$$KRegister);
12660   %}
12661   ins_pipe( pipe_jcc );
12662 %}
12663 
12664 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12665 // The bounded mask operand used in the following pattern is needed for
12666 // post-loop multiversioning.
12667 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12668   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12669   match(CountedLoopEnd cop cmp);
12670   effect(USE labl, TEMP ktmp);
12671 
12672   ins_cost(400);
12673   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12674             "restorevectmask \t# vector mask restore for loops" %}
12675   size(10);
12676   ins_encode %{
12677     Label* L = $labl$$label;
12678     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12679     __ restorevectmask($ktmp$$KRegister);
12680   %}
12681   ins_pipe( pipe_jcc );
12682 %}
12683 
12684 // The bounded mask operand used in the following pattern is needed for
12685 // post-loop multiversioning.
12686 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12687   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12688   match(CountedLoopEnd cop cmp);
12689   effect(USE labl, TEMP ktmp);
12690 
12691   ins_cost(300);
12692   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12693             "restorevectmask \t# vector mask restore for loops" %}
12694   size(10);
12695   ins_encode %{
12696     Label* L = $labl$$label;
12697     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12698     __ restorevectmask($ktmp$$KRegister);
12699   %}
12700   ins_pipe( pipe_jcc );
12701 %}
12702 
12703 // Jump Direct Conditional - using unsigned comparison
12704 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12705   match(If cop cmp);
12706   effect(USE labl);
12707 
12708   ins_cost(300);
12709   format %{ "J$cop,u  $labl" %}
12710   size(6);
12711   ins_encode %{
12712     Label* L = $labl$$label;
12713     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12714   %}
12715   ins_pipe(pipe_jcc);
12716 %}
12717 
12718 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12719   match(If cop cmp);
12720   effect(USE labl);
12721 
12722   ins_cost(200);
12723   format %{ "J$cop,u  $labl" %}
12724   size(6);
12725   ins_encode %{
12726     Label* L = $labl$$label;
12727     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12728   %}
12729   ins_pipe(pipe_jcc);
12730 %}
12731 
12732 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12733   match(If cop cmp);
12734   effect(USE labl);
12735 
12736   ins_cost(200);
12737   format %{ $$template
12738     if ($cop$$cmpcode == Assembler::notEqual) {
12739       $$emit$$"JP,u   $labl\n\t"
12740       $$emit$$"J$cop,u   $labl"
12741     } else {
12742       $$emit$$"JP,u   done\n\t"
12743       $$emit$$"J$cop,u   $labl\n\t"
12744       $$emit$$"done:"
12745     }
12746   %}
12747   ins_encode %{
12748     Label* l = $labl$$label;
12749     if ($cop$$cmpcode == Assembler::notEqual) {
12750       __ jcc(Assembler::parity, *l, false);
12751       __ jcc(Assembler::notEqual, *l, false);
12752     } else if ($cop$$cmpcode == Assembler::equal) {
12753       Label done;
12754       __ jccb(Assembler::parity, done);
12755       __ jcc(Assembler::equal, *l, false);
12756       __ bind(done);
12757     } else {
12758        ShouldNotReachHere();
12759     }
12760   %}
12761   ins_pipe(pipe_jcc);
12762 %}
12763 
12764 // ============================================================================
12765 // The second (slow) half of a subtype check.  Scan the subklass's secondary
12766 // superklass array for an instance of the superklass.  Set a hidden internal
12767 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12768 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
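      //
      // Roughly equivalent C-like sketch (illustrative only; the field accesses
      // are abbreviated and do not reflect the real Klass layout):
      //   for (int i = 0; i < sub->secondary_supers->length; i++) {
      //     if (sub->secondary_supers->data[i] == super) {
      //       sub->secondary_super_cache = super;  // remember the hit
      //       return 0;                            // zero => hit, flags Z
      //     }
      //   }
      //   return non_zero;                         // miss, flags NZ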
12769 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12770   match(Set result (PartialSubtypeCheck sub super));
12771   effect( KILL rcx, KILL cr );
12772 
12773   ins_cost(1100);  // slightly larger than the next version
12774   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12775             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12776             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12777             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12778             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12779             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12780             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12781      "miss:\t" %}
12782 
12783   opcode(0x1); // Force a XOR of EDI
12784   ins_encode( enc_PartialSubtypeCheck() );
12785   ins_pipe( pipe_slow );
12786 %}
12787 
12788 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12789   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12790   effect( KILL rcx, KILL result );
12791 
12792   ins_cost(1000);
12793   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12794             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12795             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12796             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12797             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12798             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12799      "miss:\t" %}
12800 
12801   opcode(0x0);  // No need to XOR EDI
12802   ins_encode( enc_PartialSubtypeCheck() );
12803   ins_pipe( pipe_slow );
12804 %}
12805 
12806 // ============================================================================
12807 // Branch Instructions -- short offset versions
12808 //
12809 // These instructions are used to replace jumps of a long offset (the default
12810 // match) with jumps of a shorter offset.  These instructions are all tagged
12811 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12812 // match rules in general matching.  Instead, the ADLC generates a conversion
12813 // method in the MachNode which can be used to do in-place replacement of the
12814 // long variant with the shorter variant.  The compiler decides whether the
12815 // short form can be used via the is_short_branch_offset() predicate in the
12816 // machine-specific code section of the file.
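      //
      // For example (encodings shown for orientation only): the long form of
      // jmpDir below emits a 5-byte JMP rel32 (E9 xx xx xx xx) while its
      // jmpDir_short twin emits a 2-byte JMP rel8 (EB xx); likewise a 6-byte
      // Jcc rel32 (0F 8x) shrinks to a 2-byte Jcc rel8 (7x).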
12817 
12818 // Jump Direct - Label defines a relative address from JMP+1
12819 instruct jmpDir_short(label labl) %{
12820   match(Goto);
12821   effect(USE labl);
12822 
12823   ins_cost(300);
12824   format %{ "JMP,s  $labl" %}
12825   size(2);
12826   ins_encode %{
12827     Label* L = $labl$$label;
12828     __ jmpb(*L);
12829   %}
12830   ins_pipe( pipe_jmp );
12831   ins_short_branch(1);
12832 %}
12833 
12834 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12835 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12836   match(If cop cr);
12837   effect(USE labl);
12838 
12839   ins_cost(300);
12840   format %{ "J$cop,s  $labl" %}
12841   size(2);
12842   ins_encode %{
12843     Label* L = $labl$$label;
12844     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12845   %}
12846   ins_pipe( pipe_jcc );
12847   ins_short_branch(1);
12848 %}
12849 
12850 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12851 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12852   match(CountedLoopEnd cop cr);
12853   effect(USE labl);
12854 
12855   ins_cost(300);
12856   format %{ "J$cop,s  $labl\t# Loop end" %}
12857   size(2);
12858   ins_encode %{
12859     Label* L = $labl$$label;
12860     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12861   %}
12862   ins_pipe( pipe_jcc );
12863   ins_short_branch(1);
12864 %}
12865 
12866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12867 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12868   match(CountedLoopEnd cop cmp);
12869   effect(USE labl);
12870 
12871   ins_cost(300);
12872   format %{ "J$cop,us $labl\t# Loop end" %}
12873   size(2);
12874   ins_encode %{
12875     Label* L = $labl$$label;
12876     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12877   %}
12878   ins_pipe( pipe_jcc );
12879   ins_short_branch(1);
12880 %}
12881 
12882 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12883   match(CountedLoopEnd cop cmp);
12884   effect(USE labl);
12885 
12886   ins_cost(300);
12887   format %{ "J$cop,us $labl\t# Loop end" %}
12888   size(2);
12889   ins_encode %{
12890     Label* L = $labl$$label;
12891     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12892   %}
12893   ins_pipe( pipe_jcc );
12894   ins_short_branch(1);
12895 %}
12896 
12897 // Jump Direct Conditional - using unsigned comparison
12898 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12899   match(If cop cmp);
12900   effect(USE labl);
12901 
12902   ins_cost(300);
12903   format %{ "J$cop,us $labl" %}
12904   size(2);
12905   ins_encode %{
12906     Label* L = $labl$$label;
12907     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12908   %}
12909   ins_pipe( pipe_jcc );
12910   ins_short_branch(1);
12911 %}
12912 
12913 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12914   match(If cop cmp);
12915   effect(USE labl);
12916 
12917   ins_cost(300);
12918   format %{ "J$cop,us $labl" %}
12919   size(2);
12920   ins_encode %{
12921     Label* L = $labl$$label;
12922     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12923   %}
12924   ins_pipe( pipe_jcc );
12925   ins_short_branch(1);
12926 %}
12927 
12928 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12929   match(If cop cmp);
12930   effect(USE labl);
12931 
12932   ins_cost(300);
12933   format %{ $$template
12934     if ($cop$$cmpcode == Assembler::notEqual) {
12935       $$emit$$"JP,u,s   $labl\n\t"
12936       $$emit$$"J$cop,u,s   $labl"
12937     } else {
12938       $$emit$$"JP,u,s   done\n\t"
12939       $$emit$$"J$cop,u,s  $labl\n\t"
12940       $$emit$$"done:"
12941     }
12942   %}
12943   size(4);
12944   ins_encode %{
12945     Label* l = $labl$$label;
12946     if ($cop$$cmpcode == Assembler::notEqual) {
12947       __ jccb(Assembler::parity, *l);
12948       __ jccb(Assembler::notEqual, *l);
12949     } else if ($cop$$cmpcode == Assembler::equal) {
12950       Label done;
12951       __ jccb(Assembler::parity, done);
12952       __ jccb(Assembler::equal, *l);
12953       __ bind(done);
12954     } else {
12955        ShouldNotReachHere();
12956     }
12957   %}
12958   ins_pipe(pipe_jcc);
12959   ins_short_branch(1);
12960 %}
12961 
12962 // ============================================================================
12963 // Long Compare
12964 //
12965 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12966 // is tricky.  The flavor of compare used depends on whether we are testing
12967 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12968 // The GE test is the negated LT test.  The LE test can be had by commuting
12969 // the operands (yielding a GE test) and then negating; negate again for the
12970 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12971 // NE test is negated from that.
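      //
      // Sketch of the flag-manifesting idea used below (illustrative pseudo-assembly
      // mirroring the format strings, with a hypothetical label "taken"):
      //   CMP  src1.lo,src2.lo     // borrow out of the low halves
      //   MOV  tmp,src1.hi
      //   SBB  tmp,src2.hi         // flags now reflect the full 64-bit src1-src2
      //   JLT  taken               // signed less-than; JGE for the negated test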
12972 
12973 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12974 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12975 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12976 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12977 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12978 // foo match ends up with the wrong leaf.  One fix is to not match both
12979 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12980 // both forms beat the trinary form of long-compare and both are very useful
12981 // on Intel which has so few registers.
12982 
12983 // Manifest a CmpL result in an integer register.  Very painful.
12984 // This is the test to avoid.
12985 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12986   match(Set dst (CmpL3 src1 src2));
12987   effect( KILL flags );
12988   ins_cost(1000);
12989   format %{ "XOR    $dst,$dst\n\t"
12990             "CMP    $src1.hi,$src2.hi\n\t"
12991             "JLT,s  m_one\n\t"
12992             "JGT,s  p_one\n\t"
12993             "CMP    $src1.lo,$src2.lo\n\t"
12994             "JB,s   m_one\n\t"
12995             "JEQ,s  done\n"
12996     "p_one:\tINC    $dst\n\t"
12997             "JMP,s  done\n"
12998     "m_one:\tDEC    $dst\n"
12999      "done:" %}
13000   ins_encode %{
13001     Label p_one, m_one, done;
13002     __ xorptr($dst$$Register, $dst$$Register);
13003     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13004     __ jccb(Assembler::less,    m_one);
13005     __ jccb(Assembler::greater, p_one);
13006     __ cmpl($src1$$Register, $src2$$Register);
13007     __ jccb(Assembler::below,   m_one);
13008     __ jccb(Assembler::equal,   done);
13009     __ bind(p_one);
13010     __ incrementl($dst$$Register);
13011     __ jmpb(done);
13012     __ bind(m_one);
13013     __ decrementl($dst$$Register);
13014     __ bind(done);
13015   %}
13016   ins_pipe( pipe_slow );
13017 %}
13018 
13019 //======
13020 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13021 // compares.  Can be used for LE or GT compares by reversing arguments.
13022 // NOT GOOD FOR EQ/NE tests.
13023 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13024   match( Set flags (CmpL src zero ));
13025   ins_cost(100);
13026   format %{ "TEST   $src.hi,$src.hi" %}
13027   opcode(0x85);
13028   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13029   ins_pipe( ialu_cr_reg_reg );
13030 %}
13031 
13032 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13033 // compares.  Can be used for LE or GT compares by reversing arguments.
13034 // NOT GOOD FOR EQ/NE tests.
13035 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13036   match( Set flags (CmpL src1 src2 ));
13037   effect( TEMP tmp );
13038   ins_cost(300);
13039   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13040             "MOV    $tmp,$src1.hi\n\t"
13041             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13042   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13043   ins_pipe( ialu_cr_reg_reg );
13044 %}
13045 
13046 // Long compares reg < zero/reg OR reg >= zero/reg.
13047 // Just a wrapper for a normal branch, plus the predicate test.
13048 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13049   match(If cmp flags);
13050   effect(USE labl);
13051   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13052   expand %{
13053     jmpCon(cmp,flags,labl);    // JLT or JGE...
13054   %}
13055 %}
13056 
13057 //======
13058 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13059 // compares.  Can be used for LE or GT compares by reversing arguments.
13060 // NOT GOOD FOR EQ/NE tests.
13061 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13062   match(Set flags (CmpUL src zero));
13063   ins_cost(100);
13064   format %{ "TEST   $src.hi,$src.hi" %}
13065   opcode(0x85);
13066   ins_encode(OpcP, RegReg_Hi2(src, src));
13067   ins_pipe(ialu_cr_reg_reg);
13068 %}
13069 
13070 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13071 // compares.  Can be used for LE or GT compares by reversing arguments.
13072 // NOT GOOD FOR EQ/NE tests.
13073 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13074   match(Set flags (CmpUL src1 src2));
13075   effect(TEMP tmp);
13076   ins_cost(300);
13077   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13078             "MOV    $tmp,$src1.hi\n\t"
13079             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13080   ins_encode(long_cmp_flags2(src1, src2, tmp));
13081   ins_pipe(ialu_cr_reg_reg);
13082 %}
13083 
13084 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13085 // Just a wrapper for a normal branch, plus the predicate test.
13086 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13087   match(If cmp flags);
13088   effect(USE labl);
13089   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13090   expand %{
13091     jmpCon(cmp, flags, labl);    // JLT or JGE...
13092   %}
13093 %}
13094 
13095 // Compare 2 longs and CMOVE longs.
13096 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13097   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13098   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13099   ins_cost(400);
13100   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13101             "CMOV$cmp $dst.hi,$src.hi" %}
13102   opcode(0x0F,0x40);
13103   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13104   ins_pipe( pipe_cmov_reg_long );
13105 %}
13106 
13107 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13108   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13109   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13110   ins_cost(500);
13111   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13112             "CMOV$cmp $dst.hi,$src.hi" %}
13113   opcode(0x0F,0x40);
13114   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13115   ins_pipe( pipe_cmov_reg_long );
13116 %}
13117 
13118 // Compare 2 longs and CMOVE ints.
13119 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13120   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13121   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13122   ins_cost(200);
13123   format %{ "CMOV$cmp $dst,$src" %}
13124   opcode(0x0F,0x40);
13125   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13126   ins_pipe( pipe_cmov_reg );
13127 %}
13128 
13129 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13130   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13131   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13132   ins_cost(250);
13133   format %{ "CMOV$cmp $dst,$src" %}
13134   opcode(0x0F,0x40);
13135   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13136   ins_pipe( pipe_cmov_mem );
13137 %}
13138 
13139 // Compare 2 longs and CMOVE ptrs.
13140 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13141   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13142   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13143   ins_cost(200);
13144   format %{ "CMOV$cmp $dst,$src" %}
13145   opcode(0x0F,0x40);
13146   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13147   ins_pipe( pipe_cmov_reg );
13148 %}
13149 
13150 // Compare 2 longs and CMOVE doubles
13151 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13152   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13153   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13154   ins_cost(200);
13155   expand %{
13156     fcmovDPR_regS(cmp,flags,dst,src);
13157   %}
13158 %}
13159 
13160 // Compare 2 longs and CMOVE doubles
13161 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13162   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13163   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13164   ins_cost(200);
13165   expand %{
13166     fcmovD_regS(cmp,flags,dst,src);
13167   %}
13168 %}
13169 
13170 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13171   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13172   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13173   ins_cost(200);
13174   expand %{
13175     fcmovFPR_regS(cmp,flags,dst,src);
13176   %}
13177 %}
13178 
13179 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13180   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13181   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13182   ins_cost(200);
13183   expand %{
13184     fcmovF_regS(cmp,flags,dst,src);
13185   %}
13186 %}
13187 
13188 //======
13189 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13190 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13191   match( Set flags (CmpL src zero ));
13192   effect(TEMP tmp);
13193   ins_cost(200);
13194   format %{ "MOV    $tmp,$src.lo\n\t"
13195             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13196   ins_encode( long_cmp_flags0( src, tmp ) );
13197   ins_pipe( ialu_reg_reg_long );
13198 %}
13199 
13200 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13201 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13202   match( Set flags (CmpL src1 src2 ));
13203   ins_cost(200+300);
13204   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13205             "JNE,s  skip\n\t"
13206             "CMP    $src1.hi,$src2.hi\n\t"
13207      "skip:\t" %}
13208   ins_encode( long_cmp_flags1( src1, src2 ) );
13209   ins_pipe( ialu_cr_reg_reg );
13210 %}
13211 
13212 // Long compare reg == zero/reg OR reg != zero/reg
13213 // Just a wrapper for a normal branch, plus the predicate test.
13214 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13215   match(If cmp flags);
13216   effect(USE labl);
13217   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13218   expand %{
13219     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13220   %}
13221 %}
13222 
13223 //======
13224 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13225 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13226   match(Set flags (CmpUL src zero));
13227   effect(TEMP tmp);
13228   ins_cost(200);
13229   format %{ "MOV    $tmp,$src.lo\n\t"
13230             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13231   ins_encode(long_cmp_flags0(src, tmp));
13232   ins_pipe(ialu_reg_reg_long);
13233 %}
13234 
13235 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13236 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13237   match(Set flags (CmpUL src1 src2));
13238   ins_cost(200+300);
13239   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13240             "JNE,s  skip\n\t"
13241             "CMP    $src1.hi,$src2.hi\n\t"
13242      "skip:\t" %}
13243   ins_encode(long_cmp_flags1(src1, src2));
13244   ins_pipe(ialu_cr_reg_reg);
13245 %}
13246 
13247 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13248 // Just a wrapper for a normal branch, plus the predicate test.
13249 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13250   match(If cmp flags);
13251   effect(USE labl);
13252   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13253   expand %{
13254     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13255   %}
13256 %}
13257 
13258 // Compare 2 longs and CMOVE longs.
13259 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13260   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13261   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13262   ins_cost(400);
13263   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13264             "CMOV$cmp $dst.hi,$src.hi" %}
13265   opcode(0x0F,0x40);
13266   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13267   ins_pipe( pipe_cmov_reg_long );
13268 %}
13269 
13270 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13271   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13272   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13273   ins_cost(500);
13274   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13275             "CMOV$cmp $dst.hi,$src.hi" %}
13276   opcode(0x0F,0x40);
13277   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13278   ins_pipe( pipe_cmov_reg_long );
13279 %}
13280 
13281 // Compare 2 longs and CMOVE ints.
13282 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13285   ins_cost(200);
13286   format %{ "CMOV$cmp $dst,$src" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13289   ins_pipe( pipe_cmov_reg );
13290 %}
13291 
13292 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13293   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13294   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13295   ins_cost(250);
13296   format %{ "CMOV$cmp $dst,$src" %}
13297   opcode(0x0F,0x40);
13298   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13299   ins_pipe( pipe_cmov_mem );
13300 %}
13301 
13302 // Compare 2 longs and CMOVE ptrs.
13303 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13304   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13305   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13306   ins_cost(200);
13307   format %{ "CMOV$cmp $dst,$src" %}
13308   opcode(0x0F,0x40);
13309   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13310   ins_pipe( pipe_cmov_reg );
13311 %}
13312 
13313 // Compare 2 longs and CMOVE doubles
13314 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13315   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13316   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13317   ins_cost(200);
13318   expand %{
13319     fcmovDPR_regS(cmp,flags,dst,src);
13320   %}
13321 %}
13322 
13323 // Compare 2 longs and CMOVE doubles
13324 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13325   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13326   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13327   ins_cost(200);
13328   expand %{
13329     fcmovD_regS(cmp,flags,dst,src);
13330   %}
13331 %}
13332 
13333 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13334   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13335   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13336   ins_cost(200);
13337   expand %{
13338     fcmovFPR_regS(cmp,flags,dst,src);
13339   %}
13340 %}
13341 
13342 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13343   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13344   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13345   ins_cost(200);
13346   expand %{
13347     fcmovF_regS(cmp,flags,dst,src);
13348   %}
13349 %}
13350 
13351 //======
13352 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13353 // Same as cmpL_reg_flags_LEGT except must negate src
13354 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13355   match( Set flags (CmpL src zero ));
13356   effect( TEMP tmp );
13357   ins_cost(300);
13358   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13359             "CMP    $tmp,$src.lo\n\t"
13360             "SBB    $tmp,$src.hi\n\t" %}
13361   ins_encode( long_cmp_flags3(src, tmp) );
13362   ins_pipe( ialu_reg_reg_long );
13363 %}
13364 
13365 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13366 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13367 // requires a commuted test to get the same result.
13368 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13369   match( Set flags (CmpL src1 src2 ));
13370   effect( TEMP tmp );
13371   ins_cost(300);
13372   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13373             "MOV    $tmp,$src2.hi\n\t"
13374             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13375   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13376   ins_pipe( ialu_cr_reg_reg );
13377 %}
13378 
13379 // Long compares reg < zero/reg OR reg >= zero/reg.
13380 // Just a wrapper for a normal branch, plus the predicate test
13381 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13382   match(If cmp flags);
13383   effect(USE labl);
13384   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13385   ins_cost(300);
13386   expand %{
13387     jmpCon(cmp,flags,labl);    // JGT or JLE...
13388   %}
13389 %}
13390 
13391 //======
13392 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13393 // Same as cmpUL_reg_flags_LEGT except must negate src
13394 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13395   match(Set flags (CmpUL src zero));
13396   effect(TEMP tmp);
13397   ins_cost(300);
13398   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13399             "CMP    $tmp,$src.lo\n\t"
13400             "SBB    $tmp,$src.hi\n\t" %}
13401   ins_encode(long_cmp_flags3(src, tmp));
13402   ins_pipe(ialu_reg_reg_long);
13403 %}
13404 
13405 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13406 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13407 // requires a commuted test to get the same result.
13408 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13409   match(Set flags (CmpUL src1 src2));
13410   effect(TEMP tmp);
13411   ins_cost(300);
13412   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13413             "MOV    $tmp,$src2.hi\n\t"
13414             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13415   ins_encode(long_cmp_flags2( src2, src1, tmp));
13416   ins_pipe(ialu_cr_reg_reg);
13417 %}
13418 
13419 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13420 // Just a wrapper for a normal branch, plus the predicate test
13421 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13422   match(If cmp flags);
13423   effect(USE labl);
13424   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13425   ins_cost(300);
13426   expand %{
13427     jmpCon(cmp, flags, labl);    // JGT or JLE...
13428   %}
13429 %}
13430 
13431 // Compare 2 longs and CMOVE longs.
13432 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13433   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13434   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13435   ins_cost(400);
13436   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13437             "CMOV$cmp $dst.hi,$src.hi" %}
13438   opcode(0x0F,0x40);
13439   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13440   ins_pipe( pipe_cmov_reg_long );
13441 %}
13442 
13443 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13444   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13445   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13446   ins_cost(500);
13447   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13448             "CMOV$cmp $dst.hi,$src.hi+4" %}
13449   opcode(0x0F,0x40);
13450   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13451   ins_pipe( pipe_cmov_reg_long );
13452 %}
13453 
13454 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13455   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13456   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13457   ins_cost(400);
13458   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13459             "CMOV$cmp $dst.hi,$src.hi" %}
13460   opcode(0x0F,0x40);
13461   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13462   ins_pipe( pipe_cmov_reg_long );
13463 %}
13464 
13465 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13466   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13467   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13468   ins_cost(500);
13469   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13470             "CMOV$cmp $dst.hi,$src.hi+4" %}
13471   opcode(0x0F,0x40);
13472   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13473   ins_pipe( pipe_cmov_reg_long );
13474 %}
13475 
13476 // Compare 2 longs and CMOVE ints.
13477 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13478   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13479   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13480   ins_cost(200);
13481   format %{ "CMOV$cmp $dst,$src" %}
13482   opcode(0x0F,0x40);
13483   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13484   ins_pipe( pipe_cmov_reg );
13485 %}
13486 
13487 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13488   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13489   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13490   ins_cost(250);
13491   format %{ "CMOV$cmp $dst,$src" %}
13492   opcode(0x0F,0x40);
13493   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13494   ins_pipe( pipe_cmov_mem );
13495 %}
13496 
13497 // Compare 2 longs and CMOVE ptrs.
13498 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13499   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13500   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13501   ins_cost(200);
13502   format %{ "CMOV$cmp $dst,$src" %}
13503   opcode(0x0F,0x40);
13504   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13505   ins_pipe( pipe_cmov_reg );
13506 %}
13507 
13508 // Compare 2 longs and CMOVE doubles
13509 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13510   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13511   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13512   ins_cost(200);
13513   expand %{
13514     fcmovDPR_regS(cmp,flags,dst,src);
13515   %}
13516 %}
13517 
13518 // Compare 2 longs and CMOVE doubles
13519 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13520   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13521   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13522   ins_cost(200);
13523   expand %{
13524     fcmovD_regS(cmp,flags,dst,src);
13525   %}
13526 %}
13527 
13528 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13529   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13530   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13531   ins_cost(200);
13532   expand %{
13533     fcmovFPR_regS(cmp,flags,dst,src);
13534   %}
13535 %}
13536 
13537 
13538 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13539   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13540   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13541   ins_cost(200);
13542   expand %{
13543     fcmovF_regS(cmp,flags,dst,src);
13544   %}
13545 %}
13546 
13547 
13548 // ============================================================================
13549 // Procedure Call/Return Instructions
13550 // Call Java Static Instruction
13551 // Note: If this code changes, the corresponding ret_addr_offset() and
13552 //       compute_padding() functions will have to be adjusted.
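//       (Assumed rationale: ret_addr_offset() reports where the return
//       address lands relative to the start of this sequence, and
//       compute_padding() together with ins_alignment(4) keeps the 32-bit
//       call displacement naturally aligned so it can be patched atomically.)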
13553 instruct CallStaticJavaDirect(method meth) %{
13554   match(CallStaticJava);
13555   effect(USE meth);
13556 
13557   ins_cost(300);
13558   format %{ "CALL,static " %}
13559   opcode(0xE8); /* E8 cd */
13560   ins_encode( pre_call_resets,
13561               Java_Static_Call( meth ),
13562               call_epilog,
13563               post_call_FPU );
13564   ins_pipe( pipe_slow );
13565   ins_alignment(4);
13566 %}
13567 
13568 // Call Java Dynamic Instruction
13569 // Note: If this code changes, the corresponding ret_addr_offset() and
13570 //       compute_padding() functions will have to be adjusted.
13571 instruct CallDynamicJavaDirect(method meth) %{
13572   match(CallDynamicJava);
13573   effect(USE meth);
13574 
13575   ins_cost(300);
13576   format %{ "MOV    EAX,(oop)-1\n\t"
13577             "CALL,dynamic" %}
13578   opcode(0xE8); /* E8 cd */
13579   ins_encode( pre_call_resets,
13580               Java_Dynamic_Call( meth ),
13581               call_epilog,
13582               post_call_FPU );
13583   ins_pipe( pipe_slow );
13584   ins_alignment(4);
13585 %}
13586 
13587 // Call Runtime Instruction
13588 instruct CallRuntimeDirect(method meth) %{
13589   match(CallRuntime);
13590   effect(USE meth);
13591 
13592   ins_cost(300);
13593   format %{ "CALL,runtime " %}
13594   opcode(0xE8); /* E8 cd */
13595   // Use FFREEs to clear entries in float stack
13596   ins_encode( pre_call_resets,
13597               FFree_Float_Stack_All,
13598               Java_To_Runtime( meth ),
13599               post_call_FPU );
13600   ins_pipe( pipe_slow );
13601 %}
13602 
13603 // Call runtime without safepoint
13604 instruct CallLeafDirect(method meth) %{
13605   match(CallLeaf);
13606   effect(USE meth);
13607 
13608   ins_cost(300);
13609   format %{ "CALL_LEAF,runtime " %}
13610   opcode(0xE8); /* E8 cd */
13611   ins_encode( pre_call_resets,
13612               FFree_Float_Stack_All,
13613               Java_To_Runtime( meth ),
13614               Verify_FPU_For_Leaf, post_call_FPU );
13615   ins_pipe( pipe_slow );
13616 %}
13617 
13618 instruct CallLeafNoFPDirect(method meth) %{
13619   match(CallLeafNoFP);
13620   effect(USE meth);
13621 
13622   ins_cost(300);
13623   format %{ "CALL_LEAF_NOFP,runtime " %}
13624   opcode(0xE8); /* E8 cd */
13625   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13626   ins_pipe( pipe_slow );
13627 %}
13628 
13629 
13630 // Return Instruction
13631 // Remove the return address & jump to it.
13632 instruct Ret() %{
13633   match(Return);
13634   format %{ "RET" %}
13635   opcode(0xC3);
13636   ins_encode(OpcP);
13637   ins_pipe( pipe_jmp );
13638 %}
13639 
13640 // Tail Call; Jump from runtime stub to Java code.
13641 // Also known as an 'interprocedural jump'.
13642 // Target of jump will eventually return to caller.
13643 // TailJump below removes the return address.
13644 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13645   match(TailCall jump_target method_ptr);
13646   ins_cost(300);
13647   format %{ "JMP    $jump_target \t# EBX holds method" %}
13648   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13649   ins_encode( OpcP, RegOpc(jump_target) );
13650   ins_pipe( pipe_jmp );
13651 %}
13652 
13653 
13654 // Tail Jump; remove the return address; jump to target.
13655 // TailCall above leaves the return address around.
13656 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13657   match( TailJump jump_target ex_oop );
13658   ins_cost(300);
13659   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13660             "JMP    $jump_target " %}
13661   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13662   ins_encode( enc_pop_rdx,
13663               OpcP, RegOpc(jump_target) );
13664   ins_pipe( pipe_jmp );
13665 %}
13666 
13667 // Create exception oop: created by stack-crawling runtime code.
13668 // Created exception is now available to this handler, and is set up
13669 // just prior to jumping to this handler.  No code emitted.
13670 instruct CreateException( eAXRegP ex_oop )
13671 %{
13672   match(Set ex_oop (CreateEx));
13673 
13674   size(0);
13675   // use the following format syntax
13676   format %{ "# exception oop is in EAX; no code emitted" %}
13677   ins_encode();
13678   ins_pipe( empty );
13679 %}
13680 
13681 
13682 // Rethrow exception:
13683 // The exception oop will come in the first argument position.
13684 // Then JUMP (not call) to the rethrow stub code.
13685 instruct RethrowException()
13686 %{
13687   match(Rethrow);
13688 
13689   // use the following format syntax
13690   format %{ "JMP    rethrow_stub" %}
13691   ins_encode(enc_rethrow);
13692   ins_pipe( pipe_jmp );
13693 %}
13694 
13695 // inlined locking and unlocking
13696 
13697 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13698   predicate(Compile::current()->use_rtm());
13699   match(Set cr (FastLock object box));
13700   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13701   ins_cost(300);
13702   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13703   ins_encode %{
13704     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13705                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13706                  _counters, _rtm_counters, _stack_rtm_counters,
13707                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13708                  true, ra_->C->profile_rtm());
13709   %}
13710   ins_pipe(pipe_slow);
13711 %}
13712 
13713 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13714   predicate(!Compile::current()->use_rtm());
13715   match(Set cr (FastLock object box));
13716   effect(TEMP tmp, TEMP scr, USE_KILL box);
13717   ins_cost(300);
13718   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13719   ins_encode %{
13720     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13721                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13722   %}
13723   ins_pipe(pipe_slow);
13724 %}
13725 
13726 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13727   match(Set cr (FastUnlock object box));
13728   effect(TEMP tmp, USE_KILL box);
13729   ins_cost(300);
13730   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13731   ins_encode %{
13732     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13733   %}
13734   ins_pipe(pipe_slow);
13735 %}
13736 
13737 
13738 
13739 // ============================================================================
13740 // Safepoint Instruction
13741 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13742   match(SafePoint poll);
13743   effect(KILL cr, USE poll);
13744 
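  // The $poll operand holds the thread-local polling page address.  Under the
  // usual HotSpot scheme (assumed here), the VM arms that page when a
  // safepoint or handshake is pending, so the read below faults and the
  // signal handler stops this thread; otherwise the TEST is effectively free.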
13745   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13746   ins_cost(125);
13747   // EBP would need size(3)
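  // (TEST EAX,[reg] is 0x85 /r = 2 bytes; an EBP base cannot use the
  // zero-displacement ModRM form, so it would require a zero disp8, hence 3.)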
13748   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13749   ins_encode %{
13750     __ relocate(relocInfo::poll_type);
13751     address pre_pc = __ pc();
13752     __ testl(rax, Address($poll$$Register, 0));
13753     address post_pc = __ pc();
13754     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13755   %}
13756   ins_pipe(ialu_reg_mem);
13757 %}
13758 
13759 
13760 // ============================================================================
13761 // This name is KNOWN by the ADLC and cannot be changed.
13762 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13763 // for this guy.
13764 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13765   match(Set dst (ThreadLocal));
13766   effect(DEF dst, KILL cr);
13767 
13768   format %{ "MOV    $dst, Thread::current()" %}
13769   ins_encode %{
13770     Register dstReg = as_Register($dst$$reg);
13771     __ get_thread(dstReg);
13772   %}
13773   ins_pipe( ialu_reg_fat );
13774 %}
13775 
13776 
13777 
13778 //----------PEEPHOLE RULES-----------------------------------------------------
13779 // These must follow all instruction definitions as they use the names
13780 // defined in the instructions definitions.
13781 //
13782 // peepmatch ( root_instr_name [preceding_instruction]* );
13783 //
13784 // peepconstraint %{
13785 // (instruction_number.operand_name relational_op instruction_number.operand_name
13786 //  [, ...] );
13787 // // instruction numbers are zero-based using left to right order in peepmatch
13788 //
13789 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13790 // // provide an instruction_number.operand_name for each operand that appears
13791 // // in the replacement instruction's match rule
13792 //
13793 // ---------VM FLAGS---------------------------------------------------------
13794 //
13795 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13796 //
13797 // Each peephole rule is given an identifying number starting with zero and
13798 // increasing by one in the order seen by the parser.  An individual peephole
13799 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13800 // on the command-line.
13801 //
13802 // ---------CURRENT LIMITATIONS----------------------------------------------
13803 //
13804 // Only match adjacent instructions in same basic block
13805 // Only equality constraints
13806 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13807 // Only one replacement instruction
13808 //
13809 // ---------EXAMPLE----------------------------------------------------------
13810 //
13811 // // pertinent parts of existing instructions in architecture description
13812 // instruct movI(rRegI dst, rRegI src) %{
13813 //   match(Set dst (CopyI src));
13814 // %}
13815 //
13816 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13817 //   match(Set dst (AddI dst src));
13818 //   effect(KILL cr);
13819 // %}
13820 //
13821 // // Change (inc mov) to lea
13822 // peephole %{
13823 //   // increment preceded by register-register move
13824 //   peepmatch ( incI_eReg movI );
13825 //   // require that the destination register of the increment
13826 //   // match the destination register of the move
13827 //   peepconstraint ( 0.dst == 1.dst );
13828 //   // construct a replacement instruction that sets
13829 //   // the destination to ( move's source register + one )
13830 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13831 // %}
13832 //
13833 // Implementation no longer uses movX instructions since
13834 // machine-independent system no longer uses CopyX nodes.
13835 //
13836 // peephole %{
13837 //   peepmatch ( incI_eReg movI );
13838 //   peepconstraint ( 0.dst == 1.dst );
13839 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13840 // %}
13841 //
13842 // peephole %{
13843 //   peepmatch ( decI_eReg movI );
13844 //   peepconstraint ( 0.dst == 1.dst );
13845 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13846 // %}
13847 //
13848 // peephole %{
13849 //   peepmatch ( addI_eReg_imm movI );
13850 //   peepconstraint ( 0.dst == 1.dst );
13851 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13852 // %}
13853 //
13854 // peephole %{
13855 //   peepmatch ( addP_eReg_imm movP );
13856 //   peepconstraint ( 0.dst == 1.dst );
13857 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13858 // %}
13859 
13860 // // Change load of spilled value to only a spill
13861 // instruct storeI(memory mem, rRegI src) %{
13862 //   match(Set mem (StoreI mem src));
13863 // %}
13864 //
13865 // instruct loadI(rRegI dst, memory mem) %{
13866 //   match(Set dst (LoadI mem));
13867 // %}
13868 //
13869 peephole %{
13870   peepmatch ( loadI storeI );
13871   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13872   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13873 %}
13874 
13875 //----------SMARTSPILL RULES---------------------------------------------------
13876 // These must follow all instruction definitions as they use the names
13877 // defined in the instructions definitions.