1 //
    2 // Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
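//
// For example, reading one of the definitions below (illustrative only): EAX is
// defined as (SOC, SOC, Op_RegI, 0, ...), i.e. save-on-call under both the Java and
// the C calling convention, spilled as an integer, with hardware encoding 0.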
   61 
   62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code, but SOE was
// turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode. During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily. However, at any safepoint the stack will not have
// this extra element, so FPR1 == st(0) from the oopMap viewpoint. This same
// numbering quirk forces the instruction encoding to play games with the register
// encode to correct for the 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1 register.
    // Until this is fixed, the register allocator should not allocate K1; this
    // prevents accidental corruption of the value held in K1.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into a 128-bit operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
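
// For example (illustrative): if &fp_signmask_pool[2] happened to be 0x1008, masking
// with ~0xF yields 0x1000, the nearest 16-byte-aligned slot at or below it; the extra
// 128 bits reserved in the pool below guarantee the aligned slot still lies inside
// the buffer.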
  285 
// Buffer for 128-bit masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
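
// Illustrative sketch (not part of the emitted code): the masks above let AbsF/NegF
// (and the double forms) be implemented as plain bit operations, e.g. ANDPS with
// float_signmask_pool clears the sign bits and XORPS with float_signflip_pool flips
// them.  In scalar C++ terms, roughly:
//
//   jfloat abs_sketch(jfloat f) { return jfloat_cast(jint_cast(f) & 0x7FFFFFFF); }
//   jfloat neg_sketch(jfloat f) { return jfloat_cast(jint_cast(f) ^ 0x80000000); }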
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
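
// Byte accounting for the offsets above (a sketch, assuming no fldcw/vzeroupper
// resets are emitted): a static call is "E8 rel32", 1 opcode byte + 4 displacement
// bytes = 5, so the return address is 5 bytes past the start of the call; a dynamic
// call is preceded by the 5-byte MOV that loads the inline-cache value into EAX,
// hence 10.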
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
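
// Worked example (illustrative only): emit_rm(cbuf, 0x3, 0x0, EAX_enc /* 0 */) packs
// mod=11b, reg=000b, r/m=000b into the single ModRM byte 0xC0 -- e.g. the ModRM byte
// of "ADD EAX, imm32" when it follows opcode 0x81 /0.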
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // 8-bit displacement
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // 32-bit displacement
  420   }
  421 }
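
// Worked example (illustrative only): store_to_stackslot(cbuf, 0xDB, 0x0, 8) emits
// DB 44 24 08, i.e. FILD DWORD PTR [ESP+8]: opcode 0xDB, ModRM 0x44 (mod=01, reg=/0,
// r/m=100 so a SIB byte follows), SIB 0x24 (no index, base=ESP), then the 8-bit
// displacement 0x08.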
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
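
// Worked example (illustrative only): encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4, 0, 0,
// relocInfo::none) -- no index (0x4), no scale, base ECX, zero displacement -- takes
// the no-SIB, no-displacement path and emits the single ModRM byte 0x01; preceded by
// opcode 0x8B it would read "MOV EAX, [ECX]".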
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
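
// Conceptually (a sketch only, not emitted code), the sequence above computes a
// three-way floating point compare with NaN mapping to -1:
//
//   int cmpfp3_sketch(float a, float b) {
//     if (a != a || b != b || a < b) return -1;   // unordered (NaN) or below
//     return (a == b) ? 0 : 1;                    // equal / greater
//   }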
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   __ verified_entry(C);
  628 
  629   C->output()->set_frame_complete(cbuf.insts_size());
  630 
  631   if (C->has_mach_constant_base_node()) {
  632     // NOTE: We set the table base offset here because users might be
  633     // emitted before MachConstantBaseNode.
  634     ConstantTable& constant_table = C->output()->constant_table();
  635     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  636   }
  637 }
  638 
  639 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  640   return MachNode::size(ra_); // too many variables; just compute it the hard way
  641 }
  642 
  643 int MachPrologNode::reloc() const {
  644   return 0; // a large enough number
  645 }
  646 
  647 //=============================================================================
  648 #ifndef PRODUCT
  649 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  650   Compile *C = ra_->C;
  651   int framesize = C->output()->frame_size_in_bytes();
  652   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  653   // Remove two words for return addr and rbp,
  654   framesize -= 2*wordSize;
  655 
  656   if (C->max_vector_size() > 16) {
  657     st->print("VZEROUPPER");
  658     st->cr(); st->print("\t");
  659   }
  660   if (C->in_24_bit_fp_mode()) {
  661     st->print("FLDCW  standard control word");
  662     st->cr(); st->print("\t");
  663   }
  664   if (framesize) {
  665     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  666     st->cr(); st->print("\t");
  667   }
  668   st->print_cr("POPL   EBP"); st->print("\t");
  669   if (do_polling() && C->is_method_compilation()) {
  670     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  671               "JA       #safepoint_stub\t"
  672               "# Safepoint: poll for GC");
  673   }
  674 }
  675 #endif
  676 
  677 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  678   Compile *C = ra_->C;
  679   MacroAssembler _masm(&cbuf);
  680 
  681   if (C->max_vector_size() > 16) {
  682     // Clear upper bits of YMM registers when current compiled code uses
  683     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  684     _masm.vzeroupper();
  685   }
  686   // If method set FPU control word, restore to standard control word
  687   if (C->in_24_bit_fp_mode()) {
  688     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  689   }
  690 
  691   int framesize = C->output()->frame_size_in_bytes();
  692   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  693   // Remove two words for return addr and rbp,
  694   framesize -= 2*wordSize;
  695 
  696   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  697 
  698   if (framesize >= 128) {
  699     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  700     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  701     emit_d32(cbuf, framesize);
  702   } else if (framesize) {
  703     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  704     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  705     emit_d8(cbuf, framesize);
  706   }
  707 
  708   emit_opcode(cbuf, 0x58 | EBP_enc);
  709 
  710   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  711     __ reserved_stack_check();
  712   }
  713 
  714   if (do_polling() && C->is_method_compilation()) {
  715     Register thread = as_Register(EBX_enc);
  716     MacroAssembler masm(&cbuf);
  717     __ get_thread(thread);
  718     Label dummy_label;
  719     Label* code_stub = &dummy_label;
  720     if (!C->output()->in_scratch_emit_size()) {
  721       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  722     }
  723     __ relocate(relocInfo::poll_return_type);
  724     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  725   }
  726 }
  727 
  728 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  729   return MachNode::size(ra_); // too many variables; just compute it
  730                               // the hard way
  731 }
  732 
  733 int MachEpilogNode::reloc() const {
  734   return 0; // a large enough number
  735 }
  736 
  737 const Pipeline * MachEpilogNode::pipeline() const {
  738   return MachNode::pipeline_class();
  739 }
  740 
  741 //=============================================================================
  742 
  743 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  744 static enum RC rc_class( OptoReg::Name reg ) {
  745 
  746   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  747   if (OptoReg::is_stack(reg)) return rc_stack;
  748 
  749   VMReg r = OptoReg::as_VMReg(reg);
  750   if (r->is_Register()) return rc_int;
  751   if (r->is_FloatRegister()) {
  752     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  753     return rc_float;
  754   }
  755   assert(r->is_XMMRegister(), "must be");
  756   return rc_xmm;
  757 }
  758 
  759 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  760                         int opcode, const char *op_str, int size, outputStream* st ) {
  761   if( cbuf ) {
  762     emit_opcode  (*cbuf, opcode );
  763     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  764 #ifndef PRODUCT
  765   } else if( !do_size ) {
  766     if( size != 0 ) st->print("\n\t");
  767     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  768       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  769       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  770     } else { // FLD, FST, PUSH, POP
  771       st->print("%s [ESP + #%d]",op_str,offset);
  772     }
  773 #endif
  774   }
  775   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  776   return size+3+offset_size;
  777 }
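
// Size accounting sketch for impl_helper (illustrative): an [ESP + disp8] access is
// opcode + ModRM + SIB + disp8 = 4 bytes, i.e. the fixed 3 bytes plus a 1-byte
// displacement; with a 32-bit displacement it is 3 + 4 = 7 bytes.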
  778 
  779 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  780 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  781                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  782   int in_size_in_bits = Assembler::EVEX_32bit;
  783   int evex_encoding = 0;
  784   if (reg_lo+1 == reg_hi) {
  785     in_size_in_bits = Assembler::EVEX_64bit;
  786     evex_encoding = Assembler::VEX_W;
  787   }
  788   if (cbuf) {
  789     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: a compressed displacement is better than AVX on spill
    //                          memory operations, since it maps more cases to a single-byte displacement.
  792     _masm.set_managed();
  793     if (reg_lo+1 == reg_hi) { // double move?
  794       if (is_load) {
  795         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  796       } else {
  797         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  798       }
  799     } else {
  800       if (is_load) {
  801         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  802       } else {
  803         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  804       }
  805     }
  806 #ifndef PRODUCT
  807   } else if (!do_size) {
  808     if (size != 0) st->print("\n\t");
  809     if (reg_lo+1 == reg_hi) { // double move?
  810       if (is_load) st->print("%s %s,[ESP + #%d]",
  811                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  812                               Matcher::regName[reg_lo], offset);
  813       else         st->print("MOVSD  [ESP + #%d],%s",
  814                               offset, Matcher::regName[reg_lo]);
  815     } else {
  816       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  817                               Matcher::regName[reg_lo], offset);
  818       else         st->print("MOVSS  [ESP + #%d],%s",
  819                               offset, Matcher::regName[reg_lo]);
  820     }
  821 #endif
  822   }
  823   bool is_single_byte = false;
  824   if ((UseAVX > 2) && (offset != 0)) {
  825     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  826   }
  827   int offset_size = 0;
  828   if (UseAVX > 2 ) {
  829     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  830   } else {
  831     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  832   }
  833   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  834   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  835   return size+5+offset_size;
  836 }
  837 
  838 
  839 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  840                             int src_hi, int dst_hi, int size, outputStream* st ) {
  841   if (cbuf) {
  842     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so EVEX spill code is managed one way.
  844     _masm.set_managed();
  845     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  846       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  847                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  848     } else {
  849       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  851     }
  852 #ifndef PRODUCT
  853   } else if (!do_size) {
  854     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  856       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  857         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  858       } else {
  859         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  860       }
  861     } else {
  862       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  863         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  864       } else {
  865         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  866       }
  867     }
  868 #endif
  869   }
  870   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  871   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  872   int sz = (UseAVX > 2) ? 6 : 4;
  873   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  874       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  875   return size + sz;
  876 }
  877 
  878 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  879                             int src_hi, int dst_hi, int size, outputStream* st ) {
  880   // 32-bit
  881   if (cbuf) {
  882     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so EVEX spill code is managed one way.
  884     _masm.set_managed();
  885     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  886              as_Register(Matcher::_regEncode[src_lo]));
  887 #ifndef PRODUCT
  888   } else if (!do_size) {
  889     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  890 #endif
  891   }
  892   return (UseAVX> 2) ? 6 : 4;
  893 }
  894 
  895 
  896 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  897                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  898   // 32-bit
  899   if (cbuf) {
  900     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so EVEX spill code is managed one way.
  902     _masm.set_managed();
  903     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  904              as_XMMRegister(Matcher::_regEncode[src_lo]));
  905 #ifndef PRODUCT
  906   } else if (!do_size) {
  907     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  908 #endif
  909   }
  910   return (UseAVX> 2) ? 6 : 4;
  911 }
  912 
  913 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  914   if( cbuf ) {
  915     emit_opcode(*cbuf, 0x8B );
  916     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  917 #ifndef PRODUCT
  918   } else if( !do_size ) {
  919     if( size != 0 ) st->print("\n\t");
  920     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  921 #endif
  922   }
  923   return size+2;
  924 }
  925 
  926 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  927                                  int offset, int size, outputStream* st ) {
  928   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  929     if( cbuf ) {
  930       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  931       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  932 #ifndef PRODUCT
  933     } else if( !do_size ) {
  934       if( size != 0 ) st->print("\n\t");
  935       st->print("FLD    %s",Matcher::regName[src_lo]);
  936 #endif
  937     }
  938     size += 2;
  939   }
  940 
  941   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  942   const char *op_str;
  943   int op;
  944   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  945     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  946     op = 0xDD;
  947   } else {                   // 32-bit store
  948     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  949     op = 0xD9;
  950     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  951   }
  952 
  953   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  954 }
  955 
  956 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  957 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  958                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  959 
  960 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  961                             int stack_offset, int reg, uint ireg, outputStream* st);
  962 
  963 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  964                                      int dst_offset, uint ireg, outputStream* st) {
  965   if (cbuf) {
  966     MacroAssembler _masm(cbuf);
  967     switch (ireg) {
  968     case Op_VecS:
  969       __ pushl(Address(rsp, src_offset));
  970       __ popl (Address(rsp, dst_offset));
  971       break;
  972     case Op_VecD:
  973       __ pushl(Address(rsp, src_offset));
  974       __ popl (Address(rsp, dst_offset));
  975       __ pushl(Address(rsp, src_offset+4));
  976       __ popl (Address(rsp, dst_offset+4));
  977       break;
  978     case Op_VecX:
  979       __ movdqu(Address(rsp, -16), xmm0);
  980       __ movdqu(xmm0, Address(rsp, src_offset));
  981       __ movdqu(Address(rsp, dst_offset), xmm0);
  982       __ movdqu(xmm0, Address(rsp, -16));
  983       break;
  984     case Op_VecY:
  985       __ vmovdqu(Address(rsp, -32), xmm0);
  986       __ vmovdqu(xmm0, Address(rsp, src_offset));
  987       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  988       __ vmovdqu(xmm0, Address(rsp, -32));
  989       break;
  990     case Op_VecZ:
  991       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  992       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  993       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  994       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  995       break;
  996     default:
  997       ShouldNotReachHere();
  998     }
  999 #ifndef PRODUCT
 1000   } else {
 1001     switch (ireg) {
 1002     case Op_VecS:
 1003       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1004                 "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset);
 1006       break;
 1007     case Op_VecD:
 1008       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1012                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1013       break;
 1014      case Op_VecX:
 1015       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1016                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1017                 "movdqu  [rsp + #%d], xmm0\n\t"
 1018                 "movdqu  xmm0, [rsp - #16]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecY:
 1022       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #32]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     case Op_VecZ:
 1029       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1030                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1031                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1032                 "vmovdqu xmm0, [rsp - #64]",
 1033                 src_offset, dst_offset);
 1034       break;
 1035     default:
 1036       ShouldNotReachHere();
 1037     }
 1038 #endif
 1039   }
 1040 }
 1041 
 1042 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1043   // Get registers to move
 1044   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1045   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1046   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1047   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1048 
 1049   enum RC src_second_rc = rc_class(src_second);
 1050   enum RC src_first_rc = rc_class(src_first);
 1051   enum RC dst_second_rc = rc_class(dst_second);
 1052   enum RC dst_first_rc = rc_class(dst_first);
 1053 
 1054   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1055 
 1056   // Generate spill code!
 1057   int size = 0;
 1058 
 1059   if( src_first == dst_first && src_second == dst_second )
 1060     return size;            // Self copy, no move
 1061 
 1062   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1063     uint ireg = ideal_reg();
 1064     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1065     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1066     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1067     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1068       // mem -> mem
 1069       int src_offset = ra_->reg2offset(src_first);
 1070       int dst_offset = ra_->reg2offset(dst_first);
 1071       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1072     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1073       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1074     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1075       int stack_offset = ra_->reg2offset(dst_first);
 1076       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1077     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1078       int stack_offset = ra_->reg2offset(src_first);
 1079       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1080     } else {
 1081       ShouldNotReachHere();
 1082     }
 1083     return 0;
 1084   }
 1085 
 1086   // --------------------------------------
 1087   // Check for mem-mem move.  push/pop to move.
 1088   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1089     if( src_second == dst_first ) { // overlapping stack copy ranges
 1090       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1091       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1092       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1093       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1094     }
 1095     // move low bits
 1096     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1097     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1098     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1099       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1100       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1101     }
 1102     return size;
 1103   }
 1104 
 1105   // --------------------------------------
 1106   // Check for integer reg-reg copy
 1107   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1108     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1109 
 1110   // Check for integer store
 1111   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1112     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1113 
 1114   // Check for integer load
 1115   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1116     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1117 
 1118   // Check for integer reg-xmm reg copy
 1119   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1121             "no 64 bit integer-float reg moves" );
 1122     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1123   }
 1124   // --------------------------------------
 1125   // Check for float reg-reg copy
 1126   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1127     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1128             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1129     if( cbuf ) {
 1130 
 1131       // Note the mucking with the register encode to compensate for the 0/1
 1132       // indexing issue mentioned in a comment in the reg_def sections
 1133       // for FPR registers many lines above here.
 1134 
 1135       if( src_first != FPR1L_num ) {
 1136         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1137         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1138         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1139         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1140      } else {
 1141         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1142         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1143      }
 1144 #ifndef PRODUCT
 1145     } else if( !do_size ) {
 1146       if( size != 0 ) st->print("\n\t");
 1147       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1148       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1149 #endif
 1150     }
 1151     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1152   }
 1153 
 1154   // Check for float store
 1155   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1156     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1157   }
 1158 
 1159   // Check for float load
 1160   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1161     int offset = ra_->reg2offset(src_first);
 1162     const char *op_str;
 1163     int op;
 1164     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1165       op_str = "FLD_D";
 1166       op = 0xDD;
 1167     } else {                   // 32-bit load
 1168       op_str = "FLD_S";
 1169       op = 0xD9;
 1170       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1171     }
 1172     if( cbuf ) {
 1173       emit_opcode  (*cbuf, op );
 1174       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1175       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1176       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1177 #ifndef PRODUCT
 1178     } else if( !do_size ) {
 1179       if( size != 0 ) st->print("\n\t");
 1180       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1181 #endif
 1182     }
 1183     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1184     return size + 3+offset_size+2;
 1185   }
 1186 
 1187   // Check for xmm reg-reg copy
 1188   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1189     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1190             (src_first+1 == src_second && dst_first+1 == dst_second),
 1191             "no non-adjacent float-moves" );
 1192     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm reg-integer reg copy
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1197     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1198             "no 64 bit float-integer reg moves" );
 1199     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1200   }
 1201 
 1202   // Check for xmm store
 1203   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1204     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1205   }
 1206 
 1207   // Check for float xmm load
 1208   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1209     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1210   }
 1211 
 1212   // Copy from float reg to xmm reg
 1213   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1214     // copy to the top of stack from floating point reg
 1215     // and use LEA to preserve flags
 1216     if( cbuf ) {
 1217       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1218       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1219       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1220       emit_d8(*cbuf,0xF8);
 1221 #ifndef PRODUCT
 1222     } else if( !do_size ) {
 1223       if( size != 0 ) st->print("\n\t");
 1224       st->print("LEA    ESP,[ESP-8]");
 1225 #endif
 1226     }
 1227     size += 4;
 1228 
 1229     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1230 
 1231     // Copy from the temp memory to the xmm reg.
 1232     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1233 
 1234     if( cbuf ) {
 1235       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1236       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1237       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1238       emit_d8(*cbuf,0x08);
 1239 #ifndef PRODUCT
 1240     } else if( !do_size ) {
 1241       if( size != 0 ) st->print("\n\t");
 1242       st->print("LEA    ESP,[ESP+8]");
 1243 #endif
 1244     }
 1245     size += 4;
 1246     return size;
 1247   }
 1248 
 1249   assert( size > 0, "missed a case" );
 1250 
 1251   // --------------------------------------------------------------------
 1252   // Check for second bits still needing moving.
 1253   if( src_second == dst_second )
 1254     return size;               // Self copy; no move
 1255   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1256 
 1257   // Check for second word int-int move
 1258   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1259     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1260 
 1261   // Check for second word integer store
 1262   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1263     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1264 
 1265   // Check for second word integer load
 1266   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1267     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1268 
 1269   // AVX-512 opmask specific spilling.
 1270   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1271     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1272     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1273     MacroAssembler _masm(cbuf);
 1274     int offset = ra_->reg2offset(src_first);
 1275     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1276     return 0;
 1277   }
 1278 
 1279   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1280     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1281     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1282     MacroAssembler _masm(cbuf);
 1283     int offset = ra_->reg2offset(dst_first);
 1284     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1285     return 0;
 1286   }
 1287 
 1288   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1289     Unimplemented();
 1290     return 0;
 1291   }
 1292 
 1293   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1294     Unimplemented();
 1295     return 0;
 1296   }
 1297 
 1298   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1299     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1300     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1301     MacroAssembler _masm(cbuf);
 1302     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1303     return 0;
 1304   }
 1305 
 1306   Unimplemented();
 1307   return 0; // Mute compiler
 1308 }
 1309 
 1310 #ifndef PRODUCT
 1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1312   implementation( NULL, ra_, false, st );
 1313 }
 1314 #endif
 1315 
 1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1317   implementation( &cbuf, ra_, false, NULL );
 1318 }
 1319 
 1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
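  // Defer to MachNode::size(), which measures the copy by emitting it into a
  // scratch buffer.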
 1321   return MachNode::size(ra_);
 1322 }
 1323 
 1324 
 1325 //=============================================================================
 1326 #ifndef PRODUCT
 1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1329   int reg = ra_->get_reg_first(this);
 1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1331 }
 1332 #endif
 1333 
 1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1336   int reg = ra_->get_encode(this);
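  // Emit LEA reg,[ESP+offset], using the disp32 (mod=10) form when the offset
  // does not fit in a signed byte, otherwise the shorter disp8 (mod=01) form.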
 1337   if( offset >= 128 ) {
 1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1339     emit_rm(cbuf, 0x2, reg, 0x04);
 1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1341     emit_d32(cbuf, offset);
 1342   }
 1343   else {
 1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1345     emit_rm(cbuf, 0x1, reg, 0x04);
 1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1347     emit_d8(cbuf, offset);
 1348   }
 1349 }
 1350 
 1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
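  // LEA opcode + ModRM + SIB + disp32 is 7 bytes; with a disp8 it is only 4.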
 1353   if( offset >= 128 ) {
 1354     return 7;
 1355   }
 1356   else {
 1357     return 4;
 1358   }
 1359 }
 1360 
 1361 //=============================================================================
 1362 #ifndef PRODUCT
 1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1366   st->print_cr("\tNOP");
 1367   st->print_cr("\tNOP");
 1368   if( !OptoBreakpoint )
 1369     st->print_cr("\tNOP");
 1370 }
 1371 #endif
 1372 
 1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1374   MacroAssembler masm(&cbuf);
 1375 #ifdef ASSERT
 1376   uint insts_size = cbuf.insts_size();
 1377 #endif
 1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1379   masm.jump_cc(Assembler::notEqual,
 1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1381   /* WARNING these NOPs are critical so that verified entry point is properly
 1382      aligned for patching by NativeJump::patch_verified_entry() */
 1383   int nops_cnt = 2;
 1384   if( !OptoBreakpoint ) // Leave space for int3
 1385      nops_cnt += 1;
 1386   masm.nop(nops_cnt);
 1387 
 1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1389 }
 1390 
 1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
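  // CMP EAX,[ECX+klass_offset] encodes in 3 bytes (disp8 form), JNE rel32 in 6,
  // plus the 2 or 3 NOPs emitted above.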
 1392   return OptoBreakpoint ? 11 : 12;
 1393 }
 1394 
 1395 
 1396 //=============================================================================
 1397 
 1398 // Vector calling convention not supported.
 1399 const bool Matcher::supports_vector_calling_convention() {
 1400   return false;
 1401 }
 1402 
 1403 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1404   Unimplemented();
 1405   return OptoRegPair(0, 0);
 1406 }
 1407 
 1408 // Is this branch offset short enough that a short branch can be used?
 1409 //
 1410 // NOTE: If the platform does not provide any short branch variants, then
 1411 //       this method should return false for offset 0.
 1412 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1416   offset -= br_size;
 1417 
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
 1420   if (rule == jmpConUCF2_rule)
 1421     return (-126 <= offset && offset <= 125);
 1422   return (-128 <= offset && offset <= 127);
 1423 }
 1424 
 1425 // Return whether or not this register is ever used as an argument.  This
 1426 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1427 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1429 bool Matcher::can_be_java_arg( int reg ) {
 1430   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1431   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1432   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1433   return false;
 1434 }
 1435 
 1436 bool Matcher::is_spillable_arg( int reg ) {
 1437   return can_be_java_arg(reg);
 1438 }
 1439 
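// Register-pressure limits used by the register allocator.  A flag value of -1
// means INTPRESSURE/FLOATPRESSURE was not set explicitly, so fall back to 6,
// a conservative default for ia32's small register file.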
 1440 uint Matcher::int_pressure_limit()
 1441 {
 1442   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1443 }
 1444 
 1445 uint Matcher::float_pressure_limit()
 1446 {
 1447   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1448 }
 1449 
 1450 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiplication.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded because its magnitude cannot be
  // represented as a positive 32-bit value).
 1456   return VM_Version::has_fast_idiv() &&
 1457          (divisor == (int)divisor && divisor != min_jint);
 1458 }
 1459 
 1460 // Register for DIVI projection of divmodI
 1461 RegMask Matcher::divI_proj_mask() {
 1462   return EAX_REG_mask();
 1463 }
 1464 
 1465 // Register for MODI projection of divmodI
 1466 RegMask Matcher::modI_proj_mask() {
 1467   return EDX_REG_mask();
 1468 }
 1469 
 1470 // Register for DIVL projection of divmodL
 1471 RegMask Matcher::divL_proj_mask() {
 1472   ShouldNotReachHere();
 1473   return RegMask();
 1474 }
 1475 
 1476 // Register for MODL projection of divmodL
 1477 RegMask Matcher::modL_proj_mask() {
 1478   ShouldNotReachHere();
 1479   return RegMask();
 1480 }
 1481 
 1482 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1483   return NO_REG_mask();
 1484 }
 1485 
// Returns true if the high 32 bits of the value are known to be zero.
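// (Used, for example, by predicates on the long-multiply rules to skip work
// on the high words.)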
 1487 bool is_operand_hi32_zero(Node* n) {
 1488   int opc = n->Opcode();
 1489   if (opc == Op_AndL) {
 1490     Node* o2 = n->in(2);
 1491     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1492       return true;
 1493     }
 1494   }
 1495   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1496     return true;
 1497   }
 1498   return false;
 1499 }
 1500 
 1501 %}
 1502 
 1503 //----------ENCODING BLOCK-----------------------------------------------------
 1504 // This block specifies the encoding classes used by the compiler to output
 1505 // byte streams.  Encoding classes generate functions which are called by
 1506 // Machine Instruction Nodes in order to generate the bit encoding of the
 1507 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1509 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1510 // operand to generate a function which returns its register number when
 1511 // queried.   CONST_INTER causes an operand to generate a function which
 1512 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1513 // operand to generate four functions which return the Base Register, the
 1514 // Index Register, the Scale Value, and the Offset Value of the operand when
 1515 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e. the encoding bits for the instruction)
 1517 // associated with each basic boolean condition for a conditional instruction.
 1518 // Instructions specify two basic values for encoding.  They use the
 1519 // ins_encode keyword to specify their encoding class (which must be one of
 1520 // the class names specified in the encoding block), and they use the
 1521 // opcode keyword to specify, in order, their primary, secondary, and
 1522 // tertiary opcode.  Only the opcode sections which a particular instruction
 1523 // needs for encoding need to be specified.
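// As a purely illustrative sketch (modeled on the integer-add rule defined
// elsewhere in this file), an instruct ties these pieces together: the opcode
// keyword supplies $primary (and optionally $secondary/$tertiary), and
// ins_encode names enc_classes from the block below.
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                        // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit 0x03, then the ModRM byte
//     ins_pipe( ialu_reg_reg );
//   %}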
 1524 encode %{
 1525   // Build emit functions for each basic byte or larger field in the intel
 1526   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1527   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
 1529   // adding a syntax that specifies the sizes of fields in an order,
 1530   // so that the adlc can build the emit functions automagically
 1531 
 1532   // Emit primary opcode
 1533   enc_class OpcP %{
 1534     emit_opcode(cbuf, $primary);
 1535   %}
 1536 
 1537   // Emit secondary opcode
 1538   enc_class OpcS %{
 1539     emit_opcode(cbuf, $secondary);
 1540   %}
 1541 
 1542   // Emit opcode directly
 1543   enc_class Opcode(immI d8) %{
 1544     emit_opcode(cbuf, $d8$$constant);
 1545   %}
 1546 
 1547   enc_class SizePrefix %{
 1548     emit_opcode(cbuf,0x66);
 1549   %}
 1550 
 1551   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1552     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1553   %}
 1554 
 1555   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1556     emit_opcode(cbuf,$opcode$$constant);
 1557     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1558   %}
 1559 
 1560   enc_class mov_r32_imm0( rRegI dst ) %{
 1561     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1562     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1563   %}
 1564 
 1565   enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for the
    // special case described in the JVM spec., p.243 & p.271.
 1568     //
 1569     //         normal case                           special case
 1570     //
    // input : eax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
 1588     //
 1589     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1590     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
 1592     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1593     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1594     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
 1597     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1598     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1599     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1600     // normal_case:
 1601     emit_opcode(cbuf,0x99);                                         // cdq
 1602     // idiv (note: must be emitted by the user of this rule)
 1603     // normal:
 1604   %}
 1605 
 1606   // Dense encoding for older common ops
 1607   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1608     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1609   %}
 1610 
 1611 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1613   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1614     // Check for 8-bit immediate, and set sign extend bit in opcode
 1615     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1616       emit_opcode(cbuf, $primary | 0x02);
 1617     }
 1618     else {                          // If 32-bit immediate
 1619       emit_opcode(cbuf, $primary);
 1620     }
 1621   %}
 1622 
 1623   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1624     // Emit primary opcode and set sign-extend bit
 1625     // Check for 8-bit immediate, and set sign extend bit in opcode
 1626     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1628     else {                          // If 32-bit immediate
 1629       emit_opcode(cbuf, $primary);
 1630     }
 1631     // Emit r/m byte with secondary opcode, after primary opcode.
 1632     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1633   %}
 1634 
 1635   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1636     // Check for 8-bit immediate, and set sign extend bit in opcode
 1637     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1638       $$$emit8$imm$$constant;
 1639     }
 1640     else {                          // If 32-bit immediate
 1641       // Output immediate
 1642       $$$emit32$imm$$constant;
 1643     }
 1644   %}
 1645 
 1646   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1647     // Emit primary opcode and set sign-extend bit
 1648     // Check for 8-bit immediate, and set sign extend bit in opcode
 1649     int con = (int)$imm$$constant; // Throw away top bits
 1650     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1651     // Emit r/m byte with secondary opcode, after primary opcode.
 1652     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1653     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1654     else                               emit_d32(cbuf,con);
 1655   %}
 1656 
 1657   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1658     // Emit primary opcode and set sign-extend bit
 1659     // Check for 8-bit immediate, and set sign extend bit in opcode
 1660     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1661     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1662     // Emit r/m byte with tertiary opcode, after primary opcode.
 1663     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1664     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1665     else                               emit_d32(cbuf,con);
 1666   %}
 1667 
 1668   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1669     emit_cc(cbuf, $secondary, $dst$$reg );
 1670   %}
 1671 
 1672   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1673     int destlo = $dst$$reg;
 1674     int desthi = HIGH_FROM_LOW(destlo);
 1675     // bswap lo
 1676     emit_opcode(cbuf, 0x0F);
 1677     emit_cc(cbuf, 0xC8, destlo);
 1678     // bswap hi
 1679     emit_opcode(cbuf, 0x0F);
 1680     emit_cc(cbuf, 0xC8, desthi);
 1681     // xchg lo and hi
 1682     emit_opcode(cbuf, 0x87);
 1683     emit_rm(cbuf, 0x3, destlo, desthi);
 1684   %}
 1685 
 1686   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1687     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1688   %}
 1689 
 1690   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1691     $$$emit8$primary;
 1692     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1693   %}
 1694 
 1695   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1696     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1697     emit_d8(cbuf, op >> 8 );
 1698     emit_d8(cbuf, op & 255);
 1699   %}
 1700 
 1701   // emulate a CMOV with a conditional branch around a MOV
 1702   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1703     // Invert sense of branch from sense of CMOV
 1704     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1705     emit_d8( cbuf, $brOffs$$constant );
 1706   %}
 1707 
 1708   enc_class enc_PartialSubtypeCheck( ) %{
 1709     Register Redi = as_Register(EDI_enc); // result register
 1710     Register Reax = as_Register(EAX_enc); // super class
 1711     Register Recx = as_Register(ECX_enc); // killed
 1712     Register Resi = as_Register(ESI_enc); // sub class
 1713     Label miss;
 1714 
 1715     MacroAssembler _masm(&cbuf);
 1716     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1717                                      NULL, &miss,
 1718                                      /*set_cond_codes:*/ true);
 1719     if ($primary) {
 1720       __ xorptr(Redi, Redi);
 1721     }
 1722     __ bind(miss);
 1723   %}
 1724 
 1725   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1726     MacroAssembler masm(&cbuf);
 1727     int start = masm.offset();
 1728     if (UseSSE >= 2) {
 1729       if (VerifyFPU) {
 1730         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1731       }
 1732     } else {
 1733       // External c_calling_convention expects the FPU stack to be 'clean'.
 1734       // Compiled code leaves it dirty.  Do cleanup now.
 1735       masm.empty_FPU_stack();
 1736     }
 1737     if (sizeof_FFree_Float_Stack_All == -1) {
 1738       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1739     } else {
 1740       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1741     }
 1742   %}
 1743 
 1744   enc_class Verify_FPU_For_Leaf %{
 1745     if( VerifyFPU ) {
 1746       MacroAssembler masm(&cbuf);
 1747       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1748     }
 1749   %}
 1750 
 1751   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1752     // This is the instruction starting address for relocation info.
 1753     cbuf.set_insts_mark();
 1754     $$$emit8$primary;
 1755     // CALL directly to the runtime
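    // The 32-bit displacement is relative to the end of the CALL instruction;
    // insts_end() points just past the opcode byte emitted above, so subtract
    // another 4 for the displacement bytes that follow.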
 1756     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1757                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1758 
 1759     if (UseSSE >= 2) {
 1760       MacroAssembler _masm(&cbuf);
 1761       BasicType rt = tf()->return_type();
 1762 
 1763       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1764         // A C runtime call where the return value is unused.  In SSE2+
 1765         // mode the result needs to be removed from the FPU stack.  It's
 1766         // likely that this function call could be removed by the
 1767         // optimizer if the C function is a pure function.
 1768         __ ffree(0);
 1769       } else if (rt == T_FLOAT) {
 1770         __ lea(rsp, Address(rsp, -4));
 1771         __ fstp_s(Address(rsp, 0));
 1772         __ movflt(xmm0, Address(rsp, 0));
 1773         __ lea(rsp, Address(rsp,  4));
 1774       } else if (rt == T_DOUBLE) {
 1775         __ lea(rsp, Address(rsp, -8));
 1776         __ fstp_d(Address(rsp, 0));
 1777         __ movdbl(xmm0, Address(rsp, 0));
 1778         __ lea(rsp, Address(rsp,  8));
 1779       }
 1780     }
 1781   %}
 1782 
 1783   enc_class pre_call_resets %{
 1784     // If method sets FPU control word restore it here
 1785     debug_only(int off0 = cbuf.insts_size());
 1786     if (ra_->C->in_24_bit_fp_mode()) {
 1787       MacroAssembler _masm(&cbuf);
 1788       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1789     }
 1790     // Clear upper bits of YMM registers when current compiled code uses
 1791     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1792     MacroAssembler _masm(&cbuf);
 1793     __ vzeroupper();
 1794     debug_only(int off1 = cbuf.insts_size());
 1795     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1796   %}
 1797 
 1798   enc_class post_call_FPU %{
 1799     // If method sets FPU control word do it here also
 1800     if (Compile::current()->in_24_bit_fp_mode()) {
 1801       MacroAssembler masm(&cbuf);
 1802       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1803     }
 1804   %}
 1805 
 1806   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1807     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1808     // who we intended to call.
 1809     cbuf.set_insts_mark();
 1810     $$$emit8$primary;
 1811 
 1812     if (!_method) {
 1813       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1814                      runtime_call_Relocation::spec(),
 1815                      RELOC_IMM32);
 1816     } else {
 1817       int method_index = resolved_method_index(cbuf);
 1818       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1819                                                   : static_call_Relocation::spec(method_index);
 1820       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1821                      rspec, RELOC_DISP32);
 1822       // Emit stubs for static call.
 1823       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1824       if (stub == NULL) {
 1825         ciEnv::current()->record_failure("CodeCache is full");
 1826         return;
 1827       }
 1828     }
 1829   %}
 1830 
 1831   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1832     MacroAssembler _masm(&cbuf);
 1833     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1834   %}
 1835 
 1836   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1837     int disp = in_bytes(Method::from_compiled_offset());
 1838     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1839 
 1840     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1841     cbuf.set_insts_mark();
 1842     $$$emit8$primary;
 1843     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1844     emit_d8(cbuf, disp);             // Displacement
 1845 
 1846   %}
 1847 
 1848 //   Following encoding is no longer used, but may be restored if calling
 1849 //   convention changes significantly.
 1850 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1851 //
 1852 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1853 //     // int ic_reg     = Matcher::inline_cache_reg();
 1854 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1855 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1856 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1857 //
 1858 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1859 //     // // so we load it immediately before the call
 1860 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1861 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1862 //
 1863 //     // xor rbp,ebp
 1864 //     emit_opcode(cbuf, 0x33);
 1865 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1866 //
 1867 //     // CALL to interpreter.
 1868 //     cbuf.set_insts_mark();
 1869 //     $$$emit8$primary;
 1870 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1871 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1872 //   %}
 1873 
 1874   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1875     $$$emit8$primary;
 1876     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1877     $$$emit8$shift$$constant;
 1878   %}
 1879 
 1880   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1881     // Load immediate does not have a zero or sign extended version
 1882     // for 8-bit immediates
 1883     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1884     $$$emit32$src$$constant;
 1885   %}
 1886 
 1887   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1888     // Load immediate does not have a zero or sign extended version
 1889     // for 8-bit immediates
 1890     emit_opcode(cbuf, $primary + $dst$$reg);
 1891     $$$emit32$src$$constant;
 1892   %}
 1893 
 1894   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1895     // Load immediate does not have a zero or sign extended version
 1896     // for 8-bit immediates
 1897     int dst_enc = $dst$$reg;
 1898     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1899     if (src_con == 0) {
 1900       // xor dst, dst
 1901       emit_opcode(cbuf, 0x33);
 1902       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1903     } else {
 1904       emit_opcode(cbuf, $primary + dst_enc);
 1905       emit_d32(cbuf, src_con);
 1906     }
 1907   %}
 1908 
 1909   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1910     // Load immediate does not have a zero or sign extended version
 1911     // for 8-bit immediates
 1912     int dst_enc = $dst$$reg + 2;
 1913     int src_con = ((julong)($src$$constant)) >> 32;
 1914     if (src_con == 0) {
 1915       // xor dst, dst
 1916       emit_opcode(cbuf, 0x33);
 1917       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1918     } else {
 1919       emit_opcode(cbuf, $primary + dst_enc);
 1920       emit_d32(cbuf, src_con);
 1921     }
 1922   %}
 1923 
 1924 
 1925   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1926   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1927     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1928   %}
 1929 
 1930   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1931     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1932   %}
 1933 
 1934   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1935     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1936   %}
 1937 
 1938   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1939     $$$emit8$primary;
 1940     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1941   %}
 1942 
 1943   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1944     $$$emit8$secondary;
 1945     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1946   %}
 1947 
 1948   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1949     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1950   %}
 1951 
 1952   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1953     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1954   %}
 1955 
 1956   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1957     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1958   %}
 1959 
 1960   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1961     // Output immediate
 1962     $$$emit32$src$$constant;
 1963   %}
 1964 
 1965   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1966     // Output Float immediate bits
 1967     jfloat jf = $src$$constant;
 1968     int    jf_as_bits = jint_cast( jf );
 1969     emit_d32(cbuf, jf_as_bits);
 1970   %}
 1971 
 1972   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1973     // Output Float immediate bits
 1974     jfloat jf = $src$$constant;
 1975     int    jf_as_bits = jint_cast( jf );
 1976     emit_d32(cbuf, jf_as_bits);
 1977   %}
 1978 
 1979   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1980     // Output immediate
 1981     $$$emit16$src$$constant;
 1982   %}
 1983 
 1984   enc_class Con_d32(immI src) %{
 1985     emit_d32(cbuf,$src$$constant);
 1986   %}
 1987 
 1988   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1989     // Output immediate memory reference
 1990     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1991     emit_d32(cbuf, 0x00);
 1992   %}
 1993 
 1994   enc_class lock_prefix( ) %{
 1995     emit_opcode(cbuf,0xF0);         // [Lock]
 1996   %}
 1997 
  // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store, but
  //       our register encoding has it in ebx.
 2003   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2004 
    // XCHG  ebx,ecx
 2006     emit_opcode(cbuf,0x87);
 2007     emit_opcode(cbuf,0xD9);
 2008     // [Lock]
 2009     emit_opcode(cbuf,0xF0);
 2010     // CMPXCHG8 [Eptr]
 2011     emit_opcode(cbuf,0x0F);
 2012     emit_opcode(cbuf,0xC7);
 2013     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
 2015     emit_opcode(cbuf,0x87);
 2016     emit_opcode(cbuf,0xD9);
 2017   %}
 2018 
 2019   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2020     // [Lock]
 2021     emit_opcode(cbuf,0xF0);
 2022 
 2023     // CMPXCHG [Eptr]
 2024     emit_opcode(cbuf,0x0F);
 2025     emit_opcode(cbuf,0xB1);
 2026     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2027   %}
 2028 
 2029   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032 
 2033     // CMPXCHGB [Eptr]
 2034     emit_opcode(cbuf,0x0F);
 2035     emit_opcode(cbuf,0xB0);
 2036     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2037   %}
 2038 
 2039   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2040     // [Lock]
 2041     emit_opcode(cbuf,0xF0);
 2042 
    // operand-size prefix (16-bit operand)
 2044     emit_opcode(cbuf, 0x66);
 2045 
 2046     // CMPXCHGW [Eptr]
 2047     emit_opcode(cbuf,0x0F);
 2048     emit_opcode(cbuf,0xB1);
 2049     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2050   %}
 2051 
 2052   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2053     int res_encoding = $res$$reg;
 2054 
 2055     // MOV  res,0
 2056     emit_opcode( cbuf, 0xB8 + res_encoding);
 2057     emit_d32( cbuf, 0 );
 2058     // JNE,s  fail
 2059     emit_opcode(cbuf,0x75);
 2060     emit_d8(cbuf, 5 );
 2061     // MOV  res,1
 2062     emit_opcode( cbuf, 0xB8 + res_encoding);
 2063     emit_d32( cbuf, 1 );
 2064     // fail:
 2065   %}
 2066 
 2067   enc_class set_instruction_start( ) %{
 2068     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2069   %}
 2070 
 2071   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2072     int reg_encoding = $ereg$$reg;
 2073     int base  = $mem$$base;
 2074     int index = $mem$$index;
 2075     int scale = $mem$$scale;
 2076     int displace = $mem$$disp;
 2077     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2078     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2079   %}
 2080 
 2081   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2082     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2083     int base  = $mem$$base;
 2084     int index = $mem$$index;
 2085     int scale = $mem$$scale;
 2086     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2087     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2088     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2089   %}
 2090 
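  // Shift a long left or right by a 1..31 bit constant.  $tertiary selects the
  // double-precision shift that carries bits between the two halves (0x0F,0xA4 =
  // SHLD for the left-shift rule, 0x0F,0xAC = SHRD for the right-shift rules);
  // the $primary/$secondary pair then encodes the ordinary shift that finishes
  // the originating half.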
 2091   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2092     int r1, r2;
 2093     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2094     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2095     emit_opcode(cbuf,0x0F);
 2096     emit_opcode(cbuf,$tertiary);
 2097     emit_rm(cbuf, 0x3, r1, r2);
 2098     emit_d8(cbuf,$cnt$$constant);
 2099     emit_d8(cbuf,$primary);
 2100     emit_rm(cbuf, 0x3, $secondary, r1);
 2101     emit_d8(cbuf,$cnt$$constant);
 2102   %}
 2103 
 2104   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2105     emit_opcode( cbuf, 0x8B ); // Move
 2106     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2107     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2108       emit_d8(cbuf,$primary);
 2109       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2110       emit_d8(cbuf,$cnt$$constant-32);
 2111     }
 2112     emit_d8(cbuf,$primary);
 2113     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2114     emit_d8(cbuf,31);
 2115   %}
 2116 
 2117   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2118     int r1, r2;
 2119     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2120     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2121 
 2122     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2123     emit_rm(cbuf, 0x3, r1, r2);
 2124     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2125       emit_opcode(cbuf,$primary);
 2126       emit_rm(cbuf, 0x3, $secondary, r1);
 2127       emit_d8(cbuf,$cnt$$constant-32);
 2128     }
 2129     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2130     emit_rm(cbuf, 0x3, r2, r2);
 2131   %}
 2132 
 2133   // Clone of RegMem but accepts an extra parameter to access each
 2134   // half of a double in memory; it never needs relocation info.
 2135   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2136     emit_opcode(cbuf,$opcode$$constant);
 2137     int reg_encoding = $rm_reg$$reg;
 2138     int base     = $mem$$base;
 2139     int index    = $mem$$index;
 2140     int scale    = $mem$$scale;
 2141     int displace = $mem$$disp + $disp_for_half$$constant;
 2142     relocInfo::relocType disp_reloc = relocInfo::none;
 2143     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2144   %}
 2145 
 2146   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2147   //
 2148   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2149   // and it never needs relocation information.
 2150   // Frequently used to move data between FPU's Stack Top and memory.
 2151   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2152     int rm_byte_opcode = $rm_opcode$$constant;
 2153     int base     = $mem$$base;
 2154     int index    = $mem$$index;
 2155     int scale    = $mem$$scale;
 2156     int displace = $mem$$disp;
 2157     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2158     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2159   %}
 2160 
 2161   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2162     int rm_byte_opcode = $rm_opcode$$constant;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp;
 2167     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2168     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2169   %}
 2170 
 2171   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2172     int reg_encoding = $dst$$reg;
 2173     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2174     int index        = 0x04;            // 0x04 indicates no index
 2175     int scale        = 0x00;            // 0x00 indicates no scale
 2176     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2177     relocInfo::relocType disp_reloc = relocInfo::none;
 2178     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2179   %}
 2180 
 2181   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2182     // Compare dst,src
 2183     emit_opcode(cbuf,0x3B);
 2184     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2185     // jmp dst < src around move
 2186     emit_opcode(cbuf,0x7C);
 2187     emit_d8(cbuf,2);
 2188     // move dst,src
 2189     emit_opcode(cbuf,0x8B);
 2190     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2191   %}
 2192 
 2193   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2194     // Compare dst,src
 2195     emit_opcode(cbuf,0x3B);
 2196     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2197     // jmp dst > src around move
 2198     emit_opcode(cbuf,0x7F);
 2199     emit_d8(cbuf,2);
 2200     // move dst,src
 2201     emit_opcode(cbuf,0x8B);
 2202     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2203   %}
 2204 
 2205   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2206     // If src is FPR1, we can just FST to store it.
 2207     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2208     int reg_encoding = 0x2; // Just store
 2209     int base  = $mem$$base;
 2210     int index = $mem$$index;
 2211     int scale = $mem$$scale;
 2212     int displace = $mem$$disp;
 2213     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2214     if( $src$$reg != FPR1L_enc ) {
 2215       reg_encoding = 0x3;  // Store & pop
 2216       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2217       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2218     }
 2219     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2220     emit_opcode(cbuf,$primary);
 2221     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2222   %}
 2223 
 2224   enc_class neg_reg(rRegI dst) %{
 2225     // NEG $dst
 2226     emit_opcode(cbuf,0xF7);
 2227     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2228   %}
 2229 
 2230   enc_class setLT_reg(eCXRegI dst) %{
 2231     // SETLT $dst
 2232     emit_opcode(cbuf,0x0F);
 2233     emit_opcode(cbuf,0x9C);
 2234     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2235   %}
 2236 
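  // Branch-free conditional add (cadd_cmpLT): SUB leaves the borrow in CF,
  // SBB tmp,tmp expands that borrow into an all-ones/all-zeros mask, AND keeps
  // either $y or 0, and the final ADD applies it to $p.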
 2237   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2238     int tmpReg = $tmp$$reg;
 2239 
 2240     // SUB $p,$q
 2241     emit_opcode(cbuf,0x2B);
 2242     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2243     // SBB $tmp,$tmp
 2244     emit_opcode(cbuf,0x1B);
 2245     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2246     // AND $tmp,$y
 2247     emit_opcode(cbuf,0x23);
 2248     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2249     // ADD $p,$tmp
 2250     emit_opcode(cbuf,0x03);
 2251     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2252   %}
 2253 
 2254   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2255     // TEST shift,32
 2256     emit_opcode(cbuf,0xF7);
 2257     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2258     emit_d32(cbuf,0x20);
 2259     // JEQ,s small
 2260     emit_opcode(cbuf, 0x74);
 2261     emit_d8(cbuf, 0x04);
 2262     // MOV    $dst.hi,$dst.lo
 2263     emit_opcode( cbuf, 0x8B );
 2264     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2265     // CLR    $dst.lo
 2266     emit_opcode(cbuf, 0x33);
 2267     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2268 // small:
 2269     // SHLD   $dst.hi,$dst.lo,$shift
 2270     emit_opcode(cbuf,0x0F);
 2271     emit_opcode(cbuf,0xA5);
 2272     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
 2274     emit_opcode(cbuf,0xD3);
 2275     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2276   %}
 2277 
 2278   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2279     // TEST shift,32
 2280     emit_opcode(cbuf,0xF7);
 2281     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2282     emit_d32(cbuf,0x20);
 2283     // JEQ,s small
 2284     emit_opcode(cbuf, 0x74);
 2285     emit_d8(cbuf, 0x04);
 2286     // MOV    $dst.lo,$dst.hi
 2287     emit_opcode( cbuf, 0x8B );
 2288     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2289     // CLR    $dst.hi
 2290     emit_opcode(cbuf, 0x33);
 2291     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2292 // small:
 2293     // SHRD   $dst.lo,$dst.hi,$shift
 2294     emit_opcode(cbuf,0x0F);
 2295     emit_opcode(cbuf,0xAD);
 2296     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2298     emit_opcode(cbuf,0xD3);
 2299     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2300   %}
 2301 
 2302   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2303     // TEST shift,32
 2304     emit_opcode(cbuf,0xF7);
 2305     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2306     emit_d32(cbuf,0x20);
 2307     // JEQ,s small
 2308     emit_opcode(cbuf, 0x74);
 2309     emit_d8(cbuf, 0x05);
 2310     // MOV    $dst.lo,$dst.hi
 2311     emit_opcode( cbuf, 0x8B );
 2312     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2313     // SAR    $dst.hi,31
 2314     emit_opcode(cbuf, 0xC1);
 2315     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2316     emit_d8(cbuf, 0x1F );
 2317 // small:
 2318     // SHRD   $dst.lo,$dst.hi,$shift
 2319     emit_opcode(cbuf,0x0F);
 2320     emit_opcode(cbuf,0xAD);
 2321     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2323     emit_opcode(cbuf,0xD3);
 2324     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2325   %}
 2326 
 2327 
 2328   // ----------------- Encodings for floating point unit -----------------
 2329   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2330   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2331     $$$emit8$primary;
 2332     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2333   %}
 2334 
 2335   // Pop argument in FPR0 with FSTP ST(0)
 2336   enc_class PopFPU() %{
 2337     emit_opcode( cbuf, 0xDD );
 2338     emit_d8( cbuf, 0xD8 );
 2339   %}
 2340 
 2341   // !!!!! equivalent to Pop_Reg_F
 2342   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2343     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2344     emit_d8( cbuf, 0xD8+$dst$$reg );
 2345   %}
 2346 
 2347   enc_class Push_Reg_DPR( regDPR dst ) %{
 2348     emit_opcode( cbuf, 0xD9 );
 2349     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2350   %}
 2351 
 2352   enc_class strictfp_bias1( regDPR dst ) %{
 2353     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2354     emit_opcode( cbuf, 0x2D );
 2355     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2356     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2357     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2358   %}
 2359 
 2360   enc_class strictfp_bias2( regDPR dst ) %{
 2361     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2362     emit_opcode( cbuf, 0x2D );
 2363     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2364     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2365     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2366   %}
 2367 
 2368   // Special case for moving an integer register to a stack slot.
 2369   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2370     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2371   %}
 2372 
 2373   // Special case for moving a register to a stack slot.
 2374   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2375     // Opcode already emitted
 2376     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2377     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2378     emit_d32(cbuf, $dst$$disp);   // Displacement
 2379   %}
 2380 
 2381   // Push the integer in stackSlot 'src' onto FP-stack
 2382   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2383     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2384   %}
 2385 
 2386   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2387   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2388     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2389   %}
 2390 
 2391   // Same as Pop_Mem_F except for opcode
 2392   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2393   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2394     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2395   %}
 2396 
 2397   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2398     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2399     emit_d8( cbuf, 0xD8+$dst$$reg );
 2400   %}
 2401 
 2402   enc_class Push_Reg_FPR( regFPR dst ) %{
 2403     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2404     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2405   %}
 2406 
 2407   // Push FPU's float to a stack-slot, and pop FPU-stack
 2408   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2409     int pop = 0x02;
 2410     if ($src$$reg != FPR1L_enc) {
 2411       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2412       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2413       pop = 0x03;
 2414     }
 2415     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2416   %}
 2417 
 2418   // Push FPU's double to a stack-slot, and pop FPU-stack
 2419   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2420     int pop = 0x02;
 2421     if ($src$$reg != FPR1L_enc) {
 2422       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2423       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2424       pop = 0x03;
 2425     }
 2426     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2427   %}
 2428 
 2429   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2430   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2431     int pop = 0xD0 - 1; // -1 since we skip FLD
 2432     if ($src$$reg != FPR1L_enc) {
 2433       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2434       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2435       pop = 0xD8;
 2436     }
 2437     emit_opcode( cbuf, 0xDD );
 2438     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2439   %}
 2440 
 2441 
 2442   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2443     // load dst in FPR0
 2444     emit_opcode( cbuf, 0xD9 );
 2445     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2446     if ($src$$reg != FPR1L_enc) {
 2447       // fincstp
 2448       emit_opcode (cbuf, 0xD9);
 2449       emit_opcode (cbuf, 0xF7);
 2450       // swap src with FPR1:
 2451       // FXCH FPR1 with src
 2452       emit_opcode(cbuf, 0xD9);
 2453       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2454       // fdecstp
 2455       emit_opcode (cbuf, 0xD9);
 2456       emit_opcode (cbuf, 0xF6);
 2457     }
 2458   %}
 2459 
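  // The next few encodings shuttle XMM values through a scratch stack slot onto
  // the x87 stack and back; they support rules (for example the floating-point
  // remainder instructions) that still rely on x87 even when UseSSE >= 2.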
 2460   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2461     MacroAssembler _masm(&cbuf);
 2462     __ subptr(rsp, 8);
 2463     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2464     __ fld_d(Address(rsp, 0));
 2465     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2466     __ fld_d(Address(rsp, 0));
 2467   %}
 2468 
 2469   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2470     MacroAssembler _masm(&cbuf);
 2471     __ subptr(rsp, 4);
 2472     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2473     __ fld_s(Address(rsp, 0));
 2474     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2475     __ fld_s(Address(rsp, 0));
 2476   %}
 2477 
 2478   enc_class Push_ResultD(regD dst) %{
 2479     MacroAssembler _masm(&cbuf);
 2480     __ fstp_d(Address(rsp, 0));
 2481     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2482     __ addptr(rsp, 8);
 2483   %}
 2484 
 2485   enc_class Push_ResultF(regF dst, immI d8) %{
 2486     MacroAssembler _masm(&cbuf);
 2487     __ fstp_s(Address(rsp, 0));
 2488     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2489     __ addptr(rsp, $d8$$constant);
 2490   %}
 2491 
 2492   enc_class Push_SrcD(regD src) %{
 2493     MacroAssembler _masm(&cbuf);
 2494     __ subptr(rsp, 8);
 2495     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2496     __ fld_d(Address(rsp, 0));
 2497   %}
 2498 
 2499   enc_class push_stack_temp_qword() %{
 2500     MacroAssembler _masm(&cbuf);
 2501     __ subptr(rsp, 8);
 2502   %}
 2503 
 2504   enc_class pop_stack_temp_qword() %{
 2505     MacroAssembler _masm(&cbuf);
 2506     __ addptr(rsp, 8);
 2507   %}
 2508 
 2509   enc_class push_xmm_to_fpr1(regD src) %{
 2510     MacroAssembler _masm(&cbuf);
 2511     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2512     __ fld_d(Address(rsp, 0));
 2513   %}
 2514 
 2515   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2516     if ($src$$reg != FPR1L_enc) {
 2517       // fincstp
 2518       emit_opcode (cbuf, 0xD9);
 2519       emit_opcode (cbuf, 0xF7);
 2520       // FXCH FPR1 with src
 2521       emit_opcode(cbuf, 0xD9);
 2522       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2523       // fdecstp
 2524       emit_opcode (cbuf, 0xD9);
 2525       emit_opcode (cbuf, 0xF6);
 2526     }
 2527     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2528     // // FSTP   FPR$dst$$reg
 2529     // emit_opcode( cbuf, 0xDD );
 2530     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2531   %}
 2532 
 2533   enc_class fnstsw_sahf_skip_parity() %{
 2534     // fnstsw ax
 2535     emit_opcode( cbuf, 0xDF );
 2536     emit_opcode( cbuf, 0xE0 );
 2537     // sahf
 2538     emit_opcode( cbuf, 0x9E );
 2539     // jnp  ::skip
 2540     emit_opcode( cbuf, 0x7B );
 2541     emit_opcode( cbuf, 0x05 );
 2542   %}
 2543 
 2544   enc_class emitModDPR() %{
 2545     // fprem must be iterative
 2546     // :: loop
 2547     // fprem
 2548     emit_opcode( cbuf, 0xD9 );
 2549     emit_opcode( cbuf, 0xF8 );
 2550     // wait
 2551     emit_opcode( cbuf, 0x9b );
 2552     // fnstsw ax
 2553     emit_opcode( cbuf, 0xDF );
 2554     emit_opcode( cbuf, 0xE0 );
 2555     // sahf
 2556     emit_opcode( cbuf, 0x9E );
 2557     // jp  ::loop
 2558     emit_opcode( cbuf, 0x0F );
 2559     emit_opcode( cbuf, 0x8A );
 2560     emit_opcode( cbuf, 0xF4 );
 2561     emit_opcode( cbuf, 0xFF );
 2562     emit_opcode( cbuf, 0xFF );
 2563     emit_opcode( cbuf, 0xFF );
 2564   %}
 2565 
 2566   enc_class fpu_flags() %{
 2567     // fnstsw_ax
 2568     emit_opcode( cbuf, 0xDF);
 2569     emit_opcode( cbuf, 0xE0);
 2570     // test ax,0x0400
 2571     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2572     emit_opcode( cbuf, 0xA9 );
 2573     emit_d16   ( cbuf, 0x0400 );
 2574     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2575     // // test rax,0x0400
 2576     // emit_opcode( cbuf, 0xA9 );
 2577     // emit_d32   ( cbuf, 0x00000400 );
 2578     //
 2579     // jz exit (no unordered comparison)
 2580     emit_opcode( cbuf, 0x74 );
 2581     emit_d8    ( cbuf, 0x02 );
 2582     // mov ah,1 - treat as LT case (set carry flag)
 2583     emit_opcode( cbuf, 0xB4 );
 2584     emit_d8    ( cbuf, 0x01 );
 2585     // sahf
 2586     emit_opcode( cbuf, 0x9E);
 2587   %}
 2588 
 2589   enc_class cmpF_P6_fixup() %{
 2590     // Fixup the integer flags in case comparison involved a NaN
 2591     //
 2592     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2593     emit_opcode( cbuf, 0x7B );
 2594     emit_d8    ( cbuf, 0x03 );
 2595     // MOV AH,1 - treat as LT case (set carry flag)
 2596     emit_opcode( cbuf, 0xB4 );
 2597     emit_d8    ( cbuf, 0x01 );
 2598     // SAHF
 2599     emit_opcode( cbuf, 0x9E);
 2600     // NOP     // target for branch to avoid branch to branch
 2601     emit_opcode( cbuf, 0x90);
 2602   %}
 2603 
 2604 //     fnstsw_ax();
 2605 //     sahf();
 2606 //     movl(dst, nan_result);
 2607 //     jcc(Assembler::parity, exit);
 2608 //     movl(dst, less_result);
 2609 //     jcc(Assembler::below, exit);
 2610 //     movl(dst, equal_result);
 2611 //     jcc(Assembler::equal, exit);
 2612 //     movl(dst, greater_result);
 2613 
 2614 // less_result     =  1;
 2615 // greater_result  = -1;
 2616 // equal_result    = 0;
 2617 // nan_result      = -1;
 2618 
 2619   enc_class CmpF_Result(rRegI dst) %{
 2620     // fnstsw_ax();
 2621     emit_opcode( cbuf, 0xDF);
 2622     emit_opcode( cbuf, 0xE0);
 2623     // sahf
 2624     emit_opcode( cbuf, 0x9E);
 2625     // movl(dst, nan_result);
 2626     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2627     emit_d32( cbuf, -1 );
 2628     // jcc(Assembler::parity, exit);
 2629     emit_opcode( cbuf, 0x7A );
 2630     emit_d8    ( cbuf, 0x13 );
 2631     // movl(dst, less_result);
 2632     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2633     emit_d32( cbuf, -1 );
 2634     // jcc(Assembler::below, exit);
 2635     emit_opcode( cbuf, 0x72 );
 2636     emit_d8    ( cbuf, 0x0C );
 2637     // movl(dst, equal_result);
 2638     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2639     emit_d32( cbuf, 0 );
 2640     // jcc(Assembler::equal, exit);
 2641     emit_opcode( cbuf, 0x74 );
 2642     emit_d8    ( cbuf, 0x05 );
 2643     // movl(dst, greater_result);
 2644     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2645     emit_d32( cbuf, 1 );
 2646   %}
 2647 
 2648 
 2649   // Compare the longs and set flags
 2650   // BROKEN!  Do Not use as-is
 2651   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2652     // CMP    $src1.hi,$src2.hi
 2653     emit_opcode( cbuf, 0x3B );
 2654     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2655     // JNE,s  done
 2656     emit_opcode(cbuf,0x75);
 2657     emit_d8(cbuf, 2 );
 2658     // CMP    $src1.lo,$src2.lo
 2659     emit_opcode( cbuf, 0x3B );
 2660     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2661 // done:
 2662   %}
 2663 
 2664   enc_class convert_int_long( regL dst, rRegI src ) %{
 2665     // mov $dst.lo,$src
 2666     int dst_encoding = $dst$$reg;
 2667     int src_encoding = $src$$reg;
 2668     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2669     // mov $dst.hi,$src
 2670     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2671     // sar $dst.hi,31
 2672     emit_opcode( cbuf, 0xC1 );
 2673     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2674     emit_d8(cbuf, 0x1F );
 2675   %}
 2676 
 2677   enc_class convert_long_double( eRegL src ) %{
 2678     // push $src.hi
 2679     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2680     // push $src.lo
 2681     emit_opcode(cbuf, 0x50+$src$$reg  );
 2682     // fild 64-bits at [SP]
 2683     emit_opcode(cbuf,0xdf);
 2684     emit_d8(cbuf, 0x6C);
 2685     emit_d8(cbuf, 0x24);
 2686     emit_d8(cbuf, 0x00);
 2687     // pop stack
 2688     emit_opcode(cbuf, 0x83); // add  SP, #8
 2689     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2690     emit_d8(cbuf, 0x8);
 2691   %}
 2692 
 2693   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2694     // IMUL   EDX:EAX,$src1
 2695     emit_opcode( cbuf, 0xF7 );
 2696     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2697     // SAR    EDX,$cnt-32
 2698     int shift_count = ((int)$cnt$$constant) - 32;
 2699     if (shift_count > 0) {
 2700       emit_opcode(cbuf, 0xC1);
 2701       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2702       emit_d8(cbuf, shift_count);
 2703     }
 2704   %}
 2705 
  // This version doesn't have the trailing ADD ESP,8 stack restore
 2707   enc_class convert_long_double2( eRegL src ) %{
 2708     // push $src.hi
 2709     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2710     // push $src.lo
 2711     emit_opcode(cbuf, 0x50+$src$$reg  );
 2712     // fild 64-bits at [SP]
 2713     emit_opcode(cbuf,0xdf);
 2714     emit_d8(cbuf, 0x6C);
 2715     emit_d8(cbuf, 0x24);
 2716     emit_d8(cbuf, 0x00);
 2717   %}
 2718 
 2719   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2720     // Basic idea: long = (long)int * (long)int
 2721     // IMUL EDX:EAX, src
 2722     emit_opcode( cbuf, 0xF7 );
 2723     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2724   %}
 2725 
 2726   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2727     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2728     // MUL EDX:EAX, src
 2729     emit_opcode( cbuf, 0xF7 );
 2730     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2731   %}
 2732 
 2733   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2734     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2735     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2736     // MOV    $tmp,$src.lo
 2737     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2738     // IMUL   $tmp,EDX
 2739     emit_opcode( cbuf, 0x0F );
 2740     emit_opcode( cbuf, 0xAF );
 2741     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2742     // MOV    EDX,$src.hi
 2743     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2744     // IMUL   EDX,EAX
 2745     emit_opcode( cbuf, 0x0F );
 2746     emit_opcode( cbuf, 0xAF );
 2747     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2748     // ADD    $tmp,EDX
 2749     emit_opcode( cbuf, 0x03 );
 2750     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2751     // MUL   EDX:EAX,$src.lo
 2752     emit_opcode( cbuf, 0xF7 );
 2753     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2755     emit_opcode( cbuf, 0x03 );
 2756     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2757   %}
 2758 
 2759   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2760     // Basic idea: lo(result) = lo(src * y_lo)
 2761     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2762     // IMUL   $tmp,EDX,$src
 2763     emit_opcode( cbuf, 0x6B );
 2764     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2765     emit_d8( cbuf, (int)$src$$constant );
 2766     // MOV    EDX,$src
 2767     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2768     emit_d32( cbuf, (int)$src$$constant );
 2769     // MUL   EDX:EAX,EDX
 2770     emit_opcode( cbuf, 0xF7 );
 2771     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2773     emit_opcode( cbuf, 0x03 );
 2774     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2775   %}
 2776 
 2777   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2778     // PUSH src1.hi
 2779     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2780     // PUSH src1.lo
 2781     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2782     // PUSH src2.hi
 2783     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2784     // PUSH src2.lo
 2785     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2786     // CALL directly to the runtime
 2787     cbuf.set_insts_mark();
 2788     emit_opcode(cbuf,0xE8);       // Call into runtime
 2789     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2790     // Restore stack
 2791     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2792     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2793     emit_d8(cbuf, 4*4);
 2794   %}
 2795 
 2796   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2797     // PUSH src1.hi
 2798     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2799     // PUSH src1.lo
 2800     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2801     // PUSH src2.hi
 2802     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2803     // PUSH src2.lo
 2804     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2805     // CALL directly to the runtime
 2806     cbuf.set_insts_mark();
 2807     emit_opcode(cbuf,0xE8);       // Call into runtime
 2808     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2809     // Restore stack
 2810     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2811     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2812     emit_d8(cbuf, 4*4);
 2813   %}
 2814 
 2815   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2816     // MOV   $tmp,$src.lo
 2817     emit_opcode(cbuf, 0x8B);
 2818     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2819     // OR    $tmp,$src.hi
 2820     emit_opcode(cbuf, 0x0B);
 2821     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2822   %}
 2823 
 2824   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2825     // CMP    $src1.lo,$src2.lo
 2826     emit_opcode( cbuf, 0x3B );
 2827     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2828     // JNE,s  skip
 2829     emit_cc(cbuf, 0x70, 0x5);
 2830     emit_d8(cbuf,2);
 2831     // CMP    $src1.hi,$src2.hi
 2832     emit_opcode( cbuf, 0x3B );
 2833     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2834   %}
 2835 
 2836   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2837     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2838     emit_opcode( cbuf, 0x3B );
 2839     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2840     // MOV    $tmp,$src1.hi
 2841     emit_opcode( cbuf, 0x8B );
 2842     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2843     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2844     emit_opcode( cbuf, 0x1B );
 2845     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2846   %}
 2847 
 2848   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2849     // XOR    $tmp,$tmp
 2850     emit_opcode(cbuf,0x33);  // XOR
 2851     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2852     // CMP    $tmp,$src.lo
 2853     emit_opcode( cbuf, 0x3B );
 2854     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2855     // SBB    $tmp,$src.hi
 2856     emit_opcode( cbuf, 0x1B );
 2857     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2858   %}
 2859 
 2860  // Sniff, sniff... smells like Gnu Superoptimizer
 2861   enc_class neg_long( eRegL dst ) %{
 2862     emit_opcode(cbuf,0xF7);    // NEG hi
 2863     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2864     emit_opcode(cbuf,0xF7);    // NEG lo
 2865     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2866     emit_opcode(cbuf,0x83);    // SBB hi,0
 2867     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2868     emit_d8    (cbuf,0 );
 2869   %}
 2870 
 2871   enc_class enc_pop_rdx() %{
 2872     emit_opcode(cbuf,0x5A);
 2873   %}
 2874 
 2875   enc_class enc_rethrow() %{
 2876     cbuf.set_insts_mark();
 2877     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2878     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2879                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2880   %}
 2881 
 2882 
 2883   // Convert a double to an int.  Java semantics require we do complex
 2884   // manglelations in the corner cases.  So we set the rounding mode to
 2885   // 'zero', store the darned double down as an int, and reset the
 2886   // rounding mode to 'nearest'.  The hardware throws an exception which
 2887   // patches up the correct value directly to the stack.
 2888   enc_class DPR2I_encoding( regDPR src ) %{
 2889     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2890     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2892     // However, I2C adapters and other float-stack manglers leave pending
 2893     // invalid-op exceptions hanging.  We would have to clear them before
 2894     // enabling them and that is more expensive than just testing for the
 2895     // invalid value Intel stores down in the corner cases.
 2896     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2897     emit_opcode(cbuf,0x2D);
 2898     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2899     // Allocate a word
 2900     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2901     emit_opcode(cbuf,0xEC);
 2902     emit_d8(cbuf,0x04);
 2903     // Encoding assumes a double has been pushed into FPR0.
 2904     // Store down the double as an int, popping the FPU stack
 2905     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2906     emit_opcode(cbuf,0x1C);
 2907     emit_d8(cbuf,0x24);
 2908     // Restore the rounding mode; mask the exception
 2909     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2910     emit_opcode(cbuf,0x2D);
 2911     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2912         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2913         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2914 
 2915     // Load the converted int; adjust CPU stack
 2916     emit_opcode(cbuf,0x58);       // POP EAX
 2917     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2918     emit_d32   (cbuf,0x80000000); //         0x80000000
 2919     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2920     emit_d8    (cbuf,0x07);       // Size of slow_call
 2921     // Push src onto stack slow-path
 2922     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2923     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2924     // CALL directly to the runtime
 2925     cbuf.set_insts_mark();
 2926     emit_opcode(cbuf,0xE8);       // Call into runtime
 2927     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2928     // Carry on here...
 2929   %}
 2930 
 2931   enc_class DPR2L_encoding( regDPR src ) %{
 2932     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2933     emit_opcode(cbuf,0x2D);
 2934     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2936     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2937     emit_opcode(cbuf,0xEC);
 2938     emit_d8(cbuf,0x08);
 2939     // Encoding assumes a double has been pushed into FPR0.
 2940     // Store down the double as a long, popping the FPU stack
 2941     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2942     emit_opcode(cbuf,0x3C);
 2943     emit_d8(cbuf,0x24);
 2944     // Restore the rounding mode; mask the exception
 2945     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2946     emit_opcode(cbuf,0x2D);
 2947     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2948         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2949         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2950 
    // Load the converted long; adjust CPU stack
 2952     emit_opcode(cbuf,0x58);       // POP EAX
 2953     emit_opcode(cbuf,0x5A);       // POP EDX
 2954     emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // edx
 2956     emit_d32   (cbuf,0x80000000); //         0x80000000
 2957     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2958     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2959     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // eax,eax
 2961     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2962     emit_d8    (cbuf,0x07);       // Size of slow_call
 2963     // Push src onto stack slow-path
 2964     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2965     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2966     // CALL directly to the runtime
 2967     cbuf.set_insts_mark();
 2968     emit_opcode(cbuf,0xE8);       // Call into runtime
 2969     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2970     // Carry on here...
 2971   %}
 2972 
 2973   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2974     // Operand was loaded from memory into fp ST (stack top)
 2975     // FMUL   ST,$src  /* D8 C8+i */
 2976     emit_opcode(cbuf, 0xD8);
 2977     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2978   %}
 2979 
 2980   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // Could also use FADDP  src2,ST  /* DE C0+i */
 2985   %}
 2986 
 2987   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2988     // FADDP  src2,ST  /* DE C0+i */
 2989     emit_opcode(cbuf, 0xDE);
 2990     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2991   %}
 2992 
 2993   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2994     // Operand has been loaded into fp ST (stack top)
 2995       // FSUB   ST,$src1
 2996       emit_opcode(cbuf, 0xD8);
 2997       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 2998 
 2999       // FDIV
 3000       emit_opcode(cbuf, 0xD8);
 3001       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3002   %}
 3003 
 3004   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3005     // Operand was loaded from memory into fp ST (stack top)
 3006     // FADD   ST,$src  /* D8 C0+i */
 3007     emit_opcode(cbuf, 0xD8);
 3008     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3009 
 3010     // FMUL  ST,src2  /* D8 C*+i */
 3011     emit_opcode(cbuf, 0xD8);
 3012     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3013   %}
 3014 
 3015 
 3016   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3017     // Operand was loaded from memory into fp ST (stack top)
 3018     // FADD   ST,$src  /* D8 C0+i */
 3019     emit_opcode(cbuf, 0xD8);
 3020     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3021 
 3022     // FMULP  src2,ST  /* DE C8+i */
 3023     emit_opcode(cbuf, 0xDE);
 3024     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3025   %}
 3026 
 3027   // Atomically load the volatile long
 3028   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3029     emit_opcode(cbuf,0xDF);
 3030     int rm_byte_opcode = 0x05;
 3031     int base     = $mem$$base;
 3032     int index    = $mem$$index;
 3033     int scale    = $mem$$scale;
 3034     int displace = $mem$$disp;
 3035     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3036     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3037     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3038   %}
 3039 
 3040   // Volatile Store Long.  Must be atomic, so move it into
 3041   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3042   // target address before the store (for null-ptr checks)
 3043   // so the memory operand is used twice in the encoding.
 3044   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3045     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3046     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3047     emit_opcode(cbuf,0xDF);
 3048     int rm_byte_opcode = 0x07;
 3049     int base     = $mem$$base;
 3050     int index    = $mem$$index;
 3051     int scale    = $mem$$scale;
 3052     int displace = $mem$$disp;
 3053     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3054     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3055   %}
 3056 
 3057 %}
 3058 
 3059 
 3060 //----------FRAME--------------------------------------------------------------
 3061 // Definition of frame structure and management information.
 3062 //
 3063 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3064 //                             |   (to get allocators register number
 3065 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3066 //  r   CALLER     |        |
 3067 //  o     |        +--------+      pad to even-align allocators stack-slot
 3068 //  w     V        |  pad0  |        numbers; owned by CALLER
 3069 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3070 //  h     ^        |   in   |  5
 3071 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3072 //  |     |        |        |  3
 3073 //  |     |        +--------+
 3074 //  V     |        | old out|      Empty on Intel, window on Sparc
 3075 //        |    old |preserve|      Must be even aligned.
 3076 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3077 //        |        |   in   |  3   area for Intel ret address
 3078 //     Owned by    |preserve|      Empty on Sparc.
 3079 //       SELF      +--------+
 3080 //        |        |  pad2  |  2   pad to align old SP
 3081 //        |        +--------+  1
 3082 //        |        | locks  |  0
 3083 //        |        +--------+----> OptoReg::stack0(), even aligned
 3084 //        |        |  pad1  | 11   pad to align new SP
 3085 //        |        +--------+
 3086 //        |        |        | 10
 3087 //        |        | spills |  9   spills
 3088 //        V        |        |  8   (pad0 slot for callee)
 3089 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3090 //        ^        |  out   |  7
 3091 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3092 //     Owned by    +--------+
 3093 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3094 //        |    new |preserve|      Must be even-aligned.
 3095 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3096 //        |        |        |
 3097 //
 3098 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3099 //         known from SELF's arguments and the Java calling convention.
 3100 //         Region 6-7 is determined per call site.
 3101 // Note 2: If the calling convention leaves holes in the incoming argument
 3102 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
 3107 //         varargs C calling conventions.
 3108 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3109 //         even aligned with pad0 as needed.
 3110 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3111 //         region 6-11 is even aligned; it may be padded out more so that
 3112 //         the region from SP to FP meets the minimum stack alignment.
 3113 
 3114 frame %{
 3115   // These three registers define part of the calling convention
 3116   // between compiled code and the interpreter.
 3117   inline_cache_reg(EAX);                // Inline Cache Register
 3118 
 3119   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3120   cisc_spilling_operand_name(indOffset32);
 3121 
 3122   // Number of stack slots consumed by locking an object
 3123   sync_stack_slots(1);
 3124 
 3125   // Compiled code's Frame Pointer
 3126   frame_pointer(ESP);
 3127   // Interpreter stores its frame pointer in a register which is
 3128   // stored to the stack by I2CAdaptors.
 3129   // I2CAdaptors convert from interpreted java to compiled java.
 3130   interpreter_frame_pointer(EBP);
 3131 
 3132   // Stack alignment requirement
 3133   // Alignment size in bytes (128-bit -> 16 bytes)
 3134   stack_alignment(StackAlignmentInBytes);
 3135 
 3136   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3137   // for calls to C.  Supports the var-args backing area for register parms.
 3138   varargs_C_out_slots_killed(0);
 3139 
 3140   // The after-PROLOG location of the return address.  Location of
 3141   // return address specifies a type (REG or STACK) and a number
 3142   // representing the register number (i.e. - use a register name) or
 3143   // stack slot.
 3144   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
 3146   return_addr(STACK - 1 +
 3147               align_up((Compile::current()->in_preserve_stack_slots() +
 3148                         Compile::current()->fixed_slots()),
 3149                        stack_alignment_in_slots()));
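  // Worked example with hypothetical values: if in_preserve_stack_slots() is
  // 2, fixed_slots() is 0 and stack_alignment_in_slots() is 2, this is
  // STACK - 1 + align_up(2, 2) = STACK + 1.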
 3150 
 3151   // Location of C & interpreter return values
 3152   c_return_value %{
 3153     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3154     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3155     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3156 
 3157     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3158     // that C functions return float and double results in XMM0.
 3159     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3160       return OptoRegPair(XMM0b_num,XMM0_num);
 3161     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3162       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3163 
 3164     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3165   %}
 3166 
 3167   // Location of return values
 3168   return_value %{
 3169     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3170     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3171     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3172     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3173       return OptoRegPair(XMM0b_num,XMM0_num);
 3174     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3175       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3176     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3177   %}
 3178 
 3179 %}
 3180 
 3181 //----------ATTRIBUTES---------------------------------------------------------
 3182 //----------Operand Attributes-------------------------------------------------
 3183 op_attrib op_cost(0);        // Required cost attribute
 3184 
 3185 //----------Instruction Attributes---------------------------------------------
 3186 ins_attrib ins_cost(100);       // Required cost attribute
 3187 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3188 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3189                                 // non-matching short branch variant of some
                                // long branch?
 3191 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3192                                 // specifies the alignment that some part of the instruction (not
 3193                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3194                                 // function must be provided for the instruction
 3195 
 3196 //----------OPERANDS-----------------------------------------------------------
 3197 // Operand definitions must precede instruction definitions for correct parsing
 3198 // in the ADLC because operands constitute user defined types which are used in
 3199 // instruction definitions.
 3200 
 3201 //----------Simple Operands----------------------------------------------------
 3202 // Immediate Operands
 3203 // Integer Immediate
 3204 operand immI() %{
 3205   match(ConI);
 3206 
 3207   op_cost(10);
 3208   format %{ %}
 3209   interface(CONST_INTER);
 3210 %}
 3211 
 3212 // Constant for test vs zero
 3213 operand immI_0() %{
 3214   predicate(n->get_int() == 0);
 3215   match(ConI);
 3216 
 3217   op_cost(0);
 3218   format %{ %}
 3219   interface(CONST_INTER);
 3220 %}
 3221 
 3222 // Constant for increment
 3223 operand immI_1() %{
 3224   predicate(n->get_int() == 1);
 3225   match(ConI);
 3226 
 3227   op_cost(0);
 3228   format %{ %}
 3229   interface(CONST_INTER);
 3230 %}
 3231 
 3232 // Constant for decrement
 3233 operand immI_M1() %{
 3234   predicate(n->get_int() == -1);
 3235   match(ConI);
 3236 
 3237   op_cost(0);
 3238   format %{ %}
 3239   interface(CONST_INTER);
 3240 %}
 3241 
 3242 // Valid scale values for addressing modes
 3243 operand immI2() %{
 3244   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3245   match(ConI);
 3246 
 3247   format %{ %}
 3248   interface(CONST_INTER);
 3249 %}
 3250 
 3251 operand immI8() %{
 3252   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3253   match(ConI);
 3254 
 3255   op_cost(5);
 3256   format %{ %}
 3257   interface(CONST_INTER);
 3258 %}
 3259 
 3260 operand immU8() %{
 3261   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3262   match(ConI);
 3263 
 3264   op_cost(5);
 3265   format %{ %}
 3266   interface(CONST_INTER);
 3267 %}
 3268 
 3269 operand immI16() %{
 3270   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3271   match(ConI);
 3272 
 3273   op_cost(10);
 3274   format %{ %}
 3275   interface(CONST_INTER);
 3276 %}
 3277 
 3278 // Int Immediate non-negative
 3279 operand immU31()
 3280 %{
 3281   predicate(n->get_int() >= 0);
 3282   match(ConI);
 3283 
 3284   op_cost(0);
 3285   format %{ %}
 3286   interface(CONST_INTER);
 3287 %}
 3288 
 3289 // Constant for long shifts
 3290 operand immI_32() %{
 3291   predicate( n->get_int() == 32 );
 3292   match(ConI);
 3293 
 3294   op_cost(0);
 3295   format %{ %}
 3296   interface(CONST_INTER);
 3297 %}
 3298 
 3299 operand immI_1_31() %{
 3300   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3301   match(ConI);
 3302 
 3303   op_cost(0);
 3304   format %{ %}
 3305   interface(CONST_INTER);
 3306 %}
 3307 
 3308 operand immI_32_63() %{
 3309   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3310   match(ConI);
 3311   op_cost(0);
 3312 
 3313   format %{ %}
 3314   interface(CONST_INTER);
 3315 %}
 3316 
 3317 operand immI_2() %{
 3318   predicate( n->get_int() == 2 );
 3319   match(ConI);
 3320 
 3321   op_cost(0);
 3322   format %{ %}
 3323   interface(CONST_INTER);
 3324 %}
 3325 
 3326 operand immI_3() %{
 3327   predicate( n->get_int() == 3 );
 3328   match(ConI);
 3329 
 3330   op_cost(0);
 3331   format %{ %}
 3332   interface(CONST_INTER);
 3333 %}
 3334 
 3335 operand immI_4()
 3336 %{
 3337   predicate(n->get_int() == 4);
 3338   match(ConI);
 3339 
 3340   op_cost(0);
 3341   format %{ %}
 3342   interface(CONST_INTER);
 3343 %}
 3344 
 3345 operand immI_8()
 3346 %{
 3347   predicate(n->get_int() == 8);
 3348   match(ConI);
 3349 
 3350   op_cost(0);
 3351   format %{ %}
 3352   interface(CONST_INTER);
 3353 %}
 3354 
 3355 // Pointer Immediate
 3356 operand immP() %{
 3357   match(ConP);
 3358 
 3359   op_cost(10);
 3360   format %{ %}
 3361   interface(CONST_INTER);
 3362 %}
 3363 
 3364 // NULL Pointer Immediate
 3365 operand immP0() %{
 3366   predicate( n->get_ptr() == 0 );
 3367   match(ConP);
 3368   op_cost(0);
 3369 
 3370   format %{ %}
 3371   interface(CONST_INTER);
 3372 %}
 3373 
 3374 // Long Immediate
 3375 operand immL() %{
 3376   match(ConL);
 3377 
 3378   op_cost(20);
 3379   format %{ %}
 3380   interface(CONST_INTER);
 3381 %}
 3382 
 3383 // Long Immediate zero
 3384 operand immL0() %{
 3385   predicate( n->get_long() == 0L );
 3386   match(ConL);
 3387   op_cost(0);
 3388 
 3389   format %{ %}
 3390   interface(CONST_INTER);
 3391 %}
 3392 
// Long Immediate minus one
 3394 operand immL_M1() %{
 3395   predicate( n->get_long() == -1L );
 3396   match(ConL);
 3397   op_cost(0);
 3398 
 3399   format %{ %}
 3400   interface(CONST_INTER);
 3401 %}
 3402 
 3403 // Long immediate from 0 to 127.
 3404 // Used for a shorter form of long mul by 10.
 3405 operand immL_127() %{
 3406   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3407   match(ConL);
 3408   op_cost(0);
 3409 
 3410   format %{ %}
 3411   interface(CONST_INTER);
 3412 %}
 3413 
 3414 // Long Immediate: low 32-bit mask
 3415 operand immL_32bits() %{
 3416   predicate(n->get_long() == 0xFFFFFFFFL);
 3417   match(ConL);
 3418   op_cost(0);
 3419 
 3420   format %{ %}
 3421   interface(CONST_INTER);
 3422 %}
 3423 
// Long Immediate: value fits in a signed 32-bit range
 3425 operand immL32() %{
 3426   predicate(n->get_long() == (int)(n->get_long()));
 3427   match(ConL);
 3428   op_cost(20);
 3429 
 3430   format %{ %}
 3431   interface(CONST_INTER);
 3432 %}
 3433 
// Double Immediate zero
 3435 operand immDPR0() %{
 3436   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3437   // bug that generates code such that NaNs compare equal to 0.0
 3438   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3439   match(ConD);
 3440 
 3441   op_cost(5);
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
 3446 // Double Immediate one
 3447 operand immDPR1() %{
 3448   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3449   match(ConD);
 3450 
 3451   op_cost(5);
 3452   format %{ %}
 3453   interface(CONST_INTER);
 3454 %}
 3455 
 3456 // Double Immediate
 3457 operand immDPR() %{
 3458   predicate(UseSSE<=1);
 3459   match(ConD);
 3460 
 3461   op_cost(5);
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
 3466 operand immD() %{
 3467   predicate(UseSSE>=2);
 3468   match(ConD);
 3469 
 3470   op_cost(5);
 3471   format %{ %}
 3472   interface(CONST_INTER);
 3473 %}
 3474 
 3475 // Double Immediate zero
 3476 operand immD0() %{
 3477   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3478   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3479   // compare equal to -0.0.
 3480   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3481   match(ConD);
 3482 
 3483   format %{ %}
 3484   interface(CONST_INTER);
 3485 %}
 3486 
 3487 // Float Immediate zero
 3488 operand immFPR0() %{
 3489   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3490   match(ConF);
 3491 
 3492   op_cost(5);
 3493   format %{ %}
 3494   interface(CONST_INTER);
 3495 %}
 3496 
 3497 // Float Immediate one
 3498 operand immFPR1() %{
 3499   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3500   match(ConF);
 3501 
 3502   op_cost(5);
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Float Immediate
 3508 operand immFPR() %{
 3509   predicate( UseSSE == 0 );
 3510   match(ConF);
 3511 
 3512   op_cost(5);
 3513   format %{ %}
 3514   interface(CONST_INTER);
 3515 %}
 3516 
 3517 // Float Immediate
 3518 operand immF() %{
 3519   predicate(UseSSE >= 1);
 3520   match(ConF);
 3521 
 3522   op_cost(5);
 3523   format %{ %}
 3524   interface(CONST_INTER);
 3525 %}
 3526 
 3527 // Float Immediate zero.  Zero and not -0.0
 3528 operand immF0() %{
 3529   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3530   match(ConF);
 3531 
 3532   op_cost(5);
 3533   format %{ %}
 3534   interface(CONST_INTER);
 3535 %}
 3536 
 3537 // Immediates for special shifts (sign extend)
 3538 
 3539 // Constants for increment
 3540 operand immI_16() %{
 3541   predicate( n->get_int() == 16 );
 3542   match(ConI);
 3543 
 3544   format %{ %}
 3545   interface(CONST_INTER);
 3546 %}
 3547 
 3548 operand immI_24() %{
 3549   predicate( n->get_int() == 24 );
 3550   match(ConI);
 3551 
 3552   format %{ %}
 3553   interface(CONST_INTER);
 3554 %}
 3555 
 3556 // Constant for byte-wide masking
 3557 operand immI_255() %{
 3558   predicate( n->get_int() == 255 );
 3559   match(ConI);
 3560 
 3561   format %{ %}
 3562   interface(CONST_INTER);
 3563 %}
 3564 
 3565 // Constant for short-wide masking
 3566 operand immI_65535() %{
 3567   predicate(n->get_int() == 65535);
 3568   match(ConI);
 3569 
 3570   format %{ %}
 3571   interface(CONST_INTER);
 3572 %}
 3573 
 3574 operand kReg()
 3575 %{
 3576   constraint(ALLOC_IN_RC(vectmask_reg));
 3577   match(RegVectMask);
 3578   format %{%}
 3579   interface(REG_INTER);
 3580 %}
 3581 
 3582 operand kReg_K1()
 3583 %{
 3584   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3585   match(RegVectMask);
 3586   format %{%}
 3587   interface(REG_INTER);
 3588 %}
 3589 
 3590 operand kReg_K2()
 3591 %{
 3592   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3593   match(RegVectMask);
 3594   format %{%}
 3595   interface(REG_INTER);
 3596 %}
 3597 
 3598 // Special Registers
 3599 operand kReg_K3()
 3600 %{
 3601   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3602   match(RegVectMask);
 3603   format %{%}
 3604   interface(REG_INTER);
 3605 %}
 3606 
 3607 operand kReg_K4()
 3608 %{
 3609   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3610   match(RegVectMask);
 3611   format %{%}
 3612   interface(REG_INTER);
 3613 %}
 3614 
 3615 operand kReg_K5()
 3616 %{
 3617   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3618   match(RegVectMask);
 3619   format %{%}
 3620   interface(REG_INTER);
 3621 %}
 3622 
 3623 operand kReg_K6()
 3624 %{
 3625   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3626   match(RegVectMask);
 3627   format %{%}
 3628   interface(REG_INTER);
 3629 %}
 3630 
 3631 // Special Registers
 3632 operand kReg_K7()
 3633 %{
 3634   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3635   match(RegVectMask);
 3636   format %{%}
 3637   interface(REG_INTER);
 3638 %}
 3639 
 3640 // Register Operands
 3641 // Integer Register
 3642 operand rRegI() %{
 3643   constraint(ALLOC_IN_RC(int_reg));
 3644   match(RegI);
 3645   match(xRegI);
 3646   match(eAXRegI);
 3647   match(eBXRegI);
 3648   match(eCXRegI);
 3649   match(eDXRegI);
 3650   match(eDIRegI);
 3651   match(eSIRegI);
 3652 
 3653   format %{ %}
 3654   interface(REG_INTER);
 3655 %}
 3656 
 3657 // Subset of Integer Register
 3658 operand xRegI(rRegI reg) %{
 3659   constraint(ALLOC_IN_RC(int_x_reg));
 3660   match(reg);
 3661   match(eAXRegI);
 3662   match(eBXRegI);
 3663   match(eCXRegI);
 3664   match(eDXRegI);
 3665 
 3666   format %{ %}
 3667   interface(REG_INTER);
 3668 %}
 3669 
 3670 // Special Registers
 3671 operand eAXRegI(xRegI reg) %{
 3672   constraint(ALLOC_IN_RC(eax_reg));
 3673   match(reg);
 3674   match(rRegI);
 3675 
 3676   format %{ "EAX" %}
 3677   interface(REG_INTER);
 3678 %}
 3679 
 3680 // Special Registers
 3681 operand eBXRegI(xRegI reg) %{
 3682   constraint(ALLOC_IN_RC(ebx_reg));
 3683   match(reg);
 3684   match(rRegI);
 3685 
 3686   format %{ "EBX" %}
 3687   interface(REG_INTER);
 3688 %}
 3689 
 3690 operand eCXRegI(xRegI reg) %{
 3691   constraint(ALLOC_IN_RC(ecx_reg));
 3692   match(reg);
 3693   match(rRegI);
 3694 
 3695   format %{ "ECX" %}
 3696   interface(REG_INTER);
 3697 %}
 3698 
 3699 operand eDXRegI(xRegI reg) %{
 3700   constraint(ALLOC_IN_RC(edx_reg));
 3701   match(reg);
 3702   match(rRegI);
 3703 
 3704   format %{ "EDX" %}
 3705   interface(REG_INTER);
 3706 %}
 3707 
 3708 operand eDIRegI(xRegI reg) %{
 3709   constraint(ALLOC_IN_RC(edi_reg));
 3710   match(reg);
 3711   match(rRegI);
 3712 
 3713   format %{ "EDI" %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 operand naxRegI() %{
 3718   constraint(ALLOC_IN_RC(nax_reg));
 3719   match(RegI);
 3720   match(eCXRegI);
 3721   match(eDXRegI);
 3722   match(eSIRegI);
 3723   match(eDIRegI);
 3724 
 3725   format %{ %}
 3726   interface(REG_INTER);
 3727 %}
 3728 
 3729 operand nadxRegI() %{
 3730   constraint(ALLOC_IN_RC(nadx_reg));
 3731   match(RegI);
 3732   match(eBXRegI);
 3733   match(eCXRegI);
 3734   match(eSIRegI);
 3735   match(eDIRegI);
 3736 
 3737   format %{ %}
 3738   interface(REG_INTER);
 3739 %}
 3740 
 3741 operand ncxRegI() %{
 3742   constraint(ALLOC_IN_RC(ncx_reg));
 3743   match(RegI);
 3744   match(eAXRegI);
 3745   match(eDXRegI);
 3746   match(eSIRegI);
 3747   match(eDIRegI);
 3748 
 3749   format %{ %}
 3750   interface(REG_INTER);
 3751 %}
 3752 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
//
 3755 operand eSIRegI(xRegI reg) %{
 3756    constraint(ALLOC_IN_RC(esi_reg));
 3757    match(reg);
 3758    match(rRegI);
 3759 
 3760    format %{ "ESI" %}
 3761    interface(REG_INTER);
 3762 %}
 3763 
 3764 // Pointer Register
 3765 operand anyRegP() %{
 3766   constraint(ALLOC_IN_RC(any_reg));
 3767   match(RegP);
 3768   match(eAXRegP);
 3769   match(eBXRegP);
 3770   match(eCXRegP);
 3771   match(eDIRegP);
 3772   match(eRegP);
 3773 
 3774   format %{ %}
 3775   interface(REG_INTER);
 3776 %}
 3777 
 3778 operand eRegP() %{
 3779   constraint(ALLOC_IN_RC(int_reg));
 3780   match(RegP);
 3781   match(eAXRegP);
 3782   match(eBXRegP);
 3783   match(eCXRegP);
 3784   match(eDIRegP);
 3785 
 3786   format %{ %}
 3787   interface(REG_INTER);
 3788 %}
 3789 
 3790 operand rRegP() %{
 3791   constraint(ALLOC_IN_RC(int_reg));
 3792   match(RegP);
 3793   match(eAXRegP);
 3794   match(eBXRegP);
 3795   match(eCXRegP);
 3796   match(eDIRegP);
 3797 
 3798   format %{ %}
 3799   interface(REG_INTER);
 3800 %}
 3801 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3803 operand eRegP_no_EBP() %{
 3804   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3805   match(RegP);
 3806   match(eAXRegP);
 3807   match(eBXRegP);
 3808   match(eCXRegP);
 3809   match(eDIRegP);
 3810 
 3811   op_cost(100);
 3812   format %{ %}
 3813   interface(REG_INTER);
 3814 %}
 3815 
 3816 operand naxRegP() %{
 3817   constraint(ALLOC_IN_RC(nax_reg));
 3818   match(RegP);
 3819   match(eBXRegP);
 3820   match(eDXRegP);
 3821   match(eCXRegP);
 3822   match(eSIRegP);
 3823   match(eDIRegP);
 3824 
 3825   format %{ %}
 3826   interface(REG_INTER);
 3827 %}
 3828 
 3829 operand nabxRegP() %{
 3830   constraint(ALLOC_IN_RC(nabx_reg));
 3831   match(RegP);
 3832   match(eCXRegP);
 3833   match(eDXRegP);
 3834   match(eSIRegP);
 3835   match(eDIRegP);
 3836 
 3837   format %{ %}
 3838   interface(REG_INTER);
 3839 %}
 3840 
 3841 operand pRegP() %{
 3842   constraint(ALLOC_IN_RC(p_reg));
 3843   match(RegP);
 3844   match(eBXRegP);
 3845   match(eDXRegP);
 3846   match(eSIRegP);
 3847   match(eDIRegP);
 3848 
 3849   format %{ %}
 3850   interface(REG_INTER);
 3851 %}
 3852 
 3853 // Special Registers
 3854 // Return a pointer value
 3855 operand eAXRegP(eRegP reg) %{
 3856   constraint(ALLOC_IN_RC(eax_reg));
 3857   match(reg);
 3858   format %{ "EAX" %}
 3859   interface(REG_INTER);
 3860 %}
 3861 
 3862 // Used in AtomicAdd
 3863 operand eBXRegP(eRegP reg) %{
 3864   constraint(ALLOC_IN_RC(ebx_reg));
 3865   match(reg);
 3866   format %{ "EBX" %}
 3867   interface(REG_INTER);
 3868 %}
 3869 
 3870 // Tail-call (interprocedural jump) to interpreter
 3871 operand eCXRegP(eRegP reg) %{
 3872   constraint(ALLOC_IN_RC(ecx_reg));
 3873   match(reg);
 3874   format %{ "ECX" %}
 3875   interface(REG_INTER);
 3876 %}
 3877 
 3878 operand eDXRegP(eRegP reg) %{
 3879   constraint(ALLOC_IN_RC(edx_reg));
 3880   match(reg);
 3881   format %{ "EDX" %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 operand eSIRegP(eRegP reg) %{
 3886   constraint(ALLOC_IN_RC(esi_reg));
 3887   match(reg);
 3888   format %{ "ESI" %}
 3889   interface(REG_INTER);
 3890 %}
 3891 
 3892 // Used in rep stosw
 3893 operand eDIRegP(eRegP reg) %{
 3894   constraint(ALLOC_IN_RC(edi_reg));
 3895   match(reg);
 3896   format %{ "EDI" %}
 3897   interface(REG_INTER);
 3898 %}
 3899 
 3900 operand eRegL() %{
 3901   constraint(ALLOC_IN_RC(long_reg));
 3902   match(RegL);
 3903   match(eADXRegL);
 3904 
 3905   format %{ %}
 3906   interface(REG_INTER);
 3907 %}
 3908 
 3909 operand eADXRegL( eRegL reg ) %{
 3910   constraint(ALLOC_IN_RC(eadx_reg));
 3911   match(reg);
 3912 
 3913   format %{ "EDX:EAX" %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 operand eBCXRegL( eRegL reg ) %{
 3918   constraint(ALLOC_IN_RC(ebcx_reg));
 3919   match(reg);
 3920 
 3921   format %{ "EBX:ECX" %}
 3922   interface(REG_INTER);
 3923 %}
 3924 
 3925 // Special case for integer high multiply
 3926 operand eADXRegL_low_only() %{
 3927   constraint(ALLOC_IN_RC(eadx_reg));
 3928   match(RegL);
 3929 
 3930   format %{ "EAX" %}
 3931   interface(REG_INTER);
 3932 %}
 3933 
 3934 // Flags register, used as output of compare instructions
 3935 operand rFlagsReg() %{
 3936   constraint(ALLOC_IN_RC(int_flags));
 3937   match(RegFlags);
 3938 
 3939   format %{ "EFLAGS" %}
 3940   interface(REG_INTER);
 3941 %}
 3942 
 3943 // Flags register, used as output of compare instructions
 3944 operand eFlagsReg() %{
 3945   constraint(ALLOC_IN_RC(int_flags));
 3946   match(RegFlags);
 3947 
 3948   format %{ "EFLAGS" %}
 3949   interface(REG_INTER);
 3950 %}
 3951 
 3952 // Flags register, used as output of FLOATING POINT compare instructions
 3953 operand eFlagsRegU() %{
 3954   constraint(ALLOC_IN_RC(int_flags));
 3955   match(RegFlags);
 3956 
 3957   format %{ "EFLAGS_U" %}
 3958   interface(REG_INTER);
 3959 %}
 3960 
 3961 operand eFlagsRegUCF() %{
 3962   constraint(ALLOC_IN_RC(int_flags));
 3963   match(RegFlags);
 3964   predicate(false);
 3965 
 3966   format %{ "EFLAGS_U_CF" %}
 3967   interface(REG_INTER);
 3968 %}
 3969 
 3970 // Condition Code Register used by long compare
 3971 operand flagsReg_long_LTGE() %{
 3972   constraint(ALLOC_IN_RC(int_flags));
 3973   match(RegFlags);
 3974   format %{ "FLAGS_LTGE" %}
 3975   interface(REG_INTER);
 3976 %}
 3977 operand flagsReg_long_EQNE() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980   format %{ "FLAGS_EQNE" %}
 3981   interface(REG_INTER);
 3982 %}
 3983 operand flagsReg_long_LEGT() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986   format %{ "FLAGS_LEGT" %}
 3987   interface(REG_INTER);
 3988 %}
 3989 
 3990 // Condition Code Register used by unsigned long compare
 3991 operand flagsReg_ulong_LTGE() %{
 3992   constraint(ALLOC_IN_RC(int_flags));
 3993   match(RegFlags);
 3994   format %{ "FLAGS_U_LTGE" %}
 3995   interface(REG_INTER);
 3996 %}
 3997 operand flagsReg_ulong_EQNE() %{
 3998   constraint(ALLOC_IN_RC(int_flags));
 3999   match(RegFlags);
 4000   format %{ "FLAGS_U_EQNE" %}
 4001   interface(REG_INTER);
 4002 %}
 4003 operand flagsReg_ulong_LEGT() %{
 4004   constraint(ALLOC_IN_RC(int_flags));
 4005   match(RegFlags);
 4006   format %{ "FLAGS_U_LEGT" %}
 4007   interface(REG_INTER);
 4008 %}
 4009 
 4010 // Float register operands
 4011 operand regDPR() %{
 4012   predicate( UseSSE < 2 );
 4013   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4014   match(RegD);
 4015   match(regDPR1);
 4016   match(regDPR2);
 4017   format %{ %}
 4018   interface(REG_INTER);
 4019 %}
 4020 
 4021 operand regDPR1(regDPR reg) %{
 4022   predicate( UseSSE < 2 );
 4023   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4024   match(reg);
 4025   format %{ "FPR1" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
 4029 operand regDPR2(regDPR reg) %{
 4030   predicate( UseSSE < 2 );
 4031   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4032   match(reg);
 4033   format %{ "FPR2" %}
 4034   interface(REG_INTER);
 4035 %}
 4036 
 4037 operand regnotDPR1(regDPR reg) %{
 4038   predicate( UseSSE < 2 );
 4039   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4040   match(reg);
 4041   format %{ %}
 4042   interface(REG_INTER);
 4043 %}
 4044 
 4045 // Float register operands
 4046 operand regFPR() %{
 4047   predicate( UseSSE < 2 );
 4048   constraint(ALLOC_IN_RC(fp_flt_reg));
 4049   match(RegF);
 4050   match(regFPR1);
 4051   format %{ %}
 4052   interface(REG_INTER);
 4053 %}
 4054 
 4055 // Float register operands
 4056 operand regFPR1(regFPR reg) %{
 4057   predicate( UseSSE < 2 );
 4058   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4059   match(reg);
 4060   format %{ "FPR1" %}
 4061   interface(REG_INTER);
 4062 %}
 4063 
 4064 // XMM Float register operands
 4065 operand regF() %{
 4066   predicate( UseSSE>=1 );
 4067   constraint(ALLOC_IN_RC(float_reg_legacy));
 4068   match(RegF);
 4069   format %{ %}
 4070   interface(REG_INTER);
 4071 %}
 4072 
 4073 operand legRegF() %{
 4074   predicate( UseSSE>=1 );
 4075   constraint(ALLOC_IN_RC(float_reg_legacy));
 4076   match(RegF);
 4077   format %{ %}
 4078   interface(REG_INTER);
 4079 %}
 4080 
 4081 // Float register operands
 4082 operand vlRegF() %{
 4083    constraint(ALLOC_IN_RC(float_reg_vl));
 4084    match(RegF);
 4085 
 4086    format %{ %}
 4087    interface(REG_INTER);
 4088 %}
 4089 
 4090 // XMM Double register operands
 4091 operand regD() %{
 4092   predicate( UseSSE>=2 );
 4093   constraint(ALLOC_IN_RC(double_reg_legacy));
 4094   match(RegD);
 4095   format %{ %}
 4096   interface(REG_INTER);
 4097 %}
 4098 
 4099 // Double register operands
 4100 operand legRegD() %{
 4101   predicate( UseSSE>=2 );
 4102   constraint(ALLOC_IN_RC(double_reg_legacy));
 4103   match(RegD);
 4104   format %{ %}
 4105   interface(REG_INTER);
 4106 %}
 4107 
 4108 operand vlRegD() %{
 4109    constraint(ALLOC_IN_RC(double_reg_vl));
 4110    match(RegD);
 4111 
 4112    format %{ %}
 4113    interface(REG_INTER);
 4114 %}
 4115 
 4116 //----------Memory Operands----------------------------------------------------
 4117 // Direct Memory Operand
 4118 operand direct(immP addr) %{
 4119   match(addr);
 4120 
 4121   format %{ "[$addr]" %}
 4122   interface(MEMORY_INTER) %{
 4123     base(0xFFFFFFFF);
 4124     index(0x4);
 4125     scale(0x0);
 4126     disp($addr);
 4127   %}
 4128 %}
 4129 
 4130 // Indirect Memory Operand
 4131 operand indirect(eRegP reg) %{
 4132   constraint(ALLOC_IN_RC(int_reg));
 4133   match(reg);
 4134 
 4135   format %{ "[$reg]" %}
 4136   interface(MEMORY_INTER) %{
 4137     base($reg);
 4138     index(0x4);
 4139     scale(0x0);
 4140     disp(0x0);
 4141   %}
 4142 %}
 4143 
 4144 // Indirect Memory Plus Short Offset Operand
 4145 operand indOffset8(eRegP reg, immI8 off) %{
 4146   match(AddP reg off);
 4147 
 4148   format %{ "[$reg + $off]" %}
 4149   interface(MEMORY_INTER) %{
 4150     base($reg);
 4151     index(0x4);
 4152     scale(0x0);
 4153     disp($off);
 4154   %}
 4155 %}
 4156 
 4157 // Indirect Memory Plus Long Offset Operand
 4158 operand indOffset32(eRegP reg, immI off) %{
 4159   match(AddP reg off);
 4160 
 4161   format %{ "[$reg + $off]" %}
 4162   interface(MEMORY_INTER) %{
 4163     base($reg);
 4164     index(0x4);
 4165     scale(0x0);
 4166     disp($off);
 4167   %}
 4168 %}
 4169 
 4170 // Indirect Memory Plus Long Offset Operand
 4171 operand indOffset32X(rRegI reg, immP off) %{
 4172   match(AddP off reg);
 4173 
 4174   format %{ "[$reg + $off]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);
 4178     scale(0x0);
 4179     disp($off);
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Index Register Plus Offset Operand
 4184 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4185   match(AddP (AddP reg ireg) off);
 4186 
 4187   op_cost(10);
 4188   format %{"[$reg + $off + $ireg]" %}
 4189   interface(MEMORY_INTER) %{
 4190     base($reg);
 4191     index($ireg);
 4192     scale(0x0);
 4193     disp($off);
 4194   %}
 4195 %}
 4196 
 4197 // Indirect Memory Plus Index Register Plus Offset Operand
 4198 operand indIndex(eRegP reg, rRegI ireg) %{
 4199   match(AddP reg ireg);
 4200 
 4201   op_cost(10);
 4202   format %{"[$reg + $ireg]" %}
 4203   interface(MEMORY_INTER) %{
 4204     base($reg);
 4205     index($ireg);
 4206     scale(0x0);
 4207     disp(0x0);
 4208   %}
 4209 %}
 4210 
 4211 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4213 // // -------------------------------------------------------------------------
 4214 // // Scaled Memory Operands
 4215 // // Indirect Memory Times Scale Plus Offset Operand
 4216 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4217 //   match(AddP off (LShiftI ireg scale));
 4218 //
 4219 //   op_cost(10);
 4220 //   format %{"[$off + $ireg << $scale]" %}
 4221 //   interface(MEMORY_INTER) %{
 4222 //     base(0x4);
 4223 //     index($ireg);
 4224 //     scale($scale);
 4225 //     disp($off);
 4226 //   %}
 4227 // %}
 4228 
 4229 // Indirect Memory Times Scale Plus Index Register
 4230 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4231   match(AddP reg (LShiftI ireg scale));
 4232 
 4233   op_cost(10);
 4234   format %{"[$reg + $ireg << $scale]" %}
 4235   interface(MEMORY_INTER) %{
 4236     base($reg);
 4237     index($ireg);
 4238     scale($scale);
 4239     disp(0x0);
 4240   %}
 4241 %}
 4242 
 4243 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4244 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4245   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4246 
 4247   op_cost(10);
 4248   format %{"[$reg + $off + $ireg << $scale]" %}
 4249   interface(MEMORY_INTER) %{
 4250     base($reg);
 4251     index($ireg);
 4252     scale($scale);
 4253     disp($off);
 4254   %}
 4255 %}
 4256 
 4257 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4259 // the first word of the long.  If the load-long destination overlaps with
 4260 // registers used in the addressing expression, the 2nd half will be loaded
 4261 // from a clobbered address.  Fix this by requiring that load-long use
 4262 // address registers that do not overlap with the load-long target.
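// Example of the hazard (registers chosen for illustration only): a long
// destined for EBX:EAX loaded through the address [EAX + 8] clobbers EAX
// with the first 32-bit load, so the second load of [EAX + 12] would go
// through the new value.  Pinning the address to its own register class
// prevents that overlap.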
 4263 
 4264 // load-long support
 4265 operand load_long_RegP() %{
 4266   constraint(ALLOC_IN_RC(esi_reg));
 4267   match(RegP);
 4268   match(eSIRegP);
 4269   op_cost(100);
 4270   format %{  %}
 4271   interface(REG_INTER);
 4272 %}
 4273 
 4274 // Indirect Memory Operand Long
 4275 operand load_long_indirect(load_long_RegP reg) %{
 4276   constraint(ALLOC_IN_RC(esi_reg));
 4277   match(reg);
 4278 
 4279   format %{ "[$reg]" %}
 4280   interface(MEMORY_INTER) %{
 4281     base($reg);
 4282     index(0x4);
 4283     scale(0x0);
 4284     disp(0x0);
 4285   %}
 4286 %}
 4287 
 4288 // Indirect Memory Plus Long Offset Operand
 4289 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4290   match(AddP reg off);
 4291 
 4292   format %{ "[$reg + $off]" %}
 4293   interface(MEMORY_INTER) %{
 4294     base($reg);
 4295     index(0x4);
 4296     scale(0x0);
 4297     disp($off);
 4298   %}
 4299 %}
 4300 
 4301 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4302 
 4303 
 4304 //----------Special Memory Operands--------------------------------------------
 4305 // Stack Slot Operand - This operand is used for loading and storing temporary
 4306 //                      values on the stack where a match requires a value to
 4307 //                      flow through memory.
 4308 operand stackSlotP(sRegP reg) %{
 4309   constraint(ALLOC_IN_RC(stack_slots));
 4310   // No match rule because this operand is only generated in matching
 4311   format %{ "[$reg]" %}
 4312   interface(MEMORY_INTER) %{
 4313     base(0x4);   // ESP
 4314     index(0x4);  // No Index
 4315     scale(0x0);  // No Scale
 4316     disp($reg);  // Stack Offset
 4317   %}
 4318 %}
 4319 
 4320 operand stackSlotI(sRegI reg) %{
 4321   constraint(ALLOC_IN_RC(stack_slots));
 4322   // No match rule because this operand is only generated in matching
 4323   format %{ "[$reg]" %}
 4324   interface(MEMORY_INTER) %{
 4325     base(0x4);   // ESP
 4326     index(0x4);  // No Index
 4327     scale(0x0);  // No Scale
 4328     disp($reg);  // Stack Offset
 4329   %}
 4330 %}
 4331 
 4332 operand stackSlotF(sRegF reg) %{
 4333   constraint(ALLOC_IN_RC(stack_slots));
 4334   // No match rule because this operand is only generated in matching
 4335   format %{ "[$reg]" %}
 4336   interface(MEMORY_INTER) %{
 4337     base(0x4);   // ESP
 4338     index(0x4);  // No Index
 4339     scale(0x0);  // No Scale
 4340     disp($reg);  // Stack Offset
 4341   %}
 4342 %}
 4343 
 4344 operand stackSlotD(sRegD reg) %{
 4345   constraint(ALLOC_IN_RC(stack_slots));
 4346   // No match rule because this operand is only generated in matching
 4347   format %{ "[$reg]" %}
 4348   interface(MEMORY_INTER) %{
 4349     base(0x4);   // ESP
 4350     index(0x4);  // No Index
 4351     scale(0x0);  // No Scale
 4352     disp($reg);  // Stack Offset
 4353   %}
 4354 %}
 4355 
 4356 operand stackSlotL(sRegL reg) %{
 4357   constraint(ALLOC_IN_RC(stack_slots));
 4358   // No match rule because this operand is only generated in matching
 4359   format %{ "[$reg]" %}
 4360   interface(MEMORY_INTER) %{
 4361     base(0x4);   // ESP
 4362     index(0x4);  // No Index
 4363     scale(0x0);  // No Scale
 4364     disp($reg);  // Stack Offset
 4365   %}
 4366 %}
 4367 
 4368 //----------Conditional Branch Operands----------------------------------------
 4369 // Comparison Op  - This is the operation of the comparison, and is limited to
 4370 //                  the following set of codes:
 4371 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4372 //
 4373 // Other attributes of the comparison, such as unsignedness, are specified
 4374 // by the comparison instruction that sets a condition code flags register.
 4375 // That result is represented by a flags operand whose subtype is appropriate
 4376 // to the unsignedness (etc.) of the comparison.
 4377 //
 4378 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4379 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4380 // by matching a specific subtype of Bool operand below, such as cmpOpU.
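// For example, the "equal" code 0x4 in cmpOp below is exactly the condition
// nibble of the corresponding Jcc/SETcc encodings: short JE is 0x70|0x4 = 0x74
// and the long form is 0x0F 0x80|0x4 = 0x0F 0x84.  cmpOpU swaps in the
// unsigned codes (below/above) for the same Bool tests.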
 4381 
// Comparison Code
 4383 operand cmpOp() %{
 4384   match(Bool);
 4385 
 4386   format %{ "" %}
 4387   interface(COND_INTER) %{
 4388     equal(0x4, "e");
 4389     not_equal(0x5, "ne");
 4390     less(0xC, "l");
 4391     greater_equal(0xD, "ge");
 4392     less_equal(0xE, "le");
 4393     greater(0xF, "g");
 4394     overflow(0x0, "o");
 4395     no_overflow(0x1, "no");
 4396   %}
 4397 %}
 4398 
 4399 // Comparison Code, unsigned compare.  Used by FP also, with
 4400 // C2 (unordered) turned into GT or LT already.  The other bits
 4401 // C0 and C3 are turned into Carry & Zero flags.
 4402 operand cmpOpU() %{
 4403   match(Bool);
 4404 
 4405   format %{ "" %}
 4406   interface(COND_INTER) %{
 4407     equal(0x4, "e");
 4408     not_equal(0x5, "ne");
 4409     less(0x2, "b");
 4410     greater_equal(0x3, "nb");
 4411     less_equal(0x6, "be");
 4412     greater(0x7, "nbe");
 4413     overflow(0x0, "o");
 4414     no_overflow(0x1, "no");
 4415   %}
 4416 %}
 4417 
 4418 // Floating comparisons that don't require any fixup for the unordered case
 4419 operand cmpOpUCF() %{
 4420   match(Bool);
 4421   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4422             n->as_Bool()->_test._test == BoolTest::ge ||
 4423             n->as_Bool()->_test._test == BoolTest::le ||
 4424             n->as_Bool()->_test._test == BoolTest::gt);
 4425   format %{ "" %}
 4426   interface(COND_INTER) %{
 4427     equal(0x4, "e");
 4428     not_equal(0x5, "ne");
 4429     less(0x2, "b");
 4430     greater_equal(0x3, "nb");
 4431     less_equal(0x6, "be");
 4432     greater(0x7, "nbe");
 4433     overflow(0x0, "o");
 4434     no_overflow(0x1, "no");
 4435   %}
 4436 %}
 4437 
 4438 
 4439 // Floating comparisons that can be fixed up with extra conditional jumps
 4440 operand cmpOpUCF2() %{
 4441   match(Bool);
 4442   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4443             n->as_Bool()->_test._test == BoolTest::eq);
 4444   format %{ "" %}
 4445   interface(COND_INTER) %{
 4446     equal(0x4, "e");
 4447     not_equal(0x5, "ne");
 4448     less(0x2, "b");
 4449     greater_equal(0x3, "nb");
 4450     less_equal(0x6, "be");
 4451     greater(0x7, "nbe");
 4452     overflow(0x0, "o");
 4453     no_overflow(0x1, "no");
 4454   %}
 4455 %}
 4456 
 4457 // Comparison Code for FP conditional move
 4458 operand cmpOp_fcmov() %{
 4459   match(Bool);
 4460 
 4461   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4462             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4463   format %{ "" %}
 4464   interface(COND_INTER) %{
 4465     equal        (0x0C8);
 4466     not_equal    (0x1C8);
 4467     less         (0x0C0);
 4468     greater_equal(0x1C0);
 4469     less_equal   (0x0D0);
 4470     greater      (0x1D0);
 4471     overflow(0x0, "o"); // not really supported by the instruction
 4472     no_overflow(0x1, "no"); // not really supported by the instruction
 4473   %}
 4474 %}
 4475 
 4476 // Comparison Code used in long compares
 4477 operand cmpOp_commute() %{
 4478   match(Bool);
 4479 
 4480   format %{ "" %}
 4481   interface(COND_INTER) %{
 4482     equal(0x4, "e");
 4483     not_equal(0x5, "ne");
 4484     less(0xF, "g");
 4485     greater_equal(0xE, "le");
 4486     less_equal(0xD, "ge");
 4487     greater(0xC, "l");
 4488     overflow(0x0, "o");
 4489     no_overflow(0x1, "no");
 4490   %}
 4491 %}
 4492 
 4493 // Comparison Code used in unsigned long compares
 4494 operand cmpOpU_commute() %{
 4495   match(Bool);
 4496 
 4497   format %{ "" %}
 4498   interface(COND_INTER) %{
 4499     equal(0x4, "e");
 4500     not_equal(0x5, "ne");
 4501     less(0x7, "nbe");
 4502     greater_equal(0x6, "be");
 4503     less_equal(0x3, "nb");
 4504     greater(0x2, "b");
 4505     overflow(0x0, "o");
 4506     no_overflow(0x1, "no");
 4507   %}
 4508 %}
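
// Note on the *_commute forms above: they emit the condition for swapped
// compare operands -- "less" selects the G/NBE encoding, "greater" the L/B
// encoding, and so on -- which is what the flattened long-compare sequences
// need when they end up testing (y cmp x) instead of (x cmp y).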
 4509 
 4510 //----------OPERAND CLASSES----------------------------------------------------
 4511 // Operand Classes are groups of operands that are used to simplify
 4512 // instruction definitions by not requiring the AD writer to specify separate
 4513 // instructions for every form of operand when the instruction accepts
 4514 // multiple operand types with the same basic encoding and format.  The classic
 4515 // case of this is memory operands.
 4516 
 4517 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4518                indIndex, indIndexScale, indIndexScaleOffset);
 4519 
 4520 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4521 // This means some kind of offset is always required and you cannot use
 4522 // an oop as the offset (as is done when working on static globals).
 4523 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4524                     indIndex, indIndexScale, indIndexScaleOffset);
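
// For example, a single instruct written against the "memory" class, such as
// loadI(rRegI dst, memory mem) further below, is expanded by ADLC once per
// member operand, so one definition covers register-indirect, disp8, disp32,
// and scaled-index addressing forms without repeating the encoding.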
 4525 
 4526 
 4527 //----------PIPELINE-----------------------------------------------------------
 4528 // Rules which define the behavior of the target architecture's pipeline.
 4529 pipeline %{
 4530 
 4531 //----------ATTRIBUTES---------------------------------------------------------
 4532 attributes %{
 4533   variable_size_instructions;        // Variable-size instructions
 4534   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4535   instruction_unit_size = 1;         // An instruction is 1 byte long
 4536   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4537   instruction_fetch_units = 1;       // of 16 bytes
 4538 
 4539   // List of nop instructions
 4540   nops( MachNop );
 4541 %}
 4542 
 4543 //----------RESOURCES----------------------------------------------------------
 4544 // Resources are the functional units available to the machine
 4545 
 4546 // Generic P2/P3 pipeline
 4547 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4548 // 3 instructions decoded per cycle.
 4549 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4550 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4551 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4552            MS0, MS1, MEM = MS0 | MS1,
 4553            BR, FPU,
 4554            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4555 
 4556 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4557 // Pipeline Description specifies the stages in the machine's pipeline
 4558 
 4559 // Generic P2/P3 pipeline
 4560 pipe_desc(S0, S1, S2, S3, S4, S5);
 4561 
 4562 //----------PIPELINE CLASSES---------------------------------------------------
 4563 // Pipeline Classes describe the stages in which input and output are
 4564 // referenced by the hardware pipeline.
 4565 
 4566 // Naming convention: ialu or fpu
 4567 // Then: _reg
 4568 // Then: _reg if there is a 2nd register
 4569 // Then: _long if it's a pair of instructions implementing a long
 4570 // Then: _fat if it requires the big decoder
 4571 //   Or: _mem if it requires the big decoder and a memory unit.
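//
// How to read the resource lines in the classes that follow (illustrative):
//   dst    : S4(write);   // operand dst is written in stage S4
//   DECODE : S0;          // any one decoder is busy in stage S0
//   ALU    : S3(2);       // two ALU slots are needed in stage S3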
 4572 
 4573 // Integer ALU reg operation
 4574 pipe_class ialu_reg(rRegI dst) %{
 4575     single_instruction;
 4576     dst    : S4(write);
 4577     dst    : S3(read);
 4578     DECODE : S0;        // any decoder
 4579     ALU    : S3;        // any alu
 4580 %}
 4581 
 4582 // Long ALU reg operation
 4583 pipe_class ialu_reg_long(eRegL dst) %{
 4584     instruction_count(2);
 4585     dst    : S4(write);
 4586     dst    : S3(read);
 4587     DECODE : S0(2);     // any 2 decoders
 4588     ALU    : S3(2);     // both alus
 4589 %}
 4590 
 4591 // Integer ALU reg operation using big decoder
 4592 pipe_class ialu_reg_fat(rRegI dst) %{
 4593     single_instruction;
 4594     dst    : S4(write);
 4595     dst    : S3(read);
 4596     D0     : S0;        // big decoder only
 4597     ALU    : S3;        // any alu
 4598 %}
 4599 
 4600 // Long ALU reg operation using big decoder
 4601 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4602     instruction_count(2);
 4603     dst    : S4(write);
 4604     dst    : S3(read);
 4605     D0     : S0(2);     // big decoder only; twice
 4606     ALU    : S3(2);     // any 2 alus
 4607 %}
 4608 
 4609 // Integer ALU reg-reg operation
 4610 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4611     single_instruction;
 4612     dst    : S4(write);
 4613     src    : S3(read);
 4614     DECODE : S0;        // any decoder
 4615     ALU    : S3;        // any alu
 4616 %}
 4617 
 4618 // Long ALU reg-reg operation
 4619 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4620     instruction_count(2);
 4621     dst    : S4(write);
 4622     src    : S3(read);
 4623     DECODE : S0(2);     // any 2 decoders
 4624     ALU    : S3(2);     // both alus
 4625 %}
 4626 
 4627 // Integer ALU reg-reg operation using big decoder
 4628 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4629     single_instruction;
 4630     dst    : S4(write);
 4631     src    : S3(read);
 4632     D0     : S0;        // big decoder only
 4633     ALU    : S3;        // any alu
 4634 %}
 4635 
 4636 // Long ALU reg-reg operation using big decoder
 4637 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4638     instruction_count(2);
 4639     dst    : S4(write);
 4640     src    : S3(read);
 4641     D0     : S0(2);     // big decoder only; twice
 4642     ALU    : S3(2);     // both alus
 4643 %}
 4644 
 4645 // Integer ALU reg-mem operation
 4646 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4647     single_instruction;
 4648     dst    : S5(write);
 4649     mem    : S3(read);
 4650     D0     : S0;        // big decoder only
 4651     ALU    : S4;        // any alu
 4652     MEM    : S3;        // any mem
 4653 %}
 4654 
 4655 // Long ALU reg-mem operation
 4656 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4657     instruction_count(2);
 4658     dst    : S5(write);
 4659     mem    : S3(read);
 4660     D0     : S0(2);     // big decoder only; twice
 4661     ALU    : S4(2);     // any 2 alus
 4662     MEM    : S3(2);     // both mems
 4663 %}
 4664 
 4665 // Integer mem operation (prefetch)
 4666 pipe_class ialu_mem(memory mem)
 4667 %{
 4668     single_instruction;
 4669     mem    : S3(read);
 4670     D0     : S0;        // big decoder only
 4671     MEM    : S3;        // any mem
 4672 %}
 4673 
 4674 // Integer Store to Memory
 4675 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4676     single_instruction;
 4677     mem    : S3(read);
 4678     src    : S5(read);
 4679     D0     : S0;        // big decoder only
 4680     ALU    : S4;        // any alu
 4681     MEM    : S3;
 4682 %}
 4683 
 4684 // Long Store to Memory
 4685 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4686     instruction_count(2);
 4687     mem    : S3(read);
 4688     src    : S5(read);
 4689     D0     : S0(2);     // big decoder only; twice
 4690     ALU    : S4(2);     // any 2 alus
 4691     MEM    : S3(2);     // Both mems
 4692 %}
 4693 
 4694 // Integer Store to Memory
 4695 pipe_class ialu_mem_imm(memory mem) %{
 4696     single_instruction;
 4697     mem    : S3(read);
 4698     D0     : S0;        // big decoder only
 4699     ALU    : S4;        // any alu
 4700     MEM    : S3;
 4701 %}
 4702 
 4703 // Integer ALU0 reg-reg operation
 4704 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4705     single_instruction;
 4706     dst    : S4(write);
 4707     src    : S3(read);
 4708     D0     : S0;        // Big decoder only
 4709     ALU0   : S3;        // only alu0
 4710 %}
 4711 
 4712 // Integer ALU0 reg-mem operation
 4713 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4714     single_instruction;
 4715     dst    : S5(write);
 4716     mem    : S3(read);
 4717     D0     : S0;        // big decoder only
 4718     ALU0   : S4;        // ALU0 only
 4719     MEM    : S3;        // any mem
 4720 %}
 4721 
 4722 // Integer ALU reg-reg operation
 4723 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4724     single_instruction;
 4725     cr     : S4(write);
 4726     src1   : S3(read);
 4727     src2   : S3(read);
 4728     DECODE : S0;        // any decoder
 4729     ALU    : S3;        // any alu
 4730 %}
 4731 
 4732 // Integer ALU reg-imm operation
 4733 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4734     single_instruction;
 4735     cr     : S4(write);
 4736     src1   : S3(read);
 4737     DECODE : S0;        // any decoder
 4738     ALU    : S3;        // any alu
 4739 %}
 4740 
 4741 // Integer ALU reg-mem operation
 4742 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4743     single_instruction;
 4744     cr     : S4(write);
 4745     src1   : S3(read);
 4746     src2   : S3(read);
 4747     D0     : S0;        // big decoder only
 4748     ALU    : S4;        // any alu
 4749     MEM    : S3;
 4750 %}
 4751 
 4752 // Conditional move reg-reg
 4753 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4754     instruction_count(4);
 4755     y      : S4(read);
 4756     q      : S3(read);
 4757     p      : S3(read);
 4758     DECODE : S0(4);     // any decoder
 4759 %}
 4760 
 4761 // Conditional move reg-reg
 4762 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4763     single_instruction;
 4764     dst    : S4(write);
 4765     src    : S3(read);
 4766     cr     : S3(read);
 4767     DECODE : S0;        // any decoder
 4768 %}
 4769 
 4770 // Conditional move reg-mem
 4771 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4772     single_instruction;
 4773     dst    : S4(write);
 4774     src    : S3(read);
 4775     cr     : S3(read);
 4776     DECODE : S0;        // any decoder
 4777     MEM    : S3;
 4778 %}
 4779 
 4780 // Conditional move reg-reg long
 4781 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4782     single_instruction;
 4783     dst    : S4(write);
 4784     src    : S3(read);
 4785     cr     : S3(read);
 4786     DECODE : S0(2);     // any 2 decoders
 4787 %}
 4788 
 4789 // Conditional move double reg-reg
 4790 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4791     single_instruction;
 4792     dst    : S4(write);
 4793     src    : S3(read);
 4794     cr     : S3(read);
 4795     DECODE : S0;        // any decoder
 4796 %}
 4797 
 4798 // Float reg-reg operation
 4799 pipe_class fpu_reg(regDPR dst) %{
 4800     instruction_count(2);
 4801     dst    : S3(read);
 4802     DECODE : S0(2);     // any 2 decoders
 4803     FPU    : S3;
 4804 %}
 4805 
 4806 // Float reg-reg operation
 4807 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4808     instruction_count(2);
 4809     dst    : S4(write);
 4810     src    : S3(read);
 4811     DECODE : S0(2);     // any 2 decoders
 4812     FPU    : S3;
 4813 %}
 4814 
 4815 // Float reg-reg operation
 4816 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4817     instruction_count(3);
 4818     dst    : S4(write);
 4819     src1   : S3(read);
 4820     src2   : S3(read);
 4821     DECODE : S0(3);     // any 3 decoders
 4822     FPU    : S3(2);
 4823 %}
 4824 
 4825 // Float reg-reg operation
 4826 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4827     instruction_count(4);
 4828     dst    : S4(write);
 4829     src1   : S3(read);
 4830     src2   : S3(read);
 4831     src3   : S3(read);
 4832     DECODE : S0(4);     // any 4 decode slots
 4833     FPU    : S3(2);
 4834 %}
 4835 
 4836 // Float reg-reg operation
 4837 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4838     instruction_count(4);
 4839     dst    : S4(write);
 4840     src1   : S3(read);
 4841     src2   : S3(read);
 4842     src3   : S3(read);
 4843     DECODE : S1(3);     // any 3 decoders
 4844     D0     : S0;        // Big decoder only
 4845     FPU    : S3(2);
 4846     MEM    : S3;
 4847 %}
 4848 
 4849 // Float reg-mem operation
 4850 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4851     instruction_count(2);
 4852     dst    : S5(write);
 4853     mem    : S3(read);
 4854     D0     : S0;        // big decoder only
 4855     DECODE : S1;        // any decoder for FPU POP
 4856     FPU    : S4;
 4857     MEM    : S3;        // any mem
 4858 %}
 4859 
 4860 // Float reg-mem operation
 4861 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4862     instruction_count(3);
 4863     dst    : S5(write);
 4864     src1   : S3(read);
 4865     mem    : S3(read);
 4866     D0     : S0;        // big decoder only
 4867     DECODE : S1(2);     // any decoder for FPU POP
 4868     FPU    : S4;
 4869     MEM    : S3;        // any mem
 4870 %}
 4871 
 4872 // Float mem-reg operation
 4873 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4874     instruction_count(2);
 4875     src    : S5(read);
 4876     mem    : S3(read);
 4877     DECODE : S0;        // any decoder for FPU PUSH
 4878     D0     : S1;        // big decoder only
 4879     FPU    : S4;
 4880     MEM    : S3;        // any mem
 4881 %}
 4882 
 4883 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4884     instruction_count(3);
 4885     src1   : S3(read);
 4886     src2   : S3(read);
 4887     mem    : S3(read);
 4888     DECODE : S0(2);     // any decoder for FPU PUSH
 4889     D0     : S1;        // big decoder only
 4890     FPU    : S4;
 4891     MEM    : S3;        // any mem
 4892 %}
 4893 
 4894 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4895     instruction_count(3);
 4896     src1   : S3(read);
 4897     src2   : S3(read);
 4898     mem    : S4(read);
 4899     DECODE : S0;        // any decoder for FPU PUSH
 4900     D0     : S0(2);     // big decoder only
 4901     FPU    : S4;
 4902     MEM    : S3(2);     // any mem
 4903 %}
 4904 
 4905 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4906     instruction_count(2);
 4907     src1   : S3(read);
 4908     dst    : S4(read);
 4909     D0     : S0(2);     // big decoder only
 4910     MEM    : S3(2);     // any mem
 4911 %}
 4912 
 4913 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4914     instruction_count(3);
 4915     src1   : S3(read);
 4916     src2   : S3(read);
 4917     dst    : S4(read);
 4918     D0     : S0(3);     // big decoder only
 4919     FPU    : S4;
 4920     MEM    : S3(3);     // any mem
 4921 %}
 4922 
 4923 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4924     instruction_count(3);
 4925     src1   : S4(read);
 4926     mem    : S4(read);
 4927     DECODE : S0;        // any decoder for FPU PUSH
 4928     D0     : S0(2);     // big decoder only
 4929     FPU    : S4;
 4930     MEM    : S3(2);     // any mem
 4931 %}
 4932 
 4933 // Float load constant
 4934 pipe_class fpu_reg_con(regDPR dst) %{
 4935     instruction_count(2);
 4936     dst    : S5(write);
 4937     D0     : S0;        // big decoder only for the load
 4938     DECODE : S1;        // any decoder for FPU POP
 4939     FPU    : S4;
 4940     MEM    : S3;        // any mem
 4941 %}
 4942 
 4943 // Float load constant
 4944 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4945     instruction_count(3);
 4946     dst    : S5(write);
 4947     src    : S3(read);
 4948     D0     : S0;        // big decoder only for the load
 4949     DECODE : S1(2);     // any decoder for FPU POP
 4950     FPU    : S4;
 4951     MEM    : S3;        // any mem
 4952 %}
 4953 
 4954 // Unconditional branch
 4955 pipe_class pipe_jmp( label labl ) %{
 4956     single_instruction;
 4957     BR   : S3;
 4958 %}
 4959 
 4960 // Conditional branch
 4961 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4962     single_instruction;
 4963     cr    : S1(read);
 4964     BR    : S3;
 4965 %}
 4966 
 4967 // Allocation idiom
 4968 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4969     instruction_count(1); force_serialization;
 4970     fixed_latency(6);
 4971     heap_ptr : S3(read);
 4972     DECODE   : S0(3);
 4973     D0       : S2;
 4974     MEM      : S3;
 4975     ALU      : S3(2);
 4976     dst      : S5(write);
 4977     BR       : S5;
 4978 %}
 4979 
 4980 // Generic big/slow expanded idiom
 4981 pipe_class pipe_slow(  ) %{
 4982     instruction_count(10); multiple_bundles; force_serialization;
 4983     fixed_latency(100);
 4984     D0  : S0(2);
 4985     MEM : S3(2);
 4986 %}
 4987 
 4988 // The real do-nothing guy
 4989 pipe_class empty( ) %{
 4990     instruction_count(0);
 4991 %}
 4992 
 4993 // Define the class for the Nop node
 4994 define %{
 4995    MachNop = empty;
 4996 %}
 4997 
 4998 %}
 4999 
 5000 //----------INSTRUCTIONS-------------------------------------------------------
 5001 //
 5002 // match      -- States which machine-independent subtree may be replaced
 5003 //               by this instruction.
 5004 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5005 //               selection to identify a minimum cost tree of machine
 5006 //               instructions that matches a tree of machine-independent
 5007 //               instructions.
 5008 // format     -- A string providing the disassembly for this instruction.
 5009 //               The value of an instruction's operand may be inserted
 5010 //               by referring to it with a '$' prefix.
 5011 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5012 //               to within an encode class as $primary, $secondary, and $tertiary
 5013 //               respectively.  The primary opcode is commonly used to
 5014 //               indicate the type of machine instruction, while secondary
 5015 //               and tertiary are often used for prefix options or addressing
 5016 //               modes.
 5017 // ins_encode -- A list of encode classes with parameters. The encode class
 5018 //               name must have been defined in an 'enc_class' specification
 5019 //               in the encode section of the architecture description.
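//
// A comment-only sketch of how the pieces combine (encode-class spellings are
// approximate; see the encode section for the real definitions):
//
//   instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));      // ideal subtree this replaces
//     effect(KILL cr);                    // ADD clobbers the flags register
//     format %{ "ADD    $dst,$src" %}     // $-refs name the operands above
//     opcode(0x81, 0x00);                 // $primary, $secondary (/0 reg field)
//     ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
//     ins_pipe( ialu_reg );
//   %}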
 5020 
 5021 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5022 // Load Float
 5023 instruct MoveF2LEG(legRegF dst, regF src) %{
 5024   match(Set dst src);
 5025   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5026   ins_encode %{
 5027     ShouldNotReachHere();
 5028   %}
 5029   ins_pipe( fpu_reg_reg );
 5030 %}
 5031 
 5032 // Load Float
 5033 instruct MoveLEG2F(regF dst, legRegF src) %{
 5034   match(Set dst src);
 5035   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5036   ins_encode %{
 5037     ShouldNotReachHere();
 5038   %}
 5039   ins_pipe( fpu_reg_reg );
 5040 %}
 5041 
 5042 // Load Float
 5043 instruct MoveF2VL(vlRegF dst, regF src) %{
 5044   match(Set dst src);
 5045   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5046   ins_encode %{
 5047     ShouldNotReachHere();
 5048   %}
 5049   ins_pipe( fpu_reg_reg );
 5050 %}
 5051 
 5052 // Load Float
 5053 instruct MoveVL2F(regF dst, vlRegF src) %{
 5054   match(Set dst src);
 5055   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5056   ins_encode %{
 5057     ShouldNotReachHere();
 5058   %}
 5059   ins_pipe( fpu_reg_reg );
 5060 %}
 5061 
 5062 
 5063 
 5064 // Load Double
 5065 instruct MoveD2LEG(legRegD dst, regD src) %{
 5066   match(Set dst src);
 5067   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5068   ins_encode %{
 5069     ShouldNotReachHere();
 5070   %}
 5071   ins_pipe( fpu_reg_reg );
 5072 %}
 5073 
 5074 // Load Double
 5075 instruct MoveLEG2D(regD dst, legRegD src) %{
 5076   match(Set dst src);
 5077   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5078   ins_encode %{
 5079     ShouldNotReachHere();
 5080   %}
 5081   ins_pipe( fpu_reg_reg );
 5082 %}
 5083 
 5084 // Load Double
 5085 instruct MoveD2VL(vlRegD dst, regD src) %{
 5086   match(Set dst src);
 5087   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5088   ins_encode %{
 5089     ShouldNotReachHere();
 5090   %}
 5091   ins_pipe( fpu_reg_reg );
 5092 %}
 5093 
 5094 // Load Double
 5095 instruct MoveVL2D(regD dst, vlRegD src) %{
 5096   match(Set dst src);
 5097   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 //----------BSWAP-Instruction--------------------------------------------------
 5105 instruct bytes_reverse_int(rRegI dst) %{
 5106   match(Set dst (ReverseBytesI dst));
 5107 
 5108   format %{ "BSWAP  $dst" %}
 5109   opcode(0x0F, 0xC8);
 5110   ins_encode( OpcP, OpcSReg(dst) );
 5111   ins_pipe( ialu_reg );
 5112 %}
 5113 
 5114 instruct bytes_reverse_long(eRegL dst) %{
 5115   match(Set dst (ReverseBytesL dst));
 5116 
 5117   format %{ "BSWAP  $dst.lo\n\t"
 5118             "BSWAP  $dst.hi\n\t"
 5119             "XCHG   $dst.lo $dst.hi" %}
 5120 
 5121   ins_cost(125);
 5122   ins_encode( bswap_long_bytes(dst) );
 5123   ins_pipe( ialu_reg_reg);
 5124 %}
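
// Byte-reversing a 64-bit value reduces to byte-reversing each 32-bit half
// and then exchanging the halves, which is exactly the BSWAP/BSWAP/XCHG
// sequence in the format above.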
 5125 
 5126 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5127   match(Set dst (ReverseBytesUS dst));
 5128   effect(KILL cr);
 5129 
 5130   format %{ "BSWAP  $dst\n\t"
 5131             "SHR    $dst,16\n\t" %}
 5132   ins_encode %{
 5133     __ bswapl($dst$$Register);
 5134     __ shrl($dst$$Register, 16);
 5135   %}
 5136   ins_pipe( ialu_reg );
 5137 %}
 5138 
 5139 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5140   match(Set dst (ReverseBytesS dst));
 5141   effect(KILL cr);
 5142 
 5143   format %{ "BSWAP  $dst\n\t"
 5144             "SAR    $dst,16\n\t" %}
 5145   ins_encode %{
 5146     __ bswapl($dst$$Register);
 5147     __ sarl($dst$$Register, 16);
 5148   %}
 5149   ins_pipe( ialu_reg );
 5150 %}
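
// In the two 16-bit reversals above, BSWAP leaves the swapped bytes in the
// upper half of the 32-bit register; the shift right by 16 then brings them
// back down, zero-extended by SHR or sign-extended by SAR.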
 5151 
 5152 
 5153 //---------- Zeros Count Instructions ------------------------------------------
 5154 
 5155 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5156   predicate(UseCountLeadingZerosInstruction);
 5157   match(Set dst (CountLeadingZerosI src));
 5158   effect(KILL cr);
 5159 
 5160   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5161   ins_encode %{
 5162     __ lzcntl($dst$$Register, $src$$Register);
 5163   %}
 5164   ins_pipe(ialu_reg);
 5165 %}
 5166 
 5167 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5168   predicate(!UseCountLeadingZerosInstruction);
 5169   match(Set dst (CountLeadingZerosI src));
 5170   effect(KILL cr);
 5171 
 5172   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5173             "JNZ    skip\n\t"
 5174             "MOV    $dst, -1\n"
 5175       "skip:\n\t"
 5176             "NEG    $dst\n\t"
 5177             "ADD    $dst, 31" %}
 5178   ins_encode %{
 5179     Register Rdst = $dst$$Register;
 5180     Register Rsrc = $src$$Register;
 5181     Label skip;
 5182     __ bsrl(Rdst, Rsrc);
 5183     __ jccb(Assembler::notZero, skip);
 5184     __ movl(Rdst, -1);
 5185     __ bind(skip);
 5186     __ negl(Rdst);
 5187     __ addl(Rdst, BitsPerInt - 1);
 5188   %}
 5189   ins_pipe(ialu_reg);
 5190 %}
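
// Why the NEG/ADD tail works: BSR returns the index of the highest set bit,
// and for x != 0 the leading-zero count is 31 - bsr(x) = -bsr(x) + 31.
// For x == 0, BSR sets ZF, $dst is forced to -1, and the result becomes
// -(-1) + 31 = 32.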
 5191 
 5192 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5193   predicate(UseCountLeadingZerosInstruction);
 5194   match(Set dst (CountLeadingZerosL src));
 5195   effect(TEMP dst, KILL cr);
 5196 
 5197   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5198             "JNC    done\n\t"
 5199             "LZCNT  $dst, $src.lo\n\t"
 5200             "ADD    $dst, 32\n"
 5201       "done:" %}
 5202   ins_encode %{
 5203     Register Rdst = $dst$$Register;
 5204     Register Rsrc = $src$$Register;
 5205     Label done;
 5206     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5207     __ jccb(Assembler::carryClear, done);
 5208     __ lzcntl(Rdst, Rsrc);
 5209     __ addl(Rdst, BitsPerInt);
 5210     __ bind(done);
 5211   %}
 5212   ins_pipe(ialu_reg);
 5213 %}
 5214 
 5215 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5216   predicate(!UseCountLeadingZerosInstruction);
 5217   match(Set dst (CountLeadingZerosL src));
 5218   effect(TEMP dst, KILL cr);
 5219 
 5220   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5221             "JZ     msw_is_zero\n\t"
 5222             "ADD    $dst, 32\n\t"
 5223             "JMP    not_zero\n"
 5224       "msw_is_zero:\n\t"
 5225             "BSR    $dst, $src.lo\n\t"
 5226             "JNZ    not_zero\n\t"
 5227             "MOV    $dst, -1\n"
 5228       "not_zero:\n\t"
 5229             "NEG    $dst\n\t"
 5230             "ADD    $dst, 63\n" %}
 5231   ins_encode %{
 5232     Register Rdst = $dst$$Register;
 5233     Register Rsrc = $src$$Register;
 5234     Label msw_is_zero;
 5235     Label not_zero;
 5236     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5237     __ jccb(Assembler::zero, msw_is_zero);
 5238     __ addl(Rdst, BitsPerInt);
 5239     __ jmpb(not_zero);
 5240     __ bind(msw_is_zero);
 5241     __ bsrl(Rdst, Rsrc);
 5242     __ jccb(Assembler::notZero, not_zero);
 5243     __ movl(Rdst, -1);
 5244     __ bind(not_zero);
 5245     __ negl(Rdst);
 5246     __ addl(Rdst, BitsPerLong - 1);
 5247   %}
 5248   ins_pipe(ialu_reg);
 5249 %}
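
// Same derivation for the 64-bit case: the final NEG/ADD computes 63 - index,
// where index is bsr(hi) + 32 when the high word is non-zero, bsr(lo) when
// only the low word is non-zero, and -1 when the whole value is zero (so the
// result is 64).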
 5250 
 5251 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5252   predicate(UseCountTrailingZerosInstruction);
 5253   match(Set dst (CountTrailingZerosI src));
 5254   effect(KILL cr);
 5255 
 5256   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5257   ins_encode %{
 5258     __ tzcntl($dst$$Register, $src$$Register);
 5259   %}
 5260   ins_pipe(ialu_reg);
 5261 %}
 5262 
 5263 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5264   predicate(!UseCountTrailingZerosInstruction);
 5265   match(Set dst (CountTrailingZerosI src));
 5266   effect(KILL cr);
 5267 
 5268   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5269             "JNZ    done\n\t"
 5270             "MOV    $dst, 32\n"
 5271       "done:" %}
 5272   ins_encode %{
 5273     Register Rdst = $dst$$Register;
 5274     Label done;
 5275     __ bsfl(Rdst, $src$$Register);
 5276     __ jccb(Assembler::notZero, done);
 5277     __ movl(Rdst, BitsPerInt);
 5278     __ bind(done);
 5279   %}
 5280   ins_pipe(ialu_reg);
 5281 %}
 5282 
 5283 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5284   predicate(UseCountTrailingZerosInstruction);
 5285   match(Set dst (CountTrailingZerosL src));
 5286   effect(TEMP dst, KILL cr);
 5287 
 5288   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5289             "JNC    done\n\t"
 5290             "TZCNT  $dst, $src.hi\n\t"
 5291             "ADD    $dst, 32\n"
 5292       "done:" %}
 5293   ins_encode %{
 5294     Register Rdst = $dst$$Register;
 5295     Register Rsrc = $src$$Register;
 5296     Label done;
 5297     __ tzcntl(Rdst, Rsrc);
 5298     __ jccb(Assembler::carryClear, done);
 5299     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5300     __ addl(Rdst, BitsPerInt);
 5301     __ bind(done);
 5302   %}
 5303   ins_pipe(ialu_reg);
 5304 %}
 5305 
 5306 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5307   predicate(!UseCountTrailingZerosInstruction);
 5308   match(Set dst (CountTrailingZerosL src));
 5309   effect(TEMP dst, KILL cr);
 5310 
 5311   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5312             "JNZ    done\n\t"
 5313             "BSF    $dst, $src.hi\n\t"
 5314             "JNZ    msw_not_zero\n\t"
 5315             "MOV    $dst, 32\n"
 5316       "msw_not_zero:\n\t"
 5317             "ADD    $dst, 32\n"
 5318       "done:" %}
 5319   ins_encode %{
 5320     Register Rdst = $dst$$Register;
 5321     Register Rsrc = $src$$Register;
 5322     Label msw_not_zero;
 5323     Label done;
 5324     __ bsfl(Rdst, Rsrc);
 5325     __ jccb(Assembler::notZero, done);
 5326     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5327     __ jccb(Assembler::notZero, msw_not_zero);
 5328     __ movl(Rdst, BitsPerInt);
 5329     __ bind(msw_not_zero);
 5330     __ addl(Rdst, BitsPerInt);
 5331     __ bind(done);
 5332   %}
 5333   ins_pipe(ialu_reg);
 5334 %}
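
// In the BSF fallback above, the result is bsf(lo) when the low word is
// non-zero; otherwise it is bsf(hi) + 32, and when both words are zero the
// MOV supplies 32 before the final ADD, giving 64.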
 5335 
 5336 
 5337 //---------- Population Count Instructions -------------------------------------
 5338 
 5339 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5340   predicate(UsePopCountInstruction);
 5341   match(Set dst (PopCountI src));
 5342   effect(KILL cr);
 5343 
 5344   format %{ "POPCNT $dst, $src" %}
 5345   ins_encode %{
 5346     __ popcntl($dst$$Register, $src$$Register);
 5347   %}
 5348   ins_pipe(ialu_reg);
 5349 %}
 5350 
 5351 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5352   predicate(UsePopCountInstruction);
 5353   match(Set dst (PopCountI (LoadI mem)));
 5354   effect(KILL cr);
 5355 
 5356   format %{ "POPCNT $dst, $mem" %}
 5357   ins_encode %{
 5358     __ popcntl($dst$$Register, $mem$$Address);
 5359   %}
 5360   ins_pipe(ialu_reg);
 5361 %}
 5362 
 5363 // Note: Long.bitCount(long) returns an int.
 5364 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5365   predicate(UsePopCountInstruction);
 5366   match(Set dst (PopCountL src));
 5367   effect(KILL cr, TEMP tmp, TEMP dst);
 5368 
 5369   format %{ "POPCNT $dst, $src.lo\n\t"
 5370             "POPCNT $tmp, $src.hi\n\t"
 5371             "ADD    $dst, $tmp" %}
 5372   ins_encode %{
 5373     __ popcntl($dst$$Register, $src$$Register);
 5374     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5375     __ addl($dst$$Register, $tmp$$Register);
 5376   %}
 5377   ins_pipe(ialu_reg);
 5378 %}
 5379 
 5380 // Note: Long.bitCount(long) returns an int.
 5381 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5382   predicate(UsePopCountInstruction);
 5383   match(Set dst (PopCountL (LoadL mem)));
 5384   effect(KILL cr, TEMP tmp, TEMP dst);
 5385 
 5386   format %{ "POPCNT $dst, $mem\n\t"
 5387             "POPCNT $tmp, $mem+4\n\t"
 5388             "ADD    $dst, $tmp" %}
 5389   ins_encode %{
 5390     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5391     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5392     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5393     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5394     __ addl($dst$$Register, $tmp$$Register);
 5395   %}
 5396   ins_pipe(ialu_reg);
 5397 %}
 5398 
 5399 
 5400 //----------Load/Store/Move Instructions---------------------------------------
 5401 //----------Load Instructions--------------------------------------------------
 5402 // Load Byte (8bit signed)
 5403 instruct loadB(xRegI dst, memory mem) %{
 5404   match(Set dst (LoadB mem));
 5405 
 5406   ins_cost(125);
 5407   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5408 
 5409   ins_encode %{
 5410     __ movsbl($dst$$Register, $mem$$Address);
 5411   %}
 5412 
 5413   ins_pipe(ialu_reg_mem);
 5414 %}
 5415 
 5416 // Load Byte (8bit signed) into Long Register
 5417 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5418   match(Set dst (ConvI2L (LoadB mem)));
 5419   effect(KILL cr);
 5420 
 5421   ins_cost(375);
 5422   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5423             "MOV    $dst.hi,$dst.lo\n\t"
 5424             "SAR    $dst.hi,7" %}
 5425 
 5426   ins_encode %{
 5427     __ movsbl($dst$$Register, $mem$$Address);
 5428     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5429     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
 5430   %}
 5431 
 5432   ins_pipe(ialu_reg_mem);
 5433 %}
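
// Why shifting by 7 is enough here: MOVSX8 already replicates the sign bit
// through bits 31..7 of $dst.lo, so an arithmetic right shift of the copy by
// 7 makes every bit of $dst.hi equal to the sign bit (a shift by 31 would do
// the same).  The short and int widenings below use the same trick with
// shift counts of 15 and 31.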
 5434 
 5435 // Load Unsigned Byte (8bit UNsigned)
 5436 instruct loadUB(xRegI dst, memory mem) %{
 5437   match(Set dst (LoadUB mem));
 5438 
 5439   ins_cost(125);
 5440   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5441 
 5442   ins_encode %{
 5443     __ movzbl($dst$$Register, $mem$$Address);
 5444   %}
 5445 
 5446   ins_pipe(ialu_reg_mem);
 5447 %}
 5448 
 5449 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5450 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5451   match(Set dst (ConvI2L (LoadUB mem)));
 5452   effect(KILL cr);
 5453 
 5454   ins_cost(250);
 5455   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5456             "XOR    $dst.hi,$dst.hi" %}
 5457 
 5458   ins_encode %{
 5459     Register Rdst = $dst$$Register;
 5460     __ movzbl(Rdst, $mem$$Address);
 5461     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5462   %}
 5463 
 5464   ins_pipe(ialu_reg_mem);
 5465 %}
 5466 
 5467 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5468 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5469   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5470   effect(KILL cr);
 5471 
 5472   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5473             "XOR    $dst.hi,$dst.hi\n\t"
 5474             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5475   ins_encode %{
 5476     Register Rdst = $dst$$Register;
 5477     __ movzbl(Rdst, $mem$$Address);
 5478     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5479     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5480   %}
 5481   ins_pipe(ialu_reg_mem);
 5482 %}
 5483 
 5484 // Load Short (16bit signed)
 5485 instruct loadS(rRegI dst, memory mem) %{
 5486   match(Set dst (LoadS mem));
 5487 
 5488   ins_cost(125);
 5489   format %{ "MOVSX  $dst,$mem\t# short" %}
 5490 
 5491   ins_encode %{
 5492     __ movswl($dst$$Register, $mem$$Address);
 5493   %}
 5494 
 5495   ins_pipe(ialu_reg_mem);
 5496 %}
 5497 
 5498 // Load Short (16 bit signed) to Byte (8 bit signed)
 5499 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5500   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5501 
 5502   ins_cost(125);
 5503   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5504   ins_encode %{
 5505     __ movsbl($dst$$Register, $mem$$Address);
 5506   %}
 5507   ins_pipe(ialu_reg_mem);
 5508 %}
 5509 
 5510 // Load Short (16bit signed) into Long Register
 5511 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5512   match(Set dst (ConvI2L (LoadS mem)));
 5513   effect(KILL cr);
 5514 
 5515   ins_cost(375);
 5516   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5517             "MOV    $dst.hi,$dst.lo\n\t"
 5518             "SAR    $dst.hi,15" %}
 5519 
 5520   ins_encode %{
 5521     __ movswl($dst$$Register, $mem$$Address);
 5522     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5523     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
 5524   %}
 5525 
 5526   ins_pipe(ialu_reg_mem);
 5527 %}
 5528 
 5529 // Load Unsigned Short/Char (16bit unsigned)
 5530 instruct loadUS(rRegI dst, memory mem) %{
 5531   match(Set dst (LoadUS mem));
 5532 
 5533   ins_cost(125);
 5534   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5535 
 5536   ins_encode %{
 5537     __ movzwl($dst$$Register, $mem$$Address);
 5538   %}
 5539 
 5540   ins_pipe(ialu_reg_mem);
 5541 %}
 5542 
 5543 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5544 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5545   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5546 
 5547   ins_cost(125);
 5548   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5549   ins_encode %{
 5550     __ movsbl($dst$$Register, $mem$$Address);
 5551   %}
 5552   ins_pipe(ialu_reg_mem);
 5553 %}
 5554 
 5555 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5556 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5557   match(Set dst (ConvI2L (LoadUS mem)));
 5558   effect(KILL cr);
 5559 
 5560   ins_cost(250);
 5561   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5562             "XOR    $dst.hi,$dst.hi" %}
 5563 
 5564   ins_encode %{
 5565     __ movzwl($dst$$Register, $mem$$Address);
 5566     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5567   %}
 5568 
 5569   ins_pipe(ialu_reg_mem);
 5570 %}
 5571 
 5572 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5573 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5574   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5575   effect(KILL cr);
 5576 
 5577   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5578             "XOR    $dst.hi,$dst.hi" %}
 5579   ins_encode %{
 5580     Register Rdst = $dst$$Register;
 5581     __ movzbl(Rdst, $mem$$Address);
 5582     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5583   %}
 5584   ins_pipe(ialu_reg_mem);
 5585 %}
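
// The 0xFF mask keeps only the low byte of the ushort, so the load is
// narrowed to a byte-sized MOVZX8 and the AND disappears; the int-to-long
// variants with 0xFF and 0xFFFF masks further below apply the same narrowing.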
 5586 
 5587 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5588 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5589   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5590   effect(KILL cr);
 5591 
 5592   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5593             "XOR    $dst.hi,$dst.hi\n\t"
 5594             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5595   ins_encode %{
 5596     Register Rdst = $dst$$Register;
 5597     __ movzwl(Rdst, $mem$$Address);
 5598     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5599     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5600   %}
 5601   ins_pipe(ialu_reg_mem);
 5602 %}
 5603 
 5604 // Load Integer
 5605 instruct loadI(rRegI dst, memory mem) %{
 5606   match(Set dst (LoadI mem));
 5607 
 5608   ins_cost(125);
 5609   format %{ "MOV    $dst,$mem\t# int" %}
 5610 
 5611   ins_encode %{
 5612     __ movl($dst$$Register, $mem$$Address);
 5613   %}
 5614 
 5615   ins_pipe(ialu_reg_mem);
 5616 %}
 5617 
 5618 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5619 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5620   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5621 
 5622   ins_cost(125);
 5623   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5624   ins_encode %{
 5625     __ movsbl($dst$$Register, $mem$$Address);
 5626   %}
 5627   ins_pipe(ialu_reg_mem);
 5628 %}
 5629 
 5630 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5631 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5632   match(Set dst (AndI (LoadI mem) mask));
 5633 
 5634   ins_cost(125);
 5635   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5636   ins_encode %{
 5637     __ movzbl($dst$$Register, $mem$$Address);
 5638   %}
 5639   ins_pipe(ialu_reg_mem);
 5640 %}
 5641 
 5642 // Load Integer (32 bit signed) to Short (16 bit signed)
 5643 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5644   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5645 
 5646   ins_cost(125);
 5647   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5648   ins_encode %{
 5649     __ movswl($dst$$Register, $mem$$Address);
 5650   %}
 5651   ins_pipe(ialu_reg_mem);
 5652 %}
 5653 
 5654 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5655 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5656   match(Set dst (AndI (LoadI mem) mask));
 5657 
 5658   ins_cost(125);
 5659   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5660   ins_encode %{
 5661     __ movzwl($dst$$Register, $mem$$Address);
 5662   %}
 5663   ins_pipe(ialu_reg_mem);
 5664 %}
 5665 
 5666 // Load Integer into Long Register
 5667 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5668   match(Set dst (ConvI2L (LoadI mem)));
 5669   effect(KILL cr);
 5670 
 5671   ins_cost(375);
 5672   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5673             "MOV    $dst.hi,$dst.lo\n\t"
 5674             "SAR    $dst.hi,31" %}
 5675 
 5676   ins_encode %{
 5677     __ movl($dst$$Register, $mem$$Address);
 5678     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5679     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5680   %}
 5681 
 5682   ins_pipe(ialu_reg_mem);
 5683 %}
 5684 
 5685 // Load Integer with mask 0xFF into Long Register
 5686 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5687   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5688   effect(KILL cr);
 5689 
 5690   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5691             "XOR    $dst.hi,$dst.hi" %}
 5692   ins_encode %{
 5693     Register Rdst = $dst$$Register;
 5694     __ movzbl(Rdst, $mem$$Address);
 5695     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5696   %}
 5697   ins_pipe(ialu_reg_mem);
 5698 %}
 5699 
 5700 // Load Integer with mask 0xFFFF into Long Register
 5701 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5702   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5703   effect(KILL cr);
 5704 
 5705   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5706             "XOR    $dst.hi,$dst.hi" %}
 5707   ins_encode %{
 5708     Register Rdst = $dst$$Register;
 5709     __ movzwl(Rdst, $mem$$Address);
 5710     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5711   %}
 5712   ins_pipe(ialu_reg_mem);
 5713 %}
 5714 
 5715 // Load Integer with 31-bit mask into Long Register
 5716 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5717   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5718   effect(KILL cr);
 5719 
 5720   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5721             "XOR    $dst.hi,$dst.hi\n\t"
 5722             "AND    $dst.lo,$mask" %}
 5723   ins_encode %{
 5724     Register Rdst = $dst$$Register;
 5725     __ movl(Rdst, $mem$$Address);
 5726     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5727     __ andl(Rdst, $mask$$constant);
 5728   %}
 5729   ins_pipe(ialu_reg_mem);
 5730 %}
 5731 
 5732 // Load Unsigned Integer into Long Register
 5733 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5734   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5735   effect(KILL cr);
 5736 
 5737   ins_cost(250);
 5738   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5739             "XOR    $dst.hi,$dst.hi" %}
 5740 
 5741   ins_encode %{
 5742     __ movl($dst$$Register, $mem$$Address);
 5743     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5744   %}
 5745 
 5746   ins_pipe(ialu_reg_mem);
 5747 %}
 5748 
 5749 // Load Long.  Cannot clobber address while loading, so restrict address
 5750 // register to ESI
 5751 instruct loadL(eRegL dst, load_long_memory mem) %{
 5752   predicate(!((LoadLNode*)n)->require_atomic_access());
 5753   match(Set dst (LoadL mem));
 5754 
 5755   ins_cost(250);
 5756   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5757             "MOV    $dst.hi,$mem+4" %}
 5758 
 5759   ins_encode %{
 5760     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5761     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5762     __ movl($dst$$Register, Amemlo);
 5763     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5764   %}
 5765 
 5766   ins_pipe(ialu_reg_long_mem);
 5767 %}
 5768 
 5769 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5770 // then store it down to the stack and reload on the int
 5771 // side.
 5772 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5773   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5774   match(Set dst (LoadL mem));
 5775 
 5776   ins_cost(200);
 5777   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5778             "FISTp  $dst" %}
 5779   ins_encode(enc_loadL_volatile(mem,dst));
 5780   ins_pipe( fpu_reg_mem );
 5781 %}
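
// FILD/FISTP is used here only because each performs a single 64-bit memory
// access, which this port relies on being atomic, unlike a pair of 32-bit
// MOVs.  The value survives the round trip exactly: the x87 extended format
// has a 64-bit significand, so every long is representable without loss.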
 5782 
 5783 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5784   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5785   match(Set dst (LoadL mem));
 5786   effect(TEMP tmp);
 5787   ins_cost(180);
 5788   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5789             "MOVSD  $dst,$tmp" %}
 5790   ins_encode %{
 5791     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5792     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5793   %}
 5794   ins_pipe( pipe_slow );
 5795 %}
 5796 
 5797 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5798   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5799   match(Set dst (LoadL mem));
 5800   effect(TEMP tmp);
 5801   ins_cost(160);
 5802   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5803             "MOVD   $dst.lo,$tmp\n\t"
 5804             "PSRLQ  $tmp,32\n\t"
 5805             "MOVD   $dst.hi,$tmp" %}
 5806   ins_encode %{
 5807     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5808     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5809     __ psrlq($tmp$$XMMRegister, 32);
 5810     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5811   %}
 5812   ins_pipe( pipe_slow );
 5813 %}
 5814 
 5815 // Load Range
 5816 instruct loadRange(rRegI dst, memory mem) %{
 5817   match(Set dst (LoadRange mem));
 5818 
 5819   ins_cost(125);
 5820   format %{ "MOV    $dst,$mem" %}
 5821   opcode(0x8B);
 5822   ins_encode( OpcP, RegMem(dst,mem));
 5823   ins_pipe( ialu_reg_mem );
 5824 %}
 5825 
 5826 
 5827 // Load Pointer
 5828 instruct loadP(eRegP dst, memory mem) %{
 5829   match(Set dst (LoadP mem));
 5830 
 5831   ins_cost(125);
 5832   format %{ "MOV    $dst,$mem" %}
 5833   opcode(0x8B);
 5834   ins_encode( OpcP, RegMem(dst,mem));
 5835   ins_pipe( ialu_reg_mem );
 5836 %}
 5837 
 5838 // Load Klass Pointer
 5839 instruct loadKlass(eRegP dst, memory mem) %{
 5840   match(Set dst (LoadKlass mem));
 5841 
 5842   ins_cost(125);
 5843   format %{ "MOV    $dst,$mem" %}
 5844   opcode(0x8B);
 5845   ins_encode( OpcP, RegMem(dst,mem));
 5846   ins_pipe( ialu_reg_mem );
 5847 %}
 5848 
 5849 // Load Double
 5850 instruct loadDPR(regDPR dst, memory mem) %{
 5851   predicate(UseSSE<=1);
 5852   match(Set dst (LoadD mem));
 5853 
 5854   ins_cost(150);
 5855   format %{ "FLD_D  ST,$mem\n\t"
 5856             "FSTP   $dst" %}
 5857   opcode(0xDD);               /* DD /0 */
 5858   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5859               Pop_Reg_DPR(dst) );
 5860   ins_pipe( fpu_reg_mem );
 5861 %}
 5862 
 5863 // Load Double to XMM
 5864 instruct loadD(regD dst, memory mem) %{
 5865   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5866   match(Set dst (LoadD mem));
 5867   ins_cost(145);
 5868   format %{ "MOVSD  $dst,$mem" %}
 5869   ins_encode %{
 5870     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5871   %}
 5872   ins_pipe( pipe_slow );
 5873 %}
 5874 
 5875 instruct loadD_partial(regD dst, memory mem) %{
 5876   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5877   match(Set dst (LoadD mem));
 5878   ins_cost(145);
 5879   format %{ "MOVLPD $dst,$mem" %}
 5880   ins_encode %{
 5881     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5882   %}
 5883   ins_pipe( pipe_slow );
 5884 %}
 5885 
 5886 // Load to XMM register (single-precision floating point)
 5887 // MOVSS instruction
 5888 instruct loadF(regF dst, memory mem) %{
 5889   predicate(UseSSE>=1);
 5890   match(Set dst (LoadF mem));
 5891   ins_cost(145);
 5892   format %{ "MOVSS  $dst,$mem" %}
 5893   ins_encode %{
 5894     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5895   %}
 5896   ins_pipe( pipe_slow );
 5897 %}
 5898 
 5899 // Load Float
 5900 instruct loadFPR(regFPR dst, memory mem) %{
 5901   predicate(UseSSE==0);
 5902   match(Set dst (LoadF mem));
 5903 
 5904   ins_cost(150);
 5905   format %{ "FLD_S  ST,$mem\n\t"
 5906             "FSTP   $dst" %}
 5907   opcode(0xD9);               /* D9 /0 */
 5908   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5909               Pop_Reg_FPR(dst) );
 5910   ins_pipe( fpu_reg_mem );
 5911 %}
 5912 
 5913 // Load Effective Address
 5914 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5915   match(Set dst mem);
 5916 
 5917   ins_cost(110);
 5918   format %{ "LEA    $dst,$mem" %}
 5919   opcode(0x8D);
 5920   ins_encode( OpcP, RegMem(dst,mem));
 5921   ins_pipe( ialu_reg_reg_fat );
 5922 %}
 5923 
 5924 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5925   match(Set dst mem);
 5926 
 5927   ins_cost(110);
 5928   format %{ "LEA    $dst,$mem" %}
 5929   opcode(0x8D);
 5930   ins_encode( OpcP, RegMem(dst,mem));
 5931   ins_pipe( ialu_reg_reg_fat );
 5932 %}
 5933 
 5934 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5935   match(Set dst mem);
 5936 
 5937   ins_cost(110);
 5938   format %{ "LEA    $dst,$mem" %}
 5939   opcode(0x8D);
 5940   ins_encode( OpcP, RegMem(dst,mem));
 5941   ins_pipe( ialu_reg_reg_fat );
 5942 %}
 5943 
 5944 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5945   match(Set dst mem);
 5946 
 5947   ins_cost(110);
 5948   format %{ "LEA    $dst,$mem" %}
 5949   opcode(0x8D);
 5950   ins_encode( OpcP, RegMem(dst,mem));
 5951   ins_pipe( ialu_reg_reg_fat );
 5952 %}
 5953 
 5954 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5955   match(Set dst mem);
 5956 
 5957   ins_cost(110);
 5958   format %{ "LEA    $dst,$mem" %}
 5959   opcode(0x8D);
 5960   ins_encode( OpcP, RegMem(dst,mem));
 5961   ins_pipe( ialu_reg_reg_fat );
 5962 %}
 5963 
 5964 // Load Constant
 5965 instruct loadConI(rRegI dst, immI src) %{
 5966   match(Set dst src);
 5967 
 5968   format %{ "MOV    $dst,$src" %}
 5969   ins_encode( LdImmI(dst, src) );
 5970   ins_pipe( ialu_reg_fat );
 5971 %}
 5972 
 5973 // Load Constant zero
 5974 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5975   match(Set dst src);
 5976   effect(KILL cr);
 5977 
 5978   ins_cost(50);
 5979   format %{ "XOR    $dst,$dst" %}
 5980   opcode(0x33);  /* + rd */
 5981   ins_encode( OpcP, RegReg( dst, dst ) );
 5982   ins_pipe( ialu_reg );
 5983 %}
 5984 
 5985 instruct loadConP(eRegP dst, immP src) %{
 5986   match(Set dst src);
 5987 
 5988   format %{ "MOV    $dst,$src" %}
 5989   opcode(0xB8);  /* + rd */
 5990   ins_encode( LdImmP(dst, src) );
 5991   ins_pipe( ialu_reg_fat );
 5992 %}
 5993 
 5994 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5995   match(Set dst src);
 5996   effect(KILL cr);
 5997   ins_cost(200);
 5998   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5999             "MOV    $dst.hi,$src.hi" %}
 6000   opcode(0xB8);
 6001   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6002   ins_pipe( ialu_reg_long_fat );
 6003 %}
 6004 
 6005 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6006   match(Set dst src);
 6007   effect(KILL cr);
 6008   ins_cost(150);
 6009   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6010             "XOR    $dst.hi,$dst.hi" %}
 6011   opcode(0x33,0x33);
 6012   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6013   ins_pipe( ialu_reg_long );
 6014 %}
 6015 
 6016 // The instruction usage is guarded by predicate in operand immFPR().
 6017 instruct loadConFPR(regFPR dst, immFPR con) %{
 6018   match(Set dst con);
 6019   ins_cost(125);
 6020   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6021             "FSTP   $dst" %}
 6022   ins_encode %{
 6023     __ fld_s($constantaddress($con));
 6024     __ fstp_d($dst$$reg);
 6025   %}
 6026   ins_pipe(fpu_reg_con);
 6027 %}
 6028 
 6029 // The instruction usage is guarded by predicate in operand immFPR0().
 6030 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6031   match(Set dst con);
 6032   ins_cost(125);
 6033   format %{ "FLDZ   ST\n\t"
 6034             "FSTP   $dst" %}
 6035   ins_encode %{
 6036     __ fldz();
 6037     __ fstp_d($dst$$reg);
 6038   %}
 6039   ins_pipe(fpu_reg_con);
 6040 %}
 6041 
 6042 // The instruction usage is guarded by predicate in operand immFPR1().
 6043 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6044   match(Set dst con);
 6045   ins_cost(125);
 6046   format %{ "FLD1   ST\n\t"
 6047             "FSTP   $dst" %}
 6048   ins_encode %{
 6049     __ fld1();
 6050     __ fstp_d($dst$$reg);
 6051   %}
 6052   ins_pipe(fpu_reg_con);
 6053 %}
 6054 
 6055 // The instruction usage is guarded by predicate in operand immF().
 6056 instruct loadConF(regF dst, immF con) %{
 6057   match(Set dst con);
 6058   ins_cost(125);
 6059   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6060   ins_encode %{
 6061     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6062   %}
 6063   ins_pipe(pipe_slow);
 6064 %}
 6065 
 6066 // The instruction usage is guarded by predicate in operand immF0().
 6067 instruct loadConF0(regF dst, immF0 src) %{
 6068   match(Set dst src);
 6069   ins_cost(100);
 6070   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6071   ins_encode %{
 6072     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6073   %}
 6074   ins_pipe(pipe_slow);
 6075 %}
 6076 
 6077 // The instruction usage is guarded by predicate in operand immDPR().
 6078 instruct loadConDPR(regDPR dst, immDPR con) %{
 6079   match(Set dst con);
 6080   ins_cost(125);
 6081 
 6082   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6083             "FSTP   $dst" %}
 6084   ins_encode %{
 6085     __ fld_d($constantaddress($con));
 6086     __ fstp_d($dst$$reg);
 6087   %}
 6088   ins_pipe(fpu_reg_con);
 6089 %}
 6090 
 6091 // The instruction usage is guarded by predicate in operand immDPR0().
 6092 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6093   match(Set dst con);
 6094   ins_cost(125);
 6095 
 6096   format %{ "FLDZ   ST\n\t"
 6097             "FSTP   $dst" %}
 6098   ins_encode %{
 6099     __ fldz();
 6100     __ fstp_d($dst$$reg);
 6101   %}
 6102   ins_pipe(fpu_reg_con);
 6103 %}
 6104 
 6105 // The instruction usage is guarded by predicate in operand immDPR1().
 6106 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6107   match(Set dst con);
 6108   ins_cost(125);
 6109 
 6110   format %{ "FLD1   ST\n\t"
 6111             "FSTP   $dst" %}
 6112   ins_encode %{
 6113     __ fld1();
 6114     __ fstp_d($dst$$reg);
 6115   %}
 6116   ins_pipe(fpu_reg_con);
 6117 %}
 6118 
 6119 // The instruction usage is guarded by predicate in operand immD().
 6120 instruct loadConD(regD dst, immD con) %{
 6121   match(Set dst con);
 6122   ins_cost(125);
 6123   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6124   ins_encode %{
 6125     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6126   %}
 6127   ins_pipe(pipe_slow);
 6128 %}
 6129 
 6130 // The instruction usage is guarded by predicate in operand immD0().
 6131 instruct loadConD0(regD dst, immD0 src) %{
 6132   match(Set dst src);
 6133   ins_cost(100);
 6134   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6135   ins_encode %{
 6136     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6137   %}
 6138   ins_pipe( pipe_slow );
 6139 %}
 6140 
 6141 // Load Stack Slot
 6142 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6143   match(Set dst src);
 6144   ins_cost(125);
 6145 
 6146   format %{ "MOV    $dst,$src" %}
 6147   opcode(0x8B);
 6148   ins_encode( OpcP, RegMem(dst,src));
 6149   ins_pipe( ialu_reg_mem );
 6150 %}
 6151 
 6152 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6153   match(Set dst src);
 6154 
 6155   ins_cost(200);
 6156   format %{ "MOV    $dst,$src.lo\n\t"
 6157             "MOV    $dst+4,$src.hi" %}
 6158   opcode(0x8B, 0x8B);
 6159   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6160   ins_pipe( ialu_mem_long_reg );
 6161 %}
 6162 
 6163 // Load Stack Slot
 6164 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6165   match(Set dst src);
 6166   ins_cost(125);
 6167 
 6168   format %{ "MOV    $dst,$src" %}
 6169   opcode(0x8B);
 6170   ins_encode( OpcP, RegMem(dst,src));
 6171   ins_pipe( ialu_reg_mem );
 6172 %}
 6173 
 6174 // Load Stack Slot
 6175 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6176   match(Set dst src);
 6177   ins_cost(125);
 6178 
 6179   format %{ "FLD_S  $src\n\t"
 6180             "FSTP   $dst" %}
 6181   opcode(0xD9);               /* D9 /0, FLD m32real */
 6182   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6183               Pop_Reg_FPR(dst) );
 6184   ins_pipe( fpu_reg_mem );
 6185 %}
 6186 
 6187 // Load Stack Slot
 6188 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6189   match(Set dst src);
 6190   ins_cost(125);
 6191 
 6192   format %{ "FLD_D  $src\n\t"
 6193             "FSTP   $dst" %}
 6194   opcode(0xDD);               /* DD /0, FLD m64real */
 6195   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6196               Pop_Reg_DPR(dst) );
 6197   ins_pipe( fpu_reg_mem );
 6198 %}
 6199 
 6200 // Prefetch instructions for allocation.
 6201 // Must be safe to execute with invalid address (cannot fault).
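      // AllocatePrefetchInstr selects the flavor matched below:
      //   0 => PREFETCHNTA, 1 => PREFETCHT0, 2 => PREFETCHT2 (all require SSE),
      //   3 => PREFETCHW; with UseSSE==0 and AllocatePrefetchInstr!=3 nothing is emitted.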
 6202 
 6203 instruct prefetchAlloc0( memory mem ) %{
 6204   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6205   match(PrefetchAllocation mem);
 6206   ins_cost(0);
 6207   size(0);
 6208   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6209   ins_encode();
 6210   ins_pipe(empty);
 6211 %}
 6212 
 6213 instruct prefetchAlloc( memory mem ) %{
 6214   predicate(AllocatePrefetchInstr==3);
 6215   match( PrefetchAllocation mem );
 6216   ins_cost(100);
 6217 
 6218   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6219   ins_encode %{
 6220     __ prefetchw($mem$$Address);
 6221   %}
 6222   ins_pipe(ialu_mem);
 6223 %}
 6224 
 6225 instruct prefetchAllocNTA( memory mem ) %{
 6226   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6227   match(PrefetchAllocation mem);
 6228   ins_cost(100);
 6229 
 6230   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6231   ins_encode %{
 6232     __ prefetchnta($mem$$Address);
 6233   %}
 6234   ins_pipe(ialu_mem);
 6235 %}
 6236 
 6237 instruct prefetchAllocT0( memory mem ) %{
 6238   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6239   match(PrefetchAllocation mem);
 6240   ins_cost(100);
 6241 
 6242   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6243   ins_encode %{
 6244     __ prefetcht0($mem$$Address);
 6245   %}
 6246   ins_pipe(ialu_mem);
 6247 %}
 6248 
 6249 instruct prefetchAllocT2( memory mem ) %{
 6250   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6251   match(PrefetchAllocation mem);
 6252   ins_cost(100);
 6253 
 6254   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6255   ins_encode %{
 6256     __ prefetcht2($mem$$Address);
 6257   %}
 6258   ins_pipe(ialu_mem);
 6259 %}
 6260 
 6261 //----------Store Instructions-------------------------------------------------
 6262 
 6263 // Store Byte
 6264 instruct storeB(memory mem, xRegI src) %{
 6265   match(Set mem (StoreB mem src));
 6266 
 6267   ins_cost(125);
 6268   format %{ "MOV8   $mem,$src" %}
 6269   opcode(0x88);
 6270   ins_encode( OpcP, RegMem( src, mem ) );
 6271   ins_pipe( ialu_mem_reg );
 6272 %}
 6273 
 6274 // Store Char/Short
 6275 instruct storeC(memory mem, rRegI src) %{
 6276   match(Set mem (StoreC mem src));
 6277 
 6278   ins_cost(125);
 6279   format %{ "MOV16  $mem,$src" %}
 6280   opcode(0x89, 0x66);
 6281   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6282   ins_pipe( ialu_mem_reg );
 6283 %}
 6284 
 6285 // Store Integer
 6286 instruct storeI(memory mem, rRegI src) %{
 6287   match(Set mem (StoreI mem src));
 6288 
 6289   ins_cost(125);
 6290   format %{ "MOV    $mem,$src" %}
 6291   opcode(0x89);
 6292   ins_encode( OpcP, RegMem( src, mem ) );
 6293   ins_pipe( ialu_mem_reg );
 6294 %}
 6295 
 6296 // Store Long
 6297 instruct storeL(long_memory mem, eRegL src) %{
 6298   predicate(!((StoreLNode*)n)->require_atomic_access());
 6299   match(Set mem (StoreL mem src));
 6300 
 6301   ins_cost(200);
 6302   format %{ "MOV    $mem,$src.lo\n\t"
 6303             "MOV    $mem+4,$src.hi" %}
 6304   opcode(0x89, 0x89);
 6305   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6306   ins_pipe( ialu_mem_long_reg );
 6307 %}
 6308 
 6309 // Store Long to Integer
 6310 instruct storeL2I(memory mem, eRegL src) %{
 6311   match(Set mem (StoreI mem (ConvL2I src)));
 6312 
 6313   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6314   ins_encode %{
 6315     __ movl($mem$$Address, $src$$Register);
 6316   %}
 6317   ins_pipe(ialu_mem_reg);
 6318 %}
 6319 
 6320 // Volatile Store Long.  Must be atomic, so move it into
 6321 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6322 // target address before the store (for null-ptr checks)
 6323 // so the memory operand is used twice in the encoding.
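      // (The x87 FILD/FISTP pair moves all 64 bits in a single memory access each,
      // which is what provides the required atomicity on this non-SSE path.)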
 6324 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6325   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6326   match(Set mem (StoreL mem src));
 6327   effect( KILL cr );
 6328   ins_cost(400);
 6329   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6330             "FILD   $src\n\t"
 6331             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6332   opcode(0x3B);
 6333   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6334   ins_pipe( fpu_reg_mem );
 6335 %}
 6336 
 6337 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6338   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6339   match(Set mem (StoreL mem src));
 6340   effect( TEMP tmp, KILL cr );
 6341   ins_cost(380);
 6342   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6343             "MOVSD  $tmp,$src\n\t"
 6344             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6345   ins_encode %{
 6346     __ cmpl(rax, $mem$$Address);
 6347     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6348     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6349   %}
 6350   ins_pipe( pipe_slow );
 6351 %}
 6352 
 6353 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6354   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6355   match(Set mem (StoreL mem src));
 6356   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6357   ins_cost(360);
 6358   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6359             "MOVD   $tmp,$src.lo\n\t"
 6360             "MOVD   $tmp2,$src.hi\n\t"
 6361             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6362             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6363   ins_encode %{
 6364     __ cmpl(rax, $mem$$Address);
 6365     __ movdl($tmp$$XMMRegister, $src$$Register);
 6366     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6367     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6368     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6369   %}
 6370   ins_pipe( pipe_slow );
 6371 %}
 6372 
 6373 // Store Pointer; for storing unknown oops and raw pointers
 6374 instruct storeP(memory mem, anyRegP src) %{
 6375   match(Set mem (StoreP mem src));
 6376 
 6377   ins_cost(125);
 6378   format %{ "MOV    $mem,$src" %}
 6379   opcode(0x89);
 6380   ins_encode( OpcP, RegMem( src, mem ) );
 6381   ins_pipe( ialu_mem_reg );
 6382 %}
 6383 
 6384 // Store Integer Immediate
 6385 instruct storeImmI(memory mem, immI src) %{
 6386   match(Set mem (StoreI mem src));
 6387 
 6388   ins_cost(150);
 6389   format %{ "MOV    $mem,$src" %}
 6390   opcode(0xC7);               /* C7 /0 */
 6391   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6392   ins_pipe( ialu_mem_imm );
 6393 %}
 6394 
 6395 // Store Short/Char Immediate
 6396 instruct storeImmI16(memory mem, immI16 src) %{
 6397   predicate(UseStoreImmI16);
 6398   match(Set mem (StoreC mem src));
 6399 
 6400   ins_cost(150);
 6401   format %{ "MOV16  $mem,$src" %}
 6402   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6403   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6404   ins_pipe( ialu_mem_imm );
 6405 %}
 6406 
 6407 // Store Pointer Immediate; null pointers or constant oops that do not
 6408 // need card-mark barriers.
 6409 instruct storeImmP(memory mem, immP src) %{
 6410   match(Set mem (StoreP mem src));
 6411 
 6412   ins_cost(150);
 6413   format %{ "MOV    $mem,$src" %}
 6414   opcode(0xC7);               /* C7 /0 */
 6415   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6416   ins_pipe( ialu_mem_imm );
 6417 %}
 6418 
 6419 // Store Byte Immediate
 6420 instruct storeImmB(memory mem, immI8 src) %{
 6421   match(Set mem (StoreB mem src));
 6422 
 6423   ins_cost(150);
 6424   format %{ "MOV8   $mem,$src" %}
 6425   opcode(0xC6);               /* C6 /0 */
 6426   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6427   ins_pipe( ialu_mem_imm );
 6428 %}
 6429 
 6430 // Store CMS card-mark Immediate
 6431 instruct storeImmCM(memory mem, immI8 src) %{
 6432   match(Set mem (StoreCM mem src));
 6433 
 6434   ins_cost(150);
 6435   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6436   opcode(0xC6);               /* C6 /0 */
 6437   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6438   ins_pipe( ialu_mem_imm );
 6439 %}
 6440 
 6441 // Store Double
 6442 instruct storeDPR( memory mem, regDPR1 src) %{
 6443   predicate(UseSSE<=1);
 6444   match(Set mem (StoreD mem src));
 6445 
 6446   ins_cost(100);
 6447   format %{ "FST_D  $mem,$src" %}
 6448   opcode(0xDD);       /* DD /2 */
 6449   ins_encode( enc_FPR_store(mem,src) );
 6450   ins_pipe( fpu_mem_reg );
 6451 %}
 6452 
 6453 // Store double does rounding on x86
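      // (x87 registers hold values in 80-bit extended precision; FST_D rounds the
      // value to 64-bit double precision on its way to memory.)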
 6454 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6455   predicate(UseSSE<=1);
 6456   match(Set mem (StoreD mem (RoundDouble src)));
 6457 
 6458   ins_cost(100);
 6459   format %{ "FST_D  $mem,$src\t# round" %}
 6460   opcode(0xDD);       /* DD /2 */
 6461   ins_encode( enc_FPR_store(mem,src) );
 6462   ins_pipe( fpu_mem_reg );
 6463 %}
 6464 
 6465 // Store XMM register to memory (double-precision floating point)
 6466 // MOVSD instruction
 6467 instruct storeD(memory mem, regD src) %{
 6468   predicate(UseSSE>=2);
 6469   match(Set mem (StoreD mem src));
 6470   ins_cost(95);
 6471   format %{ "MOVSD  $mem,$src" %}
 6472   ins_encode %{
 6473     __ movdbl($mem$$Address, $src$$XMMRegister);
 6474   %}
 6475   ins_pipe( pipe_slow );
 6476 %}
 6477 
 6478 // Store XMM register to memory (single-precision floating point)
 6479 // MOVSS instruction
 6480 instruct storeF(memory mem, regF src) %{
 6481   predicate(UseSSE>=1);
 6482   match(Set mem (StoreF mem src));
 6483   ins_cost(95);
 6484   format %{ "MOVSS  $mem,$src" %}
 6485   ins_encode %{
 6486     __ movflt($mem$$Address, $src$$XMMRegister);
 6487   %}
 6488   ins_pipe( pipe_slow );
 6489 %}
 6490 
 6491 
 6492 // Store Float
 6493 instruct storeFPR( memory mem, regFPR1 src) %{
 6494   predicate(UseSSE==0);
 6495   match(Set mem (StoreF mem src));
 6496 
 6497   ins_cost(100);
 6498   format %{ "FST_S  $mem,$src" %}
 6499   opcode(0xD9);       /* D9 /2 */
 6500   ins_encode( enc_FPR_store(mem,src) );
 6501   ins_pipe( fpu_mem_reg );
 6502 %}
 6503 
 6504 // Store Float does rounding on x86
 6505 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6506   predicate(UseSSE==0);
 6507   match(Set mem (StoreF mem (RoundFloat src)));
 6508 
 6509   ins_cost(100);
 6510   format %{ "FST_S  $mem,$src\t# round" %}
 6511   opcode(0xD9);       /* D9 /2 */
 6512   ins_encode( enc_FPR_store(mem,src) );
 6513   ins_pipe( fpu_mem_reg );
 6514 %}
 6515 
 6516 // Store Float, converted from a double, does rounding on x86
 6517 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6518   predicate(UseSSE<=1);
 6519   match(Set mem (StoreF mem (ConvD2F src)));
 6520 
 6521   ins_cost(100);
 6522   format %{ "FST_S  $mem,$src\t# D-round" %}
 6523   opcode(0xD9);       /* D9 /2 */
 6524   ins_encode( enc_FPR_store(mem,src) );
 6525   ins_pipe( fpu_mem_reg );
 6526 %}
 6527 
 6528 // Store immediate Float value (it is faster than store from FPU register)
 6529 // The instruction usage is guarded by predicate in operand immFPR().
 6530 instruct storeFPR_imm( memory mem, immFPR src) %{
 6531   match(Set mem (StoreF mem src));
 6532 
 6533   ins_cost(50);
 6534   format %{ "MOV    $mem,$src\t# store float" %}
 6535   opcode(0xC7);               /* C7 /0 */
 6536   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6537   ins_pipe( ialu_mem_imm );
 6538 %}
 6539 
 6540 // Store immediate Float value (it is faster than store from XMM register)
 6541 // The instruction usage is guarded by predicate in operand immF().
 6542 instruct storeF_imm( memory mem, immF src) %{
 6543   match(Set mem (StoreF mem src));
 6544 
 6545   ins_cost(50);
 6546   format %{ "MOV    $mem,$src\t# store float" %}
 6547   opcode(0xC7);               /* C7 /0 */
 6548   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6549   ins_pipe( ialu_mem_imm );
 6550 %}
 6551 
 6552 // Store Integer to stack slot
 6553 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6554   match(Set dst src);
 6555 
 6556   ins_cost(100);
 6557   format %{ "MOV    $dst,$src" %}
 6558   opcode(0x89);
 6559   ins_encode( OpcPRegSS( dst, src ) );
 6560   ins_pipe( ialu_mem_reg );
 6561 %}
 6562 
 6563 // Store Pointer to stack slot
 6564 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6565   match(Set dst src);
 6566 
 6567   ins_cost(100);
 6568   format %{ "MOV    $dst,$src" %}
 6569   opcode(0x89);
 6570   ins_encode( OpcPRegSS( dst, src ) );
 6571   ins_pipe( ialu_mem_reg );
 6572 %}
 6573 
 6574 // Store Long to stack slot
 6575 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6576   match(Set dst src);
 6577 
 6578   ins_cost(200);
 6579   format %{ "MOV    $dst,$src.lo\n\t"
 6580             "MOV    $dst+4,$src.hi" %}
 6581   opcode(0x89, 0x89);
 6582   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6583   ins_pipe( ialu_mem_long_reg );
 6584 %}
 6585 
 6586 //----------MemBar Instructions-----------------------------------------------
 6587 // Memory barrier flavors
 6588 
 6589 instruct membar_acquire() %{
 6590   match(MemBarAcquire);
 6591   match(LoadFence);
 6592   ins_cost(400);
 6593 
 6594   size(0);
 6595   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6596   ins_encode();
 6597   ins_pipe(empty);
 6598 %}
 6599 
 6600 instruct membar_acquire_lock() %{
 6601   match(MemBarAcquireLock);
 6602   ins_cost(0);
 6603 
 6604   size(0);
 6605   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6606   ins_encode( );
 6607   ins_pipe(empty);
 6608 %}
 6609 
 6610 instruct membar_release() %{
 6611   match(MemBarRelease);
 6612   match(StoreFence);
 6613   ins_cost(400);
 6614 
 6615   size(0);
 6616   format %{ "MEMBAR-release ! (empty encoding)" %}
 6617   ins_encode( );
 6618   ins_pipe(empty);
 6619 %}
 6620 
 6621 instruct membar_release_lock() %{
 6622   match(MemBarReleaseLock);
 6623   ins_cost(0);
 6624 
 6625   size(0);
 6626   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6627   ins_encode( );
 6628   ins_pipe(empty);
 6629 %}
 6630 
 6631 instruct membar_volatile(eFlagsReg cr) %{
 6632   match(MemBarVolatile);
 6633   effect(KILL cr);
 6634   ins_cost(400);
 6635 
 6636   format %{
 6637     $$template
 6638     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6639   %}
 6640   ins_encode %{
 6641     __ membar(Assembler::StoreLoad);
 6642   %}
 6643   ins_pipe(pipe_slow);
 6644 %}
 6645 
 6646 instruct unnecessary_membar_volatile() %{
 6647   match(MemBarVolatile);
 6648   predicate(Matcher::post_store_load_barrier(n));
 6649   ins_cost(0);
 6650 
 6651   size(0);
 6652   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6653   ins_encode( );
 6654   ins_pipe(empty);
 6655 %}
 6656 
 6657 instruct membar_storestore() %{
 6658   match(MemBarStoreStore);
 6659   ins_cost(0);
 6660 
 6661   size(0);
 6662   format %{ "MEMBAR-storestore (empty encoding)" %}
 6663   ins_encode( );
 6664   ins_pipe(empty);
 6665 %}
 6666 
 6667 //----------Move Instructions--------------------------------------------------
 6668 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6669   match(Set dst (CastX2P src));
 6670   format %{ "# X2P  $dst, $src" %}
 6671   ins_encode( /*empty encoding*/ );
 6672   ins_cost(0);
 6673   ins_pipe(empty);
 6674 %}
 6675 
 6676 instruct castP2X(rRegI dst, eRegP src ) %{
 6677   match(Set dst (CastP2X src));
 6678   ins_cost(50);
 6679   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6680   ins_encode( enc_Copy( dst, src) );
 6681   ins_pipe( ialu_reg_reg );
 6682 %}
 6683 
 6684 //----------Conditional Move---------------------------------------------------
 6685 // Conditional move
 6686 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6687   predicate(!VM_Version::supports_cmov() );
 6688   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6689   ins_cost(200);
 6690   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6691             "MOV    $dst,$src\n"
 6692       "skip:" %}
 6693   ins_encode %{
 6694     Label Lskip;
 6695     // Invert sense of branch from sense of CMOV
 6696     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6697     __ movl($dst$$Register, $src$$Register);
 6698     __ bind(Lskip);
 6699   %}
 6700   ins_pipe( pipe_cmov_reg );
 6701 %}
 6702 
 6703 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6704   predicate(!VM_Version::supports_cmov() );
 6705   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6706   ins_cost(200);
 6707   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6708             "MOV    $dst,$src\n"
 6709       "skip:" %}
 6710   ins_encode %{
 6711     Label Lskip;
 6712     // Invert sense of branch from sense of CMOV
 6713     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6714     __ movl($dst$$Register, $src$$Register);
 6715     __ bind(Lskip);
 6716   %}
 6717   ins_pipe( pipe_cmov_reg );
 6718 %}
 6719 
 6720 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6721   predicate(VM_Version::supports_cmov() );
 6722   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6723   ins_cost(200);
 6724   format %{ "CMOV$cop $dst,$src" %}
 6725   opcode(0x0F,0x40);
 6726   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6727   ins_pipe( pipe_cmov_reg );
 6728 %}
 6729 
 6730 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6731   predicate(VM_Version::supports_cmov() );
 6732   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6733   ins_cost(200);
 6734   format %{ "CMOV$cop $dst,$src" %}
 6735   opcode(0x0F,0x40);
 6736   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6737   ins_pipe( pipe_cmov_reg );
 6738 %}
 6739 
 6740 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6741   predicate(VM_Version::supports_cmov() );
 6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6743   ins_cost(200);
 6744   expand %{
 6745     cmovI_regU(cop, cr, dst, src);
 6746   %}
 6747 %}
 6748 
 6749 // Conditional move
 6750 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6751   predicate(VM_Version::supports_cmov() );
 6752   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6753   ins_cost(250);
 6754   format %{ "CMOV$cop $dst,$src" %}
 6755   opcode(0x0F,0x40);
 6756   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6757   ins_pipe( pipe_cmov_mem );
 6758 %}
 6759 
 6760 // Conditional move
 6761 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6762   predicate(VM_Version::supports_cmov() );
 6763   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6764   ins_cost(250);
 6765   format %{ "CMOV$cop $dst,$src" %}
 6766   opcode(0x0F,0x40);
 6767   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6768   ins_pipe( pipe_cmov_mem );
 6769 %}
 6770 
 6771 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6772   predicate(VM_Version::supports_cmov() );
 6773   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6774   ins_cost(250);
 6775   expand %{
 6776     cmovI_memU(cop, cr, dst, src);
 6777   %}
 6778 %}
 6779 
 6780 // Conditional move
 6781 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6782   predicate(VM_Version::supports_cmov() );
 6783   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6784   ins_cost(200);
 6785   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6786   opcode(0x0F,0x40);
 6787   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6788   ins_pipe( pipe_cmov_reg );
 6789 %}
 6790 
 6791 // Conditional move (non-P6 version)
 6792 // Note:  a CMoveP is generated for stubs and native wrappers
 6793 //        regardless of whether we are on a P6, so we
 6794 //        emulate a cmov here
 6795 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6796   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6797   ins_cost(300);
 6798   format %{ "Jn$cop   skip\n\t"
 6799           "MOV    $dst,$src\t# pointer\n"
 6800       "skip:" %}
 6801   opcode(0x8b);
 6802   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6803   ins_pipe( pipe_cmov_reg );
 6804 %}
 6805 
 6806 // Conditional move
 6807 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6808   predicate(VM_Version::supports_cmov() );
 6809   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6810   ins_cost(200);
 6811   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6812   opcode(0x0F,0x40);
 6813   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6814   ins_pipe( pipe_cmov_reg );
 6815 %}
 6816 
 6817 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6818   predicate(VM_Version::supports_cmov() );
 6819   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6820   ins_cost(200);
 6821   expand %{
 6822     cmovP_regU(cop, cr, dst, src);
 6823   %}
 6824 %}
 6825 
 6826 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6827 // correctly meets the two pointer arguments; one is an incoming
 6828 // register but the other is a memory operand.  ALSO appears to
 6829 // be buggy with implicit null checks.
 6830 //
 6831 //// Conditional move
 6832 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6833 //  predicate(VM_Version::supports_cmov() );
 6834 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6835 //  ins_cost(250);
 6836 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6837 //  opcode(0x0F,0x40);
 6838 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6839 //  ins_pipe( pipe_cmov_mem );
 6840 //%}
 6841 //
 6842 //// Conditional move
 6843 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6844 //  predicate(VM_Version::supports_cmov() );
 6845 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6846 //  ins_cost(250);
 6847 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6848 //  opcode(0x0F,0x40);
 6849 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6850 //  ins_pipe( pipe_cmov_mem );
 6851 //%}
 6852 
 6853 // Conditional move
 6854 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6855   predicate(UseSSE<=1);
 6856   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6857   ins_cost(200);
 6858   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6859   opcode(0xDA);
 6860   ins_encode( enc_cmov_dpr(cop,src) );
 6861   ins_pipe( pipe_cmovDPR_reg );
 6862 %}
 6863 
 6864 // Conditional move
 6865 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6866   predicate(UseSSE==0);
 6867   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6868   ins_cost(200);
 6869   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6870   opcode(0xDA);
 6871   ins_encode( enc_cmov_dpr(cop,src) );
 6872   ins_pipe( pipe_cmovDPR_reg );
 6873 %}
 6874 
 6875 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
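      // FCMOVcc can only test CF/ZF/PF (the unsigned-style conditions), so the
      // signed variants below fall back to a short branch around an FPU register move.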
 6876 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6877   predicate(UseSSE<=1);
 6878   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6879   ins_cost(200);
 6880   format %{ "Jn$cop   skip\n\t"
 6881             "MOV    $dst,$src\t# double\n"
 6882       "skip:" %}
 6883   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6884   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6885   ins_pipe( pipe_cmovDPR_reg );
 6886 %}
 6887 
 6888 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6889 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6890   predicate(UseSSE==0);
 6891   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6892   ins_cost(200);
 6893   format %{ "Jn$cop    skip\n\t"
 6894             "MOV    $dst,$src\t# float\n"
 6895       "skip:" %}
 6896   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6897   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6898   ins_pipe( pipe_cmovDPR_reg );
 6899 %}
 6900 
 6901 // No CMOVE with SSE/SSE2
 6902 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6903   predicate (UseSSE>=1);
 6904   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6905   ins_cost(200);
 6906   format %{ "Jn$cop   skip\n\t"
 6907             "MOVSS  $dst,$src\t# float\n"
 6908       "skip:" %}
 6909   ins_encode %{
 6910     Label skip;
 6911     // Invert sense of branch from sense of CMOV
 6912     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6913     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6914     __ bind(skip);
 6915   %}
 6916   ins_pipe( pipe_slow );
 6917 %}
 6918 
 6919 // No CMOVE with SSE/SSE2
 6920 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6921   predicate (UseSSE>=2);
 6922   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6923   ins_cost(200);
 6924   format %{ "Jn$cop   skip\n\t"
 6925             "MOVSD  $dst,$src\t# double\n"
 6926       "skip:" %}
 6927   ins_encode %{
 6928     Label skip;
 6929     // Invert sense of branch from sense of CMOV
 6930     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6931     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6932     __ bind(skip);
 6933   %}
 6934   ins_pipe( pipe_slow );
 6935 %}
 6936 
 6937 // unsigned version
 6938 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6939   predicate (UseSSE>=1);
 6940   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6941   ins_cost(200);
 6942   format %{ "Jn$cop   skip\n\t"
 6943             "MOVSS  $dst,$src\t# float\n"
 6944       "skip:" %}
 6945   ins_encode %{
 6946     Label skip;
 6947     // Invert sense of branch from sense of CMOV
 6948     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6949     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6950     __ bind(skip);
 6951   %}
 6952   ins_pipe( pipe_slow );
 6953 %}
 6954 
 6955 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6956   predicate (UseSSE>=1);
 6957   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6958   ins_cost(200);
 6959   expand %{
 6960     fcmovF_regU(cop, cr, dst, src);
 6961   %}
 6962 %}
 6963 
 6964 // unsigned version
 6965 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6966   predicate (UseSSE>=2);
 6967   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6968   ins_cost(200);
 6969   format %{ "Jn$cop   skip\n\t"
 6970             "MOVSD  $dst,$src\t# double\n"
 6971       "skip:" %}
 6972   ins_encode %{
 6973     Label skip;
 6974     // Invert sense of branch from sense of CMOV
 6975     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6976     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6977     __ bind(skip);
 6978   %}
 6979   ins_pipe( pipe_slow );
 6980 %}
 6981 
 6982 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6983   predicate (UseSSE>=2);
 6984   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6985   ins_cost(200);
 6986   expand %{
 6987     fcmovD_regU(cop, cr, dst, src);
 6988   %}
 6989 %}
 6990 
 6991 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6992   predicate(VM_Version::supports_cmov() );
 6993   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6994   ins_cost(200);
 6995   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6996             "CMOV$cop $dst.hi,$src.hi" %}
 6997   opcode(0x0F,0x40);
 6998   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6999   ins_pipe( pipe_cmov_reg_long );
 7000 %}
 7001 
 7002 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7003   predicate(VM_Version::supports_cmov() );
 7004   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7005   ins_cost(200);
 7006   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7007             "CMOV$cop $dst.hi,$src.hi" %}
 7008   opcode(0x0F,0x40);
 7009   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7010   ins_pipe( pipe_cmov_reg_long );
 7011 %}
 7012 
 7013 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7014   predicate(VM_Version::supports_cmov() );
 7015   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7016   ins_cost(200);
 7017   expand %{
 7018     cmovL_regU(cop, cr, dst, src);
 7019   %}
 7020 %}
 7021 
 7022 //----------Arithmetic Instructions--------------------------------------------
 7023 //----------Addition Instructions----------------------------------------------
 7024 
 7025 // Integer Addition Instructions
 7026 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7027   match(Set dst (AddI dst src));
 7028   effect(KILL cr);
 7029 
 7030   size(2);
 7031   format %{ "ADD    $dst,$src" %}
 7032   opcode(0x03);
 7033   ins_encode( OpcP, RegReg( dst, src) );
 7034   ins_pipe( ialu_reg_reg );
 7035 %}
 7036 
 7037 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7038   match(Set dst (AddI dst src));
 7039   effect(KILL cr);
 7040 
 7041   format %{ "ADD    $dst,$src" %}
 7042   opcode(0x81, 0x00); /* /0 id */
 7043   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7044   ins_pipe( ialu_reg );
 7045 %}
 7046 
 7047 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7048   predicate(UseIncDec);
 7049   match(Set dst (AddI dst src));
 7050   effect(KILL cr);
 7051 
 7052   size(1);
 7053   format %{ "INC    $dst" %}
 7054   opcode(0x40); /* 0x40 + rd => INC r32 */
 7055   ins_encode( Opc_plus( primary, dst ) );
 7056   ins_pipe( ialu_reg );
 7057 %}
 7058 
 7059 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7060   match(Set dst (AddI src0 src1));
 7061   ins_cost(110);
 7062 
 7063   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7064   opcode(0x8D); /* 0x8D /r */
 7065   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7066   ins_pipe( ialu_reg_reg );
 7067 %}
 7068 
 7069 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7070   match(Set dst (AddP src0 src1));
 7071   ins_cost(110);
 7072 
 7073   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7074   opcode(0x8D); /* 0x8D /r */
 7075   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7076   ins_pipe( ialu_reg_reg );
 7077 %}
 7078 
 7079 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7080   predicate(UseIncDec);
 7081   match(Set dst (AddI dst src));
 7082   effect(KILL cr);
 7083 
 7084   size(1);
 7085   format %{ "DEC    $dst" %}
 7086   opcode(0x48); /* 0x48 + rd => DEC r32 */
 7087   ins_encode( Opc_plus( primary, dst ) );
 7088   ins_pipe( ialu_reg );
 7089 %}
 7090 
 7091 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7092   match(Set dst (AddP dst src));
 7093   effect(KILL cr);
 7094 
 7095   size(2);
 7096   format %{ "ADD    $dst,$src" %}
 7097   opcode(0x03);
 7098   ins_encode( OpcP, RegReg( dst, src) );
 7099   ins_pipe( ialu_reg_reg );
 7100 %}
 7101 
 7102 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7103   match(Set dst (AddP dst src));
 7104   effect(KILL cr);
 7105 
 7106   format %{ "ADD    $dst,$src" %}
 7107   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7108   // ins_encode( RegImm( dst, src) );
 7109   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7110   ins_pipe( ialu_reg );
 7111 %}
 7112 
 7113 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7114   match(Set dst (AddI dst (LoadI src)));
 7115   effect(KILL cr);
 7116 
 7117   ins_cost(125);
 7118   format %{ "ADD    $dst,$src" %}
 7119   opcode(0x03);
 7120   ins_encode( OpcP, RegMem( dst, src) );
 7121   ins_pipe( ialu_reg_mem );
 7122 %}
 7123 
 7124 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7125   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7126   effect(KILL cr);
 7127 
 7128   ins_cost(150);
 7129   format %{ "ADD    $dst,$src" %}
 7130   opcode(0x01);  /* Opcode 01 /r */
 7131   ins_encode( OpcP, RegMem( src, dst ) );
 7132   ins_pipe( ialu_mem_reg );
 7133 %}
 7134 
 7135 // Add Memory with Immediate
 7136 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7137   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7138   effect(KILL cr);
 7139 
 7140   ins_cost(125);
 7141   format %{ "ADD    $dst,$src" %}
 7142   opcode(0x81);               /* Opcode 81 /0 id */
 7143   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7144   ins_pipe( ialu_mem_imm );
 7145 %}
 7146 
 7147 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7148   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7149   effect(KILL cr);
 7150 
 7151   ins_cost(125);
 7152   format %{ "INC    $dst" %}
 7153   opcode(0xFF);               /* Opcode FF /0 */
 7154   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7155   ins_pipe( ialu_mem_imm );
 7156 %}
 7157 
 7158 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7159   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7160   effect(KILL cr);
 7161 
 7162   ins_cost(125);
 7163   format %{ "DEC    $dst" %}
 7164   opcode(0xFF);               /* Opcode FF /1 */
 7165   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7166   ins_pipe( ialu_mem_imm );
 7167 %}
 7168 
 7169 
 7170 instruct checkCastPP( eRegP dst ) %{
 7171   match(Set dst (CheckCastPP dst));
 7172 
 7173   size(0);
 7174   format %{ "#checkcastPP of $dst" %}
 7175   ins_encode( /*empty encoding*/ );
 7176   ins_pipe( empty );
 7177 %}
 7178 
 7179 instruct castPP( eRegP dst ) %{
 7180   match(Set dst (CastPP dst));
 7181   format %{ "#castPP of $dst" %}
 7182   ins_encode( /*empty encoding*/ );
 7183   ins_pipe( empty );
 7184 %}
 7185 
 7186 instruct castII( rRegI dst ) %{
 7187   match(Set dst (CastII dst));
 7188   format %{ "#castII of $dst" %}
 7189   ins_encode( /*empty encoding*/ );
 7190   ins_cost(0);
 7191   ins_pipe( empty );
 7192 %}
 7193 
 7194 instruct castLL( eRegL dst ) %{
 7195   match(Set dst (CastLL dst));
 7196   format %{ "#castLL of $dst" %}
 7197   ins_encode( /*empty encoding*/ );
 7198   ins_cost(0);
 7199   ins_pipe( empty );
 7200 %}
 7201 
 7202 instruct castFF( regF dst ) %{
 7203   match(Set dst (CastFF dst));
 7204   format %{ "#castFF of $dst" %}
 7205   ins_encode( /*empty encoding*/ );
 7206   ins_cost(0);
 7207   ins_pipe( empty );
 7208 %}
 7209 
 7210 instruct castDD( regD dst ) %{
 7211   match(Set dst (CastDD dst));
 7212   format %{ "#castDD of $dst" %}
 7213   ins_encode( /*empty encoding*/ );
 7214   ins_cost(0);
 7215   ins_pipe( empty );
 7216 %}
 7217 
 7218 // Load-locked - same as a regular pointer load when used with compare-swap
 7219 instruct loadPLocked(eRegP dst, memory mem) %{
 7220   match(Set dst (LoadPLocked mem));
 7221 
 7222   ins_cost(125);
 7223   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7224   opcode(0x8B);
 7225   ins_encode( OpcP, RegMem(dst,mem));
 7226   ins_pipe( ialu_reg_mem );
 7227 %}
 7228 
 7229 // Conditional-store of the updated heap-top.
 7230 // Used during allocation of the shared heap.
 7231 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
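      // Typically paired with loadPLocked above: load the current heap top, add
      // the object size, then conditionally publish the new top with CMPXCHG.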
 7232 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7233   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7234   // EAX is killed if there is contention, but then it's also unused.
 7235   // In the common case of no contention, EAX holds the new oop address.
 7236   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7237   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7238   ins_pipe( pipe_cmpxchg );
 7239 %}
 7240 
 7241 // Conditional-store of an int value.
 7242 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7243 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7244   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7245   effect(KILL oldval);
 7246   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7247   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7248   ins_pipe( pipe_cmpxchg );
 7249 %}
 7250 
 7251 // Conditional-store of a long value.
 7252 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7253 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7254   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7255   effect(KILL oldval);
 7256   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7257             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7258             "XCHG   EBX,ECX"
 7259   %}
 7260   ins_encode %{
 7261     // Note: we need to swap rbx, and rcx before and after the
 7262     //       cmpxchg8 instruction because the instruction uses
 7263     //       rcx as the high order word of the new value to store but
 7264     //       our register encoding uses rbx.
 7265     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7266     __ lock();
 7267     __ cmpxchg8($mem$$Address);
 7268     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7269   %}
 7270   ins_pipe( pipe_cmpxchg );
 7271 %}
 7272 
 7273 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7274 
 7275 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7276   predicate(VM_Version::supports_cx8());
 7277   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7278   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7279   effect(KILL cr, KILL oldval);
 7280   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7281             "MOV    $res,0\n\t"
 7282             "JNE,s  fail\n\t"
 7283             "MOV    $res,1\n"
 7284           "fail:" %}
 7285   ins_encode( enc_cmpxchg8(mem_ptr),
 7286               enc_flags_ne_to_boolean(res) );
 7287   ins_pipe( pipe_cmpxchg );
 7288 %}
 7289 
 7290 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7291   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7292   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7293   effect(KILL cr, KILL oldval);
 7294   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7295             "MOV    $res,0\n\t"
 7296             "JNE,s  fail\n\t"
 7297             "MOV    $res,1\n"
 7298           "fail:" %}
 7299   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7300   ins_pipe( pipe_cmpxchg );
 7301 %}
 7302 
 7303 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7304   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7305   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7306   effect(KILL cr, KILL oldval);
 7307   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7308             "MOV    $res,0\n\t"
 7309             "JNE,s  fail\n\t"
 7310             "MOV    $res,1\n"
 7311           "fail:" %}
 7312   ins_encode( enc_cmpxchgb(mem_ptr),
 7313               enc_flags_ne_to_boolean(res) );
 7314   ins_pipe( pipe_cmpxchg );
 7315 %}
 7316 
 7317 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7318   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7319   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7320   effect(KILL cr, KILL oldval);
 7321   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7322             "MOV    $res,0\n\t"
 7323             "JNE,s  fail\n\t"
 7324             "MOV    $res,1\n"
 7325           "fail:" %}
 7326   ins_encode( enc_cmpxchgw(mem_ptr),
 7327               enc_flags_ne_to_boolean(res) );
 7328   ins_pipe( pipe_cmpxchg );
 7329 %}
 7330 
 7331 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7332   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7333   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7334   effect(KILL cr, KILL oldval);
 7335   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7336             "MOV    $res,0\n\t"
 7337             "JNE,s  fail\n\t"
 7338             "MOV    $res,1\n"
 7339           "fail:" %}
 7340   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7341   ins_pipe( pipe_cmpxchg );
 7342 %}
 7343 
 7344 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7345   predicate(VM_Version::supports_cx8());
 7346   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7347   effect(KILL cr);
 7348   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7349   ins_encode( enc_cmpxchg8(mem_ptr) );
 7350   ins_pipe( pipe_cmpxchg );
 7351 %}
 7352 
 7353 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7354   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7355   effect(KILL cr);
 7356   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7357   ins_encode( enc_cmpxchg(mem_ptr) );
 7358   ins_pipe( pipe_cmpxchg );
 7359 %}
 7360 
 7361 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7362   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7363   effect(KILL cr);
 7364   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7365   ins_encode( enc_cmpxchgb(mem_ptr) );
 7366   ins_pipe( pipe_cmpxchg );
 7367 %}
 7368 
 7369 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7370   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7371   effect(KILL cr);
 7372   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7373   ins_encode( enc_cmpxchgw(mem_ptr) );
 7374   ins_pipe( pipe_cmpxchg );
 7375 %}
 7376 
 7377 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7378   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7379   effect(KILL cr);
 7380   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7381   ins_encode( enc_cmpxchg(mem_ptr) );
 7382   ins_pipe( pipe_cmpxchg );
 7383 %}
 7384 
 7385 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7386   predicate(n->as_LoadStore()->result_not_used());
 7387   match(Set dummy (GetAndAddB mem add));
 7388   effect(KILL cr);
 7389   format %{ "ADDB  [$mem],$add" %}
 7390   ins_encode %{
 7391     __ lock();
 7392     __ addb($mem$$Address, $add$$constant);
 7393   %}
 7394   ins_pipe( pipe_cmpxchg );
 7395 %}
 7396 
 7397 // Important to match to xRegI: only 8-bit regs.
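      // (In 32-bit mode only EAX/EBX/ECX/EDX have byte-addressable low halves.)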
 7398 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7399   match(Set newval (GetAndAddB mem newval));
 7400   effect(KILL cr);
 7401   format %{ "XADDB  [$mem],$newval" %}
 7402   ins_encode %{
 7403     __ lock();
 7404     __ xaddb($mem$$Address, $newval$$Register);
 7405   %}
 7406   ins_pipe( pipe_cmpxchg );
 7407 %}
 7408 
 7409 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7410   predicate(n->as_LoadStore()->result_not_used());
 7411   match(Set dummy (GetAndAddS mem add));
 7412   effect(KILL cr);
 7413   format %{ "ADDS  [$mem],$add" %}
 7414   ins_encode %{
 7415     __ lock();
 7416     __ addw($mem$$Address, $add$$constant);
 7417   %}
 7418   ins_pipe( pipe_cmpxchg );
 7419 %}
 7420 
 7421 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7422   match(Set newval (GetAndAddS mem newval));
 7423   effect(KILL cr);
 7424   format %{ "XADDS  [$mem],$newval" %}
 7425   ins_encode %{
 7426     __ lock();
 7427     __ xaddw($mem$$Address, $newval$$Register);
 7428   %}
 7429   ins_pipe( pipe_cmpxchg );
 7430 %}
 7431 
 7432 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7433   predicate(n->as_LoadStore()->result_not_used());
 7434   match(Set dummy (GetAndAddI mem add));
 7435   effect(KILL cr);
 7436   format %{ "ADDL  [$mem],$add" %}
 7437   ins_encode %{
 7438     __ lock();
 7439     __ addl($mem$$Address, $add$$constant);
 7440   %}
 7441   ins_pipe( pipe_cmpxchg );
 7442 %}
 7443 
 7444 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7445   match(Set newval (GetAndAddI mem newval));
 7446   effect(KILL cr);
 7447   format %{ "XADDL  [$mem],$newval" %}
 7448   ins_encode %{
 7449     __ lock();
 7450     __ xaddl($mem$$Address, $newval$$Register);
 7451   %}
 7452   ins_pipe( pipe_cmpxchg );
 7453 %}
 7454 
 7455 // Important to match to xRegI: only 8-bit regs.
 7456 instruct xchgB( memory mem, xRegI newval) %{
 7457   match(Set newval (GetAndSetB mem newval));
 7458   format %{ "XCHGB  $newval,[$mem]" %}
 7459   ins_encode %{
 7460     __ xchgb($newval$$Register, $mem$$Address);
 7461   %}
 7462   ins_pipe( pipe_cmpxchg );
 7463 %}
 7464 
 7465 instruct xchgS( memory mem, rRegI newval) %{
 7466   match(Set newval (GetAndSetS mem newval));
 7467   format %{ "XCHGW  $newval,[$mem]" %}
 7468   ins_encode %{
 7469     __ xchgw($newval$$Register, $mem$$Address);
 7470   %}
 7471   ins_pipe( pipe_cmpxchg );
 7472 %}
 7473 
 7474 instruct xchgI( memory mem, rRegI newval) %{
 7475   match(Set newval (GetAndSetI mem newval));
 7476   format %{ "XCHGL  $newval,[$mem]" %}
 7477   ins_encode %{
 7478     __ xchgl($newval$$Register, $mem$$Address);
 7479   %}
 7480   ins_pipe( pipe_cmpxchg );
 7481 %}
 7482 
 7483 instruct xchgP( memory mem, pRegP newval) %{
 7484   match(Set newval (GetAndSetP mem newval));
 7485   format %{ "XCHGL  $newval,[$mem]" %}
 7486   ins_encode %{
 7487     __ xchgl($newval$$Register, $mem$$Address);
 7488   %}
 7489   ins_pipe( pipe_cmpxchg );
 7490 %}
 7491 
 7492 //----------Subtraction Instructions-------------------------------------------
 7493 
 7494 // Integer Subtraction Instructions
 7495 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7496   match(Set dst (SubI dst src));
 7497   effect(KILL cr);
 7498 
 7499   size(2);
 7500   format %{ "SUB    $dst,$src" %}
 7501   opcode(0x2B);
 7502   ins_encode( OpcP, RegReg( dst, src) );
 7503   ins_pipe( ialu_reg_reg );
 7504 %}
 7505 
 7506 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7507   match(Set dst (SubI dst src));
 7508   effect(KILL cr);
 7509 
 7510   format %{ "SUB    $dst,$src" %}
 7511   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7512   // ins_encode( RegImm( dst, src) );
 7513   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7514   ins_pipe( ialu_reg );
 7515 %}
 7516 
 7517 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7518   match(Set dst (SubI dst (LoadI src)));
 7519   effect(KILL cr);
 7520 
 7521   ins_cost(125);
 7522   format %{ "SUB    $dst,$src" %}
 7523   opcode(0x2B);
 7524   ins_encode( OpcP, RegMem( dst, src) );
 7525   ins_pipe( ialu_reg_mem );
 7526 %}
 7527 
 7528 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7529   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7530   effect(KILL cr);
 7531 
 7532   ins_cost(150);
 7533   format %{ "SUB    $dst,$src" %}
 7534   opcode(0x29);  /* Opcode 29 /r */
 7535   ins_encode( OpcP, RegMem( src, dst ) );
 7536   ins_pipe( ialu_mem_reg );
 7537 %}
 7538 
 7539 // Subtract from a pointer
 7540 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7541   match(Set dst (AddP dst (SubI zero src)));
 7542   effect(KILL cr);
 7543 
 7544   size(2);
 7545   format %{ "SUB    $dst,$src" %}
 7546   opcode(0x2B);
 7547   ins_encode( OpcP, RegReg( dst, src) );
 7548   ins_pipe( ialu_reg_reg );
 7549 %}
 7550 
 7551 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7552   match(Set dst (SubI zero dst));
 7553   effect(KILL cr);
 7554 
 7555   size(2);
 7556   format %{ "NEG    $dst" %}
 7557   opcode(0xF7,0x03);  // Opcode F7 /3
 7558   ins_encode( OpcP, RegOpc( dst ) );
 7559   ins_pipe( ialu_reg );
 7560 %}
 7561 
 7562 //----------Multiplication/Division Instructions-------------------------------
 7563 // Integer Multiplication Instructions
 7564 // Multiply Register
 7565 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7566   match(Set dst (MulI dst src));
 7567   effect(KILL cr);
 7568 
 7569   size(3);
 7570   ins_cost(300);
 7571   format %{ "IMUL   $dst,$src" %}
 7572   opcode(0xAF, 0x0F);
 7573   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7574   ins_pipe( ialu_reg_reg_alu0 );
 7575 %}
 7576 
 7577 // Multiply 32-bit Immediate
 7578 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7579   match(Set dst (MulI src imm));
 7580   effect(KILL cr);
 7581 
 7582   ins_cost(300);
 7583   format %{ "IMUL   $dst,$src,$imm" %}
 7584   opcode(0x69);  /* 69 /r id */
 7585   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7586   ins_pipe( ialu_reg_reg_alu0 );
 7587 %}
 7588 
 7589 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7590   match(Set dst src);
 7591   effect(KILL cr);
 7592 
 7593   // Note that this is artificially increased to make it more expensive than loadConL
 7594   ins_cost(250);
 7595   format %{ "MOV    EAX,$src\t// low word only" %}
 7596   opcode(0xB8);
 7597   ins_encode( LdImmL_Lo(dst, src) );
 7598   ins_pipe( ialu_reg_fat );
 7599 %}
 7600 
 7601 // Multiply by 32-bit Immediate, taking the shifted high order results
 7602 //  (special case for shift by 32)
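      // i.e. dst = (int)(((long)src1 * con) >> 32).  With cnt == 32 the result is
      // exactly the high 32 bits of the 64-bit product, so no extra shift is needed.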
 7603 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7604   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7605   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7606              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7607              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7608   effect(USE src1, KILL cr);
 7609 
 7610   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7611   ins_cost(0*100 + 1*400 - 150);
 7612   format %{ "IMUL   EDX:EAX,$src1" %}
 7613   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7614   ins_pipe( pipe_slow );
 7615 %}
 7616 
 7617 // Multiply by 32-bit Immediate, taking the shifted high order results
 7618 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7619   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7620   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7621              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7622              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7623   effect(USE src1, KILL cr);
 7624 
 7625   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7626   ins_cost(1*100 + 1*400 - 150);
 7627   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7628             "SAR    EDX,$cnt-32" %}
 7629   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7630   ins_pipe( pipe_slow );
 7631 %}
 7632 
 7633 // Multiply Memory 32-bit Immediate
 7634 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7635   match(Set dst (MulI (LoadI src) imm));
 7636   effect(KILL cr);
 7637 
 7638   ins_cost(300);
 7639   format %{ "IMUL   $dst,$src,$imm" %}
 7640   opcode(0x69);  /* 69 /r id */
 7641   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7642   ins_pipe( ialu_reg_mem_alu0 );
 7643 %}
 7644 
 7645 // Multiply Memory
 7646 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7647   match(Set dst (MulI dst (LoadI src)));
 7648   effect(KILL cr);
 7649 
 7650   ins_cost(350);
 7651   format %{ "IMUL   $dst,$src" %}
 7652   opcode(0xAF, 0x0F);
 7653   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7654   ins_pipe( ialu_reg_mem_alu0 );
 7655 %}
 7656 
 7657 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7658 %{
 7659   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7660   effect(KILL cr, KILL src2);
 7661 
 7662   expand %{ mulI_eReg(dst, src1, cr);
 7663            mulI_eReg(src2, src3, cr);
 7664            addI_eReg(dst, src2, cr); %}
 7665 %}
 7666 
 7667 // Multiply Register Int to Long
 7668 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7669   // Basic Idea: long = (long)int * (long)int
 7670   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7671   effect(DEF dst, USE src, USE src1, KILL flags);
 7672 
 7673   ins_cost(300);
 7674   format %{ "IMUL   $dst,$src1" %}
 7675 
 7676   ins_encode( long_int_multiply( dst, src1 ) );
 7677   ins_pipe( ialu_reg_reg_alu0 );
 7678 %}
 7679 
 7680 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7681   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7682   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7683   effect(KILL flags);
 7684 
 7685   ins_cost(300);
 7686   format %{ "MUL    $dst,$src1" %}
 7687 
 7688   ins_encode( long_uint_multiply(dst, src1) );
 7689   ins_pipe( ialu_reg_reg_alu0 );
 7690 %}
 7691 
 7692 // Multiply Register Long
 7693 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7694   match(Set dst (MulL dst src));
 7695   effect(KILL cr, TEMP tmp);
 7696   ins_cost(4*100+3*400);
 7697 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7698 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
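      // Full expansion: (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
      //   = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo
      // The 2^64 term and the upper halves of the cross products lie above bit 63,
      // so only the three partial products computed below contribute to the result.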
 7699   format %{ "MOV    $tmp,$src.lo\n\t"
 7700             "IMUL   $tmp,EDX\n\t"
 7701             "MOV    EDX,$src.hi\n\t"
 7702             "IMUL   EDX,EAX\n\t"
 7703             "ADD    $tmp,EDX\n\t"
 7704             "MUL    EDX:EAX,$src.lo\n\t"
 7705             "ADD    EDX,$tmp" %}
 7706   ins_encode( long_multiply( dst, src, tmp ) );
 7707   ins_pipe( pipe_slow );
 7708 %}
 7709 
 7710 // Multiply Register Long where the left operand's high 32 bits are zero
 7711 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7712   predicate(is_operand_hi32_zero(n->in(1)));
 7713   match(Set dst (MulL dst src));
 7714   effect(KILL cr, TEMP tmp);
 7715   ins_cost(2*100+2*400);
 7716 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7717 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7718   format %{ "MOV    $tmp,$src.hi\n\t"
 7719             "IMUL   $tmp,EAX\n\t"
 7720             "MUL    EDX:EAX,$src.lo\n\t"
 7721             "ADD    EDX,$tmp" %}
 7722   ins_encode %{
 7723     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7724     __ imull($tmp$$Register, rax);
 7725     __ mull($src$$Register);
 7726     __ addl(rdx, $tmp$$Register);
 7727   %}
 7728   ins_pipe( pipe_slow );
 7729 %}
 7730 
 7731 // Multiply Register Long where the right operand's high 32 bits are zero
 7732 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7733   predicate(is_operand_hi32_zero(n->in(2)));
 7734   match(Set dst (MulL dst src));
 7735   effect(KILL cr, TEMP tmp);
 7736   ins_cost(2*100+2*400);
 7737 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7738 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7739   format %{ "MOV    $tmp,$src.lo\n\t"
 7740             "IMUL   $tmp,EDX\n\t"
 7741             "MUL    EDX:EAX,$src.lo\n\t"
 7742             "ADD    EDX,$tmp" %}
 7743   ins_encode %{
 7744     __ movl($tmp$$Register, $src$$Register);
 7745     __ imull($tmp$$Register, rdx);
 7746     __ mull($src$$Register);
 7747     __ addl(rdx, $tmp$$Register);
 7748   %}
 7749   ins_pipe( pipe_slow );
 7750 %}
 7751 
 7752 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7753 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7754   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7755   match(Set dst (MulL dst src));
 7756   effect(KILL cr);
 7757   ins_cost(1*400);
 7758 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7759 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7760   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7761   ins_encode %{
 7762     __ mull($src$$Register);
 7763   %}
 7764   ins_pipe( pipe_slow );
 7765 %}
 7766 
 7767 // Multiply Register Long by small constant
 7768 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7769   match(Set dst (MulL dst src));
 7770   effect(KILL cr, TEMP tmp);
 7771   ins_cost(2*100+2*400);
 7772   size(12);
 7773 // Basic idea: lo(result) = lo(src * EAX)
 7774 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7775   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7776             "MOV    EDX,$src\n\t"
 7777             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7778             "ADD    EDX,$tmp" %}
 7779   ins_encode( long_multiply_con( dst, src, tmp ) );
 7780   ins_pipe( pipe_slow );
 7781 %}
 7782 
 7783 // Integer DIV with Register
 7784 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7785   match(Set rax (DivI rax div));
 7786   effect(KILL rdx, KILL cr);
 7787   size(26);
 7788   ins_cost(30*100+10*100);
 7789   format %{ "CMP    EAX,0x80000000\n\t"
 7790             "JNE,s  normal\n\t"
 7791             "XOR    EDX,EDX\n\t"
 7792             "CMP    ECX,-1\n\t"
 7793             "JE,s   done\n"
 7794     "normal: CDQ\n\t"
 7795             "IDIV   $div\n\t"
 7796     "done:"        %}
 7797   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7798   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7799   ins_pipe( ialu_reg_reg_alu0 );
 7800 %}
 7801 
 7802 // Divide Register Long
 7803 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
 7804   match(Set dst (DivL src1 src2));
 7805   effect( KILL cr, KILL cx, KILL bx );
 7806   ins_cost(10000);
 7807   format %{ "PUSH   $src1.hi\n\t"
 7808             "PUSH   $src1.lo\n\t"
 7809             "PUSH   $src2.hi\n\t"
 7810             "PUSH   $src2.lo\n\t"
 7811             "CALL   SharedRuntime::ldiv\n\t"
 7812             "ADD    ESP,16" %}
 7813   ins_encode( long_div(src1,src2) );
 7814   ins_pipe( pipe_slow );
 7815 %}
 7816 
 7817 // Integer DIVMOD with Register, both quotient and mod results
 7818 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7819   match(DivModI rax div);
 7820   effect(KILL cr);
 7821   size(26);
 7822   ins_cost(30*100+10*100);
 7823   format %{ "CMP    EAX,0x80000000\n\t"
 7824             "JNE,s  normal\n\t"
 7825             "XOR    EDX,EDX\n\t"
 7826             "CMP    ECX,-1\n\t"
 7827             "JE,s   done\n"
 7828     "normal: CDQ\n\t"
 7829             "IDIV   $div\n\t"
 7830     "done:"        %}
 7831   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7832   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7833   ins_pipe( pipe_slow );
 7834 %}
 7835 
 7836 // Integer MOD with Register
 7837 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7838   match(Set rdx (ModI rax div));
 7839   effect(KILL rax, KILL cr);
 7840 
 7841   size(26);
 7842   ins_cost(300);
 7843   format %{ "CDQ\n\t"
 7844             "IDIV   $div" %}
 7845   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7846   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7847   ins_pipe( ialu_reg_reg_alu0 );
 7848 %}
 7849 
 7850 // Remainder Register Long
 7851 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
 7852   match(Set dst (ModL src1 src2));
 7853   effect( KILL cr, KILL cx, KILL bx );
 7854   ins_cost(10000);
 7855   format %{ "PUSH   $src1.hi\n\t"
 7856             "PUSH   $src1.lo\n\t"
 7857             "PUSH   $src2.hi\n\t"
 7858             "PUSH   $src2.lo\n\t"
 7859             "CALL   SharedRuntime::lrem\n\t"
 7860             "ADD    ESP,16" %}
 7861   ins_encode( long_mod(src1,src2) );
 7862   ins_pipe( pipe_slow );
 7863 %}
 7864 
 7865 // Divide Register Long (no special case since divisor != -1)
 7866 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7867   match(Set dst (DivL dst imm));
 7868   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7869   ins_cost(1000);
 7870   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7871             "XOR    $tmp2,$tmp2\n\t"
 7872             "CMP    $tmp,EDX\n\t"
 7873             "JA,s   fast\n\t"
 7874             "MOV    $tmp2,EAX\n\t"
 7875             "MOV    EAX,EDX\n\t"
 7876             "MOV    EDX,0\n\t"
 7877             "JLE,s  pos\n\t"
 7878             "LNEG   EAX : $tmp2\n\t"
 7879             "DIV    $tmp # unsigned division\n\t"
 7880             "XCHG   EAX,$tmp2\n\t"
 7881             "DIV    $tmp\n\t"
 7882             "LNEG   $tmp2 : EAX\n\t"
 7883             "JMP,s  done\n"
 7884     "pos:\n\t"
 7885             "DIV    $tmp\n\t"
 7886             "XCHG   EAX,$tmp2\n"
 7887     "fast:\n\t"
 7888             "DIV    $tmp\n"
 7889     "done:\n\t"
 7890             "MOV    EDX,$tmp2\n\t"
 7891             "NEG    EDX:EAX # if $imm < 0" %}
 7892   ins_encode %{
 7893     int con = (int)$imm$$constant;
 7894     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7895     int pcon = (con > 0) ? con : -con;
 7896     Label Lfast, Lpos, Ldone;
 7897 
 7898     __ movl($tmp$$Register, pcon);
 7899     __ xorl($tmp2$$Register,$tmp2$$Register);
 7900     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7902 
 7903     __ movl($tmp2$$Register, $dst$$Register); // save
 7904     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // zero with MOV (not XOR) to preserve the flags for the jcc below
 7906     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7907 
 7908     // Negative dividend.
 7909     // convert value to positive to use unsigned division
 7910     __ lneg($dst$$Register, $tmp2$$Register);
 7911     __ divl($tmp$$Register);
 7912     __ xchgl($dst$$Register, $tmp2$$Register);
 7913     __ divl($tmp$$Register);
 7914     // revert result back to negative
 7915     __ lneg($tmp2$$Register, $dst$$Register);
 7916     __ jmpb(Ldone);
 7917 
 7918     __ bind(Lpos);
 7919     __ divl($tmp$$Register); // Use unsigned division
 7920     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 holds the 32-bit high result
 7922 
 7923     __ bind(Lfast);
 7924     // fast path: src is positive
 7925     __ divl($tmp$$Register); // Use unsigned division
 7926 
 7927     __ bind(Ldone);
 7928     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7929     if (con < 0) {
 7930       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7931     }
 7932   %}
 7933   ins_pipe( pipe_slow );
 7934 %}
 7935 
// Remainder Register Long (remainder fits into 32 bits)
 7937 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7938   match(Set dst (ModL dst imm));
 7939   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7940   ins_cost(1000);
 7941   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7942             "CMP    $tmp,EDX\n\t"
 7943             "JA,s   fast\n\t"
 7944             "MOV    $tmp2,EAX\n\t"
 7945             "MOV    EAX,EDX\n\t"
 7946             "MOV    EDX,0\n\t"
 7947             "JLE,s  pos\n\t"
 7948             "LNEG   EAX : $tmp2\n\t"
 7949             "DIV    $tmp # unsigned division\n\t"
 7950             "MOV    EAX,$tmp2\n\t"
 7951             "DIV    $tmp\n\t"
 7952             "NEG    EDX\n\t"
 7953             "JMP,s  done\n"
 7954     "pos:\n\t"
 7955             "DIV    $tmp\n\t"
 7956             "MOV    EAX,$tmp2\n"
 7957     "fast:\n\t"
 7958             "DIV    $tmp\n"
 7959     "done:\n\t"
 7960             "MOV    EAX,EDX\n\t"
 7961             "SAR    EDX,31\n\t" %}
 7962   ins_encode %{
 7963     int con = (int)$imm$$constant;
 7964     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7965     int pcon = (con > 0) ? con : -con;
 7966     Label  Lfast, Lpos, Ldone;
 7967 
 7968     __ movl($tmp$$Register, pcon);
 7969     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7971 
 7972     __ movl($tmp2$$Register, $dst$$Register); // save
 7973     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // zero with MOV (not XOR) to preserve the flags for the jcc below
 7975     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7976 
 7977     // Negative dividend.
 7978     // convert value to positive to use unsigned division
 7979     __ lneg($dst$$Register, $tmp2$$Register);
 7980     __ divl($tmp$$Register);
 7981     __ movl($dst$$Register, $tmp2$$Register);
 7982     __ divl($tmp$$Register);
 7983     // revert remainder back to negative
 7984     __ negl(HIGH_FROM_LOW($dst$$Register));
 7985     __ jmpb(Ldone);
 7986 
 7987     __ bind(Lpos);
 7988     __ divl($tmp$$Register);
 7989     __ movl($dst$$Register, $tmp2$$Register);
 7990 
 7991     __ bind(Lfast);
 7992     // fast path: src is positive
 7993     __ divl($tmp$$Register);
 7994 
 7995     __ bind(Ldone);
 7996     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7997     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7998 
 7999   %}
 8000   ins_pipe( pipe_slow );
 8001 %}
 8002 
 8003 // Integer Shift Instructions
 8004 // Shift Left by one
 8005 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8006   match(Set dst (LShiftI dst shift));
 8007   effect(KILL cr);
 8008 
 8009   size(2);
 8010   format %{ "SHL    $dst,$shift" %}
 8011   opcode(0xD1, 0x4);  /* D1 /4 */
 8012   ins_encode( OpcP, RegOpc( dst ) );
 8013   ins_pipe( ialu_reg );
 8014 %}
 8015 
 8016 // Shift Left by 8-bit immediate
 8017 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8018   match(Set dst (LShiftI dst shift));
 8019   effect(KILL cr);
 8020 
 8021   size(3);
 8022   format %{ "SHL    $dst,$shift" %}
 8023   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8024   ins_encode( RegOpcImm( dst, shift) );
 8025   ins_pipe( ialu_reg );
 8026 %}
 8027 
 8028 // Shift Left by variable
 8029 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8030   match(Set dst (LShiftI dst shift));
 8031   effect(KILL cr);
 8032 
 8033   size(2);
 8034   format %{ "SHL    $dst,$shift" %}
 8035   opcode(0xD3, 0x4);  /* D3 /4 */
 8036   ins_encode( OpcP, RegOpc( dst ) );
 8037   ins_pipe( ialu_reg_reg );
 8038 %}
 8039 
 8040 // Arithmetic shift right by one
 8041 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8042   match(Set dst (RShiftI dst shift));
 8043   effect(KILL cr);
 8044 
 8045   size(2);
 8046   format %{ "SAR    $dst,$shift" %}
 8047   opcode(0xD1, 0x7);  /* D1 /7 */
 8048   ins_encode( OpcP, RegOpc( dst ) );
 8049   ins_pipe( ialu_reg );
 8050 %}
 8051 
 8052 // Arithmetic shift right by one
 8053 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8054   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8055   effect(KILL cr);
 8056   format %{ "SAR    $dst,$shift" %}
 8057   opcode(0xD1, 0x7);  /* D1 /7 */
 8058   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8059   ins_pipe( ialu_mem_imm );
 8060 %}
 8061 
 8062 // Arithmetic Shift Right by 8-bit immediate
 8063 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8064   match(Set dst (RShiftI dst shift));
 8065   effect(KILL cr);
 8066 
 8067   size(3);
 8068   format %{ "SAR    $dst,$shift" %}
 8069   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8070   ins_encode( RegOpcImm( dst, shift ) );
 8071   ins_pipe( ialu_mem_imm );
 8072 %}
 8073 
 8074 // Arithmetic Shift Right by 8-bit immediate
 8075 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8076   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8077   effect(KILL cr);
 8078 
 8079   format %{ "SAR    $dst,$shift" %}
 8080   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8081   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8082   ins_pipe( ialu_mem_imm );
 8083 %}
 8084 
 8085 // Arithmetic Shift Right by variable
 8086 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8087   match(Set dst (RShiftI dst shift));
 8088   effect(KILL cr);
 8089 
 8090   size(2);
 8091   format %{ "SAR    $dst,$shift" %}
 8092   opcode(0xD3, 0x7);  /* D3 /7 */
 8093   ins_encode( OpcP, RegOpc( dst ) );
 8094   ins_pipe( ialu_reg_reg );
 8095 %}
 8096 
 8097 // Logical shift right by one
 8098 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8099   match(Set dst (URShiftI dst shift));
 8100   effect(KILL cr);
 8101 
 8102   size(2);
 8103   format %{ "SHR    $dst,$shift" %}
 8104   opcode(0xD1, 0x5);  /* D1 /5 */
 8105   ins_encode( OpcP, RegOpc( dst ) );
 8106   ins_pipe( ialu_reg );
 8107 %}
 8108 
 8109 // Logical Shift Right by 8-bit immediate
 8110 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8111   match(Set dst (URShiftI dst shift));
 8112   effect(KILL cr);
 8113 
 8114   size(3);
 8115   format %{ "SHR    $dst,$shift" %}
 8116   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8117   ins_encode( RegOpcImm( dst, shift) );
 8118   ins_pipe( ialu_reg );
 8119 %}
 8120 
 8121 
// Logical Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8123 // This idiom is used by the compiler for the i2b bytecode.
 8124 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8125   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8126 
 8127   size(3);
 8128   format %{ "MOVSX  $dst,$src :8" %}
 8129   ins_encode %{
 8130     __ movsbl($dst$$Register, $src$$Register);
 8131   %}
 8132   ins_pipe(ialu_reg_reg);
 8133 %}
 8134 
// Logical Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8137 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8138   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8139 
 8140   size(3);
 8141   format %{ "MOVSX  $dst,$src :16" %}
 8142   ins_encode %{
 8143     __ movswl($dst$$Register, $src$$Register);
 8144   %}
 8145   ins_pipe(ialu_reg_reg);
 8146 %}
 8147 
 8148 
 8149 // Logical Shift Right by variable
 8150 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8151   match(Set dst (URShiftI dst shift));
 8152   effect(KILL cr);
 8153 
 8154   size(2);
 8155   format %{ "SHR    $dst,$shift" %}
 8156   opcode(0xD3, 0x5);  /* D3 /5 */
 8157   ins_encode( OpcP, RegOpc( dst ) );
 8158   ins_pipe( ialu_reg_reg );
 8159 %}
 8160 
 8161 
 8162 //----------Logical Instructions-----------------------------------------------
 8163 //----------Integer Logical Instructions---------------------------------------
 8164 // And Instructions
 8165 // And Register with Register
 8166 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8167   match(Set dst (AndI dst src));
 8168   effect(KILL cr);
 8169 
 8170   size(2);
 8171   format %{ "AND    $dst,$src" %}
 8172   opcode(0x23);
 8173   ins_encode( OpcP, RegReg( dst, src) );
 8174   ins_pipe( ialu_reg_reg );
 8175 %}
 8176 
 8177 // And Register with Immediate
 8178 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8179   match(Set dst (AndI dst src));
 8180   effect(KILL cr);
 8181 
 8182   format %{ "AND    $dst,$src" %}
 8183   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8184   // ins_encode( RegImm( dst, src) );
 8185   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8186   ins_pipe( ialu_reg );
 8187 %}
 8188 
 8189 // And Register with Memory
 8190 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8191   match(Set dst (AndI dst (LoadI src)));
 8192   effect(KILL cr);
 8193 
 8194   ins_cost(125);
 8195   format %{ "AND    $dst,$src" %}
 8196   opcode(0x23);
 8197   ins_encode( OpcP, RegMem( dst, src) );
 8198   ins_pipe( ialu_reg_mem );
 8199 %}
 8200 
 8201 // And Memory with Register
 8202 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8203   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8204   effect(KILL cr);
 8205 
 8206   ins_cost(150);
 8207   format %{ "AND    $dst,$src" %}
 8208   opcode(0x21);  /* Opcode 21 /r */
 8209   ins_encode( OpcP, RegMem( src, dst ) );
 8210   ins_pipe( ialu_mem_reg );
 8211 %}
 8212 
 8213 // And Memory with Immediate
 8214 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8215   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8216   effect(KILL cr);
 8217 
 8218   ins_cost(125);
 8219   format %{ "AND    $dst,$src" %}
 8220   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8221   // ins_encode( MemImm( dst, src) );
 8222   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8223   ins_pipe( ialu_mem_imm );
 8224 %}
 8225 
 8226 // BMI1 instructions
 8227 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8228   match(Set dst (AndI (XorI src1 minus_1) src2));
 8229   predicate(UseBMI1Instructions);
 8230   effect(KILL cr);
 8231 
 8232   format %{ "ANDNL  $dst, $src1, $src2" %}
 8233 
 8234   ins_encode %{
 8235     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8236   %}
 8237   ins_pipe(ialu_reg);
 8238 %}
 8239 
 8240 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8241   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8242   predicate(UseBMI1Instructions);
 8243   effect(KILL cr);
 8244 
 8245   ins_cost(125);
 8246   format %{ "ANDNL  $dst, $src1, $src2" %}
 8247 
 8248   ins_encode %{
 8249     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8250   %}
 8251   ins_pipe(ialu_reg_mem);
 8252 %}
 8253 
 8254 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8255   match(Set dst (AndI (SubI imm_zero src) src));
 8256   predicate(UseBMI1Instructions);
 8257   effect(KILL cr);
 8258 
 8259   format %{ "BLSIL  $dst, $src" %}
 8260 
 8261   ins_encode %{
 8262     __ blsil($dst$$Register, $src$$Register);
 8263   %}
 8264   ins_pipe(ialu_reg);
 8265 %}
 8266 
 8267 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8268   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8269   predicate(UseBMI1Instructions);
 8270   effect(KILL cr);
 8271 
 8272   ins_cost(125);
 8273   format %{ "BLSIL  $dst, $src" %}
 8274 
 8275   ins_encode %{
 8276     __ blsil($dst$$Register, $src$$Address);
 8277   %}
 8278   ins_pipe(ialu_reg_mem);
 8279 %}
 8280 
 8281 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8282 %{
 8283   match(Set dst (XorI (AddI src minus_1) src));
 8284   predicate(UseBMI1Instructions);
 8285   effect(KILL cr);
 8286 
 8287   format %{ "BLSMSKL $dst, $src" %}
 8288 
 8289   ins_encode %{
 8290     __ blsmskl($dst$$Register, $src$$Register);
 8291   %}
 8292 
 8293   ins_pipe(ialu_reg);
 8294 %}
 8295 
 8296 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8297 %{
 8298   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8299   predicate(UseBMI1Instructions);
 8300   effect(KILL cr);
 8301 
 8302   ins_cost(125);
 8303   format %{ "BLSMSKL $dst, $src" %}
 8304 
 8305   ins_encode %{
 8306     __ blsmskl($dst$$Register, $src$$Address);
 8307   %}
 8308 
 8309   ins_pipe(ialu_reg_mem);
 8310 %}
 8311 
 8312 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8313 %{
 8314   match(Set dst (AndI (AddI src minus_1) src) );
 8315   predicate(UseBMI1Instructions);
 8316   effect(KILL cr);
 8317 
 8318   format %{ "BLSRL  $dst, $src" %}
 8319 
 8320   ins_encode %{
 8321     __ blsrl($dst$$Register, $src$$Register);
 8322   %}
 8323 
 8324   ins_pipe(ialu_reg);
 8325 %}
 8326 
 8327 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8328 %{
 8329   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8330   predicate(UseBMI1Instructions);
 8331   effect(KILL cr);
 8332 
 8333   ins_cost(125);
 8334   format %{ "BLSRL  $dst, $src" %}
 8335 
 8336   ins_encode %{
 8337     __ blsrl($dst$$Register, $src$$Address);
 8338   %}
 8339 
 8340   ins_pipe(ialu_reg_mem);
 8341 %}
 8342 
 8343 // Or Instructions
 8344 // Or Register with Register
 8345 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8346   match(Set dst (OrI dst src));
 8347   effect(KILL cr);
 8348 
 8349   size(2);
 8350   format %{ "OR     $dst,$src" %}
 8351   opcode(0x0B);
 8352   ins_encode( OpcP, RegReg( dst, src) );
 8353   ins_pipe( ialu_reg_reg );
 8354 %}
 8355 
 8356 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8357   match(Set dst (OrI dst (CastP2X src)));
 8358   effect(KILL cr);
 8359 
 8360   size(2);
 8361   format %{ "OR     $dst,$src" %}
 8362   opcode(0x0B);
 8363   ins_encode( OpcP, RegReg( dst, src) );
 8364   ins_pipe( ialu_reg_reg );
 8365 %}
 8366 
 8367 
 8368 // Or Register with Immediate
 8369 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8370   match(Set dst (OrI dst src));
 8371   effect(KILL cr);
 8372 
 8373   format %{ "OR     $dst,$src" %}
 8374   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8375   // ins_encode( RegImm( dst, src) );
 8376   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8377   ins_pipe( ialu_reg );
 8378 %}
 8379 
 8380 // Or Register with Memory
 8381 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8382   match(Set dst (OrI dst (LoadI src)));
 8383   effect(KILL cr);
 8384 
 8385   ins_cost(125);
 8386   format %{ "OR     $dst,$src" %}
 8387   opcode(0x0B);
 8388   ins_encode( OpcP, RegMem( dst, src) );
 8389   ins_pipe( ialu_reg_mem );
 8390 %}
 8391 
 8392 // Or Memory with Register
 8393 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8394   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8395   effect(KILL cr);
 8396 
 8397   ins_cost(150);
 8398   format %{ "OR     $dst,$src" %}
 8399   opcode(0x09);  /* Opcode 09 /r */
 8400   ins_encode( OpcP, RegMem( src, dst ) );
 8401   ins_pipe( ialu_mem_reg );
 8402 %}
 8403 
 8404 // Or Memory with Immediate
 8405 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8406   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8407   effect(KILL cr);
 8408 
 8409   ins_cost(125);
 8410   format %{ "OR     $dst,$src" %}
 8411   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8412   // ins_encode( MemImm( dst, src) );
 8413   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8414   ins_pipe( ialu_mem_imm );
 8415 %}
 8416 
 8417 // ROL/ROR
 8418 // ROL expand
 8419 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8420   effect(USE_DEF dst, USE shift, KILL cr);
 8421 
 8422   format %{ "ROL    $dst, $shift" %}
 8423   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8424   ins_encode( OpcP, RegOpc( dst ));
 8425   ins_pipe( ialu_reg );
 8426 %}
 8427 
 8428 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8429   effect(USE_DEF dst, USE shift, KILL cr);
 8430 
 8431   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8433   ins_encode( RegOpcImm(dst, shift) );
 8434   ins_pipe(ialu_reg);
 8435 %}
 8436 
 8437 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8438   effect(USE_DEF dst, USE shift, KILL cr);
 8439 
 8440   format %{ "ROL    $dst, $shift" %}
 8441   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8442   ins_encode(OpcP, RegOpc(dst));
 8443   ins_pipe( ialu_reg_reg );
 8444 %}
 8445 // end of ROL expand
 8446 
 8447 // ROL 32bit by one once
 8448 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8449   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8450 
 8451   expand %{
 8452     rolI_eReg_imm1(dst, lshift, cr);
 8453   %}
 8454 %}
 8455 
 8456 // ROL 32bit var by imm8 once
 8457 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8458   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8459   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8460 
 8461   expand %{
 8462     rolI_eReg_imm8(dst, lshift, cr);
 8463   %}
 8464 %}
 8465 
 8466 // ROL 32bit var by var once
 8467 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8468   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8469 
 8470   expand %{
 8471     rolI_eReg_CL(dst, shift, cr);
 8472   %}
 8473 %}
 8474 
 8475 // ROL 32bit var by var once
 8476 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8477   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8478 
 8479   expand %{
 8480     rolI_eReg_CL(dst, shift, cr);
 8481   %}
 8482 %}
 8483 
 8484 // ROR expand
 8485 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8486   effect(USE_DEF dst, USE shift, KILL cr);
 8487 
 8488   format %{ "ROR    $dst, $shift" %}
 8489   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8490   ins_encode( OpcP, RegOpc( dst ) );
 8491   ins_pipe( ialu_reg );
 8492 %}
 8493 
 8494 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8495   effect (USE_DEF dst, USE shift, KILL cr);
 8496 
 8497   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8499   ins_encode( RegOpcImm(dst, shift) );
 8500   ins_pipe( ialu_reg );
 8501 %}
 8502 
 8503 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8504   effect(USE_DEF dst, USE shift, KILL cr);
 8505 
 8506   format %{ "ROR    $dst, $shift" %}
 8507   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8508   ins_encode(OpcP, RegOpc(dst));
 8509   ins_pipe( ialu_reg_reg );
 8510 %}
 8511 // end of ROR expand
 8512 
 8513 // ROR right once
 8514 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8515   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8516 
 8517   expand %{
 8518     rorI_eReg_imm1(dst, rshift, cr);
 8519   %}
 8520 %}
 8521 
 8522 // ROR 32bit by immI8 once
 8523 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8524   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8525   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8526 
 8527   expand %{
 8528     rorI_eReg_imm8(dst, rshift, cr);
 8529   %}
 8530 %}
 8531 
 8532 // ROR 32bit var by var once
 8533 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8534   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8535 
 8536   expand %{
 8537     rorI_eReg_CL(dst, shift, cr);
 8538   %}
 8539 %}
 8540 
 8541 // ROR 32bit var by var once
 8542 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8543   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8544 
 8545   expand %{
 8546     rorI_eReg_CL(dst, shift, cr);
 8547   %}
 8548 %}
 8549 
 8550 // Xor Instructions
 8551 // Xor Register with Register
 8552 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8553   match(Set dst (XorI dst src));
 8554   effect(KILL cr);
 8555 
 8556   size(2);
 8557   format %{ "XOR    $dst,$src" %}
 8558   opcode(0x33);
 8559   ins_encode( OpcP, RegReg( dst, src) );
 8560   ins_pipe( ialu_reg_reg );
 8561 %}
 8562 
 8563 // Xor Register with Immediate -1
 8564 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8565   match(Set dst (XorI dst imm));
 8566 
 8567   size(2);
 8568   format %{ "NOT    $dst" %}
 8569   ins_encode %{
 8570      __ notl($dst$$Register);
 8571   %}
 8572   ins_pipe( ialu_reg );
 8573 %}
 8574 
 8575 // Xor Register with Immediate
 8576 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8577   match(Set dst (XorI dst src));
 8578   effect(KILL cr);
 8579 
 8580   format %{ "XOR    $dst,$src" %}
 8581   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8582   // ins_encode( RegImm( dst, src) );
 8583   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8584   ins_pipe( ialu_reg );
 8585 %}
 8586 
 8587 // Xor Register with Memory
 8588 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8589   match(Set dst (XorI dst (LoadI src)));
 8590   effect(KILL cr);
 8591 
 8592   ins_cost(125);
 8593   format %{ "XOR    $dst,$src" %}
 8594   opcode(0x33);
 8595   ins_encode( OpcP, RegMem(dst, src) );
 8596   ins_pipe( ialu_reg_mem );
 8597 %}
 8598 
 8599 // Xor Memory with Register
 8600 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8601   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8602   effect(KILL cr);
 8603 
 8604   ins_cost(150);
 8605   format %{ "XOR    $dst,$src" %}
 8606   opcode(0x31);  /* Opcode 31 /r */
 8607   ins_encode( OpcP, RegMem( src, dst ) );
 8608   ins_pipe( ialu_mem_reg );
 8609 %}
 8610 
 8611 // Xor Memory with Immediate
 8612 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8613   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8614   effect(KILL cr);
 8615 
 8616   ins_cost(125);
 8617   format %{ "XOR    $dst,$src" %}
 8618   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8619   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8620   ins_pipe( ialu_mem_imm );
 8621 %}
 8622 
 8623 //----------Convert Int to Boolean---------------------------------------------
 8624 
 8625 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8626   effect( DEF dst, USE src );
 8627   format %{ "MOV    $dst,$src" %}
 8628   ins_encode( enc_Copy( dst, src) );
 8629   ins_pipe( ialu_reg_reg );
 8630 %}
 8631 
 8632 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8633   effect( USE_DEF dst, USE src, KILL cr );
 8634 
 8635   size(4);
 8636   format %{ "NEG    $dst\n\t"
 8637             "ADC    $dst,$src" %}
 8638   ins_encode( neg_reg(dst),
 8639               OpcRegReg(0x13,dst,src) );
 8640   ins_pipe( ialu_reg_reg_long );
 8641 %}
 8642 
 8643 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8644   match(Set dst (Conv2B src));
 8645 
 8646   expand %{
 8647     movI_nocopy(dst,src);
 8648     ci2b(dst,src,cr);
 8649   %}
 8650 %}
 8651 
 8652 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8653   effect( DEF dst, USE src );
 8654   format %{ "MOV    $dst,$src" %}
 8655   ins_encode( enc_Copy( dst, src) );
 8656   ins_pipe( ialu_reg_reg );
 8657 %}
 8658 
 8659 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8660   effect( USE_DEF dst, USE src, KILL cr );
 8661   format %{ "NEG    $dst\n\t"
 8662             "ADC    $dst,$src" %}
 8663   ins_encode( neg_reg(dst),
 8664               OpcRegReg(0x13,dst,src) );
 8665   ins_pipe( ialu_reg_reg_long );
 8666 %}
 8667 
 8668 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8669   match(Set dst (Conv2B src));
 8670 
 8671   expand %{
 8672     movP_nocopy(dst,src);
 8673     cp2b(dst,src,cr);
 8674   %}
 8675 %}
 8676 
 8677 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8678   match(Set dst (CmpLTMask p q));
 8679   effect(KILL cr);
 8680   ins_cost(400);
 8681 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8683   format %{ "XOR    $dst,$dst\n\t"
 8684             "CMP    $p,$q\n\t"
 8685             "SETlt  $dst\n\t"
 8686             "NEG    $dst" %}
 8687   ins_encode %{
 8688     Register Rp = $p$$Register;
 8689     Register Rq = $q$$Register;
 8690     Register Rd = $dst$$Register;
 8691     Label done;
 8692     __ xorl(Rd, Rd);
 8693     __ cmpl(Rp, Rq);
 8694     __ setb(Assembler::less, Rd);
 8695     __ negl(Rd);
 8696   %}
 8697 
 8698   ins_pipe(pipe_slow);
 8699 %}
 8700 
 8701 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8702   match(Set dst (CmpLTMask dst zero));
 8703   effect(DEF dst, KILL cr);
 8704   ins_cost(100);
 8705 
 8706   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8707   ins_encode %{
 8708   __ sarl($dst$$Register, 31);
 8709   %}
 8710   ins_pipe(ialu_reg);
 8711 %}
 8712 
 8713 /* better to save a register than avoid a branch */
 8714 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8715   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8716   effect(KILL cr);
 8717   ins_cost(400);
 8718   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8719             "JGE    done\n\t"
 8720             "ADD    $p,$y\n"
 8721             "done:  " %}
 8722   ins_encode %{
 8723     Register Rp = $p$$Register;
 8724     Register Rq = $q$$Register;
 8725     Register Ry = $y$$Register;
 8726     Label done;
 8727     __ subl(Rp, Rq);
 8728     __ jccb(Assembler::greaterEqual, done);
 8729     __ addl(Rp, Ry);
 8730     __ bind(done);
 8731   %}
 8732 
 8733   ins_pipe(pipe_cmplt);
 8734 %}
 8735 
 8736 /* better to save a register than avoid a branch */
 8737 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8738   match(Set y (AndI (CmpLTMask p q) y));
 8739   effect(KILL cr);
 8740 
 8741   ins_cost(300);
 8742 
 8743   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8744             "JLT      done\n\t"
 8745             "XORL     $y, $y\n"
 8746             "done:  " %}
 8747   ins_encode %{
 8748     Register Rp = $p$$Register;
 8749     Register Rq = $q$$Register;
 8750     Register Ry = $y$$Register;
 8751     Label done;
 8752     __ cmpl(Rp, Rq);
 8753     __ jccb(Assembler::less, done);
 8754     __ xorl(Ry, Ry);
 8755     __ bind(done);
 8756   %}
 8757 
 8758   ins_pipe(pipe_cmplt);
 8759 %}
 8760 
 8761 /* If I enable this, I encourage spilling in the inner loop of compress.
 8762 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8763   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8764 */
 8765 //----------Overflow Math Instructions-----------------------------------------
 8766 
 8767 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8768 %{
 8769   match(Set cr (OverflowAddI op1 op2));
 8770   effect(DEF cr, USE_KILL op1, USE op2);
 8771 
 8772   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8773 
 8774   ins_encode %{
 8775     __ addl($op1$$Register, $op2$$Register);
 8776   %}
 8777   ins_pipe(ialu_reg_reg);
 8778 %}
 8779 
 8780 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8781 %{
 8782   match(Set cr (OverflowAddI op1 op2));
 8783   effect(DEF cr, USE_KILL op1, USE op2);
 8784 
 8785   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8786 
 8787   ins_encode %{
 8788     __ addl($op1$$Register, $op2$$constant);
 8789   %}
 8790   ins_pipe(ialu_reg_reg);
 8791 %}
 8792 
 8793 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8794 %{
 8795   match(Set cr (OverflowSubI op1 op2));
 8796 
 8797   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8798   ins_encode %{
 8799     __ cmpl($op1$$Register, $op2$$Register);
 8800   %}
 8801   ins_pipe(ialu_reg_reg);
 8802 %}
 8803 
 8804 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8805 %{
 8806   match(Set cr (OverflowSubI op1 op2));
 8807 
 8808   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8809   ins_encode %{
 8810     __ cmpl($op1$$Register, $op2$$constant);
 8811   %}
 8812   ins_pipe(ialu_reg_reg);
 8813 %}
 8814 
 8815 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8816 %{
 8817   match(Set cr (OverflowSubI zero op2));
 8818   effect(DEF cr, USE_KILL op2);
 8819 
 8820   format %{ "NEG    $op2\t# overflow check int" %}
 8821   ins_encode %{
 8822     __ negl($op2$$Register);
 8823   %}
 8824   ins_pipe(ialu_reg_reg);
 8825 %}
 8826 
 8827 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8828 %{
 8829   match(Set cr (OverflowMulI op1 op2));
 8830   effect(DEF cr, USE_KILL op1, USE op2);
 8831 
 8832   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8833   ins_encode %{
 8834     __ imull($op1$$Register, $op2$$Register);
 8835   %}
 8836   ins_pipe(ialu_reg_reg_alu0);
 8837 %}
 8838 
 8839 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8840 %{
 8841   match(Set cr (OverflowMulI op1 op2));
 8842   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8843 
 8844   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8845   ins_encode %{
 8846     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8847   %}
 8848   ins_pipe(ialu_reg_reg_alu0);
 8849 %}
 8850 
 8851 // Integer Absolute Instructions
 8852 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8853 %{
 8854   match(Set dst (AbsI src));
 8855   effect(TEMP dst, TEMP tmp, KILL cr);
 8856   format %{ "movl $tmp, $src\n\t"
 8857             "sarl $tmp, 31\n\t"
 8858             "movl $dst, $src\n\t"
 8859             "xorl $dst, $tmp\n\t"
 8860             "subl $dst, $tmp\n"
 8861           %}
 8862   ins_encode %{
 8863     __ movl($tmp$$Register, $src$$Register);
 8864     __ sarl($tmp$$Register, 31);
 8865     __ movl($dst$$Register, $src$$Register);
 8866     __ xorl($dst$$Register, $tmp$$Register);
 8867     __ subl($dst$$Register, $tmp$$Register);
 8868   %}
 8869 
 8870   ins_pipe(ialu_reg_reg);
 8871 %}
 8872 
 8873 //----------Long Instructions------------------------------------------------
 8874 // Add Long Register with Register
 8875 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8876   match(Set dst (AddL dst src));
 8877   effect(KILL cr);
 8878   ins_cost(200);
 8879   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8880             "ADC    $dst.hi,$src.hi" %}
 8881   opcode(0x03, 0x13);
 8882   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8883   ins_pipe( ialu_reg_reg_long );
 8884 %}
 8885 
 8886 // Add Long Register with Immediate
 8887 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8888   match(Set dst (AddL dst src));
 8889   effect(KILL cr);
 8890   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8891             "ADC    $dst.hi,$src.hi" %}
 8892   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8893   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8894   ins_pipe( ialu_reg_long );
 8895 %}
 8896 
 8897 // Add Long Register with Memory
 8898 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8899   match(Set dst (AddL dst (LoadL mem)));
 8900   effect(KILL cr);
 8901   ins_cost(125);
 8902   format %{ "ADD    $dst.lo,$mem\n\t"
 8903             "ADC    $dst.hi,$mem+4" %}
 8904   opcode(0x03, 0x13);
 8905   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8906   ins_pipe( ialu_reg_long_mem );
 8907 %}
 8908 
 8909 // Subtract Long Register with Register.
 8910 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8911   match(Set dst (SubL dst src));
 8912   effect(KILL cr);
 8913   ins_cost(200);
 8914   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8915             "SBB    $dst.hi,$src.hi" %}
 8916   opcode(0x2B, 0x1B);
 8917   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8918   ins_pipe( ialu_reg_reg_long );
 8919 %}
 8920 
 8921 // Subtract Long Register with Immediate
 8922 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8923   match(Set dst (SubL dst src));
 8924   effect(KILL cr);
 8925   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8926             "SBB    $dst.hi,$src.hi" %}
 8927   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8928   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8929   ins_pipe( ialu_reg_long );
 8930 %}
 8931 
 8932 // Subtract Long Register with Memory
 8933 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8934   match(Set dst (SubL dst (LoadL mem)));
 8935   effect(KILL cr);
 8936   ins_cost(125);
 8937   format %{ "SUB    $dst.lo,$mem\n\t"
 8938             "SBB    $dst.hi,$mem+4" %}
 8939   opcode(0x2B, 0x1B);
 8940   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8941   ins_pipe( ialu_reg_long_mem );
 8942 %}
 8943 
 8944 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8945   match(Set dst (SubL zero dst));
 8946   effect(KILL cr);
 8947   ins_cost(300);
 8948   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8949   ins_encode( neg_long(dst) );
 8950   ins_pipe( ialu_reg_reg_long );
 8951 %}
 8952 
 8953 // And Long Register with Register
 8954 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8955   match(Set dst (AndL dst src));
 8956   effect(KILL cr);
 8957   format %{ "AND    $dst.lo,$src.lo\n\t"
 8958             "AND    $dst.hi,$src.hi" %}
 8959   opcode(0x23,0x23);
 8960   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8961   ins_pipe( ialu_reg_reg_long );
 8962 %}
 8963 
 8964 // And Long Register with Immediate
 8965 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8966   match(Set dst (AndL dst src));
 8967   effect(KILL cr);
 8968   format %{ "AND    $dst.lo,$src.lo\n\t"
 8969             "AND    $dst.hi,$src.hi" %}
 8970   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8971   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8972   ins_pipe( ialu_reg_long );
 8973 %}
 8974 
 8975 // And Long Register with Memory
 8976 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8977   match(Set dst (AndL dst (LoadL mem)));
 8978   effect(KILL cr);
 8979   ins_cost(125);
 8980   format %{ "AND    $dst.lo,$mem\n\t"
 8981             "AND    $dst.hi,$mem+4" %}
 8982   opcode(0x23, 0x23);
 8983   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8984   ins_pipe( ialu_reg_long_mem );
 8985 %}
 8986 
 8987 // BMI1 instructions
 8988 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8989   match(Set dst (AndL (XorL src1 minus_1) src2));
 8990   predicate(UseBMI1Instructions);
 8991   effect(KILL cr, TEMP dst);
 8992 
 8993   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8994             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8995          %}
 8996 
 8997   ins_encode %{
 8998     Register Rdst = $dst$$Register;
 8999     Register Rsrc1 = $src1$$Register;
 9000     Register Rsrc2 = $src2$$Register;
 9001     __ andnl(Rdst, Rsrc1, Rsrc2);
 9002     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9003   %}
 9004   ins_pipe(ialu_reg_reg_long);
 9005 %}
 9006 
 9007 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9008   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9009   predicate(UseBMI1Instructions);
 9010   effect(KILL cr, TEMP dst);
 9011 
 9012   ins_cost(125);
 9013   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9014             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9015          %}
 9016 
 9017   ins_encode %{
 9018     Register Rdst = $dst$$Register;
 9019     Register Rsrc1 = $src1$$Register;
 9020     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9021 
 9022     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9023     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9024   %}
 9025   ins_pipe(ialu_reg_mem);
 9026 %}
 9027 
 9028 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9029   match(Set dst (AndL (SubL imm_zero src) src));
 9030   predicate(UseBMI1Instructions);
 9031   effect(KILL cr, TEMP dst);
 9032 
 9033   format %{ "MOVL   $dst.hi, 0\n\t"
 9034             "BLSIL  $dst.lo, $src.lo\n\t"
 9035             "JNZ    done\n\t"
 9036             "BLSIL  $dst.hi, $src.hi\n"
 9037             "done:"
 9038          %}
 9039 
 9040   ins_encode %{
 9041     Label done;
 9042     Register Rdst = $dst$$Register;
 9043     Register Rsrc = $src$$Register;
 9044     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9045     __ blsil(Rdst, Rsrc);
 9046     __ jccb(Assembler::notZero, done);
 9047     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9048     __ bind(done);
 9049   %}
 9050   ins_pipe(ialu_reg);
 9051 %}
 9052 
 9053 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9054   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9055   predicate(UseBMI1Instructions);
 9056   effect(KILL cr, TEMP dst);
 9057 
 9058   ins_cost(125);
 9059   format %{ "MOVL   $dst.hi, 0\n\t"
 9060             "BLSIL  $dst.lo, $src\n\t"
 9061             "JNZ    done\n\t"
 9062             "BLSIL  $dst.hi, $src+4\n"
 9063             "done:"
 9064          %}
 9065 
 9066   ins_encode %{
 9067     Label done;
 9068     Register Rdst = $dst$$Register;
 9069     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9070 
 9071     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9072     __ blsil(Rdst, $src$$Address);
 9073     __ jccb(Assembler::notZero, done);
 9074     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9075     __ bind(done);
 9076   %}
 9077   ins_pipe(ialu_reg_mem);
 9078 %}
 9079 
 9080 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9081 %{
 9082   match(Set dst (XorL (AddL src minus_1) src));
 9083   predicate(UseBMI1Instructions);
 9084   effect(KILL cr, TEMP dst);
 9085 
 9086   format %{ "MOVL    $dst.hi, 0\n\t"
 9087             "BLSMSKL $dst.lo, $src.lo\n\t"
 9088             "JNC     done\n\t"
 9089             "BLSMSKL $dst.hi, $src.hi\n"
 9090             "done:"
 9091          %}
 9092 
 9093   ins_encode %{
 9094     Label done;
 9095     Register Rdst = $dst$$Register;
 9096     Register Rsrc = $src$$Register;
 9097     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9098     __ blsmskl(Rdst, Rsrc);
 9099     __ jccb(Assembler::carryClear, done);
 9100     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9101     __ bind(done);
 9102   %}
 9103 
 9104   ins_pipe(ialu_reg);
 9105 %}
 9106 
 9107 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9108 %{
 9109   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9110   predicate(UseBMI1Instructions);
 9111   effect(KILL cr, TEMP dst);
 9112 
 9113   ins_cost(125);
 9114   format %{ "MOVL    $dst.hi, 0\n\t"
 9115             "BLSMSKL $dst.lo, $src\n\t"
 9116             "JNC     done\n\t"
 9117             "BLSMSKL $dst.hi, $src+4\n"
 9118             "done:"
 9119          %}
 9120 
 9121   ins_encode %{
 9122     Label done;
 9123     Register Rdst = $dst$$Register;
 9124     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9125 
 9126     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9127     __ blsmskl(Rdst, $src$$Address);
 9128     __ jccb(Assembler::carryClear, done);
 9129     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9130     __ bind(done);
 9131   %}
 9132 
 9133   ins_pipe(ialu_reg_mem);
 9134 %}
 9135 
 9136 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9137 %{
 9138   match(Set dst (AndL (AddL src minus_1) src) );
 9139   predicate(UseBMI1Instructions);
 9140   effect(KILL cr, TEMP dst);
 9141 
 9142   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9143             "BLSRL  $dst.lo, $src.lo\n\t"
 9144             "JNC    done\n\t"
 9145             "BLSRL  $dst.hi, $src.hi\n"
 9146             "done:"
 9147   %}
 9148 
 9149   ins_encode %{
 9150     Label done;
 9151     Register Rdst = $dst$$Register;
 9152     Register Rsrc = $src$$Register;
 9153     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9154     __ blsrl(Rdst, Rsrc);
 9155     __ jccb(Assembler::carryClear, done);
 9156     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9157     __ bind(done);
 9158   %}
 9159 
 9160   ins_pipe(ialu_reg);
 9161 %}
 9162 
 9163 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9164 %{
 9165   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9166   predicate(UseBMI1Instructions);
 9167   effect(KILL cr, TEMP dst);
 9168 
 9169   ins_cost(125);
 9170   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9171             "BLSRL  $dst.lo, $src\n\t"
 9172             "JNC    done\n\t"
 9173             "BLSRL  $dst.hi, $src+4\n"
 9174             "done:"
 9175   %}
 9176 
 9177   ins_encode %{
 9178     Label done;
 9179     Register Rdst = $dst$$Register;
 9180     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9181     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9182     __ blsrl(Rdst, $src$$Address);
 9183     __ jccb(Assembler::carryClear, done);
 9184     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9185     __ bind(done);
 9186   %}
 9187 
 9188   ins_pipe(ialu_reg_mem);
 9189 %}
 9190 
 9191 // Or Long Register with Register
 9192 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9193   match(Set dst (OrL dst src));
 9194   effect(KILL cr);
 9195   format %{ "OR     $dst.lo,$src.lo\n\t"
 9196             "OR     $dst.hi,$src.hi" %}
 9197   opcode(0x0B,0x0B);
 9198   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9199   ins_pipe( ialu_reg_reg_long );
 9200 %}
 9201 
 9202 // Or Long Register with Immediate
 9203 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9204   match(Set dst (OrL dst src));
 9205   effect(KILL cr);
 9206   format %{ "OR     $dst.lo,$src.lo\n\t"
 9207             "OR     $dst.hi,$src.hi" %}
 9208   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9209   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9210   ins_pipe( ialu_reg_long );
 9211 %}
 9212 
 9213 // Or Long Register with Memory
 9214 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9215   match(Set dst (OrL dst (LoadL mem)));
 9216   effect(KILL cr);
 9217   ins_cost(125);
 9218   format %{ "OR     $dst.lo,$mem\n\t"
 9219             "OR     $dst.hi,$mem+4" %}
 9220   opcode(0x0B,0x0B);
 9221   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9222   ins_pipe( ialu_reg_long_mem );
 9223 %}
 9224 
 9225 // Xor Long Register with Register
 9226 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9227   match(Set dst (XorL dst src));
 9228   effect(KILL cr);
 9229   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9230             "XOR    $dst.hi,$src.hi" %}
 9231   opcode(0x33,0x33);
 9232   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9233   ins_pipe( ialu_reg_reg_long );
 9234 %}
 9235 
 9236 // Xor Long Register with Immediate -1
 9237 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9238   match(Set dst (XorL dst imm));
 9239   format %{ "NOT    $dst.lo\n\t"
 9240             "NOT    $dst.hi" %}
 9241   ins_encode %{
 9242      __ notl($dst$$Register);
 9243      __ notl(HIGH_FROM_LOW($dst$$Register));
 9244   %}
 9245   ins_pipe( ialu_reg_long );
 9246 %}
 9247 
 9248 // Xor Long Register with Immediate
 9249 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9250   match(Set dst (XorL dst src));
 9251   effect(KILL cr);
 9252   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9253             "XOR    $dst.hi,$src.hi" %}
 9254   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9255   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9256   ins_pipe( ialu_reg_long );
 9257 %}
 9258 
 9259 // Xor Long Register with Memory
 9260 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9261   match(Set dst (XorL dst (LoadL mem)));
 9262   effect(KILL cr);
 9263   ins_cost(125);
 9264   format %{ "XOR    $dst.lo,$mem\n\t"
 9265             "XOR    $dst.hi,$mem+4" %}
 9266   opcode(0x33,0x33);
 9267   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9268   ins_pipe( ialu_reg_long_mem );
 9269 %}
 9270 
 9271 // Shift Left Long by 1
 9272 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9273   predicate(UseNewLongLShift);
 9274   match(Set dst (LShiftL dst cnt));
 9275   effect(KILL cr);
 9276   ins_cost(100);
 9277   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9278             "ADC    $dst.hi,$dst.hi" %}
 9279   ins_encode %{
 9280     __ addl($dst$$Register,$dst$$Register);
 9281     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9282   %}
 9283   ins_pipe( ialu_reg_long );
 9284 %}
 9285 
 9286 // Shift Left Long by 2
 9287 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9288   predicate(UseNewLongLShift);
 9289   match(Set dst (LShiftL dst cnt));
 9290   effect(KILL cr);
 9291   ins_cost(100);
 9292   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9293             "ADC    $dst.hi,$dst.hi\n\t"
 9294             "ADD    $dst.lo,$dst.lo\n\t"
 9295             "ADC    $dst.hi,$dst.hi" %}
 9296   ins_encode %{
 9297     __ addl($dst$$Register,$dst$$Register);
 9298     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9299     __ addl($dst$$Register,$dst$$Register);
 9300     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9301   %}
 9302   ins_pipe( ialu_reg_long );
 9303 %}
 9304 
 9305 // Shift Left Long by 3
 9306 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9307   predicate(UseNewLongLShift);
 9308   match(Set dst (LShiftL dst cnt));
 9309   effect(KILL cr);
 9310   ins_cost(100);
 9311   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9312             "ADC    $dst.hi,$dst.hi\n\t"
 9313             "ADD    $dst.lo,$dst.lo\n\t"
 9314             "ADC    $dst.hi,$dst.hi\n\t"
 9315             "ADD    $dst.lo,$dst.lo\n\t"
 9316             "ADC    $dst.hi,$dst.hi" %}
 9317   ins_encode %{
 9318     __ addl($dst$$Register,$dst$$Register);
 9319     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9320     __ addl($dst$$Register,$dst$$Register);
 9321     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9322     __ addl($dst$$Register,$dst$$Register);
 9323     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9324   %}
 9325   ins_pipe( ialu_reg_long );
 9326 %}
 9327 
 9328 // Shift Left Long by 1-31
 9329 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9330   match(Set dst (LShiftL dst cnt));
 9331   effect(KILL cr);
 9332   ins_cost(200);
 9333   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9334             "SHL    $dst.lo,$cnt" %}
 9335   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9336   ins_encode( move_long_small_shift(dst,cnt) );
 9337   ins_pipe( ialu_reg_long );
 9338 %}
 9339 
 9340 // Shift Left Long by 32-63
 9341 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9342   match(Set dst (LShiftL dst cnt));
 9343   effect(KILL cr);
 9344   ins_cost(300);
 9345   format %{ "MOV    $dst.hi,$dst.lo\n"
 9346           "\tSHL    $dst.hi,$cnt-32\n"
 9347           "\tXOR    $dst.lo,$dst.lo" %}
 9348   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9349   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9350   ins_pipe( ialu_reg_long );
 9351 %}
 9352 
 9353 // Shift Left Long by variable
 9354 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9355   match(Set dst (LShiftL dst shift));
 9356   effect(KILL cr);
 9357   ins_cost(500+200);
 9358   size(17);
 9359   format %{ "TEST   $shift,32\n\t"
 9360             "JEQ,s  small\n\t"
 9361             "MOV    $dst.hi,$dst.lo\n\t"
 9362             "XOR    $dst.lo,$dst.lo\n"
 9363     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9364             "SHL    $dst.lo,$shift" %}
 9365   ins_encode( shift_left_long( dst, shift ) );
 9366   ins_pipe( pipe_slow );
 9367 %}
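
// The variable-count long shifts (this rule and the logical/arithmetic
// right-shift variants below) test bit 5 of the count (TEST $shift,32)
// because the 32-bit shift and SHLD/SHRD instructions only use the count
// modulo 32.  For counts of 32-63 the words are first moved across and the
// vacated word is zeroed (or sign-filled for SAR), after which the SHLD/SHRD
// plus plain shift apply the remaining count & 31 on either path.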
 9368 
 9369 // Shift Right Long by 1-31
 9370 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9371   match(Set dst (URShiftL dst cnt));
 9372   effect(KILL cr);
 9373   ins_cost(200);
 9374   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9375             "SHR    $dst.hi,$cnt" %}
 9376   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9377   ins_encode( move_long_small_shift(dst,cnt) );
 9378   ins_pipe( ialu_reg_long );
 9379 %}
 9380 
 9381 // Shift Right Long by 32-63
 9382 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9383   match(Set dst (URShiftL dst cnt));
 9384   effect(KILL cr);
 9385   ins_cost(300);
 9386   format %{ "MOV    $dst.lo,$dst.hi\n"
 9387           "\tSHR    $dst.lo,$cnt-32\n"
 9388           "\tXOR    $dst.hi,$dst.hi" %}
 9389   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9390   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9391   ins_pipe( ialu_reg_long );
 9392 %}
 9393 
 9394 // Shift Right Long by variable
 9395 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9396   match(Set dst (URShiftL dst shift));
 9397   effect(KILL cr);
 9398   ins_cost(600);
 9399   size(17);
 9400   format %{ "TEST   $shift,32\n\t"
 9401             "JEQ,s  small\n\t"
 9402             "MOV    $dst.lo,$dst.hi\n\t"
 9403             "XOR    $dst.hi,$dst.hi\n"
 9404     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9405             "SHR    $dst.hi,$shift" %}
 9406   ins_encode( shift_right_long( dst, shift ) );
 9407   ins_pipe( pipe_slow );
 9408 %}
 9409 
// Shift Right Arithmetic Long by 1-31
 9411 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9412   match(Set dst (RShiftL dst cnt));
 9413   effect(KILL cr);
 9414   ins_cost(200);
 9415   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9416             "SAR    $dst.hi,$cnt" %}
 9417   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9418   ins_encode( move_long_small_shift(dst,cnt) );
 9419   ins_pipe( ialu_reg_long );
 9420 %}
 9421 
// Shift Right Arithmetic Long by 32-63
 9423 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9424   match(Set dst (RShiftL dst cnt));
 9425   effect(KILL cr);
 9426   ins_cost(300);
 9427   format %{ "MOV    $dst.lo,$dst.hi\n"
 9428           "\tSAR    $dst.lo,$cnt-32\n"
 9429           "\tSAR    $dst.hi,31" %}
 9430   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9431   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9432   ins_pipe( ialu_reg_long );
 9433 %}
 9434 
// Shift Right Arithmetic Long by variable
 9436 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9437   match(Set dst (RShiftL dst shift));
 9438   effect(KILL cr);
 9439   ins_cost(600);
 9440   size(18);
 9441   format %{ "TEST   $shift,32\n\t"
 9442             "JEQ,s  small\n\t"
 9443             "MOV    $dst.lo,$dst.hi\n\t"
 9444             "SAR    $dst.hi,31\n"
 9445     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9446             "SAR    $dst.hi,$shift" %}
 9447   ins_encode( shift_right_arith_long( dst, shift ) );
 9448   ins_pipe( pipe_slow );
 9449 %}
 9450 
 9451 
 9452 //----------Double Instructions------------------------------------------------
 9453 // Double Math
 9454 
 9455 // Compare & branch
 9456 
 9457 // P6 version of float compare, sets condition codes in EFLAGS
 9458 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9459   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9460   match(Set cr (CmpD src1 src2));
 9461   effect(KILL rax);
 9462   ins_cost(150);
 9463   format %{ "FLD    $src1\n\t"
 9464             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9465             "JNP    exit\n\t"
 9466             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9467             "SAHF\n"
 9468      "exit:\tNOP               // avoid branch to branch" %}
 9469   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9470   ins_encode( Push_Reg_DPR(src1),
 9471               OpcP, RegOpc(src2),
 9472               cmpF_P6_fixup );
 9473   ins_pipe( pipe_slow );
 9474 %}
 9475 
 9476 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9477   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9478   match(Set cr (CmpD src1 src2));
 9479   ins_cost(150);
 9480   format %{ "FLD    $src1\n\t"
 9481             "FUCOMIP ST,$src2  // P6 instruction" %}
 9482   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9483   ins_encode( Push_Reg_DPR(src1),
 9484               OpcP, RegOpc(src2));
 9485   ins_pipe( pipe_slow );
 9486 %}
 9487 
 9488 // Compare & branch
 9489 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9490   predicate(UseSSE<=1);
 9491   match(Set cr (CmpD src1 src2));
 9492   effect(KILL rax);
 9493   ins_cost(200);
 9494   format %{ "FLD    $src1\n\t"
 9495             "FCOMp  $src2\n\t"
 9496             "FNSTSW AX\n\t"
 9497             "TEST   AX,0x400\n\t"
 9498             "JZ,s   flags\n\t"
 9499             "MOV    AH,1\t# unordered treat as LT\n"
 9500     "flags:\tSAHF" %}
 9501   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9502   ins_encode( Push_Reg_DPR(src1),
 9503               OpcP, RegOpc(src2),
 9504               fpu_flags);
 9505   ins_pipe( pipe_slow );
 9506 %}
 9507 
 9508 // Compare vs zero into -1,0,1
 9509 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9510   predicate(UseSSE<=1);
 9511   match(Set dst (CmpD3 src1 zero));
 9512   effect(KILL cr, KILL rax);
 9513   ins_cost(280);
 9514   format %{ "FTSTD  $dst,$src1" %}
 9515   opcode(0xE4, 0xD9);
 9516   ins_encode( Push_Reg_DPR(src1),
 9517               OpcS, OpcP, PopFPU,
 9518               CmpF_Result(dst));
 9519   ins_pipe( pipe_slow );
 9520 %}
 9521 
 9522 // Compare into -1,0,1
 9523 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9524   predicate(UseSSE<=1);
 9525   match(Set dst (CmpD3 src1 src2));
 9526   effect(KILL cr, KILL rax);
 9527   ins_cost(300);
 9528   format %{ "FCMPD  $dst,$src1,$src2" %}
 9529   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9530   ins_encode( Push_Reg_DPR(src1),
 9531               OpcP, RegOpc(src2),
 9532               CmpF_Result(dst));
 9533   ins_pipe( pipe_slow );
 9534 %}
 9535 
 9536 // float compare and set condition codes in EFLAGS by XMM regs
 9537 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9538   predicate(UseSSE>=2);
 9539   match(Set cr (CmpD src1 src2));
 9540   ins_cost(145);
 9541   format %{ "UCOMISD $src1,$src2\n\t"
 9542             "JNP,s   exit\n\t"
 9543             "PUSHF\t# saw NaN, set CF\n\t"
 9544             "AND     [rsp], #0xffffff2b\n\t"
 9545             "POPF\n"
 9546     "exit:" %}
 9547   ins_encode %{
 9548     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9549     emit_cmpfp_fixup(_masm);
 9550   %}
 9551   ins_pipe( pipe_slow );
 9552 %}
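
// Note on the fixup above (and on the other UCOMISS/UCOMISD rules): an
// unordered compare sets ZF=PF=CF=1.  The PUSHF/AND/POPF with mask
// 0xffffff2b keeps CF but clears PF, AF, ZF and SF, so a NaN operand ends up
// looking like an ordinary "below" (less-than) result to users of the
// unsigned flags register.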
 9553 
 9554 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9555   predicate(UseSSE>=2);
 9556   match(Set cr (CmpD src1 src2));
 9557   ins_cost(100);
 9558   format %{ "UCOMISD $src1,$src2" %}
 9559   ins_encode %{
 9560     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9561   %}
 9562   ins_pipe( pipe_slow );
 9563 %}
 9564 
 9565 // float compare and set condition codes in EFLAGS by XMM regs
 9566 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9567   predicate(UseSSE>=2);
 9568   match(Set cr (CmpD src1 (LoadD src2)));
 9569   ins_cost(145);
 9570   format %{ "UCOMISD $src1,$src2\n\t"
 9571             "JNP,s   exit\n\t"
 9572             "PUSHF\t# saw NaN, set CF\n\t"
 9573             "AND     [rsp], #0xffffff2b\n\t"
 9574             "POPF\n"
 9575     "exit:" %}
 9576   ins_encode %{
 9577     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9578     emit_cmpfp_fixup(_masm);
 9579   %}
 9580   ins_pipe( pipe_slow );
 9581 %}
 9582 
 9583 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9584   predicate(UseSSE>=2);
 9585   match(Set cr (CmpD src1 (LoadD src2)));
 9586   ins_cost(100);
 9587   format %{ "UCOMISD $src1,$src2" %}
 9588   ins_encode %{
 9589     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9590   %}
 9591   ins_pipe( pipe_slow );
 9592 %}
 9593 
 9594 // Compare into -1,0,1 in XMM
 9595 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9596   predicate(UseSSE>=2);
 9597   match(Set dst (CmpD3 src1 src2));
 9598   effect(KILL cr);
 9599   ins_cost(255);
 9600   format %{ "UCOMISD $src1, $src2\n\t"
 9601             "MOV     $dst, #-1\n\t"
 9602             "JP,s    done\n\t"
 9603             "JB,s    done\n\t"
 9604             "SETNE   $dst\n\t"
 9605             "MOVZB   $dst, $dst\n"
 9606     "done:" %}
 9607   ins_encode %{
 9608     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9609     emit_cmpfp3(_masm, $dst$$Register);
 9610   %}
 9611   ins_pipe( pipe_slow );
 9612 %}
 9613 
 9614 // Compare into -1,0,1 in XMM and memory
 9615 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9616   predicate(UseSSE>=2);
 9617   match(Set dst (CmpD3 src1 (LoadD src2)));
 9618   effect(KILL cr);
 9619   ins_cost(275);
 9620   format %{ "UCOMISD $src1, $src2\n\t"
 9621             "MOV     $dst, #-1\n\t"
 9622             "JP,s    done\n\t"
 9623             "JB,s    done\n\t"
 9624             "SETNE   $dst\n\t"
 9625             "MOVZB   $dst, $dst\n"
 9626     "done:" %}
 9627   ins_encode %{
 9628     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9629     emit_cmpfp3(_masm, $dst$$Register);
 9630   %}
 9631   ins_pipe( pipe_slow );
 9632 %}
 9633 
 9634 
 9635 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9636   predicate (UseSSE <=1);
 9637   match(Set dst (SubD dst src));
 9638 
 9639   format %{ "FLD    $src\n\t"
 9640             "DSUBp  $dst,ST" %}
 9641   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9642   ins_cost(150);
 9643   ins_encode( Push_Reg_DPR(src),
 9644               OpcP, RegOpc(dst) );
 9645   ins_pipe( fpu_reg_reg );
 9646 %}
 9647 
 9648 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9649   predicate (UseSSE <=1);
 9650   match(Set dst (RoundDouble (SubD src1 src2)));
 9651   ins_cost(250);
 9652 
 9653   format %{ "FLD    $src2\n\t"
 9654             "DSUB   ST,$src1\n\t"
 9655             "FSTP_D $dst\t# D-round" %}
 9656   opcode(0xD8, 0x5);
 9657   ins_encode( Push_Reg_DPR(src2),
 9658               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9659   ins_pipe( fpu_mem_reg_reg );
 9660 %}
 9661 
 9662 
 9663 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9664   predicate (UseSSE <=1);
 9665   match(Set dst (SubD dst (LoadD src)));
 9666   ins_cost(150);
 9667 
 9668   format %{ "FLD    $src\n\t"
 9669             "DSUBp  $dst,ST" %}
 9670   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9671   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9672               OpcP, RegOpc(dst) );
 9673   ins_pipe( fpu_reg_mem );
 9674 %}
 9675 
 9676 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9677   predicate (UseSSE<=1);
 9678   match(Set dst (AbsD src));
 9679   ins_cost(100);
 9680   format %{ "FABS" %}
 9681   opcode(0xE1, 0xD9);
 9682   ins_encode( OpcS, OpcP );
 9683   ins_pipe( fpu_reg_reg );
 9684 %}
 9685 
 9686 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9687   predicate(UseSSE<=1);
 9688   match(Set dst (NegD src));
 9689   ins_cost(100);
 9690   format %{ "FCHS" %}
 9691   opcode(0xE0, 0xD9);
 9692   ins_encode( OpcS, OpcP );
 9693   ins_pipe( fpu_reg_reg );
 9694 %}
 9695 
 9696 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9697   predicate(UseSSE<=1);
 9698   match(Set dst (AddD dst src));
 9699   format %{ "FLD    $src\n\t"
 9700             "DADD   $dst,ST" %}
 9701   size(4);
 9702   ins_cost(150);
 9703   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9704   ins_encode( Push_Reg_DPR(src),
 9705               OpcP, RegOpc(dst) );
 9706   ins_pipe( fpu_reg_reg );
 9707 %}
 9708 
 9709 
 9710 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9711   predicate(UseSSE<=1);
 9712   match(Set dst (RoundDouble (AddD src1 src2)));
 9713   ins_cost(250);
 9714 
 9715   format %{ "FLD    $src2\n\t"
 9716             "DADD   ST,$src1\n\t"
 9717             "FSTP_D $dst\t# D-round" %}
 9718   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9719   ins_encode( Push_Reg_DPR(src2),
 9720               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9721   ins_pipe( fpu_mem_reg_reg );
 9722 %}
 9723 
 9724 
 9725 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9726   predicate(UseSSE<=1);
 9727   match(Set dst (AddD dst (LoadD src)));
 9728   ins_cost(150);
 9729 
 9730   format %{ "FLD    $src\n\t"
 9731             "DADDp  $dst,ST" %}
 9732   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9733   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9734               OpcP, RegOpc(dst) );
 9735   ins_pipe( fpu_reg_mem );
 9736 %}
 9737 
 9738 // add-to-memory
 9739 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9740   predicate(UseSSE<=1);
 9741   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9742   ins_cost(150);
 9743 
 9744   format %{ "FLD_D  $dst\n\t"
 9745             "DADD   ST,$src\n\t"
 9746             "FST_D  $dst" %}
 9747   opcode(0xDD, 0x0);
 9748   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9749               Opcode(0xD8), RegOpc(src),
 9750               set_instruction_start,
 9751               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9752   ins_pipe( fpu_reg_mem );
 9753 %}
 9754 
 9755 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9756   predicate(UseSSE<=1);
 9757   match(Set dst (AddD dst con));
 9758   ins_cost(125);
 9759   format %{ "FLD1\n\t"
 9760             "DADDp  $dst,ST" %}
 9761   ins_encode %{
 9762     __ fld1();
 9763     __ faddp($dst$$reg);
 9764   %}
 9765   ins_pipe(fpu_reg);
 9766 %}
 9767 
 9768 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9769   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9770   match(Set dst (AddD dst con));
 9771   ins_cost(200);
 9772   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9773             "DADDp  $dst,ST" %}
 9774   ins_encode %{
 9775     __ fld_d($constantaddress($con));
 9776     __ faddp($dst$$reg);
 9777   %}
 9778   ins_pipe(fpu_reg_mem);
 9779 %}
 9780 
 9781 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9782   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9783   match(Set dst (RoundDouble (AddD src con)));
 9784   ins_cost(200);
 9785   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9786             "DADD   ST,$src\n\t"
 9787             "FSTP_D $dst\t# D-round" %}
 9788   ins_encode %{
 9789     __ fld_d($constantaddress($con));
 9790     __ fadd($src$$reg);
 9791     __ fstp_d(Address(rsp, $dst$$disp));
 9792   %}
 9793   ins_pipe(fpu_mem_reg_con);
 9794 %}
 9795 
 9796 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9797   predicate(UseSSE<=1);
 9798   match(Set dst (MulD dst src));
 9799   format %{ "FLD    $src\n\t"
 9800             "DMULp  $dst,ST" %}
 9801   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9802   ins_cost(150);
 9803   ins_encode( Push_Reg_DPR(src),
 9804               OpcP, RegOpc(dst) );
 9805   ins_pipe( fpu_reg_reg );
 9806 %}
 9807 
 9808 // Strict FP instruction biases argument before multiply then
 9809 // biases result to avoid double rounding of subnormals.
 9810 //
 9811 // scale arg1 by multiplying arg1 by 2^(-15360)
 9812 // load arg2
 9813 // multiply scaled arg1 by arg2
 9814 // rescale product by 2^(15360)
 9815 //
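//
// A rough sketch of the effect, using the constants named above (the C-like
// form is illustrative only):
//
//   dst = dst * bias1;    // bias1 == 2^-15360: scale away from the subnormal range
//   dst = dst * src;      // the actual multiply, in extended precision
//   dst = dst * bias2;    // bias2 == 2^+15360: scale back; only this last
//                         // operation can round to a subnormal, and it
//                         // rounds exactly once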
 9816 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9817   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9818   match(Set dst (MulD dst src));
 9819   ins_cost(1);   // Select this instruction for all FP double multiplies
 9820 
 9821   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9822             "DMULp  $dst,ST\n\t"
 9823             "FLD    $src\n\t"
 9824             "DMULp  $dst,ST\n\t"
 9825             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9826             "DMULp  $dst,ST\n\t" %}
 9827   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9828   ins_encode( strictfp_bias1(dst),
 9829               Push_Reg_DPR(src),
 9830               OpcP, RegOpc(dst),
 9831               strictfp_bias2(dst) );
 9832   ins_pipe( fpu_reg_reg );
 9833 %}
 9834 
 9835 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9836   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9837   match(Set dst (MulD dst con));
 9838   ins_cost(200);
 9839   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9840             "DMULp  $dst,ST" %}
 9841   ins_encode %{
 9842     __ fld_d($constantaddress($con));
 9843     __ fmulp($dst$$reg);
 9844   %}
 9845   ins_pipe(fpu_reg_mem);
 9846 %}
 9847 
 9848 
 9849 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9850   predicate( UseSSE<=1 );
 9851   match(Set dst (MulD dst (LoadD src)));
 9852   ins_cost(200);
 9853   format %{ "FLD_D  $src\n\t"
 9854             "DMULp  $dst,ST" %}
 9855   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9856   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9857               OpcP, RegOpc(dst) );
 9858   ins_pipe( fpu_reg_mem );
 9859 %}
 9860 
 9861 //
 9862 // Cisc-alternate to reg-reg multiply
 9863 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9864   predicate( UseSSE<=1 );
 9865   match(Set dst (MulD src (LoadD mem)));
 9866   ins_cost(250);
 9867   format %{ "FLD_D  $mem\n\t"
 9868             "DMUL   ST,$src\n\t"
 9869             "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
 9871   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9872               OpcReg_FPR(src),
 9873               Pop_Reg_DPR(dst) );
 9874   ins_pipe( fpu_reg_reg_mem );
 9875 %}
 9876 
 9877 
 9878 // MACRO3 -- addDPR a mulDPR
 9879 // This instruction is a '2-address' instruction in that the result goes
 9880 // back to src2.  This eliminates a move from the macro; possibly the
 9881 // register allocator will have to add it back (and maybe not).
 9882 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9883   predicate( UseSSE<=1 );
 9884   match(Set src2 (AddD (MulD src0 src1) src2));
 9885   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9886             "DMUL   ST,$src1\n\t"
 9887             "DADDp  $src2,ST" %}
 9888   ins_cost(250);
 9889   opcode(0xDD); /* LoadD DD /0 */
 9890   ins_encode( Push_Reg_FPR(src0),
 9891               FMul_ST_reg(src1),
 9892               FAddP_reg_ST(src2) );
 9893   ins_pipe( fpu_reg_reg_reg );
 9894 %}
 9895 
 9896 
 9897 // MACRO3 -- subDPR a mulDPR
 9898 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9899   predicate( UseSSE<=1 );
 9900   match(Set src2 (SubD (MulD src0 src1) src2));
 9901   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9902             "DMUL   ST,$src1\n\t"
 9903             "DSUBRp $src2,ST" %}
 9904   ins_cost(250);
 9905   ins_encode( Push_Reg_FPR(src0),
 9906               FMul_ST_reg(src1),
 9907               Opcode(0xDE), Opc_plus(0xE0,src2));
 9908   ins_pipe( fpu_reg_reg_reg );
 9909 %}
 9910 
 9911 
 9912 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9913   predicate( UseSSE<=1 );
 9914   match(Set dst (DivD dst src));
 9915 
 9916   format %{ "FLD    $src\n\t"
 9917             "FDIVp  $dst,ST" %}
 9918   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9919   ins_cost(150);
 9920   ins_encode( Push_Reg_DPR(src),
 9921               OpcP, RegOpc(dst) );
 9922   ins_pipe( fpu_reg_reg );
 9923 %}
 9924 
 9925 // Strict FP instruction biases argument before division then
 9926 // biases result, to avoid double rounding of subnormals.
 9927 //
 9928 // scale dividend by multiplying dividend by 2^(-15360)
 9929 // load divisor
 9930 // divide scaled dividend by divisor
 9931 // rescale quotient by 2^(15360)
 9932 //
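// (This is the same biasing trick as in strictfp_mulDPR_reg above: the
// dividend is scaled down by _fpu_subnormal_bias1 before the divide and the
// quotient is scaled back up by _fpu_subnormal_bias2, so a subnormal quotient
// is rounded only once.)
//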
 9933 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9938 
 9939   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9940             "DMULp  $dst,ST\n\t"
 9941             "FLD    $src\n\t"
 9942             "FDIVp  $dst,ST\n\t"
 9943             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9944             "DMULp  $dst,ST\n\t" %}
 9945   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9946   ins_encode( strictfp_bias1(dst),
 9947               Push_Reg_DPR(src),
 9948               OpcP, RegOpc(dst),
 9949               strictfp_bias2(dst) );
 9950   ins_pipe( fpu_reg_reg );
 9951 %}
 9952 
 9953 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9954   predicate(UseSSE<=1);
 9955   match(Set dst (ModD dst src));
 9956   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9957 
 9958   format %{ "DMOD   $dst,$src" %}
 9959   ins_cost(250);
 9960   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9961               emitModDPR(),
 9962               Push_Result_Mod_DPR(src),
 9963               Pop_Reg_DPR(dst));
 9964   ins_pipe( pipe_slow );
 9965 %}
 9966 
 9967 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9968   predicate(UseSSE>=2);
 9969   match(Set dst (ModD src0 src1));
 9970   effect(KILL rax, KILL cr);
 9971 
 9972   format %{ "SUB    ESP,8\t # DMOD\n"
 9973           "\tMOVSD  [ESP+0],$src1\n"
 9974           "\tFLD_D  [ESP+0]\n"
 9975           "\tMOVSD  [ESP+0],$src0\n"
 9976           "\tFLD_D  [ESP+0]\n"
 9977      "loop:\tFPREM\n"
 9978           "\tFWAIT\n"
 9979           "\tFNSTSW AX\n"
 9980           "\tSAHF\n"
 9981           "\tJP     loop\n"
 9982           "\tFSTP_D [ESP+0]\n"
 9983           "\tMOVSD  $dst,[ESP+0]\n"
 9984           "\tADD    ESP,8\n"
 9985           "\tFSTP   ST0\t # Restore FPU Stack"
 9986     %}
 9987   ins_cost(250);
 9988   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9989   ins_pipe( pipe_slow );
 9990 %}
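
// The FPREM loop above works because FPREM may perform only a partial
// reduction for very large quotients: it sets C2 in the FPU status word while
// the reduction is incomplete, FNSTSW AX / SAHF copies C2 into PF, and the JP
// branch repeats FPREM until the remainder is final.  The same pattern
// appears in modF_reg further below.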
 9991 
 9992 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9993   predicate (UseSSE<=1);
 9994   match(Set dst(AtanD dst src));
 9995   format %{ "DATA   $dst,$src" %}
 9996   opcode(0xD9, 0xF3);
 9997   ins_encode( Push_Reg_DPR(src),
 9998               OpcP, OpcS, RegOpc(dst) );
 9999   ins_pipe( pipe_slow );
10000 %}
10001 
10002 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10003   predicate (UseSSE>=2);
10004   match(Set dst(AtanD dst src));
10005   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10006   format %{ "DATA   $dst,$src" %}
10007   opcode(0xD9, 0xF3);
10008   ins_encode( Push_SrcD(src),
10009               OpcP, OpcS, Push_ResultD(dst) );
10010   ins_pipe( pipe_slow );
10011 %}
10012 
10013 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10014   predicate (UseSSE<=1);
10015   match(Set dst (SqrtD src));
10016   format %{ "DSQRT  $dst,$src" %}
10017   opcode(0xFA, 0xD9);
10018   ins_encode( Push_Reg_DPR(src),
10019               OpcS, OpcP, Pop_Reg_DPR(dst) );
10020   ins_pipe( pipe_slow );
10021 %}
10022 
10023 //-------------Float Instructions-------------------------------
10024 // Float Math
10025 
10026 // Code for float compare:
10027 //     fcompp();
10028 //     fwait(); fnstsw_ax();
10029 //     sahf();
10030 //     movl(dst, unordered_result);
10031 //     jcc(Assembler::parity, exit);
10032 //     movl(dst, less_result);
10033 //     jcc(Assembler::below, exit);
10034 //     movl(dst, equal_result);
10035 //     jcc(Assembler::equal, exit);
10036 //     movl(dst, greater_result);
10037 //   exit:
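//
// (Roughly the sequence behind the CmpF_Result encoding used by the
// CmpF3/CmpD3 rules in this section; the unordered_result path is what makes
// NaN operands produce the "unordered" answer.)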
10038 
10039 // P6 version of float compare, sets condition codes in EFLAGS
10040 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10041   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10042   match(Set cr (CmpF src1 src2));
10043   effect(KILL rax);
10044   ins_cost(150);
10045   format %{ "FLD    $src1\n\t"
10046             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10047             "JNP    exit\n\t"
10048             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10049             "SAHF\n"
10050      "exit:\tNOP               // avoid branch to branch" %}
10051   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10052   ins_encode( Push_Reg_DPR(src1),
10053               OpcP, RegOpc(src2),
10054               cmpF_P6_fixup );
10055   ins_pipe( pipe_slow );
10056 %}
10057 
10058 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10059   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10060   match(Set cr (CmpF src1 src2));
10061   ins_cost(100);
10062   format %{ "FLD    $src1\n\t"
10063             "FUCOMIP ST,$src2  // P6 instruction" %}
10064   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10065   ins_encode( Push_Reg_DPR(src1),
10066               OpcP, RegOpc(src2));
10067   ins_pipe( pipe_slow );
10068 %}
10069 
10070 
10071 // Compare & branch
10072 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10073   predicate(UseSSE == 0);
10074   match(Set cr (CmpF src1 src2));
10075   effect(KILL rax);
10076   ins_cost(200);
10077   format %{ "FLD    $src1\n\t"
10078             "FCOMp  $src2\n\t"
10079             "FNSTSW AX\n\t"
10080             "TEST   AX,0x400\n\t"
10081             "JZ,s   flags\n\t"
10082             "MOV    AH,1\t# unordered treat as LT\n"
10083     "flags:\tSAHF" %}
10084   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10085   ins_encode( Push_Reg_DPR(src1),
10086               OpcP, RegOpc(src2),
10087               fpu_flags);
10088   ins_pipe( pipe_slow );
10089 %}
10090 
10091 // Compare vs zero into -1,0,1
10092 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10093   predicate(UseSSE == 0);
10094   match(Set dst (CmpF3 src1 zero));
10095   effect(KILL cr, KILL rax);
10096   ins_cost(280);
10097   format %{ "FTSTF  $dst,$src1" %}
10098   opcode(0xE4, 0xD9);
10099   ins_encode( Push_Reg_DPR(src1),
10100               OpcS, OpcP, PopFPU,
10101               CmpF_Result(dst));
10102   ins_pipe( pipe_slow );
10103 %}
10104 
10105 // Compare into -1,0,1
10106 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10107   predicate(UseSSE == 0);
10108   match(Set dst (CmpF3 src1 src2));
10109   effect(KILL cr, KILL rax);
10110   ins_cost(300);
10111   format %{ "FCMPF  $dst,$src1,$src2" %}
10112   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10113   ins_encode( Push_Reg_DPR(src1),
10114               OpcP, RegOpc(src2),
10115               CmpF_Result(dst));
10116   ins_pipe( pipe_slow );
10117 %}
10118 
10119 // float compare and set condition codes in EFLAGS by XMM regs
10120 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10121   predicate(UseSSE>=1);
10122   match(Set cr (CmpF src1 src2));
10123   ins_cost(145);
10124   format %{ "UCOMISS $src1,$src2\n\t"
10125             "JNP,s   exit\n\t"
10126             "PUSHF\t# saw NaN, set CF\n\t"
10127             "AND     [rsp], #0xffffff2b\n\t"
10128             "POPF\n"
10129     "exit:" %}
10130   ins_encode %{
10131     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10132     emit_cmpfp_fixup(_masm);
10133   %}
10134   ins_pipe( pipe_slow );
10135 %}
10136 
10137 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10138   predicate(UseSSE>=1);
10139   match(Set cr (CmpF src1 src2));
10140   ins_cost(100);
10141   format %{ "UCOMISS $src1,$src2" %}
10142   ins_encode %{
10143     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10144   %}
10145   ins_pipe( pipe_slow );
10146 %}
10147 
10148 // float compare and set condition codes in EFLAGS by XMM regs
10149 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10150   predicate(UseSSE>=1);
10151   match(Set cr (CmpF src1 (LoadF src2)));
10152   ins_cost(165);
10153   format %{ "UCOMISS $src1,$src2\n\t"
10154             "JNP,s   exit\n\t"
10155             "PUSHF\t# saw NaN, set CF\n\t"
10156             "AND     [rsp], #0xffffff2b\n\t"
10157             "POPF\n"
10158     "exit:" %}
10159   ins_encode %{
10160     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10161     emit_cmpfp_fixup(_masm);
10162   %}
10163   ins_pipe( pipe_slow );
10164 %}
10165 
10166 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10167   predicate(UseSSE>=1);
10168   match(Set cr (CmpF src1 (LoadF src2)));
10169   ins_cost(100);
10170   format %{ "UCOMISS $src1,$src2" %}
10171   ins_encode %{
10172     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10173   %}
10174   ins_pipe( pipe_slow );
10175 %}
10176 
10177 // Compare into -1,0,1 in XMM
10178 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10179   predicate(UseSSE>=1);
10180   match(Set dst (CmpF3 src1 src2));
10181   effect(KILL cr);
10182   ins_cost(255);
10183   format %{ "UCOMISS $src1, $src2\n\t"
10184             "MOV     $dst, #-1\n\t"
10185             "JP,s    done\n\t"
10186             "JB,s    done\n\t"
10187             "SETNE   $dst\n\t"
10188             "MOVZB   $dst, $dst\n"
10189     "done:" %}
10190   ins_encode %{
10191     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10192     emit_cmpfp3(_masm, $dst$$Register);
10193   %}
10194   ins_pipe( pipe_slow );
10195 %}
10196 
10197 // Compare into -1,0,1 in XMM and memory
10198 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10199   predicate(UseSSE>=1);
10200   match(Set dst (CmpF3 src1 (LoadF src2)));
10201   effect(KILL cr);
10202   ins_cost(275);
10203   format %{ "UCOMISS $src1, $src2\n\t"
10204             "MOV     $dst, #-1\n\t"
10205             "JP,s    done\n\t"
10206             "JB,s    done\n\t"
10207             "SETNE   $dst\n\t"
10208             "MOVZB   $dst, $dst\n"
10209     "done:" %}
10210   ins_encode %{
10211     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10212     emit_cmpfp3(_masm, $dst$$Register);
10213   %}
10214   ins_pipe( pipe_slow );
10215 %}
10216 
10217 // Spill to obtain 24-bit precision
10218 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10219   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10220   match(Set dst (SubF src1 src2));
10221 
10222   format %{ "FSUB   $dst,$src1 - $src2" %}
10223   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10224   ins_encode( Push_Reg_FPR(src1),
10225               OpcReg_FPR(src2),
10226               Pop_Mem_FPR(dst) );
10227   ins_pipe( fpu_mem_reg_reg );
10228 %}
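
// The "Spill to obtain 24-bit precision" float rules in this block all share
// one idea: when select_24_bit_instr() is set, the x87 result is popped into
// a stack slot with FSTP_S, which forces rounding to single precision,
// instead of being left on the FPU stack at full precision.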
10229 //
10230 // This instruction does not round to 24-bits
10231 instruct subFPR_reg(regFPR dst, regFPR src) %{
10232   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10233   match(Set dst (SubF dst src));
10234 
10235   format %{ "FSUB   $dst,$src" %}
10236   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10237   ins_encode( Push_Reg_FPR(src),
10238               OpcP, RegOpc(dst) );
10239   ins_pipe( fpu_reg_reg );
10240 %}
10241 
10242 // Spill to obtain 24-bit precision
10243 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10244   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10245   match(Set dst (AddF src1 src2));
10246 
10247   format %{ "FADD   $dst,$src1,$src2" %}
10248   opcode(0xD8, 0x0); /* D8 C0+i */
10249   ins_encode( Push_Reg_FPR(src2),
10250               OpcReg_FPR(src1),
10251               Pop_Mem_FPR(dst) );
10252   ins_pipe( fpu_mem_reg_reg );
10253 %}
10254 //
10255 // This instruction does not round to 24-bits
10256 instruct addFPR_reg(regFPR dst, regFPR src) %{
10257   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10258   match(Set dst (AddF dst src));
10259 
10260   format %{ "FLD    $src\n\t"
10261             "FADDp  $dst,ST" %}
10262   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10263   ins_encode( Push_Reg_FPR(src),
10264               OpcP, RegOpc(dst) );
10265   ins_pipe( fpu_reg_reg );
10266 %}
10267 
10268 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10269   predicate(UseSSE==0);
10270   match(Set dst (AbsF src));
10271   ins_cost(100);
10272   format %{ "FABS" %}
10273   opcode(0xE1, 0xD9);
10274   ins_encode( OpcS, OpcP );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10279   predicate(UseSSE==0);
10280   match(Set dst (NegF src));
10281   ins_cost(100);
10282   format %{ "FCHS" %}
10283   opcode(0xE0, 0xD9);
10284   ins_encode( OpcS, OpcP );
10285   ins_pipe( fpu_reg_reg );
10286 %}
10287 
10288 // Cisc-alternate to addFPR_reg
10289 // Spill to obtain 24-bit precision
10290 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10291   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10292   match(Set dst (AddF src1 (LoadF src2)));
10293 
10294   format %{ "FLD    $src2\n\t"
10295             "FADD   ST,$src1\n\t"
10296             "FSTP_S $dst" %}
10297   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10298   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10299               OpcReg_FPR(src1),
10300               Pop_Mem_FPR(dst) );
10301   ins_pipe( fpu_mem_reg_mem );
10302 %}
10303 //
10304 // Cisc-alternate to addFPR_reg
10305 // This instruction does not round to 24-bits
10306 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10307   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10308   match(Set dst (AddF dst (LoadF src)));
10309 
10310   format %{ "FADD   $dst,$src" %}
10311   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10312   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10313               OpcP, RegOpc(dst) );
10314   ins_pipe( fpu_reg_mem );
10315 %}
10316 
// The following two instructions exist for the _222_mpegaudio benchmark
10318 // Spill to obtain 24-bit precision
10319 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10320   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10321   match(Set dst (AddF src1 src2));
10322 
10323   format %{ "FADD   $dst,$src1,$src2" %}
10324   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10325   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10326               OpcReg_FPR(src2),
10327               Pop_Mem_FPR(dst) );
10328   ins_pipe( fpu_mem_reg_mem );
10329 %}
10330 
10331 // Cisc-spill variant
10332 // Spill to obtain 24-bit precision
10333 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10334   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10335   match(Set dst (AddF src1 (LoadF src2)));
10336 
10337   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10338   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10339   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10340               set_instruction_start,
10341               OpcP, RMopc_Mem(secondary,src1),
10342               Pop_Mem_FPR(dst) );
10343   ins_pipe( fpu_mem_mem_mem );
10344 %}
10345 
10346 // Spill to obtain 24-bit precision
10347 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10348   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10349   match(Set dst (AddF src1 src2));
10350 
10351   format %{ "FADD   $dst,$src1,$src2" %}
10352   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10353   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10354               set_instruction_start,
10355               OpcP, RMopc_Mem(secondary,src1),
10356               Pop_Mem_FPR(dst) );
10357   ins_pipe( fpu_mem_mem_mem );
10358 %}
10359 
10360 
10361 // Spill to obtain 24-bit precision
10362 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10363   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10364   match(Set dst (AddF src con));
10365   format %{ "FLD    $src\n\t"
10366             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10367             "FSTP_S $dst"  %}
10368   ins_encode %{
10369     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10370     __ fadd_s($constantaddress($con));
10371     __ fstp_s(Address(rsp, $dst$$disp));
10372   %}
10373   ins_pipe(fpu_mem_reg_con);
10374 %}
10375 //
10376 // This instruction does not round to 24-bits
10377 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10378   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10379   match(Set dst (AddF src con));
10380   format %{ "FLD    $src\n\t"
10381             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10382             "FSTP   $dst"  %}
10383   ins_encode %{
10384     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10385     __ fadd_s($constantaddress($con));
10386     __ fstp_d($dst$$reg);
10387   %}
10388   ins_pipe(fpu_reg_reg_con);
10389 %}
10390 
10391 // Spill to obtain 24-bit precision
10392 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10393   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10394   match(Set dst (MulF src1 src2));
10395 
10396   format %{ "FLD    $src1\n\t"
10397             "FMUL   $src2\n\t"
10398             "FSTP_S $dst"  %}
10399   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10400   ins_encode( Push_Reg_FPR(src1),
10401               OpcReg_FPR(src2),
10402               Pop_Mem_FPR(dst) );
10403   ins_pipe( fpu_mem_reg_reg );
10404 %}
10405 //
10406 // This instruction does not round to 24-bits
10407 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10408   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10409   match(Set dst (MulF src1 src2));
10410 
10411   format %{ "FLD    $src1\n\t"
10412             "FMUL   $src2\n\t"
10413             "FSTP_S $dst"  %}
10414   opcode(0xD8, 0x1); /* D8 C8+i */
10415   ins_encode( Push_Reg_FPR(src2),
10416               OpcReg_FPR(src1),
10417               Pop_Reg_FPR(dst) );
10418   ins_pipe( fpu_reg_reg_reg );
10419 %}
10420 
10421 
10422 // Spill to obtain 24-bit precision
10423 // Cisc-alternate to reg-reg multiply
10424 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10425   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10426   match(Set dst (MulF src1 (LoadF src2)));
10427 
10428   format %{ "FLD_S  $src2\n\t"
10429             "FMUL   $src1\n\t"
10430             "FSTP_S $dst"  %}
10431   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10432   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10433               OpcReg_FPR(src1),
10434               Pop_Mem_FPR(dst) );
10435   ins_pipe( fpu_mem_reg_mem );
10436 %}
10437 //
10438 // This instruction does not round to 24-bits
10439 // Cisc-alternate to reg-reg multiply
10440 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10441   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10442   match(Set dst (MulF src1 (LoadF src2)));
10443 
10444   format %{ "FMUL   $dst,$src1,$src2" %}
10445   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10446   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10447               OpcReg_FPR(src1),
10448               Pop_Reg_FPR(dst) );
10449   ins_pipe( fpu_reg_reg_mem );
10450 %}
10451 
10452 // Spill to obtain 24-bit precision
10453 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10454   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10455   match(Set dst (MulF src1 src2));
10456 
10457   format %{ "FMUL   $dst,$src1,$src2" %}
10458   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10459   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10460               set_instruction_start,
10461               OpcP, RMopc_Mem(secondary,src1),
10462               Pop_Mem_FPR(dst) );
10463   ins_pipe( fpu_mem_mem_mem );
10464 %}
10465 
10466 // Spill to obtain 24-bit precision
10467 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10468   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10469   match(Set dst (MulF src con));
10470 
10471   format %{ "FLD    $src\n\t"
10472             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10473             "FSTP_S $dst"  %}
10474   ins_encode %{
10475     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10476     __ fmul_s($constantaddress($con));
10477     __ fstp_s(Address(rsp, $dst$$disp));
10478   %}
10479   ins_pipe(fpu_mem_reg_con);
10480 %}
10481 //
10482 // This instruction does not round to 24-bits
10483 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10484   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10485   match(Set dst (MulF src con));
10486 
10487   format %{ "FLD    $src\n\t"
10488             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10489             "FSTP   $dst"  %}
10490   ins_encode %{
10491     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10492     __ fmul_s($constantaddress($con));
10493     __ fstp_d($dst$$reg);
10494   %}
10495   ins_pipe(fpu_reg_reg_con);
10496 %}
10497 
10498 
10499 //
10500 // MACRO1 -- subsume unshared load into mulFPR
10501 // This instruction does not round to 24-bits
10502 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10503   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10504   match(Set dst (MulF (LoadF mem1) src));
10505 
10506   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10507             "FMUL   ST,$src\n\t"
10508             "FSTP   $dst" %}
10509   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10510   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10511               OpcReg_FPR(src),
10512               Pop_Reg_FPR(dst) );
10513   ins_pipe( fpu_reg_reg_mem );
10514 %}
10515 //
10516 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10517 // This instruction does not round to 24-bits
10518 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10519   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10520   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10521   ins_cost(95);
10522 
10523   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10524             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10525             "FADD   ST,$src2\n\t"
10526             "FSTP   $dst" %}
10527   opcode(0xD9); /* LoadF D9 /0 */
10528   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10529               FMul_ST_reg(src1),
10530               FAdd_ST_reg(src2),
10531               Pop_Reg_FPR(dst) );
10532   ins_pipe( fpu_reg_mem_reg_reg );
10533 %}
10534 
10535 // MACRO3 -- addFPR a mulFPR
10536 // This instruction does not round to 24-bits.  It is a '2-address'
10537 // instruction in that the result goes back to src2.  This eliminates
10538 // a move from the macro; possibly the register allocator will have
10539 // to add it back (and maybe not).
10540 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10541   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10542   match(Set src2 (AddF (MulF src0 src1) src2));
10543 
10544   format %{ "FLD    $src0     ===MACRO3===\n\t"
10545             "FMUL   ST,$src1\n\t"
10546             "FADDP  $src2,ST" %}
10547   opcode(0xD9); /* LoadF D9 /0 */
10548   ins_encode( Push_Reg_FPR(src0),
10549               FMul_ST_reg(src1),
10550               FAddP_reg_ST(src2) );
10551   ins_pipe( fpu_reg_reg_reg );
10552 %}
10553 
10554 // MACRO4 -- divFPR subFPR
10555 // This instruction does not round to 24-bits
10556 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10557   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10558   match(Set dst (DivF (SubF src2 src1) src3));
10559 
10560   format %{ "FLD    $src2   ===MACRO4===\n\t"
10561             "FSUB   ST,$src1\n\t"
10562             "FDIV   ST,$src3\n\t"
10563             "FSTP  $dst" %}
10564   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10565   ins_encode( Push_Reg_FPR(src2),
10566               subFPR_divFPR_encode(src1,src3),
10567               Pop_Reg_FPR(dst) );
10568   ins_pipe( fpu_reg_reg_reg_reg );
10569 %}
10570 
10571 // Spill to obtain 24-bit precision
10572 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10573   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10574   match(Set dst (DivF src1 src2));
10575 
10576   format %{ "FDIV   $dst,$src1,$src2" %}
10577   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10578   ins_encode( Push_Reg_FPR(src1),
10579               OpcReg_FPR(src2),
10580               Pop_Mem_FPR(dst) );
10581   ins_pipe( fpu_mem_reg_reg );
10582 %}
10583 //
10584 // This instruction does not round to 24-bits
10585 instruct divFPR_reg(regFPR dst, regFPR src) %{
10586   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10587   match(Set dst (DivF dst src));
10588 
10589   format %{ "FDIV   $dst,$src" %}
10590   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10591   ins_encode( Push_Reg_FPR(src),
10592               OpcP, RegOpc(dst) );
10593   ins_pipe( fpu_reg_reg );
10594 %}
10595 
10596 
10597 // Spill to obtain 24-bit precision
10598 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10599   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10600   match(Set dst (ModF src1 src2));
10601   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10602 
10603   format %{ "FMOD   $dst,$src1,$src2" %}
10604   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10605               emitModDPR(),
10606               Push_Result_Mod_DPR(src2),
10607               Pop_Mem_FPR(dst));
10608   ins_pipe( pipe_slow );
10609 %}
10610 //
10611 // This instruction does not round to 24-bits
10612 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10613   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10614   match(Set dst (ModF dst src));
10615   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10616 
10617   format %{ "FMOD   $dst,$src" %}
10618   ins_encode(Push_Reg_Mod_DPR(dst, src),
10619               emitModDPR(),
10620               Push_Result_Mod_DPR(src),
10621               Pop_Reg_FPR(dst));
10622   ins_pipe( pipe_slow );
10623 %}
10624 
10625 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10626   predicate(UseSSE>=1);
10627   match(Set dst (ModF src0 src1));
10628   effect(KILL rax, KILL cr);
10629   format %{ "SUB    ESP,4\t # FMOD\n"
10630           "\tMOVSS  [ESP+0],$src1\n"
10631           "\tFLD_S  [ESP+0]\n"
10632           "\tMOVSS  [ESP+0],$src0\n"
10633           "\tFLD_S  [ESP+0]\n"
10634      "loop:\tFPREM\n"
10635           "\tFWAIT\n"
10636           "\tFNSTSW AX\n"
10637           "\tSAHF\n"
10638           "\tJP     loop\n"
10639           "\tFSTP_S [ESP+0]\n"
10640           "\tMOVSS  $dst,[ESP+0]\n"
10641           "\tADD    ESP,4\n"
10642           "\tFSTP   ST0\t # Restore FPU Stack"
10643     %}
10644   ins_cost(250);
10645   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10646   ins_pipe( pipe_slow );
10647 %}
10648 
10649 
10650 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha sorted.  Please keep it that way!
10652 
10653 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10654   predicate(UseSSE==0);
10655   match(Set dst (RoundFloat src));
10656   ins_cost(125);
10657   format %{ "FST_S  $dst,$src\t# F-round" %}
10658   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10659   ins_pipe( fpu_mem_reg );
10660 %}
10661 
10662 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10663   predicate(UseSSE<=1);
10664   match(Set dst (RoundDouble src));
10665   ins_cost(125);
10666   format %{ "FST_D  $dst,$src\t# D-round" %}
10667   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10668   ins_pipe( fpu_mem_reg );
10669 %}
10670 
// Force rounding to 24-bit precision and 8-bit exponent
10672 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10673   predicate(UseSSE==0);
10674   match(Set dst (ConvD2F src));
10675   format %{ "FST_S  $dst,$src\t# F-round" %}
10676   expand %{
10677     roundFloat_mem_reg(dst,src);
10678   %}
10679 %}
10680 
// Force rounding to 24-bit precision and 8-bit exponent
10682 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10683   predicate(UseSSE==1);
10684   match(Set dst (ConvD2F src));
10685   effect( KILL cr );
10686   format %{ "SUB    ESP,4\n\t"
10687             "FST_S  [ESP],$src\t# F-round\n\t"
10688             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10690   ins_encode %{
10691     __ subptr(rsp, 4);
10692     if ($src$$reg != FPR1L_enc) {
10693       __ fld_s($src$$reg-1);
10694       __ fstp_s(Address(rsp, 0));
10695     } else {
10696       __ fst_s(Address(rsp, 0));
10697     }
10698     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10699     __ addptr(rsp, 4);
10700   %}
10701   ins_pipe( pipe_slow );
10702 %}
10703 
10704 // Force rounding double precision to single precision
10705 instruct convD2F_reg(regF dst, regD src) %{
10706   predicate(UseSSE>=2);
10707   match(Set dst (ConvD2F src));
10708   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10709   ins_encode %{
10710     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10711   %}
10712   ins_pipe( pipe_slow );
10713 %}
10714 
10715 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10716   predicate(UseSSE==0);
10717   match(Set dst (ConvF2D src));
10718   format %{ "FST_S  $dst,$src\t# D-round" %}
10719   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10720   ins_pipe( fpu_reg_reg );
10721 %}
10722 
10723 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10724   predicate(UseSSE==1);
10725   match(Set dst (ConvF2D src));
10726   format %{ "FST_D  $dst,$src\t# D-round" %}
10727   expand %{
10728     roundDouble_mem_reg(dst,src);
10729   %}
10730 %}
10731 
10732 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10733   predicate(UseSSE==1);
10734   match(Set dst (ConvF2D src));
10735   effect( KILL cr );
10736   format %{ "SUB    ESP,4\n\t"
10737             "MOVSS  [ESP] $src\n\t"
10738             "FLD_S  [ESP]\n\t"
10739             "ADD    ESP,4\n\t"
10740             "FSTP   $dst\t# D-round" %}
10741   ins_encode %{
10742     __ subptr(rsp, 4);
10743     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10744     __ fld_s(Address(rsp, 0));
10745     __ addptr(rsp, 4);
10746     __ fstp_d($dst$$reg);
10747   %}
10748   ins_pipe( pipe_slow );
10749 %}
10750 
10751 instruct convF2D_reg(regD dst, regF src) %{
10752   predicate(UseSSE>=2);
10753   match(Set dst (ConvF2D src));
10754   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10755   ins_encode %{
10756     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10757   %}
10758   ins_pipe( pipe_slow );
10759 %}
10760 
10761 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10762 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10763   predicate(UseSSE<=1);
10764   match(Set dst (ConvD2I src));
10765   effect( KILL tmp, KILL cr );
10766   format %{ "FLD    $src\t# Convert double to int \n\t"
10767             "FLDCW  trunc mode\n\t"
10768             "SUB    ESP,4\n\t"
10769             "FISTp  [ESP + #0]\n\t"
10770             "FLDCW  std/24-bit mode\n\t"
10771             "POP    EAX\n\t"
10772             "CMP    EAX,0x80000000\n\t"
10773             "JNE,s  fast\n\t"
10774             "FLD_D  $src\n\t"
10775             "CALL   d2i_wrapper\n"
10776       "fast:" %}
10777   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10778   ins_pipe( pipe_slow );
10779 %}
10780 
10781 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10782 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10783   predicate(UseSSE>=2);
10784   match(Set dst (ConvD2I src));
10785   effect( KILL tmp, KILL cr );
10786   format %{ "CVTTSD2SI $dst, $src\n\t"
10787             "CMP    $dst,0x80000000\n\t"
10788             "JNE,s  fast\n\t"
10789             "SUB    ESP, 8\n\t"
10790             "MOVSD  [ESP], $src\n\t"
10791             "FLD_D  [ESP]\n\t"
10792             "ADD    ESP, 8\n\t"
10793             "CALL   d2i_wrapper\n"
10794       "fast:" %}
10795   ins_encode %{
10796     Label fast;
10797     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10798     __ cmpl($dst$$Register, 0x80000000);
10799     __ jccb(Assembler::notEqual, fast);
10800     __ subptr(rsp, 8);
10801     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10802     __ fld_d(Address(rsp, 0));
10803     __ addptr(rsp, 8);
10804     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10805     __ bind(fast);
10806   %}
10807   ins_pipe( pipe_slow );
10808 %}
10809 
10810 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10811   predicate(UseSSE<=1);
10812   match(Set dst (ConvD2L src));
10813   effect( KILL cr );
10814   format %{ "FLD    $src\t# Convert double to long\n\t"
10815             "FLDCW  trunc mode\n\t"
10816             "SUB    ESP,8\n\t"
10817             "FISTp  [ESP + #0]\n\t"
10818             "FLDCW  std/24-bit mode\n\t"
10819             "POP    EAX\n\t"
10820             "POP    EDX\n\t"
10821             "CMP    EDX,0x80000000\n\t"
10822             "JNE,s  fast\n\t"
10823             "TEST   EAX,EAX\n\t"
10824             "JNE,s  fast\n\t"
10825             "FLD    $src\n\t"
10826             "CALL   d2l_wrapper\n"
10827       "fast:" %}
10828   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10829   ins_pipe( pipe_slow );
10830 %}
10831 
10832 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10833 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10834   predicate (UseSSE>=2);
10835   match(Set dst (ConvD2L src));
10836   effect( KILL cr );
10837   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10838             "MOVSD  [ESP],$src\n\t"
10839             "FLD_D  [ESP]\n\t"
10840             "FLDCW  trunc mode\n\t"
10841             "FISTp  [ESP + #0]\n\t"
10842             "FLDCW  std/24-bit mode\n\t"
10843             "POP    EAX\n\t"
10844             "POP    EDX\n\t"
10845             "CMP    EDX,0x80000000\n\t"
10846             "JNE,s  fast\n\t"
10847             "TEST   EAX,EAX\n\t"
10848             "JNE,s  fast\n\t"
10849             "SUB    ESP,8\n\t"
10850             "MOVSD  [ESP],$src\n\t"
10851             "FLD_D  [ESP]\n\t"
10852             "ADD    ESP,8\n\t"
10853             "CALL   d2l_wrapper\n"
10854       "fast:" %}
10855   ins_encode %{
10856     Label fast;
10857     __ subptr(rsp, 8);
10858     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10859     __ fld_d(Address(rsp, 0));
10860     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10861     __ fistp_d(Address(rsp, 0));
10862     // Restore the rounding mode, mask the exception
10863     if (Compile::current()->in_24_bit_fp_mode()) {
10864       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10865     } else {
10866       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10867     }
10868     // Load the converted long, adjust CPU stack
10869     __ pop(rax);
10870     __ pop(rdx);
10871     __ cmpl(rdx, 0x80000000);
10872     __ jccb(Assembler::notEqual, fast);
10873     __ testl(rax, rax);
10874     __ jccb(Assembler::notEqual, fast);
10875     __ subptr(rsp, 8);
10876     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10877     __ fld_d(Address(rsp, 0));
10878     __ addptr(rsp, 8);
10879     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10880     __ bind(fast);
10881   %}
10882   ins_pipe( pipe_slow );
10883 %}
10884 
// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we overflowed or converted a NaN; we check for this and take
// the slow path if needed.
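// Illustrative examples of the Java rule (JLS 5.1.3) that the slow path
// must implement, in contrast to the 0x80000000 sentinel the hardware
// produces for the same inputs:
//   (int) Double.NaN == 0
//   (int)  1.0e18    == Integer.MAX_VALUE
//   (int) -1.0e18    == Integer.MIN_VALUE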
10891 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10892   predicate(UseSSE==0);
10893   match(Set dst (ConvF2I src));
10894   effect( KILL tmp, KILL cr );
10895   format %{ "FLD    $src\t# Convert float to int \n\t"
10896             "FLDCW  trunc mode\n\t"
10897             "SUB    ESP,4\n\t"
10898             "FISTp  [ESP + #0]\n\t"
10899             "FLDCW  std/24-bit mode\n\t"
10900             "POP    EAX\n\t"
10901             "CMP    EAX,0x80000000\n\t"
10902             "JNE,s  fast\n\t"
10903             "FLD    $src\n\t"
10904             "CALL   d2i_wrapper\n"
10905       "fast:" %}
10906   // DPR2I_encoding works for FPR2I
10907   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10908   ins_pipe( pipe_slow );
10909 %}
10910 
10911 // Convert a float in xmm to an int reg.
10912 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10913   predicate(UseSSE>=1);
10914   match(Set dst (ConvF2I src));
10915   effect( KILL tmp, KILL cr );
10916   format %{ "CVTTSS2SI $dst, $src\n\t"
10917             "CMP    $dst,0x80000000\n\t"
10918             "JNE,s  fast\n\t"
10919             "SUB    ESP, 4\n\t"
10920             "MOVSS  [ESP], $src\n\t"
10921             "FLD    [ESP]\n\t"
10922             "ADD    ESP, 4\n\t"
10923             "CALL   d2i_wrapper\n"
10924       "fast:" %}
10925   ins_encode %{
10926     Label fast;
10927     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10928     __ cmpl($dst$$Register, 0x80000000);
10929     __ jccb(Assembler::notEqual, fast);
10930     __ subptr(rsp, 4);
10931     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10932     __ fld_s(Address(rsp, 0));
10933     __ addptr(rsp, 4);
10934     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10935     __ bind(fast);
10936   %}
10937   ins_pipe( pipe_slow );
10938 %}
10939 
10940 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10941   predicate(UseSSE==0);
10942   match(Set dst (ConvF2L src));
10943   effect( KILL cr );
10944   format %{ "FLD    $src\t# Convert float to long\n\t"
10945             "FLDCW  trunc mode\n\t"
10946             "SUB    ESP,8\n\t"
10947             "FISTp  [ESP + #0]\n\t"
10948             "FLDCW  std/24-bit mode\n\t"
10949             "POP    EAX\n\t"
10950             "POP    EDX\n\t"
10951             "CMP    EDX,0x80000000\n\t"
10952             "JNE,s  fast\n\t"
10953             "TEST   EAX,EAX\n\t"
10954             "JNE,s  fast\n\t"
10955             "FLD    $src\n\t"
10956             "CALL   d2l_wrapper\n"
10957       "fast:" %}
10958   // DPR2L_encoding works for FPR2L
10959   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10960   ins_pipe( pipe_slow );
10961 %}
10962 
10963 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10964 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10965   predicate (UseSSE>=1);
10966   match(Set dst (ConvF2L src));
10967   effect( KILL cr );
10968   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10969             "MOVSS  [ESP],$src\n\t"
10970             "FLD_S  [ESP]\n\t"
10971             "FLDCW  trunc mode\n\t"
10972             "FISTp  [ESP + #0]\n\t"
10973             "FLDCW  std/24-bit mode\n\t"
10974             "POP    EAX\n\t"
10975             "POP    EDX\n\t"
10976             "CMP    EDX,0x80000000\n\t"
10977             "JNE,s  fast\n\t"
10978             "TEST   EAX,EAX\n\t"
10979             "JNE,s  fast\n\t"
10980             "SUB    ESP,4\t# Convert float to long\n\t"
10981             "MOVSS  [ESP],$src\n\t"
10982             "FLD_S  [ESP]\n\t"
10983             "ADD    ESP,4\n\t"
10984             "CALL   d2l_wrapper\n"
10985       "fast:" %}
10986   ins_encode %{
10987     Label fast;
10988     __ subptr(rsp, 8);
10989     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10990     __ fld_s(Address(rsp, 0));
10991     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10992     __ fistp_d(Address(rsp, 0));
10993     // Restore the rounding mode, mask the exception
10994     if (Compile::current()->in_24_bit_fp_mode()) {
10995       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10996     } else {
10997       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10998     }
10999     // Load the converted long, adjust CPU stack
11000     __ pop(rax);
11001     __ pop(rdx);
11002     __ cmpl(rdx, 0x80000000);
11003     __ jccb(Assembler::notEqual, fast);
11004     __ testl(rax, rax);
11005     __ jccb(Assembler::notEqual, fast);
11006     __ subptr(rsp, 4);
11007     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11008     __ fld_s(Address(rsp, 0));
11009     __ addptr(rsp, 4);
11010     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11011     __ bind(fast);
11012   %}
11013   ins_pipe( pipe_slow );
11014 %}
11015 
11016 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11017   predicate( UseSSE<=1 );
11018   match(Set dst (ConvI2D src));
11019   format %{ "FILD   $src\n\t"
11020             "FSTP   $dst" %}
11021   opcode(0xDB, 0x0);  /* DB /0 */
11022   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11023   ins_pipe( fpu_reg_mem );
11024 %}
11025 
11026 instruct convI2D_reg(regD dst, rRegI src) %{
11027   predicate( UseSSE>=2 && !UseXmmI2D );
11028   match(Set dst (ConvI2D src));
11029   format %{ "CVTSI2SD $dst,$src" %}
11030   ins_encode %{
11031     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11032   %}
11033   ins_pipe( pipe_slow );
11034 %}
11035 
11036 instruct convI2D_mem(regD dst, memory mem) %{
11037   predicate( UseSSE>=2 );
11038   match(Set dst (ConvI2D (LoadI mem)));
11039   format %{ "CVTSI2SD $dst,$mem" %}
11040   ins_encode %{
11041     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11042   %}
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convXI2D_reg(regD dst, rRegI src)
11047 %{
11048   predicate( UseSSE>=2 && UseXmmI2D );
11049   match(Set dst (ConvI2D src));
11050 
11051   format %{ "MOVD  $dst,$src\n\t"
11052             "CVTDQ2PD $dst,$dst\t# i2d" %}
11053   ins_encode %{
11054     __ movdl($dst$$XMMRegister, $src$$Register);
11055     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11056   %}
11057   ins_pipe(pipe_slow); // XXX
11058 %}
11059 
11060 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11061   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11062   match(Set dst (ConvI2D (LoadI mem)));
11063   format %{ "FILD   $mem\n\t"
11064             "FSTP   $dst" %}
11065   opcode(0xDB);      /* DB /0 */
11066   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11067               Pop_Reg_DPR(dst));
11068   ins_pipe( fpu_reg_mem );
11069 %}
11070 
11071 // Convert a byte to a float; no rounding step needed.
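// (Every value 0..255 is exactly representable in a 24-bit significand, so
// the round-to-24-bit store used by the other ConvI2F forms is unnecessary.)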
11072 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11073   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11074   match(Set dst (ConvI2F src));
11075   format %{ "FILD   $src\n\t"
11076             "FSTP   $dst" %}
11077 
11078   opcode(0xDB, 0x0);  /* DB /0 */
11079   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11080   ins_pipe( fpu_reg_mem );
11081 %}
11082 
11083 // In 24-bit mode, force exponent rounding by storing back out
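// Storing through a 32-bit stack slot (FSTP_S) rounds both the significand
// and the exponent range to IEEE single precision, which the 24-bit
// precision-control setting alone does not do.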
11084 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11085   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11086   match(Set dst (ConvI2F src));
11087   ins_cost(200);
11088   format %{ "FILD   $src\n\t"
11089             "FSTP_S $dst" %}
11090   opcode(0xDB, 0x0);  /* DB /0 */
11091   ins_encode( Push_Mem_I(src),
11092               Pop_Mem_FPR(dst));
11093   ins_pipe( fpu_mem_mem );
11094 %}
11095 
11096 // In 24-bit mode, force exponent rounding by storing back out
11097 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11098   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11099   match(Set dst (ConvI2F (LoadI mem)));
11100   ins_cost(200);
11101   format %{ "FILD   $mem\n\t"
11102             "FSTP_S $dst" %}
11103   opcode(0xDB);  /* DB /0 */
11104   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11105               Pop_Mem_FPR(dst));
11106   ins_pipe( fpu_mem_mem );
11107 %}
11108 
11109 // This instruction does not round to 24-bits
11110 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11111   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11112   match(Set dst (ConvI2F src));
11113   format %{ "FILD   $src\n\t"
11114             "FSTP   $dst" %}
11115   opcode(0xDB, 0x0);  /* DB /0 */
11116   ins_encode( Push_Mem_I(src),
11117               Pop_Reg_FPR(dst));
11118   ins_pipe( fpu_reg_mem );
11119 %}
11120 
11121 // This instruction does not round to 24-bits
11122 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11123   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11124   match(Set dst (ConvI2F (LoadI mem)));
11125   format %{ "FILD   $mem\n\t"
11126             "FSTP   $dst" %}
11127   opcode(0xDB);      /* DB /0 */
11128   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11129               Pop_Reg_FPR(dst));
11130   ins_pipe( fpu_reg_mem );
11131 %}
11132 
11133 // Convert an int to a float in xmm; no rounding step needed.
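// CVTSI2SS rounds directly to single precision, so no store-and-reload
// rounding step is required.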
11134 instruct convI2F_reg(regF dst, rRegI src) %{
11135   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11136   match(Set dst (ConvI2F src));
11137   format %{ "CVTSI2SS $dst, $src" %}
11138   ins_encode %{
11139     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11140   %}
11141   ins_pipe( pipe_slow );
11142 %}
11143 
instruct convXI2F_reg(regF dst, rRegI src)
11145 %{
11146   predicate( UseSSE>=2 && UseXmmI2F );
11147   match(Set dst (ConvI2F src));
11148 
11149   format %{ "MOVD  $dst,$src\n\t"
11150             "CVTDQ2PS $dst,$dst\t# i2f" %}
11151   ins_encode %{
11152     __ movdl($dst$$XMMRegister, $src$$Register);
11153     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11154   %}
11155   ins_pipe(pipe_slow); // XXX
11156 %}
11157 
11158 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11159   match(Set dst (ConvI2L src));
11160   effect(KILL cr);
11161   ins_cost(375);
11162   format %{ "MOV    $dst.lo,$src\n\t"
11163             "MOV    $dst.hi,$src\n\t"
11164             "SAR    $dst.hi,31" %}
11165   ins_encode(convert_int_long(dst,src));
11166   ins_pipe( ialu_reg_reg_long );
11167 %}
11168 
11169 // Zero-extend convert int to long
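// Matches the ((long)i) & 0xFFFFFFFFL idiom (e.g. Integer.toUnsignedLong):
// copy the low word and clear the high word instead of sign-extending.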
11170 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11171   match(Set dst (AndL (ConvI2L src) mask) );
11172   effect( KILL flags );
11173   ins_cost(250);
11174   format %{ "MOV    $dst.lo,$src\n\t"
11175             "XOR    $dst.hi,$dst.hi" %}
11176   opcode(0x33); // XOR
11177   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11178   ins_pipe( ialu_reg_reg_long );
11179 %}
11180 
11181 // Zero-extend long
11182 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11183   match(Set dst (AndL src mask) );
11184   effect( KILL flags );
11185   ins_cost(250);
11186   format %{ "MOV    $dst.lo,$src.lo\n\t"
11187             "XOR    $dst.hi,$dst.hi\n\t" %}
11188   opcode(0x33); // XOR
11189   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11190   ins_pipe( ialu_reg_reg_long );
11191 %}
11192 
11193 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11194   predicate (UseSSE<=1);
11195   match(Set dst (ConvL2D src));
11196   effect( KILL cr );
11197   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11198             "PUSH   $src.lo\n\t"
11199             "FILD   ST,[ESP + #0]\n\t"
11200             "ADD    ESP,8\n\t"
11201             "FSTP_D $dst\t# D-round" %}
11202   opcode(0xDF, 0x5);  /* DF /5 */
11203   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11204   ins_pipe( pipe_slow );
11205 %}
11206 
11207 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11208   predicate (UseSSE>=2);
11209   match(Set dst (ConvL2D src));
11210   effect( KILL cr );
11211   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11212             "PUSH   $src.lo\n\t"
11213             "FILD_D [ESP]\n\t"
11214             "FSTP_D [ESP]\n\t"
11215             "MOVSD  $dst,[ESP]\n\t"
11216             "ADD    ESP,8" %}
11217   opcode(0xDF, 0x5);  /* DF /5 */
11218   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11219   ins_pipe( pipe_slow );
11220 %}
11221 
11222 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11223   predicate (UseSSE>=1);
11224   match(Set dst (ConvL2F src));
11225   effect( KILL cr );
11226   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11227             "PUSH   $src.lo\n\t"
11228             "FILD_D [ESP]\n\t"
11229             "FSTP_S [ESP]\n\t"
11230             "MOVSS  $dst,[ESP]\n\t"
11231             "ADD    ESP,8" %}
11232   opcode(0xDF, 0x5);  /* DF /5 */
11233   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11234   ins_pipe( pipe_slow );
11235 %}
11236 
11237 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11238   match(Set dst (ConvL2F src));
11239   effect( KILL cr );
11240   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11241             "PUSH   $src.lo\n\t"
11242             "FILD   ST,[ESP + #0]\n\t"
11243             "ADD    ESP,8\n\t"
11244             "FSTP_S $dst\t# F-round" %}
11245   opcode(0xDF, 0x5);  /* DF /5 */
11246   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11247   ins_pipe( pipe_slow );
11248 %}
11249 
11250 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11251   match(Set dst (ConvL2I src));
11252   effect( DEF dst, USE src );
11253   format %{ "MOV    $dst,$src.lo" %}
11254   ins_encode(enc_CopyL_Lo(dst,src));
11255   ins_pipe( ialu_reg_reg );
11256 %}
11257 
11258 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11259   match(Set dst (MoveF2I src));
11260   effect( DEF dst, USE src );
11261   ins_cost(100);
11262   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11263   ins_encode %{
11264     __ movl($dst$$Register, Address(rsp, $src$$disp));
11265   %}
11266   ins_pipe( ialu_reg_mem );
11267 %}
11268 
11269 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11270   predicate(UseSSE==0);
11271   match(Set dst (MoveF2I src));
11272   effect( DEF dst, USE src );
11273 
11274   ins_cost(125);
11275   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11276   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11277   ins_pipe( fpu_mem_reg );
11278 %}
11279 
11280 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11281   predicate(UseSSE>=1);
11282   match(Set dst (MoveF2I src));
11283   effect( DEF dst, USE src );
11284 
11285   ins_cost(95);
11286   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11287   ins_encode %{
11288     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11289   %}
11290   ins_pipe( pipe_slow );
11291 %}
11292 
11293 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11294   predicate(UseSSE>=2);
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297   ins_cost(85);
11298   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11299   ins_encode %{
11300     __ movdl($dst$$Register, $src$$XMMRegister);
11301   %}
11302   ins_pipe( pipe_slow );
11303 %}
11304 
11305 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11306   match(Set dst (MoveI2F src));
11307   effect( DEF dst, USE src );
11308 
11309   ins_cost(100);
11310   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11311   ins_encode %{
11312     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11313   %}
11314   ins_pipe( ialu_mem_reg );
11315 %}
11316 
11317 
11318 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11319   predicate(UseSSE==0);
11320   match(Set dst (MoveI2F src));
11321   effect(DEF dst, USE src);
11322 
11323   ins_cost(125);
11324   format %{ "FLD_S  $src\n\t"
11325             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11326   opcode(0xD9);               /* D9 /0, FLD m32real */
11327   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11328               Pop_Reg_FPR(dst) );
11329   ins_pipe( fpu_reg_mem );
11330 %}
11331 
11332 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11333   predicate(UseSSE>=1);
11334   match(Set dst (MoveI2F src));
11335   effect( DEF dst, USE src );
11336 
11337   ins_cost(95);
11338   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11339   ins_encode %{
11340     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11341   %}
11342   ins_pipe( pipe_slow );
11343 %}
11344 
11345 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11346   predicate(UseSSE>=2);
11347   match(Set dst (MoveI2F src));
11348   effect( DEF dst, USE src );
11349 
11350   ins_cost(85);
11351   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11352   ins_encode %{
11353     __ movdl($dst$$XMMRegister, $src$$Register);
11354   %}
11355   ins_pipe( pipe_slow );
11356 %}
11357 
11358 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11359   match(Set dst (MoveD2L src));
11360   effect(DEF dst, USE src);
11361 
11362   ins_cost(250);
11363   format %{ "MOV    $dst.lo,$src\n\t"
11364             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11365   opcode(0x8B, 0x8B);
11366   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11367   ins_pipe( ialu_mem_long_reg );
11368 %}
11369 
11370 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11371   predicate(UseSSE<=1);
11372   match(Set dst (MoveD2L src));
11373   effect(DEF dst, USE src);
11374 
11375   ins_cost(125);
11376   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11377   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11378   ins_pipe( fpu_mem_reg );
11379 %}
11380 
11381 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11382   predicate(UseSSE>=2);
11383   match(Set dst (MoveD2L src));
11384   effect(DEF dst, USE src);
11385   ins_cost(95);
11386   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11387   ins_encode %{
11388     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11389   %}
11390   ins_pipe( pipe_slow );
11391 %}
11392 
11393 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11394   predicate(UseSSE>=2);
11395   match(Set dst (MoveD2L src));
11396   effect(DEF dst, USE src, TEMP tmp);
11397   ins_cost(85);
11398   format %{ "MOVD   $dst.lo,$src\n\t"
11399             "PSHUFLW $tmp,$src,0x4E\n\t"
11400             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11401   ins_encode %{
11402     __ movdl($dst$$Register, $src$$XMMRegister);
11403     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11404     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11405   %}
11406   ins_pipe( pipe_slow );
11407 %}
11408 
11409 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11410   match(Set dst (MoveL2D src));
11411   effect(DEF dst, USE src);
11412 
11413   ins_cost(200);
11414   format %{ "MOV    $dst,$src.lo\n\t"
11415             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11416   opcode(0x89, 0x89);
11417   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11418   ins_pipe( ialu_mem_long_reg );
11419 %}
11420 
11421 
11422 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11423   predicate(UseSSE<=1);
11424   match(Set dst (MoveL2D src));
11425   effect(DEF dst, USE src);
11426   ins_cost(125);
11427 
11428   format %{ "FLD_D  $src\n\t"
11429             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11430   opcode(0xDD);               /* DD /0, FLD m64real */
11431   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11432               Pop_Reg_DPR(dst) );
11433   ins_pipe( fpu_reg_mem );
11434 %}
11435 
11436 
11437 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11438   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11439   match(Set dst (MoveL2D src));
11440   effect(DEF dst, USE src);
11441 
11442   ins_cost(95);
11443   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11444   ins_encode %{
11445     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11446   %}
11447   ins_pipe( pipe_slow );
11448 %}
11449 
11450 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11451   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11452   match(Set dst (MoveL2D src));
11453   effect(DEF dst, USE src);
11454 
11455   ins_cost(95);
11456   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11457   ins_encode %{
11458     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11459   %}
11460   ins_pipe( pipe_slow );
11461 %}
11462 
11463 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11464   predicate(UseSSE>=2);
11465   match(Set dst (MoveL2D src));
11466   effect(TEMP dst, USE src, TEMP tmp);
11467   ins_cost(85);
11468   format %{ "MOVD   $dst,$src.lo\n\t"
11469             "MOVD   $tmp,$src.hi\n\t"
11470             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11471   ins_encode %{
11472     __ movdl($dst$$XMMRegister, $src$$Register);
11473     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11474     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11475   %}
11476   ins_pipe( pipe_slow );
11477 %}
11478 
11479 
11480 // =======================================================================
11481 // fast clearing of an array
11482 // Small ClearArray non-AVX512.
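// ClearArrayNode::is_large() steers matching between these "small" forms,
// which compare the length against InitArrayShortSize and use an inline
// store loop for short arrays, and the "large" forms further down, which go
// straight to REP STOS / XMM-based zeroing.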
11483 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11484   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11485   match(Set dummy (ClearArray cnt base));
11486   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11487 
11488   format %{ $$template
11489     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11490     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11491     $$emit$$"JG     LARGE\n\t"
11492     $$emit$$"SHL    ECX, 1\n\t"
11493     $$emit$$"DEC    ECX\n\t"
11494     $$emit$$"JS     DONE\t# Zero length\n\t"
11495     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11496     $$emit$$"DEC    ECX\n\t"
11497     $$emit$$"JGE    LOOP\n\t"
11498     $$emit$$"JMP    DONE\n\t"
11499     $$emit$$"# LARGE:\n\t"
11500     if (UseFastStosb) {
11501        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11502        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11503     } else if (UseXMMForObjInit) {
11504        $$emit$$"MOV     RDI,RAX\n\t"
11505        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11506        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11507        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11508        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11509        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11510        $$emit$$"ADD     0x40,RAX\n\t"
11511        $$emit$$"# L_zero_64_bytes:\n\t"
11512        $$emit$$"SUB     0x8,RCX\n\t"
11513        $$emit$$"JGE     L_loop\n\t"
11514        $$emit$$"ADD     0x4,RCX\n\t"
11515        $$emit$$"JL      L_tail\n\t"
11516        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11517        $$emit$$"ADD     0x20,RAX\n\t"
11518        $$emit$$"SUB     0x4,RCX\n\t"
11519        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11520        $$emit$$"ADD     0x4,RCX\n\t"
11521        $$emit$$"JLE     L_end\n\t"
11522        $$emit$$"DEC     RCX\n\t"
11523        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11524        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11525        $$emit$$"ADD     0x8,RAX\n\t"
11526        $$emit$$"DEC     RCX\n\t"
11527        $$emit$$"JGE     L_sloop\n\t"
11528        $$emit$$"# L_end:\n\t"
11529     } else {
11530        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11531        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11532     }
11533     $$emit$$"# DONE"
11534   %}
11535   ins_encode %{
11536     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11537                  $tmp$$XMMRegister, false, knoreg);
11538   %}
11539   ins_pipe( pipe_slow );
11540 %}
11541 
11542 // Small ClearArray AVX512 non-constant length.
11543 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11544   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11545   match(Set dummy (ClearArray cnt base));
11546   ins_cost(125);
11547   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11548 
11549   format %{ $$template
11550     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11551     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11552     $$emit$$"JG     LARGE\n\t"
11553     $$emit$$"SHL    ECX, 1\n\t"
11554     $$emit$$"DEC    ECX\n\t"
11555     $$emit$$"JS     DONE\t# Zero length\n\t"
11556     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11557     $$emit$$"DEC    ECX\n\t"
11558     $$emit$$"JGE    LOOP\n\t"
11559     $$emit$$"JMP    DONE\n\t"
11560     $$emit$$"# LARGE:\n\t"
11561     if (UseFastStosb) {
11562        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11563        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11564     } else if (UseXMMForObjInit) {
11565        $$emit$$"MOV     RDI,RAX\n\t"
11566        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11567        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11568        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11569        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11570        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11571        $$emit$$"ADD     0x40,RAX\n\t"
11572        $$emit$$"# L_zero_64_bytes:\n\t"
11573        $$emit$$"SUB     0x8,RCX\n\t"
11574        $$emit$$"JGE     L_loop\n\t"
11575        $$emit$$"ADD     0x4,RCX\n\t"
11576        $$emit$$"JL      L_tail\n\t"
11577        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11578        $$emit$$"ADD     0x20,RAX\n\t"
11579        $$emit$$"SUB     0x4,RCX\n\t"
11580        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11581        $$emit$$"ADD     0x4,RCX\n\t"
11582        $$emit$$"JLE     L_end\n\t"
11583        $$emit$$"DEC     RCX\n\t"
11584        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11585        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11586        $$emit$$"ADD     0x8,RAX\n\t"
11587        $$emit$$"DEC     RCX\n\t"
11588        $$emit$$"JGE     L_sloop\n\t"
11589        $$emit$$"# L_end:\n\t"
11590     } else {
11591        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11592        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11593     }
11594     $$emit$$"# DONE"
11595   %}
11596   ins_encode %{
11597     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11598                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11599   %}
11600   ins_pipe( pipe_slow );
11601 %}
11602 
11603 // Large ClearArray non-AVX512.
11604 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11605   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11606   match(Set dummy (ClearArray cnt base));
11607   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11608   format %{ $$template
11609     if (UseFastStosb) {
11610        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11611        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11612        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11613     } else if (UseXMMForObjInit) {
11614        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11615        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11616        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11617        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11618        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11619        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11620        $$emit$$"ADD     0x40,RAX\n\t"
11621        $$emit$$"# L_zero_64_bytes:\n\t"
11622        $$emit$$"SUB     0x8,RCX\n\t"
11623        $$emit$$"JGE     L_loop\n\t"
11624        $$emit$$"ADD     0x4,RCX\n\t"
11625        $$emit$$"JL      L_tail\n\t"
11626        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11627        $$emit$$"ADD     0x20,RAX\n\t"
11628        $$emit$$"SUB     0x4,RCX\n\t"
11629        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11630        $$emit$$"ADD     0x4,RCX\n\t"
11631        $$emit$$"JLE     L_end\n\t"
11632        $$emit$$"DEC     RCX\n\t"
11633        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11634        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11635        $$emit$$"ADD     0x8,RAX\n\t"
11636        $$emit$$"DEC     RCX\n\t"
11637        $$emit$$"JGE     L_sloop\n\t"
11638        $$emit$$"# L_end:\n\t"
11639     } else {
11640        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11641        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11642        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11643     }
11644     $$emit$$"# DONE"
11645   %}
11646   ins_encode %{
11647     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11648                  $tmp$$XMMRegister, true, knoreg);
11649   %}
11650   ins_pipe( pipe_slow );
11651 %}
11652 
11653 // Large ClearArray AVX512.
11654 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11655   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11656   match(Set dummy (ClearArray cnt base));
11657   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11658   format %{ $$template
11659     if (UseFastStosb) {
11660        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11661        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11662        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11663     } else if (UseXMMForObjInit) {
11664        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11665        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11666        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11667        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11668        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11669        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11670        $$emit$$"ADD     0x40,RAX\n\t"
11671        $$emit$$"# L_zero_64_bytes:\n\t"
11672        $$emit$$"SUB     0x8,RCX\n\t"
11673        $$emit$$"JGE     L_loop\n\t"
11674        $$emit$$"ADD     0x4,RCX\n\t"
11675        $$emit$$"JL      L_tail\n\t"
11676        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11677        $$emit$$"ADD     0x20,RAX\n\t"
11678        $$emit$$"SUB     0x4,RCX\n\t"
11679        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11680        $$emit$$"ADD     0x4,RCX\n\t"
11681        $$emit$$"JLE     L_end\n\t"
11682        $$emit$$"DEC     RCX\n\t"
11683        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11684        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11685        $$emit$$"ADD     0x8,RAX\n\t"
11686        $$emit$$"DEC     RCX\n\t"
11687        $$emit$$"JGE     L_sloop\n\t"
11688        $$emit$$"# L_end:\n\t"
11689     } else {
11690        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11691        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11692        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11693     }
11694     $$emit$$"# DONE"
11695   %}
11696   ins_encode %{
11697     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11698                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11699   %}
11700   ins_pipe( pipe_slow );
11701 %}
11702 
11703 // Small ClearArray AVX512 constant length.
11704 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11705 %{
11706   predicate(!((ClearArrayNode*)n)->is_large() &&
11707                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11708   match(Set dummy (ClearArray cnt base));
11709   ins_cost(100);
11710   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11711   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11712   ins_encode %{
11713    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11714   %}
11715   ins_pipe(pipe_slow);
11716 %}
11717 
11718 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11719                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11720   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11721   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11722   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11723 
11724   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11725   ins_encode %{
11726     __ string_compare($str1$$Register, $str2$$Register,
11727                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11728                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11734                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11735   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11736   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11737   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11738 
11739   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11740   ins_encode %{
11741     __ string_compare($str1$$Register, $str2$$Register,
11742                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11743                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11744   %}
11745   ins_pipe( pipe_slow );
11746 %}
11747 
11748 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11749                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11750   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11751   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11752   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11753 
11754   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11755   ins_encode %{
11756     __ string_compare($str1$$Register, $str2$$Register,
11757                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11758                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11759   %}
11760   ins_pipe( pipe_slow );
11761 %}
11762 
11763 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11764                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11765   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11766   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11767   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11768 
11769   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11770   ins_encode %{
11771     __ string_compare($str1$$Register, $str2$$Register,
11772                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11773                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11774   %}
11775   ins_pipe( pipe_slow );
11776 %}
11777 
11778 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11779                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11780   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11781   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11782   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11783 
11784   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11785   ins_encode %{
11786     __ string_compare($str1$$Register, $str2$$Register,
11787                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11788                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11789   %}
11790   ins_pipe( pipe_slow );
11791 %}
11792 
11793 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11794                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11795   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11796   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11797   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11798 
11799   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11800   ins_encode %{
11801     __ string_compare($str1$$Register, $str2$$Register,
11802                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11803                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11804   %}
11805   ins_pipe( pipe_slow );
11806 %}
11807 
11808 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11809                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11810   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11811   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11812   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11813 
11814   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11815   ins_encode %{
11816     __ string_compare($str2$$Register, $str1$$Register,
11817                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11818                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11819   %}
11820   ins_pipe( pipe_slow );
11821 %}
11822 
11823 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11824                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11825   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11826   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11827   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11828 
11829   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11830   ins_encode %{
11831     __ string_compare($str2$$Register, $str1$$Register,
11832                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11833                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11834   %}
11835   ins_pipe( pipe_slow );
11836 %}
11837 
11838 // fast string equals
11839 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11840                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11841   predicate(!VM_Version::supports_avx512vlbw());
11842   match(Set result (StrEquals (Binary str1 str2) cnt));
11843   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11844 
11845   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11846   ins_encode %{
11847     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11848                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11849                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11850   %}
11851 
11852   ins_pipe( pipe_slow );
11853 %}
11854 
11855 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11856                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11857   predicate(VM_Version::supports_avx512vlbw());
11858   match(Set result (StrEquals (Binary str1 str2) cnt));
11859   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11860 
11861   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11862   ins_encode %{
11863     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11864                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11865                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11866   %}
11867 
11868   ins_pipe( pipe_slow );
11869 %}
11870 
11871 
11872 // fast search of substring with known size.
11873 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11874                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11875   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11876   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11877   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11878 
11879   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11880   ins_encode %{
11881     int icnt2 = (int)$int_cnt2$$constant;
11882     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
11885       __ string_indexofC8($str1$$Register, $str2$$Register,
11886                           $cnt1$$Register, $cnt2$$Register,
11887                           icnt2, $result$$Register,
11888                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11889     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11891       __ string_indexof($str1$$Register, $str2$$Register,
11892                         $cnt1$$Register, $cnt2$$Register,
11893                         icnt2, $result$$Register,
11894                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11895     }
11896   %}
11897   ins_pipe( pipe_slow );
11898 %}
11899 
11900 // fast search of substring with known size.
11901 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11902                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11903   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11904   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11905   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11906 
11907   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11908   ins_encode %{
11909     int icnt2 = (int)$int_cnt2$$constant;
11910     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11913       __ string_indexofC8($str1$$Register, $str2$$Register,
11914                           $cnt1$$Register, $cnt2$$Register,
11915                           icnt2, $result$$Register,
11916                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11917     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11919       __ string_indexof($str1$$Register, $str2$$Register,
11920                         $cnt1$$Register, $cnt2$$Register,
11921                         icnt2, $result$$Register,
11922                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11923     }
11924   %}
11925   ins_pipe( pipe_slow );
11926 %}
11927 
11928 // fast search of substring with known size.
11929 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11930                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11931   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11932   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11933   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11934 
11935   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11936   ins_encode %{
11937     int icnt2 = (int)$int_cnt2$$constant;
11938     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11941       __ string_indexofC8($str1$$Register, $str2$$Register,
11942                           $cnt1$$Register, $cnt2$$Register,
11943                           icnt2, $result$$Register,
11944                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11945     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11947       __ string_indexof($str1$$Register, $str2$$Register,
11948                         $cnt1$$Register, $cnt2$$Register,
11949                         icnt2, $result$$Register,
11950                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11951     }
11952   %}
11953   ins_pipe( pipe_slow );
11954 %}
11955 
11956 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11957                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11958   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11959   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11960   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11961 
11962   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11963   ins_encode %{
11964     __ string_indexof($str1$$Register, $str2$$Register,
11965                       $cnt1$$Register, $cnt2$$Register,
11966                       (-1), $result$$Register,
11967                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11968   %}
11969   ins_pipe( pipe_slow );
11970 %}
11971 
11972 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11973                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11974   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11975   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11976   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11977 
11978   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11979   ins_encode %{
11980     __ string_indexof($str1$$Register, $str2$$Register,
11981                       $cnt1$$Register, $cnt2$$Register,
11982                       (-1), $result$$Register,
11983                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11984   %}
11985   ins_pipe( pipe_slow );
11986 %}
11987 
11988 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11989                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11990   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11991   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11992   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11993 
11994   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11995   ins_encode %{
11996     __ string_indexof($str1$$Register, $str2$$Register,
11997                       $cnt1$$Register, $cnt2$$Register,
11998                       (-1), $result$$Register,
11999                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12000   %}
12001   ins_pipe( pipe_slow );
12002 %}
12003 
12004 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12005                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12006   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12007   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12008   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12009   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12010   ins_encode %{
12011     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12012                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12013   %}
12014   ins_pipe( pipe_slow );
12015 %}
12016 
12017 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12018                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12019   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12020   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12021   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12022   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12023   ins_encode %{
12024     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12025                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12026   %}
12027   ins_pipe( pipe_slow );
12028 %}
12029 
12030 
12031 // fast array equals
12032 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12033                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12034 %{
12035   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12036   match(Set result (AryEq ary1 ary2));
12037   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12038   //ins_cost(300);
12039 
12040   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12041   ins_encode %{
12042     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12043                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12044                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12045   %}
12046   ins_pipe( pipe_slow );
12047 %}
12048 
12049 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12050                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12051 %{
12052   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12053   match(Set result (AryEq ary1 ary2));
12054   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12055   //ins_cost(300);
12056 
12057   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12058   ins_encode %{
12059     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12060                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12061                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12062   %}
12063   ins_pipe( pipe_slow );
12064 %}
12065 
12066 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12067                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12068 %{
12069   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12070   match(Set result (AryEq ary1 ary2));
12071   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12072   //ins_cost(300);
12073 
12074   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12075   ins_encode %{
12076     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12077                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12078                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12079   %}
12080   ins_pipe( pipe_slow );
12081 %}
12082 
12083 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12084                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12085 %{
12086   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12087   match(Set result (AryEq ary1 ary2));
12088   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12089   //ins_cost(300);
12090 
12091   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12092   ins_encode %{
12093     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12094                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12095                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12096   %}
12097   ins_pipe( pipe_slow );
12098 %}
12099 
12100 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12101                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12102 %{
12103   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12104   match(Set result (HasNegatives ary1 len));
12105   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12106 
12107   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12108   ins_encode %{
12109     __ has_negatives($ary1$$Register, $len$$Register,
12110                      $result$$Register, $tmp3$$Register,
12111                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12112   %}
12113   ins_pipe( pipe_slow );
12114 %}
12115 
12116 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12117                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12118 %{
12119   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12120   match(Set result (HasNegatives ary1 len));
12121   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12122 
12123   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12124   ins_encode %{
12125     __ has_negatives($ary1$$Register, $len$$Register,
12126                      $result$$Register, $tmp3$$Register,
12127                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12128   %}
12129   ins_pipe( pipe_slow );
12130 %}
12131 
12132 
12133 // fast char[] to byte[] compression
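// Part of the compact-strings support: copies UTF-16 chars into Latin-1
// bytes when every char fits in a single byte (see char_array_compress for
// the result convention when a char does not fit).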
12134 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12135                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12136   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12137   match(Set result (StrCompressedCopy src (Binary dst len)));
12138   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12139 
12140   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12141   ins_encode %{
12142     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12143                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12144                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12145                            knoreg, knoreg);
12146   %}
12147   ins_pipe( pipe_slow );
12148 %}
12149 
12150 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12151                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12152   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12153   match(Set result (StrCompressedCopy src (Binary dst len)));
12154   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12155 
12156   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12157   ins_encode %{
12158     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12159                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12160                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12161                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12162   %}
12163   ins_pipe( pipe_slow );
12164 %}
12165 
12166 // fast byte[] to char[] inflation
12167 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12168                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12169   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12170   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12171   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12172 
12173   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12174   ins_encode %{
12175     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12176                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12177   %}
12178   ins_pipe( pipe_slow );
12179 %}
12180 
12181 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12182                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12183   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12184   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12185   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12186 
12187   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12188   ins_encode %{
12189     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12190                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12191   %}
12192   ins_pipe( pipe_slow );
12193 %}
12194 
12195 // encode char[] to byte[] in ISO_8859_1
12196 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12197                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12198                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12199   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12200   match(Set result (EncodeISOArray src (Binary dst len)));
12201   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12202 
12203   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12204   ins_encode %{
12205     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12206                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12207                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12208   %}
12209   ins_pipe( pipe_slow );
12210 %}
12211 
12212 // encode char[] to byte[] in ASCII
12213 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12214                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12215                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12216   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12217   match(Set result (EncodeISOArray src (Binary dst len)));
12218   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12219 
12220   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12221   ins_encode %{
12222     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12223                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12224                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12225   %}
12226   ins_pipe( pipe_slow );
12227 %}
12228 
12229 //----------Control Flow Instructions------------------------------------------
12230 // Signed compare Instructions
12231 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12232   match(Set cr (CmpI op1 op2));
12233   effect( DEF cr, USE op1, USE op2 );
12234   format %{ "CMP    $op1,$op2" %}
12235   opcode(0x3B);  /* Opcode 3B /r */
12236   ins_encode( OpcP, RegReg( op1, op2) );
12237   ins_pipe( ialu_cr_reg_reg );
12238 %}
12239 
12240 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12241   match(Set cr (CmpI op1 op2));
12242   effect( DEF cr, USE op1 );
12243   format %{ "CMP    $op1,$op2" %}
12244   opcode(0x81,0x07);  /* Opcode 81 /7 */
12245   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12246   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12247   ins_pipe( ialu_cr_reg_imm );
12248 %}
12249 
12250 // Cisc-spilled version of cmpI_eReg
12251 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12252   match(Set cr (CmpI op1 (LoadI op2)));
12253 
12254   format %{ "CMP    $op1,$op2" %}
12255   ins_cost(500);
12256   opcode(0x3B);  /* Opcode 3B /r */
12257   ins_encode( OpcP, RegMem( op1, op2) );
12258   ins_pipe( ialu_cr_reg_mem );
12259 %}
12260 
12261 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12262   match(Set cr (CmpI src zero));
12263   effect( DEF cr, USE src );
12264 
12265   format %{ "TEST   $src,$src" %}
12266   opcode(0x85);
12267   ins_encode( OpcP, RegReg( src, src ) );
12268   ins_pipe( ialu_cr_reg_imm );
12269 %}
12270 
12271 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12272   match(Set cr (CmpI (AndI src con) zero));
12273 
12274   format %{ "TEST   $src,$con" %}
12275   opcode(0xF7,0x00);
12276   ins_encode( OpcP, RegOpc(src), Con32(con) );
12277   ins_pipe( ialu_cr_reg_imm );
12278 %}
12279 
12280 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12281   match(Set cr (CmpI (AndI src mem) zero));
12282 
12283   format %{ "TEST   $src,$mem" %}
12284   opcode(0x85);
12285   ins_encode( OpcP, RegMem( src, mem ) );
12286   ins_pipe( ialu_cr_reg_mem );
12287 %}
12288 
12289 // Unsigned compare Instructions; really, same as signed except they
12290 // produce an eFlagsRegU instead of eFlagsReg.
12291 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12292   match(Set cr (CmpU op1 op2));
12293 
12294   format %{ "CMPu   $op1,$op2" %}
12295   opcode(0x3B);  /* Opcode 3B /r */
12296   ins_encode( OpcP, RegReg( op1, op2) );
12297   ins_pipe( ialu_cr_reg_reg );
12298 %}
12299 
12300 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12301   match(Set cr (CmpU op1 op2));
12302 
12303   format %{ "CMPu   $op1,$op2" %}
12304   opcode(0x81,0x07);  /* Opcode 81 /7 */
12305   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12306   ins_pipe( ialu_cr_reg_imm );
12307 %}
12308 
12309 // // Cisc-spilled version of cmpU_eReg
12310 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12311   match(Set cr (CmpU op1 (LoadI op2)));
12312 
12313   format %{ "CMPu   $op1,$op2" %}
12314   ins_cost(500);
12315   opcode(0x3B);  /* Opcode 3B /r */
12316   ins_encode( OpcP, RegMem( op1, op2) );
12317   ins_pipe( ialu_cr_reg_mem );
12318 %}
12319 
12320 // // Cisc-spilled version of cmpU_eReg
12321 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12322 //  match(Set cr (CmpU (LoadI op1) op2));
12323 //
12324 //  format %{ "CMPu   $op1,$op2" %}
12325 //  ins_cost(500);
12326 //  opcode(0x39);  /* Opcode 39 /r */
12327 //  ins_encode( OpcP, RegMem( op1, op2) );
12328 //%}
12329 
12330 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12331   match(Set cr (CmpU src zero));
12332 
12333   format %{ "TESTu  $src,$src" %}
12334   opcode(0x85);
12335   ins_encode( OpcP, RegReg( src, src ) );
12336   ins_pipe( ialu_cr_reg_imm );
12337 %}
12338 
12339 // Unsigned pointer compare Instructions
12340 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12341   match(Set cr (CmpP op1 op2));
12342 
12343   format %{ "CMPu   $op1,$op2" %}
12344   opcode(0x3B);  /* Opcode 3B /r */
12345   ins_encode( OpcP, RegReg( op1, op2) );
12346   ins_pipe( ialu_cr_reg_reg );
12347 %}
12348 
12349 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12350   match(Set cr (CmpP op1 op2));
12351 
12352   format %{ "CMPu   $op1,$op2" %}
12353   opcode(0x81,0x07);  /* Opcode 81 /7 */
12354   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12355   ins_pipe( ialu_cr_reg_imm );
12356 %}
12357 
12358 // // Cisc-spilled version of cmpP_eReg
12359 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12360   match(Set cr (CmpP op1 (LoadP op2)));
12361 
12362   format %{ "CMPu   $op1,$op2" %}
12363   ins_cost(500);
12364   opcode(0x3B);  /* Opcode 3B /r */
12365   ins_encode( OpcP, RegMem( op1, op2) );
12366   ins_pipe( ialu_cr_reg_mem );
12367 %}
12368 
12369 // // Cisc-spilled version of cmpP_eReg
12370 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12371 //  match(Set cr (CmpP (LoadP op1) op2));
12372 //
12373 //  format %{ "CMPu   $op1,$op2" %}
12374 //  ins_cost(500);
12375 //  opcode(0x39);  /* Opcode 39 /r */
12376 //  ins_encode( OpcP, RegMem( op1, op2) );
12377 //%}
12378 
12379 // Compare raw pointer (used in out-of-heap check).
12380 // Only works because non-oop pointers must be raw pointers
12381 // and raw pointers have no anti-dependencies.
12382 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12383   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12384   match(Set cr (CmpP op1 (LoadP op2)));
12385 
12386   format %{ "CMPu   $op1,$op2" %}
12387   opcode(0x3B);  /* Opcode 3B /r */
12388   ins_encode( OpcP, RegMem( op1, op2) );
12389   ins_pipe( ialu_cr_reg_mem );
12390 %}
12391 
12392 //
12393 // This will generate a signed flags result. This should be ok
12394 // since any compare to a zero should be eq/neq.
12395 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12396   match(Set cr (CmpP src zero));
12397 
12398   format %{ "TEST   $src,$src" %}
12399   opcode(0x85);
12400   ins_encode( OpcP, RegReg( src, src ) );
12401   ins_pipe( ialu_cr_reg_imm );
12402 %}
12403 
12404 // Cisc-spilled version of testP_reg
12405 // This will generate a signed flags result. This should be ok
12406 // since any compare to a zero should be eq/neq.
12407 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12408   match(Set cr (CmpP (LoadP op) zero));
12409 
12410   format %{ "TEST   $op,0xFFFFFFFF" %}
12411   ins_cost(500);
12412   opcode(0xF7);               /* Opcode F7 /0 */
12413   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12414   ins_pipe( ialu_cr_reg_imm );
12415 %}
12416 
12417 // Yanked all unsigned pointer compare operations.
12418 // Pointer compares are done with CmpP which is already unsigned.
12419 
12420 //----------Max and Min--------------------------------------------------------
12421 // Min Instructions
12422 ////
12423 //   *** Min and Max using the conditional move are slower than the
12424 //   *** branch version on a Pentium III.
12425 // // Conditional move for min
12426 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12427 //  effect( USE_DEF op2, USE op1, USE cr );
12428 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12429 //  opcode(0x4C,0x0F);
12430 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12431 //  ins_pipe( pipe_cmov_reg );
12432 //%}
12433 //
12434 //// Min Register with Register (P6 version)
12435 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12436 //  predicate(VM_Version::supports_cmov() );
12437 //  match(Set op2 (MinI op1 op2));
12438 //  ins_cost(200);
12439 //  expand %{
12440 //    eFlagsReg cr;
12441 //    compI_eReg(cr,op1,op2);
12442 //    cmovI_reg_lt(op2,op1,cr);
12443 //  %}
12444 //%}
12445 
12446 // Min Register with Register (generic version)
12447 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12448   match(Set dst (MinI dst src));
12449   effect(KILL flags);
12450   ins_cost(300);
12451 
12452   format %{ "MIN    $dst,$src" %}
12453   opcode(0xCC);
12454   ins_encode( min_enc(dst,src) );
12455   ins_pipe( pipe_slow );
12456 %}
12457 
12458 // Max Register with Register
12459 //   *** Min and Max using the conditional move are slower than the
12460 //   *** branch version on a Pentium III.
12461 // // Conditional move for max
12462 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12463 //  effect( USE_DEF op2, USE op1, USE cr );
12464 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12465 //  opcode(0x4F,0x0F);
12466 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12467 //  ins_pipe( pipe_cmov_reg );
12468 //%}
12469 //
12470 // // Max Register with Register (P6 version)
12471 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12472 //  predicate(VM_Version::supports_cmov() );
12473 //  match(Set op2 (MaxI op1 op2));
12474 //  ins_cost(200);
12475 //  expand %{
12476 //    eFlagsReg cr;
12477 //    compI_eReg(cr,op1,op2);
12478 //    cmovI_reg_gt(op2,op1,cr);
12479 //  %}
12480 //%}
12481 
12482 // Max Register with Register (generic version)
12483 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12484   match(Set dst (MaxI dst src));
12485   effect(KILL flags);
12486   ins_cost(300);
12487 
12488   format %{ "MAX    $dst,$src" %}
12489   opcode(0xCC);
12490   ins_encode( max_enc(dst,src) );
12491   ins_pipe( pipe_slow );
12492 %}
12493 
12494 // ============================================================================
12495 // Counted Loop limit node which represents exact final iterator value.
12496 // Note: the resulting value should fit into integer range since
12497 // counted loops have a limit check on overflow.
12498 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12499   match(Set limit (LoopLimit (Binary init limit) stride));
12500   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12501   ins_cost(300);
12502 
12503   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12504   ins_encode %{
12505     int strd = (int)$stride$$constant;
12506     assert(strd != 1 && strd != -1, "sanity");
12507     int m1 = (strd > 0) ? 1 : -1;
12508     // Convert limit to long (EAX:EDX)
12509     __ cdql();
12510     // Convert init to long (init:tmp)
12511     __ movl($tmp$$Register, $init$$Register);
12512     __ sarl($tmp$$Register, 31);
12513     // $limit - $init
12514     __ subl($limit$$Register, $init$$Register);
12515     __ sbbl($limit_hi$$Register, $tmp$$Register);
12516     // + ($stride - 1)
12517     if (strd > 0) {
12518       __ addl($limit$$Register, (strd - 1));
12519       __ adcl($limit_hi$$Register, 0);
12520       __ movl($tmp$$Register, strd);
12521     } else {
12522       __ addl($limit$$Register, (strd + 1));
12523       __ adcl($limit_hi$$Register, -1);
12524       __ lneg($limit_hi$$Register, $limit$$Register);
12525       __ movl($tmp$$Register, -strd);
12526     }
12527     // signed division: (EAX:EDX) / pos_stride
12528     __ idivl($tmp$$Register);
12529     if (strd < 0) {
12530       // restore sign
12531       __ negl($tmp$$Register);
12532     }
12533     // (EAX) * stride
12534     __ mull($tmp$$Register);
12535     // + init (ignore upper bits)
12536     __ addl($limit$$Register, $init$$Register);
12537   %}
12538   ins_pipe( pipe_slow );
12539 %}
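// A minimal C sketch of what the encoding above computes (illustrative only;
// the real code widens to EAX:EDX so the intermediate sum cannot overflow):
//
//   int exact_limit(int init, int limit, int stride) {
//     // assumes stride > 1; a negative stride is handled by negating, as above
//     long long span  = (long long)limit - init;       // distance still to cover
//     long long trips = (span + stride - 1) / stride;  // round the trip count up
//     return (int)(init + trips * stride);             // exact final iterator value
//   }
//
// For example init = 0, limit = 10, stride = 3 gives trips = 4, so the exact
// limit is 12: the first value >= 10 reachable from 0 in steps of 3.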
12540 
12541 // ============================================================================
12542 // Branch Instructions
12543 // Jump Table
12544 instruct jumpXtnd(rRegI switch_val) %{
12545   match(Jump switch_val);
12546   ins_cost(350);
12547   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12548   ins_encode %{
12549     // Jump to Address(table_base + switch_reg)
12550     Address index(noreg, $switch_val$$Register, Address::times_1);
12551     __ jump(ArrayAddress($constantaddress, index));
12552   %}
12553   ins_pipe(pipe_jmp);
12554 %}
12555 
12556 // Jump Direct - Label defines a relative address from JMP+1
12557 instruct jmpDir(label labl) %{
12558   match(Goto);
12559   effect(USE labl);
12560 
12561   ins_cost(300);
12562   format %{ "JMP    $labl" %}
12563   size(5);
12564   ins_encode %{
12565     Label* L = $labl$$label;
12566     __ jmp(*L, false); // Always long jump
12567   %}
12568   ins_pipe( pipe_jmp );
12569 %}
12570 
12571 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12572 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12573   match(If cop cr);
12574   effect(USE labl);
12575 
12576   ins_cost(300);
12577   format %{ "J$cop    $labl" %}
12578   size(6);
12579   ins_encode %{
12580     Label* L = $labl$$label;
12581     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12582   %}
12583   ins_pipe( pipe_jcc );
12584 %}
12585 
12586 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12587 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12588   predicate(!n->has_vector_mask_set());
12589   match(CountedLoopEnd cop cr);
12590   effect(USE labl);
12591 
12592   ins_cost(300);
12593   format %{ "J$cop    $labl\t# Loop end" %}
12594   size(6);
12595   ins_encode %{
12596     Label* L = $labl$$label;
12597     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12598   %}
12599   ins_pipe( pipe_jcc );
12600 %}
12601 
12602 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12603 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12604   predicate(!n->has_vector_mask_set());
12605   match(CountedLoopEnd cop cmp);
12606   effect(USE labl);
12607 
12608   ins_cost(300);
12609   format %{ "J$cop,u  $labl\t# Loop end" %}
12610   size(6);
12611   ins_encode %{
12612     Label* L = $labl$$label;
12613     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12614   %}
12615   ins_pipe( pipe_jcc );
12616 %}
12617 
12618 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12619   predicate(!n->has_vector_mask_set());
12620   match(CountedLoopEnd cop cmp);
12621   effect(USE labl);
12622 
12623   ins_cost(200);
12624   format %{ "J$cop,u  $labl\t# Loop end" %}
12625   size(6);
12626   ins_encode %{
12627     Label* L = $labl$$label;
12628     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12629   %}
12630   ins_pipe( pipe_jcc );
12631 %}
12632 
12633 // mask version
12634 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12635 // The bounded mask operand used in the following pattern is needed for
12636 // post-loop multiversioning.
12637 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12638   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12639   match(CountedLoopEnd cop cr);
12640   effect(USE labl, TEMP ktmp);
12641 
12642   ins_cost(400);
12643   format %{ "J$cop    $labl\t# Loop end\n\t"
12644             "restorevectmask \t# vector mask restore for loops" %}
12645   size(10);
12646   ins_encode %{
12647     Label* L = $labl$$label;
12648     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12649     __ restorevectmask($ktmp$$KRegister);
12650   %}
12651   ins_pipe( pipe_jcc );
12652 %}
12653 
12654 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12655 // The bounded mask operand used in the following pattern is needed for
12656 // post-loop multiversioning.
12657 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12658   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12659   match(CountedLoopEnd cop cmp);
12660   effect(USE labl, TEMP ktmp);
12661 
12662   ins_cost(400);
12663   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12664             "restorevectmask \t# vector mask restore for loops" %}
12665   size(10);
12666   ins_encode %{
12667     Label* L = $labl$$label;
12668     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12669     __ restorevectmask($ktmp$$KRegister);
12670   %}
12671   ins_pipe( pipe_jcc );
12672 %}
12673 
12674 // The bounded mask operand used in the following pattern is needed for
12675 // post-loop multiversioning.
12676 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12677   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12678   match(CountedLoopEnd cop cmp);
12679   effect(USE labl, TEMP ktmp);
12680 
12681   ins_cost(300);
12682   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12683             "restorevectmask \t# vector mask restore for loops" %}
12684   size(10);
12685   ins_encode %{
12686     Label* L = $labl$$label;
12687     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12688     __ restorevectmask($ktmp$$KRegister);
12689   %}
12690   ins_pipe( pipe_jcc );
12691 %}
12692 
12693 // Jump Direct Conditional - using unsigned comparison
12694 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12695   match(If cop cmp);
12696   effect(USE labl);
12697 
12698   ins_cost(300);
12699   format %{ "J$cop,u  $labl" %}
12700   size(6);
12701   ins_encode %{
12702     Label* L = $labl$$label;
12703     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12704   %}
12705   ins_pipe(pipe_jcc);
12706 %}
12707 
12708 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12709   match(If cop cmp);
12710   effect(USE labl);
12711 
12712   ins_cost(200);
12713   format %{ "J$cop,u  $labl" %}
12714   size(6);
12715   ins_encode %{
12716     Label* L = $labl$$label;
12717     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12718   %}
12719   ins_pipe(pipe_jcc);
12720 %}
12721 
12722 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12723   match(If cop cmp);
12724   effect(USE labl);
12725 
12726   ins_cost(200);
12727   format %{ $$template
12728     if ($cop$$cmpcode == Assembler::notEqual) {
12729       $$emit$$"JP,u   $labl\n\t"
12730       $$emit$$"J$cop,u   $labl"
12731     } else {
12732       $$emit$$"JP,u   done\n\t"
12733       $$emit$$"J$cop,u   $labl\n\t"
12734       $$emit$$"done:"
12735     }
12736   %}
12737   ins_encode %{
12738     Label* l = $labl$$label;
12739     if ($cop$$cmpcode == Assembler::notEqual) {
12740       __ jcc(Assembler::parity, *l, false);
12741       __ jcc(Assembler::notEqual, *l, false);
12742     } else if ($cop$$cmpcode == Assembler::equal) {
12743       Label done;
12744       __ jccb(Assembler::parity, done);
12745       __ jcc(Assembler::equal, *l, false);
12746       __ bind(done);
12747     } else {
12748        ShouldNotReachHere();
12749     }
12750   %}
12751   ins_pipe(pipe_jcc);
12752 %}
12753 
12754 // ============================================================================
12755 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12756 // array for an instance of the superklass.  Set a hidden internal cache on a
12757 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12758 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
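// A rough C sketch of the scan performed below (hypothetical, simplified data
// layout; the real offsets of Klass::secondary_supers and
// Klass::secondary_super_cache are emitted by enc_PartialSubtypeCheck()):
//
//   struct Klass {
//     struct Klass** secondary_supers;       // array scanned with REPNE SCASD
//     int            secondary_supers_len;
//     struct Klass*  secondary_super_cache;  // updated on a hit
//   };
//
//   static int partial_subtype_check(struct Klass* sub, struct Klass* super) {
//     for (int i = 0; i < sub->secondary_supers_len; i++) {
//       if (sub->secondary_supers[i] == super) {
//         sub->secondary_super_cache = super; // cache the hit for next time
//         return 0;                           // hit: result zero, flags Z
//       }
//     }
//     return 1;                               // miss: result not-zero, flags NZ
//   }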
12759 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12760   match(Set result (PartialSubtypeCheck sub super));
12761   effect( KILL rcx, KILL cr );
12762 
12763   ins_cost(1100);  // slightly larger than the next version
12764   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12765             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12766             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12767             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12768             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12769             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12770             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12771      "miss:\t" %}
12772 
12773   opcode(0x1); // Force a XOR of EDI
12774   ins_encode( enc_PartialSubtypeCheck() );
12775   ins_pipe( pipe_slow );
12776 %}
12777 
12778 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12779   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12780   effect( KILL rcx, KILL result );
12781 
12782   ins_cost(1000);
12783   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12784             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12785             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12786             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12787             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12788             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12789      "miss:\t" %}
12790 
12791   opcode(0x0);  // No need to XOR EDI
12792   ins_encode( enc_PartialSubtypeCheck() );
12793   ins_pipe( pipe_slow );
12794 %}
12795 
12796 // ============================================================================
12797 // Branch Instructions -- short offset versions
12798 //
12799 // These instructions are used to replace jumps of a long offset (the default
12800 // match) with jumps of a shorter offset.  These instructions are all tagged
12801 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12802 // match rules in general matching.  Instead, the ADLC generates a conversion
12803 // method in the MachNode which can be used to do in-place replacement of the
12804 // long variant with the shorter variant.  The compiler determines whether a
12805 // short branch can be used via the is_short_branch_offset() predicate in the
12806 // machine-specific code section of the file.
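// For example, the 5-byte jmpDir rule above has a 2-byte jmpDir_short twin
// below, tagged with ins_short_branch(1); when the final branch offset fits in
// an 8-bit displacement, the MachNode is converted in place to the short form.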
12807 
12808 // Jump Direct - Label defines a relative address from JMP+1
12809 instruct jmpDir_short(label labl) %{
12810   match(Goto);
12811   effect(USE labl);
12812 
12813   ins_cost(300);
12814   format %{ "JMP,s  $labl" %}
12815   size(2);
12816   ins_encode %{
12817     Label* L = $labl$$label;
12818     __ jmpb(*L);
12819   %}
12820   ins_pipe( pipe_jmp );
12821   ins_short_branch(1);
12822 %}
12823 
12824 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12825 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12826   match(If cop cr);
12827   effect(USE labl);
12828 
12829   ins_cost(300);
12830   format %{ "J$cop,s  $labl" %}
12831   size(2);
12832   ins_encode %{
12833     Label* L = $labl$$label;
12834     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12835   %}
12836   ins_pipe( pipe_jcc );
12837   ins_short_branch(1);
12838 %}
12839 
12840 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12841 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12842   match(CountedLoopEnd cop cr);
12843   effect(USE labl);
12844 
12845   ins_cost(300);
12846   format %{ "J$cop,s  $labl\t# Loop end" %}
12847   size(2);
12848   ins_encode %{
12849     Label* L = $labl$$label;
12850     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12851   %}
12852   ins_pipe( pipe_jcc );
12853   ins_short_branch(1);
12854 %}
12855 
12856 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12857 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12858   match(CountedLoopEnd cop cmp);
12859   effect(USE labl);
12860 
12861   ins_cost(300);
12862   format %{ "J$cop,us $labl\t# Loop end" %}
12863   size(2);
12864   ins_encode %{
12865     Label* L = $labl$$label;
12866     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12867   %}
12868   ins_pipe( pipe_jcc );
12869   ins_short_branch(1);
12870 %}
12871 
12872 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12873   match(CountedLoopEnd cop cmp);
12874   effect(USE labl);
12875 
12876   ins_cost(300);
12877   format %{ "J$cop,us $labl\t# Loop end" %}
12878   size(2);
12879   ins_encode %{
12880     Label* L = $labl$$label;
12881     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12882   %}
12883   ins_pipe( pipe_jcc );
12884   ins_short_branch(1);
12885 %}
12886 
12887 // Jump Direct Conditional - using unsigned comparison
12888 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12889   match(If cop cmp);
12890   effect(USE labl);
12891 
12892   ins_cost(300);
12893   format %{ "J$cop,us $labl" %}
12894   size(2);
12895   ins_encode %{
12896     Label* L = $labl$$label;
12897     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12898   %}
12899   ins_pipe( pipe_jcc );
12900   ins_short_branch(1);
12901 %}
12902 
12903 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12904   match(If cop cmp);
12905   effect(USE labl);
12906 
12907   ins_cost(300);
12908   format %{ "J$cop,us $labl" %}
12909   size(2);
12910   ins_encode %{
12911     Label* L = $labl$$label;
12912     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12913   %}
12914   ins_pipe( pipe_jcc );
12915   ins_short_branch(1);
12916 %}
12917 
12918 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12919   match(If cop cmp);
12920   effect(USE labl);
12921 
12922   ins_cost(300);
12923   format %{ $$template
12924     if ($cop$$cmpcode == Assembler::notEqual) {
12925       $$emit$$"JP,u,s   $labl\n\t"
12926       $$emit$$"J$cop,u,s   $labl"
12927     } else {
12928       $$emit$$"JP,u,s   done\n\t"
12929       $$emit$$"J$cop,u,s  $labl\n\t"
12930       $$emit$$"done:"
12931     }
12932   %}
12933   size(4);
12934   ins_encode %{
12935     Label* l = $labl$$label;
12936     if ($cop$$cmpcode == Assembler::notEqual) {
12937       __ jccb(Assembler::parity, *l);
12938       __ jccb(Assembler::notEqual, *l);
12939     } else if ($cop$$cmpcode == Assembler::equal) {
12940       Label done;
12941       __ jccb(Assembler::parity, done);
12942       __ jccb(Assembler::equal, *l);
12943       __ bind(done);
12944     } else {
12945        ShouldNotReachHere();
12946     }
12947   %}
12948   ins_pipe(pipe_jcc);
12949   ins_short_branch(1);
12950 %}
12951 
12952 // ============================================================================
12953 // Long Compare
12954 //
12955 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12956 // is tricky.  The flavor of compare used depends on whether we are testing
12957 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12958 // The GE test is the negated LT test.  The LE test can be had by commuting
12959 // the operands (yielding a GE test) and then negating; negate again for the
12960 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12961 // NE test is negated from that.
12962 
12963 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12964 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12965 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12966 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12967 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12968 // foo match ends up with the wrong leaf.  One fix is to not match both
12969 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12970 // both forms beat the trinary form of long-compare and both are very useful
12971 // on Intel which has so few registers.
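// As a concrete illustration in plain C (operating on the {hi,lo} halves the
// same way the rules below do; a sketch, not the generated code):
//
//   // signed x < y for 64-bit values held as a high and a low 32-bit half
//   static int long_lt(int xhi, unsigned xlo, int yhi, unsigned ylo) {
//     if (xhi != yhi) return xhi < yhi;   // signed compare of the high halves
//     return xlo < ylo;                   // unsigned compare of the low halves
//   }
//
//   // x == 0 needs only the OR of the two halves
//   static int long_is_zero(int xhi, unsigned xlo) {
//     return ((unsigned)xhi | xlo) == 0;
//   }
//
// x >= y is !long_lt(x, y); x > y is long_lt(y, x) (operands swapped); and
// x <= y is !long_lt(y, x).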
12972 
12973 // Manifest a CmpL result in an integer register.  Very painful.
12974 // This is the test to avoid.
12975 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12976   match(Set dst (CmpL3 src1 src2));
12977   effect( KILL flags );
12978   ins_cost(1000);
12979   format %{ "XOR    $dst,$dst\n\t"
12980             "CMP    $src1.hi,$src2.hi\n\t"
12981             "JLT,s  m_one\n\t"
12982             "JGT,s  p_one\n\t"
12983             "CMP    $src1.lo,$src2.lo\n\t"
12984             "JB,s   m_one\n\t"
12985             "JEQ,s  done\n"
12986     "p_one:\tINC    $dst\n\t"
12987             "JMP,s  done\n"
12988     "m_one:\tDEC    $dst\n"
12989      "done:" %}
12990   ins_encode %{
12991     Label p_one, m_one, done;
12992     __ xorptr($dst$$Register, $dst$$Register);
12993     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12994     __ jccb(Assembler::less,    m_one);
12995     __ jccb(Assembler::greater, p_one);
12996     __ cmpl($src1$$Register, $src2$$Register);
12997     __ jccb(Assembler::below,   m_one);
12998     __ jccb(Assembler::equal,   done);
12999     __ bind(p_one);
13000     __ incrementl($dst$$Register);
13001     __ jmpb(done);
13002     __ bind(m_one);
13003     __ decrementl($dst$$Register);
13004     __ bind(done);
13005   %}
13006   ins_pipe( pipe_slow );
13007 %}
13008 
13009 //======
13010 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13011 // compares.  Can be used for LE or GT compares by reversing arguments.
13012 // NOT GOOD FOR EQ/NE tests.
13013 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13014   match( Set flags (CmpL src zero ));
13015   ins_cost(100);
13016   format %{ "TEST   $src.hi,$src.hi" %}
13017   opcode(0x85);
13018   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13019   ins_pipe( ialu_cr_reg_reg );
13020 %}
13021 
13022 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13023 // compares.  Can be used for LE or GT compares by reversing arguments.
13024 // NOT GOOD FOR EQ/NE tests.
13025 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13026   match( Set flags (CmpL src1 src2 ));
13027   effect( TEMP tmp );
13028   ins_cost(300);
13029   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13030             "MOV    $tmp,$src1.hi\n\t"
13031             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13032   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13033   ins_pipe( ialu_cr_reg_reg );
13034 %}
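// The CMP/MOV/SBB sequence above leaves the flags as if the full 64-bit
// subtraction $src1 - $src2 had been performed, so a signed "less"/"greater or
// equal" branch can follow.  A hedged C sketch of the idea (illustrative only;
// the real code branches on the flags instead of materializing a value):
//
//   static int long_lt_via_sbb(long long x, long long y) {
//     unsigned xlo = (unsigned)x,    ylo = (unsigned)y;
//     int      xhi = (int)(x >> 32), yhi = (int)(y >> 32);
//     int borrow = (xlo < ylo);                            // CMP lo,lo sets carry
//     long long hi_diff = (long long)xhi - yhi - borrow;   // SBB hi,hi
//     return hi_diff < 0;                                  // "less" <=> negative
//   }
//
// Widening hi_diff to 64 bits stands in for the SF != OF condition that the
// hardware jl test applies to the 32-bit SBB result.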
13035 
13036 // Long compares reg < zero/reg OR reg >= zero/reg.
13037 // Just a wrapper for a normal branch, plus the predicate test.
13038 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13039   match(If cmp flags);
13040   effect(USE labl);
13041   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13042   expand %{
13043     jmpCon(cmp,flags,labl);    // JLT or JGE...
13044   %}
13045 %}
13046 
13047 //======
13048 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13049 // compares.  Can be used for LE or GT compares by reversing arguments.
13050 // NOT GOOD FOR EQ/NE tests.
13051 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13052   match(Set flags (CmpUL src zero));
13053   ins_cost(100);
13054   format %{ "TEST   $src.hi,$src.hi" %}
13055   opcode(0x85);
13056   ins_encode(OpcP, RegReg_Hi2(src, src));
13057   ins_pipe(ialu_cr_reg_reg);
13058 %}
13059 
13060 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13061 // compares.  Can be used for LE or GT compares by reversing arguments.
13062 // NOT GOOD FOR EQ/NE tests.
13063 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13064   match(Set flags (CmpUL src1 src2));
13065   effect(TEMP tmp);
13066   ins_cost(300);
13067   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13068             "MOV    $tmp,$src1.hi\n\t"
13069             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13070   ins_encode(long_cmp_flags2(src1, src2, tmp));
13071   ins_pipe(ialu_cr_reg_reg);
13072 %}
13073 
13074 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13075 // Just a wrapper for a normal branch, plus the predicate test.
13076 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13077   match(If cmp flags);
13078   effect(USE labl);
13079   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13080   expand %{
13081     jmpCon(cmp, flags, labl);    // JLT or JGE...
13082   %}
13083 %}
13084 
13085 // Compare 2 longs and CMOVE longs.
13086 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13087   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13088   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13089   ins_cost(400);
13090   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13091             "CMOV$cmp $dst.hi,$src.hi" %}
13092   opcode(0x0F,0x40);
13093   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13094   ins_pipe( pipe_cmov_reg_long );
13095 %}
13096 
13097 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13098   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13099   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13100   ins_cost(500);
13101   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13102             "CMOV$cmp $dst.hi,$src.hi" %}
13103   opcode(0x0F,0x40);
13104   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13105   ins_pipe( pipe_cmov_reg_long );
13106 %}
13107 
13108 // Compare 2 longs and CMOVE ints.
13109 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13110   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13111   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13112   ins_cost(200);
13113   format %{ "CMOV$cmp $dst,$src" %}
13114   opcode(0x0F,0x40);
13115   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13116   ins_pipe( pipe_cmov_reg );
13117 %}
13118 
13119 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13120   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13121   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13122   ins_cost(250);
13123   format %{ "CMOV$cmp $dst,$src" %}
13124   opcode(0x0F,0x40);
13125   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13126   ins_pipe( pipe_cmov_mem );
13127 %}
13128 
13129 // Compare 2 longs and CMOVE ints.
13130 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13131   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13132   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13133   ins_cost(200);
13134   format %{ "CMOV$cmp $dst,$src" %}
13135   opcode(0x0F,0x40);
13136   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13137   ins_pipe( pipe_cmov_reg );
13138 %}
13139 
13140 // Compare 2 longs and CMOVE doubles
13141 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13142   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13143   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13144   ins_cost(200);
13145   expand %{
13146     fcmovDPR_regS(cmp,flags,dst,src);
13147   %}
13148 %}
13149 
13150 // Compare 2 longs and CMOVE doubles
13151 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13152   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13153   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13154   ins_cost(200);
13155   expand %{
13156     fcmovD_regS(cmp,flags,dst,src);
13157   %}
13158 %}
13159 
13160 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13161   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13162   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13163   ins_cost(200);
13164   expand %{
13165     fcmovFPR_regS(cmp,flags,dst,src);
13166   %}
13167 %}
13168 
13169 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13170   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13171   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13172   ins_cost(200);
13173   expand %{
13174     fcmovF_regS(cmp,flags,dst,src);
13175   %}
13176 %}
13177 
13178 //======
13179 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13180 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13181   match( Set flags (CmpL src zero ));
13182   effect(TEMP tmp);
13183   ins_cost(200);
13184   format %{ "MOV    $tmp,$src.lo\n\t"
13185             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13186   ins_encode( long_cmp_flags0( src, tmp ) );
13187   ins_pipe( ialu_reg_reg_long );
13188 %}
13189 
13190 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13191 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13192   match( Set flags (CmpL src1 src2 ));
13193   ins_cost(200+300);
13194   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13195             "JNE,s  skip\n\t"
13196             "CMP    $src1.hi,$src2.hi\n\t"
13197      "skip:\t" %}
13198   ins_encode( long_cmp_flags1( src1, src2 ) );
13199   ins_pipe( ialu_cr_reg_reg );
13200 %}
13201 
13202 // Long compare reg == zero/reg OR reg != zero/reg
13203 // Just a wrapper for a normal branch, plus the predicate test.
13204 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13205   match(If cmp flags);
13206   effect(USE labl);
13207   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13208   expand %{
13209     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13210   %}
13211 %}
13212 
13213 //======
13214 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13215 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13216   match(Set flags (CmpUL src zero));
13217   effect(TEMP tmp);
13218   ins_cost(200);
13219   format %{ "MOV    $tmp,$src.lo\n\t"
13220             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13221   ins_encode(long_cmp_flags0(src, tmp));
13222   ins_pipe(ialu_reg_reg_long);
13223 %}
13224 
13225 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13226 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13227   match(Set flags (CmpUL src1 src2));
13228   ins_cost(200+300);
13229   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13230             "JNE,s  skip\n\t"
13231             "CMP    $src1.hi,$src2.hi\n\t"
13232      "skip:\t" %}
13233   ins_encode(long_cmp_flags1(src1, src2));
13234   ins_pipe(ialu_cr_reg_reg);
13235 %}
13236 
13237 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13238 // Just a wrapper for a normal branch, plus the predicate test.
13239 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13240   match(If cmp flags);
13241   effect(USE labl);
13242   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13243   expand %{
13244     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13245   %}
13246 %}
13247 
13248 // Compare 2 longs and CMOVE longs.
13249 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13250   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13251   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13252   ins_cost(400);
13253   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13254             "CMOV$cmp $dst.hi,$src.hi" %}
13255   opcode(0x0F,0x40);
13256   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13257   ins_pipe( pipe_cmov_reg_long );
13258 %}
13259 
13260 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13261   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13262   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13263   ins_cost(500);
13264   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13265             "CMOV$cmp $dst.hi,$src.hi" %}
13266   opcode(0x0F,0x40);
13267   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13268   ins_pipe( pipe_cmov_reg_long );
13269 %}
13270 
13271 // Compare 2 longs and CMOVE ints.
13272 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13273   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13274   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13275   ins_cost(200);
13276   format %{ "CMOV$cmp $dst,$src" %}
13277   opcode(0x0F,0x40);
13278   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13279   ins_pipe( pipe_cmov_reg );
13280 %}
13281 
13282 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13285   ins_cost(250);
13286   format %{ "CMOV$cmp $dst,$src" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13289   ins_pipe( pipe_cmov_mem );
13290 %}
13291 
13292 // Compare 2 longs and CMOVE ints.
13293 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13294   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13295   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13296   ins_cost(200);
13297   format %{ "CMOV$cmp $dst,$src" %}
13298   opcode(0x0F,0x40);
13299   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13300   ins_pipe( pipe_cmov_reg );
13301 %}
13302 
13303 // Compare 2 longs and CMOVE doubles
13304 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13305   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13306   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13307   ins_cost(200);
13308   expand %{
13309     fcmovDPR_regS(cmp,flags,dst,src);
13310   %}
13311 %}
13312 
13313 // Compare 2 longs and CMOVE doubles
13314 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13315   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13316   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13317   ins_cost(200);
13318   expand %{
13319     fcmovD_regS(cmp,flags,dst,src);
13320   %}
13321 %}
13322 
13323 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13324   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13325   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13326   ins_cost(200);
13327   expand %{
13328     fcmovFPR_regS(cmp,flags,dst,src);
13329   %}
13330 %}
13331 
13332 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13333   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13334   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13335   ins_cost(200);
13336   expand %{
13337     fcmovF_regS(cmp,flags,dst,src);
13338   %}
13339 %}
13340 
13341 //======
13342 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13343 // Same as cmpL_reg_flags_LEGT except must negate src
13344 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13345   match( Set flags (CmpL src zero ));
13346   effect( TEMP tmp );
13347   ins_cost(300);
13348   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13349             "CMP    $tmp,$src.lo\n\t"
13350             "SBB    $tmp,$src.hi\n\t" %}
13351   ins_encode( long_cmp_flags3(src, tmp) );
13352   ins_pipe( ialu_reg_reg_long );
13353 %}
13354 
13355 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13356 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13357 // requires a commuted test to get the same result.
13358 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13359   match( Set flags (CmpL src1 src2 ));
13360   effect( TEMP tmp );
13361   ins_cost(300);
13362   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13363             "MOV    $tmp,$src2.hi\n\t"
13364             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13365   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13366   ins_pipe( ialu_cr_reg_reg );
13367 %}
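// Worked example of the swapped-operand trick (illustrative): to branch on
// x <= y, the rule above computes flags for y - x (operands swapped), so the
// branch must use the commuted condition, which the cmpOp_commute operand in
// cmpL_LEGT supplies.  In C terms:
//
//   static int long_le(long long x, long long y) { return y >= x; }  // x <= y
//   static int long_gt(long long x, long long y) { return y <  x; }  // x >  y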
13368 
13369 // Long compares reg < zero/reg OR reg >= zero/reg.
13370 // Just a wrapper for a normal branch, plus the predicate test
13371 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13372   match(If cmp flags);
13373   effect(USE labl);
13374   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13375   ins_cost(300);
13376   expand %{
13377     jmpCon(cmp,flags,labl);    // JGT or JLE...
13378   %}
13379 %}
13380 
13381 //======
13382 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13383 // Same as cmpUL_reg_flags_LEGT except must negate src
13384 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13385   match(Set flags (CmpUL src zero));
13386   effect(TEMP tmp);
13387   ins_cost(300);
13388   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13389             "CMP    $tmp,$src.lo\n\t"
13390             "SBB    $tmp,$src.hi\n\t" %}
13391   ins_encode(long_cmp_flags3(src, tmp));
13392   ins_pipe(ialu_reg_reg_long);
13393 %}
13394 
13395 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13396 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13397 // requires a commuted test to get the same result.
13398 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13399   match(Set flags (CmpUL src1 src2));
13400   effect(TEMP tmp);
13401   ins_cost(300);
13402   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13403             "MOV    $tmp,$src2.hi\n\t"
13404             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13405   ins_encode(long_cmp_flags2( src2, src1, tmp));
13406   ins_pipe(ialu_cr_reg_reg);
13407 %}
13408 
13409 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13410 // Just a wrapper for a normal branch, plus the predicate test
13411 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13412   match(If cmp flags);
13413   effect(USE labl);
13414   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13415   ins_cost(300);
13416   expand %{
13417     jmpCon(cmp, flags, labl);    // JGT or JLE...
13418   %}
13419 %}
13420 
13421 // Compare 2 longs and CMOVE longs.
13422 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13423   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13424   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13425   ins_cost(400);
13426   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13427             "CMOV$cmp $dst.hi,$src.hi" %}
13428   opcode(0x0F,0x40);
13429   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13430   ins_pipe( pipe_cmov_reg_long );
13431 %}
13432 
13433 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13434   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13435   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13436   ins_cost(500);
13437   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13438             "CMOV$cmp $dst.hi,$src.hi+4" %}
13439   opcode(0x0F,0x40);
13440   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13441   ins_pipe( pipe_cmov_reg_long );
13442 %}
13443 
13444 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13445   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13446   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13447   ins_cost(400);
13448   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13449             "CMOV$cmp $dst.hi,$src.hi" %}
13450   opcode(0x0F,0x40);
13451   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13452   ins_pipe( pipe_cmov_reg_long );
13453 %}
13454 
13455 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13456   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13457   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13458   ins_cost(500);
13459   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13460             "CMOV$cmp $dst.hi,$src.hi+4" %}
13461   opcode(0x0F,0x40);
13462   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13463   ins_pipe( pipe_cmov_reg_long );
13464 %}
13465 
13466 // Compare 2 longs and CMOVE ints.
13467 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13468   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13469   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13470   ins_cost(200);
13471   format %{ "CMOV$cmp $dst,$src" %}
13472   opcode(0x0F,0x40);
13473   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13474   ins_pipe( pipe_cmov_reg );
13475 %}
13476 
13477 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13478   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13479   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13480   ins_cost(250);
13481   format %{ "CMOV$cmp $dst,$src" %}
13482   opcode(0x0F,0x40);
13483   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13484   ins_pipe( pipe_cmov_mem );
13485 %}
13486 
13487 // Compare 2 longs and CMOVE ptrs.
13488 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13489   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13490   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13491   ins_cost(200);
13492   format %{ "CMOV$cmp $dst,$src" %}
13493   opcode(0x0F,0x40);
13494   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13495   ins_pipe( pipe_cmov_reg );
13496 %}
13497 
13498 // Compare 2 longs and CMOVE doubles
13499 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13500   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13501   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13502   ins_cost(200);
13503   expand %{
13504     fcmovDPR_regS(cmp,flags,dst,src);
13505   %}
13506 %}
13507 
13508 // Compare 2 longs and CMOVE doubles
13509 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13510   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13511   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13512   ins_cost(200);
13513   expand %{
13514     fcmovD_regS(cmp,flags,dst,src);
13515   %}
13516 %}
13517 
13518 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13519   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13520   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13521   ins_cost(200);
13522   expand %{
13523     fcmovFPR_regS(cmp,flags,dst,src);
13524   %}
13525 %}
13526 
13527 
13528 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13529   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13530   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13531   ins_cost(200);
13532   expand %{
13533     fcmovF_regS(cmp,flags,dst,src);
13534   %}
13535 %}
13536 
13537 
13538 // ============================================================================
13539 // Procedure Call/Return Instructions
13540 // Call Java Static Instruction
13541 // Note: If this code changes, the corresponding ret_addr_offset() and
13542 //       compute_padding() functions will have to be adjusted.
13543 instruct CallStaticJavaDirect(method meth) %{
13544   match(CallStaticJava);
13545   effect(USE meth);
13546 
13547   ins_cost(300);
13548   format %{ "CALL,static " %}
13549   opcode(0xE8); /* E8 cd */
13550   ins_encode( pre_call_resets,
13551               Java_Static_Call( meth ),
13552               call_epilog,
13553               post_call_FPU );
13554   ins_pipe( pipe_slow );
13555   ins_alignment(4);
13556 %}
13557 
13558 // Call Java Dynamic Instruction
13559 // Note: If this code changes, the corresponding ret_addr_offset() and
13560 //       compute_padding() functions will have to be adjusted.
13561 instruct CallDynamicJavaDirect(method meth) %{
13562   match(CallDynamicJava);
13563   effect(USE meth);
13564 
13565   ins_cost(300);
13566   format %{ "MOV    EAX,(oop)-1\n\t"
13567             "CALL,dynamic" %}
13568   opcode(0xE8); /* E8 cd */
13569   ins_encode( pre_call_resets,
13570               Java_Dynamic_Call( meth ),
13571               call_epilog,
13572               post_call_FPU );
13573   ins_pipe( pipe_slow );
13574   ins_alignment(4);
13575 %}
13576 
13577 // Call Runtime Instruction
13578 instruct CallRuntimeDirect(method meth) %{
13579   match(CallRuntime );
13580   effect(USE meth);
13581 
13582   ins_cost(300);
13583   format %{ "CALL,runtime " %}
13584   opcode(0xE8); /* E8 cd */
13585   // Use FFREEs to clear entries in float stack
13586   ins_encode( pre_call_resets,
13587               FFree_Float_Stack_All,
13588               Java_To_Runtime( meth ),
13589               post_call_FPU );
13590   ins_pipe( pipe_slow );
13591 %}
13592 
13593 // Call runtime without safepoint
13594 instruct CallLeafDirect(method meth) %{
13595   match(CallLeaf);
13596   effect(USE meth);
13597 
13598   ins_cost(300);
13599   format %{ "CALL_LEAF,runtime " %}
13600   opcode(0xE8); /* E8 cd */
13601   ins_encode( pre_call_resets,
13602               FFree_Float_Stack_All,
13603               Java_To_Runtime( meth ),
13604               Verify_FPU_For_Leaf, post_call_FPU );
13605   ins_pipe( pipe_slow );
13606 %}
13607 
13608 instruct CallLeafNoFPDirect(method meth) %{
13609   match(CallLeafNoFP);
13610   effect(USE meth);
13611 
13612   ins_cost(300);
13613   format %{ "CALL_LEAF_NOFP,runtime " %}
13614   opcode(0xE8); /* E8 cd */
13615   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13616   ins_pipe( pipe_slow );
13617 %}
13618 
13619 
13620 // Return Instruction
13621 // Remove the return address & jump to it.
13622 instruct Ret() %{
13623   match(Return);
13624   format %{ "RET" %}
13625   opcode(0xC3);
13626   ins_encode(OpcP);
13627   ins_pipe( pipe_jmp );
13628 %}
13629 
13630 // Tail Call; Jump from runtime stub to Java code.
13631 // Also known as an 'interprocedural jump'.
13632 // Target of jump will eventually return to caller.
13633 // TailJump below removes the return address.
13634 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13635   match(TailCall jump_target method_ptr);
13636   ins_cost(300);
13637   format %{ "JMP    $jump_target \t# EBX holds method" %}
13638   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13639   ins_encode( OpcP, RegOpc(jump_target) );
13640   ins_pipe( pipe_jmp );
13641 %}
13642 
13643 
13644 // Tail Jump; remove the return address; jump to target.
13645 // TailCall above leaves the return address around.
13646 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13647   match( TailJump jump_target ex_oop );
13648   ins_cost(300);
13649   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13650             "JMP    $jump_target " %}
13651   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13652   ins_encode( enc_pop_rdx,
13653               OpcP, RegOpc(jump_target) );
13654   ins_pipe( pipe_jmp );
13655 %}
13656 
13657 // Create exception oop: created by stack-crawling runtime code.
13658 // Created exception is now available to this handler, and is set up
13659 // just prior to jumping to this handler.  No code emitted.
13660 instruct CreateException( eAXRegP ex_oop )
13661 %{
13662   match(Set ex_oop (CreateEx));
13663 
13664   size(0);
13665   // use the following format syntax
13666   format %{ "# exception oop is in EAX; no code emitted" %}
13667   ins_encode();
13668   ins_pipe( empty );
13669 %}
13670 
13671 
13672 // Rethrow exception:
13673 // The exception oop will come in the first argument position.
13674 // Then JUMP (not call) to the rethrow stub code.
13675 instruct RethrowException()
13676 %{
13677   match(Rethrow);
13678 
13679   // use the following format syntax
13680   format %{ "JMP    rethrow_stub" %}
13681   ins_encode(enc_rethrow);
13682   ins_pipe( pipe_jmp );
13683 %}
13684 
13685 // inlined locking and unlocking
13686 
13687 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13688   predicate(Compile::current()->use_rtm());
13689   match(Set cr (FastLock object box));
13690   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13691   ins_cost(300);
13692   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13693   ins_encode %{
13694     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13695                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13696                  _rtm_counters, _stack_rtm_counters,
13697                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13698                  true, ra_->C->profile_rtm());
13699   %}
13700   ins_pipe(pipe_slow);
13701 %}
13702 
13703 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13704   predicate(!Compile::current()->use_rtm());
13705   match(Set cr (FastLock object box));
13706   effect(TEMP tmp, TEMP scr, USE_KILL box);
13707   ins_cost(300);
13708   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13709   ins_encode %{
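    // Non-RTM variant: compared with cmpFastLockRTM above, the trailing
    // arguments (cx1/cx2 scratch registers, RTM counters, method data,
    // use_rtm, profile_rtm) are passed as noreg/NULL/false here.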
13710     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13711                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13712   %}
13713   ins_pipe(pipe_slow);
13714 %}
13715 
13716 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13717   match(Set cr (FastUnlock object box));
13718   effect(TEMP tmp, USE_KILL box);
13719   ins_cost(300);
13720   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13721   ins_encode %{
13722     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13723   %}
13724   ins_pipe(pipe_slow);
13725 %}
13726 
13727 
13728 
13729 // ============================================================================
13730 // Safepoint Instruction
13731 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13732   match(SafePoint poll);
13733   effect(KILL cr, USE poll);
13734 
13735   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13736   ins_cost(125);
13737   // EBP would need size(3)
13738   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13739   ins_encode %{
13740     __ relocate(relocInfo::poll_type);
13741     address pre_pc = __ pc();
13742     __ testl(rax, Address($poll$$Register, 0));
13743     address post_pc = __ pc();
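    // 0x85 is the opcode byte of a 32-bit TEST with register and memory
    // operands; the guarantee below checks that exactly that instruction
    // was emitted at the poll site.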
13744     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13745   %}
13746   ins_pipe(ialu_reg_mem);
13747 %}
13748 
13749 
13750 // ============================================================================
13751 // This name is KNOWN by the ADLC and cannot be changed.
13752 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13753 // for this guy.
13754 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13755   match(Set dst (ThreadLocal));
13756   effect(DEF dst, KILL cr);
13757 
13758   format %{ "MOV    $dst, Thread::current()" %}
13759   ins_encode %{
13760     Register dstReg = as_Register($dst$$reg);
13761     __ get_thread(dstReg);
13762   %}
13763   ins_pipe( ialu_reg_fat );
13764 %}
13765 
13766 
13767 
13768 //----------PEEPHOLE RULES-----------------------------------------------------
13769 // These must follow all instruction definitions as they use the names
13770 // defined in the instruction definitions.
13771 //
13772 // peepmatch ( root_instr_name [preceding_instruction]* );
13773 //
13774 // peepconstraint ( instruction_number.operand_name relational_op
13775 //                  instruction_number.operand_name
13776 //                  [, ...] );
13777 // // instruction numbers are zero-based using left to right order in peepmatch
13778 //
13779 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13780 // // provide an instruction_number.operand_name for each operand that appears
13781 // // in the replacement instruction's match rule
13782 //
13783 // ---------VM FLAGS---------------------------------------------------------
13784 //
13785 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13786 //
13787 // Each peephole rule is given an identifying number starting with zero and
13788 // increasing by one in the order seen by the parser.  An individual peephole
13789 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13790 // on the command-line.
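//
// For example (illustrative only; depending on the build, these flags may be
// settable only in a debug VM):
//   java -XX:-OptoPeephole ...       disables all peephole rules
//   java -XX:OptoPeepholeAt=1 ...    enables only peephole rule #1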
13791 //
13792 // ---------CURRENT LIMITATIONS----------------------------------------------
13793 //
13794 // Only match adjacent instructions in same basic block
13795 // Only equality constraints
13796 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13797 // Only one replacement instruction
13798 //
13799 // ---------EXAMPLE----------------------------------------------------------
13800 //
13801 // // pertinent parts of existing instructions in architecture description
13802 // instruct movI(rRegI dst, rRegI src) %{
13803 //   match(Set dst (CopyI src));
13804 // %}
13805 //
13806 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13807 //   match(Set dst (AddI dst src));
13808 //   effect(KILL cr);
13809 // %}
13810 //
13811 // // Change (inc mov) to lea
13812 // peephole %{
13813 //   // increment preceded by register-register move
13814 //   peepmatch ( incI_eReg movI );
13815 //   // require that the destination register of the increment
13816 //   // match the destination register of the move
13817 //   peepconstraint ( 0.dst == 1.dst );
13818 //   // construct a replacement instruction that sets
13819 //   // the destination to ( move's source register + one )
13820 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13821 // %}
13822 //
13823 // The implementation no longer uses movX instructions, since the
13824 // machine-independent system no longer uses CopyX nodes.
13825 //
13826 // peephole %{
13827 //   peepmatch ( incI_eReg movI );
13828 //   peepconstraint ( 0.dst == 1.dst );
13829 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13830 // %}
13831 //
13832 // peephole %{
13833 //   peepmatch ( decI_eReg movI );
13834 //   peepconstraint ( 0.dst == 1.dst );
13835 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13836 // %}
13837 //
13838 // peephole %{
13839 //   peepmatch ( addI_eReg_imm movI );
13840 //   peepconstraint ( 0.dst == 1.dst );
13841 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13842 // %}
13843 //
13844 // peephole %{
13845 //   peepmatch ( addP_eReg_imm movP );
13846 //   peepconstraint ( 0.dst == 1.dst );
13847 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13848 // %}
13849 
13850 // // Change load of spilled value to only a spill
13851 // instruct storeI(memory mem, rRegI src) %{
13852 //   match(Set mem (StoreI mem src));
13853 // %}
13854 //
13855 // instruct loadI(rRegI dst, memory mem) %{
13856 //   match(Set dst (LoadI mem));
13857 // %}
13858 //
13859 peephole %{
13860   peepmatch ( loadI storeI );
13861   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13862   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13863 %}
13864 
13865 //----------SMARTSPILL RULES---------------------------------------------------
13866 // These must follow all instruction definitions as they use the names
13867 // defined in the instruction definitions.