1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
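      // For example, the EBX definition below,
      //   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
      // reads (illustrative gloss of the fields above): save-on-call for the
      // register allocator, save-on-entry under the C calling convention,
      // spilled and reloaded as an integer (Op_RegI), hardware encoding 3.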
   61 
   62 // General Registers
   63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
   64 // SOE was then turned off in Java code due to frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Here is the trick: FPR1 is really st(0), except in the midst of
   82 // emitting assembly for a machnode. During emission the FPU stack is
   83 // pushed, making FPR1 == st(1) temporarily. However, at any safepoint
   84 // the stack will not have this element, so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness in numbering forces the
   86 // instruction encoding to play games with the register encode to
   87 // correct for this 0/1 issue. See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is never allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
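      // Illustrative examples, using the integer encodings from the register
      // block above (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5, EDI=7):
      //   HIGH_FROM_LOW_ENC(0 /* EAX */) == 2   // EDX, high half of EDX:EAX
      //   HIGH_FROM_LOW_ENC(1 /* ECX */) == 3   // EBX, high half of EBX:ECX
      //   HIGH_FROM_LOW_ENC(5 /* EBP */) == 7   // EDI, high half of EDI:EBP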
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
  278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
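      // Illustrative note on the alignment trick above: &fp_signmask_pool[2] lies
      // 16 bytes past the start of the pool, and double_quadword rounds that
      // address down with &~0xF by at most 15 bytes, so the resulting 16-byte
      // slot still falls inside the pool; the extra 128 bits declared above
      // exist to absorb exactly this rounding.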
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
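      // Illustrative layout (assuming no pre-call resets are needed): a static
      // call is a single 5-byte CALL rel32, so the return address is 5 bytes
      // past the start of the call; a dynamic call is a 5-byte MOV (the
      // inline-cache load) followed by the 5-byte CALL, hence 10 bytes. Each
      // pre-call reset (6-byte FLDCW, 3-byte vzeroupper) shifts these by the
      // same amount.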
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned so that
  323 // it does not span a cache line and can be safely patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned so that
  331 // it does not span a cache line and can be safely patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
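      // Worked example (illustrative, assuming alignment_required() is 4 here):
      // if a CallStaticJavaDirect node would start at offset 10 with no
      // pre-call resets, the 4-byte call displacement begins at 10 + 1 = 11,
      // so the node gets align_up(11, 4) - 11 = 1 byte of padding; the dynamic
      // variant also skips the 5-byte MOV before doing the same calculation.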
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
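      // Illustrative example: emit_rm(cbuf, 0x3, 0x00, ESP_enc) packs
      // (0x3 << 6) | (0x0 << 3) | 0x4 == 0xC4, the ModR/M byte paired with
      // opcode 0x81/0x83 in the epilog below to form "ADD ESP, #framesize".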
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410 // Encode a register + memory operand: reg_encoding, [base + index*scale + displace]
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
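      // Worked example (illustrative): encoding a register with encoding 0
      // (EAX) against [ESP + 8] takes the SIB path above and emits
      // mod=01 reg=000 rm=100 (0x44), the SIB byte 0x24 (scale=0, index=100
      // meaning "none", base=ESP), and the 8-bit displacement 0x08.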
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
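      // Result mapping (illustrative): after a floating-point compare that
      // sets the flags (e.g. UCOMISS), emit_cmpfp3 leaves dst == -1 when the
      // operands are unordered (NaN) or the first is below the second, 0 when
      // they are equal, and 1 when the first is above -- the usual three-way
      // compare shape.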
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
  619     // NOTE: We set the table base offset here because code that uses the
  620     // constant table might be emitted before MachConstantBaseNode.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  640   // Remove two words for the return addr and rbp.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  680   // Remove two words for the return addr and rbp.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
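        // (Illustrative) 0x83 is the sign-extended 8-bit immediate form of ADD,
        // so it covers framesize values below 128; larger frames need the
        // 4-byte immediate form 0x81.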
  694 
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ relocate(relocInfo::poll_return_type);
  713     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  714   }
  715 }
  716 
  717 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  718   return MachNode::size(ra_); // too many variables; just compute it
  719                               // the hard way
  720 }
  721 
  722 int MachEpilogNode::reloc() const {
  723   return 0; // a large enough number
  724 }
  725 
  726 const Pipeline * MachEpilogNode::pipeline() const {
  727   return MachNode::pipeline_class();
  728 }
  729 
  730 //=============================================================================
  731 
  732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  733 static enum RC rc_class( OptoReg::Name reg ) {
  734 
  735   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  736   if (OptoReg::is_stack(reg)) return rc_stack;
  737 
  738   VMReg r = OptoReg::as_VMReg(reg);
  739   if (r->is_Register()) return rc_int;
  740   if (r->is_FloatRegister()) {
  741     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  742     return rc_float;
  743   }
  744   if (r->is_KRegister()) return rc_kreg;
  745   assert(r->is_XMMRegister(), "must be");
  746   return rc_xmm;
  747 }
  748 
  749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  750                         int opcode, const char *op_str, int size, outputStream* st ) {
  751   if( cbuf ) {
  752     emit_opcode  (*cbuf, opcode );
  753     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754 #ifndef PRODUCT
  755   } else if( !do_size ) {
  756     if( size != 0 ) st->print("\n\t");
  757     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  758       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  759       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  760     } else { // FLD, FST, PUSH, POP
  761       st->print("%s [ESP + #%d]",op_str,offset);
  762     }
  763 #endif
  764   }
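        // Size accounting (illustrative): the fixed 3 bytes below are the
        // opcode, ModR/M, and SIB bytes of the [ESP + disp] form; the
        // displacement adds 0, 1, or 4 more bytes.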
  765   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  766   return size+3+offset_size;
  767 }
  768 
  769 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  770 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  771                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  772   int in_size_in_bits = Assembler::EVEX_32bit;
  773   int evex_encoding = 0;
  774   if (reg_lo+1 == reg_hi) {
  775     in_size_in_bits = Assembler::EVEX_64bit;
  776     evex_encoding = Assembler::VEX_W;
  777   }
  778   if (cbuf) {
  779     MacroAssembler _masm(cbuf);
  780     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
  781     //                          since it maps more cases to a single-byte displacement
  782     _masm.set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (cbuf) {
  832     MacroAssembler _masm(cbuf);
  833     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  834     _masm.set_managed();
  835     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  836       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  837                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  838     } else {
  839       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     }
  842 #ifndef PRODUCT
  843   } else if (!do_size) {
  844     if (size != 0) st->print("\n\t");
  845     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  846       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       } else {
  849         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       }
  851     } else {
  852       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  853         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       } else {
  855         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       }
  857     }
  858 #endif
  859   }
  860   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  861   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  862   int sz = (UseAVX > 2) ? 6 : 4;
  863   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  864       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  865   return size + sz;
  866 }
  867 
  868 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  869                             int src_hi, int dst_hi, int size, outputStream* st ) {
  870   // 32-bit
  871   if (cbuf) {
  872     MacroAssembler _masm(cbuf);
  873     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  874     _masm.set_managed();
  875     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  876              as_Register(Matcher::_regEncode[src_lo]));
  877 #ifndef PRODUCT
  878   } else if (!do_size) {
  879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  880 #endif
  881   }
  882   return (UseAVX> 2) ? 6 : 4;
  883 }
  884 
  885 
  886 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  887                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  888   // 32-bit
  889   if (cbuf) {
  890     MacroAssembler _masm(cbuf);
  891     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  892     _masm.set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( cbuf ) {
  905     emit_opcode(*cbuf, 0x8B );
  906     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( cbuf ) {
  920       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (cbuf) {
  956     MacroAssembler _masm(cbuf);
  957     switch (ireg) {
  958     case Op_VecS:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       break;
  962     case Op_VecD:
  963       __ pushl(Address(rsp, src_offset));
  964       __ popl (Address(rsp, dst_offset));
  965       __ pushl(Address(rsp, src_offset+4));
  966       __ popl (Address(rsp, dst_offset+4));
  967       break;
  968     case Op_VecX:
  969       __ movdqu(Address(rsp, -16), xmm0);
  970       __ movdqu(xmm0, Address(rsp, src_offset));
  971       __ movdqu(Address(rsp, dst_offset), xmm0);
  972       __ movdqu(xmm0, Address(rsp, -16));
  973       break;
  974     case Op_VecY:
  975       __ vmovdqu(Address(rsp, -32), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, src_offset));
  977       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  978       __ vmovdqu(xmm0, Address(rsp, -32));
  979       break;
  980     case Op_VecZ:
  981       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  983       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  984       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  985       break;
  986     default:
  987       ShouldNotReachHere();
  988     }
  989 #ifndef PRODUCT
  990   } else {
  991     switch (ireg) {
  992     case Op_VecS:
  993       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  994                 "popl    [rsp + #%d]",
  995                 src_offset, dst_offset);
  996       break;
  997     case Op_VecD:
  998       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  999                 "popq    [rsp + #%d]\n\t"
 1000                 "pushl   [rsp + #%d]\n\t"
 1001                 "popq    [rsp + #%d]",
 1002                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1003       break;
 1004      case Op_VecX:
 1005       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1006                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1007                 "movdqu  [rsp + #%d], xmm0\n\t"
 1008                 "movdqu  xmm0, [rsp - #16]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecY:
 1012       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1013                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1014                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1015                 "vmovdqu xmm0, [rsp - #32]",
 1016                 src_offset, dst_offset);
 1017       break;
 1018     case Op_VecZ:
 1019       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1020                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1021                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1022                 "vmovdqu xmm0, [rsp - #64]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     default:
 1026       ShouldNotReachHere();
 1027     }
 1028 #endif
 1029   }
 1030 }
 1031 
 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1033   // Get registers to move
 1034   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1035   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1036   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1037   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1038 
 1039   enum RC src_second_rc = rc_class(src_second);
 1040   enum RC src_first_rc = rc_class(src_first);
 1041   enum RC dst_second_rc = rc_class(dst_second);
 1042   enum RC dst_first_rc = rc_class(dst_first);
 1043 
 1044   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1045 
 1046   // Generate spill code!
 1047   int size = 0;
 1048 
 1049   if( src_first == dst_first && src_second == dst_second )
 1050     return size;            // Self copy, no move
 1051 
 1052   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1053     uint ireg = ideal_reg();
 1054     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1055     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1056     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1057     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1058       // mem -> mem
 1059       int src_offset = ra_->reg2offset(src_first);
 1060       int dst_offset = ra_->reg2offset(dst_first);
 1061       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1063       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1064     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1065       int stack_offset = ra_->reg2offset(dst_first);
 1066       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1067     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1068       int stack_offset = ra_->reg2offset(src_first);
 1069       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1070     } else {
 1071       ShouldNotReachHere();
 1072     }
 1073     return 0;
 1074   }
 1075 
 1076   // --------------------------------------
 1077   // Check for mem-mem move.  push/pop to move.
 1078   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1079     if( src_second == dst_first ) { // overlapping stack copy ranges
 1080       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1083       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1084     }
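          // Illustrative example of the overlap handled above: copying a long
          // from [ESP+4]/[ESP+8] to [ESP+8]/[ESP+12] would clobber the
          // source's high word if the low word were copied first, so the high
          // word is moved first and flagged rc_bad so the "mov second bits"
          // step below skips it.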
 1085     // move low bits
 1086     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1087     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1088     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1089       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1090       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1091     }
 1092     return size;
 1093   }
 1094 
 1095   // --------------------------------------
 1096   // Check for integer reg-reg copy
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1098     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1099 
 1100   // Check for integer store
 1101   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1102     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1103 
 1104   // Check for integer load
 1105   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1106     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1107 
 1108   // Check for integer reg-xmm reg copy
 1109   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1110     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1111             "no 64 bit integer-float reg moves" );
 1112     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1113   }
 1114   // --------------------------------------
 1115   // Check for float reg-reg copy
 1116   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1117     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1118             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1119     if( cbuf ) {
 1120 
 1121       // Note the mucking with the register encode to compensate for the 0/1
 1122       // indexing issue mentioned in a comment in the reg_def sections
 1123       // for FPR registers many lines above here.
 1124 
 1125       if( src_first != FPR1L_num ) {
 1126         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1127         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1128         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1129         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1130      } else {
 1131         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1132         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1133      }
 1134 #ifndef PRODUCT
 1135     } else if( !do_size ) {
 1136       if( size != 0 ) st->print("\n\t");
 1137       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1138       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1139 #endif
 1140     }
 1141     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1142   }
 1143 
 1144   // Check for float store
 1145   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1146     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1147   }
 1148 
 1149   // Check for float load
 1150   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1151     int offset = ra_->reg2offset(src_first);
 1152     const char *op_str;
 1153     int op;
 1154     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1155       op_str = "FLD_D";
 1156       op = 0xDD;
 1157     } else {                   // 32-bit load
 1158       op_str = "FLD_S";
 1159       op = 0xD9;
 1160       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1161     }
 1162     if( cbuf ) {
 1163       emit_opcode  (*cbuf, op );
 1164       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1167 #ifndef PRODUCT
 1168     } else if( !do_size ) {
 1169       if( size != 0 ) st->print("\n\t");
 1170       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1171 #endif
 1172     }
 1173     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1174     return size + 3+offset_size+2;
 1175   }
 1176 
 1177   // Check for xmm reg-reg copy
 1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1180             (src_first+1 == src_second && dst_first+1 == dst_second),
 1181             "no non-adjacent float-moves" );
 1182     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1183   }
 1184 
 1185   // Check for xmm reg-integer reg copy
 1186   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1187     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1188             "no 64 bit float-integer reg moves" );
 1189     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1190   }
 1191 
 1192   // Check for xmm store
 1193   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1194     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1195   }
 1196 
 1197   // Check for float xmm load
 1198   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1199     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1200   }
 1201 
 1202   // Copy from float reg to xmm reg
 1203   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1204     // copy to the top of stack from floating point reg
 1205     // and use LEA to preserve flags
 1206     if( cbuf ) {
 1207       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1208       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1209       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1210       emit_d8(*cbuf,0xF8);
 1211 #ifndef PRODUCT
 1212     } else if( !do_size ) {
 1213       if( size != 0 ) st->print("\n\t");
 1214       st->print("LEA    ESP,[ESP-8]");
 1215 #endif
 1216     }
 1217     size += 4;
 1218 
 1219     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1220 
 1221     // Copy from the temp memory to the xmm reg.
 1222     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1223 
 1224     if( cbuf ) {
 1225       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1226       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1227       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1228       emit_d8(*cbuf,0x08);
 1229 #ifndef PRODUCT
 1230     } else if( !do_size ) {
 1231       if( size != 0 ) st->print("\n\t");
 1232       st->print("LEA    ESP,[ESP+8]");
 1233 #endif
 1234     }
 1235     size += 4;
 1236     return size;
 1237   }
 1238 
 1239   // AVX-512 opmask specific spilling.
 1240   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1241     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1242     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1243     int offset = ra_->reg2offset(src_first);
 1244     if (cbuf != nullptr) {
 1245       MacroAssembler _masm(cbuf);
 1246       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247 #ifndef PRODUCT
 1248     } else {
 1249       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1250 #endif
 1251     }
 1252     return 0;
 1253   }
 1254 
 1255   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1256     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1257     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1258     int offset = ra_->reg2offset(dst_first);
 1259     if (cbuf != nullptr) {
 1260       MacroAssembler _masm(cbuf);
 1261       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1262 #ifndef PRODUCT
 1263     } else {
 1264       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1265 #endif
 1266     }
 1267     return 0;
 1268   }
 1269 
 1270   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1271     Unimplemented();
 1272     return 0;
 1273   }
 1274 
 1275   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1276     Unimplemented();
 1277     return 0;
 1278   }
 1279 
 1280   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1281     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1282     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1283     if (cbuf != nullptr) {
 1284       MacroAssembler _masm(cbuf);
 1285       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1286 #ifndef PRODUCT
 1287     } else {
 1288       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1289 #endif
 1290     }
 1291     return 0;
 1292   }
 1293 
 1294   assert( size > 0, "missed a case" );
 1295 
 1296   // --------------------------------------------------------------------
  // Check for second word still needing moving.
 1298   if( src_second == dst_second )
 1299     return size;               // Self copy; no move
 1300   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1301 
 1302   // Check for second word int-int move
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1304     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1305 
 1306   // Check for second word integer store
 1307   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1309 
 1310   // Check for second word integer load
 1311   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1312     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1313 
 1314   Unimplemented();
 1315   return 0; // Mute compiler
 1316 }
 1317 
 1318 #ifndef PRODUCT
 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1320   implementation( nullptr, ra_, false, st );
 1321 }
 1322 #endif
 1323 
 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1325   implementation( &cbuf, ra_, false, nullptr );
 1326 }
 1327 
 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1329   return MachNode::size(ra_);
 1330 }
 1331 
 1332 
 1333 //=============================================================================
 1334 #ifndef PRODUCT
 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1337   int reg = ra_->get_reg_first(this);
 1338   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1339 }
 1340 #endif
 1341 
 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   int reg = ra_->get_encode(this);
 1345   if( offset >= 128 ) {
 1346     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1347     emit_rm(cbuf, 0x2, reg, 0x04);
 1348     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1349     emit_d32(cbuf, offset);
 1350   }
 1351   else {
 1352     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1353     emit_rm(cbuf, 0x1, reg, 0x04);
 1354     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1355     emit_d8(cbuf, offset);
 1356   }
 1357 }
 1358 
 1359 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1360   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1361   if( offset >= 128 ) {
 1362     return 7;
 1363   }
 1364   else {
 1365     return 4;
 1366   }
 1367 }
 1368 
 1369 //=============================================================================
 1370 #ifndef PRODUCT
 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1372   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1373   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1374   st->print_cr("\tNOP");
 1375   st->print_cr("\tNOP");
 1376   if( !OptoBreakpoint )
 1377     st->print_cr("\tNOP");
 1378 }
 1379 #endif
 1380 
 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1382   MacroAssembler masm(&cbuf);
 1383 #ifdef ASSERT
 1384   uint insts_size = cbuf.insts_size();
 1385 #endif
 1386   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1387   masm.jump_cc(Assembler::notEqual,
 1388                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1391   int nops_cnt = 2;
 1392   if( !OptoBreakpoint ) // Leave space for int3
 1393      nops_cnt += 1;
 1394   masm.nop(nops_cnt);
 1395 
 1396   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1397 }
 1398 
 1399 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1400   return OptoBreakpoint ? 11 : 12;
 1401 }
 1402 
 1403 
 1404 //=============================================================================
 1405 
 1406 // Vector calling convention not supported.
 1407 bool Matcher::supports_vector_calling_convention() {
 1408   return false;
 1409 }
 1410 
 1411 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1412   Unimplemented();
 1413   return OptoRegPair(0, 0);
 1414 }
 1415 
 1416 // Is this branch offset short enough that a short branch can be used?
 1417 //
 1418 // NOTE: If the platform does not provide any short branch variants, then
 1419 //       this method should return false for offset 0.
 1420 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1421   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1424   offset -= br_size;
 1425 
 1426   // the short version of jmpConUCF2 contains multiple branches,
 1427   // making the reach slightly less
 1428   if (rule == jmpConUCF2_rule)
 1429     return (-126 <= offset && offset <= 125);
 1430   return (-128 <= offset && offset <= 127);
 1431 }
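
// Illustrative example (assumed values, not taken from a real compilation):
// a short Jcc is 2 bytes, so a branch whose target lies 100 bytes past the
// start of the branch arrives here with offset == 100; after subtracting
// br_size the displacement is 98, which fits in [-128, 127] and permits the
// short form.  A target 200 bytes away gives a displacement of 198 and
// forces the long form.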
 1432 
 1433 // Return whether or not this register is ever used as an argument.  This
 1434 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1435 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1437 bool Matcher::can_be_java_arg( int reg ) {
 1438   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1439   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1440   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1441   return false;
 1442 }
 1443 
 1444 bool Matcher::is_spillable_arg( int reg ) {
 1445   return can_be_java_arg(reg);
 1446 }
 1447 
 1448 uint Matcher::int_pressure_limit()
 1449 {
 1450   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1451 }
 1452 
 1453 uint Matcher::float_pressure_limit()
 1454 {
 1455   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1456 }
 1457 
 1458 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
 1464   return VM_Version::has_fast_idiv() &&
 1465          (divisor == (int)divisor && divisor != min_jint);
 1466 }
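
// Illustrative example (assumed values): divisor == 7 passes the check
// (7 == (int)7 and 7 != min_jint), so the IDIV-based rules can be used when
// the CPU has fast IDIV; divisor == 0x100000000LL fails because it does not
// fit in 32 bits ((int)divisor == 0), and divisor == min_jint is rejected
// explicitly, so those cases fall back to the multiply-based transformation.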
 1467 
 1468 // Register for DIVI projection of divmodI
 1469 RegMask Matcher::divI_proj_mask() {
 1470   return EAX_REG_mask();
 1471 }
 1472 
 1473 // Register for MODI projection of divmodI
 1474 RegMask Matcher::modI_proj_mask() {
 1475   return EDX_REG_mask();
 1476 }
 1477 
 1478 // Register for DIVL projection of divmodL
 1479 RegMask Matcher::divL_proj_mask() {
 1480   ShouldNotReachHere();
 1481   return RegMask();
 1482 }
 1483 
 1484 // Register for MODL projection of divmodL
 1485 RegMask Matcher::modL_proj_mask() {
 1486   ShouldNotReachHere();
 1487   return RegMask();
 1488 }
 1489 
 1490 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1491   return NO_REG_mask();
 1492 }
 1493 
// Returns true if the high 32 bits of the value are known to be zero.
 1495 bool is_operand_hi32_zero(Node* n) {
 1496   int opc = n->Opcode();
 1497   if (opc == Op_AndL) {
 1498     Node* o2 = n->in(2);
 1499     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1500       return true;
 1501     }
 1502   }
 1503   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1504     return true;
 1505   }
 1506   return false;
 1507 }
 1508 
 1509 %}
 1510 
 1511 //----------ENCODING BLOCK-----------------------------------------------------
 1512 // This block specifies the encoding classes used by the compiler to output
 1513 // byte streams.  Encoding classes generate functions which are called by
 1514 // Machine Instruction Nodes in order to generate the bit encoding of the
 1515 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
 1518 // operand to generate a function which returns its register number when
 1519 // queried.   CONST_INTER causes an operand to generate a function which
 1520 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1521 // operand to generate four functions which return the Base Register, the
 1522 // Index Register, the Scale Value, and the Offset Value of the operand when
 1523 // queried.  COND_INTER causes an operand to generate six functions which
 1524 // return the encoding code (ie - encoding bits for the instruction)
 1525 // associated with each basic boolean condition for a conditional instruction.
 1526 // Instructions specify two basic values for encoding.  They use the
 1527 // ins_encode keyword to specify their encoding class (which must be one of
 1528 // the class names specified in the encoding block), and they use the
 1529 // opcode keyword to specify, in order, their primary, secondary, and
 1530 // tertiary opcode.  Only the opcode sections which a particular instruction
 1531 // needs for encoding need to be specified.
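//
// As a minimal illustrative sketch (hypothetical instruct name; OpcP and
// RegReg are enc classes defined in the block below, and a real rule would
// also declare its effect on eFlagsReg), an instruction might combine the
// two keywords like this:
//
//   instruct addI_example(rRegI dst, rRegI src) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                            // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg( dst, src ) );  // primary opcode, then ModRM byte
//   %}
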
 1532 encode %{
 1533   // Build emit functions for each basic byte or larger field in the intel
 1534   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1535   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically.
 1539 
 1540   // Emit primary opcode
 1541   enc_class OpcP %{
 1542     emit_opcode(cbuf, $primary);
 1543   %}
 1544 
 1545   // Emit secondary opcode
 1546   enc_class OpcS %{
 1547     emit_opcode(cbuf, $secondary);
 1548   %}
 1549 
 1550   // Emit opcode directly
 1551   enc_class Opcode(immI d8) %{
 1552     emit_opcode(cbuf, $d8$$constant);
 1553   %}
 1554 
 1555   enc_class SizePrefix %{
 1556     emit_opcode(cbuf,0x66);
 1557   %}
 1558 
 1559   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1560     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1561   %}
 1562 
 1563   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1564     emit_opcode(cbuf,$opcode$$constant);
 1565     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1566   %}
 1567 
 1568   enc_class mov_r32_imm0( rRegI dst ) %{
 1569     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1570     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1571   %}
 1572 
 1573   enc_class cdq_enc %{
 1574     // Full implementation of Java idiv and irem; checks for
 1575     // special case as described in JVM spec., p.243 & p.271.
 1576     //
 1577     //         normal case                           special case
 1578     //
    // input : eax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)         min_int
    //         edx: remainder (= eax irem reg)         0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
 1595     //                  done:
 1596     //
 1597     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1598     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1599     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1600     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1601     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1603     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1604     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1605     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1606     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1607     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1608     // normal_case:
 1609     emit_opcode(cbuf,0x99);                                         // cdq
 1610     // idiv (note: must be emitted by the user of this rule)
 1611     // normal:
 1612   %}
 1613 
 1614   // Dense encoding for older common ops
 1615   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1616     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1617   %}
 1618 
 1619 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1621   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1622     // Check for 8-bit immediate, and set sign extend bit in opcode
 1623     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1624       emit_opcode(cbuf, $primary | 0x02);
 1625     }
 1626     else {                          // If 32-bit immediate
 1627       emit_opcode(cbuf, $primary);
 1628     }
 1629   %}
 1630 
 1631   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1632     // Emit primary opcode and set sign-extend bit
 1633     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
 1637       emit_opcode(cbuf, $primary);
 1638     }
 1639     // Emit r/m byte with secondary opcode, after primary opcode.
 1640     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1641   %}
 1642 
 1643   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1644     // Check for 8-bit immediate, and set sign extend bit in opcode
 1645     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1646       $$$emit8$imm$$constant;
 1647     }
 1648     else {                          // If 32-bit immediate
 1649       // Output immediate
 1650       $$$emit32$imm$$constant;
 1651     }
 1652   %}
 1653 
 1654   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1655     // Emit primary opcode and set sign-extend bit
 1656     // Check for 8-bit immediate, and set sign extend bit in opcode
 1657     int con = (int)$imm$$constant; // Throw away top bits
 1658     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1659     // Emit r/m byte with secondary opcode, after primary opcode.
 1660     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1661     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1662     else                               emit_d32(cbuf,con);
 1663   %}
 1664 
 1665   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1666     // Emit primary opcode and set sign-extend bit
 1667     // Check for 8-bit immediate, and set sign extend bit in opcode
 1668     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1669     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1670     // Emit r/m byte with tertiary opcode, after primary opcode.
 1671     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1672     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1673     else                               emit_d32(cbuf,con);
 1674   %}
 1675 
 1676   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1677     emit_cc(cbuf, $secondary, $dst$$reg );
 1678   %}
 1679 
 1680   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1681     int destlo = $dst$$reg;
 1682     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1683     // bswap lo
 1684     emit_opcode(cbuf, 0x0F);
 1685     emit_cc(cbuf, 0xC8, destlo);
 1686     // bswap hi
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, desthi);
 1689     // xchg lo and hi
 1690     emit_opcode(cbuf, 0x87);
 1691     emit_rm(cbuf, 0x3, destlo, desthi);
 1692   %}
 1693 
 1694   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1695     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1696   %}
 1697 
 1698   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1699     $$$emit8$primary;
 1700     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1701   %}
 1702 
 1703   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1704     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1705     emit_d8(cbuf, op >> 8 );
 1706     emit_d8(cbuf, op & 255);
 1707   %}
 1708 
 1709   // emulate a CMOV with a conditional branch around a MOV
 1710   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1711     // Invert sense of branch from sense of CMOV
 1712     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1713     emit_d8( cbuf, $brOffs$$constant );
 1714   %}
 1715 
 1716   enc_class enc_PartialSubtypeCheck( ) %{
 1717     Register Redi = as_Register(EDI_enc); // result register
 1718     Register Reax = as_Register(EAX_enc); // super class
 1719     Register Recx = as_Register(ECX_enc); // killed
 1720     Register Resi = as_Register(ESI_enc); // sub class
 1721     Label miss;
 1722 
 1723     MacroAssembler _masm(&cbuf);
 1724     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1725                                      nullptr, &miss,
 1726                                      /*set_cond_codes:*/ true);
 1727     if ($primary) {
 1728       __ xorptr(Redi, Redi);
 1729     }
 1730     __ bind(miss);
 1731   %}
 1732 
 1733   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1734     MacroAssembler masm(&cbuf);
 1735     int start = masm.offset();
 1736     if (UseSSE >= 2) {
 1737       if (VerifyFPU) {
 1738         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1739       }
 1740     } else {
 1741       // External c_calling_convention expects the FPU stack to be 'clean'.
 1742       // Compiled code leaves it dirty.  Do cleanup now.
 1743       masm.empty_FPU_stack();
 1744     }
 1745     if (sizeof_FFree_Float_Stack_All == -1) {
 1746       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1747     } else {
 1748       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1749     }
 1750   %}
 1751 
 1752   enc_class Verify_FPU_For_Leaf %{
 1753     if( VerifyFPU ) {
 1754       MacroAssembler masm(&cbuf);
 1755       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1756     }
 1757   %}
 1758 
 1759   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1760     // This is the instruction starting address for relocation info.
 1761     MacroAssembler _masm(&cbuf);
 1762     cbuf.set_insts_mark();
 1763     $$$emit8$primary;
 1764     // CALL directly to the runtime
 1765     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1766                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1767     __ post_call_nop();
 1768 
 1769     if (UseSSE >= 2) {
 1770       MacroAssembler _masm(&cbuf);
 1771       BasicType rt = tf()->return_type();
 1772 
 1773       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1774         // A C runtime call where the return value is unused.  In SSE2+
 1775         // mode the result needs to be removed from the FPU stack.  It's
 1776         // likely that this function call could be removed by the
 1777         // optimizer if the C function is a pure function.
 1778         __ ffree(0);
 1779       } else if (rt == T_FLOAT) {
 1780         __ lea(rsp, Address(rsp, -4));
 1781         __ fstp_s(Address(rsp, 0));
 1782         __ movflt(xmm0, Address(rsp, 0));
 1783         __ lea(rsp, Address(rsp,  4));
 1784       } else if (rt == T_DOUBLE) {
 1785         __ lea(rsp, Address(rsp, -8));
 1786         __ fstp_d(Address(rsp, 0));
 1787         __ movdbl(xmm0, Address(rsp, 0));
 1788         __ lea(rsp, Address(rsp,  8));
 1789       }
 1790     }
 1791   %}
 1792 
 1793   enc_class pre_call_resets %{
 1794     // If method sets FPU control word restore it here
 1795     debug_only(int off0 = cbuf.insts_size());
 1796     if (ra_->C->in_24_bit_fp_mode()) {
 1797       MacroAssembler _masm(&cbuf);
 1798       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1799     }
 1800     // Clear upper bits of YMM registers when current compiled code uses
 1801     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1802     MacroAssembler _masm(&cbuf);
 1803     __ vzeroupper();
 1804     debug_only(int off1 = cbuf.insts_size());
 1805     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1806   %}
 1807 
 1808   enc_class post_call_FPU %{
 1809     // If method sets FPU control word do it here also
 1810     if (Compile::current()->in_24_bit_fp_mode()) {
 1811       MacroAssembler masm(&cbuf);
 1812       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1813     }
 1814   %}
 1815 
 1816   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1817     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1818     // who we intended to call.
 1819     MacroAssembler _masm(&cbuf);
 1820     cbuf.set_insts_mark();
 1821     $$$emit8$primary;
 1822 
 1823     if (!_method) {
 1824       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1825                      runtime_call_Relocation::spec(),
 1826                      RELOC_IMM32);
 1827       __ post_call_nop();
 1828     } else {
 1829       int method_index = resolved_method_index(cbuf);
 1830       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1831                                                   : static_call_Relocation::spec(method_index);
 1832       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1833                      rspec, RELOC_DISP32);
 1834       __ post_call_nop();
 1835       address mark = cbuf.insts_mark();
 1836       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1837         // Calls of the same statically bound method can share
 1838         // a stub to the interpreter.
 1839         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1840       } else {
 1841         // Emit stubs for static call.
 1842         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1843         if (stub == nullptr) {
 1844           ciEnv::current()->record_failure("CodeCache is full");
 1845           return;
 1846         }
 1847       }
 1848     }
 1849   %}
 1850 
 1851   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1852     MacroAssembler _masm(&cbuf);
 1853     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1854     __ post_call_nop();
 1855   %}
 1856 
 1857   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1858     int disp = in_bytes(Method::from_compiled_offset());
 1859     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1860 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1862     MacroAssembler _masm(&cbuf);
 1863     cbuf.set_insts_mark();
 1864     $$$emit8$primary;
 1865     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1866     emit_d8(cbuf, disp);             // Displacement
 1867     __ post_call_nop();
 1868   %}
 1869 
 1870 //   Following encoding is no longer used, but may be restored if calling
 1871 //   convention changes significantly.
 1872 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1873 //
 1874 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1875 //     // int ic_reg     = Matcher::inline_cache_reg();
 1876 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1877 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1878 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1879 //
 1880 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1881 //     // // so we load it immediately before the call
 1882 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1883 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1884 //
 1885 //     // xor rbp,ebp
 1886 //     emit_opcode(cbuf, 0x33);
 1887 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1888 //
 1889 //     // CALL to interpreter.
 1890 //     cbuf.set_insts_mark();
 1891 //     $$$emit8$primary;
 1892 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1893 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1894 //   %}
 1895 
 1896   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1897     $$$emit8$primary;
 1898     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1899     $$$emit8$shift$$constant;
 1900   %}
 1901 
 1902   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1903     // Load immediate does not have a zero or sign extended version
 1904     // for 8-bit immediates
 1905     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1906     $$$emit32$src$$constant;
 1907   %}
 1908 
 1909   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1910     // Load immediate does not have a zero or sign extended version
 1911     // for 8-bit immediates
 1912     emit_opcode(cbuf, $primary + $dst$$reg);
 1913     $$$emit32$src$$constant;
 1914   %}
 1915 
 1916   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1917     // Load immediate does not have a zero or sign extended version
 1918     // for 8-bit immediates
 1919     int dst_enc = $dst$$reg;
 1920     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1921     if (src_con == 0) {
 1922       // xor dst, dst
 1923       emit_opcode(cbuf, 0x33);
 1924       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1925     } else {
 1926       emit_opcode(cbuf, $primary + dst_enc);
 1927       emit_d32(cbuf, src_con);
 1928     }
 1929   %}
 1930 
 1931   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1932     // Load immediate does not have a zero or sign extended version
 1933     // for 8-bit immediates
 1934     int dst_enc = $dst$$reg + 2;
 1935     int src_con = ((julong)($src$$constant)) >> 32;
 1936     if (src_con == 0) {
 1937       // xor dst, dst
 1938       emit_opcode(cbuf, 0x33);
 1939       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1940     } else {
 1941       emit_opcode(cbuf, $primary + dst_enc);
 1942       emit_d32(cbuf, src_con);
 1943     }
 1944   %}
 1945 
 1946 
 1947   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1948   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1949     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1950   %}
 1951 
 1952   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1953     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1954   %}
 1955 
 1956   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1957     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1958   %}
 1959 
 1960   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1961     $$$emit8$primary;
 1962     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1963   %}
 1964 
 1965   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1966     $$$emit8$secondary;
 1967     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1968   %}
 1969 
 1970   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1971     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1972   %}
 1973 
 1974   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1975     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1976   %}
 1977 
 1978   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1979     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1980   %}
 1981 
 1982   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1983     // Output immediate
 1984     $$$emit32$src$$constant;
 1985   %}
 1986 
 1987   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1988     // Output Float immediate bits
 1989     jfloat jf = $src$$constant;
 1990     int    jf_as_bits = jint_cast( jf );
 1991     emit_d32(cbuf, jf_as_bits);
 1992   %}
 1993 
 1994   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1995     // Output Float immediate bits
 1996     jfloat jf = $src$$constant;
 1997     int    jf_as_bits = jint_cast( jf );
 1998     emit_d32(cbuf, jf_as_bits);
 1999   %}
 2000 
 2001   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2002     // Output immediate
 2003     $$$emit16$src$$constant;
 2004   %}
 2005 
 2006   enc_class Con_d32(immI src) %{
 2007     emit_d32(cbuf,$src$$constant);
 2008   %}
 2009 
 2010   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2011     // Output immediate memory reference
 2012     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2013     emit_d32(cbuf, 0x00);
 2014   %}
 2015 
 2016   enc_class lock_prefix( ) %{
 2017     emit_opcode(cbuf,0xF0);         // [Lock]
 2018   %}
 2019 
 2020   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
 2025   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2026 
    // XCHG  ebx,ecx
 2028     emit_opcode(cbuf,0x87);
 2029     emit_opcode(cbuf,0xD9);
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032     // CMPXCHG8 [Eptr]
 2033     emit_opcode(cbuf,0x0F);
 2034     emit_opcode(cbuf,0xC7);
 2035     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
 2037     emit_opcode(cbuf,0x87);
 2038     emit_opcode(cbuf,0xD9);
 2039   %}
 2040 
 2041   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2042     // [Lock]
 2043     emit_opcode(cbuf,0xF0);
 2044 
 2045     // CMPXCHG [Eptr]
 2046     emit_opcode(cbuf,0x0F);
 2047     emit_opcode(cbuf,0xB1);
 2048     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2049   %}
 2050 
 2051   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2052     // [Lock]
 2053     emit_opcode(cbuf,0xF0);
 2054 
 2055     // CMPXCHGB [Eptr]
 2056     emit_opcode(cbuf,0x0F);
 2057     emit_opcode(cbuf,0xB0);
 2058     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2059   %}
 2060 
 2061   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2062     // [Lock]
 2063     emit_opcode(cbuf,0xF0);
 2064 
 2065     // 16-bit mode
 2066     emit_opcode(cbuf, 0x66);
 2067 
 2068     // CMPXCHGW [Eptr]
 2069     emit_opcode(cbuf,0x0F);
 2070     emit_opcode(cbuf,0xB1);
 2071     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2072   %}
 2073 
 2074   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2075     int res_encoding = $res$$reg;
 2076 
 2077     // MOV  res,0
 2078     emit_opcode( cbuf, 0xB8 + res_encoding);
 2079     emit_d32( cbuf, 0 );
 2080     // JNE,s  fail
 2081     emit_opcode(cbuf,0x75);
 2082     emit_d8(cbuf, 5 );
 2083     // MOV  res,1
 2084     emit_opcode( cbuf, 0xB8 + res_encoding);
 2085     emit_d32( cbuf, 1 );
 2086     // fail:
 2087   %}
 2088 
 2089   enc_class set_instruction_start( ) %{
 2090     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2091   %}
 2092 
 2093   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2094     int reg_encoding = $ereg$$reg;
 2095     int base  = $mem$$base;
 2096     int index = $mem$$index;
 2097     int scale = $mem$$scale;
 2098     int displace = $mem$$disp;
 2099     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2100     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2101   %}
 2102 
 2103   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2104     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2105     int base  = $mem$$base;
 2106     int index = $mem$$index;
 2107     int scale = $mem$$scale;
 2108     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2109     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2110     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2111   %}
 2112 
 2113   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2114     int r1, r2;
 2115     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2116     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2117     emit_opcode(cbuf,0x0F);
 2118     emit_opcode(cbuf,$tertiary);
 2119     emit_rm(cbuf, 0x3, r1, r2);
 2120     emit_d8(cbuf,$cnt$$constant);
 2121     emit_d8(cbuf,$primary);
 2122     emit_rm(cbuf, 0x3, $secondary, r1);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124   %}
 2125 
 2126   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2127     emit_opcode( cbuf, 0x8B ); // Move
 2128     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2129     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2130       emit_d8(cbuf,$primary);
 2131       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2132       emit_d8(cbuf,$cnt$$constant-32);
 2133     }
 2134     emit_d8(cbuf,$primary);
 2135     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2136     emit_d8(cbuf,31);
 2137   %}
 2138 
 2139   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2140     int r1, r2;
 2141     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2142     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2143 
 2144     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2145     emit_rm(cbuf, 0x3, r1, r2);
 2146     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2147       emit_opcode(cbuf,$primary);
 2148       emit_rm(cbuf, 0x3, $secondary, r1);
 2149       emit_d8(cbuf,$cnt$$constant-32);
 2150     }
 2151     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2152     emit_rm(cbuf, 0x3, r2, r2);
 2153   %}
 2154 
 2155   // Clone of RegMem but accepts an extra parameter to access each
 2156   // half of a double in memory; it never needs relocation info.
 2157   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2158     emit_opcode(cbuf,$opcode$$constant);
 2159     int reg_encoding = $rm_reg$$reg;
 2160     int base     = $mem$$base;
 2161     int index    = $mem$$index;
 2162     int scale    = $mem$$scale;
 2163     int displace = $mem$$disp + $disp_for_half$$constant;
 2164     relocInfo::relocType disp_reloc = relocInfo::none;
 2165     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2166   %}
 2167 
 2168   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2169   //
 2170   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2171   // and it never needs relocation information.
 2172   // Frequently used to move data between FPU's Stack Top and memory.
 2173   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2174     int rm_byte_opcode = $rm_opcode$$constant;
 2175     int base     = $mem$$base;
 2176     int index    = $mem$$index;
 2177     int scale    = $mem$$scale;
 2178     int displace = $mem$$disp;
 2179     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2180     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2181   %}
 2182 
 2183   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2184     int rm_byte_opcode = $rm_opcode$$constant;
 2185     int base     = $mem$$base;
 2186     int index    = $mem$$index;
 2187     int scale    = $mem$$scale;
 2188     int displace = $mem$$disp;
 2189     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2190     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2191   %}
 2192 
 2193   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2194     int reg_encoding = $dst$$reg;
 2195     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2196     int index        = 0x04;            // 0x04 indicates no index
 2197     int scale        = 0x00;            // 0x00 indicates no scale
 2198     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2199     relocInfo::relocType disp_reloc = relocInfo::none;
 2200     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2201   %}
 2202 
 2203   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2204     // Compare dst,src
 2205     emit_opcode(cbuf,0x3B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207     // jmp dst < src around move
 2208     emit_opcode(cbuf,0x7C);
 2209     emit_d8(cbuf,2);
 2210     // move dst,src
 2211     emit_opcode(cbuf,0x8B);
 2212     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2213   %}
 2214 
 2215   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2216     // Compare dst,src
 2217     emit_opcode(cbuf,0x3B);
 2218     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2219     // jmp dst > src around move
 2220     emit_opcode(cbuf,0x7F);
 2221     emit_d8(cbuf,2);
 2222     // move dst,src
 2223     emit_opcode(cbuf,0x8B);
 2224     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2225   %}
 2226 
 2227   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2228     // If src is FPR1, we can just FST to store it.
 2229     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2230     int reg_encoding = 0x2; // Just store
 2231     int base  = $mem$$base;
 2232     int index = $mem$$index;
 2233     int scale = $mem$$scale;
 2234     int displace = $mem$$disp;
 2235     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2236     if( $src$$reg != FPR1L_enc ) {
 2237       reg_encoding = 0x3;  // Store & pop
 2238       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2239       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2240     }
 2241     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2242     emit_opcode(cbuf,$primary);
 2243     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2244   %}
 2245 
 2246   enc_class neg_reg(rRegI dst) %{
 2247     // NEG $dst
 2248     emit_opcode(cbuf,0xF7);
 2249     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2250   %}
 2251 
 2252   enc_class setLT_reg(eCXRegI dst) %{
 2253     // SETLT $dst
 2254     emit_opcode(cbuf,0x0F);
 2255     emit_opcode(cbuf,0x9C);
 2256     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2257   %}
 2258 
 2259   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2260     int tmpReg = $tmp$$reg;
 2261 
 2262     // SUB $p,$q
 2263     emit_opcode(cbuf,0x2B);
 2264     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2265     // SBB $tmp,$tmp
 2266     emit_opcode(cbuf,0x1B);
 2267     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2268     // AND $tmp,$y
 2269     emit_opcode(cbuf,0x23);
 2270     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2271     // ADD $p,$tmp
 2272     emit_opcode(cbuf,0x03);
 2273     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2274   %}
 2275 
 2276   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2277     // TEST shift,32
 2278     emit_opcode(cbuf,0xF7);
 2279     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2280     emit_d32(cbuf,0x20);
 2281     // JEQ,s small
 2282     emit_opcode(cbuf, 0x74);
 2283     emit_d8(cbuf, 0x04);
 2284     // MOV    $dst.hi,$dst.lo
 2285     emit_opcode( cbuf, 0x8B );
 2286     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2287     // CLR    $dst.lo
 2288     emit_opcode(cbuf, 0x33);
 2289     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2290 // small:
 2291     // SHLD   $dst.hi,$dst.lo,$shift
 2292     emit_opcode(cbuf,0x0F);
 2293     emit_opcode(cbuf,0xA5);
 2294     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2296     emit_opcode(cbuf,0xD3);
 2297     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2298   %}
 2299 
 2300   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2301     // TEST shift,32
 2302     emit_opcode(cbuf,0xF7);
 2303     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2304     emit_d32(cbuf,0x20);
 2305     // JEQ,s small
 2306     emit_opcode(cbuf, 0x74);
 2307     emit_d8(cbuf, 0x04);
 2308     // MOV    $dst.lo,$dst.hi
 2309     emit_opcode( cbuf, 0x8B );
 2310     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2311     // CLR    $dst.hi
 2312     emit_opcode(cbuf, 0x33);
 2313     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2314 // small:
 2315     // SHRD   $dst.lo,$dst.hi,$shift
 2316     emit_opcode(cbuf,0x0F);
 2317     emit_opcode(cbuf,0xAD);
 2318     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2320     emit_opcode(cbuf,0xD3);
 2321     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2322   %}
 2323 
 2324   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2325     // TEST shift,32
 2326     emit_opcode(cbuf,0xF7);
 2327     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2328     emit_d32(cbuf,0x20);
 2329     // JEQ,s small
 2330     emit_opcode(cbuf, 0x74);
 2331     emit_d8(cbuf, 0x05);
 2332     // MOV    $dst.lo,$dst.hi
 2333     emit_opcode( cbuf, 0x8B );
 2334     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2335     // SAR    $dst.hi,31
 2336     emit_opcode(cbuf, 0xC1);
 2337     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     emit_d8(cbuf, 0x1F );
 2339 // small:
 2340     // SHRD   $dst.lo,$dst.hi,$shift
 2341     emit_opcode(cbuf,0x0F);
 2342     emit_opcode(cbuf,0xAD);
 2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2345     emit_opcode(cbuf,0xD3);
 2346     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2347   %}
 2348 
 2349 
 2350   // ----------------- Encodings for floating point unit -----------------
 2351   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2352   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2353     $$$emit8$primary;
 2354     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2355   %}
 2356 
 2357   // Pop argument in FPR0 with FSTP ST(0)
 2358   enc_class PopFPU() %{
 2359     emit_opcode( cbuf, 0xDD );
 2360     emit_d8( cbuf, 0xD8 );
 2361   %}
 2362 
 2363   // !!!!! equivalent to Pop_Reg_F
 2364   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2365     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2366     emit_d8( cbuf, 0xD8+$dst$$reg );
 2367   %}
 2368 
 2369   enc_class Push_Reg_DPR( regDPR dst ) %{
 2370     emit_opcode( cbuf, 0xD9 );
 2371     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2372   %}
 2373 
 2374   enc_class strictfp_bias1( regDPR dst ) %{
 2375     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2376     emit_opcode( cbuf, 0x2D );
 2377     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2378     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2379     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2380   %}
 2381 
 2382   enc_class strictfp_bias2( regDPR dst ) %{
 2383     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2384     emit_opcode( cbuf, 0x2D );
 2385     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2386     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2387     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2388   %}
 2389 
 2390   // Special case for moving an integer register to a stack slot.
 2391   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2392     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2393   %}
 2394 
 2395   // Special case for moving a register to a stack slot.
 2396   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2397     // Opcode already emitted
 2398     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2399     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2400     emit_d32(cbuf, $dst$$disp);   // Displacement
 2401   %}
 2402 
 2403   // Push the integer in stackSlot 'src' onto FP-stack
 2404   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2405     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2406   %}
 2407 
 2408   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2409   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2410     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2411   %}
 2412 
 2413   // Same as Pop_Mem_F except for opcode
 2414   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2415   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2416     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2417   %}
 2418 
 2419   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2420     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2421     emit_d8( cbuf, 0xD8+$dst$$reg );
 2422   %}
 2423 
 2424   enc_class Push_Reg_FPR( regFPR dst ) %{
 2425     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2426     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2427   %}
 2428 
 2429   // Push FPU's float to a stack-slot, and pop FPU-stack
 2430   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2431     int pop = 0x02;
 2432     if ($src$$reg != FPR1L_enc) {
 2433       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2434       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2435       pop = 0x03;
 2436     }
 2437     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2438   %}
 2439 
 2440   // Push FPU's double to a stack-slot, and pop FPU-stack
 2441   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2442     int pop = 0x02;
 2443     if ($src$$reg != FPR1L_enc) {
 2444       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2445       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2446       pop = 0x03;
 2447     }
 2448     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2449   %}
 2450 
 2451   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2452   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2453     int pop = 0xD0 - 1; // -1 since we skip FLD
 2454     if ($src$$reg != FPR1L_enc) {
 2455       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2456       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2457       pop = 0xD8;
 2458     }
 2459     emit_opcode( cbuf, 0xDD );
 2460     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2461   %}
 2462 
 2463 
 2464   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2465     // load dst in FPR0
 2466     emit_opcode( cbuf, 0xD9 );
 2467     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2468     if ($src$$reg != FPR1L_enc) {
 2469       // fincstp
 2470       emit_opcode (cbuf, 0xD9);
 2471       emit_opcode (cbuf, 0xF7);
 2472       // swap src with FPR1:
 2473       // FXCH FPR1 with src
 2474       emit_opcode(cbuf, 0xD9);
 2475       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2476       // fdecstp
 2477       emit_opcode (cbuf, 0xD9);
 2478       emit_opcode (cbuf, 0xF6);
 2479     }
 2480   %}
 2481 
 2482   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ subptr(rsp, 8);
 2485     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2486     __ fld_d(Address(rsp, 0));
 2487     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2488     __ fld_d(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2492     MacroAssembler _masm(&cbuf);
 2493     __ subptr(rsp, 4);
 2494     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2495     __ fld_s(Address(rsp, 0));
 2496     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2497     __ fld_s(Address(rsp, 0));
 2498   %}
 2499 
 2500   enc_class Push_ResultD(regD dst) %{
 2501     MacroAssembler _masm(&cbuf);
 2502     __ fstp_d(Address(rsp, 0));
 2503     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2504     __ addptr(rsp, 8);
 2505   %}
 2506 
 2507   enc_class Push_ResultF(regF dst, immI d8) %{
 2508     MacroAssembler _masm(&cbuf);
 2509     __ fstp_s(Address(rsp, 0));
 2510     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2511     __ addptr(rsp, $d8$$constant);
 2512   %}
 2513 
 2514   enc_class Push_SrcD(regD src) %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ subptr(rsp, 8);
 2517     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2518     __ fld_d(Address(rsp, 0));
 2519   %}
 2520 
 2521   enc_class push_stack_temp_qword() %{
 2522     MacroAssembler _masm(&cbuf);
 2523     __ subptr(rsp, 8);
 2524   %}
 2525 
 2526   enc_class pop_stack_temp_qword() %{
 2527     MacroAssembler _masm(&cbuf);
 2528     __ addptr(rsp, 8);
 2529   %}
 2530 
 2531   enc_class push_xmm_to_fpr1(regD src) %{
 2532     MacroAssembler _masm(&cbuf);
 2533     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2534     __ fld_d(Address(rsp, 0));
 2535   %}
 2536 
 2537   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2538     if ($src$$reg != FPR1L_enc) {
 2539       // fincstp
 2540       emit_opcode (cbuf, 0xD9);
 2541       emit_opcode (cbuf, 0xF7);
 2542       // FXCH FPR1 with src
 2543       emit_opcode(cbuf, 0xD9);
 2544       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2545       // fdecstp
 2546       emit_opcode (cbuf, 0xD9);
 2547       emit_opcode (cbuf, 0xF6);
 2548     }
 2549     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2550     // // FSTP   FPR$dst$$reg
 2551     // emit_opcode( cbuf, 0xDD );
 2552     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2553   %}
 2554 
 2555   enc_class fnstsw_sahf_skip_parity() %{
 2556     // fnstsw ax
 2557     emit_opcode( cbuf, 0xDF );
 2558     emit_opcode( cbuf, 0xE0 );
 2559     // sahf
 2560     emit_opcode( cbuf, 0x9E );
 2561     // jnp  ::skip
 2562     emit_opcode( cbuf, 0x7B );
 2563     emit_opcode( cbuf, 0x05 );
 2564   %}
 2565 
 2566   enc_class emitModDPR() %{
 2567     // fprem must be iterative
 2568     // :: loop
 2569     // fprem
 2570     emit_opcode( cbuf, 0xD9 );
 2571     emit_opcode( cbuf, 0xF8 );
 2572     // wait
 2573     emit_opcode( cbuf, 0x9b );
 2574     // fnstsw ax
 2575     emit_opcode( cbuf, 0xDF );
 2576     emit_opcode( cbuf, 0xE0 );
 2577     // sahf
 2578     emit_opcode( cbuf, 0x9E );
 2579     // jp  ::loop
 2580     emit_opcode( cbuf, 0x0F );
 2581     emit_opcode( cbuf, 0x8A );
 2582     emit_opcode( cbuf, 0xF4 );
 2583     emit_opcode( cbuf, 0xFF );
 2584     emit_opcode( cbuf, 0xFF );
 2585     emit_opcode( cbuf, 0xFF );
 2586   %}
 2587 
 2588   enc_class fpu_flags() %{
 2589     // fnstsw_ax
 2590     emit_opcode( cbuf, 0xDF);
 2591     emit_opcode( cbuf, 0xE0);
 2592     // test ax,0x0400
 2593     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2594     emit_opcode( cbuf, 0xA9 );
 2595     emit_d16   ( cbuf, 0x0400 );
 2596     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2597     // // test rax,0x0400
 2598     // emit_opcode( cbuf, 0xA9 );
 2599     // emit_d32   ( cbuf, 0x00000400 );
 2600     //
 2601     // jz exit (no unordered comparison)
 2602     emit_opcode( cbuf, 0x74 );
 2603     emit_d8    ( cbuf, 0x02 );
 2604     // mov ah,1 - treat as LT case (set carry flag)
 2605     emit_opcode( cbuf, 0xB4 );
 2606     emit_d8    ( cbuf, 0x01 );
 2607     // sahf
 2608     emit_opcode( cbuf, 0x9E);
 2609   %}
 2610 
 2611   enc_class cmpF_P6_fixup() %{
 2612     // Fixup the integer flags in case comparison involved a NaN
 2613     //
 2614     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2615     emit_opcode( cbuf, 0x7B );
 2616     emit_d8    ( cbuf, 0x03 );
 2617     // MOV AH,1 - treat as LT case (set carry flag)
 2618     emit_opcode( cbuf, 0xB4 );
 2619     emit_d8    ( cbuf, 0x01 );
 2620     // SAHF
 2621     emit_opcode( cbuf, 0x9E);
 2622     // NOP     // target for branch to avoid branch to branch
 2623     emit_opcode( cbuf, 0x90);
 2624   %}
 2625 
 2626 //     fnstsw_ax();
 2627 //     sahf();
 2628 //     movl(dst, nan_result);
 2629 //     jcc(Assembler::parity, exit);
 2630 //     movl(dst, less_result);
 2631 //     jcc(Assembler::below, exit);
 2632 //     movl(dst, equal_result);
 2633 //     jcc(Assembler::equal, exit);
 2634 //     movl(dst, greater_result);
 2635 
 2636 // less_result     =  1;
 2637 // greater_result  = -1;
 2638 // equal_result    = 0;
 2639 // nan_result      = -1;
 2640 
 2641   enc_class CmpF_Result(rRegI dst) %{
 2642     // fnstsw_ax();
 2643     emit_opcode( cbuf, 0xDF);
 2644     emit_opcode( cbuf, 0xE0);
 2645     // sahf
 2646     emit_opcode( cbuf, 0x9E);
 2647     // movl(dst, nan_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, -1 );
 2650     // jcc(Assembler::parity, exit);
 2651     emit_opcode( cbuf, 0x7A );
 2652     emit_d8    ( cbuf, 0x13 );
 2653     // movl(dst, less_result);
 2654     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2655     emit_d32( cbuf, -1 );
 2656     // jcc(Assembler::below, exit);
 2657     emit_opcode( cbuf, 0x72 );
 2658     emit_d8    ( cbuf, 0x0C );
 2659     // movl(dst, equal_result);
 2660     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2661     emit_d32( cbuf, 0 );
 2662     // jcc(Assembler::equal, exit);
 2663     emit_opcode( cbuf, 0x74 );
 2664     emit_d8    ( cbuf, 0x05 );
 2665     // movl(dst, greater_result);
 2666     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2667     emit_d32( cbuf, 1 );
 2668   %}
 2669 
 2670 
 2671   // Compare the longs and set flags
 2672   // BROKEN!  Do Not use as-is
 2673   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2674     // CMP    $src1.hi,$src2.hi
 2675     emit_opcode( cbuf, 0x3B );
 2676     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2677     // JNE,s  done
 2678     emit_opcode(cbuf,0x75);
 2679     emit_d8(cbuf, 2 );
 2680     // CMP    $src1.lo,$src2.lo
 2681     emit_opcode( cbuf, 0x3B );
 2682     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2683 // done:
 2684   %}
 2685 
 2686   enc_class convert_int_long( regL dst, rRegI src ) %{
 2687     // mov $dst.lo,$src
 2688     int dst_encoding = $dst$$reg;
 2689     int src_encoding = $src$$reg;
 2690     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2691     // mov $dst.hi,$src
 2692     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2693     // sar $dst.hi,31
 2694     emit_opcode( cbuf, 0xC1 );
 2695     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2696     emit_d8(cbuf, 0x1F );
 2697   %}
 2698 
 2699   enc_class convert_long_double( eRegL src ) %{
 2700     // push $src.hi
 2701     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2702     // push $src.lo
 2703     emit_opcode(cbuf, 0x50+$src$$reg  );
 2704     // fild 64-bits at [SP]
 2705     emit_opcode(cbuf,0xdf);
 2706     emit_d8(cbuf, 0x6C);
 2707     emit_d8(cbuf, 0x24);
 2708     emit_d8(cbuf, 0x00);
 2709     // pop stack
 2710     emit_opcode(cbuf, 0x83); // add  SP, #8
 2711     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2712     emit_d8(cbuf, 0x8);
 2713   %}
 2714 
 2715   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
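    // High-bits multiply: the one-operand IMUL leaves the signed 64-bit
    // product in EDX:EAX, and the SAR then leaves (product >> $cnt) in EDX.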
 2716     // IMUL   EDX:EAX,$src1
 2717     emit_opcode( cbuf, 0xF7 );
 2718     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2719     // SAR    EDX,$cnt-32
 2720     int shift_count = ((int)$cnt$$constant) - 32;
 2721     if (shift_count > 0) {
 2722       emit_opcode(cbuf, 0xC1);
 2723       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2724       emit_d8(cbuf, shift_count);
 2725     }
 2726   %}
 2727 
  // Same as convert_long_double above, but without the trailing ADD ESP, 8
 2729   enc_class convert_long_double2( eRegL src ) %{
 2730     // push $src.hi
 2731     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2732     // push $src.lo
 2733     emit_opcode(cbuf, 0x50+$src$$reg  );
 2734     // fild 64-bits at [SP]
 2735     emit_opcode(cbuf,0xdf);
 2736     emit_d8(cbuf, 0x6C);
 2737     emit_d8(cbuf, 0x24);
 2738     emit_d8(cbuf, 0x00);
 2739   %}
 2740 
 2741   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2742     // Basic idea: long = (long)int * (long)int
 2743     // IMUL EDX:EAX, src
 2744     emit_opcode( cbuf, 0xF7 );
 2745     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2746   %}
 2747 
 2748   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2749     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2750     // MUL EDX:EAX, src
 2751     emit_opcode( cbuf, 0xF7 );
 2752     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2753   %}
 2754 
 2755   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2756     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2757     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
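    //             Since (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
    //               = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi),
    //             the x_hi*y_hi term overflows 64 bits and is dropped.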
 2758     // MOV    $tmp,$src.lo
 2759     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2760     // IMUL   $tmp,EDX
 2761     emit_opcode( cbuf, 0x0F );
 2762     emit_opcode( cbuf, 0xAF );
 2763     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2764     // MOV    EDX,$src.hi
 2765     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2766     // IMUL   EDX,EAX
 2767     emit_opcode( cbuf, 0x0F );
 2768     emit_opcode( cbuf, 0xAF );
 2769     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2770     // ADD    $tmp,EDX
 2771     emit_opcode( cbuf, 0x03 );
 2772     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2773     // MUL   EDX:EAX,$src.lo
 2774     emit_opcode( cbuf, 0xF7 );
 2775     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2777     emit_opcode( cbuf, 0x03 );
 2778     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2779   %}
 2780 
 2781   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2782     // Basic idea: lo(result) = lo(src * y_lo)
 2783     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2784     // IMUL   $tmp,EDX,$src
 2785     emit_opcode( cbuf, 0x6B );
 2786     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2787     emit_d8( cbuf, (int)$src$$constant );
 2788     // MOV    EDX,$src
 2789     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2790     emit_d32( cbuf, (int)$src$$constant );
 2791     // MUL   EDX:EAX,EDX
 2792     emit_opcode( cbuf, 0xF7 );
 2793     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2795     emit_opcode( cbuf, 0x03 );
 2796     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2797   %}
 2798 
 2799   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2800     // PUSH src1.hi
 2801     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2802     // PUSH src1.lo
 2803     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2804     // PUSH src2.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2806     // PUSH src2.lo
 2807     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2808     // CALL directly to the runtime
 2809     MacroAssembler _masm(&cbuf);
 2810     cbuf.set_insts_mark();
 2811     emit_opcode(cbuf,0xE8);       // Call into runtime
 2812     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2813     __ post_call_nop();
 2814     // Restore stack
 2815     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2816     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2817     emit_d8(cbuf, 4*4);
 2818   %}
 2819 
 2820   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2821     // PUSH src1.hi
 2822     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2823     // PUSH src1.lo
 2824     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2825     // PUSH src2.hi
 2826     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2827     // PUSH src2.lo
 2828     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2829     // CALL directly to the runtime
 2830     MacroAssembler _masm(&cbuf);
 2831     cbuf.set_insts_mark();
 2832     emit_opcode(cbuf,0xE8);       // Call into runtime
 2833     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2834     __ post_call_nop();
 2835     // Restore stack
 2836     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2837     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2838     emit_d8(cbuf, 4*4);
 2839   %}
 2840 
 2841   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
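    // Zero-test a long: OR the two halves together so ZF is set only when all
    // 64 bits are zero.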
 2842     // MOV   $tmp,$src.lo
 2843     emit_opcode(cbuf, 0x8B);
 2844     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2845     // OR    $tmp,$src.hi
 2846     emit_opcode(cbuf, 0x0B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2848   %}
 2849 
 2850   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
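    // 64-bit equality test: if the low halves differ, ZF is already clear and
    // the high-half compare is skipped; otherwise ZF comes from the high halves.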
 2851     // CMP    $src1.lo,$src2.lo
 2852     emit_opcode( cbuf, 0x3B );
 2853     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2854     // JNE,s  skip
 2855     emit_cc(cbuf, 0x70, 0x5);
 2856     emit_d8(cbuf,2);
 2857     // CMP    $src1.hi,$src2.hi
 2858     emit_opcode( cbuf, 0x3B );
 2859     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2860   %}
 2861 
 2862   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
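    // Signed 64-bit compare: compare the low halves, then subtract the high
    // halves with borrow into $tmp so the final flags match a full 64-bit SUB.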
 2863     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2864     emit_opcode( cbuf, 0x3B );
 2865     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2866     // MOV    $tmp,$src1.hi
 2867     emit_opcode( cbuf, 0x8B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2869     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2872   %}
 2873 
 2874   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
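    // Compare a long against zero: compute 0 - $src across both halves so the
    // flags reflect the full 64-bit comparison.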
 2875     // XOR    $tmp,$tmp
 2876     emit_opcode(cbuf,0x33);  // XOR
 2877     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2878     // CMP    $tmp,$src.lo
 2879     emit_opcode( cbuf, 0x3B );
 2880     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2881     // SBB    $tmp,$src.hi
 2882     emit_opcode( cbuf, 0x1B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2884   %}
 2885 
 2886  // Sniff, sniff... smells like Gnu Superoptimizer
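  // Negate a 64-bit value in a register pair: NEG both halves, then subtract
  // (SBB) the borrow produced by the low-half NEG out of the high half.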
 2887   enc_class neg_long( eRegL dst ) %{
 2888     emit_opcode(cbuf,0xF7);    // NEG hi
 2889     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2890     emit_opcode(cbuf,0xF7);    // NEG lo
 2891     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2892     emit_opcode(cbuf,0x83);    // SBB hi,0
 2893     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2894     emit_d8    (cbuf,0 );
 2895   %}
 2896 
 2897   enc_class enc_pop_rdx() %{
 2898     emit_opcode(cbuf,0x5A);
 2899   %}
 2900 
 2901   enc_class enc_rethrow() %{
 2902     MacroAssembler _masm(&cbuf);
 2903     cbuf.set_insts_mark();
 2904     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2905     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2906                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2907     __ post_call_nop();
 2908   %}
 2909 
 2910 
 2911   // Convert a double to an int.  Java semantics require we do complex
 2912   // manglelations in the corner cases.  So we set the rounding mode to
 2913   // 'zero', store the darned double down as an int, and reset the
 2914   // rounding mode to 'nearest'.  The hardware throws an exception which
 2915   // patches up the correct value directly to the stack.
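  // In this encoding the corner cases are caught without exceptions: FIST
  // stores the "integer indefinite" value 0x80000000 for NaN and out-of-range
  // inputs, so the fast path compares the popped result against that sentinel
  // and only calls the runtime wrapper when it is seen.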
 2916   enc_class DPR2I_encoding( regDPR src ) %{
 2917     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2920     // However, I2C adapters and other float-stack manglers leave pending
 2921     // invalid-op exceptions hanging.  We would have to clear them before
 2922     // enabling them and that is more expensive than just testing for the
 2923     // invalid value Intel stores down in the corner cases.
 2924     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2925     emit_opcode(cbuf,0x2D);
 2926     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2927     // Allocate a word
 2928     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2929     emit_opcode(cbuf,0xEC);
 2930     emit_d8(cbuf,0x04);
 2931     // Encoding assumes a double has been pushed into FPR0.
 2932     // Store down the double as an int, popping the FPU stack
 2933     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2934     emit_opcode(cbuf,0x1C);
 2935     emit_d8(cbuf,0x24);
 2936     // Restore the rounding mode; mask the exception
 2937     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2938     emit_opcode(cbuf,0x2D);
 2939     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2940         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2941         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2942 
 2943     // Load the converted int; adjust CPU stack
 2944     emit_opcode(cbuf,0x58);       // POP EAX
 2945     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2946     emit_d32   (cbuf,0x80000000); //         0x80000000
 2947     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2948     emit_d8    (cbuf,0x07);       // Size of slow_call
 2949     // Push src onto stack slow-path
 2950     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2951     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2952     // CALL directly to the runtime
 2953     MacroAssembler _masm(&cbuf);
 2954     cbuf.set_insts_mark();
 2955     emit_opcode(cbuf,0xE8);       // Call into runtime
 2956     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2957     __ post_call_nop();
 2958     // Carry on here...
 2959   %}
 2960 
 2961   enc_class DPR2L_encoding( regDPR src ) %{
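    // Same scheme as DPR2I_encoding, but producing a long: the slow path is
    // taken only when the stored value is the 64-bit integer indefinite
    // 0x8000000000000000 (EDX == 0x80000000 and EAX == 0).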
 2962     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2963     emit_opcode(cbuf,0x2D);
 2964     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2966     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2967     emit_opcode(cbuf,0xEC);
 2968     emit_d8(cbuf,0x08);
 2969     // Encoding assumes a double has been pushed into FPR0.
 2970     // Store down the double as a long, popping the FPU stack
 2971     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2972     emit_opcode(cbuf,0x3C);
 2973     emit_d8(cbuf,0x24);
 2974     // Restore the rounding mode; mask the exception
 2975     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2976     emit_opcode(cbuf,0x2D);
 2977     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2978         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2979         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2980 
    // Load the converted long; adjust CPU stack
 2982     emit_opcode(cbuf,0x58);       // POP EAX
 2983     emit_opcode(cbuf,0x5A);       // POP EDX
 2984     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2985     emit_d8    (cbuf,0xFA);       // rdx
 2986     emit_d32   (cbuf,0x80000000); //         0x80000000
 2987     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2988     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2989     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2990     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2991     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2992     emit_d8    (cbuf,0x07);       // Size of slow_call
 2993     // Push src onto stack slow-path
 2994     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2995     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2996     // CALL directly to the runtime
 2997     MacroAssembler _masm(&cbuf);
 2998     cbuf.set_insts_mark();
 2999     emit_opcode(cbuf,0xE8);       // Call into runtime
 3000     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3001     __ post_call_nop();
 3002     // Carry on here...
 3003   %}
 3004 
 3005   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3006     // Operand was loaded from memory into fp ST (stack top)
 3007     // FMUL   ST,$src  /* D8 C8+i */
 3008     emit_opcode(cbuf, 0xD8);
 3009     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3010   %}
 3011 
 3012   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3014     emit_opcode(cbuf, 0xD8);
 3015     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3016     //could use FADDP  src2,fpST  /* DE C0+i */
 3017   %}
 3018 
 3019   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3020     // FADDP  src2,ST  /* DE C0+i */
 3021     emit_opcode(cbuf, 0xDE);
 3022     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3023   %}
 3024 
 3025   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3026     // Operand has been loaded into fp ST (stack top)
 3027       // FSUB   ST,$src1
 3028       emit_opcode(cbuf, 0xD8);
 3029       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3030 
 3031       // FDIV
 3032       emit_opcode(cbuf, 0xD8);
 3033       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3034   %}
 3035 
 3036   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3037     // Operand was loaded from memory into fp ST (stack top)
 3038     // FADD   ST,$src  /* D8 C0+i */
 3039     emit_opcode(cbuf, 0xD8);
 3040     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3041 
    // FMUL   ST,src2  /* D8 C8+i */
 3043     emit_opcode(cbuf, 0xD8);
 3044     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3045   %}
 3046 
 3047 
 3048   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3049     // Operand was loaded from memory into fp ST (stack top)
 3050     // FADD   ST,$src  /* D8 C0+i */
 3051     emit_opcode(cbuf, 0xD8);
 3052     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3053 
 3054     // FMULP  src2,ST  /* DE C8+i */
 3055     emit_opcode(cbuf, 0xDE);
 3056     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3057   %}
 3058 
 3059   // Atomically load the volatile long
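  // (FILD performs a single 64-bit read from memory, so a racing store cannot
  //  tear the value; the result is then spilled to the destination stack slot
  //  with FISTP.)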
 3060   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3061     emit_opcode(cbuf,0xDF);
 3062     int rm_byte_opcode = 0x05;
 3063     int base     = $mem$$base;
 3064     int index    = $mem$$index;
 3065     int scale    = $mem$$scale;
 3066     int displace = $mem$$disp;
 3067     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3068     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3069     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3070   %}
 3071 
 3072   // Volatile Store Long.  Must be atomic, so move it into
 3073   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3074   // target address before the store (for null-ptr checks)
 3075   // so the memory operand is used twice in the encoding.
 3076   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3077     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3078     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3079     emit_opcode(cbuf,0xDF);
 3080     int rm_byte_opcode = 0x07;
 3081     int base     = $mem$$base;
 3082     int index    = $mem$$index;
 3083     int scale    = $mem$$scale;
 3084     int displace = $mem$$disp;
 3085     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3086     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3087   %}
 3088 
 3089 %}
 3090 
 3091 
 3092 //----------FRAME--------------------------------------------------------------
 3093 // Definition of frame structure and management information.
 3094 //
 3095 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3096 //                             |   (to get allocators register number
 3097 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3098 //  r   CALLER     |        |
 3099 //  o     |        +--------+      pad to even-align allocators stack-slot
 3100 //  w     V        |  pad0  |        numbers; owned by CALLER
 3101 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3102 //  h     ^        |   in   |  5
 3103 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3104 //  |     |        |        |  3
 3105 //  |     |        +--------+
 3106 //  V     |        | old out|      Empty on Intel, window on Sparc
 3107 //        |    old |preserve|      Must be even aligned.
 3108 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3109 //        |        |   in   |  3   area for Intel ret address
 3110 //     Owned by    |preserve|      Empty on Sparc.
 3111 //       SELF      +--------+
 3112 //        |        |  pad2  |  2   pad to align old SP
 3113 //        |        +--------+  1
 3114 //        |        | locks  |  0
 3115 //        |        +--------+----> OptoReg::stack0(), even aligned
 3116 //        |        |  pad1  | 11   pad to align new SP
 3117 //        |        +--------+
 3118 //        |        |        | 10
 3119 //        |        | spills |  9   spills
 3120 //        V        |        |  8   (pad0 slot for callee)
 3121 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3122 //        ^        |  out   |  7
 3123 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3124 //     Owned by    +--------+
 3125 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3126 //        |    new |preserve|      Must be even-aligned.
 3127 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3128 //        |        |        |
 3129 //
 3130 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3131 //         known from SELF's arguments and the Java calling convention.
 3132 //         Region 6-7 is determined per call site.
 3133 // Note 2: If the calling convention leaves holes in the incoming argument
 3134 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3135 //         are owned by the CALLEE.  Holes should not be necessary in the
 3136 //         incoming area, as the Java calling convention is completely under
 3137 //         the control of the AD file.  Doubles can be sorted and packed to
 3138 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3139 //         varargs C calling conventions.
 3140 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3141 //         even aligned with pad0 as needed.
 3142 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3143 //         region 6-11 is even aligned; it may be padded out more so that
 3144 //         the region from SP to FP meets the minimum stack alignment.
 3145 
 3146 frame %{
 3147   // These three registers define part of the calling convention
 3148   // between compiled code and the interpreter.
 3149   inline_cache_reg(EAX);                // Inline Cache Register
 3150 
 3151   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3152   cisc_spilling_operand_name(indOffset32);
 3153 
 3154   // Number of stack slots consumed by locking an object
 3155   sync_stack_slots(1);
 3156 
 3157   // Compiled code's Frame Pointer
 3158   frame_pointer(ESP);
 3159   // Interpreter stores its frame pointer in a register which is
 3160   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3162   interpreter_frame_pointer(EBP);
 3163 
 3164   // Stack alignment requirement
 3165   // Alignment size in bytes (128-bit -> 16 bytes)
 3166   stack_alignment(StackAlignmentInBytes);
 3167 
 3168   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3169   // for calls to C.  Supports the var-args backing area for register parms.
 3170   varargs_C_out_slots_killed(0);
 3171 
 3172   // The after-PROLOG location of the return address.  Location of
 3173   // return address specifies a type (REG or STACK) and a number
 3174   // representing the register number (i.e. - use a register name) or
 3175   // stack slot.
 3176   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3177   // Otherwise, it is above the locks and verification slot and alignment word
 3178   return_addr(STACK - 1 +
 3179               align_up((Compile::current()->in_preserve_stack_slots() +
 3180                         Compile::current()->fixed_slots()),
 3181                        stack_alignment_in_slots()));
 3182 
 3183   // Location of C & interpreter return values
 3184   c_return_value %{
 3185     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3186     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3187     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3188 
 3189     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3190     // that C functions return float and double results in XMM0.
 3191     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3192       return OptoRegPair(XMM0b_num,XMM0_num);
 3193     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3194       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3195 
 3196     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3197   %}
 3198 
 3199   // Location of return values
 3200   return_value %{
 3201     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3202     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3203     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3204     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3205       return OptoRegPair(XMM0b_num,XMM0_num);
 3206     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3207       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3208     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3209   %}
 3210 
 3211 %}
 3212 
 3213 //----------ATTRIBUTES---------------------------------------------------------
 3214 //----------Operand Attributes-------------------------------------------------
 3215 op_attrib op_cost(0);        // Required cost attribute
 3216 
 3217 //----------Instruction Attributes---------------------------------------------
 3218 ins_attrib ins_cost(100);       // Required cost attribute
 3219 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3220 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3221                                 // non-matching short branch variant of some
                                // long branch?
 3223 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3224                                 // specifies the alignment that some part of the instruction (not
 3225                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3226                                 // function must be provided for the instruction
 3227 
 3228 //----------OPERANDS-----------------------------------------------------------
 3229 // Operand definitions must precede instruction definitions for correct parsing
 3230 // in the ADLC because operands constitute user defined types which are used in
 3231 // instruction definitions.
 3232 
 3233 //----------Simple Operands----------------------------------------------------
 3234 // Immediate Operands
 3235 // Integer Immediate
 3236 operand immI() %{
 3237   match(ConI);
 3238 
 3239   op_cost(10);
 3240   format %{ %}
 3241   interface(CONST_INTER);
 3242 %}
 3243 
 3244 // Constant for test vs zero
 3245 operand immI_0() %{
 3246   predicate(n->get_int() == 0);
 3247   match(ConI);
 3248 
 3249   op_cost(0);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 // Constant for increment
 3255 operand immI_1() %{
 3256   predicate(n->get_int() == 1);
 3257   match(ConI);
 3258 
 3259   op_cost(0);
 3260   format %{ %}
 3261   interface(CONST_INTER);
 3262 %}
 3263 
 3264 // Constant for decrement
 3265 operand immI_M1() %{
 3266   predicate(n->get_int() == -1);
 3267   match(ConI);
 3268 
 3269   op_cost(0);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 // Valid scale values for addressing modes
 3275 operand immI2() %{
 3276   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3277   match(ConI);
 3278 
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immI8() %{
 3284   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3285   match(ConI);
 3286 
 3287   op_cost(5);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 operand immU8() %{
 3293   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3294   match(ConI);
 3295 
 3296   op_cost(5);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 operand immI16() %{
 3302   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3303   match(ConI);
 3304 
 3305   op_cost(10);
 3306   format %{ %}
 3307   interface(CONST_INTER);
 3308 %}
 3309 
 3310 // Int Immediate non-negative
 3311 operand immU31()
 3312 %{
 3313   predicate(n->get_int() >= 0);
 3314   match(ConI);
 3315 
 3316   op_cost(0);
 3317   format %{ %}
 3318   interface(CONST_INTER);
 3319 %}
 3320 
 3321 // Constant for long shifts
 3322 operand immI_32() %{
 3323   predicate( n->get_int() == 32 );
 3324   match(ConI);
 3325 
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_1_31() %{
 3332   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3333   match(ConI);
 3334 
 3335   op_cost(0);
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_32_63() %{
 3341   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3342   match(ConI);
 3343   op_cost(0);
 3344 
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_2() %{
 3350   predicate( n->get_int() == 2 );
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 operand immI_3() %{
 3359   predicate( n->get_int() == 3 );
 3360   match(ConI);
 3361 
 3362   op_cost(0);
 3363   format %{ %}
 3364   interface(CONST_INTER);
 3365 %}
 3366 
 3367 operand immI_4()
 3368 %{
 3369   predicate(n->get_int() == 4);
 3370   match(ConI);
 3371 
 3372   op_cost(0);
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 operand immI_8()
 3378 %{
 3379   predicate(n->get_int() == 8);
 3380   match(ConI);
 3381 
 3382   op_cost(0);
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Pointer Immediate
 3388 operand immP() %{
 3389   match(ConP);
 3390 
 3391   op_cost(10);
 3392   format %{ %}
 3393   interface(CONST_INTER);
 3394 %}
 3395 
 3396 // nullptr Pointer Immediate
 3397 operand immP0() %{
 3398   predicate( n->get_ptr() == 0 );
 3399   match(ConP);
 3400   op_cost(0);
 3401 
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long Immediate
 3407 operand immL() %{
 3408   match(ConL);
 3409 
 3410   op_cost(20);
 3411   format %{ %}
 3412   interface(CONST_INTER);
 3413 %}
 3414 
 3415 // Long Immediate zero
 3416 operand immL0() %{
 3417   predicate( n->get_long() == 0L );
 3418   match(ConL);
 3419   op_cost(0);
 3420 
 3421   format %{ %}
 3422   interface(CONST_INTER);
 3423 %}
 3424 
// Long Immediate minus one
 3426 operand immL_M1() %{
 3427   predicate( n->get_long() == -1L );
 3428   match(ConL);
 3429   op_cost(0);
 3430 
 3431   format %{ %}
 3432   interface(CONST_INTER);
 3433 %}
 3434 
 3435 // Long immediate from 0 to 127.
 3436 // Used for a shorter form of long mul by 10.
 3437 operand immL_127() %{
 3438   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3439   match(ConL);
 3440   op_cost(0);
 3441 
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
 3446 // Long Immediate: low 32-bit mask
 3447 operand immL_32bits() %{
 3448   predicate(n->get_long() == 0xFFFFFFFFL);
 3449   match(ConL);
 3450   op_cost(0);
 3451 
 3452   format %{ %}
 3453   interface(CONST_INTER);
 3454 %}
 3455 
// Long Immediate: value fits in a signed 32-bit int
 3457 operand immL32() %{
 3458   predicate(n->get_long() == (int)(n->get_long()));
 3459   match(ConL);
 3460   op_cost(20);
 3461 
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
// Double Immediate zero
 3467 operand immDPR0() %{
 3468   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3469   // bug that generates code such that NaNs compare equal to 0.0
 3470   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3471   match(ConD);
 3472 
 3473   op_cost(5);
 3474   format %{ %}
 3475   interface(CONST_INTER);
 3476 %}
 3477 
 3478 // Double Immediate one
 3479 operand immDPR1() %{
 3480   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3481   match(ConD);
 3482 
 3483   op_cost(5);
 3484   format %{ %}
 3485   interface(CONST_INTER);
 3486 %}
 3487 
 3488 // Double Immediate
 3489 operand immDPR() %{
 3490   predicate(UseSSE<=1);
 3491   match(ConD);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 operand immD() %{
 3499   predicate(UseSSE>=2);
 3500   match(ConD);
 3501 
 3502   op_cost(5);
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Double Immediate zero
 3508 operand immD0() %{
 3509   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3510   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3511   // compare equal to -0.0.
 3512   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3513   match(ConD);
 3514 
 3515   format %{ %}
 3516   interface(CONST_INTER);
 3517 %}
 3518 
 3519 // Float Immediate zero
 3520 operand immFPR0() %{
 3521   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3522   match(ConF);
 3523 
 3524   op_cost(5);
 3525   format %{ %}
 3526   interface(CONST_INTER);
 3527 %}
 3528 
 3529 // Float Immediate one
 3530 operand immFPR1() %{
 3531   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3532   match(ConF);
 3533 
 3534   op_cost(5);
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Float Immediate
 3540 operand immFPR() %{
 3541   predicate( UseSSE == 0 );
 3542   match(ConF);
 3543 
 3544   op_cost(5);
 3545   format %{ %}
 3546   interface(CONST_INTER);
 3547 %}
 3548 
 3549 // Float Immediate
 3550 operand immF() %{
 3551   predicate(UseSSE >= 1);
 3552   match(ConF);
 3553 
 3554   op_cost(5);
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Float Immediate zero.  Zero and not -0.0
 3560 operand immF0() %{
 3561   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3562   match(ConF);
 3563 
 3564   op_cost(5);
 3565   format %{ %}
 3566   interface(CONST_INTER);
 3567 %}
 3568 
 3569 // Immediates for special shifts (sign extend)
 3570 
// Constants for sign-extending shifts
 3572 operand immI_16() %{
 3573   predicate( n->get_int() == 16 );
 3574   match(ConI);
 3575 
 3576   format %{ %}
 3577   interface(CONST_INTER);
 3578 %}
 3579 
 3580 operand immI_24() %{
 3581   predicate( n->get_int() == 24 );
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 // Constant for byte-wide masking
 3589 operand immI_255() %{
 3590   predicate( n->get_int() == 255 );
 3591   match(ConI);
 3592 
 3593   format %{ %}
 3594   interface(CONST_INTER);
 3595 %}
 3596 
 3597 // Constant for short-wide masking
 3598 operand immI_65535() %{
 3599   predicate(n->get_int() == 65535);
 3600   match(ConI);
 3601 
 3602   format %{ %}
 3603   interface(CONST_INTER);
 3604 %}
 3605 
 3606 operand kReg()
 3607 %{
 3608   constraint(ALLOC_IN_RC(vectmask_reg));
 3609   match(RegVectMask);
 3610   format %{%}
 3611   interface(REG_INTER);
 3612 %}
 3613 
 3614 operand kReg_K1()
 3615 %{
 3616   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3617   match(RegVectMask);
 3618   format %{%}
 3619   interface(REG_INTER);
 3620 %}
 3621 
 3622 operand kReg_K2()
 3623 %{
 3624   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3625   match(RegVectMask);
 3626   format %{%}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 // Special Registers
 3631 operand kReg_K3()
 3632 %{
 3633   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3634   match(RegVectMask);
 3635   format %{%}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand kReg_K4()
 3640 %{
 3641   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3642   match(RegVectMask);
 3643   format %{%}
 3644   interface(REG_INTER);
 3645 %}
 3646 
 3647 operand kReg_K5()
 3648 %{
 3649   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3650   match(RegVectMask);
 3651   format %{%}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 operand kReg_K6()
 3656 %{
 3657   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3658   match(RegVectMask);
 3659   format %{%}
 3660   interface(REG_INTER);
 3661 %}
 3662 
 3663 // Special Registers
 3664 operand kReg_K7()
 3665 %{
 3666   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3667   match(RegVectMask);
 3668   format %{%}
 3669   interface(REG_INTER);
 3670 %}
 3671 
 3672 // Register Operands
 3673 // Integer Register
 3674 operand rRegI() %{
 3675   constraint(ALLOC_IN_RC(int_reg));
 3676   match(RegI);
 3677   match(xRegI);
 3678   match(eAXRegI);
 3679   match(eBXRegI);
 3680   match(eCXRegI);
 3681   match(eDXRegI);
 3682   match(eDIRegI);
 3683   match(eSIRegI);
 3684 
 3685   format %{ %}
 3686   interface(REG_INTER);
 3687 %}
 3688 
 3689 // Subset of Integer Register
 3690 operand xRegI(rRegI reg) %{
 3691   constraint(ALLOC_IN_RC(int_x_reg));
 3692   match(reg);
 3693   match(eAXRegI);
 3694   match(eBXRegI);
 3695   match(eCXRegI);
 3696   match(eDXRegI);
 3697 
 3698   format %{ %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 // Special Registers
 3703 operand eAXRegI(xRegI reg) %{
 3704   constraint(ALLOC_IN_RC(eax_reg));
 3705   match(reg);
 3706   match(rRegI);
 3707 
 3708   format %{ "EAX" %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 // Special Registers
 3713 operand eBXRegI(xRegI reg) %{
 3714   constraint(ALLOC_IN_RC(ebx_reg));
 3715   match(reg);
 3716   match(rRegI);
 3717 
 3718   format %{ "EBX" %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand eCXRegI(xRegI reg) %{
 3723   constraint(ALLOC_IN_RC(ecx_reg));
 3724   match(reg);
 3725   match(rRegI);
 3726 
 3727   format %{ "ECX" %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand eDXRegI(xRegI reg) %{
 3732   constraint(ALLOC_IN_RC(edx_reg));
 3733   match(reg);
 3734   match(rRegI);
 3735 
 3736   format %{ "EDX" %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 operand eDIRegI(xRegI reg) %{
 3741   constraint(ALLOC_IN_RC(edi_reg));
 3742   match(reg);
 3743   match(rRegI);
 3744 
 3745   format %{ "EDI" %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 operand naxRegI() %{
 3750   constraint(ALLOC_IN_RC(nax_reg));
 3751   match(RegI);
 3752   match(eCXRegI);
 3753   match(eDXRegI);
 3754   match(eSIRegI);
 3755   match(eDIRegI);
 3756 
 3757   format %{ %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand nadxRegI() %{
 3762   constraint(ALLOC_IN_RC(nadx_reg));
 3763   match(RegI);
 3764   match(eBXRegI);
 3765   match(eCXRegI);
 3766   match(eSIRegI);
 3767   match(eDIRegI);
 3768 
 3769   format %{ %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 operand ncxRegI() %{
 3774   constraint(ALLOC_IN_RC(ncx_reg));
 3775   match(RegI);
 3776   match(eAXRegI);
 3777   match(eDXRegI);
 3778   match(eSIRegI);
 3779   match(eDIRegI);
 3780 
 3781   format %{ %}
 3782   interface(REG_INTER);
 3783 %}
 3784 
 3785 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3786 // //
 3787 operand eSIRegI(xRegI reg) %{
 3788    constraint(ALLOC_IN_RC(esi_reg));
 3789    match(reg);
 3790    match(rRegI);
 3791 
 3792    format %{ "ESI" %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Pointer Register
 3797 operand anyRegP() %{
 3798   constraint(ALLOC_IN_RC(any_reg));
 3799   match(RegP);
 3800   match(eAXRegP);
 3801   match(eBXRegP);
 3802   match(eCXRegP);
 3803   match(eDIRegP);
 3804   match(eRegP);
 3805 
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand eRegP() %{
 3811   constraint(ALLOC_IN_RC(int_reg));
 3812   match(RegP);
 3813   match(eAXRegP);
 3814   match(eBXRegP);
 3815   match(eCXRegP);
 3816   match(eDIRegP);
 3817 
 3818   format %{ %}
 3819   interface(REG_INTER);
 3820 %}
 3821 
 3822 operand rRegP() %{
 3823   constraint(ALLOC_IN_RC(int_reg));
 3824   match(RegP);
 3825   match(eAXRegP);
 3826   match(eBXRegP);
 3827   match(eCXRegP);
 3828   match(eDIRegP);
 3829 
 3830   format %{ %}
 3831   interface(REG_INTER);
 3832 %}
 3833 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3835 operand eRegP_no_EBP() %{
 3836   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3837   match(RegP);
 3838   match(eAXRegP);
 3839   match(eBXRegP);
 3840   match(eCXRegP);
 3841   match(eDIRegP);
 3842 
 3843   op_cost(100);
 3844   format %{ %}
 3845   interface(REG_INTER);
 3846 %}
 3847 
 3848 operand naxRegP() %{
 3849   constraint(ALLOC_IN_RC(nax_reg));
 3850   match(RegP);
 3851   match(eBXRegP);
 3852   match(eDXRegP);
 3853   match(eCXRegP);
 3854   match(eSIRegP);
 3855   match(eDIRegP);
 3856 
 3857   format %{ %}
 3858   interface(REG_INTER);
 3859 %}
 3860 
 3861 operand nabxRegP() %{
 3862   constraint(ALLOC_IN_RC(nabx_reg));
 3863   match(RegP);
 3864   match(eCXRegP);
 3865   match(eDXRegP);
 3866   match(eSIRegP);
 3867   match(eDIRegP);
 3868 
 3869   format %{ %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 operand pRegP() %{
 3874   constraint(ALLOC_IN_RC(p_reg));
 3875   match(RegP);
 3876   match(eBXRegP);
 3877   match(eDXRegP);
 3878   match(eSIRegP);
 3879   match(eDIRegP);
 3880 
 3881   format %{ %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Special Registers
 3886 // Return a pointer value
 3887 operand eAXRegP(eRegP reg) %{
 3888   constraint(ALLOC_IN_RC(eax_reg));
 3889   match(reg);
 3890   format %{ "EAX" %}
 3891   interface(REG_INTER);
 3892 %}
 3893 
 3894 // Used in AtomicAdd
 3895 operand eBXRegP(eRegP reg) %{
 3896   constraint(ALLOC_IN_RC(ebx_reg));
 3897   match(reg);
 3898   format %{ "EBX" %}
 3899   interface(REG_INTER);
 3900 %}
 3901 
 3902 // Tail-call (interprocedural jump) to interpreter
 3903 operand eCXRegP(eRegP reg) %{
 3904   constraint(ALLOC_IN_RC(ecx_reg));
 3905   match(reg);
 3906   format %{ "ECX" %}
 3907   interface(REG_INTER);
 3908 %}
 3909 
 3910 operand eDXRegP(eRegP reg) %{
 3911   constraint(ALLOC_IN_RC(edx_reg));
 3912   match(reg);
 3913   format %{ "EDX" %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 operand eSIRegP(eRegP reg) %{
 3918   constraint(ALLOC_IN_RC(esi_reg));
 3919   match(reg);
 3920   format %{ "ESI" %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 // Used in rep stosw
 3925 operand eDIRegP(eRegP reg) %{
 3926   constraint(ALLOC_IN_RC(edi_reg));
 3927   match(reg);
 3928   format %{ "EDI" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand eRegL() %{
 3933   constraint(ALLOC_IN_RC(long_reg));
 3934   match(RegL);
 3935   match(eADXRegL);
 3936 
 3937   format %{ %}
 3938   interface(REG_INTER);
 3939 %}
 3940 
 3941 operand eADXRegL( eRegL reg ) %{
 3942   constraint(ALLOC_IN_RC(eadx_reg));
 3943   match(reg);
 3944 
 3945   format %{ "EDX:EAX" %}
 3946   interface(REG_INTER);
 3947 %}
 3948 
 3949 operand eBCXRegL( eRegL reg ) %{
 3950   constraint(ALLOC_IN_RC(ebcx_reg));
 3951   match(reg);
 3952 
 3953   format %{ "EBX:ECX" %}
 3954   interface(REG_INTER);
 3955 %}
 3956 
 3957 operand eBDPRegL( eRegL reg ) %{
 3958   constraint(ALLOC_IN_RC(ebpd_reg));
 3959   match(reg);
 3960 
 3961   format %{ "EBP:EDI" %}
 3962   interface(REG_INTER);
 3963 %}
 3964 // Special case for integer high multiply
 3965 operand eADXRegL_low_only() %{
 3966   constraint(ALLOC_IN_RC(eadx_reg));
 3967   match(RegL);
 3968 
 3969   format %{ "EAX" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Flags register, used as output of compare instructions
 3974 operand rFlagsReg() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977 
 3978   format %{ "EFLAGS" %}
 3979   interface(REG_INTER);
 3980 %}
 3981 
 3982 // Flags register, used as output of compare instructions
 3983 operand eFlagsReg() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986 
 3987   format %{ "EFLAGS" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Flags register, used as output of FLOATING POINT compare instructions
 3992 operand eFlagsRegU() %{
 3993   constraint(ALLOC_IN_RC(int_flags));
 3994   match(RegFlags);
 3995 
 3996   format %{ "EFLAGS_U" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 
 4000 operand eFlagsRegUCF() %{
 4001   constraint(ALLOC_IN_RC(int_flags));
 4002   match(RegFlags);
 4003   predicate(false);
 4004 
 4005   format %{ "EFLAGS_U_CF" %}
 4006   interface(REG_INTER);
 4007 %}
 4008 
 4009 // Condition Code Register used by long compare
 4010 operand flagsReg_long_LTGE() %{
 4011   constraint(ALLOC_IN_RC(int_flags));
 4012   match(RegFlags);
 4013   format %{ "FLAGS_LTGE" %}
 4014   interface(REG_INTER);
 4015 %}
 4016 operand flagsReg_long_EQNE() %{
 4017   constraint(ALLOC_IN_RC(int_flags));
 4018   match(RegFlags);
 4019   format %{ "FLAGS_EQNE" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 operand flagsReg_long_LEGT() %{
 4023   constraint(ALLOC_IN_RC(int_flags));
 4024   match(RegFlags);
 4025   format %{ "FLAGS_LEGT" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
 4029 // Condition Code Register used by unsigned long compare
 4030 operand flagsReg_ulong_LTGE() %{
 4031   constraint(ALLOC_IN_RC(int_flags));
 4032   match(RegFlags);
 4033   format %{ "FLAGS_U_LTGE" %}
 4034   interface(REG_INTER);
 4035 %}
 4036 operand flagsReg_ulong_EQNE() %{
 4037   constraint(ALLOC_IN_RC(int_flags));
 4038   match(RegFlags);
 4039   format %{ "FLAGS_U_EQNE" %}
 4040   interface(REG_INTER);
 4041 %}
 4042 operand flagsReg_ulong_LEGT() %{
 4043   constraint(ALLOC_IN_RC(int_flags));
 4044   match(RegFlags);
 4045   format %{ "FLAGS_U_LEGT" %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 // Float register operands
 4050 operand regDPR() %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4053   match(RegD);
 4054   match(regDPR1);
 4055   match(regDPR2);
 4056   format %{ %}
 4057   interface(REG_INTER);
 4058 %}
 4059 
 4060 operand regDPR1(regDPR reg) %{
 4061   predicate( UseSSE < 2 );
 4062   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4063   match(reg);
 4064   format %{ "FPR1" %}
 4065   interface(REG_INTER);
 4066 %}
 4067 
 4068 operand regDPR2(regDPR reg) %{
 4069   predicate( UseSSE < 2 );
 4070   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4071   match(reg);
 4072   format %{ "FPR2" %}
 4073   interface(REG_INTER);
 4074 %}
 4075 
 4076 operand regnotDPR1(regDPR reg) %{
 4077   predicate( UseSSE < 2 );
 4078   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4079   match(reg);
 4080   format %{ %}
 4081   interface(REG_INTER);
 4082 %}
 4083 
 4084 // Float register operands
 4085 operand regFPR() %{
 4086   predicate( UseSSE < 2 );
 4087   constraint(ALLOC_IN_RC(fp_flt_reg));
 4088   match(RegF);
 4089   match(regFPR1);
 4090   format %{ %}
 4091   interface(REG_INTER);
 4092 %}
 4093 
 4094 // Float register operands
 4095 operand regFPR1(regFPR reg) %{
 4096   predicate( UseSSE < 2 );
 4097   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4098   match(reg);
 4099   format %{ "FPR1" %}
 4100   interface(REG_INTER);
 4101 %}
 4102 
 4103 // XMM Float register operands
 4104 operand regF() %{
 4105   predicate( UseSSE>=1 );
 4106   constraint(ALLOC_IN_RC(float_reg_legacy));
 4107   match(RegF);
 4108   format %{ %}
 4109   interface(REG_INTER);
 4110 %}
 4111 
 4112 operand legRegF() %{
 4113   predicate( UseSSE>=1 );
 4114   constraint(ALLOC_IN_RC(float_reg_legacy));
 4115   match(RegF);
 4116   format %{ %}
 4117   interface(REG_INTER);
 4118 %}
 4119 
 4120 // Float register operands
 4121 operand vlRegF() %{
 4122    constraint(ALLOC_IN_RC(float_reg_vl));
 4123    match(RegF);
 4124 
 4125    format %{ %}
 4126    interface(REG_INTER);
 4127 %}
 4128 
 4129 // XMM Double register operands
 4130 operand regD() %{
 4131   predicate( UseSSE>=2 );
 4132   constraint(ALLOC_IN_RC(double_reg_legacy));
 4133   match(RegD);
 4134   format %{ %}
 4135   interface(REG_INTER);
 4136 %}
 4137 
 4138 // Double register operands
 4139 operand legRegD() %{
 4140   predicate( UseSSE>=2 );
 4141   constraint(ALLOC_IN_RC(double_reg_legacy));
 4142   match(RegD);
 4143   format %{ %}
 4144   interface(REG_INTER);
 4145 %}
 4146 
 4147 operand vlRegD() %{
 4148    constraint(ALLOC_IN_RC(double_reg_vl));
 4149    match(RegD);
 4150 
 4151    format %{ %}
 4152    interface(REG_INTER);
 4153 %}
 4154 
 4155 //----------Memory Operands----------------------------------------------------
 4156 // Direct Memory Operand
 4157 operand direct(immP addr) %{
 4158   match(addr);
 4159 
 4160   format %{ "[$addr]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base(0xFFFFFFFF);
 4163     index(0x4);
 4164     scale(0x0);
 4165     disp($addr);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Operand
 4170 operand indirect(eRegP reg) %{
 4171   constraint(ALLOC_IN_RC(int_reg));
 4172   match(reg);
 4173 
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);
 4178     scale(0x0);
 4179     disp(0x0);
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Short Offset Operand
 4184 operand indOffset8(eRegP reg, immI8 off) %{
 4185   match(AddP reg off);
 4186 
 4187   format %{ "[$reg + $off]" %}
 4188   interface(MEMORY_INTER) %{
 4189     base($reg);
 4190     index(0x4);
 4191     scale(0x0);
 4192     disp($off);
 4193   %}
 4194 %}
 4195 
 4196 // Indirect Memory Plus Long Offset Operand
 4197 operand indOffset32(eRegP reg, immI off) %{
 4198   match(AddP reg off);
 4199 
 4200   format %{ "[$reg + $off]" %}
 4201   interface(MEMORY_INTER) %{
 4202     base($reg);
 4203     index(0x4);
 4204     scale(0x0);
 4205     disp($off);
 4206   %}
 4207 %}
 4208 
 4209 // Indirect Memory Plus Long Offset Operand
 4210 operand indOffset32X(rRegI reg, immP off) %{
 4211   match(AddP off reg);
 4212 
 4213   format %{ "[$reg + $off]" %}
 4214   interface(MEMORY_INTER) %{
 4215     base($reg);
 4216     index(0x4);
 4217     scale(0x0);
 4218     disp($off);
 4219   %}
 4220 %}
 4221 
 4222 // Indirect Memory Plus Index Register Plus Offset Operand
 4223 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4224   match(AddP (AddP reg ireg) off);
 4225 
 4226   op_cost(10);
 4227   format %{"[$reg + $off + $ireg]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base($reg);
 4230     index($ireg);
 4231     scale(0x0);
 4232     disp($off);
 4233   %}
 4234 %}
 4235 
 4236 // Indirect Memory Plus Index Register Plus Offset Operand
 4237 operand indIndex(eRegP reg, rRegI ireg) %{
 4238   match(AddP reg ireg);
 4239 
 4240   op_cost(10);
 4241   format %{"[$reg + $ireg]" %}
 4242   interface(MEMORY_INTER) %{
 4243     base($reg);
 4244     index($ireg);
 4245     scale(0x0);
 4246     disp(0x0);
 4247   %}
 4248 %}
 4249 
 4250 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4252 // // -------------------------------------------------------------------------
 4253 // // Scaled Memory Operands
 4254 // // Indirect Memory Times Scale Plus Offset Operand
 4255 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4256 //   match(AddP off (LShiftI ireg scale));
 4257 //
 4258 //   op_cost(10);
 4259 //   format %{"[$off + $ireg << $scale]" %}
 4260 //   interface(MEMORY_INTER) %{
 4261 //     base(0x4);
 4262 //     index($ireg);
 4263 //     scale($scale);
 4264 //     disp($off);
 4265 //   %}
 4266 // %}
 4267 
 4268 // Indirect Memory Times Scale Plus Index Register
 4269 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4270   match(AddP reg (LShiftI ireg scale));
 4271 
 4272   op_cost(10);
 4273   format %{"[$reg + $ireg << $scale]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index($ireg);
 4277     scale($scale);
 4278     disp(0x0);
 4279   %}
 4280 %}
 4281 
 4282 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4283 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4284   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4285 
 4286   op_cost(10);
 4287   format %{"[$reg + $off + $ireg << $scale]" %}
 4288   interface(MEMORY_INTER) %{
 4289     base($reg);
 4290     index($ireg);
 4291     scale($scale);
 4292     disp($off);
 4293   %}
 4294 %}
 4295 
 4296 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4298 // the first word of the long.  If the load-long destination overlaps with
 4299 // registers used in the addressing expression, the 2nd half will be loaded
 4300 // from a clobbered address.  Fix this by requiring that load-long use
 4301 // address registers that do not overlap with the load-long target.
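// For example, if the base register were also one of the destination halves --
// say a (hypothetical) "MOV EAX,[EAX]; MOV EDX,[EAX+4]" sequence -- the second
// word would be read through a base the first load has already overwritten.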
 4302 
 4303 // load-long support
 4304 operand load_long_RegP() %{
 4305   constraint(ALLOC_IN_RC(esi_reg));
 4306   match(RegP);
 4307   match(eSIRegP);
 4308   op_cost(100);
 4309   format %{  %}
 4310   interface(REG_INTER);
 4311 %}
 4312 
 4313 // Indirect Memory Operand Long
 4314 operand load_long_indirect(load_long_RegP reg) %{
 4315   constraint(ALLOC_IN_RC(esi_reg));
 4316   match(reg);
 4317 
 4318   format %{ "[$reg]" %}
 4319   interface(MEMORY_INTER) %{
 4320     base($reg);
 4321     index(0x4);
 4322     scale(0x0);
 4323     disp(0x0);
 4324   %}
 4325 %}
 4326 
 4327 // Indirect Memory Plus Long Offset Operand
 4328 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4329   match(AddP reg off);
 4330 
 4331   format %{ "[$reg + $off]" %}
 4332   interface(MEMORY_INTER) %{
 4333     base($reg);
 4334     index(0x4);
 4335     scale(0x0);
 4336     disp($off);
 4337   %}
 4338 %}
 4339 
 4340 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4341 
 4342 
 4343 //----------Special Memory Operands--------------------------------------------
 4344 // Stack Slot Operand - This operand is used for loading and storing temporary
 4345 //                      values on the stack where a match requires a value to
 4346 //                      flow through memory.
 4347 operand stackSlotP(sRegP reg) %{
 4348   constraint(ALLOC_IN_RC(stack_slots));
 4349   // No match rule because this operand is only generated in matching
 4350   format %{ "[$reg]" %}
 4351   interface(MEMORY_INTER) %{
 4352     base(0x4);   // ESP
 4353     index(0x4);  // No Index
 4354     scale(0x0);  // No Scale
 4355     disp($reg);  // Stack Offset
 4356   %}
 4357 %}
 4358 
 4359 operand stackSlotI(sRegI reg) %{
 4360   constraint(ALLOC_IN_RC(stack_slots));
 4361   // No match rule because this operand is only generated in matching
 4362   format %{ "[$reg]" %}
 4363   interface(MEMORY_INTER) %{
 4364     base(0x4);   // ESP
 4365     index(0x4);  // No Index
 4366     scale(0x0);  // No Scale
 4367     disp($reg);  // Stack Offset
 4368   %}
 4369 %}
 4370 
 4371 operand stackSlotF(sRegF reg) %{
 4372   constraint(ALLOC_IN_RC(stack_slots));
 4373   // No match rule because this operand is only generated in matching
 4374   format %{ "[$reg]" %}
 4375   interface(MEMORY_INTER) %{
 4376     base(0x4);   // ESP
 4377     index(0x4);  // No Index
 4378     scale(0x0);  // No Scale
 4379     disp($reg);  // Stack Offset
 4380   %}
 4381 %}
 4382 
 4383 operand stackSlotD(sRegD reg) %{
 4384   constraint(ALLOC_IN_RC(stack_slots));
 4385   // No match rule because this operand is only generated in matching
 4386   format %{ "[$reg]" %}
 4387   interface(MEMORY_INTER) %{
 4388     base(0x4);   // ESP
 4389     index(0x4);  // No Index
 4390     scale(0x0);  // No Scale
 4391     disp($reg);  // Stack Offset
 4392   %}
 4393 %}
 4394 
 4395 operand stackSlotL(sRegL reg) %{
 4396   constraint(ALLOC_IN_RC(stack_slots));
 4397   // No match rule because this operand is only generated in matching
 4398   format %{ "[$reg]" %}
 4399   interface(MEMORY_INTER) %{
 4400     base(0x4);   // ESP
 4401     index(0x4);  // No Index
 4402     scale(0x0);  // No Scale
 4403     disp($reg);  // Stack Offset
 4404   %}
 4405 %}
 4406 
 4407 //----------Conditional Branch Operands----------------------------------------
 4408 // Comparison Op  - This is the operation of the comparison, and is limited to
 4409 //                  the following set of codes:
 4410 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4411 //
 4412 // Other attributes of the comparison, such as unsignedness, are specified
 4413 // by the comparison instruction that sets a condition code flags register.
 4414 // That result is represented by a flags operand whose subtype is appropriate
 4415 // to the unsignedness (etc.) of the comparison.
 4416 //
 4417 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4418 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4419 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4420 
 4421 // Comparison Code
 4422 operand cmpOp() %{
 4423   match(Bool);
 4424 
 4425   format %{ "" %}
 4426   interface(COND_INTER) %{
 4427     equal(0x4, "e");
 4428     not_equal(0x5, "ne");
 4429     less(0xC, "l");
 4430     greater_equal(0xD, "ge");
 4431     less_equal(0xE, "le");
 4432     greater(0xF, "g");
 4433     overflow(0x0, "o");
 4434     no_overflow(0x1, "no");
 4435   %}
 4436 %}
 4437 
 4438 // Comparison Code, unsigned compare.  Used by FP also, with
 4439 // C2 (unordered) turned into GT or LT already.  The other bits
 4440 // C0 and C3 are turned into Carry & Zero flags.
 4441 operand cmpOpU() %{
 4442   match(Bool);
 4443 
 4444   format %{ "" %}
 4445   interface(COND_INTER) %{
 4446     equal(0x4, "e");
 4447     not_equal(0x5, "ne");
 4448     less(0x2, "b");
 4449     greater_equal(0x3, "nb");
 4450     less_equal(0x6, "be");
 4451     greater(0x7, "nbe");
 4452     overflow(0x0, "o");
 4453     no_overflow(0x1, "no");
 4454   %}
 4455 %}
 4456 
 4457 // Floating comparisons that don't require any fixup for the unordered case
 4458 operand cmpOpUCF() %{
 4459   match(Bool);
 4460   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4461             n->as_Bool()->_test._test == BoolTest::ge ||
 4462             n->as_Bool()->_test._test == BoolTest::le ||
 4463             n->as_Bool()->_test._test == BoolTest::gt);
 4464   format %{ "" %}
 4465   interface(COND_INTER) %{
 4466     equal(0x4, "e");
 4467     not_equal(0x5, "ne");
 4468     less(0x2, "b");
 4469     greater_equal(0x3, "nb");
 4470     less_equal(0x6, "be");
 4471     greater(0x7, "nbe");
 4472     overflow(0x0, "o");
 4473     no_overflow(0x1, "no");
 4474   %}
 4475 %}
 4476 
 4477 
 4478 // Floating comparisons that can be fixed up with extra conditional jumps
 4479 operand cmpOpUCF2() %{
 4480   match(Bool);
 4481   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4482             n->as_Bool()->_test._test == BoolTest::eq);
 4483   format %{ "" %}
 4484   interface(COND_INTER) %{
 4485     equal(0x4, "e");
 4486     not_equal(0x5, "ne");
 4487     less(0x2, "b");
 4488     greater_equal(0x3, "nb");
 4489     less_equal(0x6, "be");
 4490     greater(0x7, "nbe");
 4491     overflow(0x0, "o");
 4492     no_overflow(0x1, "no");
 4493   %}
 4494 %}
 4495 
 4496 // Comparison Code for FP conditional move
 4497 operand cmpOp_fcmov() %{
 4498   match(Bool);
 4499 
 4500   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4501             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4502   format %{ "" %}
 4503   interface(COND_INTER) %{
 4504     equal        (0x0C8);
 4505     not_equal    (0x1C8);
 4506     less         (0x0C0);
 4507     greater_equal(0x1C0);
 4508     less_equal   (0x0D0);
 4509     greater      (0x1D0);
 4510     overflow(0x0, "o"); // not really supported by the instruction
 4511     no_overflow(0x1, "no"); // not really supported by the instruction
 4512   %}
 4513 %}
 4514 
 4515 // Comparison Code used in long compares
 4516 operand cmpOp_commute() %{
 4517   match(Bool);
 4518 
 4519   format %{ "" %}
 4520   interface(COND_INTER) %{
 4521     equal(0x4, "e");
 4522     not_equal(0x5, "ne");
 4523     less(0xF, "g");
 4524     greater_equal(0xE, "le");
 4525     less_equal(0xD, "ge");
 4526     greater(0xC, "l");
 4527     overflow(0x0, "o");
 4528     no_overflow(0x1, "no");
 4529   %}
 4530 %}
 4531 
 4532 // Comparison Code used in unsigned long compares
 4533 operand cmpOpU_commute() %{
 4534   match(Bool);
 4535 
 4536   format %{ "" %}
 4537   interface(COND_INTER) %{
 4538     equal(0x4, "e");
 4539     not_equal(0x5, "ne");
 4540     less(0x7, "nbe");
 4541     greater_equal(0x6, "be");
 4542     less_equal(0x3, "nb");
 4543     greater(0x2, "b");
 4544     overflow(0x0, "o");
 4545     no_overflow(0x1, "no");
 4546   %}
 4547 %}
 4548 
 4549 //----------OPERAND CLASSES----------------------------------------------------
 4550 // Operand Classes are groups of operands that are used to simplify
 4551 // instruction definitions by not requiring the AD writer to specify separate
 4552 // instructions for every form of operand when the instruction accepts
 4553 // multiple operand types with the same basic encoding and format.  The classic
 4554 // case of this is memory operands.
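      //
      // For example, the loadB rule further below takes a single 'memory'
      // operand, so one instruct covers every addressing form named in the
      // opclass rather than requiring one rule per form:
      //
      //   instruct loadB(xRegI dst, memory mem) %{ match(Set dst (LoadB mem)); ... %}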
 4555 
 4556 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4557                indIndex, indIndexScale, indIndexScaleOffset);
 4558 
 4559 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4560 // This means some kind of offset is always required and you cannot use
 4561 // an oop as the offset (as is done when working on static globals).
 4562 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4563                     indIndex, indIndexScale, indIndexScaleOffset);
 4564 
 4565 
 4566 //----------PIPELINE-----------------------------------------------------------
 4567 // Rules which define the behavior of the target architecture's pipeline.
 4568 pipeline %{
 4569 
 4570 //----------ATTRIBUTES---------------------------------------------------------
 4571 attributes %{
 4572   variable_size_instructions;        // Variable size instructions
 4573   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4574   instruction_unit_size = 1;         // An instruction is 1 byte long
 4575   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4576   instruction_fetch_units = 1;       // of 16 bytes
 4577 
 4578   // List of nop instructions
 4579   nops( MachNop );
 4580 %}
 4581 
 4582 //----------RESOURCES----------------------------------------------------------
 4583 // Resources are the functional units available to the machine
 4584 
 4585 // Generic P2/P3 pipeline
 4586 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4587 // 3 instructions decoded per cycle.
 4588 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4589 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4590 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4591            MS0, MS1, MEM = MS0 | MS1,
 4592            BR, FPU,
 4593            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4594 
 4595 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4596 // Pipeline Description specifies the stages in the machine's pipeline
 4597 
 4598 // Generic P2/P3 pipeline
 4599 pipe_desc(S0, S1, S2, S3, S4, S5);
 4600 
 4601 //----------PIPELINE CLASSES---------------------------------------------------
 4602 // Pipeline Classes describe the stages in which input and output are
 4603 // referenced by the hardware pipeline.
 4604 
 4605 // Naming convention: ialu or fpu
 4606 // Then: _reg
 4607 // Then: _reg if there is a 2nd register
 4608 // Then: _long if it's a pair of instructions implementing a long operation
 4609 // Then: _fat if it requires the big decoder
 4610 //   Or: _mem if it requires the big decoder and a memory unit.
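      //
      // For example, ialu_reg_mem below names an integer ALU operation with a
      // register destination and a memory source; per the _mem suffix it
      // claims the big decoder (D0) and a memory unit (MEM).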
 4611 
 4612 // Integer ALU reg operation
 4613 pipe_class ialu_reg(rRegI dst) %{
 4614     single_instruction;
 4615     dst    : S4(write);
 4616     dst    : S3(read);
 4617     DECODE : S0;        // any decoder
 4618     ALU    : S3;        // any alu
 4619 %}
 4620 
 4621 // Long ALU reg operation
 4622 pipe_class ialu_reg_long(eRegL dst) %{
 4623     instruction_count(2);
 4624     dst    : S4(write);
 4625     dst    : S3(read);
 4626     DECODE : S0(2);     // any 2 decoders
 4627     ALU    : S3(2);     // both alus
 4628 %}
 4629 
 4630 // Integer ALU reg operation using big decoder
 4631 pipe_class ialu_reg_fat(rRegI dst) %{
 4632     single_instruction;
 4633     dst    : S4(write);
 4634     dst    : S3(read);
 4635     D0     : S0;        // big decoder only
 4636     ALU    : S3;        // any alu
 4637 %}
 4638 
 4639 // Long ALU reg operation using big decoder
 4640 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4641     instruction_count(2);
 4642     dst    : S4(write);
 4643     dst    : S3(read);
 4644     D0     : S0(2);     // big decoder only; twice
 4645     ALU    : S3(2);     // any 2 alus
 4646 %}
 4647 
 4648 // Integer ALU reg-reg operation
 4649 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4650     single_instruction;
 4651     dst    : S4(write);
 4652     src    : S3(read);
 4653     DECODE : S0;        // any decoder
 4654     ALU    : S3;        // any alu
 4655 %}
 4656 
 4657 // Long ALU reg-reg operation
 4658 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4659     instruction_count(2);
 4660     dst    : S4(write);
 4661     src    : S3(read);
 4662     DECODE : S0(2);     // any 2 decoders
 4663     ALU    : S3(2);     // both alus
 4664 %}
 4665 
 4666 // Integer ALU reg-reg operation using big decoder
 4667 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4668     single_instruction;
 4669     dst    : S4(write);
 4670     src    : S3(read);
 4671     D0     : S0;        // big decoder only
 4672     ALU    : S3;        // any alu
 4673 %}
 4674 
 4675 // Long ALU reg-reg operation using big decoder
 4676 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4677     instruction_count(2);
 4678     dst    : S4(write);
 4679     src    : S3(read);
 4680     D0     : S0(2);     // big decoder only; twice
 4681     ALU    : S3(2);     // both alus
 4682 %}
 4683 
 4684 // Integer ALU reg-mem operation
 4685 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4686     single_instruction;
 4687     dst    : S5(write);
 4688     mem    : S3(read);
 4689     D0     : S0;        // big decoder only
 4690     ALU    : S4;        // any alu
 4691     MEM    : S3;        // any mem
 4692 %}
 4693 
 4694 // Long ALU reg-mem operation
 4695 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4696     instruction_count(2);
 4697     dst    : S5(write);
 4698     mem    : S3(read);
 4699     D0     : S0(2);     // big decoder only; twice
 4700     ALU    : S4(2);     // any 2 alus
 4701     MEM    : S3(2);     // both mems
 4702 %}
 4703 
 4704 // Integer mem operation (prefetch)
 4705 pipe_class ialu_mem(memory mem)
 4706 %{
 4707     single_instruction;
 4708     mem    : S3(read);
 4709     D0     : S0;        // big decoder only
 4710     MEM    : S3;        // any mem
 4711 %}
 4712 
 4713 // Integer Store to Memory
 4714 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4715     single_instruction;
 4716     mem    : S3(read);
 4717     src    : S5(read);
 4718     D0     : S0;        // big decoder only
 4719     ALU    : S4;        // any alu
 4720     MEM    : S3;
 4721 %}
 4722 
 4723 // Long Store to Memory
 4724 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4725     instruction_count(2);
 4726     mem    : S3(read);
 4727     src    : S5(read);
 4728     D0     : S0(2);     // big decoder only; twice
 4729     ALU    : S4(2);     // any 2 alus
 4730     MEM    : S3(2);     // Both mems
 4731 %}
 4732 
 4733 // Integer Store immediate to Memory
 4734 pipe_class ialu_mem_imm(memory mem) %{
 4735     single_instruction;
 4736     mem    : S3(read);
 4737     D0     : S0;        // big decoder only
 4738     ALU    : S4;        // any alu
 4739     MEM    : S3;
 4740 %}
 4741 
 4742 // Integer ALU0 reg-reg operation
 4743 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4744     single_instruction;
 4745     dst    : S4(write);
 4746     src    : S3(read);
 4747     D0     : S0;        // Big decoder only
 4748     ALU0   : S3;        // only alu0
 4749 %}
 4750 
 4751 // Integer ALU0 reg-mem operation
 4752 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4753     single_instruction;
 4754     dst    : S5(write);
 4755     mem    : S3(read);
 4756     D0     : S0;        // big decoder only
 4757     ALU0   : S4;        // ALU0 only
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Integer ALU reg-reg operation
 4762 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4763     single_instruction;
 4764     cr     : S4(write);
 4765     src1   : S3(read);
 4766     src2   : S3(read);
 4767     DECODE : S0;        // any decoder
 4768     ALU    : S3;        // any alu
 4769 %}
 4770 
 4771 // Integer ALU reg-imm operation
 4772 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4773     single_instruction;
 4774     cr     : S4(write);
 4775     src1   : S3(read);
 4776     DECODE : S0;        // any decoder
 4777     ALU    : S3;        // any alu
 4778 %}
 4779 
 4780 // Integer ALU reg-mem operation
 4781 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4782     single_instruction;
 4783     cr     : S4(write);
 4784     src1   : S3(read);
 4785     src2   : S3(read);
 4786     D0     : S0;        // big decoder only
 4787     ALU    : S4;        // any alu
 4788     MEM    : S3;
 4789 %}
 4790 
 4791 // Conditional move reg-reg
 4792 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4793     instruction_count(4);
 4794     y      : S4(read);
 4795     q      : S3(read);
 4796     p      : S3(read);
 4797     DECODE : S0(4);     // any decoder
 4798 %}
 4799 
 4800 // Conditional move reg-reg
 4801 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4802     single_instruction;
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     cr     : S3(read);
 4806     DECODE : S0;        // any decoder
 4807 %}
 4808 
 4809 // Conditional move reg-mem
 4810 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4811     single_instruction;
 4812     dst    : S4(write);
 4813     src    : S3(read);
 4814     cr     : S3(read);
 4815     DECODE : S0;        // any decoder
 4816     MEM    : S3;
 4817 %}
 4818 
 4819 // Conditional move reg-reg long
 4820 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4821     single_instruction;
 4822     dst    : S4(write);
 4823     src    : S3(read);
 4824     cr     : S3(read);
 4825     DECODE : S0(2);     // any 2 decoders
 4826 %}
 4827 
 4828 // Conditional move double reg-reg
 4829 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4830     single_instruction;
 4831     dst    : S4(write);
 4832     src    : S3(read);
 4833     cr     : S3(read);
 4834     DECODE : S0;        // any decoder
 4835 %}
 4836 
 4837 // Float reg-reg operation
 4838 pipe_class fpu_reg(regDPR dst) %{
 4839     instruction_count(2);
 4840     dst    : S3(read);
 4841     DECODE : S0(2);     // any 2 decoders
 4842     FPU    : S3;
 4843 %}
 4844 
 4845 // Float reg-reg operation
 4846 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4847     instruction_count(2);
 4848     dst    : S4(write);
 4849     src    : S3(read);
 4850     DECODE : S0(2);     // any 2 decoders
 4851     FPU    : S3;
 4852 %}
 4853 
 4854 // Float reg-reg operation
 4855 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4856     instruction_count(3);
 4857     dst    : S4(write);
 4858     src1   : S3(read);
 4859     src2   : S3(read);
 4860     DECODE : S0(3);     // any 3 decoders
 4861     FPU    : S3(2);
 4862 %}
 4863 
 4864 // Float reg-reg operation
 4865 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4866     instruction_count(4);
 4867     dst    : S4(write);
 4868     src1   : S3(read);
 4869     src2   : S3(read);
 4870     src3   : S3(read);
 4871     DECODE : S0(4);     // any 4 decoders
 4872     FPU    : S3(2);
 4873 %}
 4874 
 4875 // Float reg-mem operation
 4876 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4877     instruction_count(4);
 4878     dst    : S4(write);
 4879     src1   : S3(read);
 4880     src2   : S3(read);
 4881     src3   : S3(read);
 4882     DECODE : S1(3);     // any 3 decoders
 4883     D0     : S0;        // Big decoder only
 4884     FPU    : S3(2);
 4885     MEM    : S3;
 4886 %}
 4887 
 4888 // Float reg-mem operation
 4889 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4890     instruction_count(2);
 4891     dst    : S5(write);
 4892     mem    : S3(read);
 4893     D0     : S0;        // big decoder only
 4894     DECODE : S1;        // any decoder for FPU POP
 4895     FPU    : S4;
 4896     MEM    : S3;        // any mem
 4897 %}
 4898 
 4899 // Float reg-mem operation
 4900 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4901     instruction_count(3);
 4902     dst    : S5(write);
 4903     src1   : S3(read);
 4904     mem    : S3(read);
 4905     D0     : S0;        // big decoder only
 4906     DECODE : S1(2);     // any decoder for FPU POP
 4907     FPU    : S4;
 4908     MEM    : S3;        // any mem
 4909 %}
 4910 
 4911 // Float mem-reg operation
 4912 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4913     instruction_count(2);
 4914     src    : S5(read);
 4915     mem    : S3(read);
 4916     DECODE : S0;        // any decoder for FPU PUSH
 4917     D0     : S1;        // big decoder only
 4918     FPU    : S4;
 4919     MEM    : S3;        // any mem
 4920 %}
 4921 
 4922 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4923     instruction_count(3);
 4924     src1   : S3(read);
 4925     src2   : S3(read);
 4926     mem    : S3(read);
 4927     DECODE : S0(2);     // any decoder for FPU PUSH
 4928     D0     : S1;        // big decoder only
 4929     FPU    : S4;
 4930     MEM    : S3;        // any mem
 4931 %}
 4932 
 4933 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4934     instruction_count(3);
 4935     src1   : S3(read);
 4936     src2   : S3(read);
 4937     mem    : S4(read);
 4938     DECODE : S0;        // any decoder for FPU PUSH
 4939     D0     : S0(2);     // big decoder only
 4940     FPU    : S4;
 4941     MEM    : S3(2);     // any mem
 4942 %}
 4943 
 4944 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4945     instruction_count(2);
 4946     src1   : S3(read);
 4947     dst    : S4(read);
 4948     D0     : S0(2);     // big decoder only
 4949     MEM    : S3(2);     // any mem
 4950 %}
 4951 
 4952 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4953     instruction_count(3);
 4954     src1   : S3(read);
 4955     src2   : S3(read);
 4956     dst    : S4(read);
 4957     D0     : S0(3);     // big decoder only
 4958     FPU    : S4;
 4959     MEM    : S3(3);     // any mem
 4960 %}
 4961 
 4962 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4963     instruction_count(3);
 4964     src1   : S4(read);
 4965     mem    : S4(read);
 4966     DECODE : S0;        // any decoder for FPU PUSH
 4967     D0     : S0(2);     // big decoder only
 4968     FPU    : S4;
 4969     MEM    : S3(2);     // any mem
 4970 %}
 4971 
 4972 // Float load constant
 4973 pipe_class fpu_reg_con(regDPR dst) %{
 4974     instruction_count(2);
 4975     dst    : S5(write);
 4976     D0     : S0;        // big decoder only for the load
 4977     DECODE : S1;        // any decoder for FPU POP
 4978     FPU    : S4;
 4979     MEM    : S3;        // any mem
 4980 %}
 4981 
 4982 // Float load constant
 4983 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4984     instruction_count(3);
 4985     dst    : S5(write);
 4986     src    : S3(read);
 4987     D0     : S0;        // big decoder only for the load
 4988     DECODE : S1(2);     // any decoder for FPU POP
 4989     FPU    : S4;
 4990     MEM    : S3;        // any mem
 4991 %}
 4992 
 4993 // UnConditional branch
 4994 pipe_class pipe_jmp( label labl ) %{
 4995     single_instruction;
 4996     BR   : S3;
 4997 %}
 4998 
 4999 // Conditional branch
 5000 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5001     single_instruction;
 5002     cr    : S1(read);
 5003     BR    : S3;
 5004 %}
 5005 
 5006 // Allocation idiom
 5007 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5008     instruction_count(1); force_serialization;
 5009     fixed_latency(6);
 5010     heap_ptr : S3(read);
 5011     DECODE   : S0(3);
 5012     D0       : S2;
 5013     MEM      : S3;
 5014     ALU      : S3(2);
 5015     dst      : S5(write);
 5016     BR       : S5;
 5017 %}
 5018 
 5019 // Generic big/slow expanded idiom
 5020 pipe_class pipe_slow(  ) %{
 5021     instruction_count(10); multiple_bundles; force_serialization;
 5022     fixed_latency(100);
 5023     D0  : S0(2);
 5024     MEM : S3(2);
 5025 %}
 5026 
 5027 // The real do-nothing guy
 5028 pipe_class empty( ) %{
 5029     instruction_count(0);
 5030 %}
 5031 
 5032 // Define the class for the Nop node
 5033 define %{
 5034    MachNop = empty;
 5035 %}
 5036 
 5037 %}
 5038 
 5039 //----------INSTRUCTIONS-------------------------------------------------------
 5040 //
 5041 // match      -- States which machine-independent subtree may be replaced
 5042 //               by this instruction.
 5043 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5044 //               selection to identify a minimum cost tree of machine
 5045 //               instructions that matches a tree of machine-independent
 5046 //               instructions.
 5047 // format     -- A string providing the disassembly for this instruction.
 5048 //               The value of an instruction's operand may be inserted
 5049 //               by referring to it with a '$' prefix.
 5050 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5051 //               to within an encode class as $primary, $secondary, and $tertiary
 5052 //               respectively.  The primary opcode is commonly used to
 5053 //               indicate the type of machine instruction, while secondary
 5054 //               and tertiary are often used for prefix options or addressing
 5055 //               modes.
 5056 // ins_encode -- A list of encode classes with parameters. The encode class
 5057 //               name must have been defined in an 'enc_class' specification
 5058 //               in the encode section of the architecture description.
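      //
      // A minimal sketch (illustrative only, not an actual rule in this file)
      // showing how the attributes above fit together for a hypothetical
      // reg-reg integer add:
      //
      //   instruct addI_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
      //     match(Set dst (AddI dst src));      // ideal subtree replaced by this rule
      //     effect(KILL cr);                    // ADD clobbers the flags register
      //     ins_cost(150);                      // cost seen by instruction selection
      //     format %{ "ADD    $dst,$src" %}     // disassembly string
      //     opcode(0x03);                       // $primary opcode (ADD r32, r/m32)
      //     ins_encode(OpcP, RegReg(dst, src)); // encode classes from the encode section
      //     ins_pipe(ialu_reg_reg);
      //   %}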
 5059 
 5060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5061 // Load Float
 5062 instruct MoveF2LEG(legRegF dst, regF src) %{
 5063   match(Set dst src);
 5064   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5065   ins_encode %{
 5066     ShouldNotReachHere();
 5067   %}
 5068   ins_pipe( fpu_reg_reg );
 5069 %}
 5070 
 5071 // Load Float
 5072 instruct MoveLEG2F(regF dst, legRegF src) %{
 5073   match(Set dst src);
 5074   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5075   ins_encode %{
 5076     ShouldNotReachHere();
 5077   %}
 5078   ins_pipe( fpu_reg_reg );
 5079 %}
 5080 
 5081 // Load Float
 5082 instruct MoveF2VL(vlRegF dst, regF src) %{
 5083   match(Set dst src);
 5084   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5085   ins_encode %{
 5086     ShouldNotReachHere();
 5087   %}
 5088   ins_pipe( fpu_reg_reg );
 5089 %}
 5090 
 5091 // Load Float
 5092 instruct MoveVL2F(regF dst, vlRegF src) %{
 5093   match(Set dst src);
 5094   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5095   ins_encode %{
 5096     ShouldNotReachHere();
 5097   %}
 5098   ins_pipe( fpu_reg_reg );
 5099 %}
 5100 
 5101 
 5102 
 5103 // Load Double
 5104 instruct MoveD2LEG(legRegD dst, regD src) %{
 5105   match(Set dst src);
 5106   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5107   ins_encode %{
 5108     ShouldNotReachHere();
 5109   %}
 5110   ins_pipe( fpu_reg_reg );
 5111 %}
 5112 
 5113 // Load Double
 5114 instruct MoveLEG2D(regD dst, legRegD src) %{
 5115   match(Set dst src);
 5116   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5117   ins_encode %{
 5118     ShouldNotReachHere();
 5119   %}
 5120   ins_pipe( fpu_reg_reg );
 5121 %}
 5122 
 5123 // Load Double
 5124 instruct MoveD2VL(vlRegD dst, regD src) %{
 5125   match(Set dst src);
 5126   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5127   ins_encode %{
 5128     ShouldNotReachHere();
 5129   %}
 5130   ins_pipe( fpu_reg_reg );
 5131 %}
 5132 
 5133 // Load Double
 5134 instruct MoveVL2D(regD dst, vlRegD src) %{
 5135   match(Set dst src);
 5136   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5137   ins_encode %{
 5138     ShouldNotReachHere();
 5139   %}
 5140   ins_pipe( fpu_reg_reg );
 5141 %}
 5142 
 5143 //----------BSWAP-Instruction--------------------------------------------------
 5144 instruct bytes_reverse_int(rRegI dst) %{
 5145   match(Set dst (ReverseBytesI dst));
 5146 
 5147   format %{ "BSWAP  $dst" %}
 5148   opcode(0x0F, 0xC8);
 5149   ins_encode( OpcP, OpcSReg(dst) );
 5150   ins_pipe( ialu_reg );
 5151 %}
 5152 
 5153 instruct bytes_reverse_long(eRegL dst) %{
 5154   match(Set dst (ReverseBytesL dst));
 5155 
 5156   format %{ "BSWAP  $dst.lo\n\t"
 5157             "BSWAP  $dst.hi\n\t"
 5158             "XCHG   $dst.lo $dst.hi" %}
 5159 
 5160   ins_cost(125);
 5161   ins_encode( bswap_long_bytes(dst) );
 5162   ins_pipe( ialu_reg_reg);
 5163 %}
 5164 
 5165 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5166   match(Set dst (ReverseBytesUS dst));
 5167   effect(KILL cr);
 5168 
 5169   format %{ "BSWAP  $dst\n\t"
 5170             "SHR    $dst,16\n\t" %}
 5171   ins_encode %{
 5172     __ bswapl($dst$$Register);
 5173     __ shrl($dst$$Register, 16);
 5174   %}
 5175   ins_pipe( ialu_reg );
 5176 %}
 5177 
 5178 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5179   match(Set dst (ReverseBytesS dst));
 5180   effect(KILL cr);
 5181 
 5182   format %{ "BSWAP  $dst\n\t"
 5183             "SAR    $dst,16\n\t" %}
 5184   ins_encode %{
 5185     __ bswapl($dst$$Register);
 5186     __ sarl($dst$$Register, 16);
 5187   %}
 5188   ins_pipe( ialu_reg );
 5189 %}
 5190 
 5191 
 5192 //---------- Zeros Count Instructions ------------------------------------------
 5193 
 5194 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5195   predicate(UseCountLeadingZerosInstruction);
 5196   match(Set dst (CountLeadingZerosI src));
 5197   effect(KILL cr);
 5198 
 5199   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5200   ins_encode %{
 5201     __ lzcntl($dst$$Register, $src$$Register);
 5202   %}
 5203   ins_pipe(ialu_reg);
 5204 %}
 5205 
 5206 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5207   predicate(!UseCountLeadingZerosInstruction);
 5208   match(Set dst (CountLeadingZerosI src));
 5209   effect(KILL cr);
 5210 
 5211   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5212             "JNZ    skip\n\t"
 5213             "MOV    $dst, -1\n"
 5214       "skip:\n\t"
 5215             "NEG    $dst\n\t"
 5216             "ADD    $dst, 31" %}
 5217   ins_encode %{
 5218     Register Rdst = $dst$$Register;
 5219     Register Rsrc = $src$$Register;
 5220     Label skip;
 5221     __ bsrl(Rdst, Rsrc);
 5222     __ jccb(Assembler::notZero, skip);
 5223     __ movl(Rdst, -1);
 5224     __ bind(skip);
 5225     __ negl(Rdst);
 5226     __ addl(Rdst, BitsPerInt - 1);
 5227   %}
 5228   ins_pipe(ialu_reg);
 5229 %}
 5230 
 5231 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5232   predicate(UseCountLeadingZerosInstruction);
 5233   match(Set dst (CountLeadingZerosL src));
 5234   effect(TEMP dst, KILL cr);
 5235 
 5236   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5237             "JNC    done\n\t"
 5238             "LZCNT  $dst, $src.lo\n\t"
 5239             "ADD    $dst, 32\n"
 5240       "done:" %}
 5241   ins_encode %{
 5242     Register Rdst = $dst$$Register;
 5243     Register Rsrc = $src$$Register;
 5244     Label done;
 5245     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5246     __ jccb(Assembler::carryClear, done);
 5247     __ lzcntl(Rdst, Rsrc);
 5248     __ addl(Rdst, BitsPerInt);
 5249     __ bind(done);
 5250   %}
 5251   ins_pipe(ialu_reg);
 5252 %}
 5253 
 5254 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5255   predicate(!UseCountLeadingZerosInstruction);
 5256   match(Set dst (CountLeadingZerosL src));
 5257   effect(TEMP dst, KILL cr);
 5258 
 5259   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5260             "JZ     msw_is_zero\n\t"
 5261             "ADD    $dst, 32\n\t"
 5262             "JMP    not_zero\n"
 5263       "msw_is_zero:\n\t"
 5264             "BSR    $dst, $src.lo\n\t"
 5265             "JNZ    not_zero\n\t"
 5266             "MOV    $dst, -1\n"
 5267       "not_zero:\n\t"
 5268             "NEG    $dst\n\t"
 5269             "ADD    $dst, 63\n" %}
 5270   ins_encode %{
 5271     Register Rdst = $dst$$Register;
 5272     Register Rsrc = $src$$Register;
 5273     Label msw_is_zero;
 5274     Label not_zero;
 5275     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5276     __ jccb(Assembler::zero, msw_is_zero);
 5277     __ addl(Rdst, BitsPerInt);
 5278     __ jmpb(not_zero);
 5279     __ bind(msw_is_zero);
 5280     __ bsrl(Rdst, Rsrc);
 5281     __ jccb(Assembler::notZero, not_zero);
 5282     __ movl(Rdst, -1);
 5283     __ bind(not_zero);
 5284     __ negl(Rdst);
 5285     __ addl(Rdst, BitsPerLong - 1);
 5286   %}
 5287   ins_pipe(ialu_reg);
 5288 %}
 5289 
 5290 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5291   predicate(UseCountTrailingZerosInstruction);
 5292   match(Set dst (CountTrailingZerosI src));
 5293   effect(KILL cr);
 5294 
 5295   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5296   ins_encode %{
 5297     __ tzcntl($dst$$Register, $src$$Register);
 5298   %}
 5299   ins_pipe(ialu_reg);
 5300 %}
 5301 
 5302 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5303   predicate(!UseCountTrailingZerosInstruction);
 5304   match(Set dst (CountTrailingZerosI src));
 5305   effect(KILL cr);
 5306 
 5307   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5308             "JNZ    done\n\t"
 5309             "MOV    $dst, 32\n"
 5310       "done:" %}
 5311   ins_encode %{
 5312     Register Rdst = $dst$$Register;
 5313     Label done;
 5314     __ bsfl(Rdst, $src$$Register);
 5315     __ jccb(Assembler::notZero, done);
 5316     __ movl(Rdst, BitsPerInt);
 5317     __ bind(done);
 5318   %}
 5319   ins_pipe(ialu_reg);
 5320 %}
 5321 
 5322 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5323   predicate(UseCountTrailingZerosInstruction);
 5324   match(Set dst (CountTrailingZerosL src));
 5325   effect(TEMP dst, KILL cr);
 5326 
 5327   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5328             "JNC    done\n\t"
 5329             "TZCNT  $dst, $src.hi\n\t"
 5330             "ADD    $dst, 32\n"
 5331             "done:" %}
 5332   ins_encode %{
 5333     Register Rdst = $dst$$Register;
 5334     Register Rsrc = $src$$Register;
 5335     Label done;
 5336     __ tzcntl(Rdst, Rsrc);
 5337     __ jccb(Assembler::carryClear, done);
 5338     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5339     __ addl(Rdst, BitsPerInt);
 5340     __ bind(done);
 5341   %}
 5342   ins_pipe(ialu_reg);
 5343 %}
 5344 
 5345 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5346   predicate(!UseCountTrailingZerosInstruction);
 5347   match(Set dst (CountTrailingZerosL src));
 5348   effect(TEMP dst, KILL cr);
 5349 
 5350   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5351             "JNZ    done\n\t"
 5352             "BSF    $dst, $src.hi\n\t"
 5353             "JNZ    msw_not_zero\n\t"
 5354             "MOV    $dst, 32\n"
 5355       "msw_not_zero:\n\t"
 5356             "ADD    $dst, 32\n"
 5357       "done:" %}
 5358   ins_encode %{
 5359     Register Rdst = $dst$$Register;
 5360     Register Rsrc = $src$$Register;
 5361     Label msw_not_zero;
 5362     Label done;
 5363     __ bsfl(Rdst, Rsrc);
 5364     __ jccb(Assembler::notZero, done);
 5365     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5366     __ jccb(Assembler::notZero, msw_not_zero);
 5367     __ movl(Rdst, BitsPerInt);
 5368     __ bind(msw_not_zero);
 5369     __ addl(Rdst, BitsPerInt);
 5370     __ bind(done);
 5371   %}
 5372   ins_pipe(ialu_reg);
 5373 %}
 5374 
 5375 
 5376 //---------- Population Count Instructions -------------------------------------
 5377 
 5378 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5379   predicate(UsePopCountInstruction);
 5380   match(Set dst (PopCountI src));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "POPCNT $dst, $src" %}
 5384   ins_encode %{
 5385     __ popcntl($dst$$Register, $src$$Register);
 5386   %}
 5387   ins_pipe(ialu_reg);
 5388 %}
 5389 
 5390 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5391   predicate(UsePopCountInstruction);
 5392   match(Set dst (PopCountI (LoadI mem)));
 5393   effect(KILL cr);
 5394 
 5395   format %{ "POPCNT $dst, $mem" %}
 5396   ins_encode %{
 5397     __ popcntl($dst$$Register, $mem$$Address);
 5398   %}
 5399   ins_pipe(ialu_reg);
 5400 %}
 5401 
 5402 // Note: Long.bitCount(long) returns an int.
 5403 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5404   predicate(UsePopCountInstruction);
 5405   match(Set dst (PopCountL src));
 5406   effect(KILL cr, TEMP tmp, TEMP dst);
 5407 
 5408   format %{ "POPCNT $dst, $src.lo\n\t"
 5409             "POPCNT $tmp, $src.hi\n\t"
 5410             "ADD    $dst, $tmp" %}
 5411   ins_encode %{
 5412     __ popcntl($dst$$Register, $src$$Register);
 5413     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5414     __ addl($dst$$Register, $tmp$$Register);
 5415   %}
 5416   ins_pipe(ialu_reg);
 5417 %}
 5418 
 5419 // Note: Long.bitCount(long) returns an int.
 5420 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5421   predicate(UsePopCountInstruction);
 5422   match(Set dst (PopCountL (LoadL mem)));
 5423   effect(KILL cr, TEMP tmp, TEMP dst);
 5424 
 5425   format %{ "POPCNT $dst, $mem\n\t"
 5426             "POPCNT $tmp, $mem+4\n\t"
 5427             "ADD    $dst, $tmp" %}
 5428   ins_encode %{
 5429     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5430     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5431     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5432     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5433     __ addl($dst$$Register, $tmp$$Register);
 5434   %}
 5435   ins_pipe(ialu_reg);
 5436 %}
 5437 
 5438 
 5439 //----------Load/Store/Move Instructions---------------------------------------
 5440 //----------Load Instructions--------------------------------------------------
 5441 // Load Byte (8bit signed)
 5442 instruct loadB(xRegI dst, memory mem) %{
 5443   match(Set dst (LoadB mem));
 5444 
 5445   ins_cost(125);
 5446   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5447 
 5448   ins_encode %{
 5449     __ movsbl($dst$$Register, $mem$$Address);
 5450   %}
 5451 
 5452   ins_pipe(ialu_reg_mem);
 5453 %}
 5454 
 5455 // Load Byte (8bit signed) into Long Register
 5456 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5457   match(Set dst (ConvI2L (LoadB mem)));
 5458   effect(KILL cr);
 5459 
 5460   ins_cost(375);
 5461   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5462             "MOV    $dst.hi,$dst.lo\n\t"
 5463             "SAR    $dst.hi,7" %}
 5464 
 5465   ins_encode %{
 5466     __ movsbl($dst$$Register, $mem$$Address);
 5467     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5468     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
 5469   %}
 5470 
 5471   ins_pipe(ialu_reg_mem);
 5472 %}
 5473 
 5474 // Load Unsigned Byte (8bit UNsigned)
 5475 instruct loadUB(xRegI dst, memory mem) %{
 5476   match(Set dst (LoadUB mem));
 5477 
 5478   ins_cost(125);
 5479   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5480 
 5481   ins_encode %{
 5482     __ movzbl($dst$$Register, $mem$$Address);
 5483   %}
 5484 
 5485   ins_pipe(ialu_reg_mem);
 5486 %}
 5487 
 5488 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5489 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5490   match(Set dst (ConvI2L (LoadUB mem)));
 5491   effect(KILL cr);
 5492 
 5493   ins_cost(250);
 5494   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5495             "XOR    $dst.hi,$dst.hi" %}
 5496 
 5497   ins_encode %{
 5498     Register Rdst = $dst$$Register;
 5499     __ movzbl(Rdst, $mem$$Address);
 5500     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5501   %}
 5502 
 5503   ins_pipe(ialu_reg_mem);
 5504 %}
 5505 
 5506 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5507 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5508   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5509   effect(KILL cr);
 5510 
 5511   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5512             "XOR    $dst.hi,$dst.hi\n\t"
 5513             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5514   ins_encode %{
 5515     Register Rdst = $dst$$Register;
 5516     __ movzbl(Rdst, $mem$$Address);
 5517     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5518     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5519   %}
 5520   ins_pipe(ialu_reg_mem);
 5521 %}
 5522 
 5523 // Load Short (16bit signed)
 5524 instruct loadS(rRegI dst, memory mem) %{
 5525   match(Set dst (LoadS mem));
 5526 
 5527   ins_cost(125);
 5528   format %{ "MOVSX  $dst,$mem\t# short" %}
 5529 
 5530   ins_encode %{
 5531     __ movswl($dst$$Register, $mem$$Address);
 5532   %}
 5533 
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Short (16 bit signed) to Byte (8 bit signed)
 5538 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5539   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5540 
 5541   ins_cost(125);
 5542   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5543   ins_encode %{
 5544     __ movsbl($dst$$Register, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Short (16bit signed) into Long Register
 5550 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5551   match(Set dst (ConvI2L (LoadS mem)));
 5552   effect(KILL cr);
 5553 
 5554   ins_cost(375);
 5555   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5556             "MOV    $dst.hi,$dst.lo\n\t"
 5557             "SAR    $dst.hi,15" %}
 5558 
 5559   ins_encode %{
 5560     __ movswl($dst$$Register, $mem$$Address);
 5561     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5562     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
 5563   %}
 5564 
 5565   ins_pipe(ialu_reg_mem);
 5566 %}
 5567 
 5568 // Load Unsigned Short/Char (16bit unsigned)
 5569 instruct loadUS(rRegI dst, memory mem) %{
 5570   match(Set dst (LoadUS mem));
 5571 
 5572   ins_cost(125);
 5573   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5574 
 5575   ins_encode %{
 5576     __ movzwl($dst$$Register, $mem$$Address);
 5577   %}
 5578 
 5579   ins_pipe(ialu_reg_mem);
 5580 %}
 5581 
 5582 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5583 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5584   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5585 
 5586   ins_cost(125);
 5587   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5588   ins_encode %{
 5589     __ movsbl($dst$$Register, $mem$$Address);
 5590   %}
 5591   ins_pipe(ialu_reg_mem);
 5592 %}
 5593 
 5594 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5595 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5596   match(Set dst (ConvI2L (LoadUS mem)));
 5597   effect(KILL cr);
 5598 
 5599   ins_cost(250);
 5600   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5601             "XOR    $dst.hi,$dst.hi" %}
 5602 
 5603   ins_encode %{
 5604     __ movzwl($dst$$Register, $mem$$Address);
 5605     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5606   %}
 5607 
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5612 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movzbl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5627 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5628   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5629   effect(KILL cr);
 5630 
 5631   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5632             "XOR    $dst.hi,$dst.hi\n\t"
 5633             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5634   ins_encode %{
 5635     Register Rdst = $dst$$Register;
 5636     __ movzwl(Rdst, $mem$$Address);
 5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5638     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Integer
 5644 instruct loadI(rRegI dst, memory mem) %{
 5645   match(Set dst (LoadI mem));
 5646 
 5647   ins_cost(125);
 5648   format %{ "MOV    $dst,$mem\t# int" %}
 5649 
 5650   ins_encode %{
 5651     __ movl($dst$$Register, $mem$$Address);
 5652   %}
 5653 
 5654   ins_pipe(ialu_reg_mem);
 5655 %}
 5656 
 5657 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5658 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5659   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5660 
 5661   ins_cost(125);
 5662   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5663   ins_encode %{
 5664     __ movsbl($dst$$Register, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_mem);
 5667 %}
 5668 
 5669 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5670 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5671   match(Set dst (AndI (LoadI mem) mask));
 5672 
 5673   ins_cost(125);
 5674   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5675   ins_encode %{
 5676     __ movzbl($dst$$Register, $mem$$Address);
 5677   %}
 5678   ins_pipe(ialu_reg_mem);
 5679 %}
 5680 
 5681 // Load Integer (32 bit signed) to Short (16 bit signed)
 5682 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5683   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5684 
 5685   ins_cost(125);
 5686   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5687   ins_encode %{
 5688     __ movswl($dst$$Register, $mem$$Address);
 5689   %}
 5690   ins_pipe(ialu_reg_mem);
 5691 %}
 5692 
 5693 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5694 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5695   match(Set dst (AndI (LoadI mem) mask));
 5696 
 5697   ins_cost(125);
 5698   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5699   ins_encode %{
 5700     __ movzwl($dst$$Register, $mem$$Address);
 5701   %}
 5702   ins_pipe(ialu_reg_mem);
 5703 %}
 5704 
 5705 // Load Integer into Long Register
 5706 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5707   match(Set dst (ConvI2L (LoadI mem)));
 5708   effect(KILL cr);
 5709 
 5710   ins_cost(375);
 5711   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5712             "MOV    $dst.hi,$dst.lo\n\t"
 5713             "SAR    $dst.hi,31" %}
 5714 
 5715   ins_encode %{
 5716     __ movl($dst$$Register, $mem$$Address);
 5717     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5718     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5719   %}
 5720 
 5721   ins_pipe(ialu_reg_mem);
 5722 %}
 5723 
 5724 // Load Integer with mask 0xFF into Long Register
 5725 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5726   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5727   effect(KILL cr);
 5728 
 5729   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5730             "XOR    $dst.hi,$dst.hi" %}
 5731   ins_encode %{
 5732     Register Rdst = $dst$$Register;
 5733     __ movzbl(Rdst, $mem$$Address);
 5734     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5735   %}
 5736   ins_pipe(ialu_reg_mem);
 5737 %}
 5738 
 5739 // Load Integer with mask 0xFFFF into Long Register
 5740 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5741   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5742   effect(KILL cr);
 5743 
 5744   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5745             "XOR    $dst.hi,$dst.hi" %}
 5746   ins_encode %{
 5747     Register Rdst = $dst$$Register;
 5748     __ movzwl(Rdst, $mem$$Address);
 5749     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5750   %}
 5751   ins_pipe(ialu_reg_mem);
 5752 %}
 5753 
 5754 // Load Integer with 31-bit mask into Long Register
 5755 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5756   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5757   effect(KILL cr);
 5758 
 5759   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5760             "XOR    $dst.hi,$dst.hi\n\t"
 5761             "AND    $dst.lo,$mask" %}
 5762   ins_encode %{
 5763     Register Rdst = $dst$$Register;
 5764     __ movl(Rdst, $mem$$Address);
 5765     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5766     __ andl(Rdst, $mask$$constant);
 5767   %}
 5768   ins_pipe(ialu_reg_mem);
 5769 %}
 5770 
 5771 // Load Unsigned Integer into Long Register
 5772 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5773   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5774   effect(KILL cr);
 5775 
 5776   ins_cost(250);
 5777   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5778             "XOR    $dst.hi,$dst.hi" %}
 5779 
 5780   ins_encode %{
 5781     __ movl($dst$$Register, $mem$$Address);
 5782     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5783   %}
 5784 
 5785   ins_pipe(ialu_reg_mem);
 5786 %}
 5787 
 5788 // Load Long.  Cannot clobber address while loading, so restrict address
 5789 // register to ESI
 5790 instruct loadL(eRegL dst, load_long_memory mem) %{
 5791   predicate(!((LoadLNode*)n)->require_atomic_access());
 5792   match(Set dst (LoadL mem));
 5793 
 5794   ins_cost(250);
 5795   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5796             "MOV    $dst.hi,$mem+4" %}
 5797 
 5798   ins_encode %{
 5799     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5800     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5801     __ movl($dst$$Register, Amemlo);
 5802     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5803   %}
 5804 
 5805   ins_pipe(ialu_reg_long_mem);
 5806 %}
 5807 
 5808 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5809 // then store it down to the stack and reload on the int
 5810 // side.
 5811 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5812   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5813   match(Set dst (LoadL mem));
 5814 
 5815   ins_cost(200);
 5816   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5817             "FISTp  $dst" %}
 5818   ins_encode(enc_loadL_volatile(mem,dst));
 5819   ins_pipe( fpu_reg_mem );
 5820 %}
 5821 
 5822 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5823   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5824   match(Set dst (LoadL mem));
 5825   effect(TEMP tmp);
 5826   ins_cost(180);
 5827   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5828             "MOVSD  $dst,$tmp" %}
 5829   ins_encode %{
 5830     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5831     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5832   %}
 5833   ins_pipe( pipe_slow );
 5834 %}
 5835 
 5836 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5837   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5838   match(Set dst (LoadL mem));
 5839   effect(TEMP tmp);
 5840   ins_cost(160);
 5841   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5842             "MOVD   $dst.lo,$tmp\n\t"
 5843             "PSRLQ  $tmp,32\n\t"
 5844             "MOVD   $dst.hi,$tmp" %}
 5845   ins_encode %{
 5846     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5847     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5848     __ psrlq($tmp$$XMMRegister, 32);
 5849     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5850   %}
 5851   ins_pipe( pipe_slow );
 5852 %}
 5853 
 5854 // Load Range
 5855 instruct loadRange(rRegI dst, memory mem) %{
 5856   match(Set dst (LoadRange mem));
 5857 
 5858   ins_cost(125);
 5859   format %{ "MOV    $dst,$mem" %}
 5860   opcode(0x8B);
 5861   ins_encode( OpcP, RegMem(dst,mem));
 5862   ins_pipe( ialu_reg_mem );
 5863 %}
 5864 
 5865 
 5866 // Load Pointer
 5867 instruct loadP(eRegP dst, memory mem) %{
 5868   match(Set dst (LoadP mem));
 5869 
 5870   ins_cost(125);
 5871   format %{ "MOV    $dst,$mem" %}
 5872   opcode(0x8B);
 5873   ins_encode( OpcP, RegMem(dst,mem));
 5874   ins_pipe( ialu_reg_mem );
 5875 %}
 5876 
 5877 // Load Klass Pointer
 5878 instruct loadKlass(eRegP dst, memory mem) %{
 5879   match(Set dst (LoadKlass mem));
 5880 
 5881   ins_cost(125);
 5882   format %{ "MOV    $dst,$mem" %}
 5883   opcode(0x8B);
 5884   ins_encode( OpcP, RegMem(dst,mem));
 5885   ins_pipe( ialu_reg_mem );
 5886 %}
 5887 
 5888 // Load Double
 5889 instruct loadDPR(regDPR dst, memory mem) %{
 5890   predicate(UseSSE<=1);
 5891   match(Set dst (LoadD mem));
 5892 
 5893   ins_cost(150);
 5894   format %{ "FLD_D  ST,$mem\n\t"
 5895             "FSTP   $dst" %}
 5896   opcode(0xDD);               /* DD /0 */
 5897   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5898               Pop_Reg_DPR(dst) );
 5899   ins_pipe( fpu_reg_mem );
 5900 %}
 5901 
 5902 // Load Double to XMM
 5903 instruct loadD(regD dst, memory mem) %{
 5904   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5905   match(Set dst (LoadD mem));
 5906   ins_cost(145);
 5907   format %{ "MOVSD  $dst,$mem" %}
 5908   ins_encode %{
 5909     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5910   %}
 5911   ins_pipe( pipe_slow );
 5912 %}
 5913 
 5914 instruct loadD_partial(regD dst, memory mem) %{
 5915   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5916   match(Set dst (LoadD mem));
 5917   ins_cost(145);
 5918   format %{ "MOVLPD $dst,$mem" %}
 5919   ins_encode %{
 5920     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5921   %}
 5922   ins_pipe( pipe_slow );
 5923 %}
 5924 
 5925 // Load to XMM register (single-precision floating point)
 5926 // MOVSS instruction
 5927 instruct loadF(regF dst, memory mem) %{
 5928   predicate(UseSSE>=1);
 5929   match(Set dst (LoadF mem));
 5930   ins_cost(145);
 5931   format %{ "MOVSS  $dst,$mem" %}
 5932   ins_encode %{
 5933     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5934   %}
 5935   ins_pipe( pipe_slow );
 5936 %}
 5937 
 5938 // Load Float
 5939 instruct loadFPR(regFPR dst, memory mem) %{
 5940   predicate(UseSSE==0);
 5941   match(Set dst (LoadF mem));
 5942 
 5943   ins_cost(150);
 5944   format %{ "FLD_S  ST,$mem\n\t"
 5945             "FSTP   $dst" %}
 5946   opcode(0xD9);               /* D9 /0 */
 5947   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5948               Pop_Reg_FPR(dst) );
 5949   ins_pipe( fpu_reg_mem );
 5950 %}
 5951 
 5952 // Load Effective Address
 5953 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5954   match(Set dst mem);
 5955 
 5956   ins_cost(110);
 5957   format %{ "LEA    $dst,$mem" %}
 5958   opcode(0x8D);
 5959   ins_encode( OpcP, RegMem(dst,mem));
 5960   ins_pipe( ialu_reg_reg_fat );
 5961 %}
 5962 
 5963 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5964   match(Set dst mem);
 5965 
 5966   ins_cost(110);
 5967   format %{ "LEA    $dst,$mem" %}
 5968   opcode(0x8D);
 5969   ins_encode( OpcP, RegMem(dst,mem));
 5970   ins_pipe( ialu_reg_reg_fat );
 5971 %}
 5972 
 5973 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5974   match(Set dst mem);
 5975 
 5976   ins_cost(110);
 5977   format %{ "LEA    $dst,$mem" %}
 5978   opcode(0x8D);
 5979   ins_encode( OpcP, RegMem(dst,mem));
 5980   ins_pipe( ialu_reg_reg_fat );
 5981 %}
 5982 
 5983 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5984   match(Set dst mem);
 5985 
 5986   ins_cost(110);
 5987   format %{ "LEA    $dst,$mem" %}
 5988   opcode(0x8D);
 5989   ins_encode( OpcP, RegMem(dst,mem));
 5990   ins_pipe( ialu_reg_reg_fat );
 5991 %}
 5992 
 5993 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5994   match(Set dst mem);
 5995 
 5996   ins_cost(110);
 5997   format %{ "LEA    $dst,$mem" %}
 5998   opcode(0x8D);
 5999   ins_encode( OpcP, RegMem(dst,mem));
 6000   ins_pipe( ialu_reg_reg_fat );
 6001 %}
 6002 
 6003 // Load Constant
 6004 instruct loadConI(rRegI dst, immI src) %{
 6005   match(Set dst src);
 6006 
 6007   format %{ "MOV    $dst,$src" %}
 6008   ins_encode( LdImmI(dst, src) );
 6009   ins_pipe( ialu_reg_fat );
 6010 %}
 6011 
 6012 // Load Constant zero
 6013 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6014   match(Set dst src);
 6015   effect(KILL cr);
 6016 
 6017   ins_cost(50);
 6018   format %{ "XOR    $dst,$dst" %}
 6019   opcode(0x33);  /* + rd */
 6020   ins_encode( OpcP, RegReg( dst, dst ) );
 6021   ins_pipe( ialu_reg );
 6022 %}
 6023 
 6024 instruct loadConP(eRegP dst, immP src) %{
 6025   match(Set dst src);
 6026 
 6027   format %{ "MOV    $dst,$src" %}
 6028   opcode(0xB8);  /* + rd */
 6029   ins_encode( LdImmP(dst, src) );
 6030   ins_pipe( ialu_reg_fat );
 6031 %}
 6032 
 6033 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6034   match(Set dst src);
 6035   effect(KILL cr);
 6036   ins_cost(200);
 6037   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6038             "MOV    $dst.hi,$src.hi" %}
 6039   opcode(0xB8);
 6040   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6041   ins_pipe( ialu_reg_long_fat );
 6042 %}
 6043 
 6044 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6045   match(Set dst src);
 6046   effect(KILL cr);
 6047   ins_cost(150);
 6048   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6049             "XOR    $dst.hi,$dst.hi" %}
 6050   opcode(0x33,0x33);
 6051   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6052   ins_pipe( ialu_reg_long );
 6053 %}
 6054 
 6055 // The instruction usage is guarded by predicate in operand immFPR().
 6056 instruct loadConFPR(regFPR dst, immFPR con) %{
 6057   match(Set dst con);
 6058   ins_cost(125);
 6059   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6060             "FSTP   $dst" %}
 6061   ins_encode %{
 6062     __ fld_s($constantaddress($con));
 6063     __ fstp_d($dst$$reg);
 6064   %}
 6065   ins_pipe(fpu_reg_con);
 6066 %}
 6067 
 6068 // The instruction usage is guarded by predicate in operand immFPR0().
 6069 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6070   match(Set dst con);
 6071   ins_cost(125);
 6072   format %{ "FLDZ   ST\n\t"
 6073             "FSTP   $dst" %}
 6074   ins_encode %{
 6075     __ fldz();
 6076     __ fstp_d($dst$$reg);
 6077   %}
 6078   ins_pipe(fpu_reg_con);
 6079 %}
 6080 
 6081 // The instruction usage is guarded by predicate in operand immFPR1().
 6082 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6083   match(Set dst con);
 6084   ins_cost(125);
 6085   format %{ "FLD1   ST\n\t"
 6086             "FSTP   $dst" %}
 6087   ins_encode %{
 6088     __ fld1();
 6089     __ fstp_d($dst$$reg);
 6090   %}
 6091   ins_pipe(fpu_reg_con);
 6092 %}
 6093 
 6094 // The instruction usage is guarded by predicate in operand immF().
 6095 instruct loadConF(regF dst, immF con) %{
 6096   match(Set dst con);
 6097   ins_cost(125);
 6098   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6099   ins_encode %{
 6100     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6101   %}
 6102   ins_pipe(pipe_slow);
 6103 %}
 6104 
 6105 // The instruction usage is guarded by predicate in operand immF0().
 6106 instruct loadConF0(regF dst, immF0 src) %{
 6107   match(Set dst src);
 6108   ins_cost(100);
 6109   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6110   ins_encode %{
 6111     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6112   %}
 6113   ins_pipe(pipe_slow);
 6114 %}
 6115 
 6116 // The instruction usage is guarded by predicate in operand immDPR().
 6117 instruct loadConDPR(regDPR dst, immDPR con) %{
 6118   match(Set dst con);
 6119   ins_cost(125);
 6120 
 6121   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6122             "FSTP   $dst" %}
 6123   ins_encode %{
 6124     __ fld_d($constantaddress($con));
 6125     __ fstp_d($dst$$reg);
 6126   %}
 6127   ins_pipe(fpu_reg_con);
 6128 %}
 6129 
 6130 // The instruction usage is guarded by predicate in operand immDPR0().
 6131 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6132   match(Set dst con);
 6133   ins_cost(125);
 6134 
 6135   format %{ "FLDZ   ST\n\t"
 6136             "FSTP   $dst" %}
 6137   ins_encode %{
 6138     __ fldz();
 6139     __ fstp_d($dst$$reg);
 6140   %}
 6141   ins_pipe(fpu_reg_con);
 6142 %}
 6143 
 6144 // The instruction usage is guarded by predicate in operand immDPR1().
 6145 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6146   match(Set dst con);
 6147   ins_cost(125);
 6148 
 6149   format %{ "FLD1   ST\n\t"
 6150             "FSTP   $dst" %}
 6151   ins_encode %{
 6152     __ fld1();
 6153     __ fstp_d($dst$$reg);
 6154   %}
 6155   ins_pipe(fpu_reg_con);
 6156 %}
 6157 
 6158 // The instruction usage is guarded by predicate in operand immD().
 6159 instruct loadConD(regD dst, immD con) %{
 6160   match(Set dst con);
 6161   ins_cost(125);
 6162   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6163   ins_encode %{
 6164     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6165   %}
 6166   ins_pipe(pipe_slow);
 6167 %}
 6168 
 6169 // The instruction usage is guarded by predicate in operand immD0().
 6170 instruct loadConD0(regD dst, immD0 src) %{
 6171   match(Set dst src);
 6172   ins_cost(100);
 6173   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6174   ins_encode %{
 6175     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6176   %}
 6177   ins_pipe( pipe_slow );
 6178 %}
 6179 
 6180 // Load Stack Slot
 6181 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6182   match(Set dst src);
 6183   ins_cost(125);
 6184 
 6185   format %{ "MOV    $dst,$src" %}
 6186   opcode(0x8B);
 6187   ins_encode( OpcP, RegMem(dst,src));
 6188   ins_pipe( ialu_reg_mem );
 6189 %}
 6190 
 6191 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6192   match(Set dst src);
 6193 
 6194   ins_cost(200);
 6195   format %{ "MOV    $dst,$src.lo\n\t"
 6196             "MOV    $dst+4,$src.hi" %}
 6197   opcode(0x8B, 0x8B);
 6198   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6199   ins_pipe( ialu_mem_long_reg );
 6200 %}
 6201 
 6202 // Load Stack Slot
 6203 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6204   match(Set dst src);
 6205   ins_cost(125);
 6206 
 6207   format %{ "MOV    $dst,$src" %}
 6208   opcode(0x8B);
 6209   ins_encode( OpcP, RegMem(dst,src));
 6210   ins_pipe( ialu_reg_mem );
 6211 %}
 6212 
 6213 // Load Stack Slot
 6214 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6215   match(Set dst src);
 6216   ins_cost(125);
 6217 
 6218   format %{ "FLD_S  $src\n\t"
 6219             "FSTP   $dst" %}
 6220   opcode(0xD9);               /* D9 /0, FLD m32real */
 6221   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6222               Pop_Reg_FPR(dst) );
 6223   ins_pipe( fpu_reg_mem );
 6224 %}
 6225 
 6226 // Load Stack Slot
 6227 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6228   match(Set dst src);
 6229   ins_cost(125);
 6230 
 6231   format %{ "FLD_D  $src\n\t"
 6232             "FSTP   $dst" %}
 6233   opcode(0xDD);               /* DD /0, FLD m64real */
 6234   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6235               Pop_Reg_DPR(dst) );
 6236   ins_pipe( fpu_reg_mem );
 6237 %}
 6238 
 6239 // Prefetch instructions for allocation.
 6240 // Must be safe to execute with invalid address (cannot fault).
 6241 
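// The AllocatePrefetchInstr flag selects among the variants below:
//   0 -> PREFETCHNTA, 1 -> PREFETCHT0, 2 -> PREFETCHT2, 3 -> PREFETCHW.
// With UseSSE==0 (and AllocatePrefetchInstr!=3) the prefetch is a no-op.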
 6242 instruct prefetchAlloc0( memory mem ) %{
 6243   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6244   match(PrefetchAllocation mem);
 6245   ins_cost(0);
 6246   size(0);
 6247   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6248   ins_encode();
 6249   ins_pipe(empty);
 6250 %}
 6251 
 6252 instruct prefetchAlloc( memory mem ) %{
 6253   predicate(AllocatePrefetchInstr==3);
 6254   match( PrefetchAllocation mem );
 6255   ins_cost(100);
 6256 
 6257   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6258   ins_encode %{
 6259     __ prefetchw($mem$$Address);
 6260   %}
 6261   ins_pipe(ialu_mem);
 6262 %}
 6263 
 6264 instruct prefetchAllocNTA( memory mem ) %{
 6265   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6266   match(PrefetchAllocation mem);
 6267   ins_cost(100);
 6268 
 6269   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6270   ins_encode %{
 6271     __ prefetchnta($mem$$Address);
 6272   %}
 6273   ins_pipe(ialu_mem);
 6274 %}
 6275 
 6276 instruct prefetchAllocT0( memory mem ) %{
 6277   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6278   match(PrefetchAllocation mem);
 6279   ins_cost(100);
 6280 
 6281   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6282   ins_encode %{
 6283     __ prefetcht0($mem$$Address);
 6284   %}
 6285   ins_pipe(ialu_mem);
 6286 %}
 6287 
 6288 instruct prefetchAllocT2( memory mem ) %{
 6289   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6290   match(PrefetchAllocation mem);
 6291   ins_cost(100);
 6292 
 6293   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6294   ins_encode %{
 6295     __ prefetcht2($mem$$Address);
 6296   %}
 6297   ins_pipe(ialu_mem);
 6298 %}
 6299 
 6300 //----------Store Instructions-------------------------------------------------
 6301 
 6302 // Store Byte
 6303 instruct storeB(memory mem, xRegI src) %{
 6304   match(Set mem (StoreB mem src));
 6305 
 6306   ins_cost(125);
 6307   format %{ "MOV8   $mem,$src" %}
 6308   opcode(0x88);
 6309   ins_encode( OpcP, RegMem( src, mem ) );
 6310   ins_pipe( ialu_mem_reg );
 6311 %}
 6312 
 6313 // Store Char/Short
 6314 instruct storeC(memory mem, rRegI src) %{
 6315   match(Set mem (StoreC mem src));
 6316 
 6317   ins_cost(125);
 6318   format %{ "MOV16  $mem,$src" %}
 6319   opcode(0x89, 0x66);
 6320   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6321   ins_pipe( ialu_mem_reg );
 6322 %}
 6323 
 6324 // Store Integer
 6325 instruct storeI(memory mem, rRegI src) %{
 6326   match(Set mem (StoreI mem src));
 6327 
 6328   ins_cost(125);
 6329   format %{ "MOV    $mem,$src" %}
 6330   opcode(0x89);
 6331   ins_encode( OpcP, RegMem( src, mem ) );
 6332   ins_pipe( ialu_mem_reg );
 6333 %}
 6334 
 6335 // Store Long
 6336 instruct storeL(long_memory mem, eRegL src) %{
 6337   predicate(!((StoreLNode*)n)->require_atomic_access());
 6338   match(Set mem (StoreL mem src));
 6339 
 6340   ins_cost(200);
 6341   format %{ "MOV    $mem,$src.lo\n\t"
 6342             "MOV    $mem+4,$src.hi" %}
 6343   opcode(0x89, 0x89);
 6344   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6345   ins_pipe( ialu_mem_long_reg );
 6346 %}
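// A plain (non-volatile) long store is split into two 32-bit MOVs; the
// require_atomic_access() predicate routes volatile longs to the atomic
// forms further below.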
 6347 
 6348 // Store Long to Integer
 6349 instruct storeL2I(memory mem, eRegL src) %{
 6350   match(Set mem (StoreI mem (ConvL2I src)));
 6351 
 6352   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6353   ins_encode %{
 6354     __ movl($mem$$Address, $src$$Register);
 6355   %}
 6356   ins_pipe(ialu_mem_reg);
 6357 %}
 6358 
 6359 // Volatile Store Long.  Must be atomic, so move it into
 6360 // the FP TOS and then do a 64-bit FISTP.  Has to probe the
 6361 // target address before the store (for null-ptr checks)
 6362 // so the memory operand is used twice in the encoding.
 6363 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6364   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6365   match(Set mem (StoreL mem src));
 6366   effect( KILL cr );
 6367   ins_cost(400);
 6368   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6369             "FILD   $src\n\t"
 6370             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6371   opcode(0x3B);
 6372   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6373   ins_pipe( fpu_reg_mem );
 6374 %}
 6375 
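// With SSE2 an aligned 64-bit MOVSD store is a single atomic access, so the
// value is staged in an XMM register instead of going through the FPU stack.
// The address is still probed first (CMP) to get the implicit null check.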
 6376 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6377   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6378   match(Set mem (StoreL mem src));
 6379   effect( TEMP tmp, KILL cr );
 6380   ins_cost(380);
 6381   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6382             "MOVSD  $tmp,$src\n\t"
 6383             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6384   ins_encode %{
 6385     __ cmpl(rax, $mem$$Address);
 6386     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6387     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6388   %}
 6389   ins_pipe( pipe_slow );
 6390 %}
 6391 
 6392 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6393   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6394   match(Set mem (StoreL mem src));
 6395   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6396   ins_cost(360);
 6397   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6398             "MOVD   $tmp,$src.lo\n\t"
 6399             "MOVD   $tmp2,$src.hi\n\t"
 6400             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6401             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6402   ins_encode %{
 6403     __ cmpl(rax, $mem$$Address);
 6404     __ movdl($tmp$$XMMRegister, $src$$Register);
 6405     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6406     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6407     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6408   %}
 6409   ins_pipe( pipe_slow );
 6410 %}
 6411 
 6412 // Store Pointer; for storing unknown oops and raw pointers
 6413 instruct storeP(memory mem, anyRegP src) %{
 6414   match(Set mem (StoreP mem src));
 6415 
 6416   ins_cost(125);
 6417   format %{ "MOV    $mem,$src" %}
 6418   opcode(0x89);
 6419   ins_encode( OpcP, RegMem( src, mem ) );
 6420   ins_pipe( ialu_mem_reg );
 6421 %}
 6422 
 6423 // Store Integer Immediate
 6424 instruct storeImmI(memory mem, immI src) %{
 6425   match(Set mem (StoreI mem src));
 6426 
 6427   ins_cost(150);
 6428   format %{ "MOV    $mem,$src" %}
 6429   opcode(0xC7);               /* C7 /0 */
 6430   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6431   ins_pipe( ialu_mem_imm );
 6432 %}
 6433 
 6434 // Store Short/Char Immediate
 6435 instruct storeImmI16(memory mem, immI16 src) %{
 6436   predicate(UseStoreImmI16);
 6437   match(Set mem (StoreC mem src));
 6438 
 6439   ins_cost(150);
 6440   format %{ "MOV16  $mem,$src" %}
 6441   opcode(0xC7);     /* C7 /0, same as the 32-bit store immediate but with an operand-size prefix */
 6442   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6443   ins_pipe( ialu_mem_imm );
 6444 %}
 6445 
 6446 // Store Pointer Immediate; null pointers or constant oops that do not
 6447 // need card-mark barriers.
 6448 instruct storeImmP(memory mem, immP src) %{
 6449   match(Set mem (StoreP mem src));
 6450 
 6451   ins_cost(150);
 6452   format %{ "MOV    $mem,$src" %}
 6453   opcode(0xC7);               /* C7 /0 */
 6454   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6455   ins_pipe( ialu_mem_imm );
 6456 %}
 6457 
 6458 // Store Byte Immediate
 6459 instruct storeImmB(memory mem, immI8 src) %{
 6460   match(Set mem (StoreB mem src));
 6461 
 6462   ins_cost(150);
 6463   format %{ "MOV8   $mem,$src" %}
 6464   opcode(0xC6);               /* C6 /0 */
 6465   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6466   ins_pipe( ialu_mem_imm );
 6467 %}
 6468 
 6469 // Store CMS card-mark Immediate
 6470 instruct storeImmCM(memory mem, immI8 src) %{
 6471   match(Set mem (StoreCM mem src));
 6472 
 6473   ins_cost(150);
 6474   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6475   opcode(0xC6);               /* C6 /0 */
 6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6477   ins_pipe( ialu_mem_imm );
 6478 %}
 6479 
 6480 // Store Double
 6481 instruct storeDPR( memory mem, regDPR1 src) %{
 6482   predicate(UseSSE<=1);
 6483   match(Set mem (StoreD mem src));
 6484 
 6485   ins_cost(100);
 6486   format %{ "FST_D  $mem,$src" %}
 6487   opcode(0xDD);       /* DD /2 */
 6488   ins_encode( enc_FPR_store(mem,src) );
 6489   ins_pipe( fpu_mem_reg );
 6490 %}
 6491 
 6492 // Store double does rounding on x86
 6493 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6494   predicate(UseSSE<=1);
 6495   match(Set mem (StoreD mem (RoundDouble src)));
 6496 
 6497   ins_cost(100);
 6498   format %{ "FST_D  $mem,$src\t# round" %}
 6499   opcode(0xDD);       /* DD /2 */
 6500   ins_encode( enc_FPR_store(mem,src) );
 6501   ins_pipe( fpu_mem_reg );
 6502 %}
 6503 
 6504 // Store XMM register to memory (double-precision floating point)
 6505 // MOVSD instruction
 6506 instruct storeD(memory mem, regD src) %{
 6507   predicate(UseSSE>=2);
 6508   match(Set mem (StoreD mem src));
 6509   ins_cost(95);
 6510   format %{ "MOVSD  $mem,$src" %}
 6511   ins_encode %{
 6512     __ movdbl($mem$$Address, $src$$XMMRegister);
 6513   %}
 6514   ins_pipe( pipe_slow );
 6515 %}
 6516 
 6517 // Store XMM register to memory (single-precision floating point)
 6518 // MOVSS instruction
 6519 instruct storeF(memory mem, regF src) %{
 6520   predicate(UseSSE>=1);
 6521   match(Set mem (StoreF mem src));
 6522   ins_cost(95);
 6523   format %{ "MOVSS  $mem,$src" %}
 6524   ins_encode %{
 6525     __ movflt($mem$$Address, $src$$XMMRegister);
 6526   %}
 6527   ins_pipe( pipe_slow );
 6528 %}
 6529 
 6530 
 6531 // Store Float
 6532 instruct storeFPR( memory mem, regFPR1 src) %{
 6533   predicate(UseSSE==0);
 6534   match(Set mem (StoreF mem src));
 6535 
 6536   ins_cost(100);
 6537   format %{ "FST_S  $mem,$src" %}
 6538   opcode(0xD9);       /* D9 /2 */
 6539   ins_encode( enc_FPR_store(mem,src) );
 6540   ins_pipe( fpu_mem_reg );
 6541 %}
 6542 
 6543 // Store Float does rounding on x86
 6544 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6545   predicate(UseSSE==0);
 6546   match(Set mem (StoreF mem (RoundFloat src)));
 6547 
 6548   ins_cost(100);
 6549   format %{ "FST_S  $mem,$src\t# round" %}
 6550   opcode(0xD9);       /* D9 /2 */
 6551   ins_encode( enc_FPR_store(mem,src) );
 6552   ins_pipe( fpu_mem_reg );
 6553 %}
 6554 
 6555 // Storing a double as a float rounds to single precision on x86
 6556 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6557   predicate(UseSSE<=1);
 6558   match(Set mem (StoreF mem (ConvD2F src)));
 6559 
 6560   ins_cost(100);
 6561   format %{ "FST_S  $mem,$src\t# D-round" %}
 6562   opcode(0xD9);       /* D9 /2 */
 6563   ins_encode( enc_FPR_store(mem,src) );
 6564   ins_pipe( fpu_mem_reg );
 6565 %}
 6566 
 6567 // Store immediate Float value (faster than storing from an FPU register)
 6568 // The instruction usage is guarded by predicate in operand immFPR().
 6569 instruct storeFPR_imm( memory mem, immFPR src) %{
 6570   match(Set mem (StoreF mem src));
 6571 
 6572   ins_cost(50);
 6573   format %{ "MOV    $mem,$src\t# store float" %}
 6574   opcode(0xC7);               /* C7 /0 */
 6575   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6576   ins_pipe( ialu_mem_imm );
 6577 %}
 6578 
 6579 // Store immediate Float value (faster than storing from an XMM register)
 6580 // The instruction usage is guarded by predicate in operand immF().
 6581 instruct storeF_imm( memory mem, immF src) %{
 6582   match(Set mem (StoreF mem src));
 6583 
 6584   ins_cost(50);
 6585   format %{ "MOV    $mem,$src\t# store float" %}
 6586   opcode(0xC7);               /* C7 /0 */
 6587   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6588   ins_pipe( ialu_mem_imm );
 6589 %}
 6590 
 6591 // Store Integer to stack slot
 6592 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6593   match(Set dst src);
 6594 
 6595   ins_cost(100);
 6596   format %{ "MOV    $dst,$src" %}
 6597   opcode(0x89);
 6598   ins_encode( OpcPRegSS( dst, src ) );
 6599   ins_pipe( ialu_mem_reg );
 6600 %}
 6601 
 6602 // Store Integer to stack slot
 6603 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6604   match(Set dst src);
 6605 
 6606   ins_cost(100);
 6607   format %{ "MOV    $dst,$src" %}
 6608   opcode(0x89);
 6609   ins_encode( OpcPRegSS( dst, src ) );
 6610   ins_pipe( ialu_mem_reg );
 6611 %}
 6612 
 6613 // Store Long to stack slot
 6614 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6615   match(Set dst src);
 6616 
 6617   ins_cost(200);
 6618   format %{ "MOV    $dst,$src.lo\n\t"
 6619             "MOV    $dst+4,$src.hi" %}
 6620   opcode(0x89, 0x89);
 6621   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6622   ins_pipe( ialu_mem_long_reg );
 6623 %}
 6624 
 6625 //----------MemBar Instructions-----------------------------------------------
 6626 // Memory barrier flavors
 6627 
 6628 instruct membar_acquire() %{
 6629   match(MemBarAcquire);
 6630   match(LoadFence);
 6631   ins_cost(400);
 6632 
 6633   size(0);
 6634   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6635   ins_encode();
 6636   ins_pipe(empty);
 6637 %}
 6638 
 6639 instruct membar_acquire_lock() %{
 6640   match(MemBarAcquireLock);
 6641   ins_cost(0);
 6642 
 6643   size(0);
 6644   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6645   ins_encode( );
 6646   ins_pipe(empty);
 6647 %}
 6648 
 6649 instruct membar_release() %{
 6650   match(MemBarRelease);
 6651   match(StoreFence);
 6652   ins_cost(400);
 6653 
 6654   size(0);
 6655   format %{ "MEMBAR-release ! (empty encoding)" %}
 6656   ins_encode( );
 6657   ins_pipe(empty);
 6658 %}
 6659 
 6660 instruct membar_release_lock() %{
 6661   match(MemBarReleaseLock);
 6662   ins_cost(0);
 6663 
 6664   size(0);
 6665   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6666   ins_encode( );
 6667   ins_pipe(empty);
 6668 %}
 6669 
 6670 instruct membar_volatile(eFlagsReg cr) %{
 6671   match(MemBarVolatile);
 6672   effect(KILL cr);
 6673   ins_cost(400);
 6674 
 6675   format %{
 6676     $$template
 6677     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6678   %}
 6679   ins_encode %{
 6680     __ membar(Assembler::StoreLoad);
 6681   %}
 6682   ins_pipe(pipe_slow);
 6683 %}
 6684 
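// If the matcher can prove that a preceding locked instruction (e.g. a
// CMPXCHG) already orders the store against later loads, the volatile
// barrier is elided entirely.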
 6685 instruct unnecessary_membar_volatile() %{
 6686   match(MemBarVolatile);
 6687   predicate(Matcher::post_store_load_barrier(n));
 6688   ins_cost(0);
 6689 
 6690   size(0);
 6691   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6692   ins_encode( );
 6693   ins_pipe(empty);
 6694 %}
 6695 
 6696 instruct membar_storestore() %{
 6697   match(MemBarStoreStore);
 6698   match(StoreStoreFence);
 6699   ins_cost(0);
 6700 
 6701   size(0);
 6702   format %{ "MEMBAR-storestore (empty encoding)" %}
 6703   ins_encode( );
 6704   ins_pipe(empty);
 6705 %}
 6706 
 6707 //----------Move Instructions--------------------------------------------------
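// CastX2P is free here: the operand classes pin both src and dst to EAX, so
// the node just re-types the same register and emits no code.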
 6708 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6709   match(Set dst (CastX2P src));
 6710   format %{ "# X2P  $dst, $src" %}
 6711   ins_encode( /*empty encoding*/ );
 6712   ins_cost(0);
 6713   ins_pipe(empty);
 6714 %}
 6715 
 6716 instruct castP2X(rRegI dst, eRegP src ) %{
 6717   match(Set dst (CastP2X src));
 6718   ins_cost(50);
 6719   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6720   ins_encode( enc_Copy( dst, src) );
 6721   ins_pipe( ialu_reg_reg );
 6722 %}
 6723 
 6724 //----------Conditional Move---------------------------------------------------
 6725 // Conditional move
 6726 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6727   predicate(!VM_Version::supports_cmov() );
 6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6729   ins_cost(200);
 6730   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6731             "MOV    $dst,$src\n"
 6732       "skip:" %}
 6733   ins_encode %{
 6734     Label Lskip;
 6735     // Invert sense of branch from sense of CMOV
 6736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6737     __ movl($dst$$Register, $src$$Register);
 6738     __ bind(Lskip);
 6739   %}
 6740   ins_pipe( pipe_cmov_reg );
 6741 %}
 6742 
 6743 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6744   predicate(!VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6748             "MOV    $dst,$src\n"
 6749       "skip:" %}
 6750   ins_encode %{
 6751     Label Lskip;
 6752     // Invert sense of branch from sense of CMOV
 6753     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6754     __ movl($dst$$Register, $src$$Register);
 6755     __ bind(Lskip);
 6756   %}
 6757   ins_pipe( pipe_cmov_reg );
 6758 %}
 6759 
 6760 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6761   predicate(VM_Version::supports_cmov() );
 6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6763   ins_cost(200);
 6764   format %{ "CMOV$cop $dst,$src" %}
 6765   opcode(0x0F,0x40);
 6766   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6767   ins_pipe( pipe_cmov_reg );
 6768 %}
 6769 
 6770 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6771   predicate(VM_Version::supports_cmov() );
 6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6773   ins_cost(200);
 6774   format %{ "CMOV$cop $dst,$src" %}
 6775   opcode(0x0F,0x40);
 6776   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6777   ins_pipe( pipe_cmov_reg );
 6778 %}
 6779 
 6780 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6781   predicate(VM_Version::supports_cmov() );
 6782   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6783   ins_cost(200);
 6784   expand %{
 6785     cmovI_regU(cop, cr, dst, src);
 6786   %}
 6787 %}
 6788 
 6789 // Conditional move
 6790 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6791   predicate(VM_Version::supports_cmov() );
 6792   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6793   ins_cost(250);
 6794   format %{ "CMOV$cop $dst,$src" %}
 6795   opcode(0x0F,0x40);
 6796   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6797   ins_pipe( pipe_cmov_mem );
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6802   predicate(VM_Version::supports_cmov() );
 6803   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6804   ins_cost(250);
 6805   format %{ "CMOV$cop $dst,$src" %}
 6806   opcode(0x0F,0x40);
 6807   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6808   ins_pipe( pipe_cmov_mem );
 6809 %}
 6810 
 6811 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6812   predicate(VM_Version::supports_cmov() );
 6813   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6814   ins_cost(250);
 6815   expand %{
 6816     cmovI_memU(cop, cr, dst, src);
 6817   %}
 6818 %}
 6819 
 6820 // Conditional move
 6821 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6822   predicate(VM_Version::supports_cmov() );
 6823   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6824   ins_cost(200);
 6825   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6826   opcode(0x0F,0x40);
 6827   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6828   ins_pipe( pipe_cmov_reg );
 6829 %}
 6830 
 6831 // Conditional move (non-P6 version)
 6832 // Note:  a CMoveP is generated for  stubs and native wrappers
 6833 //        regardless of whether we are on a P6, so we
 6834 //        emulate a cmov here
 6835 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6836   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6837   ins_cost(300);
 6838   format %{ "Jn$cop   skip\n\t"
 6839           "MOV    $dst,$src\t# pointer\n"
 6840       "skip:" %}
 6841   opcode(0x8b);
 6842   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6843   ins_pipe( pipe_cmov_reg );
 6844 %}
 6845 
 6846 // Conditional move
 6847 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6848   predicate(VM_Version::supports_cmov() );
 6849   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6850   ins_cost(200);
 6851   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6852   opcode(0x0F,0x40);
 6853   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6854   ins_pipe( pipe_cmov_reg );
 6855 %}
 6856 
 6857 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6858   predicate(VM_Version::supports_cmov() );
 6859   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6860   ins_cost(200);
 6861   expand %{
 6862     cmovP_regU(cop, cr, dst, src);
 6863   %}
 6864 %}
 6865 
 6866 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6867 // correctly meets the two pointer arguments; one is an incoming
 6868 // register but the other is a memory operand.  ALSO appears to
 6869 // be buggy with implicit null checks.
 6870 //
 6871 //// Conditional move
 6872 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6873 //  predicate(VM_Version::supports_cmov() );
 6874 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6875 //  ins_cost(250);
 6876 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6877 //  opcode(0x0F,0x40);
 6878 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6879 //  ins_pipe( pipe_cmov_mem );
 6880 //%}
 6881 //
 6882 //// Conditional move
 6883 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6884 //  predicate(VM_Version::supports_cmov() );
 6885 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6886 //  ins_cost(250);
 6887 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6888 //  opcode(0x0F,0x40);
 6889 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6890 //  ins_pipe( pipe_cmov_mem );
 6891 //%}
 6892 
 6893 // Conditional move
 6894 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6895   predicate(UseSSE<=1);
 6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6897   ins_cost(200);
 6898   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6899   opcode(0xDA);
 6900   ins_encode( enc_cmov_dpr(cop,src) );
 6901   ins_pipe( pipe_cmovDPR_reg );
 6902 %}
 6903 
 6904 // Conditional move
 6905 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6906   predicate(UseSSE==0);
 6907   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6908   ins_cost(200);
 6909   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6910   opcode(0xDA);
 6911   ins_encode( enc_cmov_dpr(cop,src) );
 6912   ins_pipe( pipe_cmovDPR_reg );
 6913 %}
 6914 
 6915 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6916 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6917   predicate(UseSSE<=1);
 6918   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6919   ins_cost(200);
 6920   format %{ "Jn$cop   skip\n\t"
 6921             "MOV    $dst,$src\t# double\n"
 6922       "skip:" %}
 6923   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6924   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6925   ins_pipe( pipe_cmovDPR_reg );
 6926 %}
 6927 
 6928 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6929 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6930   predicate(UseSSE==0);
 6931   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6932   ins_cost(200);
 6933   format %{ "Jn$cop    skip\n\t"
 6934             "MOV    $dst,$src\t# float\n"
 6935       "skip:" %}
 6936   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6937   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6938   ins_pipe( pipe_cmovDPR_reg );
 6939 %}
 6940 
 6941 // There is no FP conditional move for XMM registers, so emulate it with a short branch
 6942 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6943   predicate (UseSSE>=1);
 6944   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6945   ins_cost(200);
 6946   format %{ "Jn$cop   skip\n\t"
 6947             "MOVSS  $dst,$src\t# float\n"
 6948       "skip:" %}
 6949   ins_encode %{
 6950     Label skip;
 6951     // Invert sense of branch from sense of CMOV
 6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6953     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6954     __ bind(skip);
 6955   %}
 6956   ins_pipe( pipe_slow );
 6957 %}
 6958 
 6959 // There is no FP conditional move for XMM registers, so emulate it with a short branch
 6960 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6961   predicate (UseSSE>=2);
 6962   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6963   ins_cost(200);
 6964   format %{ "Jn$cop   skip\n\t"
 6965             "MOVSD  $dst,$src\t# double\n"
 6966       "skip:" %}
 6967   ins_encode %{
 6968     Label skip;
 6969     // Invert sense of branch from sense of CMOV
 6970     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6971     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6972     __ bind(skip);
 6973   %}
 6974   ins_pipe( pipe_slow );
 6975 %}
 6976 
 6977 // unsigned version
 6978 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6979   predicate (UseSSE>=1);
 6980   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6981   ins_cost(200);
 6982   format %{ "Jn$cop   skip\n\t"
 6983             "MOVSS  $dst,$src\t# float\n"
 6984       "skip:" %}
 6985   ins_encode %{
 6986     Label skip;
 6987     // Invert sense of branch from sense of CMOV
 6988     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6989     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6990     __ bind(skip);
 6991   %}
 6992   ins_pipe( pipe_slow );
 6993 %}
 6994 
 6995 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6996   predicate (UseSSE>=1);
 6997   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6998   ins_cost(200);
 6999   expand %{
 7000     fcmovF_regU(cop, cr, dst, src);
 7001   %}
 7002 %}
 7003 
 7004 // unsigned version
 7005 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7006   predicate (UseSSE>=2);
 7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7008   ins_cost(200);
 7009   format %{ "Jn$cop   skip\n\t"
 7010             "MOVSD  $dst,$src\t# double\n"
 7011       "skip:" %}
 7012   ins_encode %{
 7013     Label skip;
 7014     // Invert sense of branch from sense of CMOV
 7015     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7016     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7017     __ bind(skip);
 7018   %}
 7019   ins_pipe( pipe_slow );
 7020 %}
 7021 
 7022 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7023   predicate (UseSSE>=2);
 7024   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   expand %{
 7027     fcmovD_regU(cop, cr, dst, src);
 7028   %}
 7029 %}
 7030 
 7031 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7032   predicate(VM_Version::supports_cmov() );
 7033   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7034   ins_cost(200);
 7035   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7036             "CMOV$cop $dst.hi,$src.hi" %}
 7037   opcode(0x0F,0x40);
 7038   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7039   ins_pipe( pipe_cmov_reg_long );
 7040 %}
 7041 
 7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7043   predicate(VM_Version::supports_cmov() );
 7044   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7045   ins_cost(200);
 7046   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7047             "CMOV$cop $dst.hi,$src.hi" %}
 7048   opcode(0x0F,0x40);
 7049   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7050   ins_pipe( pipe_cmov_reg_long );
 7051 %}
 7052 
 7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7054   predicate(VM_Version::supports_cmov() );
 7055   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7056   ins_cost(200);
 7057   expand %{
 7058     cmovL_regU(cop, cr, dst, src);
 7059   %}
 7060 %}
 7061 
 7062 //----------Arithmetic Instructions--------------------------------------------
 7063 //----------Addition Instructions----------------------------------------------
 7064 
 7065 // Integer Addition Instructions
 7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7067   match(Set dst (AddI dst src));
 7068   effect(KILL cr);
 7069 
 7070   size(2);
 7071   format %{ "ADD    $dst,$src" %}
 7072   opcode(0x03);
 7073   ins_encode( OpcP, RegReg( dst, src) );
 7074   ins_pipe( ialu_reg_reg );
 7075 %}
 7076 
 7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7078   match(Set dst (AddI dst src));
 7079   effect(KILL cr);
 7080 
 7081   format %{ "ADD    $dst,$src" %}
 7082   opcode(0x81, 0x00); /* /0 id */
 7083   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7084   ins_pipe( ialu_reg );
 7085 %}
 7086 
 7087 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7088   predicate(UseIncDec);
 7089   match(Set dst (AddI dst src));
 7090   effect(KILL cr);
 7091 
 7092   size(1);
 7093   format %{ "INC    $dst" %}
 7094   opcode(0x40); /* 40 + rd : INC r32 */
 7095   ins_encode( Opc_plus( primary, dst ) );
 7096   ins_pipe( ialu_reg );
 7097 %}
 7098 
 7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7100   match(Set dst (AddI src0 src1));
 7101   ins_cost(110);
 7102 
 7103   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7104   opcode(0x8D); /* 0x8D /r */
 7105   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7106   ins_pipe( ialu_reg_reg );
 7107 %}
 7108 
 7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7110   match(Set dst (AddP src0 src1));
 7111   ins_cost(110);
 7112 
 7113   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7114   opcode(0x8D); /* 0x8D /r */
 7115   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7116   ins_pipe( ialu_reg_reg );
 7117 %}
 7118 
 7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7120   predicate(UseIncDec);
 7121   match(Set dst (AddI dst src));
 7122   effect(KILL cr);
 7123 
 7124   size(1);
 7125   format %{ "DEC    $dst" %}
 7126   opcode(0x48); /* 48 + rd : DEC r32 */
 7127   ins_encode( Opc_plus( primary, dst ) );
 7128   ins_pipe( ialu_reg );
 7129 %}
 7130 
 7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7132   match(Set dst (AddP dst src));
 7133   effect(KILL cr);
 7134 
 7135   size(2);
 7136   format %{ "ADD    $dst,$src" %}
 7137   opcode(0x03);
 7138   ins_encode( OpcP, RegReg( dst, src) );
 7139   ins_pipe( ialu_reg_reg );
 7140 %}
 7141 
 7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7143   match(Set dst (AddP dst src));
 7144   effect(KILL cr);
 7145 
 7146   format %{ "ADD    $dst,$src" %}
 7147   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7148   // ins_encode( RegImm( dst, src) );
 7149   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7150   ins_pipe( ialu_reg );
 7151 %}
 7152 
 7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7154   match(Set dst (AddI dst (LoadI src)));
 7155   effect(KILL cr);
 7156 
 7157   ins_cost(150);
 7158   format %{ "ADD    $dst,$src" %}
 7159   opcode(0x03);
 7160   ins_encode( OpcP, RegMem( dst, src) );
 7161   ins_pipe( ialu_reg_mem );
 7162 %}
 7163 
 7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7165   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7166   effect(KILL cr);
 7167 
 7168   ins_cost(150);
 7169   format %{ "ADD    $dst,$src" %}
 7170   opcode(0x01);  /* Opcode 01 /r */
 7171   ins_encode( OpcP, RegMem( src, dst ) );
 7172   ins_pipe( ialu_mem_reg );
 7173 %}
 7174 
 7175 // Add Memory with Immediate
 7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7178   effect(KILL cr);
 7179 
 7180   ins_cost(125);
 7181   format %{ "ADD    $dst,$src" %}
 7182   opcode(0x81);               /* Opcode 81 /0 id */
 7183   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7184   ins_pipe( ialu_mem_imm );
 7185 %}
 7186 
 7187 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7189   effect(KILL cr);
 7190 
 7191   ins_cost(125);
 7192   format %{ "INC    $dst" %}
 7193   opcode(0xFF);               /* Opcode FF /0 */
 7194   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7195   ins_pipe( ialu_mem_imm );
 7196 %}
 7197 
 7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7199   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7200   effect(KILL cr);
 7201 
 7202   ins_cost(125);
 7203   format %{ "DEC    $dst" %}
 7204   opcode(0xFF);               /* Opcode FF /1 */
 7205   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7206   ins_pipe( ialu_mem_imm );
 7207 %}
 7208 
 7209 
 7210 instruct checkCastPP( eRegP dst ) %{
 7211   match(Set dst (CheckCastPP dst));
 7212 
 7213   size(0);
 7214   format %{ "#checkcastPP of $dst" %}
 7215   ins_encode( /*empty encoding*/ );
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castPP( eRegP dst ) %{
 7220   match(Set dst (CastPP dst));
 7221   format %{ "#castPP of $dst" %}
 7222   ins_encode( /*empty encoding*/ );
 7223   ins_pipe( empty );
 7224 %}
 7225 
 7226 instruct castII( rRegI dst ) %{
 7227   match(Set dst (CastII dst));
 7228   format %{ "#castII of $dst" %}
 7229   ins_encode( /*empty encoding*/ );
 7230   ins_cost(0);
 7231   ins_pipe( empty );
 7232 %}
 7233 
 7234 instruct castLL( eRegL dst ) %{
 7235   match(Set dst (CastLL dst));
 7236   format %{ "#castLL of $dst" %}
 7237   ins_encode( /*empty encoding*/ );
 7238   ins_cost(0);
 7239   ins_pipe( empty );
 7240 %}
 7241 
 7242 instruct castFF( regF dst ) %{
 7243   predicate(UseSSE >= 1);
 7244   match(Set dst (CastFF dst));
 7245   format %{ "#castFF of $dst" %}
 7246   ins_encode( /*empty encoding*/ );
 7247   ins_cost(0);
 7248   ins_pipe( empty );
 7249 %}
 7250 
 7251 instruct castDD( regD dst ) %{
 7252   predicate(UseSSE >= 2);
 7253   match(Set dst (CastDD dst));
 7254   format %{ "#castDD of $dst" %}
 7255   ins_encode( /*empty encoding*/ );
 7256   ins_cost(0);
 7257   ins_pipe( empty );
 7258 %}
 7259 
 7260 instruct castFF_PR( regFPR dst ) %{
 7261   predicate(UseSSE < 1);
 7262   match(Set dst (CastFF dst));
 7263   format %{ "#castFF of $dst" %}
 7264   ins_encode( /*empty encoding*/ );
 7265   ins_cost(0);
 7266   ins_pipe( empty );
 7267 %}
 7268 
 7269 instruct castDD_PR( regDPR dst ) %{
 7270   predicate(UseSSE < 2);
 7271   match(Set dst (CastDD dst));
 7272   format %{ "#castDD of $dst" %}
 7273   ins_encode( /*empty encoding*/ );
 7274   ins_cost(0);
 7275   ins_pipe( empty );
 7276 %}
 7277 
 7278 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7279 
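// CMPXCHG8B has fixed register requirements: the expected value in EDX:EAX
// and the new value in ECX:EBX, hence the eADXRegL/eBCXRegL operand classes
// and the supports_cx8() predicate.  On success ZF is set; the flags are then
// converted into a boolean result in $res.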
 7280 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7281   predicate(VM_Version::supports_cx8());
 7282   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7283   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7284   effect(KILL cr, KILL oldval);
 7285   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7286             "MOV    $res,0\n\t"
 7287             "JNE,s  fail\n\t"
 7288             "MOV    $res,1\n"
 7289           "fail:" %}
 7290   ins_encode( enc_cmpxchg8(mem_ptr),
 7291               enc_flags_ne_to_boolean(res) );
 7292   ins_pipe( pipe_cmpxchg );
 7293 %}
 7294 
 7295 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7296   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7297   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7298   effect(KILL cr, KILL oldval);
 7299   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7300             "MOV    $res,0\n\t"
 7301             "JNE,s  fail\n\t"
 7302             "MOV    $res,1\n"
 7303           "fail:" %}
 7304   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7305   ins_pipe( pipe_cmpxchg );
 7306 %}
 7307 
 7308 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7309   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7310   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7311   effect(KILL cr, KILL oldval);
 7312   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7313             "MOV    $res,0\n\t"
 7314             "JNE,s  fail\n\t"
 7315             "MOV    $res,1\n"
 7316           "fail:" %}
 7317   ins_encode( enc_cmpxchgb(mem_ptr),
 7318               enc_flags_ne_to_boolean(res) );
 7319   ins_pipe( pipe_cmpxchg );
 7320 %}
 7321 
 7322 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7323   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7324   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7325   effect(KILL cr, KILL oldval);
 7326   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7327             "MOV    $res,0\n\t"
 7328             "JNE,s  fail\n\t"
 7329             "MOV    $res,1\n"
 7330           "fail:" %}
 7331   ins_encode( enc_cmpxchgw(mem_ptr),
 7332               enc_flags_ne_to_boolean(res) );
 7333   ins_pipe( pipe_cmpxchg );
 7334 %}
 7335 
 7336 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7337   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7338   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7339   effect(KILL cr, KILL oldval);
 7340   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7341             "MOV    $res,0\n\t"
 7342             "JNE,s  fail\n\t"
 7343             "MOV    $res,1\n"
 7344           "fail:" %}
 7345   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7346   ins_pipe( pipe_cmpxchg );
 7347 %}
 7348 
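// CompareAndExchange differs from CompareAndSwap only in its result: the old
// value is left in the fixed register (EAX, or EDX:EAX for longs) instead of
// being converted into a boolean.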
 7349 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7350   predicate(VM_Version::supports_cx8());
 7351   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7352   effect(KILL cr);
 7353   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7354   ins_encode( enc_cmpxchg8(mem_ptr) );
 7355   ins_pipe( pipe_cmpxchg );
 7356 %}
 7357 
 7358 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7359   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7360   effect(KILL cr);
 7361   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7362   ins_encode( enc_cmpxchg(mem_ptr) );
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7367   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7368   effect(KILL cr);
 7369   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7370   ins_encode( enc_cmpxchgb(mem_ptr) );
 7371   ins_pipe( pipe_cmpxchg );
 7372 %}
 7373 
 7374 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7375   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7376   effect(KILL cr);
 7377   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7378   ins_encode( enc_cmpxchgw(mem_ptr) );
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
 7381 
 7382 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7383   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7384   effect(KILL cr);
 7385   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7386   ins_encode( enc_cmpxchg(mem_ptr) );
 7387   ins_pipe( pipe_cmpxchg );
 7388 %}
 7389 
 7390 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7391   predicate(n->as_LoadStore()->result_not_used());
 7392   match(Set dummy (GetAndAddB mem add));
 7393   effect(KILL cr);
 7394   format %{ "ADDB  [$mem],$add" %}
 7395   ins_encode %{
 7396     __ lock();
 7397     __ addb($mem$$Address, $add$$constant);
 7398   %}
 7399   ins_pipe( pipe_cmpxchg );
 7400 %}
 7401 
 7402 // Important to match to xRegI: only 8-bit regs.
 7403 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7404   match(Set newval (GetAndAddB mem newval));
 7405   effect(KILL cr);
 7406   format %{ "XADDB  [$mem],$newval" %}
 7407   ins_encode %{
 7408     __ lock();
 7409     __ xaddb($mem$$Address, $newval$$Register);
 7410   %}
 7411   ins_pipe( pipe_cmpxchg );
 7412 %}
 7413 
 7414 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7415   predicate(n->as_LoadStore()->result_not_used());
 7416   match(Set dummy (GetAndAddS mem add));
 7417   effect(KILL cr);
 7418   format %{ "ADDS  [$mem],$add" %}
 7419   ins_encode %{
 7420     __ lock();
 7421     __ addw($mem$$Address, $add$$constant);
 7422   %}
 7423   ins_pipe( pipe_cmpxchg );
 7424 %}
 7425 
 7426 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7427   match(Set newval (GetAndAddS mem newval));
 7428   effect(KILL cr);
 7429   format %{ "XADDS  [$mem],$newval" %}
 7430   ins_encode %{
 7431     __ lock();
 7432     __ xaddw($mem$$Address, $newval$$Register);
 7433   %}
 7434   ins_pipe( pipe_cmpxchg );
 7435 %}
 7436 
 7437 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7438   predicate(n->as_LoadStore()->result_not_used());
 7439   match(Set dummy (GetAndAddI mem add));
 7440   effect(KILL cr);
 7441   format %{ "ADDL  [$mem],$add" %}
 7442   ins_encode %{
 7443     __ lock();
 7444     __ addl($mem$$Address, $add$$constant);
 7445   %}
 7446   ins_pipe( pipe_cmpxchg );
 7447 %}
 7448 
 7449 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7450   match(Set newval (GetAndAddI mem newval));
 7451   effect(KILL cr);
 7452   format %{ "XADDL  [$mem],$newval" %}
 7453   ins_encode %{
 7454     __ lock();
 7455     __ xaddl($mem$$Address, $newval$$Register);
 7456   %}
 7457   ins_pipe( pipe_cmpxchg );
 7458 %}
 7459 
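// GetAndSet maps to XCHG; with a memory operand XCHG is implicitly locked,
// so unlike the XADD forms above no explicit LOCK prefix is emitted.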
 7460 // Important to match to xRegI: only 8-bit regs.
 7461 instruct xchgB( memory mem, xRegI newval) %{
 7462   match(Set newval (GetAndSetB mem newval));
 7463   format %{ "XCHGB  $newval,[$mem]" %}
 7464   ins_encode %{
 7465     __ xchgb($newval$$Register, $mem$$Address);
 7466   %}
 7467   ins_pipe( pipe_cmpxchg );
 7468 %}
 7469 
 7470 instruct xchgS( memory mem, rRegI newval) %{
 7471   match(Set newval (GetAndSetS mem newval));
 7472   format %{ "XCHGW  $newval,[$mem]" %}
 7473   ins_encode %{
 7474     __ xchgw($newval$$Register, $mem$$Address);
 7475   %}
 7476   ins_pipe( pipe_cmpxchg );
 7477 %}
 7478 
 7479 instruct xchgI( memory mem, rRegI newval) %{
 7480   match(Set newval (GetAndSetI mem newval));
 7481   format %{ "XCHGL  $newval,[$mem]" %}
 7482   ins_encode %{
 7483     __ xchgl($newval$$Register, $mem$$Address);
 7484   %}
 7485   ins_pipe( pipe_cmpxchg );
 7486 %}
 7487 
 7488 instruct xchgP( memory mem, pRegP newval) %{
 7489   match(Set newval (GetAndSetP mem newval));
 7490   format %{ "XCHGL  $newval,[$mem]" %}
 7491   ins_encode %{
 7492     __ xchgl($newval$$Register, $mem$$Address);
 7493   %}
 7494   ins_pipe( pipe_cmpxchg );
 7495 %}
 7496 
 7497 //----------Subtraction Instructions-------------------------------------------
 7498 
 7499 // Integer Subtraction Instructions
 7500 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7501   match(Set dst (SubI dst src));
 7502   effect(KILL cr);
 7503 
 7504   size(2);
 7505   format %{ "SUB    $dst,$src" %}
 7506   opcode(0x2B);
 7507   ins_encode( OpcP, RegReg( dst, src) );
 7508   ins_pipe( ialu_reg_reg );
 7509 %}
 7510 
 7511 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7512   match(Set dst (SubI dst src));
 7513   effect(KILL cr);
 7514 
 7515   format %{ "SUB    $dst,$src" %}
 7516   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7517   // ins_encode( RegImm( dst, src) );
 7518   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7519   ins_pipe( ialu_reg );
 7520 %}
 7521 
 7522 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7523   match(Set dst (SubI dst (LoadI src)));
 7524   effect(KILL cr);
 7525 
 7526   ins_cost(150);
 7527   format %{ "SUB    $dst,$src" %}
 7528   opcode(0x2B);
 7529   ins_encode( OpcP, RegMem( dst, src) );
 7530   ins_pipe( ialu_reg_mem );
 7531 %}
 7532 
 7533 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7534   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7535   effect(KILL cr);
 7536 
 7537   ins_cost(150);
 7538   format %{ "SUB    $dst,$src" %}
 7539   opcode(0x29);  /* Opcode 29 /r */
 7540   ins_encode( OpcP, RegMem( src, dst ) );
 7541   ins_pipe( ialu_mem_reg );
 7542 %}
 7543 
 7544 // Subtract from a pointer
 7545 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7546   match(Set dst (AddP dst (SubI zero src)));
 7547   effect(KILL cr);
 7548 
 7549   size(2);
 7550   format %{ "SUB    $dst,$src" %}
 7551   opcode(0x2B);
 7552   ins_encode( OpcP, RegReg( dst, src) );
 7553   ins_pipe( ialu_reg_reg );
 7554 %}
 7555 
 7556 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7557   match(Set dst (SubI zero dst));
 7558   effect(KILL cr);
 7559 
 7560   size(2);
 7561   format %{ "NEG    $dst" %}
 7562   opcode(0xF7,0x03);  // Opcode F7 /3
 7563   ins_encode( OpcP, RegOpc( dst ) );
 7564   ins_pipe( ialu_reg );
 7565 %}
 7566 
 7567 //----------Multiplication/Division Instructions-------------------------------
 7568 // Integer Multiplication Instructions
 7569 // Multiply Register
 7570 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7571   match(Set dst (MulI dst src));
 7572   effect(KILL cr);
 7573 
 7574   size(3);
 7575   ins_cost(300);
 7576   format %{ "IMUL   $dst,$src" %}
 7577   opcode(0xAF, 0x0F);
 7578   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7579   ins_pipe( ialu_reg_reg_alu0 );
 7580 %}
 7581 
 7582 // Multiply 32-bit Immediate
 7583 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7584   match(Set dst (MulI src imm));
 7585   effect(KILL cr);
 7586 
 7587   ins_cost(300);
 7588   format %{ "IMUL   $dst,$src,$imm" %}
 7589   opcode(0x69);  /* 69 /r id */
 7590   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7591   ins_pipe( ialu_reg_reg_alu0 );
 7592 %}
 7593 
 7594 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7595   match(Set dst src);
 7596   effect(KILL cr);
 7597 
 7598   // Note that this is artificially increased to make it more expensive than loadConL
 7599   ins_cost(250);
 7600   format %{ "MOV    EAX,$src\t// low word only" %}
 7601   opcode(0xB8);
 7602   ins_encode( LdImmL_Lo(dst, src) );
 7603   ins_pipe( ialu_reg_fat );
 7604 %}
 7605 
 7606 // Multiply by 32-bit Immediate, taking the shifted high order results
 7607 //  (special case for shift by 32)
 7608 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7609   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7610   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7611              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7613   effect(USE src1, KILL cr);
 7614 
 7615   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7616   ins_cost(0*100 + 1*400 - 150);
 7617   format %{ "IMUL   EDX:EAX,$src1" %}
 7618   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7619   ins_pipe( pipe_slow );
 7620 %}
 7621 
 7622 // Multiply by 32-bit Immediate, taking the shifted high order results
 7623 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7624   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7625   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7626              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7628   effect(USE src1, KILL cr);
 7629 
 7630   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7631   ins_cost(1*100 + 1*400 - 150);
 7632   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7633             "SAR    EDX,$cnt-32" %}
 7634   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7635   ins_pipe( pipe_slow );
 7636 %}
 7637 
 7638 // Multiply Memory 32-bit Immediate
 7639 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7640   match(Set dst (MulI (LoadI src) imm));
 7641   effect(KILL cr);
 7642 
 7643   ins_cost(300);
 7644   format %{ "IMUL   $dst,$src,$imm" %}
 7645   opcode(0x69);  /* 69 /r id */
 7646   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7647   ins_pipe( ialu_reg_mem_alu0 );
 7648 %}
 7649 
 7650 // Multiply Memory
 7651 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7652   match(Set dst (MulI dst (LoadI src)));
 7653   effect(KILL cr);
 7654 
 7655   ins_cost(350);
 7656   format %{ "IMUL   $dst,$src" %}
 7657   opcode(0xAF, 0x0F);
 7658   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7659   ins_pipe( ialu_reg_mem_alu0 );
 7660 %}
 7661 
 7662 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7663 %{
 7664   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7665   effect(KILL cr, KILL src2);
 7666 
 7667   expand %{ mulI_eReg(dst, src1, cr);
 7668            mulI_eReg(src2, src3, cr);
 7669            addI_eReg(dst, src2, cr); %}
 7670 %}
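// MulAddS2I has no dedicated encoding here; the expand above simply reuses
// mulI_eReg twice and addI_eReg for the final sum.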
 7671 
 7672 // Multiply Register Int to Long
 7673 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7674   // Basic Idea: long = (long)int * (long)int
 7675   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7676   effect(DEF dst, USE src, USE src1, KILL flags);
 7677 
 7678   ins_cost(300);
 7679   format %{ "IMUL   $dst,$src1" %}
 7680 
 7681   ins_encode( long_int_multiply( dst, src1 ) );
 7682   ins_pipe( ialu_reg_reg_alu0 );
 7683 %}
 7684 
 7685 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7686   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7687   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7688   effect(KILL flags);
 7689 
 7690   ins_cost(300);
 7691   format %{ "MUL    $dst,$src1" %}
 7692 
 7693   ins_encode( long_uint_multiply(dst, src1) );
 7694   ins_pipe( ialu_reg_reg_alu0 );
 7695 %}
 7696 
 7697 // Multiply Register Long
 7698 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7699   match(Set dst (MulL dst src));
 7700   effect(KILL cr, TEMP tmp);
 7701   ins_cost(4*100+3*400);
 7702 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7703 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7704   format %{ "MOV    $tmp,$src.lo\n\t"
 7705             "IMUL   $tmp,EDX\n\t"
 7706             "MOV    EDX,$src.hi\n\t"
 7707             "IMUL   EDX,EAX\n\t"
 7708             "ADD    $tmp,EDX\n\t"
 7709             "MUL    EDX:EAX,$src.lo\n\t"
 7710             "ADD    EDX,$tmp" %}
 7711   ins_encode( long_multiply( dst, src, tmp ) );
 7712   ins_pipe( pipe_slow );
 7713 %}
 7714 
 7715 // Multiply Register Long where the left operand's high 32 bits are zero
 7716 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7717   predicate(is_operand_hi32_zero(n->in(1)));
 7718   match(Set dst (MulL dst src));
 7719   effect(KILL cr, TEMP tmp);
 7720   ins_cost(2*100+2*400);
 7721 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7722 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7723   format %{ "MOV    $tmp,$src.hi\n\t"
 7724             "IMUL   $tmp,EAX\n\t"
 7725             "MUL    EDX:EAX,$src.lo\n\t"
 7726             "ADD    EDX,$tmp" %}
 7727   ins_encode %{
 7728     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7729     __ imull($tmp$$Register, rax);
 7730     __ mull($src$$Register);
 7731     __ addl(rdx, $tmp$$Register);
 7732   %}
 7733   ins_pipe( pipe_slow );
 7734 %}
 7735 
 7736 // Multiply Register Long where the right operand's high 32 bits are zero
 7737 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7738   predicate(is_operand_hi32_zero(n->in(2)));
 7739   match(Set dst (MulL dst src));
 7740   effect(KILL cr, TEMP tmp);
 7741   ins_cost(2*100+2*400);
 7742 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7743 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7744   format %{ "MOV    $tmp,$src.lo\n\t"
 7745             "IMUL   $tmp,EDX\n\t"
 7746             "MUL    EDX:EAX,$src.lo\n\t"
 7747             "ADD    EDX,$tmp" %}
 7748   ins_encode %{
 7749     __ movl($tmp$$Register, $src$$Register);
 7750     __ imull($tmp$$Register, rdx);
 7751     __ mull($src$$Register);
 7752     __ addl(rdx, $tmp$$Register);
 7753   %}
 7754   ins_pipe( pipe_slow );
 7755 %}
 7756 
 7757 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7758 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7759   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7760   match(Set dst (MulL dst src));
 7761   effect(KILL cr);
 7762   ins_cost(1*400);
 7763 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7764 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7765   format %{ "MUL    EDX:EAX,$src.lo" %}
 7766   ins_encode %{
 7767     __ mull($src$$Register);
 7768   %}
 7769   ins_pipe( pipe_slow );
 7770 %}
 7771 
 7772 // Multiply Register Long by small constant
 7773 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7774   match(Set dst (MulL dst src));
 7775   effect(KILL cr, TEMP tmp);
 7776   ins_cost(2*100+2*400);
 7777   size(12);
 7778 // Basic idea: lo(result) = lo(src * EAX)
 7779 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7780   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7781             "MOV    EDX,$src\n\t"
 7782             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7783             "ADD    EDX,$tmp" %}
 7784   ins_encode( long_multiply_con( dst, src, tmp ) );
 7785   ins_pipe( pipe_slow );
 7786 %}
 7787 
 7788 // Integer DIV with Register
 7789 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7790   match(Set rax (DivI rax div));
 7791   effect(KILL rdx, KILL cr);
 7792   size(26);
 7793   ins_cost(30*100+10*100);
 7794   format %{ "CMP    EAX,0x80000000\n\t"
 7795             "JNE,s  normal\n\t"
 7796             "XOR    EDX,EDX\n\t"
 7797             "CMP    ECX,-1\n\t"
 7798             "JE,s   done\n"
 7799     "normal: CDQ\n\t"
 7800             "IDIV   $div\n\t"
 7801     "done:"        %}
 7802   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7803   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7804   ins_pipe( ialu_reg_reg_alu0 );
 7805 %}
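      // Note on the special case above: IDIV raises a hardware divide fault when
      // the quotient does not fit, and min_jint / -1 is exactly that case
      // (-2147483648 / -1 = +2147483648, one more than max_jint). Java instead
      // defines the result as min_jint with remainder 0, so the sequence tests
      // EAX == 0x80000000 and ECX == -1 and skips the IDIV (EDX already zeroed
      // by the XOR) when both hold. The DIVMOD rule below uses the same guard.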
 7806 
 7807 // Divide Register Long
 7808 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7809   match(Set dst (DivL src1 src2));
 7810   effect(CALL);
 7811   ins_cost(10000);
 7812   format %{ "PUSH   $src1.hi\n\t"
 7813             "PUSH   $src1.lo\n\t"
 7814             "PUSH   $src2.hi\n\t"
 7815             "PUSH   $src2.lo\n\t"
 7816             "CALL   SharedRuntime::ldiv\n\t"
 7817             "ADD    ESP,16" %}
 7818   ins_encode( long_div(src1,src2) );
 7819   ins_pipe( pipe_slow );
 7820 %}
 7821 
 7822 // Integer DIVMOD with Register, both quotient and mod results
 7823 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7824   match(DivModI rax div);
 7825   effect(KILL cr);
 7826   size(26);
 7827   ins_cost(30*100+10*100);
 7828   format %{ "CMP    EAX,0x80000000\n\t"
 7829             "JNE,s  normal\n\t"
 7830             "XOR    EDX,EDX\n\t"
 7831             "CMP    ECX,-1\n\t"
 7832             "JE,s   done\n"
 7833     "normal: CDQ\n\t"
 7834             "IDIV   $div\n\t"
 7835     "done:"        %}
 7836   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7837   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7838   ins_pipe( pipe_slow );
 7839 %}
 7840 
 7841 // Integer MOD with Register
 7842 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7843   match(Set rdx (ModI rax div));
 7844   effect(KILL rax, KILL cr);
 7845 
 7846   size(26);
 7847   ins_cost(300);
 7848   format %{ "CDQ\n\t"
 7849             "IDIV   $div" %}
 7850   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7851   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7852   ins_pipe( ialu_reg_reg_alu0 );
 7853 %}
 7854 
 7855 // Remainder Register Long
 7856 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7857   match(Set dst (ModL src1 src2));
 7858   effect(CALL);
 7859   ins_cost(10000);
 7860   format %{ "PUSH   $src1.hi\n\t"
 7861             "PUSH   $src1.lo\n\t"
 7862             "PUSH   $src2.hi\n\t"
 7863             "PUSH   $src2.lo\n\t"
 7864             "CALL   SharedRuntime::lrem\n\t"
 7865             "ADD    ESP,16" %}
 7866   ins_encode( long_mod(src1,src2) );
 7867   ins_pipe( pipe_slow );
 7868 %}
 7869 
 7870 // Divide Register Long (no special case since divisor != -1)
 7871 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7872   match(Set dst (DivL dst imm));
 7873   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7874   ins_cost(1000);
 7875   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7876             "XOR    $tmp2,$tmp2\n\t"
 7877             "CMP    $tmp,EDX\n\t"
 7878             "JA,s   fast\n\t"
 7879             "MOV    $tmp2,EAX\n\t"
 7880             "MOV    EAX,EDX\n\t"
 7881             "MOV    EDX,0\n\t"
 7882             "JLE,s  pos\n\t"
 7883             "LNEG   EAX : $tmp2\n\t"
 7884             "DIV    $tmp # unsigned division\n\t"
 7885             "XCHG   EAX,$tmp2\n\t"
 7886             "DIV    $tmp\n\t"
 7887             "LNEG   $tmp2 : EAX\n\t"
 7888             "JMP,s  done\n"
 7889     "pos:\n\t"
 7890             "DIV    $tmp\n\t"
 7891             "XCHG   EAX,$tmp2\n"
 7892     "fast:\n\t"
 7893             "DIV    $tmp\n"
 7894     "done:\n\t"
 7895             "MOV    EDX,$tmp2\n\t"
 7896             "NEG    EDX:EAX # if $imm < 0" %}
 7897   ins_encode %{
 7898     int con = (int)$imm$$constant;
 7899     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7900     int pcon = (con > 0) ? con : -con;
 7901     Label Lfast, Lpos, Ldone;
 7902 
 7903     __ movl($tmp$$Register, pcon);
 7904     __ xorl($tmp2$$Register,$tmp2$$Register);
 7905     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7906     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7907 
 7908     __ movl($tmp2$$Register, $dst$$Register); // save
 7909     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7910     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7911     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7912 
 7913     // Negative dividend.
 7914     // convert value to positive to use unsigned division
 7915     __ lneg($dst$$Register, $tmp2$$Register);
 7916     __ divl($tmp$$Register);
 7917     __ xchgl($dst$$Register, $tmp2$$Register);
 7918     __ divl($tmp$$Register);
 7919     // revert result back to negative
 7920     __ lneg($tmp2$$Register, $dst$$Register);
 7921     __ jmpb(Ldone);
 7922 
 7923     __ bind(Lpos);
 7924     __ divl($tmp$$Register); // Use unsigned division
 7925     __ xchgl($dst$$Register, $tmp2$$Register);
 7926     // Fallthrough to the final divide; tmp2 has the 32-bit hi result
 7927 
 7928     __ bind(Lfast);
 7929     // fast path: src is positive
 7930     __ divl($tmp$$Register); // Use unsigned division
 7931 
 7932     __ bind(Ldone);
 7933     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7934     if (con < 0) {
 7935       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7936     }
 7937   %}
 7938   ins_pipe( pipe_slow );
 7939 %}
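      // The rule above is essentially schoolbook division of a two-"digit"
      // (32-bit digits) dividend by a one-digit divisor, done with unsigned DIV
      // after stripping signs. A rough C sketch of the unsigned core
      // (illustrative only; the helper name udiv64_by_u32 is made up):
      //
      //   #include <stdint.h>
      //   static uint64_t udiv64_by_u32(uint32_t hi, uint32_t lo, uint32_t d) {
      //     if (hi < d) {                                   // fast path: quotient fits in 32 bits
      //       return (((uint64_t)hi << 32) | lo) / d;       // single DIV
      //     }
      //     uint32_t q_hi = hi / d;                         // first DIV
      //     uint32_t r    = hi % d;                         // r < d, so the second DIV cannot overflow
      //     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | lo) / d);
      //     return ((uint64_t)q_hi << 32) | q_lo;
      //   }
      //
      // Negative dividends are negated first and the quotient negated afterwards;
      // a negative divisor only flips the sign of the final result (NEG EDX:EAX).
      // The lrem-by-constant rule below runs the same two DIVs but keeps the
      // remainder instead of the quotient.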
 7940 
 7941 // Remainder Register Long (remainder fit into 32 bits)
 7942 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7943   match(Set dst (ModL dst imm));
 7944   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7945   ins_cost(1000);
 7946   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7947             "CMP    $tmp,EDX\n\t"
 7948             "JA,s   fast\n\t"
 7949             "MOV    $tmp2,EAX\n\t"
 7950             "MOV    EAX,EDX\n\t"
 7951             "MOV    EDX,0\n\t"
 7952             "JLE,s  pos\n\t"
 7953             "LNEG   EAX : $tmp2\n\t"
 7954             "DIV    $tmp # unsigned division\n\t"
 7955             "MOV    EAX,$tmp2\n\t"
 7956             "DIV    $tmp\n\t"
 7957             "NEG    EDX\n\t"
 7958             "JMP,s  done\n"
 7959     "pos:\n\t"
 7960             "DIV    $tmp\n\t"
 7961             "MOV    EAX,$tmp2\n"
 7962     "fast:\n\t"
 7963             "DIV    $tmp\n"
 7964     "done:\n\t"
 7965             "MOV    EAX,EDX\n\t"
 7966             "SAR    EDX,31\n\t" %}
 7967   ins_encode %{
 7968     int con = (int)$imm$$constant;
 7969     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7970     int pcon = (con > 0) ? con : -con;
 7971     Label  Lfast, Lpos, Ldone;
 7972 
 7973     __ movl($tmp$$Register, pcon);
 7974     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7975     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7976 
 7977     __ movl($tmp2$$Register, $dst$$Register); // save
 7978     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7979     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7980     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7981 
 7982     // Negative dividend.
 7983     // convert value to positive to use unsigned division
 7984     __ lneg($dst$$Register, $tmp2$$Register);
 7985     __ divl($tmp$$Register);
 7986     __ movl($dst$$Register, $tmp2$$Register);
 7987     __ divl($tmp$$Register);
 7988     // revert remainder back to negative
 7989     __ negl(HIGH_FROM_LOW($dst$$Register));
 7990     __ jmpb(Ldone);
 7991 
 7992     __ bind(Lpos);
 7993     __ divl($tmp$$Register);
 7994     __ movl($dst$$Register, $tmp2$$Register);
 7995 
 7996     __ bind(Lfast);
 7997     // fast path: src is positive
 7998     __ divl($tmp$$Register);
 7999 
 8000     __ bind(Ldone);
 8001     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8002     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8003 
 8004   %}
 8005   ins_pipe( pipe_slow );
 8006 %}
 8007 
 8008 // Integer Shift Instructions
 8009 // Shift Left by one
 8010 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8011   match(Set dst (LShiftI dst shift));
 8012   effect(KILL cr);
 8013 
 8014   size(2);
 8015   format %{ "SHL    $dst,$shift" %}
 8016   opcode(0xD1, 0x4);  /* D1 /4 */
 8017   ins_encode( OpcP, RegOpc( dst ) );
 8018   ins_pipe( ialu_reg );
 8019 %}
 8020 
 8021 // Shift Left by 8-bit immediate
 8022 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8023   match(Set dst (LShiftI dst shift));
 8024   effect(KILL cr);
 8025 
 8026   size(3);
 8027   format %{ "SHL    $dst,$shift" %}
 8028   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8029   ins_encode( RegOpcImm( dst, shift) );
 8030   ins_pipe( ialu_reg );
 8031 %}
 8032 
 8033 // Shift Left by variable
 8034 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8035   match(Set dst (LShiftI dst shift));
 8036   effect(KILL cr);
 8037 
 8038   size(2);
 8039   format %{ "SHL    $dst,$shift" %}
 8040   opcode(0xD3, 0x4);  /* D3 /4 */
 8041   ins_encode( OpcP, RegOpc( dst ) );
 8042   ins_pipe( ialu_reg_reg );
 8043 %}
 8044 
 8045 // Arithmetic shift right by one
 8046 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8047   match(Set dst (RShiftI dst shift));
 8048   effect(KILL cr);
 8049 
 8050   size(2);
 8051   format %{ "SAR    $dst,$shift" %}
 8052   opcode(0xD1, 0x7);  /* D1 /7 */
 8053   ins_encode( OpcP, RegOpc( dst ) );
 8054   ins_pipe( ialu_reg );
 8055 %}
 8056 
 8057 // Arithmetic shift right memory operand by one
 8058 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8059   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8060   effect(KILL cr);
 8061   format %{ "SAR    $dst,$shift" %}
 8062   opcode(0xD1, 0x7);  /* D1 /7 */
 8063   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8064   ins_pipe( ialu_mem_imm );
 8065 %}
 8066 
 8067 // Arithmetic Shift Right by 8-bit immediate
 8068 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8069   match(Set dst (RShiftI dst shift));
 8070   effect(KILL cr);
 8071 
 8072   size(3);
 8073   format %{ "SAR    $dst,$shift" %}
 8074   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8075   ins_encode( RegOpcImm( dst, shift ) );
 8076   ins_pipe( ialu_mem_imm );
 8077 %}
 8078 
 8079 // Arithmetic Shift Right Memory by 8-bit immediate
 8080 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8081   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8082   effect(KILL cr);
 8083 
 8084   format %{ "SAR    $dst,$shift" %}
 8085   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8086   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8087   ins_pipe( ialu_mem_imm );
 8088 %}
 8089 
 8090 // Arithmetic Shift Right by variable
 8091 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8092   match(Set dst (RShiftI dst shift));
 8093   effect(KILL cr);
 8094 
 8095   size(2);
 8096   format %{ "SAR    $dst,$shift" %}
 8097   opcode(0xD3, 0x7);  /* D3 /7 */
 8098   ins_encode( OpcP, RegOpc( dst ) );
 8099   ins_pipe( ialu_reg_reg );
 8100 %}
 8101 
 8102 // Logical shift right by one
 8103 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8104   match(Set dst (URShiftI dst shift));
 8105   effect(KILL cr);
 8106 
 8107   size(2);
 8108   format %{ "SHR    $dst,$shift" %}
 8109   opcode(0xD1, 0x5);  /* D1 /5 */
 8110   ins_encode( OpcP, RegOpc( dst ) );
 8111   ins_pipe( ialu_reg );
 8112 %}
 8113 
 8114 // Logical Shift Right by 8-bit immediate
 8115 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8116   match(Set dst (URShiftI dst shift));
 8117   effect(KILL cr);
 8118 
 8119   size(3);
 8120   format %{ "SHR    $dst,$shift" %}
 8121   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8122   ins_encode( RegOpcImm( dst, shift) );
 8123   ins_pipe( ialu_reg );
 8124 %}
 8125 
 8126 
 8127 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8128 // This idiom is used by the compiler for the i2b bytecode (sign-extend the low byte).
 8129 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8130   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8131 
 8132   size(3);
 8133   format %{ "MOVSX  $dst,$src :8" %}
 8134   ins_encode %{
 8135     __ movsbl($dst$$Register, $src$$Register);
 8136   %}
 8137   ins_pipe(ialu_reg_reg);
 8138 %}
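      // In other words, the matcher recognizes the Java-level idiom
      // (x << 24) >> 24, which sign-extends the low byte (e.g. 0x12345680
      // becomes 0xFFFFFF80, i.e. -128), and strength-reduces the two shifts to
      // a single MOVSX.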
 8139 
 8140 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8141 // This idiom is used by the compiler for the i2s bytecode (sign-extend the low 16 bits).
 8142 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8143   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8144 
 8145   size(3);
 8146   format %{ "MOVSX  $dst,$src :16" %}
 8147   ins_encode %{
 8148     __ movswl($dst$$Register, $src$$Register);
 8149   %}
 8150   ins_pipe(ialu_reg_reg);
 8151 %}
 8152 
 8153 
 8154 // Logical Shift Right by variable
 8155 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8156   match(Set dst (URShiftI dst shift));
 8157   effect(KILL cr);
 8158 
 8159   size(2);
 8160   format %{ "SHR    $dst,$shift" %}
 8161   opcode(0xD3, 0x5);  /* D3 /5 */
 8162   ins_encode( OpcP, RegOpc( dst ) );
 8163   ins_pipe( ialu_reg_reg );
 8164 %}
 8165 
 8166 
 8167 //----------Logical Instructions-----------------------------------------------
 8168 //----------Integer Logical Instructions---------------------------------------
 8169 // And Instructions
 8170 // And Register with Register
 8171 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8172   match(Set dst (AndI dst src));
 8173   effect(KILL cr);
 8174 
 8175   size(2);
 8176   format %{ "AND    $dst,$src" %}
 8177   opcode(0x23);
 8178   ins_encode( OpcP, RegReg( dst, src) );
 8179   ins_pipe( ialu_reg_reg );
 8180 %}
 8181 
 8182 // And Register with Immediate
 8183 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8184   match(Set dst (AndI dst src));
 8185   effect(KILL cr);
 8186 
 8187   format %{ "AND    $dst,$src" %}
 8188   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8189   // ins_encode( RegImm( dst, src) );
 8190   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8191   ins_pipe( ialu_reg );
 8192 %}
 8193 
 8194 // And Register with Memory
 8195 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8196   match(Set dst (AndI dst (LoadI src)));
 8197   effect(KILL cr);
 8198 
 8199   ins_cost(150);
 8200   format %{ "AND    $dst,$src" %}
 8201   opcode(0x23);
 8202   ins_encode( OpcP, RegMem( dst, src) );
 8203   ins_pipe( ialu_reg_mem );
 8204 %}
 8205 
 8206 // And Memory with Register
 8207 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8208   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8209   effect(KILL cr);
 8210 
 8211   ins_cost(150);
 8212   format %{ "AND    $dst,$src" %}
 8213   opcode(0x21);  /* Opcode 21 /r */
 8214   ins_encode( OpcP, RegMem( src, dst ) );
 8215   ins_pipe( ialu_mem_reg );
 8216 %}
 8217 
 8218 // And Memory with Immediate
 8219 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8220   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8221   effect(KILL cr);
 8222 
 8223   ins_cost(125);
 8224   format %{ "AND    $dst,$src" %}
 8225   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8226   // ins_encode( MemImm( dst, src) );
 8227   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8228   ins_pipe( ialu_mem_imm );
 8229 %}
 8230 
 8231 // BMI1 instructions
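      // The match rules below recognize the standard bit-trick forms of the
      // BMI1 operations (quick illustration with x = 0b01101000):
      //
      //   ~x & y        -> ANDNL   (written as (x ^ -1) & y in the ideal graph)
      //   x & -x        -> BLSIL   isolate lowest set bit      -> 0b00001000
      //   x ^ (x - 1)   -> BLSMSKL mask up to lowest set bit   -> 0b00001111
      //   x & (x - 1)   -> BLSRL   clear lowest set bit        -> 0b01100000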
 8232 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8233   match(Set dst (AndI (XorI src1 minus_1) src2));
 8234   predicate(UseBMI1Instructions);
 8235   effect(KILL cr);
 8236 
 8237   format %{ "ANDNL  $dst, $src1, $src2" %}
 8238 
 8239   ins_encode %{
 8240     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8241   %}
 8242   ins_pipe(ialu_reg);
 8243 %}
 8244 
 8245 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8246   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8247   predicate(UseBMI1Instructions);
 8248   effect(KILL cr);
 8249 
 8250   ins_cost(125);
 8251   format %{ "ANDNL  $dst, $src1, $src2" %}
 8252 
 8253   ins_encode %{
 8254     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8255   %}
 8256   ins_pipe(ialu_reg_mem);
 8257 %}
 8258 
 8259 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8260   match(Set dst (AndI (SubI imm_zero src) src));
 8261   predicate(UseBMI1Instructions);
 8262   effect(KILL cr);
 8263 
 8264   format %{ "BLSIL  $dst, $src" %}
 8265 
 8266   ins_encode %{
 8267     __ blsil($dst$$Register, $src$$Register);
 8268   %}
 8269   ins_pipe(ialu_reg);
 8270 %}
 8271 
 8272 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8273   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8274   predicate(UseBMI1Instructions);
 8275   effect(KILL cr);
 8276 
 8277   ins_cost(125);
 8278   format %{ "BLSIL  $dst, $src" %}
 8279 
 8280   ins_encode %{
 8281     __ blsil($dst$$Register, $src$$Address);
 8282   %}
 8283   ins_pipe(ialu_reg_mem);
 8284 %}
 8285 
 8286 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8287 %{
 8288   match(Set dst (XorI (AddI src minus_1) src));
 8289   predicate(UseBMI1Instructions);
 8290   effect(KILL cr);
 8291 
 8292   format %{ "BLSMSKL $dst, $src" %}
 8293 
 8294   ins_encode %{
 8295     __ blsmskl($dst$$Register, $src$$Register);
 8296   %}
 8297 
 8298   ins_pipe(ialu_reg);
 8299 %}
 8300 
 8301 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8302 %{
 8303   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8304   predicate(UseBMI1Instructions);
 8305   effect(KILL cr);
 8306 
 8307   ins_cost(125);
 8308   format %{ "BLSMSKL $dst, $src" %}
 8309 
 8310   ins_encode %{
 8311     __ blsmskl($dst$$Register, $src$$Address);
 8312   %}
 8313 
 8314   ins_pipe(ialu_reg_mem);
 8315 %}
 8316 
 8317 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8318 %{
 8319   match(Set dst (AndI (AddI src minus_1) src) );
 8320   predicate(UseBMI1Instructions);
 8321   effect(KILL cr);
 8322 
 8323   format %{ "BLSRL  $dst, $src" %}
 8324 
 8325   ins_encode %{
 8326     __ blsrl($dst$$Register, $src$$Register);
 8327   %}
 8328 
 8329   ins_pipe(ialu_reg);
 8330 %}
 8331 
 8332 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8333 %{
 8334   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8335   predicate(UseBMI1Instructions);
 8336   effect(KILL cr);
 8337 
 8338   ins_cost(125);
 8339   format %{ "BLSRL  $dst, $src" %}
 8340 
 8341   ins_encode %{
 8342     __ blsrl($dst$$Register, $src$$Address);
 8343   %}
 8344 
 8345   ins_pipe(ialu_reg_mem);
 8346 %}
 8347 
 8348 // Or Instructions
 8349 // Or Register with Register
 8350 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8351   match(Set dst (OrI dst src));
 8352   effect(KILL cr);
 8353 
 8354   size(2);
 8355   format %{ "OR     $dst,$src" %}
 8356   opcode(0x0B);
 8357   ins_encode( OpcP, RegReg( dst, src) );
 8358   ins_pipe( ialu_reg_reg );
 8359 %}
 8360 
 8361 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8362   match(Set dst (OrI dst (CastP2X src)));
 8363   effect(KILL cr);
 8364 
 8365   size(2);
 8366   format %{ "OR     $dst,$src" %}
 8367   opcode(0x0B);
 8368   ins_encode( OpcP, RegReg( dst, src) );
 8369   ins_pipe( ialu_reg_reg );
 8370 %}
 8371 
 8372 
 8373 // Or Register with Immediate
 8374 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8375   match(Set dst (OrI dst src));
 8376   effect(KILL cr);
 8377 
 8378   format %{ "OR     $dst,$src" %}
 8379   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8380   // ins_encode( RegImm( dst, src) );
 8381   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8382   ins_pipe( ialu_reg );
 8383 %}
 8384 
 8385 // Or Register with Memory
 8386 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8387   match(Set dst (OrI dst (LoadI src)));
 8388   effect(KILL cr);
 8389 
 8390   ins_cost(150);
 8391   format %{ "OR     $dst,$src" %}
 8392   opcode(0x0B);
 8393   ins_encode( OpcP, RegMem( dst, src) );
 8394   ins_pipe( ialu_reg_mem );
 8395 %}
 8396 
 8397 // Or Memory with Register
 8398 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8399   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8400   effect(KILL cr);
 8401 
 8402   ins_cost(150);
 8403   format %{ "OR     $dst,$src" %}
 8404   opcode(0x09);  /* Opcode 09 /r */
 8405   ins_encode( OpcP, RegMem( src, dst ) );
 8406   ins_pipe( ialu_mem_reg );
 8407 %}
 8408 
 8409 // Or Memory with Immediate
 8410 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8411   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8412   effect(KILL cr);
 8413 
 8414   ins_cost(125);
 8415   format %{ "OR     $dst,$src" %}
 8416   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8417   // ins_encode( MemImm( dst, src) );
 8418   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8419   ins_pipe( ialu_mem_imm );
 8420 %}
 8421 
 8422 // ROL/ROR
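      // The rotate rules below recognize the usual rotate-by-shifts idiom.
      // A minimal C sketch of the 32-bit left rotate they strength-reduce
      // (illustrative only; rotl32 is a made-up helper name):
      //
      //   #include <stdint.h>
      //   static uint32_t rotl32(uint32_t x, unsigned n) {
      //     n &= 31;                                   // x86 masks shift counts to 5 bits
      //     return (x << n) | (x >> ((32 - n) & 31));  // the OrI(LShiftI, URShiftI) pattern
      //   }
      //
      // The imm8 rules require (lshift + rshift) & 0x1f == 0, i.e. the two shift
      // amounts really describe a single rotate; the variable-count rules accept
      // the complements "0 - shift" and "32 - shift", which are equal modulo 32.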
 8423 // ROL expand
 8424 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8425   effect(USE_DEF dst, USE shift, KILL cr);
 8426 
 8427   format %{ "ROL    $dst, $shift" %}
 8428   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8429   ins_encode( OpcP, RegOpc( dst ));
 8430   ins_pipe( ialu_reg );
 8431 %}
 8432 
 8433 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8434   effect(USE_DEF dst, USE shift, KILL cr);
 8435 
 8436   format %{ "ROL    $dst, $shift" %}
 8437   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8438   ins_encode( RegOpcImm(dst, shift) );
 8439   ins_pipe(ialu_reg);
 8440 %}
 8441 
 8442 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8443   effect(USE_DEF dst, USE shift, KILL cr);
 8444 
 8445   format %{ "ROL    $dst, $shift" %}
 8446   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8447   ins_encode(OpcP, RegOpc(dst));
 8448   ins_pipe( ialu_reg_reg );
 8449 %}
 8450 // end of ROL expand
 8451 
 8452 // ROL 32bit by one once
 8453 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8454   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8455 
 8456   expand %{
 8457     rolI_eReg_imm1(dst, lshift, cr);
 8458   %}
 8459 %}
 8460 
 8461 // ROL 32bit var by imm8 once
 8462 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8463   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8464   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8465 
 8466   expand %{
 8467     rolI_eReg_imm8(dst, lshift, cr);
 8468   %}
 8469 %}
 8470 
 8471 // ROL 32bit var by var once
 8472 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8473   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8474 
 8475   expand %{
 8476     rolI_eReg_CL(dst, shift, cr);
 8477   %}
 8478 %}
 8479 
 8480 // ROL 32bit var by var once
 8481 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8482   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8483 
 8484   expand %{
 8485     rolI_eReg_CL(dst, shift, cr);
 8486   %}
 8487 %}
 8488 
 8489 // ROR expand
 8490 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8491   effect(USE_DEF dst, USE shift, KILL cr);
 8492 
 8493   format %{ "ROR    $dst, $shift" %}
 8494   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8495   ins_encode( OpcP, RegOpc( dst ) );
 8496   ins_pipe( ialu_reg );
 8497 %}
 8498 
 8499 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8500   effect(USE_DEF dst, USE shift, KILL cr);
 8501 
 8502   format %{ "ROR    $dst, $shift" %}
 8503   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8504   ins_encode( RegOpcImm(dst, shift) );
 8505   ins_pipe( ialu_reg );
 8506 %}
 8507 
 8508 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8509   effect(USE_DEF dst, USE shift, KILL cr);
 8510 
 8511   format %{ "ROR    $dst, $shift" %}
 8512   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8513   ins_encode(OpcP, RegOpc(dst));
 8514   ins_pipe( ialu_reg_reg );
 8515 %}
 8516 // end of ROR expand
 8517 
 8518 // ROR right once
 8519 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8520   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8521 
 8522   expand %{
 8523     rorI_eReg_imm1(dst, rshift, cr);
 8524   %}
 8525 %}
 8526 
 8527 // ROR 32bit by immI8 once
 8528 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8529   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8530   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8531 
 8532   expand %{
 8533     rorI_eReg_imm8(dst, rshift, cr);
 8534   %}
 8535 %}
 8536 
 8537 // ROR 32bit var by var once
 8538 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8539   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8540 
 8541   expand %{
 8542     rorI_eReg_CL(dst, shift, cr);
 8543   %}
 8544 %}
 8545 
 8546 // ROR 32bit var by var once
 8547 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8548   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8549 
 8550   expand %{
 8551     rorI_eReg_CL(dst, shift, cr);
 8552   %}
 8553 %}
 8554 
 8555 // Xor Instructions
 8556 // Xor Register with Register
 8557 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8558   match(Set dst (XorI dst src));
 8559   effect(KILL cr);
 8560 
 8561   size(2);
 8562   format %{ "XOR    $dst,$src" %}
 8563   opcode(0x33);
 8564   ins_encode( OpcP, RegReg( dst, src) );
 8565   ins_pipe( ialu_reg_reg );
 8566 %}
 8567 
 8568 // Xor Register with Immediate -1
 8569 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8570   match(Set dst (XorI dst imm));
 8571 
 8572   size(2);
 8573   format %{ "NOT    $dst" %}
 8574   ins_encode %{
 8575      __ notl($dst$$Register);
 8576   %}
 8577   ins_pipe( ialu_reg );
 8578 %}
 8579 
 8580 // Xor Register with Immediate
 8581 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8582   match(Set dst (XorI dst src));
 8583   effect(KILL cr);
 8584 
 8585   format %{ "XOR    $dst,$src" %}
 8586   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8587   // ins_encode( RegImm( dst, src) );
 8588   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8589   ins_pipe( ialu_reg );
 8590 %}
 8591 
 8592 // Xor Register with Memory
 8593 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8594   match(Set dst (XorI dst (LoadI src)));
 8595   effect(KILL cr);
 8596 
 8597   ins_cost(150);
 8598   format %{ "XOR    $dst,$src" %}
 8599   opcode(0x33);
 8600   ins_encode( OpcP, RegMem(dst, src) );
 8601   ins_pipe( ialu_reg_mem );
 8602 %}
 8603 
 8604 // Xor Memory with Register
 8605 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8606   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8607   effect(KILL cr);
 8608 
 8609   ins_cost(150);
 8610   format %{ "XOR    $dst,$src" %}
 8611   opcode(0x31);  /* Opcode 31 /r */
 8612   ins_encode( OpcP, RegMem( src, dst ) );
 8613   ins_pipe( ialu_mem_reg );
 8614 %}
 8615 
 8616 // Xor Memory with Immediate
 8617 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8618   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8619   effect(KILL cr);
 8620 
 8621   ins_cost(125);
 8622   format %{ "XOR    $dst,$src" %}
 8623   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8624   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8625   ins_pipe( ialu_mem_imm );
 8626 %}
 8627 
 8628 //----------Convert Int to Boolean---------------------------------------------
 8629 
 8630 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8631   effect( DEF dst, USE src );
 8632   format %{ "MOV    $dst,$src" %}
 8633   ins_encode( enc_Copy( dst, src) );
 8634   ins_pipe( ialu_reg_reg );
 8635 %}
 8636 
 8637 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8638   effect( USE_DEF dst, USE src, KILL cr );
 8639 
 8640   size(4);
 8641   format %{ "NEG    $dst\n\t"
 8642             "ADC    $dst,$src" %}
 8643   ins_encode( neg_reg(dst),
 8644               OpcRegReg(0x13,dst,src) );
 8645   ins_pipe( ialu_reg_reg_long );
 8646 %}
 8647 
 8648 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8649   match(Set dst (Conv2B src));
 8650 
 8651   expand %{
 8652     movI_nocopy(dst,src);
 8653     ci2b(dst,src,cr);
 8654   %}
 8655 %}
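      // How the expansion above works: dst starts as a copy of src; NEG sets CF
      // exactly when its operand was nonzero and leaves dst = -src; ADC dst,src
      // then computes -src + src + CF = CF. So dst ends up 1 when src != 0 and
      // 0 when src == 0, which is the Conv2B result (e.g. src = 5: NEG gives -5
      // with CF = 1, ADC gives -5 + 5 + 1 = 1). The pointer variant convP2B
      // below uses the same NEG/ADC trick.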
 8656 
 8657 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8658   effect( DEF dst, USE src );
 8659   format %{ "MOV    $dst,$src" %}
 8660   ins_encode( enc_Copy( dst, src) );
 8661   ins_pipe( ialu_reg_reg );
 8662 %}
 8663 
 8664 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8665   effect( USE_DEF dst, USE src, KILL cr );
 8666   format %{ "NEG    $dst\n\t"
 8667             "ADC    $dst,$src" %}
 8668   ins_encode( neg_reg(dst),
 8669               OpcRegReg(0x13,dst,src) );
 8670   ins_pipe( ialu_reg_reg_long );
 8671 %}
 8672 
 8673 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8674   match(Set dst (Conv2B src));
 8675 
 8676   expand %{
 8677     movP_nocopy(dst,src);
 8678     cp2b(dst,src,cr);
 8679   %}
 8680 %}
 8681 
 8682 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8683   match(Set dst (CmpLTMask p q));
 8684   effect(KILL cr);
 8685   ins_cost(400);
 8686 
 8687   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8688   format %{ "XOR    $dst,$dst\n\t"
 8689             "CMP    $p,$q\n\t"
 8690             "SETlt  $dst\n\t"
 8691             "NEG    $dst" %}
 8692   ins_encode %{
 8693     Register Rp = $p$$Register;
 8694     Register Rq = $q$$Register;
 8695     Register Rd = $dst$$Register;
 8696     Label done;
 8697     __ xorl(Rd, Rd);
 8698     __ cmpl(Rp, Rq);
 8699     __ setb(Assembler::less, Rd);
 8700     __ negl(Rd);
 8701   %}
 8702 
 8703   ins_pipe(pipe_slow);
 8704 %}
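      // CmpLTMask p q produces an all-ones mask when p < q (signed) and zero
      // otherwise, i.e. roughly "-(p < q)" in C: the SETlt byte (0 or 1) is
      // turned into 0 or -1 by the NEG. The cadd_cmpLTMask/and_cmpLTMask rules
      // below match uses of that mask and replace it with a short branch so no
      // mask register needs to be materialized.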
 8705 
 8706 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8707   match(Set dst (CmpLTMask dst zero));
 8708   effect(DEF dst, KILL cr);
 8709   ins_cost(100);
 8710 
 8711   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8712   ins_encode %{
 8713     __ sarl($dst$$Register, 31);
 8714   %}
 8715   ins_pipe(ialu_reg);
 8716 %}
 8717 
 8718 /* better to save a register than avoid a branch */
 8719 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8720   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8721   effect(KILL cr);
 8722   ins_cost(400);
 8723   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8724             "JGE    done\n\t"
 8725             "ADD    $p,$y\n"
 8726             "done:  " %}
 8727   ins_encode %{
 8728     Register Rp = $p$$Register;
 8729     Register Rq = $q$$Register;
 8730     Register Ry = $y$$Register;
 8731     Label done;
 8732     __ subl(Rp, Rq);
 8733     __ jccb(Assembler::greaterEqual, done);
 8734     __ addl(Rp, Ry);
 8735     __ bind(done);
 8736   %}
 8737 
 8738   ins_pipe(pipe_cmplt);
 8739 %}
 8740 
 8741 /* better to save a register than avoid a branch */
 8742 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8743   match(Set y (AndI (CmpLTMask p q) y));
 8744   effect(KILL cr);
 8745 
 8746   ins_cost(300);
 8747 
 8748   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8749             "JLT      done\n\t"
 8750             "XORL     $y, $y\n"
 8751             "done:  " %}
 8752   ins_encode %{
 8753     Register Rp = $p$$Register;
 8754     Register Rq = $q$$Register;
 8755     Register Ry = $y$$Register;
 8756     Label done;
 8757     __ cmpl(Rp, Rq);
 8758     __ jccb(Assembler::less, done);
 8759     __ xorl(Ry, Ry);
 8760     __ bind(done);
 8761   %}
 8762 
 8763   ins_pipe(pipe_cmplt);
 8764 %}
 8765 
 8766 /* If I enable this, I encourage spilling in the inner loop of compress.
 8767 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8768   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8769 */
 8770 //----------Overflow Math Instructions-----------------------------------------
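      // These rules produce only condition codes: the matched ADD/CMP/NEG/IMUL
      // sets OF exactly when the corresponding signed operation overflows, and
      // the flags are then consumed by the branch that tests for overflow. CMP
      // works for OverflowSubI because it performs the subtraction for flag
      // purposes without writing a result.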
 8771 
 8772 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8773 %{
 8774   match(Set cr (OverflowAddI op1 op2));
 8775   effect(DEF cr, USE_KILL op1, USE op2);
 8776 
 8777   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8778 
 8779   ins_encode %{
 8780     __ addl($op1$$Register, $op2$$Register);
 8781   %}
 8782   ins_pipe(ialu_reg_reg);
 8783 %}
 8784 
 8785 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8786 %{
 8787   match(Set cr (OverflowAddI op1 op2));
 8788   effect(DEF cr, USE_KILL op1, USE op2);
 8789 
 8790   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8791 
 8792   ins_encode %{
 8793     __ addl($op1$$Register, $op2$$constant);
 8794   %}
 8795   ins_pipe(ialu_reg_reg);
 8796 %}
 8797 
 8798 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8799 %{
 8800   match(Set cr (OverflowSubI op1 op2));
 8801 
 8802   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8803   ins_encode %{
 8804     __ cmpl($op1$$Register, $op2$$Register);
 8805   %}
 8806   ins_pipe(ialu_reg_reg);
 8807 %}
 8808 
 8809 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8810 %{
 8811   match(Set cr (OverflowSubI op1 op2));
 8812 
 8813   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8814   ins_encode %{
 8815     __ cmpl($op1$$Register, $op2$$constant);
 8816   %}
 8817   ins_pipe(ialu_reg_reg);
 8818 %}
 8819 
 8820 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8821 %{
 8822   match(Set cr (OverflowSubI zero op2));
 8823   effect(DEF cr, USE_KILL op2);
 8824 
 8825   format %{ "NEG    $op2\t# overflow check int" %}
 8826   ins_encode %{
 8827     __ negl($op2$$Register);
 8828   %}
 8829   ins_pipe(ialu_reg_reg);
 8830 %}
 8831 
 8832 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8833 %{
 8834   match(Set cr (OverflowMulI op1 op2));
 8835   effect(DEF cr, USE_KILL op1, USE op2);
 8836 
 8837   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8838   ins_encode %{
 8839     __ imull($op1$$Register, $op2$$Register);
 8840   %}
 8841   ins_pipe(ialu_reg_reg_alu0);
 8842 %}
 8843 
 8844 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8845 %{
 8846   match(Set cr (OverflowMulI op1 op2));
 8847   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8848 
 8849   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8850   ins_encode %{
 8851     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8852   %}
 8853   ins_pipe(ialu_reg_reg_alu0);
 8854 %}
 8855 
 8856 // Integer Absolute Instructions
 8857 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8858 %{
 8859   match(Set dst (AbsI src));
 8860   effect(TEMP dst, TEMP tmp, KILL cr);
 8861   format %{ "movl $tmp, $src\n\t"
 8862             "sarl $tmp, 31\n\t"
 8863             "movl $dst, $src\n\t"
 8864             "xorl $dst, $tmp\n\t"
 8865             "subl $dst, $tmp\n"
 8866           %}
 8867   ins_encode %{
 8868     __ movl($tmp$$Register, $src$$Register);
 8869     __ sarl($tmp$$Register, 31);
 8870     __ movl($dst$$Register, $src$$Register);
 8871     __ xorl($dst$$Register, $tmp$$Register);
 8872     __ subl($dst$$Register, $tmp$$Register);
 8873   %}
 8874 
 8875   ins_pipe(ialu_reg_reg);
 8876 %}
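      // The sequence above is the usual branch-free sign-mask abs. A rough C
      // sketch (illustrative; assumes arithmetic right shift on signed ints, and
      // note that abs(min_jint) wraps back to min_jint, matching Java's Math.abs):
      //
      //   static int iabs_sketch(int x) {
      //     int m = x >> 31;       // 0 for x >= 0, -1 for x < 0
      //     return (x ^ m) - m;    // x unchanged, or (~x) + 1 == -x
      //   }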
 8877 
 8878 //----------Long Instructions------------------------------------------------
 8879 // Add Long Register with Register
 8880 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8881   match(Set dst (AddL dst src));
 8882   effect(KILL cr);
 8883   ins_cost(200);
 8884   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8885             "ADC    $dst.hi,$src.hi" %}
 8886   opcode(0x03, 0x13);
 8887   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8888   ins_pipe( ialu_reg_reg_long );
 8889 %}
 8890 
 8891 // Add Long Register with Immediate
 8892 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8893   match(Set dst (AddL dst src));
 8894   effect(KILL cr);
 8895   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8896             "ADC    $dst.hi,$src.hi" %}
 8897   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8898   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8899   ins_pipe( ialu_reg_long );
 8900 %}
 8901 
 8902 // Add Long Register with Memory
 8903 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8904   match(Set dst (AddL dst (LoadL mem)));
 8905   effect(KILL cr);
 8906   ins_cost(125);
 8907   format %{ "ADD    $dst.lo,$mem\n\t"
 8908             "ADC    $dst.hi,$mem+4" %}
 8909   opcode(0x03, 0x13);
 8910   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8911   ins_pipe( ialu_reg_long_mem );
 8912 %}
 8913 
 8914 // Subtract Long Register with Register.
 8915 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8916   match(Set dst (SubL dst src));
 8917   effect(KILL cr);
 8918   ins_cost(200);
 8919   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8920             "SBB    $dst.hi,$src.hi" %}
 8921   opcode(0x2B, 0x1B);
 8922   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8923   ins_pipe( ialu_reg_reg_long );
 8924 %}
 8925 
 8926 // Subtract Long Register with Immediate
 8927 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8928   match(Set dst (SubL dst src));
 8929   effect(KILL cr);
 8930   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8931             "SBB    $dst.hi,$src.hi" %}
 8932   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8933   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8934   ins_pipe( ialu_reg_long );
 8935 %}
 8936 
 8937 // Subtract Long Register with Memory
 8938 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8939   match(Set dst (SubL dst (LoadL mem)));
 8940   effect(KILL cr);
 8941   ins_cost(125);
 8942   format %{ "SUB    $dst.lo,$mem\n\t"
 8943             "SBB    $dst.hi,$mem+4" %}
 8944   opcode(0x2B, 0x1B);
 8945   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8946   ins_pipe( ialu_reg_long_mem );
 8947 %}
 8948 
 8949 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8950   match(Set dst (SubL zero dst));
 8951   effect(KILL cr);
 8952   ins_cost(300);
 8953   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8954   ins_encode( neg_long(dst) );
 8955   ins_pipe( ialu_reg_reg_long );
 8956 %}
 8957 
 8958 // And Long Register with Register
 8959 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8960   match(Set dst (AndL dst src));
 8961   effect(KILL cr);
 8962   format %{ "AND    $dst.lo,$src.lo\n\t"
 8963             "AND    $dst.hi,$src.hi" %}
 8964   opcode(0x23,0x23);
 8965   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8966   ins_pipe( ialu_reg_reg_long );
 8967 %}
 8968 
 8969 // And Long Register with Immediate
 8970 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8971   match(Set dst (AndL dst src));
 8972   effect(KILL cr);
 8973   format %{ "AND    $dst.lo,$src.lo\n\t"
 8974             "AND    $dst.hi,$src.hi" %}
 8975   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8976   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8977   ins_pipe( ialu_reg_long );
 8978 %}
 8979 
 8980 // And Long Register with Memory
 8981 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8982   match(Set dst (AndL dst (LoadL mem)));
 8983   effect(KILL cr);
 8984   ins_cost(125);
 8985   format %{ "AND    $dst.lo,$mem\n\t"
 8986             "AND    $dst.hi,$mem+4" %}
 8987   opcode(0x23, 0x23);
 8988   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8989   ins_pipe( ialu_reg_long_mem );
 8990 %}
 8991 
 8992 // BMI1 instructions
 8993 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8994   match(Set dst (AndL (XorL src1 minus_1) src2));
 8995   predicate(UseBMI1Instructions);
 8996   effect(KILL cr, TEMP dst);
 8997 
 8998   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8999             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9000          %}
 9001 
 9002   ins_encode %{
 9003     Register Rdst = $dst$$Register;
 9004     Register Rsrc1 = $src1$$Register;
 9005     Register Rsrc2 = $src2$$Register;
 9006     __ andnl(Rdst, Rsrc1, Rsrc2);
 9007     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9008   %}
 9009   ins_pipe(ialu_reg_reg_long);
 9010 %}
 9011 
 9012 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9013   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9014   predicate(UseBMI1Instructions);
 9015   effect(KILL cr, TEMP dst);
 9016 
 9017   ins_cost(125);
 9018   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9019             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9020          %}
 9021 
 9022   ins_encode %{
 9023     Register Rdst = $dst$$Register;
 9024     Register Rsrc1 = $src1$$Register;
 9025     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9026 
 9027     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9028     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9029   %}
 9030   ins_pipe(ialu_reg_mem);
 9031 %}
 9032 
 9033 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9034   match(Set dst (AndL (SubL imm_zero src) src));
 9035   predicate(UseBMI1Instructions);
 9036   effect(KILL cr, TEMP dst);
 9037 
 9038   format %{ "MOVL   $dst.hi, 0\n\t"
 9039             "BLSIL  $dst.lo, $src.lo\n\t"
 9040             "JNZ    done\n\t"
 9041             "BLSIL  $dst.hi, $src.hi\n"
 9042             "done:"
 9043          %}
 9044 
 9045   ins_encode %{
 9046     Label done;
 9047     Register Rdst = $dst$$Register;
 9048     Register Rsrc = $src$$Register;
 9049     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9050     __ blsil(Rdst, Rsrc);
 9051     __ jccb(Assembler::notZero, done);
 9052     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9053     __ bind(done);
 9054   %}
 9055   ins_pipe(ialu_reg);
 9056 %}
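      // Word-splitting note for the 64-bit BLSI/BLSMSK/BLSR rules: the lowest
      // set bit of a long lies in the low word iff the low word is nonzero.
      // BLSI leaves ZF set only for a zero source, so the JNZ above skips the
      // high word exactly when the low word already contained the bit (the high
      // result word was pre-zeroed). The BLSMSK/BLSR forms below use CF the same
      // way, since those instructions set CF only when their source is zero.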
 9057 
 9058 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9059   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9060   predicate(UseBMI1Instructions);
 9061   effect(KILL cr, TEMP dst);
 9062 
 9063   ins_cost(125);
 9064   format %{ "MOVL   $dst.hi, 0\n\t"
 9065             "BLSIL  $dst.lo, $src\n\t"
 9066             "JNZ    done\n\t"
 9067             "BLSIL  $dst.hi, $src+4\n"
 9068             "done:"
 9069          %}
 9070 
 9071   ins_encode %{
 9072     Label done;
 9073     Register Rdst = $dst$$Register;
 9074     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9075 
 9076     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9077     __ blsil(Rdst, $src$$Address);
 9078     __ jccb(Assembler::notZero, done);
 9079     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9080     __ bind(done);
 9081   %}
 9082   ins_pipe(ialu_reg_mem);
 9083 %}
 9084 
 9085 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9086 %{
 9087   match(Set dst (XorL (AddL src minus_1) src));
 9088   predicate(UseBMI1Instructions);
 9089   effect(KILL cr, TEMP dst);
 9090 
 9091   format %{ "MOVL    $dst.hi, 0\n\t"
 9092             "BLSMSKL $dst.lo, $src.lo\n\t"
 9093             "JNC     done\n\t"
 9094             "BLSMSKL $dst.hi, $src.hi\n"
 9095             "done:"
 9096          %}
 9097 
 9098   ins_encode %{
 9099     Label done;
 9100     Register Rdst = $dst$$Register;
 9101     Register Rsrc = $src$$Register;
 9102     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9103     __ blsmskl(Rdst, Rsrc);
 9104     __ jccb(Assembler::carryClear, done);
 9105     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9106     __ bind(done);
 9107   %}
 9108 
 9109   ins_pipe(ialu_reg);
 9110 %}
 9111 
 9112 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9113 %{
 9114   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9115   predicate(UseBMI1Instructions);
 9116   effect(KILL cr, TEMP dst);
 9117 
 9118   ins_cost(125);
 9119   format %{ "MOVL    $dst.hi, 0\n\t"
 9120             "BLSMSKL $dst.lo, $src\n\t"
 9121             "JNC     done\n\t"
 9122             "BLSMSKL $dst.hi, $src+4\n"
 9123             "done:"
 9124          %}
 9125 
 9126   ins_encode %{
 9127     Label done;
 9128     Register Rdst = $dst$$Register;
 9129     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9130 
 9131     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9132     __ blsmskl(Rdst, $src$$Address);
 9133     __ jccb(Assembler::carryClear, done);
 9134     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9135     __ bind(done);
 9136   %}
 9137 
 9138   ins_pipe(ialu_reg_mem);
 9139 %}
 9140 
 9141 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9142 %{
 9143   match(Set dst (AndL (AddL src minus_1) src) );
 9144   predicate(UseBMI1Instructions);
 9145   effect(KILL cr, TEMP dst);
 9146 
 9147   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9148             "BLSRL  $dst.lo, $src.lo\n\t"
 9149             "JNC    done\n\t"
 9150             "BLSRL  $dst.hi, $src.hi\n"
 9151             "done:"
 9152   %}
 9153 
 9154   ins_encode %{
 9155     Label done;
 9156     Register Rdst = $dst$$Register;
 9157     Register Rsrc = $src$$Register;
 9158     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9159     __ blsrl(Rdst, Rsrc);
 9160     __ jccb(Assembler::carryClear, done);
 9161     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9162     __ bind(done);
 9163   %}
 9164 
 9165   ins_pipe(ialu_reg);
 9166 %}
 9167 
 9168 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9169 %{
 9170   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9171   predicate(UseBMI1Instructions);
 9172   effect(KILL cr, TEMP dst);
 9173 
 9174   ins_cost(125);
 9175   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9176             "BLSRL  $dst.lo, $src\n\t"
 9177             "JNC    done\n\t"
 9178             "BLSRL  $dst.hi, $src+4\n"
 9179             "done:"
 9180   %}
 9181 
 9182   ins_encode %{
 9183     Label done;
 9184     Register Rdst = $dst$$Register;
 9185     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9186     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9187     __ blsrl(Rdst, $src$$Address);
 9188     __ jccb(Assembler::carryClear, done);
 9189     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9190     __ bind(done);
 9191   %}
 9192 
 9193   ins_pipe(ialu_reg_mem);
 9194 %}
 9195 
 9196 // Or Long Register with Register
 9197 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9198   match(Set dst (OrL dst src));
 9199   effect(KILL cr);
 9200   format %{ "OR     $dst.lo,$src.lo\n\t"
 9201             "OR     $dst.hi,$src.hi" %}
 9202   opcode(0x0B,0x0B);
 9203   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9204   ins_pipe( ialu_reg_reg_long );
 9205 %}
 9206 
 9207 // Or Long Register with Immediate
 9208 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9209   match(Set dst (OrL dst src));
 9210   effect(KILL cr);
 9211   format %{ "OR     $dst.lo,$src.lo\n\t"
 9212             "OR     $dst.hi,$src.hi" %}
 9213   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9214   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9215   ins_pipe( ialu_reg_long );
 9216 %}
 9217 
 9218 // Or Long Register with Memory
 9219 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9220   match(Set dst (OrL dst (LoadL mem)));
 9221   effect(KILL cr);
 9222   ins_cost(125);
 9223   format %{ "OR     $dst.lo,$mem\n\t"
 9224             "OR     $dst.hi,$mem+4" %}
 9225   opcode(0x0B,0x0B);
 9226   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9227   ins_pipe( ialu_reg_long_mem );
 9228 %}
 9229 
 9230 // Xor Long Register with Register
 9231 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9232   match(Set dst (XorL dst src));
 9233   effect(KILL cr);
 9234   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9235             "XOR    $dst.hi,$src.hi" %}
 9236   opcode(0x33,0x33);
 9237   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9238   ins_pipe( ialu_reg_reg_long );
 9239 %}
 9240 
 9241 // Xor Long Register with Immediate -1
 9242 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9243   match(Set dst (XorL dst imm));
 9244   format %{ "NOT    $dst.lo\n\t"
 9245             "NOT    $dst.hi" %}
 9246   ins_encode %{
 9247      __ notl($dst$$Register);
 9248      __ notl(HIGH_FROM_LOW($dst$$Register));
 9249   %}
 9250   ins_pipe( ialu_reg_long );
 9251 %}
 9252 
 9253 // Xor Long Register with Immediate
 9254 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9255   match(Set dst (XorL dst src));
 9256   effect(KILL cr);
 9257   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9258             "XOR    $dst.hi,$src.hi" %}
 9259   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9260   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9261   ins_pipe( ialu_reg_long );
 9262 %}
 9263 
 9264 // Xor Long Register with Memory
 9265 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9266   match(Set dst (XorL dst (LoadL mem)));
 9267   effect(KILL cr);
 9268   ins_cost(125);
 9269   format %{ "XOR    $dst.lo,$mem\n\t"
 9270             "XOR    $dst.hi,$mem+4" %}
 9271   opcode(0x33,0x33);
 9272   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9273   ins_pipe( ialu_reg_long_mem );
 9274 %}
 9275 
 9276 // Shift Left Long by 1
 9277 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9278   predicate(UseNewLongLShift);
 9279   match(Set dst (LShiftL dst cnt));
 9280   effect(KILL cr);
 9281   ins_cost(100);
 9282   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9283             "ADC    $dst.hi,$dst.hi" %}
 9284   ins_encode %{
 9285     __ addl($dst$$Register,$dst$$Register);
 9286     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9287   %}
 9288   ins_pipe( ialu_reg_long );
 9289 %}
 9290 
 9291 // Shift Left Long by 2
 9292 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9293   predicate(UseNewLongLShift);
 9294   match(Set dst (LShiftL dst cnt));
 9295   effect(KILL cr);
 9296   ins_cost(100);
 9297   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9298             "ADC    $dst.hi,$dst.hi\n\t"
 9299             "ADD    $dst.lo,$dst.lo\n\t"
 9300             "ADC    $dst.hi,$dst.hi" %}
 9301   ins_encode %{
 9302     __ addl($dst$$Register,$dst$$Register);
 9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9304     __ addl($dst$$Register,$dst$$Register);
 9305     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9306   %}
 9307   ins_pipe( ialu_reg_long );
 9308 %}
 9309 
 9310 // Shift Left Long by 3
 9311 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9312   predicate(UseNewLongLShift);
 9313   match(Set dst (LShiftL dst cnt));
 9314   effect(KILL cr);
 9315   ins_cost(100);
 9316   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9317             "ADC    $dst.hi,$dst.hi\n\t"
 9318             "ADD    $dst.lo,$dst.lo\n\t"
 9319             "ADC    $dst.hi,$dst.hi\n\t"
 9320             "ADD    $dst.lo,$dst.lo\n\t"
 9321             "ADC    $dst.hi,$dst.hi" %}
 9322   ins_encode %{
 9323     __ addl($dst$$Register,$dst$$Register);
 9324     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9325     __ addl($dst$$Register,$dst$$Register);
 9326     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9327     __ addl($dst$$Register,$dst$$Register);
 9328     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9329   %}
 9330   ins_pipe( ialu_reg_long );
 9331 %}
 9332 
 9333 // Shift Left Long by 1-31
 9334 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9335   match(Set dst (LShiftL dst cnt));
 9336   effect(KILL cr);
 9337   ins_cost(200);
 9338   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9339             "SHL    $dst.lo,$cnt" %}
 9340   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9341   ins_encode( move_long_small_shift(dst,cnt) );
 9342   ins_pipe( ialu_reg_long );
 9343 %}
 9344 
 9345 // Shift Left Long by 32-63
 9346 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9347   match(Set dst (LShiftL dst cnt));
 9348   effect(KILL cr);
 9349   ins_cost(300);
 9350   format %{ "MOV    $dst.hi,$dst.lo\n"
 9351           "\tSHL    $dst.hi,$cnt-32\n"
 9352           "\tXOR    $dst.lo,$dst.lo" %}
 9353   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9354   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9355   ins_pipe( ialu_reg_long );
 9356 %}
 9357 
 9358 // Shift Left Long by variable
 9359 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9360   match(Set dst (LShiftL dst shift));
 9361   effect(KILL cr);
 9362   ins_cost(500+200);
 9363   size(17);
 9364   format %{ "TEST   $shift,32\n\t"
 9365             "JEQ,s  small\n\t"
 9366             "MOV    $dst.hi,$dst.lo\n\t"
 9367             "XOR    $dst.lo,$dst.lo\n"
 9368     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9369             "SHL    $dst.lo,$shift" %}
 9370   ins_encode( shift_left_long( dst, shift ) );
 9371   ins_pipe( pipe_slow );
 9372 %}
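      // A rough C model of the variable 64-bit left shift above (illustrative
      // only; shl64_sketch is a made-up name). SHLD/SHL mask the CL count to
      // 5 bits, so the explicit test of bit 5 handles counts of 32..63:
      //
      //   #include <stdint.h>
      //   static uint64_t shl64_sketch(uint32_t lo, uint32_t hi, unsigned s) {
      //     if (s & 32) { hi = lo; lo = 0; }          // TEST $shift,32 / MOV / XOR
      //     unsigned n = s & 31;
      //     if (n != 0) {
      //       hi = (hi << n) | (lo >> (32 - n));      // SHLD $dst.hi,$dst.lo,$shift
      //       lo <<= n;                               // SHL  $dst.lo,$shift
      //     }
      //     return ((uint64_t)hi << 32) | lo;
      //   }
      //
      // The unsigned and arithmetic right-shift rules below mirror this with
      // SHRD and SHR/SAR, pre-moving the high word down instead.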
 9373 
 9374 // Shift Right Long by 1-31
 9375 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9376   match(Set dst (URShiftL dst cnt));
 9377   effect(KILL cr);
 9378   ins_cost(200);
 9379   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9380             "SHR    $dst.hi,$cnt" %}
 9381   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9382   ins_encode( move_long_small_shift(dst,cnt) );
 9383   ins_pipe( ialu_reg_long );
 9384 %}
 9385 
 9386 // Shift Right Long by 32-63
 9387 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9388   match(Set dst (URShiftL dst cnt));
 9389   effect(KILL cr);
 9390   ins_cost(300);
 9391   format %{ "MOV    $dst.lo,$dst.hi\n"
 9392           "\tSHR    $dst.lo,$cnt-32\n"
 9393           "\tXOR    $dst.hi,$dst.hi" %}
 9394   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9395   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9396   ins_pipe( ialu_reg_long );
 9397 %}
 9398 
 9399 // Shift Right Long by variable
 9400 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9401   match(Set dst (URShiftL dst shift));
 9402   effect(KILL cr);
 9403   ins_cost(600);
 9404   size(17);
 9405   format %{ "TEST   $shift,32\n\t"
 9406             "JEQ,s  small\n\t"
 9407             "MOV    $dst.lo,$dst.hi\n\t"
 9408             "XOR    $dst.hi,$dst.hi\n"
 9409     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9410             "SHR    $dst.hi,$shift" %}
 9411   ins_encode( shift_right_long( dst, shift ) );
 9412   ins_pipe( pipe_slow );
 9413 %}
 9414 
 9415 // Shift Right Long by 1-31
 9416 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9417   match(Set dst (RShiftL dst cnt));
 9418   effect(KILL cr);
 9419   ins_cost(200);
 9420   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9421             "SAR    $dst.hi,$cnt" %}
 9422   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9423   ins_encode( move_long_small_shift(dst,cnt) );
 9424   ins_pipe( ialu_reg_long );
 9425 %}
 9426 
 9427 // Shift Right Long by 32-63
 9428 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9429   match(Set dst (RShiftL dst cnt));
 9430   effect(KILL cr);
 9431   ins_cost(300);
 9432   format %{ "MOV    $dst.lo,$dst.hi\n"
 9433           "\tSAR    $dst.lo,$cnt-32\n"
 9434           "\tSAR    $dst.hi,31" %}
 9435   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9436   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9437   ins_pipe( ialu_reg_long );
 9438 %}
 9439 
 9440 // Shift Right arithmetic Long by variable
 9441 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9442   match(Set dst (RShiftL dst shift));
 9443   effect(KILL cr);
 9444   ins_cost(600);
 9445   size(18);
 9446   format %{ "TEST   $shift,32\n\t"
 9447             "JEQ,s  small\n\t"
 9448             "MOV    $dst.lo,$dst.hi\n\t"
 9449             "SAR    $dst.hi,31\n"
 9450     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9451             "SAR    $dst.hi,$shift" %}
 9452   ins_encode( shift_right_arith_long( dst, shift ) );
 9453   ins_pipe( pipe_slow );
 9454 %}
 9455 
 9456 
 9457 //----------Double Instructions------------------------------------------------
 9458 // Double Math
 9459 
 9460 // Compare & branch
 9461 
 9462 // P6 version of float compare, sets condition codes in EFLAGS
 9463 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9464   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9465   match(Set cr (CmpD src1 src2));
 9466   effect(KILL rax);
 9467   ins_cost(150);
 9468   format %{ "FLD    $src1\n\t"
 9469             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9470             "JNP    exit\n\t"
 9471             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9472             "SAHF\n"
 9473      "exit:\tNOP               // avoid branch to branch" %}
 9474   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9475   ins_encode( Push_Reg_DPR(src1),
 9476               OpcP, RegOpc(src2),
 9477               cmpF_P6_fixup );
 9478   ins_pipe( pipe_slow );
 9479 %}
 9480 
 9481 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9482   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9483   match(Set cr (CmpD src1 src2));
 9484   ins_cost(150);
 9485   format %{ "FLD    $src1\n\t"
 9486             "FUCOMIP ST,$src2  // P6 instruction" %}
 9487   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9488   ins_encode( Push_Reg_DPR(src1),
 9489               OpcP, RegOpc(src2));
 9490   ins_pipe( pipe_slow );
 9491 %}
 9492 
 9493 // Compare & branch
 9494 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9495   predicate(UseSSE<=1);
 9496   match(Set cr (CmpD src1 src2));
 9497   effect(KILL rax);
 9498   ins_cost(200);
 9499   format %{ "FLD    $src1\n\t"
 9500             "FCOMp  $src2\n\t"
 9501             "FNSTSW AX\n\t"
 9502             "TEST   AX,0x400\n\t"
 9503             "JZ,s   flags\n\t"
 9504             "MOV    AH,1\t# unordered treat as LT\n"
 9505     "flags:\tSAHF" %}
 9506   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9507   ins_encode( Push_Reg_DPR(src1),
 9508               OpcP, RegOpc(src2),
 9509               fpu_flags);
 9510   ins_pipe( pipe_slow );
 9511 %}
 9512 
 9513 // Compare vs zero into -1,0,1
 9514 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9515   predicate(UseSSE<=1);
 9516   match(Set dst (CmpD3 src1 zero));
 9517   effect(KILL cr, KILL rax);
 9518   ins_cost(280);
 9519   format %{ "FTSTD  $dst,$src1" %}
 9520   opcode(0xE4, 0xD9);
 9521   ins_encode( Push_Reg_DPR(src1),
 9522               OpcS, OpcP, PopFPU,
 9523               CmpF_Result(dst));
 9524   ins_pipe( pipe_slow );
 9525 %}
 9526 
 9527 // Compare into -1,0,1
 9528 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9529   predicate(UseSSE<=1);
 9530   match(Set dst (CmpD3 src1 src2));
 9531   effect(KILL cr, KILL rax);
 9532   ins_cost(300);
 9533   format %{ "FCMPD  $dst,$src1,$src2" %}
 9534   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9535   ins_encode( Push_Reg_DPR(src1),
 9536               OpcP, RegOpc(src2),
 9537               CmpF_Result(dst));
 9538   ins_pipe( pipe_slow );
 9539 %}
 9540 
 9541 // float compare and set condition codes in EFLAGS by XMM regs
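// UCOMISD sets ZF=PF=CF=1 when either operand is NaN (unordered); the fixup
// below clears ZF and PF in the saved EFLAGS so the unordered case reads as
// 'below' (CF=1 only).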
 9542 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9543   predicate(UseSSE>=2);
 9544   match(Set cr (CmpD src1 src2));
 9545   ins_cost(145);
 9546   format %{ "UCOMISD $src1,$src2\n\t"
 9547             "JNP,s   exit\n\t"
 9548             "PUSHF\t# saw NaN, set CF\n\t"
 9549             "AND     [rsp], #0xffffff2b\n\t"
 9550             "POPF\n"
 9551     "exit:" %}
 9552   ins_encode %{
 9553     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9554     emit_cmpfp_fixup(_masm);
 9555   %}
 9556   ins_pipe( pipe_slow );
 9557 %}
 9558 
 9559 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9560   predicate(UseSSE>=2);
 9561   match(Set cr (CmpD src1 src2));
 9562   ins_cost(100);
 9563   format %{ "UCOMISD $src1,$src2" %}
 9564   ins_encode %{
 9565     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9566   %}
 9567   ins_pipe( pipe_slow );
 9568 %}
 9569 
 9570 // float compare and set condition codes in EFLAGS by XMM regs
 9571 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9572   predicate(UseSSE>=2);
 9573   match(Set cr (CmpD src1 (LoadD src2)));
 9574   ins_cost(145);
 9575   format %{ "UCOMISD $src1,$src2\n\t"
 9576             "JNP,s   exit\n\t"
 9577             "PUSHF\t# saw NaN, set CF\n\t"
 9578             "AND     [rsp], #0xffffff2b\n\t"
 9579             "POPF\n"
 9580     "exit:" %}
 9581   ins_encode %{
 9582     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9583     emit_cmpfp_fixup(_masm);
 9584   %}
 9585   ins_pipe( pipe_slow );
 9586 %}
 9587 
 9588 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9589   predicate(UseSSE>=2);
 9590   match(Set cr (CmpD src1 (LoadD src2)));
 9591   ins_cost(100);
 9592   format %{ "UCOMISD $src1,$src2" %}
 9593   ins_encode %{
 9594     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9595   %}
 9596   ins_pipe( pipe_slow );
 9597 %}
 9598 
 9599 // Compare into -1,0,1 in XMM
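// The preloaded -1 survives the unordered (JP) and less-than (JB) cases;
// otherwise SETNE/MOVZB yield 0 for equal and 1 for greater.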
 9600 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9601   predicate(UseSSE>=2);
 9602   match(Set dst (CmpD3 src1 src2));
 9603   effect(KILL cr);
 9604   ins_cost(255);
 9605   format %{ "UCOMISD $src1, $src2\n\t"
 9606             "MOV     $dst, #-1\n\t"
 9607             "JP,s    done\n\t"
 9608             "JB,s    done\n\t"
 9609             "SETNE   $dst\n\t"
 9610             "MOVZB   $dst, $dst\n"
 9611     "done:" %}
 9612   ins_encode %{
 9613     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9614     emit_cmpfp3(_masm, $dst$$Register);
 9615   %}
 9616   ins_pipe( pipe_slow );
 9617 %}
 9618 
 9619 // Compare into -1,0,1 in XMM and memory
 9620 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9621   predicate(UseSSE>=2);
 9622   match(Set dst (CmpD3 src1 (LoadD src2)));
 9623   effect(KILL cr);
 9624   ins_cost(275);
 9625   format %{ "UCOMISD $src1, $src2\n\t"
 9626             "MOV     $dst, #-1\n\t"
 9627             "JP,s    done\n\t"
 9628             "JB,s    done\n\t"
 9629             "SETNE   $dst\n\t"
 9630             "MOVZB   $dst, $dst\n"
 9631     "done:" %}
 9632   ins_encode %{
 9633     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9634     emit_cmpfp3(_masm, $dst$$Register);
 9635   %}
 9636   ins_pipe( pipe_slow );
 9637 %}
 9638 
 9639 
 9640 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9641   predicate (UseSSE <=1);
 9642   match(Set dst (SubD dst src));
 9643 
 9644   format %{ "FLD    $src\n\t"
 9645             "DSUBp  $dst,ST" %}
 9646   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9647   ins_cost(150);
 9648   ins_encode( Push_Reg_DPR(src),
 9649               OpcP, RegOpc(dst) );
 9650   ins_pipe( fpu_reg_reg );
 9651 %}
 9652 
 9653 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9654   predicate (UseSSE <=1);
 9655   match(Set dst (RoundDouble (SubD src1 src2)));
 9656   ins_cost(250);
 9657 
 9658   format %{ "FLD    $src2\n\t"
 9659             "DSUB   ST,$src1\n\t"
 9660             "FSTP_D $dst\t# D-round" %}
 9661   opcode(0xD8, 0x5);
 9662   ins_encode( Push_Reg_DPR(src2),
 9663               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9664   ins_pipe( fpu_mem_reg_reg );
 9665 %}
 9666 
 9667 
 9668 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9669   predicate (UseSSE <=1);
 9670   match(Set dst (SubD dst (LoadD src)));
 9671   ins_cost(150);
 9672 
 9673   format %{ "FLD    $src\n\t"
 9674             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9676   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9677               OpcP, RegOpc(dst) );
 9678   ins_pipe( fpu_reg_mem );
 9679 %}
 9680 
 9681 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9682   predicate (UseSSE<=1);
 9683   match(Set dst (AbsD src));
 9684   ins_cost(100);
 9685   format %{ "FABS" %}
 9686   opcode(0xE1, 0xD9);
 9687   ins_encode( OpcS, OpcP );
 9688   ins_pipe( fpu_reg_reg );
 9689 %}
 9690 
 9691 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9692   predicate(UseSSE<=1);
 9693   match(Set dst (NegD src));
 9694   ins_cost(100);
 9695   format %{ "FCHS" %}
 9696   opcode(0xE0, 0xD9);
 9697   ins_encode( OpcS, OpcP );
 9698   ins_pipe( fpu_reg_reg );
 9699 %}
 9700 
 9701 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9702   predicate(UseSSE<=1);
 9703   match(Set dst (AddD dst src));
 9704   format %{ "FLD    $src\n\t"
 9705             "DADD   $dst,ST" %}
 9706   size(4);
 9707   ins_cost(150);
 9708   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9709   ins_encode( Push_Reg_DPR(src),
 9710               OpcP, RegOpc(dst) );
 9711   ins_pipe( fpu_reg_reg );
 9712 %}
 9713 
 9714 
 9715 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9716   predicate(UseSSE<=1);
 9717   match(Set dst (RoundDouble (AddD src1 src2)));
 9718   ins_cost(250);
 9719 
 9720   format %{ "FLD    $src2\n\t"
 9721             "DADD   ST,$src1\n\t"
 9722             "FSTP_D $dst\t# D-round" %}
 9723   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9724   ins_encode( Push_Reg_DPR(src2),
 9725               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9726   ins_pipe( fpu_mem_reg_reg );
 9727 %}
 9728 
 9729 
 9730 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9731   predicate(UseSSE<=1);
 9732   match(Set dst (AddD dst (LoadD src)));
 9733   ins_cost(150);
 9734 
 9735   format %{ "FLD    $src\n\t"
 9736             "DADDp  $dst,ST" %}
 9737   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9738   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9739               OpcP, RegOpc(dst) );
 9740   ins_pipe( fpu_reg_mem );
 9741 %}
 9742 
 9743 // add-to-memory
 9744 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9745   predicate(UseSSE<=1);
 9746   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9747   ins_cost(150);
 9748 
 9749   format %{ "FLD_D  $dst\n\t"
 9750             "DADD   ST,$src\n\t"
 9751             "FST_D  $dst" %}
 9752   opcode(0xDD, 0x0);
 9753   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9754               Opcode(0xD8), RegOpc(src),
 9755               set_instruction_start,
 9756               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9757   ins_pipe( fpu_reg_mem );
 9758 %}
 9759 
 9760 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9761   predicate(UseSSE<=1);
 9762   match(Set dst (AddD dst con));
 9763   ins_cost(125);
 9764   format %{ "FLD1\n\t"
 9765             "DADDp  $dst,ST" %}
 9766   ins_encode %{
 9767     __ fld1();
 9768     __ faddp($dst$$reg);
 9769   %}
 9770   ins_pipe(fpu_reg);
 9771 %}
 9772 
 9773 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9774   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9775   match(Set dst (AddD dst con));
 9776   ins_cost(200);
 9777   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9778             "DADDp  $dst,ST" %}
 9779   ins_encode %{
 9780     __ fld_d($constantaddress($con));
 9781     __ faddp($dst$$reg);
 9782   %}
 9783   ins_pipe(fpu_reg_mem);
 9784 %}
 9785 
 9786 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9787   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9788   match(Set dst (RoundDouble (AddD src con)));
 9789   ins_cost(200);
 9790   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9791             "DADD   ST,$src\n\t"
 9792             "FSTP_D $dst\t# D-round" %}
 9793   ins_encode %{
 9794     __ fld_d($constantaddress($con));
 9795     __ fadd($src$$reg);
 9796     __ fstp_d(Address(rsp, $dst$$disp));
 9797   %}
 9798   ins_pipe(fpu_mem_reg_con);
 9799 %}
 9800 
 9801 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9802   predicate(UseSSE<=1);
 9803   match(Set dst (MulD dst src));
 9804   format %{ "FLD    $src\n\t"
 9805             "DMULp  $dst,ST" %}
 9806   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9807   ins_cost(150);
 9808   ins_encode( Push_Reg_DPR(src),
 9809               OpcP, RegOpc(dst) );
 9810   ins_pipe( fpu_reg_reg );
 9811 %}
 9812 
 9813 // Strict FP instruction biases argument before multiply then
 9814 // biases result to avoid double rounding of subnormals.
 9815 //
 9816 // scale arg1 by multiplying arg1 by 2^(-15360)
 9817 // load arg2
 9818 // multiply scaled arg1 by arg2
 9819 // rescale product by 2^(15360)
 9820 //
 9821 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9822   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9823   match(Set dst (MulD dst src));
 9824   ins_cost(1);   // Select this instruction for all FP double multiplies
 9825 
 9826   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9827             "DMULp  $dst,ST\n\t"
 9828             "FLD    $src\n\t"
 9829             "DMULp  $dst,ST\n\t"
 9830             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9831             "DMULp  $dst,ST\n\t" %}
 9832   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9833   ins_encode( strictfp_bias1(dst),
 9834               Push_Reg_DPR(src),
 9835               OpcP, RegOpc(dst),
 9836               strictfp_bias2(dst) );
 9837   ins_pipe( fpu_reg_reg );
 9838 %}
 9839 
 9840 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9841   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9842   match(Set dst (MulD dst con));
 9843   ins_cost(200);
 9844   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9845             "DMULp  $dst,ST" %}
 9846   ins_encode %{
 9847     __ fld_d($constantaddress($con));
 9848     __ fmulp($dst$$reg);
 9849   %}
 9850   ins_pipe(fpu_reg_mem);
 9851 %}
 9852 
 9853 
 9854 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9855   predicate( UseSSE<=1 );
 9856   match(Set dst (MulD dst (LoadD src)));
 9857   ins_cost(200);
 9858   format %{ "FLD_D  $src\n\t"
 9859             "DMULp  $dst,ST" %}
 9860   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9861   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9862               OpcP, RegOpc(dst) );
 9863   ins_pipe( fpu_reg_mem );
 9864 %}
 9865 
 9866 //
 9867 // Cisc-alternate to reg-reg multiply
 9868 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9869   predicate( UseSSE<=1 );
 9870   match(Set dst (MulD src (LoadD mem)));
 9871   ins_cost(250);
 9872   format %{ "FLD_D  $mem\n\t"
 9873             "DMUL   ST,$src\n\t"
 9874             "FSTP_D $dst" %}
 9875   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9876   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9877               OpcReg_FPR(src),
 9878               Pop_Reg_DPR(dst) );
 9879   ins_pipe( fpu_reg_reg_mem );
 9880 %}
 9881 
 9882 
 9883 // MACRO3 -- addDPR a mulDPR
 9884 // This instruction is a '2-address' instruction in that the result goes
 9885 // back to src2.  This eliminates a move from the macro; possibly the
 9886 // register allocator will have to add it back (and maybe not).
 9887 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9888   predicate( UseSSE<=1 );
 9889   match(Set src2 (AddD (MulD src0 src1) src2));
 9890   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9891             "DMUL   ST,$src1\n\t"
 9892             "DADDp  $src2,ST" %}
 9893   ins_cost(250);
 9894   opcode(0xDD); /* LoadD DD /0 */
 9895   ins_encode( Push_Reg_FPR(src0),
 9896               FMul_ST_reg(src1),
 9897               FAddP_reg_ST(src2) );
 9898   ins_pipe( fpu_reg_reg_reg );
 9899 %}
 9900 
 9901 
 9902 // MACRO3 -- subDPR a mulDPR
 9903 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9904   predicate( UseSSE<=1 );
 9905   match(Set src2 (SubD (MulD src0 src1) src2));
 9906   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9907             "DMUL   ST,$src1\n\t"
 9908             "DSUBRp $src2,ST" %}
 9909   ins_cost(250);
 9910   ins_encode( Push_Reg_FPR(src0),
 9911               FMul_ST_reg(src1),
 9912               Opcode(0xDE), Opc_plus(0xE0,src2));
 9913   ins_pipe( fpu_reg_reg_reg );
 9914 %}
 9915 
 9916 
 9917 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9918   predicate( UseSSE<=1 );
 9919   match(Set dst (DivD dst src));
 9920 
 9921   format %{ "FLD    $src\n\t"
 9922             "FDIVp  $dst,ST" %}
 9923   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9924   ins_cost(150);
 9925   ins_encode( Push_Reg_DPR(src),
 9926               OpcP, RegOpc(dst) );
 9927   ins_pipe( fpu_reg_reg );
 9928 %}
 9929 
 9930 // Strict FP instruction biases argument before division then
 9931 // biases result, to avoid double rounding of subnormals.
 9932 //
 9933 // scale dividend by multiplying dividend by 2^(-15360)
 9934 // load divisor
 9935 // divide scaled dividend by divisor
 9936 // rescale quotient by 2^(15360)
 9937 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9943 
 9944   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9945             "DMULp  $dst,ST\n\t"
 9946             "FLD    $src\n\t"
 9947             "FDIVp  $dst,ST\n\t"
 9948             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9949             "DMULp  $dst,ST\n\t" %}
 9950   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9951   ins_encode( strictfp_bias1(dst),
 9952               Push_Reg_DPR(src),
 9953               OpcP, RegOpc(dst),
 9954               strictfp_bias2(dst) );
 9955   ins_pipe( fpu_reg_reg );
 9956 %}
 9957 
 9958 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9959   predicate(UseSSE<=1);
 9960   match(Set dst (ModD dst src));
 9961   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9962 
 9963   format %{ "DMOD   $dst,$src" %}
 9964   ins_cost(250);
 9965   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9966               emitModDPR(),
 9967               Push_Result_Mod_DPR(src),
 9968               Pop_Reg_DPR(dst));
 9969   ins_pipe( pipe_slow );
 9970 %}
 9971 
 9972 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9973   predicate(UseSSE>=2);
 9974   match(Set dst (ModD src0 src1));
 9975   effect(KILL rax, KILL cr);
 9976 
 9977   format %{ "SUB    ESP,8\t # DMOD\n"
 9978           "\tMOVSD  [ESP+0],$src1\n"
 9979           "\tFLD_D  [ESP+0]\n"
 9980           "\tMOVSD  [ESP+0],$src0\n"
 9981           "\tFLD_D  [ESP+0]\n"
 9982      "loop:\tFPREM\n"
 9983           "\tFWAIT\n"
 9984           "\tFNSTSW AX\n"
 9985           "\tSAHF\n"
 9986           "\tJP     loop\n"
 9987           "\tFSTP_D [ESP+0]\n"
 9988           "\tMOVSD  $dst,[ESP+0]\n"
 9989           "\tADD    ESP,8\n"
 9990           "\tFSTP   ST0\t # Restore FPU Stack"
 9991     %}
 9992   ins_cost(250);
 9993   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9994   ins_pipe( pipe_slow );
 9995 %}
 9996 
 9997 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9998   predicate (UseSSE<=1);
 9999   match(Set dst(AtanD dst src));
10000   format %{ "DATA   $dst,$src" %}
10001   opcode(0xD9, 0xF3);
10002   ins_encode( Push_Reg_DPR(src),
10003               OpcP, OpcS, RegOpc(dst) );
10004   ins_pipe( pipe_slow );
10005 %}
10006 
10007 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10008   predicate (UseSSE>=2);
10009   match(Set dst(AtanD dst src));
10010   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10011   format %{ "DATA   $dst,$src" %}
10012   opcode(0xD9, 0xF3);
10013   ins_encode( Push_SrcD(src),
10014               OpcP, OpcS, Push_ResultD(dst) );
10015   ins_pipe( pipe_slow );
10016 %}
10017 
10018 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10019   predicate (UseSSE<=1);
10020   match(Set dst (SqrtD src));
10021   format %{ "DSQRT  $dst,$src" %}
10022   opcode(0xFA, 0xD9);
10023   ins_encode( Push_Reg_DPR(src),
10024               OpcS, OpcP, Pop_Reg_DPR(dst) );
10025   ins_pipe( pipe_slow );
10026 %}
10027 
10028 //-------------Float Instructions-------------------------------
10029 // Float Math
10030 
10031 // Code for float compare:
10032 //     fcompp();
10033 //     fwait(); fnstsw_ax();
10034 //     sahf();
10035 //     movl(dst, unordered_result);
10036 //     jcc(Assembler::parity, exit);
10037 //     movl(dst, less_result);
10038 //     jcc(Assembler::below, exit);
10039 //     movl(dst, equal_result);
10040 //     jcc(Assembler::equal, exit);
10041 //     movl(dst, greater_result);
10042 //   exit:
10043 
10044 // P6 version of float compare, sets condition codes in EFLAGS
10045 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10046   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10047   match(Set cr (CmpF src1 src2));
10048   effect(KILL rax);
10049   ins_cost(150);
10050   format %{ "FLD    $src1\n\t"
10051             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10052             "JNP    exit\n\t"
10053             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10054             "SAHF\n"
10055      "exit:\tNOP               // avoid branch to branch" %}
10056   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10057   ins_encode( Push_Reg_DPR(src1),
10058               OpcP, RegOpc(src2),
10059               cmpF_P6_fixup );
10060   ins_pipe( pipe_slow );
10061 %}
10062 
10063 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10064   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10065   match(Set cr (CmpF src1 src2));
10066   ins_cost(100);
10067   format %{ "FLD    $src1\n\t"
10068             "FUCOMIP ST,$src2  // P6 instruction" %}
10069   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10070   ins_encode( Push_Reg_DPR(src1),
10071               OpcP, RegOpc(src2));
10072   ins_pipe( pipe_slow );
10073 %}
10074 
10075 
10076 // Compare & branch
10077 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10078   predicate(UseSSE == 0);
10079   match(Set cr (CmpF src1 src2));
10080   effect(KILL rax);
10081   ins_cost(200);
10082   format %{ "FLD    $src1\n\t"
10083             "FCOMp  $src2\n\t"
10084             "FNSTSW AX\n\t"
10085             "TEST   AX,0x400\n\t"
10086             "JZ,s   flags\n\t"
10087             "MOV    AH,1\t# unordered treat as LT\n"
10088     "flags:\tSAHF" %}
10089   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10090   ins_encode( Push_Reg_DPR(src1),
10091               OpcP, RegOpc(src2),
10092               fpu_flags);
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 // Compare vs zero into -1,0,1
10097 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10098   predicate(UseSSE == 0);
10099   match(Set dst (CmpF3 src1 zero));
10100   effect(KILL cr, KILL rax);
10101   ins_cost(280);
10102   format %{ "FTSTF  $dst,$src1" %}
10103   opcode(0xE4, 0xD9);
10104   ins_encode( Push_Reg_DPR(src1),
10105               OpcS, OpcP, PopFPU,
10106               CmpF_Result(dst));
10107   ins_pipe( pipe_slow );
10108 %}
10109 
10110 // Compare into -1,0,1
10111 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10112   predicate(UseSSE == 0);
10113   match(Set dst (CmpF3 src1 src2));
10114   effect(KILL cr, KILL rax);
10115   ins_cost(300);
10116   format %{ "FCMPF  $dst,$src1,$src2" %}
10117   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10118   ins_encode( Push_Reg_DPR(src1),
10119               OpcP, RegOpc(src2),
10120               CmpF_Result(dst));
10121   ins_pipe( pipe_slow );
10122 %}
10123 
10124 // float compare and set condition codes in EFLAGS by XMM regs
10125 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10126   predicate(UseSSE>=1);
10127   match(Set cr (CmpF src1 src2));
10128   ins_cost(145);
10129   format %{ "UCOMISS $src1,$src2\n\t"
10130             "JNP,s   exit\n\t"
10131             "PUSHF\t# saw NaN, set CF\n\t"
10132             "AND     [rsp], #0xffffff2b\n\t"
10133             "POPF\n"
10134     "exit:" %}
10135   ins_encode %{
10136     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10137     emit_cmpfp_fixup(_masm);
10138   %}
10139   ins_pipe( pipe_slow );
10140 %}
10141 
10142 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10143   predicate(UseSSE>=1);
10144   match(Set cr (CmpF src1 src2));
10145   ins_cost(100);
10146   format %{ "UCOMISS $src1,$src2" %}
10147   ins_encode %{
10148     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10149   %}
10150   ins_pipe( pipe_slow );
10151 %}
10152 
10153 // float compare and set condition codes in EFLAGS by XMM regs
10154 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10155   predicate(UseSSE>=1);
10156   match(Set cr (CmpF src1 (LoadF src2)));
10157   ins_cost(165);
10158   format %{ "UCOMISS $src1,$src2\n\t"
10159             "JNP,s   exit\n\t"
10160             "PUSHF\t# saw NaN, set CF\n\t"
10161             "AND     [rsp], #0xffffff2b\n\t"
10162             "POPF\n"
10163     "exit:" %}
10164   ins_encode %{
10165     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10166     emit_cmpfp_fixup(_masm);
10167   %}
10168   ins_pipe( pipe_slow );
10169 %}
10170 
10171 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10172   predicate(UseSSE>=1);
10173   match(Set cr (CmpF src1 (LoadF src2)));
10174   ins_cost(100);
10175   format %{ "UCOMISS $src1,$src2" %}
10176   ins_encode %{
10177     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10178   %}
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 // Compare into -1,0,1 in XMM
10183 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10184   predicate(UseSSE>=1);
10185   match(Set dst (CmpF3 src1 src2));
10186   effect(KILL cr);
10187   ins_cost(255);
10188   format %{ "UCOMISS $src1, $src2\n\t"
10189             "MOV     $dst, #-1\n\t"
10190             "JP,s    done\n\t"
10191             "JB,s    done\n\t"
10192             "SETNE   $dst\n\t"
10193             "MOVZB   $dst, $dst\n"
10194     "done:" %}
10195   ins_encode %{
10196     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10197     emit_cmpfp3(_masm, $dst$$Register);
10198   %}
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 // Compare into -1,0,1 in XMM and memory
10203 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10204   predicate(UseSSE>=1);
10205   match(Set dst (CmpF3 src1 (LoadF src2)));
10206   effect(KILL cr);
10207   ins_cost(275);
10208   format %{ "UCOMISS $src1, $src2\n\t"
10209             "MOV     $dst, #-1\n\t"
10210             "JP,s    done\n\t"
10211             "JB,s    done\n\t"
10212             "SETNE   $dst\n\t"
10213             "MOVZB   $dst, $dst\n"
10214     "done:" %}
10215   ins_encode %{
10216     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10217     emit_cmpfp3(_masm, $dst$$Register);
10218   %}
10219   ins_pipe( pipe_slow );
10220 %}
10221 
10222 // Spill to obtain 24-bit precision
10223 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10224   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10225   match(Set dst (SubF src1 src2));
10226 
10227   format %{ "FSUB   $dst,$src1 - $src2" %}
10228   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10229   ins_encode( Push_Reg_FPR(src1),
10230               OpcReg_FPR(src2),
10231               Pop_Mem_FPR(dst) );
10232   ins_pipe( fpu_mem_reg_reg );
10233 %}
10234 //
10235 // This instruction does not round to 24-bits
10236 instruct subFPR_reg(regFPR dst, regFPR src) %{
10237   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10238   match(Set dst (SubF dst src));
10239 
10240   format %{ "FSUB   $dst,$src" %}
10241   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10242   ins_encode( Push_Reg_FPR(src),
10243               OpcP, RegOpc(dst) );
10244   ins_pipe( fpu_reg_reg );
10245 %}
10246 
10247 // Spill to obtain 24-bit precision
10248 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10249   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10250   match(Set dst (AddF src1 src2));
10251 
10252   format %{ "FADD   $dst,$src1,$src2" %}
10253   opcode(0xD8, 0x0); /* D8 C0+i */
10254   ins_encode( Push_Reg_FPR(src2),
10255               OpcReg_FPR(src1),
10256               Pop_Mem_FPR(dst) );
10257   ins_pipe( fpu_mem_reg_reg );
10258 %}
10259 //
10260 // This instruction does not round to 24-bits
10261 instruct addFPR_reg(regFPR dst, regFPR src) %{
10262   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10263   match(Set dst (AddF dst src));
10264 
10265   format %{ "FLD    $src\n\t"
10266             "FADDp  $dst,ST" %}
10267   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10268   ins_encode( Push_Reg_FPR(src),
10269               OpcP, RegOpc(dst) );
10270   ins_pipe( fpu_reg_reg );
10271 %}
10272 
10273 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10274   predicate(UseSSE==0);
10275   match(Set dst (AbsF src));
10276   ins_cost(100);
10277   format %{ "FABS" %}
10278   opcode(0xE1, 0xD9);
10279   ins_encode( OpcS, OpcP );
10280   ins_pipe( fpu_reg_reg );
10281 %}
10282 
10283 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10284   predicate(UseSSE==0);
10285   match(Set dst (NegF src));
10286   ins_cost(100);
10287   format %{ "FCHS" %}
10288   opcode(0xE0, 0xD9);
10289   ins_encode( OpcS, OpcP );
10290   ins_pipe( fpu_reg_reg );
10291 %}
10292 
10293 // Cisc-alternate to addFPR_reg
10294 // Spill to obtain 24-bit precision
10295 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10296   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10297   match(Set dst (AddF src1 (LoadF src2)));
10298 
10299   format %{ "FLD    $src2\n\t"
10300             "FADD   ST,$src1\n\t"
10301             "FSTP_S $dst" %}
10302   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10303   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10304               OpcReg_FPR(src1),
10305               Pop_Mem_FPR(dst) );
10306   ins_pipe( fpu_mem_reg_mem );
10307 %}
10308 //
10309 // Cisc-alternate to addFPR_reg
10310 // This instruction does not round to 24-bits
10311 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10312   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10313   match(Set dst (AddF dst (LoadF src)));
10314 
10315   format %{ "FADD   $dst,$src" %}
10316   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10317   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10318               OpcP, RegOpc(dst) );
10319   ins_pipe( fpu_reg_mem );
10320 %}
10321 
// Following two instructions for _222_mpegaudio
10323 // Spill to obtain 24-bit precision
10324 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10325   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10326   match(Set dst (AddF src1 src2));
10327 
10328   format %{ "FADD   $dst,$src1,$src2" %}
10329   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10330   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10331               OpcReg_FPR(src2),
10332               Pop_Mem_FPR(dst) );
10333   ins_pipe( fpu_mem_reg_mem );
10334 %}
10335 
10336 // Cisc-spill variant
10337 // Spill to obtain 24-bit precision
10338 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10339   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10340   match(Set dst (AddF src1 (LoadF src2)));
10341 
10342   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10343   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10344   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10345               set_instruction_start,
10346               OpcP, RMopc_Mem(secondary,src1),
10347               Pop_Mem_FPR(dst) );
10348   ins_pipe( fpu_mem_mem_mem );
10349 %}
10350 
10351 // Spill to obtain 24-bit precision
10352 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10353   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10354   match(Set dst (AddF src1 src2));
10355 
10356   format %{ "FADD   $dst,$src1,$src2" %}
10357   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10358   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10359               set_instruction_start,
10360               OpcP, RMopc_Mem(secondary,src1),
10361               Pop_Mem_FPR(dst) );
10362   ins_pipe( fpu_mem_mem_mem );
10363 %}
10364 
10365 
10366 // Spill to obtain 24-bit precision
10367 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10368   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10369   match(Set dst (AddF src con));
10370   format %{ "FLD    $src\n\t"
10371             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10372             "FSTP_S $dst"  %}
10373   ins_encode %{
10374     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10375     __ fadd_s($constantaddress($con));
10376     __ fstp_s(Address(rsp, $dst$$disp));
10377   %}
10378   ins_pipe(fpu_mem_reg_con);
10379 %}
10380 //
10381 // This instruction does not round to 24-bits
10382 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10383   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10384   match(Set dst (AddF src con));
10385   format %{ "FLD    $src\n\t"
10386             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10387             "FSTP   $dst"  %}
10388   ins_encode %{
10389     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10390     __ fadd_s($constantaddress($con));
10391     __ fstp_d($dst$$reg);
10392   %}
10393   ins_pipe(fpu_reg_reg_con);
10394 %}
10395 
10396 // Spill to obtain 24-bit precision
10397 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10398   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10399   match(Set dst (MulF src1 src2));
10400 
10401   format %{ "FLD    $src1\n\t"
10402             "FMUL   $src2\n\t"
10403             "FSTP_S $dst"  %}
10404   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10405   ins_encode( Push_Reg_FPR(src1),
10406               OpcReg_FPR(src2),
10407               Pop_Mem_FPR(dst) );
10408   ins_pipe( fpu_mem_reg_reg );
10409 %}
10410 //
10411 // This instruction does not round to 24-bits
10412 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10413   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10414   match(Set dst (MulF src1 src2));
10415 
10416   format %{ "FLD    $src1\n\t"
10417             "FMUL   $src2\n\t"
10418             "FSTP_S $dst"  %}
10419   opcode(0xD8, 0x1); /* D8 C8+i */
10420   ins_encode( Push_Reg_FPR(src2),
10421               OpcReg_FPR(src1),
10422               Pop_Reg_FPR(dst) );
10423   ins_pipe( fpu_reg_reg_reg );
10424 %}
10425 
10426 
10427 // Spill to obtain 24-bit precision
10428 // Cisc-alternate to reg-reg multiply
10429 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10430   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10431   match(Set dst (MulF src1 (LoadF src2)));
10432 
10433   format %{ "FLD_S  $src2\n\t"
10434             "FMUL   $src1\n\t"
10435             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               OpcReg_FPR(src1),
10439               Pop_Mem_FPR(dst) );
10440   ins_pipe( fpu_mem_reg_mem );
10441 %}
10442 //
10443 // This instruction does not round to 24-bits
10444 // Cisc-alternate to reg-reg multiply
10445 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10446   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447   match(Set dst (MulF src1 (LoadF src2)));
10448 
10449   format %{ "FMUL   $dst,$src1,$src2" %}
10450   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10451   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10452               OpcReg_FPR(src1),
10453               Pop_Reg_FPR(dst) );
10454   ins_pipe( fpu_reg_reg_mem );
10455 %}
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10459   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (MulF src1 src2));
10461 
10462   format %{ "FMUL   $dst,$src1,$src2" %}
10463   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10464   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10465               set_instruction_start,
10466               OpcP, RMopc_Mem(secondary,src1),
10467               Pop_Mem_FPR(dst) );
10468   ins_pipe( fpu_mem_mem_mem );
10469 %}
10470 
10471 // Spill to obtain 24-bit precision
10472 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10473   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10474   match(Set dst (MulF src con));
10475 
10476   format %{ "FLD    $src\n\t"
10477             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10478             "FSTP_S $dst"  %}
10479   ins_encode %{
10480     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10481     __ fmul_s($constantaddress($con));
10482     __ fstp_s(Address(rsp, $dst$$disp));
10483   %}
10484   ins_pipe(fpu_mem_reg_con);
10485 %}
10486 //
10487 // This instruction does not round to 24-bits
10488 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10489   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10490   match(Set dst (MulF src con));
10491 
10492   format %{ "FLD    $src\n\t"
10493             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10494             "FSTP   $dst"  %}
10495   ins_encode %{
10496     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10497     __ fmul_s($constantaddress($con));
10498     __ fstp_d($dst$$reg);
10499   %}
10500   ins_pipe(fpu_reg_reg_con);
10501 %}
10502 
10503 
10504 //
10505 // MACRO1 -- subsume unshared load into mulFPR
10506 // This instruction does not round to 24-bits
10507 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10508   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10509   match(Set dst (MulF (LoadF mem1) src));
10510 
10511   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10512             "FMUL   ST,$src\n\t"
10513             "FSTP   $dst" %}
10514   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10515   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10516               OpcReg_FPR(src),
10517               Pop_Reg_FPR(dst) );
10518   ins_pipe( fpu_reg_reg_mem );
10519 %}
10520 //
10521 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10522 // This instruction does not round to 24-bits
10523 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10524   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10525   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10526   ins_cost(95);
10527 
10528   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10529             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10530             "FADD   ST,$src2\n\t"
10531             "FSTP   $dst" %}
10532   opcode(0xD9); /* LoadF D9 /0 */
10533   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10534               FMul_ST_reg(src1),
10535               FAdd_ST_reg(src2),
10536               Pop_Reg_FPR(dst) );
10537   ins_pipe( fpu_reg_mem_reg_reg );
10538 %}
10539 
10540 // MACRO3 -- addFPR a mulFPR
10541 // This instruction does not round to 24-bits.  It is a '2-address'
10542 // instruction in that the result goes back to src2.  This eliminates
10543 // a move from the macro; possibly the register allocator will have
10544 // to add it back (and maybe not).
10545 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10546   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10547   match(Set src2 (AddF (MulF src0 src1) src2));
10548 
10549   format %{ "FLD    $src0     ===MACRO3===\n\t"
10550             "FMUL   ST,$src1\n\t"
10551             "FADDP  $src2,ST" %}
10552   opcode(0xD9); /* LoadF D9 /0 */
10553   ins_encode( Push_Reg_FPR(src0),
10554               FMul_ST_reg(src1),
10555               FAddP_reg_ST(src2) );
10556   ins_pipe( fpu_reg_reg_reg );
10557 %}
10558 
10559 // MACRO4 -- divFPR subFPR
10560 // This instruction does not round to 24-bits
10561 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10562   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10563   match(Set dst (DivF (SubF src2 src1) src3));
10564 
10565   format %{ "FLD    $src2   ===MACRO4===\n\t"
10566             "FSUB   ST,$src1\n\t"
10567             "FDIV   ST,$src3\n\t"
10568             "FSTP  $dst" %}
10569   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10570   ins_encode( Push_Reg_FPR(src2),
10571               subFPR_divFPR_encode(src1,src3),
10572               Pop_Reg_FPR(dst) );
10573   ins_pipe( fpu_reg_reg_reg_reg );
10574 %}
10575 
10576 // Spill to obtain 24-bit precision
10577 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10578   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10579   match(Set dst (DivF src1 src2));
10580 
10581   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6*/
10583   ins_encode( Push_Reg_FPR(src1),
10584               OpcReg_FPR(src2),
10585               Pop_Mem_FPR(dst) );
10586   ins_pipe( fpu_mem_reg_reg );
10587 %}
10588 //
10589 // This instruction does not round to 24-bits
10590 instruct divFPR_reg(regFPR dst, regFPR src) %{
10591   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10592   match(Set dst (DivF dst src));
10593 
10594   format %{ "FDIV   $dst,$src" %}
10595   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10596   ins_encode( Push_Reg_FPR(src),
10597               OpcP, RegOpc(dst) );
10598   ins_pipe( fpu_reg_reg );
10599 %}
10600 
10601 
10602 // Spill to obtain 24-bit precision
10603 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10604   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10605   match(Set dst (ModF src1 src2));
10606   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10607 
10608   format %{ "FMOD   $dst,$src1,$src2" %}
10609   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10610               emitModDPR(),
10611               Push_Result_Mod_DPR(src2),
10612               Pop_Mem_FPR(dst));
10613   ins_pipe( pipe_slow );
10614 %}
10615 //
10616 // This instruction does not round to 24-bits
10617 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10618   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10619   match(Set dst (ModF dst src));
10620   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10621 
10622   format %{ "FMOD   $dst,$src" %}
10623   ins_encode(Push_Reg_Mod_DPR(dst, src),
10624               emitModDPR(),
10625               Push_Result_Mod_DPR(src),
10626               Pop_Reg_FPR(dst));
10627   ins_pipe( pipe_slow );
10628 %}
10629 
10630 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10631   predicate(UseSSE>=1);
10632   match(Set dst (ModF src0 src1));
10633   effect(KILL rax, KILL cr);
10634   format %{ "SUB    ESP,4\t # FMOD\n"
10635           "\tMOVSS  [ESP+0],$src1\n"
10636           "\tFLD_S  [ESP+0]\n"
10637           "\tMOVSS  [ESP+0],$src0\n"
10638           "\tFLD_S  [ESP+0]\n"
10639      "loop:\tFPREM\n"
10640           "\tFWAIT\n"
10641           "\tFNSTSW AX\n"
10642           "\tSAHF\n"
10643           "\tJP     loop\n"
10644           "\tFSTP_S [ESP+0]\n"
10645           "\tMOVSS  $dst,[ESP+0]\n"
10646           "\tADD    ESP,4\n"
10647           "\tFSTP   ST0\t # Restore FPU Stack"
10648     %}
10649   ins_cost(250);
10650   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10651   ins_pipe( pipe_slow );
10652 %}
10653 
10654 
10655 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10657 
10658 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10659   predicate(UseSSE==0);
10660   match(Set dst (RoundFloat src));
10661   ins_cost(125);
10662   format %{ "FST_S  $dst,$src\t# F-round" %}
10663   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10664   ins_pipe( fpu_mem_reg );
10665 %}
10666 
10667 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10668   predicate(UseSSE<=1);
10669   match(Set dst (RoundDouble src));
10670   ins_cost(125);
10671   format %{ "FST_D  $dst,$src\t# D-round" %}
10672   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10673   ins_pipe( fpu_mem_reg );
10674 %}
10675 
// Force rounding to 24-bit precision and 8-bit exponent
10677 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10678   predicate(UseSSE==0);
10679   match(Set dst (ConvD2F src));
10680   format %{ "FST_S  $dst,$src\t# F-round" %}
10681   expand %{
10682     roundFloat_mem_reg(dst,src);
10683   %}
10684 %}
10685 
// Force rounding to 24-bit precision and 8-bit exponent
10687 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10688   predicate(UseSSE==1);
10689   match(Set dst (ConvD2F src));
10690   effect( KILL cr );
10691   format %{ "SUB    ESP,4\n\t"
10692             "FST_S  [ESP],$src\t# F-round\n\t"
10693             "MOVSS  $dst,[ESP]\n\t"
10694             "ADD ESP,4" %}
10695   ins_encode %{
10696     __ subptr(rsp, 4);
10697     if ($src$$reg != FPR1L_enc) {
10698       __ fld_s($src$$reg-1);
10699       __ fstp_s(Address(rsp, 0));
10700     } else {
10701       __ fst_s(Address(rsp, 0));
10702     }
10703     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10704     __ addptr(rsp, 4);
10705   %}
10706   ins_pipe( pipe_slow );
10707 %}
10708 
10709 // Force rounding double precision to single precision
10710 instruct convD2F_reg(regF dst, regD src) %{
10711   predicate(UseSSE>=2);
10712   match(Set dst (ConvD2F src));
10713   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10714   ins_encode %{
10715     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10716   %}
10717   ins_pipe( pipe_slow );
10718 %}
10719 
10720 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10721   predicate(UseSSE==0);
10722   match(Set dst (ConvF2D src));
10723   format %{ "FST_S  $dst,$src\t# D-round" %}
10724   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10725   ins_pipe( fpu_reg_reg );
10726 %}
10727 
10728 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10729   predicate(UseSSE==1);
10730   match(Set dst (ConvF2D src));
10731   format %{ "FST_D  $dst,$src\t# D-round" %}
10732   expand %{
10733     roundDouble_mem_reg(dst,src);
10734   %}
10735 %}
10736 
10737 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10738   predicate(UseSSE==1);
10739   match(Set dst (ConvF2D src));
10740   effect( KILL cr );
10741   format %{ "SUB    ESP,4\n\t"
10742             "MOVSS  [ESP] $src\n\t"
10743             "FLD_S  [ESP]\n\t"
10744             "ADD    ESP,4\n\t"
10745             "FSTP   $dst\t# D-round" %}
10746   ins_encode %{
10747     __ subptr(rsp, 4);
10748     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10749     __ fld_s(Address(rsp, 0));
10750     __ addptr(rsp, 4);
10751     __ fstp_d($dst$$reg);
10752   %}
10753   ins_pipe( pipe_slow );
10754 %}
10755 
10756 instruct convF2D_reg(regD dst, regF src) %{
10757   predicate(UseSSE>=2);
10758   match(Set dst (ConvF2D src));
10759   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10760   ins_encode %{
10761     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10762   %}
10763   ins_pipe( pipe_slow );
10764 %}
10765 
10766 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10767 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10768   predicate(UseSSE<=1);
10769   match(Set dst (ConvD2I src));
10770   effect( KILL tmp, KILL cr );
10771   format %{ "FLD    $src\t# Convert double to int \n\t"
10772             "FLDCW  trunc mode\n\t"
10773             "SUB    ESP,4\n\t"
10774             "FISTp  [ESP + #0]\n\t"
10775             "FLDCW  std/24-bit mode\n\t"
10776             "POP    EAX\n\t"
10777             "CMP    EAX,0x80000000\n\t"
10778             "JNE,s  fast\n\t"
10779             "FLD_D  $src\n\t"
10780             "CALL   d2i_wrapper\n"
10781       "fast:" %}
10782   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10783   ins_pipe( pipe_slow );
10784 %}
10785 
10786 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10787 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10788   predicate(UseSSE>=2);
10789   match(Set dst (ConvD2I src));
10790   effect( KILL tmp, KILL cr );
10791   format %{ "CVTTSD2SI $dst, $src\n\t"
10792             "CMP    $dst,0x80000000\n\t"
10793             "JNE,s  fast\n\t"
10794             "SUB    ESP, 8\n\t"
10795             "MOVSD  [ESP], $src\n\t"
10796             "FLD_D  [ESP]\n\t"
10797             "ADD    ESP, 8\n\t"
10798             "CALL   d2i_wrapper\n"
10799       "fast:" %}
10800   ins_encode %{
10801     Label fast;
10802     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10803     __ cmpl($dst$$Register, 0x80000000);
10804     __ jccb(Assembler::notEqual, fast);
10805     __ subptr(rsp, 8);
10806     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10807     __ fld_d(Address(rsp, 0));
10808     __ addptr(rsp, 8);
10809     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10810     __ post_call_nop();
10811     __ bind(fast);
10812   %}
10813   ins_pipe( pipe_slow );
10814 %}
10815 
10816 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10817   predicate(UseSSE<=1);
10818   match(Set dst (ConvD2L src));
10819   effect( KILL cr );
10820   format %{ "FLD    $src\t# Convert double to long\n\t"
10821             "FLDCW  trunc mode\n\t"
10822             "SUB    ESP,8\n\t"
10823             "FISTp  [ESP + #0]\n\t"
10824             "FLDCW  std/24-bit mode\n\t"
10825             "POP    EAX\n\t"
10826             "POP    EDX\n\t"
10827             "CMP    EDX,0x80000000\n\t"
10828             "JNE,s  fast\n\t"
10829             "TEST   EAX,EAX\n\t"
10830             "JNE,s  fast\n\t"
10831             "FLD    $src\n\t"
10832             "CALL   d2l_wrapper\n"
10833       "fast:" %}
10834   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10835   ins_pipe( pipe_slow );
10836 %}
10837 
10838 // XMM lacks a float/double->long conversion, so use the old FPU stack.
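// The operand is spilled to memory, reloaded onto the x87 stack, and converted
// with FISTP while the FPU rounding mode is temporarily set to truncate.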
10839 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10840   predicate (UseSSE>=2);
10841   match(Set dst (ConvD2L src));
10842   effect( KILL cr );
10843   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10844             "MOVSD  [ESP],$src\n\t"
10845             "FLD_D  [ESP]\n\t"
10846             "FLDCW  trunc mode\n\t"
10847             "FISTp  [ESP + #0]\n\t"
10848             "FLDCW  std/24-bit mode\n\t"
10849             "POP    EAX\n\t"
10850             "POP    EDX\n\t"
10851             "CMP    EDX,0x80000000\n\t"
10852             "JNE,s  fast\n\t"
10853             "TEST   EAX,EAX\n\t"
10854             "JNE,s  fast\n\t"
10855             "SUB    ESP,8\n\t"
10856             "MOVSD  [ESP],$src\n\t"
10857             "FLD_D  [ESP]\n\t"
10858             "ADD    ESP,8\n\t"
10859             "CALL   d2l_wrapper\n"
10860       "fast:" %}
10861   ins_encode %{
10862     Label fast;
10863     __ subptr(rsp, 8);
10864     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10865     __ fld_d(Address(rsp, 0));
10866     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10867     __ fistp_d(Address(rsp, 0));
10868     // Restore the rounding mode, mask the exception
10869     if (Compile::current()->in_24_bit_fp_mode()) {
10870       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10871     } else {
10872       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10873     }
10874     // Load the converted long, adjust CPU stack
10875     __ pop(rax);
10876     __ pop(rdx);
10877     __ cmpl(rdx, 0x80000000);
10878     __ jccb(Assembler::notEqual, fast);
10879     __ testl(rax, rax);
10880     __ jccb(Assembler::notEqual, fast);
10881     __ subptr(rsp, 8);
10882     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10883     __ fld_d(Address(rsp, 0));
10884     __ addptr(rsp, 8);
10885     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10886     __ post_call_nop();
10887     __ bind(fast);
10888   %}
10889   ins_pipe( pipe_slow );
10890 %}
10891 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or convert a NaN; we check for this and go
// the slow path if needed.
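// (0x80000000 is the 'integer indefinite' value that FIST and CVTTSS2SI/CVTTSD2SI
// produce on overflow or NaN, which is why the encodings compare against it.)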
10898 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10899   predicate(UseSSE==0);
10900   match(Set dst (ConvF2I src));
10901   effect( KILL tmp, KILL cr );
10902   format %{ "FLD    $src\t# Convert float to int \n\t"
10903             "FLDCW  trunc mode\n\t"
10904             "SUB    ESP,4\n\t"
10905             "FISTp  [ESP + #0]\n\t"
10906             "FLDCW  std/24-bit mode\n\t"
10907             "POP    EAX\n\t"
10908             "CMP    EAX,0x80000000\n\t"
10909             "JNE,s  fast\n\t"
10910             "FLD    $src\n\t"
10911             "CALL   d2i_wrapper\n"
10912       "fast:" %}
10913   // DPR2I_encoding works for FPR2I
10914   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10915   ins_pipe( pipe_slow );
10916 %}
10917 
10918 // Convert a float in xmm to an int reg.
10919 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10920   predicate(UseSSE>=1);
10921   match(Set dst (ConvF2I src));
10922   effect( KILL tmp, KILL cr );
10923   format %{ "CVTTSS2SI $dst, $src\n\t"
10924             "CMP    $dst,0x80000000\n\t"
10925             "JNE,s  fast\n\t"
10926             "SUB    ESP, 4\n\t"
10927             "MOVSS  [ESP], $src\n\t"
10928             "FLD    [ESP]\n\t"
10929             "ADD    ESP, 4\n\t"
10930             "CALL   d2i_wrapper\n"
10931       "fast:" %}
10932   ins_encode %{
10933     Label fast;
10934     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10935     __ cmpl($dst$$Register, 0x80000000);
10936     __ jccb(Assembler::notEqual, fast);
10937     __ subptr(rsp, 4);
10938     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10939     __ fld_s(Address(rsp, 0));
10940     __ addptr(rsp, 4);
10941     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10942     __ post_call_nop();
10943     __ bind(fast);
10944   %}
10945   ins_pipe( pipe_slow );
10946 %}
10947 
10948 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10949   predicate(UseSSE==0);
10950   match(Set dst (ConvF2L src));
10951   effect( KILL cr );
10952   format %{ "FLD    $src\t# Convert float to long\n\t"
10953             "FLDCW  trunc mode\n\t"
10954             "SUB    ESP,8\n\t"
10955             "FISTp  [ESP + #0]\n\t"
10956             "FLDCW  std/24-bit mode\n\t"
10957             "POP    EAX\n\t"
10958             "POP    EDX\n\t"
10959             "CMP    EDX,0x80000000\n\t"
10960             "JNE,s  fast\n\t"
10961             "TEST   EAX,EAX\n\t"
10962             "JNE,s  fast\n\t"
10963             "FLD    $src\n\t"
10964             "CALL   d2l_wrapper\n"
10965       "fast:" %}
10966   // DPR2L_encoding works for FPR2L
10967   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10968   ins_pipe( pipe_slow );
10969 %}
10970 
10971 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10972 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10973   predicate (UseSSE>=1);
10974   match(Set dst (ConvF2L src));
10975   effect( KILL cr );
10976   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10977             "MOVSS  [ESP],$src\n\t"
10978             "FLD_S  [ESP]\n\t"
10979             "FLDCW  trunc mode\n\t"
10980             "FISTp  [ESP + #0]\n\t"
10981             "FLDCW  std/24-bit mode\n\t"
10982             "POP    EAX\n\t"
10983             "POP    EDX\n\t"
10984             "CMP    EDX,0x80000000\n\t"
10985             "JNE,s  fast\n\t"
10986             "TEST   EAX,EAX\n\t"
10987             "JNE,s  fast\n\t"
10988             "SUB    ESP,4\t# Convert float to long\n\t"
10989             "MOVSS  [ESP],$src\n\t"
10990             "FLD_S  [ESP]\n\t"
10991             "ADD    ESP,4\n\t"
10992             "CALL   d2l_wrapper\n"
10993       "fast:" %}
10994   ins_encode %{
10995     Label fast;
10996     __ subptr(rsp, 8);
10997     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10998     __ fld_s(Address(rsp, 0));
10999     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11000     __ fistp_d(Address(rsp, 0));
11001     // Restore the rounding mode, mask the exception
11002     if (Compile::current()->in_24_bit_fp_mode()) {
11003       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11004     } else {
11005       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11006     }
11007     // Load the converted long, adjust CPU stack
11008     __ pop(rax);
11009     __ pop(rdx);
11010     __ cmpl(rdx, 0x80000000);
11011     __ jccb(Assembler::notEqual, fast);
11012     __ testl(rax, rax);
11013     __ jccb(Assembler::notEqual, fast);
11014     __ subptr(rsp, 4);
11015     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11016     __ fld_s(Address(rsp, 0));
11017     __ addptr(rsp, 4);
11018     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11019     __ post_call_nop();
11020     __ bind(fast);
11021   %}
11022   ins_pipe( pipe_slow );
11023 %}
11024 
11025 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11026   predicate( UseSSE<=1 );
11027   match(Set dst (ConvI2D src));
11028   format %{ "FILD   $src\n\t"
11029             "FSTP   $dst" %}
11030   opcode(0xDB, 0x0);  /* DB /0 */
11031   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11032   ins_pipe( fpu_reg_mem );
11033 %}
11034 
11035 instruct convI2D_reg(regD dst, rRegI src) %{
11036   predicate( UseSSE>=2 && !UseXmmI2D );
11037   match(Set dst (ConvI2D src));
11038   format %{ "CVTSI2SD $dst,$src" %}
11039   ins_encode %{
11040     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11041   %}
11042   ins_pipe( pipe_slow );
11043 %}
11044 
11045 instruct convI2D_mem(regD dst, memory mem) %{
11046   predicate( UseSSE>=2 );
11047   match(Set dst (ConvI2D (LoadI mem)));
11048   format %{ "CVTSI2SD $dst,$mem" %}
11049   ins_encode %{
11050     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11051   %}
11052   ins_pipe( pipe_slow );
11053 %}
11054 
11055 instruct convXI2D_reg(regD dst, rRegI src)
11056 %{
11057   predicate( UseSSE>=2 && UseXmmI2D );
11058   match(Set dst (ConvI2D src));
11059 
11060   format %{ "MOVD  $dst,$src\n\t"
11061             "CVTDQ2PD $dst,$dst\t# i2d" %}
11062   ins_encode %{
11063     __ movdl($dst$$XMMRegister, $src$$Register);
11064     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11065   %}
11066   ins_pipe(pipe_slow); // XXX
11067 %}
11068 
11069 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11070   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11071   match(Set dst (ConvI2D (LoadI mem)));
11072   format %{ "FILD   $mem\n\t"
11073             "FSTP   $dst" %}
11074   opcode(0xDB);      /* DB /0 */
11075   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11076               Pop_Reg_DPR(dst));
11077   ins_pipe( fpu_reg_mem );
11078 %}
11079 
11080 // Convert a byte to a float; no rounding step needed.
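// (Any value in the range 0..255 is exactly representable with a 24-bit mantissa, so the result
// needs no further rounding even when the FPU runs in 24-bit precision mode.)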
11081 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11082   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11083   match(Set dst (ConvI2F src));
11084   format %{ "FILD   $src\n\t"
11085             "FSTP   $dst" %}
11086 
11087   opcode(0xDB, 0x0);  /* DB /0 */
11088   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11089   ins_pipe( fpu_reg_mem );
11090 %}
11091 
11092 // In 24-bit mode, force rounding to single precision by storing back out
11093 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11094   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11095   match(Set dst (ConvI2F src));
11096   ins_cost(200);
11097   format %{ "FILD   $src\n\t"
11098             "FSTP_S $dst" %}
11099   opcode(0xDB, 0x0);  /* DB /0 */
11100   ins_encode( Push_Mem_I(src),
11101               Pop_Mem_FPR(dst));
11102   ins_pipe( fpu_mem_mem );
11103 %}
11104 
11105 // In 24-bit mode, force rounding to single precision by storing back out
11106 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11107   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11108   match(Set dst (ConvI2F (LoadI mem)));
11109   ins_cost(200);
11110   format %{ "FILD   $mem\n\t"
11111             "FSTP_S $dst" %}
11112   opcode(0xDB);  /* DB /0 */
11113   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11114               Pop_Mem_FPR(dst));
11115   ins_pipe( fpu_mem_mem );
11116 %}
11117 
11118 // This instruction does not round to 24-bits
11119 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11120   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11121   match(Set dst (ConvI2F src));
11122   format %{ "FILD   $src\n\t"
11123             "FSTP   $dst" %}
11124   opcode(0xDB, 0x0);  /* DB /0 */
11125   ins_encode( Push_Mem_I(src),
11126               Pop_Reg_FPR(dst));
11127   ins_pipe( fpu_reg_mem );
11128 %}
11129 
11130 // This instruction does not round to 24-bits
11131 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11132   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11133   match(Set dst (ConvI2F (LoadI mem)));
11134   format %{ "FILD   $mem\n\t"
11135             "FSTP   $dst" %}
11136   opcode(0xDB);      /* DB /0 */
11137   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11138               Pop_Reg_FPR(dst));
11139   ins_pipe( fpu_reg_mem );
11140 %}
11141 
11142 // Convert an int to a float in xmm; no rounding step needed.
11143 instruct convI2F_reg(regF dst, rRegI src) %{
11144   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11145   match(Set dst (ConvI2F src));
11146   format %{ "CVTSI2SS $dst, $src" %}
11147   ins_encode %{
11148     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11149   %}
11150   ins_pipe( pipe_slow );
11151 %}
11152 
11153 instruct convXI2F_reg(regF dst, rRegI src)
11154 %{
11155   predicate( UseSSE>=2 && UseXmmI2F );
11156   match(Set dst (ConvI2F src));
11157 
11158   format %{ "MOVD  $dst,$src\n\t"
11159             "CVTDQ2PS $dst,$dst\t# i2f" %}
11160   ins_encode %{
11161     __ movdl($dst$$XMMRegister, $src$$Register);
11162     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11163   %}
11164   ins_pipe(pipe_slow); // XXX
11165 %}
11166 
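// Sign-extension example: src = 0xFFFFFFFE (-2) yields dst.lo = 0xFFFFFFFE and, after the
// arithmetic shift by 31, dst.hi = 0xFFFFFFFF, i.e. the 64-bit value -2.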
11167 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11168   match(Set dst (ConvI2L src));
11169   effect(KILL cr);
11170   ins_cost(375);
11171   format %{ "MOV    $dst.lo,$src\n\t"
11172             "MOV    $dst.hi,$src\n\t"
11173             "SAR    $dst.hi,31" %}
11174   ins_encode(convert_int_long(dst,src));
11175   ins_pipe( ialu_reg_reg_long );
11176 %}
11177 
11178 // Zero-extend convert int to long
11179 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11180   match(Set dst (AndL (ConvI2L src) mask) );
11181   effect( KILL flags );
11182   ins_cost(250);
11183   format %{ "MOV    $dst.lo,$src\n\t"
11184             "XOR    $dst.hi,$dst.hi" %}
11185   opcode(0x33); // XOR
11186   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11187   ins_pipe( ialu_reg_reg_long );
11188 %}
11189 
11190 // Zero-extend long
11191 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11192   match(Set dst (AndL src mask) );
11193   effect( KILL flags );
11194   ins_cost(250);
11195   format %{ "MOV    $dst.lo,$src.lo\n\t"
11196             "XOR    $dst.hi,$dst.hi\n\t" %}
11197   opcode(0x33); // XOR
11198   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11199   ins_pipe( ialu_reg_reg_long );
11200 %}
11201 
11202 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11203   predicate (UseSSE<=1);
11204   match(Set dst (ConvL2D src));
11205   effect( KILL cr );
11206   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11207             "PUSH   $src.lo\n\t"
11208             "FILD   ST,[ESP + #0]\n\t"
11209             "ADD    ESP,8\n\t"
11210             "FSTP_D $dst\t# D-round" %}
11211   opcode(0xDF, 0x5);  /* DF /5 */
11212   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11213   ins_pipe( pipe_slow );
11214 %}
11215 
11216 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11217   predicate (UseSSE>=2);
11218   match(Set dst (ConvL2D src));
11219   effect( KILL cr );
11220   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11221             "PUSH   $src.lo\n\t"
11222             "FILD_D [ESP]\n\t"
11223             "FSTP_D [ESP]\n\t"
11224             "MOVSD  $dst,[ESP]\n\t"
11225             "ADD    ESP,8" %}
11226   opcode(0xDF, 0x5);  /* DF /5 */
11227   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11228   ins_pipe( pipe_slow );
11229 %}
11230 
11231 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11232   predicate (UseSSE>=1);
11233   match(Set dst (ConvL2F src));
11234   effect( KILL cr );
11235   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11236             "PUSH   $src.lo\n\t"
11237             "FILD_D [ESP]\n\t"
11238             "FSTP_S [ESP]\n\t"
11239             "MOVSS  $dst,[ESP]\n\t"
11240             "ADD    ESP,8" %}
11241   opcode(0xDF, 0x5);  /* DF /5 */
11242   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11243   ins_pipe( pipe_slow );
11244 %}
11245 
11246 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11247   match(Set dst (ConvL2F src));
11248   effect( KILL cr );
11249   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11250             "PUSH   $src.lo\n\t"
11251             "FILD   ST,[ESP + #0]\n\t"
11252             "ADD    ESP,8\n\t"
11253             "FSTP_S $dst\t# F-round" %}
11254   opcode(0xDF, 0x5);  /* DF /5 */
11255   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11256   ins_pipe( pipe_slow );
11257 %}
11258 
11259 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11260   match(Set dst (ConvL2I src));
11261   effect( DEF dst, USE src );
11262   format %{ "MOV    $dst,$src.lo" %}
11263   ins_encode(enc_CopyL_Lo(dst,src));
11264   ins_pipe( ialu_reg_reg );
11265 %}
11266 
11267 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11268   match(Set dst (MoveF2I src));
11269   effect( DEF dst, USE src );
11270   ins_cost(100);
11271   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11272   ins_encode %{
11273     __ movl($dst$$Register, Address(rsp, $src$$disp));
11274   %}
11275   ins_pipe( ialu_reg_mem );
11276 %}
11277 
11278 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11279   predicate(UseSSE==0);
11280   match(Set dst (MoveF2I src));
11281   effect( DEF dst, USE src );
11282 
11283   ins_cost(125);
11284   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11285   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11286   ins_pipe( fpu_mem_reg );
11287 %}
11288 
11289 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11290   predicate(UseSSE>=1);
11291   match(Set dst (MoveF2I src));
11292   effect( DEF dst, USE src );
11293 
11294   ins_cost(95);
11295   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11296   ins_encode %{
11297     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11298   %}
11299   ins_pipe( pipe_slow );
11300 %}
11301 
11302 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11303   predicate(UseSSE>=2);
11304   match(Set dst (MoveF2I src));
11305   effect( DEF dst, USE src );
11306   ins_cost(85);
11307   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11308   ins_encode %{
11309     __ movdl($dst$$Register, $src$$XMMRegister);
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
11314 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11315   match(Set dst (MoveI2F src));
11316   effect( DEF dst, USE src );
11317 
11318   ins_cost(100);
11319   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11320   ins_encode %{
11321     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11322   %}
11323   ins_pipe( ialu_mem_reg );
11324 %}
11325 
11326 
11327 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11328   predicate(UseSSE==0);
11329   match(Set dst (MoveI2F src));
11330   effect(DEF dst, USE src);
11331 
11332   ins_cost(125);
11333   format %{ "FLD_S  $src\n\t"
11334             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11335   opcode(0xD9);               /* D9 /0, FLD m32real */
11336   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11337               Pop_Reg_FPR(dst) );
11338   ins_pipe( fpu_reg_mem );
11339 %}
11340 
11341 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11342   predicate(UseSSE>=1);
11343   match(Set dst (MoveI2F src));
11344   effect( DEF dst, USE src );
11345 
11346   ins_cost(95);
11347   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11348   ins_encode %{
11349     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11350   %}
11351   ins_pipe( pipe_slow );
11352 %}
11353 
11354 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11355   predicate(UseSSE>=2);
11356   match(Set dst (MoveI2F src));
11357   effect( DEF dst, USE src );
11358 
11359   ins_cost(85);
11360   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11361   ins_encode %{
11362     __ movdl($dst$$XMMRegister, $src$$Register);
11363   %}
11364   ins_pipe( pipe_slow );
11365 %}
11366 
11367 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11368   match(Set dst (MoveD2L src));
11369   effect(DEF dst, USE src);
11370 
11371   ins_cost(250);
11372   format %{ "MOV    $dst.lo,$src\n\t"
11373             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11374   opcode(0x8B, 0x8B);
11375   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11376   ins_pipe( ialu_mem_long_reg );
11377 %}
11378 
11379 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11380   predicate(UseSSE<=1);
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(125);
11385   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11386   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11387   ins_pipe( fpu_mem_reg );
11388 %}
11389 
11390 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11391   predicate(UseSSE>=2);
11392   match(Set dst (MoveD2L src));
11393   effect(DEF dst, USE src);
11394   ins_cost(95);
11395   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11396   ins_encode %{
11397     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11398   %}
11399   ins_pipe( pipe_slow );
11400 %}
11401 
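// The PSHUFLW immediate 0x4E (binary 01 00 11 10) swaps the two 32-bit halves of the low
// quadword, moving the upper half of the double into the low dword so the second MOVD can
// extract it.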
11402 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11403   predicate(UseSSE>=2);
11404   match(Set dst (MoveD2L src));
11405   effect(DEF dst, USE src, TEMP tmp);
11406   ins_cost(85);
11407   format %{ "MOVD   $dst.lo,$src\n\t"
11408             "PSHUFLW $tmp,$src,0x4E\n\t"
11409             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11410   ins_encode %{
11411     __ movdl($dst$$Register, $src$$XMMRegister);
11412     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11413     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11414   %}
11415   ins_pipe( pipe_slow );
11416 %}
11417 
11418 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11419   match(Set dst (MoveL2D src));
11420   effect(DEF dst, USE src);
11421 
11422   ins_cost(200);
11423   format %{ "MOV    $dst,$src.lo\n\t"
11424             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11425   opcode(0x89, 0x89);
11426   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11427   ins_pipe( ialu_mem_long_reg );
11428 %}
11429 
11430 
11431 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11432   predicate(UseSSE<=1);
11433   match(Set dst (MoveL2D src));
11434   effect(DEF dst, USE src);
11435   ins_cost(125);
11436 
11437   format %{ "FLD_D  $src\n\t"
11438             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11439   opcode(0xDD);               /* DD /0, FLD m64real */
11440   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11441               Pop_Reg_DPR(dst) );
11442   ins_pipe( fpu_reg_mem );
11443 %}
11444 
11445 
11446 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11447   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11448   match(Set dst (MoveL2D src));
11449   effect(DEF dst, USE src);
11450 
11451   ins_cost(95);
11452   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11453   ins_encode %{
11454     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11455   %}
11456   ins_pipe( pipe_slow );
11457 %}
11458 
11459 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11460   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11461   match(Set dst (MoveL2D src));
11462   effect(DEF dst, USE src);
11463 
11464   ins_cost(95);
11465   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11466   ins_encode %{
11467     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11473   predicate(UseSSE>=2);
11474   match(Set dst (MoveL2D src));
11475   effect(TEMP dst, USE src, TEMP tmp);
11476   ins_cost(85);
11477   format %{ "MOVD   $dst,$src.lo\n\t"
11478             "MOVD   $tmp,$src.hi\n\t"
11479             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11480   ins_encode %{
11481     __ movdl($dst$$XMMRegister, $src$$Register);
11482     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11483     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11484   %}
11485   ins_pipe( pipe_slow );
11486 %}
11487 
11488 //----------------------------- CompressBits/ExpandBits ------------------------
11489 
11490 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11491   predicate(n->bottom_type()->isa_long());
11492   match(Set dst (CompressBits src mask));
11493   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11494   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11495   ins_encode %{
11496     Label exit, partial_result;
11497     // Extract both the upper and lower 32 bits of the source into the destination register
11498     // pair in parallel, then merge the results so that the bits from the upper destination
11499     // register are laid out contiguously after the lower destination result.
11500     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11501     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11502     __ popcntl($rtmp$$Register, $mask$$Register);
11503     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11504     __ cmpl($rtmp$$Register, 32);
11505     __ jccb(Assembler::equal, exit);
11506     // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11507     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11508     // Shift left the contents of upper destination register by true bit count of lower mask register
11509     // and merge with lower destination register.
11510     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11511     __ orl($dst$$Register, $rtmp$$Register);
11512     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11513     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11514     // since contents of upper destination have already been copied to lower destination
11515     // register.
11516     __ cmpl($rtmp$$Register, 0);
11517     __ jccb(Assembler::greater, partial_result);
11518     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11519     __ jmp(exit);
11520     __ bind(partial_result);
11521     // Perform right shift over upper destination register to move out bits already copied
11522     // to lower destination register.
11523     __ subl($rtmp$$Register, 32);
11524     __ negl($rtmp$$Register);
11525     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11526     __ bind(exit);
11527   %}
11528   ins_pipe( pipe_slow );
11529 %}
11530 
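// Worked example with illustrative values: suppose mask.lo = 0x000000FF and mask.hi = 0x0000000F.
// dst.lo = pdep(src.lo, mask.lo) consumes the low 8 bits of src.lo.  Since popcount(mask.lo) = 8
// < 32, the remaining bits of src.lo (src.lo >> 8) are deposited into dst.hi under mask.hi; the
// lowest 32 - 8 = 24 set bits of mask.hi are then cleared (here mask.hi has only 4 set bits, so
// it becomes 0) before the bits of src.hi are deposited into any still-unused mask.hi positions
// and merged into dst.hi.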
11531 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11532   predicate(n->bottom_type()->isa_long());
11533   match(Set dst (ExpandBits src mask));
11534   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11535   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11536   ins_encode %{
11537     // The expand operation sequentially reads bits from the source register starting at the LSB
11538     // and lays them out in the destination register at the bit positions corresponding to set
11539     // bits in the mask register. Thus the number of source bits consumed equals the combined
11540     // set-bit count of the mask register pair.
11541     Label exit, mask_clipping;
11542     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11543     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11544     __ popcntl($rtmp$$Register, $mask$$Register);
11545     // If the set-bit count of the lower mask register is 32, then no bits of the lower source
11546     // register feed into the upper destination register.
11547     __ cmpl($rtmp$$Register, 32);
11548     __ jccb(Assembler::equal, exit);
11549     // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11550     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11551     // Shift right the contents of lower source register to remove already consumed bits.
11552     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11553     // Extract the bits from lower source register starting from LSB under the influence
11554     // of upper mask register.
11555     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11556     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11557     __ subl($rtmp$$Register, 32);
11558     __ negl($rtmp$$Register);
11559     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11560     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11561     // Clear the set bits in upper mask register which have been used to extract the contents
11562     // from lower source register.
11563     __ bind(mask_clipping);
11564     __ blsrl($mask$$Register, $mask$$Register);
11565     __ decrementl($rtmp$$Register, 1);
11566     __ jccb(Assembler::greater, mask_clipping);
11567     // Starting from LSB extract the bits from upper source register under the influence of
11568     // remaining set bits in upper mask register.
11569     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11570     // Merge the partial results extracted from lower and upper source register bits.
11571     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11572     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11573     __ bind(exit);
11574   %}
11575   ins_pipe( pipe_slow );
11576 %}
11577 
11578 // =======================================================================
11579 // fast clearing of an array
11580 // Small ClearArray non-AVX512.
11581 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11582   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11583   match(Set dummy (ClearArray cnt base));
11584   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11585 
11586   format %{ $$template
11587     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11588     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11589     $$emit$$"JG     LARGE\n\t"
11590     $$emit$$"SHL    ECX, 1\n\t"
11591     $$emit$$"DEC    ECX\n\t"
11592     $$emit$$"JS     DONE\t# Zero length\n\t"
11593     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11594     $$emit$$"DEC    ECX\n\t"
11595     $$emit$$"JGE    LOOP\n\t"
11596     $$emit$$"JMP    DONE\n\t"
11597     $$emit$$"# LARGE:\n\t"
11598     if (UseFastStosb) {
11599        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11600        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11601     } else if (UseXMMForObjInit) {
11602        $$emit$$"MOV     RDI,RAX\n\t"
11603        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11604        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11605        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11606        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11607        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11608        $$emit$$"ADD     0x40,RAX\n\t"
11609        $$emit$$"# L_zero_64_bytes:\n\t"
11610        $$emit$$"SUB     0x8,RCX\n\t"
11611        $$emit$$"JGE     L_loop\n\t"
11612        $$emit$$"ADD     0x4,RCX\n\t"
11613        $$emit$$"JL      L_tail\n\t"
11614        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11615        $$emit$$"ADD     0x20,RAX\n\t"
11616        $$emit$$"SUB     0x4,RCX\n\t"
11617        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11618        $$emit$$"ADD     0x4,RCX\n\t"
11619        $$emit$$"JLE     L_end\n\t"
11620        $$emit$$"DEC     RCX\n\t"
11621        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11622        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11623        $$emit$$"ADD     0x8,RAX\n\t"
11624        $$emit$$"DEC     RCX\n\t"
11625        $$emit$$"JGE     L_sloop\n\t"
11626        $$emit$$"# L_end:\n\t"
11627     } else {
11628        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11629        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11630     }
11631     $$emit$$"# DONE"
11632   %}
11633   ins_encode %{
11634     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11635                  $tmp$$XMMRegister, false, knoreg);
11636   %}
11637   ins_pipe( pipe_slow );
11638 %}
11639 
11640 // Small ClearArray AVX512 non-constant length.
11641 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11642   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11643   match(Set dummy (ClearArray cnt base));
11644   ins_cost(125);
11645   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11646 
11647   format %{ $$template
11648     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11649     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11650     $$emit$$"JG     LARGE\n\t"
11651     $$emit$$"SHL    ECX, 1\n\t"
11652     $$emit$$"DEC    ECX\n\t"
11653     $$emit$$"JS     DONE\t# Zero length\n\t"
11654     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11655     $$emit$$"DEC    ECX\n\t"
11656     $$emit$$"JGE    LOOP\n\t"
11657     $$emit$$"JMP    DONE\n\t"
11658     $$emit$$"# LARGE:\n\t"
11659     if (UseFastStosb) {
11660        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11661        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11662     } else if (UseXMMForObjInit) {
11663        $$emit$$"MOV     RDI,RAX\n\t"
11664        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11665        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11666        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11667        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11668        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11669        $$emit$$"ADD     0x40,RAX\n\t"
11670        $$emit$$"# L_zero_64_bytes:\n\t"
11671        $$emit$$"SUB     0x8,RCX\n\t"
11672        $$emit$$"JGE     L_loop\n\t"
11673        $$emit$$"ADD     0x4,RCX\n\t"
11674        $$emit$$"JL      L_tail\n\t"
11675        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11676        $$emit$$"ADD     0x20,RAX\n\t"
11677        $$emit$$"SUB     0x4,RCX\n\t"
11678        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11679        $$emit$$"ADD     0x4,RCX\n\t"
11680        $$emit$$"JLE     L_end\n\t"
11681        $$emit$$"DEC     RCX\n\t"
11682        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11683        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11684        $$emit$$"ADD     0x8,RAX\n\t"
11685        $$emit$$"DEC     RCX\n\t"
11686        $$emit$$"JGE     L_sloop\n\t"
11687        $$emit$$"# L_end:\n\t"
11688     } else {
11689        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11690        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11691     }
11692     $$emit$$"# DONE"
11693   %}
11694   ins_encode %{
11695     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11696                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11697   %}
11698   ins_pipe( pipe_slow );
11699 %}
11700 
11701 // Large ClearArray non-AVX512.
11702 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11703   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11704   match(Set dummy (ClearArray cnt base));
11705   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11706   format %{ $$template
11707     if (UseFastStosb) {
11708        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11709        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11710        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11711     } else if (UseXMMForObjInit) {
11712        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11713        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11714        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11715        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11716        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11717        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11718        $$emit$$"ADD     0x40,RAX\n\t"
11719        $$emit$$"# L_zero_64_bytes:\n\t"
11720        $$emit$$"SUB     0x8,RCX\n\t"
11721        $$emit$$"JGE     L_loop\n\t"
11722        $$emit$$"ADD     0x4,RCX\n\t"
11723        $$emit$$"JL      L_tail\n\t"
11724        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11725        $$emit$$"ADD     0x20,RAX\n\t"
11726        $$emit$$"SUB     0x4,RCX\n\t"
11727        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11728        $$emit$$"ADD     0x4,RCX\n\t"
11729        $$emit$$"JLE     L_end\n\t"
11730        $$emit$$"DEC     RCX\n\t"
11731        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11732        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11733        $$emit$$"ADD     0x8,RAX\n\t"
11734        $$emit$$"DEC     RCX\n\t"
11735        $$emit$$"JGE     L_sloop\n\t"
11736        $$emit$$"# L_end:\n\t"
11737     } else {
11738        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11739        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11740        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11741     }
11742     $$emit$$"# DONE"
11743   %}
11744   ins_encode %{
11745     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11746                  $tmp$$XMMRegister, true, knoreg);
11747   %}
11748   ins_pipe( pipe_slow );
11749 %}
11750 
11751 // Large ClearArray AVX512.
11752 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11753   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11754   match(Set dummy (ClearArray cnt base));
11755   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11756   format %{ $$template
11757     if (UseFastStosb) {
11758        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11759        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11760        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11761     } else if (UseXMMForObjInit) {
11762        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11763        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11764        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11765        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11766        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11767        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11768        $$emit$$"ADD     0x40,RAX\n\t"
11769        $$emit$$"# L_zero_64_bytes:\n\t"
11770        $$emit$$"SUB     0x8,RCX\n\t"
11771        $$emit$$"JGE     L_loop\n\t"
11772        $$emit$$"ADD     0x4,RCX\n\t"
11773        $$emit$$"JL      L_tail\n\t"
11774        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11775        $$emit$$"ADD     0x20,RAX\n\t"
11776        $$emit$$"SUB     0x4,RCX\n\t"
11777        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11778        $$emit$$"ADD     0x4,RCX\n\t"
11779        $$emit$$"JLE     L_end\n\t"
11780        $$emit$$"DEC     RCX\n\t"
11781        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11782        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11783        $$emit$$"ADD     0x8,RAX\n\t"
11784        $$emit$$"DEC     RCX\n\t"
11785        $$emit$$"JGE     L_sloop\n\t"
11786        $$emit$$"# L_end:\n\t"
11787     } else {
11788        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11789        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11790        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11791     }
11792     $$emit$$"# DONE"
11793   %}
11794   ins_encode %{
11795     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11796                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11797   %}
11798   ins_pipe( pipe_slow );
11799 %}
11800 
11801 // Small ClearArray AVX512 constant length.
11802 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11803 %{
11804   predicate(!((ClearArrayNode*)n)->is_large() &&
11805                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11806   match(Set dummy (ClearArray cnt base));
11807   ins_cost(100);
11808   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11809   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11810   ins_encode %{
11811    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11812   %}
11813   ins_pipe(pipe_slow);
11814 %}
11815 
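// Note on the encoding suffixes used below: they come from StrIntrinsicNode, where L denotes a
// Latin-1 (byte[]) argument and U a UTF-16 (char[]) argument; LL and UU compare two strings of
// the same encoding, while LU and UL compare one of each.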
11816 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11817                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11818   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11819   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11820   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11821 
11822   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11823   ins_encode %{
11824     __ string_compare($str1$$Register, $str2$$Register,
11825                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11826                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11827   %}
11828   ins_pipe( pipe_slow );
11829 %}
11830 
11831 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11832                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11833   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11834   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11835   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11836 
11837   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11838   ins_encode %{
11839     __ string_compare($str1$$Register, $str2$$Register,
11840                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11841                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11842   %}
11843   ins_pipe( pipe_slow );
11844 %}
11845 
11846 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11847                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11848   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11849   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11850   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11851 
11852   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11853   ins_encode %{
11854     __ string_compare($str1$$Register, $str2$$Register,
11855                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11856                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11857   %}
11858   ins_pipe( pipe_slow );
11859 %}
11860 
11861 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11862                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11863   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11864   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11865   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11866 
11867   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11868   ins_encode %{
11869     __ string_compare($str1$$Register, $str2$$Register,
11870                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11871                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11872   %}
11873   ins_pipe( pipe_slow );
11874 %}
11875 
11876 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11877                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11878   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11879   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11880   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11881 
11882   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11883   ins_encode %{
11884     __ string_compare($str1$$Register, $str2$$Register,
11885                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11886                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11887   %}
11888   ins_pipe( pipe_slow );
11889 %}
11890 
11891 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11892                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11893   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11894   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11895   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11896 
11897   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11898   ins_encode %{
11899     __ string_compare($str1$$Register, $str2$$Register,
11900                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11901                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11902   %}
11903   ins_pipe( pipe_slow );
11904 %}
11905 
11906 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11907                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11908   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11909   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11910   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11911 
11912   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11913   ins_encode %{
11914     __ string_compare($str2$$Register, $str1$$Register,
11915                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11916                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11917   %}
11918   ins_pipe( pipe_slow );
11919 %}
11920 
11921 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11922                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11923   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11924   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11925   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11926 
11927   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11928   ins_encode %{
11929     __ string_compare($str2$$Register, $str1$$Register,
11930                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11931                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11932   %}
11933   ins_pipe( pipe_slow );
11934 %}
11935 
11936 // fast string equals
11937 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11938                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11939   predicate(!VM_Version::supports_avx512vlbw());
11940   match(Set result (StrEquals (Binary str1 str2) cnt));
11941   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11942 
11943   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11944   ins_encode %{
11945     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11946                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11947                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11948   %}
11949 
11950   ins_pipe( pipe_slow );
11951 %}
11952 
11953 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11954                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11955   predicate(VM_Version::supports_avx512vlbw());
11956   match(Set result (StrEquals (Binary str1 str2) cnt));
11957   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11958 
11959   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11960   ins_encode %{
11961     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11962                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11963                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11964   %}
11965 
11966   ins_pipe( pipe_slow );
11967 %}
11968 
11969 
11970 // fast search of substring with known size.
11971 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11972                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11973   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11974   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11975   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11976 
11977   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11978   ins_encode %{
11979     int icnt2 = (int)$int_cnt2$$constant;
11980     if (icnt2 >= 16) {
11981       // IndexOf for constant substrings with size >= 16 elements
11982       // which don't need to be loaded through stack.
11983       __ string_indexofC8($str1$$Register, $str2$$Register,
11984                           $cnt1$$Register, $cnt2$$Register,
11985                           icnt2, $result$$Register,
11986                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11987     } else {
11988       // Small strings are loaded through stack if they cross page boundary.
11989       __ string_indexof($str1$$Register, $str2$$Register,
11990                         $cnt1$$Register, $cnt2$$Register,
11991                         icnt2, $result$$Register,
11992                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11993     }
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
11997 
11998 // fast search of substring with known size.
11999 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12000                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12001   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12002   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12003   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12004 
12005   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12006   ins_encode %{
12007     int icnt2 = (int)$int_cnt2$$constant;
12008     if (icnt2 >= 8) {
12009       // IndexOf for constant substrings with size >= 8 elements
12010       // which don't need to be loaded through stack.
12011       __ string_indexofC8($str1$$Register, $str2$$Register,
12012                           $cnt1$$Register, $cnt2$$Register,
12013                           icnt2, $result$$Register,
12014                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12015     } else {
12016       // Small strings are loaded through stack if they cross page boundary.
12017       __ string_indexof($str1$$Register, $str2$$Register,
12018                         $cnt1$$Register, $cnt2$$Register,
12019                         icnt2, $result$$Register,
12020                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12021     }
12022   %}
12023   ins_pipe( pipe_slow );
12024 %}
12025 
12026 // fast search of substring with known size.
12027 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12028                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12029   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12030   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12031   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12032 
12033   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12034   ins_encode %{
12035     int icnt2 = (int)$int_cnt2$$constant;
12036     if (icnt2 >= 8) {
12037       // IndexOf for constant substrings with size >= 8 elements
12038       // which don't need to be loaded through stack.
12039       __ string_indexofC8($str1$$Register, $str2$$Register,
12040                           $cnt1$$Register, $cnt2$$Register,
12041                           icnt2, $result$$Register,
12042                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12043     } else {
12044       // Small strings are loaded through stack if they cross page boundary.
12045       __ string_indexof($str1$$Register, $str2$$Register,
12046                         $cnt1$$Register, $cnt2$$Register,
12047                         icnt2, $result$$Register,
12048                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12049     }
12050   %}
12051   ins_pipe( pipe_slow );
12052 %}
12053 
12054 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12055                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12056   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12057   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12058   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12059 
12060   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12061   ins_encode %{
12062     __ string_indexof($str1$$Register, $str2$$Register,
12063                       $cnt1$$Register, $cnt2$$Register,
12064                       (-1), $result$$Register,
12065                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12066   %}
12067   ins_pipe( pipe_slow );
12068 %}
12069 
12070 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12071                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12072   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12073   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12074   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12075 
12076   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12077   ins_encode %{
12078     __ string_indexof($str1$$Register, $str2$$Register,
12079                       $cnt1$$Register, $cnt2$$Register,
12080                       (-1), $result$$Register,
12081                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12082   %}
12083   ins_pipe( pipe_slow );
12084 %}
12085 
12086 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12087                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12088   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12089   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12090   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12091 
12092   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12093   ins_encode %{
12094     __ string_indexof($str1$$Register, $str2$$Register,
12095                       $cnt1$$Register, $cnt2$$Register,
12096                       (-1), $result$$Register,
12097                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12098   %}
12099   ins_pipe( pipe_slow );
12100 %}
12101 
12102 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12103                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12104   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12105   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12106   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12107   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12108   ins_encode %{
12109     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12110                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12111   %}
12112   ins_pipe( pipe_slow );
12113 %}
12114 
12115 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12116                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12117   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12118   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12119   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12120   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12121   ins_encode %{
12122     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12123                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12124   %}
12125   ins_pipe( pipe_slow );
12126 %}
12127 
12128 
12129 // fast array equals
12130 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12131                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12132 %{
12133   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12134   match(Set result (AryEq ary1 ary2));
12135   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12136   //ins_cost(300);
12137 
12138   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12139   ins_encode %{
12140     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12141                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12142                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12143   %}
12144   ins_pipe( pipe_slow );
12145 %}
12146 
12147 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12148                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12149 %{
12150   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12151   match(Set result (AryEq ary1 ary2));
12152   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12153   //ins_cost(300);
12154 
12155   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12156   ins_encode %{
12157     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12158                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12159                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12160   %}
12161   ins_pipe( pipe_slow );
12162 %}
12163 
12164 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12165                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12166 %{
12167   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12168   match(Set result (AryEq ary1 ary2));
12169   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12170   //ins_cost(300);
12171 
12172   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12173   ins_encode %{
12174     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12175                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12176                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12177   %}
12178   ins_pipe( pipe_slow );
12179 %}
12180 
12181 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12182                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12183 %{
12184   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12185   match(Set result (AryEq ary1 ary2));
12186   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12187   //ins_cost(300);
12188 
12189   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12190   ins_encode %{
12191     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12192                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12193                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12194   %}
12195   ins_pipe( pipe_slow );
12196 %}
12197 
12198 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12199                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12200 %{
12201   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12202   match(Set result (CountPositives ary1 len));
12203   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12204 
12205   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12206   ins_encode %{
12207     __ count_positives($ary1$$Register, $len$$Register,
12208                        $result$$Register, $tmp3$$Register,
12209                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12210   %}
12211   ins_pipe( pipe_slow );
12212 %}
12213 
12214 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12215                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12216 %{
12217   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12218   match(Set result (CountPositives ary1 len));
12219   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12220 
12221   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12222   ins_encode %{
12223     __ count_positives($ary1$$Register, $len$$Register,
12224                        $result$$Register, $tmp3$$Register,
12225                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12226   %}
12227   ins_pipe( pipe_slow );
12228 %}
12229 
12230 
12231 // fast char[] to byte[] compression
12232 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12233                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12234   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12235   match(Set result (StrCompressedCopy src (Binary dst len)));
12236   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12237 
12238   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12239   ins_encode %{
12240     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12241                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12242                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12243                            knoreg, knoreg);
12244   %}
12245   ins_pipe( pipe_slow );
12246 %}
12247 
12248 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12249                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12250   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12251   match(Set result (StrCompressedCopy src (Binary dst len)));
12252   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12253 
12254   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12255   ins_encode %{
12256     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12257                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12258                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12259                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12260   %}
12261   ins_pipe( pipe_slow );
12262 %}
12263 
12264 // fast byte[] to char[] inflation
12265 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12266                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12267   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12268   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12269   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12270 
12271   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12272   ins_encode %{
12273     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12274                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12275   %}
12276   ins_pipe( pipe_slow );
12277 %}
12278 
12279 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12280                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12281   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12282   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12283   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12284 
12285   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12286   ins_encode %{
12287     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12288                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12289   %}
12290   ins_pipe( pipe_slow );
12291 %}
12292 
12293 // encode char[] to byte[] in ISO_8859_1
12294 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12295                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12296                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12297   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12298   match(Set result (EncodeISOArray src (Binary dst len)));
12299   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12300 
12301   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12302   ins_encode %{
12303     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12304                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12305                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12306   %}
12307   ins_pipe( pipe_slow );
12308 %}
12309 
12310 // encode char[] to byte[] in ASCII
12311 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12312                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12313                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12314   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12315   match(Set result (EncodeISOArray src (Binary dst len)));
12316   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12317 
12318   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12319   ins_encode %{
12320     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12321                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12322                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12323   %}
12324   ins_pipe( pipe_slow );
12325 %}
12326 
12327 //----------Control Flow Instructions------------------------------------------
12328 // Signed compare Instructions
12329 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12330   match(Set cr (CmpI op1 op2));
12331   effect( DEF cr, USE op1, USE op2 );
12332   format %{ "CMP    $op1,$op2" %}
12333   opcode(0x3B);  /* Opcode 3B /r */
12334   ins_encode( OpcP, RegReg( op1, op2) );
12335   ins_pipe( ialu_cr_reg_reg );
12336 %}
12337 
12338 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12339   match(Set cr (CmpI op1 op2));
12340   effect( DEF cr, USE op1 );
12341   format %{ "CMP    $op1,$op2" %}
12342   opcode(0x81,0x07);  /* Opcode 81 /7 */
12343   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12344   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12345   ins_pipe( ialu_cr_reg_imm );
12346 %}
12347 
// Cisc-spilled version of compI_eReg
12349 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12350   match(Set cr (CmpI op1 (LoadI op2)));
12351 
12352   format %{ "CMP    $op1,$op2" %}
12353   ins_cost(500);
12354   opcode(0x3B);  /* Opcode 3B /r */
12355   ins_encode( OpcP, RegMem( op1, op2) );
12356   ins_pipe( ialu_cr_reg_mem );
12357 %}
12358 
12359 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12360   match(Set cr (CmpI src zero));
12361   effect( DEF cr, USE src );
12362 
12363   format %{ "TEST   $src,$src" %}
12364   opcode(0x85);
12365   ins_encode( OpcP, RegReg( src, src ) );
12366   ins_pipe( ialu_cr_reg_imm );
12367 %}
12368 
12369 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12370   match(Set cr (CmpI (AndI src con) zero));
12371 
12372   format %{ "TEST   $src,$con" %}
12373   opcode(0xF7,0x00);
12374   ins_encode( OpcP, RegOpc(src), Con32(con) );
12375   ins_pipe( ialu_cr_reg_imm );
12376 %}
12377 
12378 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12379   match(Set cr (CmpI (AndI src mem) zero));
12380 
12381   format %{ "TEST   $src,$mem" %}
12382   opcode(0x85);
12383   ins_encode( OpcP, RegMem( src, mem ) );
12384   ins_pipe( ialu_cr_reg_mem );
12385 %}
12386 
12387 // Unsigned compare Instructions; really, same as signed except they
12388 // produce an eFlagsRegU instead of eFlagsReg.
12389 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12390   match(Set cr (CmpU op1 op2));
12391 
12392   format %{ "CMPu   $op1,$op2" %}
12393   opcode(0x3B);  /* Opcode 3B /r */
12394   ins_encode( OpcP, RegReg( op1, op2) );
12395   ins_pipe( ialu_cr_reg_reg );
12396 %}
12397 
12398 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12399   match(Set cr (CmpU op1 op2));
12400 
12401   format %{ "CMPu   $op1,$op2" %}
12402   opcode(0x81,0x07);  /* Opcode 81 /7 */
12403   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12404   ins_pipe( ialu_cr_reg_imm );
12405 %}
12406 
// Cisc-spilled version of compU_eReg
12408 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12409   match(Set cr (CmpU op1 (LoadI op2)));
12410 
12411   format %{ "CMPu   $op1,$op2" %}
12412   ins_cost(500);
12413   opcode(0x3B);  /* Opcode 3B /r */
12414   ins_encode( OpcP, RegMem( op1, op2) );
12415   ins_pipe( ialu_cr_reg_mem );
12416 %}
12417 
12418 // // Cisc-spilled version of cmpU_eReg
12419 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12420 //  match(Set cr (CmpU (LoadI op1) op2));
12421 //
12422 //  format %{ "CMPu   $op1,$op2" %}
12423 //  ins_cost(500);
12424 //  opcode(0x39);  /* Opcode 39 /r */
12425 //  ins_encode( OpcP, RegMem( op1, op2) );
12426 //%}
12427 
12428 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12429   match(Set cr (CmpU src zero));
12430 
12431   format %{ "TESTu  $src,$src" %}
12432   opcode(0x85);
12433   ins_encode( OpcP, RegReg( src, src ) );
12434   ins_pipe( ialu_cr_reg_imm );
12435 %}
12436 
12437 // Unsigned pointer compare Instructions
12438 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12439   match(Set cr (CmpP op1 op2));
12440 
12441   format %{ "CMPu   $op1,$op2" %}
12442   opcode(0x3B);  /* Opcode 3B /r */
12443   ins_encode( OpcP, RegReg( op1, op2) );
12444   ins_pipe( ialu_cr_reg_reg );
12445 %}
12446 
12447 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12448   match(Set cr (CmpP op1 op2));
12449 
12450   format %{ "CMPu   $op1,$op2" %}
12451   opcode(0x81,0x07);  /* Opcode 81 /7 */
12452   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12453   ins_pipe( ialu_cr_reg_imm );
12454 %}
12455 
// Cisc-spilled version of compP_eReg
12457 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12458   match(Set cr (CmpP op1 (LoadP op2)));
12459 
12460   format %{ "CMPu   $op1,$op2" %}
12461   ins_cost(500);
12462   opcode(0x3B);  /* Opcode 3B /r */
12463   ins_encode( OpcP, RegMem( op1, op2) );
12464   ins_pipe( ialu_cr_reg_mem );
12465 %}
12466 
12467 // // Cisc-spilled version of cmpP_eReg
12468 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12469 //  match(Set cr (CmpP (LoadP op1) op2));
12470 //
12471 //  format %{ "CMPu   $op1,$op2" %}
12472 //  ins_cost(500);
12473 //  opcode(0x39);  /* Opcode 39 /r */
12474 //  ins_encode( OpcP, RegMem( op1, op2) );
12475 //%}
12476 
12477 // Compare raw pointer (used in out-of-heap check).
12478 // Only works because non-oop pointers must be raw pointers
12479 // and raw pointers have no anti-dependencies.
12480 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12481   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12482   match(Set cr (CmpP op1 (LoadP op2)));
12483 
12484   format %{ "CMPu   $op1,$op2" %}
12485   opcode(0x3B);  /* Opcode 3B /r */
12486   ins_encode( OpcP, RegMem( op1, op2) );
12487   ins_pipe( ialu_cr_reg_mem );
12488 %}
12489 
12490 //
12491 // This will generate a signed flags result. This should be ok
12492 // since any compare to a zero should be eq/neq.
12493 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12494   match(Set cr (CmpP src zero));
12495 
12496   format %{ "TEST   $src,$src" %}
12497   opcode(0x85);
12498   ins_encode( OpcP, RegReg( src, src ) );
12499   ins_pipe( ialu_cr_reg_imm );
12500 %}
12501 
12502 // Cisc-spilled version of testP_reg
12503 // This will generate a signed flags result. This should be ok
12504 // since any compare to a zero should be eq/neq.
12505 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12506   match(Set cr (CmpP (LoadP op) zero));
12507 
12508   format %{ "TEST   $op,0xFFFFFFFF" %}
12509   ins_cost(500);
12510   opcode(0xF7);               /* Opcode F7 /0 */
12511   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12512   ins_pipe( ialu_cr_reg_imm );
12513 %}
12514 
12515 // Yanked all unsigned pointer compare operations.
12516 // Pointer compares are done with CmpP which is already unsigned.
12517 
12518 //----------Max and Min--------------------------------------------------------
12519 // Min Instructions
12520 ////
12521 //   *** Min and Max using the conditional move are slower than the
12522 //   *** branch version on a Pentium III.
12523 // // Conditional move for min
12524 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12525 //  effect( USE_DEF op2, USE op1, USE cr );
12526 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12527 //  opcode(0x4C,0x0F);
12528 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12529 //  ins_pipe( pipe_cmov_reg );
12530 //%}
12531 //
12532 //// Min Register with Register (P6 version)
12533 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12534 //  predicate(VM_Version::supports_cmov() );
12535 //  match(Set op2 (MinI op1 op2));
12536 //  ins_cost(200);
12537 //  expand %{
12538 //    eFlagsReg cr;
12539 //    compI_eReg(cr,op1,op2);
12540 //    cmovI_reg_lt(op2,op1,cr);
12541 //  %}
12542 //%}
12543 
12544 // Min Register with Register (generic version)
12545 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12546   match(Set dst (MinI dst src));
12547   effect(KILL flags);
12548   ins_cost(300);
12549 
12550   format %{ "MIN    $dst,$src" %}
12551   opcode(0xCC);
12552   ins_encode( min_enc(dst,src) );
12553   ins_pipe( pipe_slow );
12554 %}
12555 
12556 // Max Register with Register
12557 //   *** Min and Max using the conditional move are slower than the
12558 //   *** branch version on a Pentium III.
12559 // // Conditional move for max
12560 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12561 //  effect( USE_DEF op2, USE op1, USE cr );
12562 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12563 //  opcode(0x4F,0x0F);
12564 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12565 //  ins_pipe( pipe_cmov_reg );
12566 //%}
12567 //
12568 // // Max Register with Register (P6 version)
12569 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12570 //  predicate(VM_Version::supports_cmov() );
12571 //  match(Set op2 (MaxI op1 op2));
12572 //  ins_cost(200);
12573 //  expand %{
12574 //    eFlagsReg cr;
12575 //    compI_eReg(cr,op1,op2);
12576 //    cmovI_reg_gt(op2,op1,cr);
12577 //  %}
12578 //%}
12579 
12580 // Max Register with Register (generic version)
12581 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12582   match(Set dst (MaxI dst src));
12583   effect(KILL flags);
12584   ins_cost(300);
12585 
12586   format %{ "MAX    $dst,$src" %}
12587   opcode(0xCC);
12588   ins_encode( max_enc(dst,src) );
12589   ins_pipe( pipe_slow );
12590 %}
12591 
12592 // ============================================================================
12593 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have an overflow check on the limit.
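//
// Worked example (illustration only): with init = 0, limit = 10 and stride = 3
// the exact final iterator value is
//   init + stride * ((limit - init + stride - 1) / stride)
//     = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * (12 / 3) = 12,
// matching the iteration sequence 0, 3, 6, 9 whose first out-of-range value is
// 12.  The encoding below performs the subtraction and the rounding term in
// 64-bit precision (the limit_hi:limit pair, i.e. EDX:EAX) so the intermediate
// sum cannot overflow 32 bits before the division.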
12596 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12597   match(Set limit (LoopLimit (Binary init limit) stride));
12598   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12599   ins_cost(300);
12600 
12601   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12602   ins_encode %{
12603     int strd = (int)$stride$$constant;
12604     assert(strd != 1 && strd != -1, "sanity");
12605     int m1 = (strd > 0) ? 1 : -1;
12606     // Convert limit to long (EAX:EDX)
12607     __ cdql();
12608     // Convert init to long (init:tmp)
12609     __ movl($tmp$$Register, $init$$Register);
12610     __ sarl($tmp$$Register, 31);
12611     // $limit - $init
12612     __ subl($limit$$Register, $init$$Register);
12613     __ sbbl($limit_hi$$Register, $tmp$$Register);
12614     // + ($stride - 1)
12615     if (strd > 0) {
12616       __ addl($limit$$Register, (strd - 1));
12617       __ adcl($limit_hi$$Register, 0);
12618       __ movl($tmp$$Register, strd);
12619     } else {
12620       __ addl($limit$$Register, (strd + 1));
12621       __ adcl($limit_hi$$Register, -1);
12622       __ lneg($limit_hi$$Register, $limit$$Register);
12623       __ movl($tmp$$Register, -strd);
12624     }
12625     // signed division: (EAX:EDX) / pos_stride
12626     __ idivl($tmp$$Register);
12627     if (strd < 0) {
12628       // restore sign
12629       __ negl($tmp$$Register);
12630     }
12631     // (EAX) * stride
12632     __ mull($tmp$$Register);
12633     // + init (ignore upper bits)
12634     __ addl($limit$$Register, $init$$Register);
12635   %}
12636   ins_pipe( pipe_slow );
12637 %}
12638 
12639 // ============================================================================
12640 // Branch Instructions
12641 // Jump Table
12642 instruct jumpXtnd(rRegI switch_val) %{
12643   match(Jump switch_val);
12644   ins_cost(350);
12645   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12646   ins_encode %{
12647     // Jump to Address(table_base + switch_reg)
12648     Address index(noreg, $switch_val$$Register, Address::times_1);
12649     __ jump(ArrayAddress($constantaddress, index), noreg);
12650   %}
12651   ins_pipe(pipe_jmp);
12652 %}
12653 
12654 // Jump Direct - Label defines a relative address from JMP+1
12655 instruct jmpDir(label labl) %{
12656   match(Goto);
12657   effect(USE labl);
12658 
12659   ins_cost(300);
12660   format %{ "JMP    $labl" %}
12661   size(5);
12662   ins_encode %{
12663     Label* L = $labl$$label;
12664     __ jmp(*L, false); // Always long jump
12665   %}
12666   ins_pipe( pipe_jmp );
12667 %}
12668 
12669 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12670 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12671   match(If cop cr);
12672   effect(USE labl);
12673 
12674   ins_cost(300);
12675   format %{ "J$cop    $labl" %}
12676   size(6);
12677   ins_encode %{
12678     Label* L = $labl$$label;
12679     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12680   %}
12681   ins_pipe( pipe_jcc );
12682 %}
12683 
12684 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12685 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12686   match(CountedLoopEnd cop cr);
12687   effect(USE labl);
12688 
12689   ins_cost(300);
12690   format %{ "J$cop    $labl\t# Loop end" %}
12691   size(6);
12692   ins_encode %{
12693     Label* L = $labl$$label;
12694     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12695   %}
12696   ins_pipe( pipe_jcc );
12697 %}
12698 
12699 // Jump Direct Conditional - using unsigned comparison
12700 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12701   match(If cop cmp);
12702   effect(USE labl);
12703 
12704   ins_cost(300);
12705   format %{ "J$cop,u  $labl" %}
12706   size(6);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710   %}
12711   ins_pipe(pipe_jcc);
12712 %}
12713 
12714 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
12718   ins_cost(200);
12719   format %{ "J$cop,u  $labl" %}
12720   size(6);
12721   ins_encode %{
12722     Label* L = $labl$$label;
12723     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12724   %}
12725   ins_pipe(pipe_jcc);
12726 %}
12727 
12728 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12729   match(If cop cmp);
12730   effect(USE labl);
12731 
12732   ins_cost(200);
12733   format %{ $$template
12734     if ($cop$$cmpcode == Assembler::notEqual) {
12735       $$emit$$"JP,u   $labl\n\t"
12736       $$emit$$"J$cop,u   $labl"
12737     } else {
12738       $$emit$$"JP,u   done\n\t"
12739       $$emit$$"J$cop,u   $labl\n\t"
12740       $$emit$$"done:"
12741     }
12742   %}
12743   ins_encode %{
12744     Label* l = $labl$$label;
12745     if ($cop$$cmpcode == Assembler::notEqual) {
12746       __ jcc(Assembler::parity, *l, false);
12747       __ jcc(Assembler::notEqual, *l, false);
12748     } else if ($cop$$cmpcode == Assembler::equal) {
12749       Label done;
12750       __ jccb(Assembler::parity, done);
12751       __ jcc(Assembler::equal, *l, false);
12752       __ bind(done);
12753     } else {
12754        ShouldNotReachHere();
12755     }
12756   %}
12757   ins_pipe(pipe_jcc);
12758 %}
12759 
12760 // ============================================================================
12761 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12762 // array for an instance of the superklass.  Set a hidden internal cache on a
12763 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12764 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
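//
// Rough C-style sketch of the scan emitted by enc_PartialSubtypeCheck (the
// actual emission is the REPNE SCASD sequence shown in the format below; the
// field accessors named here are purely illustrative):
//
//   Klass** p = sub->secondary_supers_data();      // EDI
//   int     n = sub->secondary_supers_length();    // ECX
//   while (n-- > 0) {
//     if (*p++ == super) {                         // super is held in EAX
//       sub->set_secondary_super_cache(super);     // hit: update the cache
//       return 0;                                  //      EDI zeroed, flags Z
//     }
//   }
//   return non_zero;                               // miss: EDI != 0, flags NZ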
12765 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12766   match(Set result (PartialSubtypeCheck sub super));
12767   effect( KILL rcx, KILL cr );
12768 
12769   ins_cost(1100);  // slightly larger than the next version
12770   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12771             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12772             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12773             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12774             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12775             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12776             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12777      "miss:\t" %}
12778 
12779   opcode(0x1); // Force a XOR of EDI
12780   ins_encode( enc_PartialSubtypeCheck() );
12781   ins_pipe( pipe_slow );
12782 %}
12783 
12784 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12785   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12786   effect( KILL rcx, KILL result );
12787 
12788   ins_cost(1000);
12789   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12790             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12791             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12792             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12793             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12794             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12795      "miss:\t" %}
12796 
12797   opcode(0x0);  // No need to XOR EDI
12798   ins_encode( enc_PartialSubtypeCheck() );
12799   ins_pipe( pipe_slow );
12800 %}
12801 
12802 // ============================================================================
12803 // Branch Instructions -- short offset versions
12804 //
12805 // These instructions are used to replace jumps of a long offset (the default
12806 // match) with jumps of a shorter offset.  These instructions are all tagged
12807 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12808 // match rules in general matching.  Instead, the ADLC generates a conversion
12809 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
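//
// For reference, the long forms above are 5 bytes (JMP rel32) or 6 bytes
// (0F 8x Jcc rel32), while the short forms below are 2 bytes (one opcode byte
// plus an 8-bit displacement), which is why the long variants declare size(5)
// or size(6) and the short variants declare size(2).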
12813 
12814 // Jump Direct - Label defines a relative address from JMP+1
12815 instruct jmpDir_short(label labl) %{
12816   match(Goto);
12817   effect(USE labl);
12818 
12819   ins_cost(300);
12820   format %{ "JMP,s  $labl" %}
12821   size(2);
12822   ins_encode %{
12823     Label* L = $labl$$label;
12824     __ jmpb(*L);
12825   %}
12826   ins_pipe( pipe_jmp );
12827   ins_short_branch(1);
12828 %}
12829 
12830 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12831 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12832   match(If cop cr);
12833   effect(USE labl);
12834 
12835   ins_cost(300);
12836   format %{ "J$cop,s  $labl" %}
12837   size(2);
12838   ins_encode %{
12839     Label* L = $labl$$label;
12840     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12841   %}
12842   ins_pipe( pipe_jcc );
12843   ins_short_branch(1);
12844 %}
12845 
12846 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12847 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12848   match(CountedLoopEnd cop cr);
12849   effect(USE labl);
12850 
12851   ins_cost(300);
12852   format %{ "J$cop,s  $labl\t# Loop end" %}
12853   size(2);
12854   ins_encode %{
12855     Label* L = $labl$$label;
12856     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12857   %}
12858   ins_pipe( pipe_jcc );
12859   ins_short_branch(1);
12860 %}
12861 
12862 // Jump Direct Conditional - using unsigned comparison
12863 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12864   match(If cop cmp);
12865   effect(USE labl);
12866 
12867   ins_cost(300);
12868   format %{ "J$cop,us $labl" %}
12869   size(2);
12870   ins_encode %{
12871     Label* L = $labl$$label;
12872     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12873   %}
12874   ins_pipe( pipe_jcc );
12875   ins_short_branch(1);
12876 %}
12877 
12878 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12879   match(If cop cmp);
12880   effect(USE labl);
12881 
12882   ins_cost(300);
12883   format %{ "J$cop,us $labl" %}
12884   size(2);
12885   ins_encode %{
12886     Label* L = $labl$$label;
12887     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12888   %}
12889   ins_pipe( pipe_jcc );
12890   ins_short_branch(1);
12891 %}
12892 
12893 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12894   match(If cop cmp);
12895   effect(USE labl);
12896 
12897   ins_cost(300);
12898   format %{ $$template
12899     if ($cop$$cmpcode == Assembler::notEqual) {
12900       $$emit$$"JP,u,s   $labl\n\t"
12901       $$emit$$"J$cop,u,s   $labl"
12902     } else {
12903       $$emit$$"JP,u,s   done\n\t"
12904       $$emit$$"J$cop,u,s  $labl\n\t"
12905       $$emit$$"done:"
12906     }
12907   %}
12908   size(4);
12909   ins_encode %{
12910     Label* l = $labl$$label;
12911     if ($cop$$cmpcode == Assembler::notEqual) {
12912       __ jccb(Assembler::parity, *l);
12913       __ jccb(Assembler::notEqual, *l);
12914     } else if ($cop$$cmpcode == Assembler::equal) {
12915       Label done;
12916       __ jccb(Assembler::parity, done);
12917       __ jccb(Assembler::equal, *l);
12918       __ bind(done);
12919     } else {
12920        ShouldNotReachHere();
12921     }
12922   %}
12923   ins_pipe(pipe_jcc);
12924   ins_short_branch(1);
12925 %}
12926 
12927 // ============================================================================
12928 // Long Compare
12929 //
12930 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12931 // is tricky.  The flavor of compare used depends on whether we are testing
12932 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12933 // The GE test is the negated LT test.  The LE test can be had by commuting
12934 // the operands (yielding a GE test) and then negating; negate again for the
12935 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12936 // NE test is negated from that.
12937 
12938 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12939 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12940 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12941 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12942 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12943 // foo match ends up with the wrong leaf.  One fix is to not match both
12944 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12945 // both forms beat the trinary form of long-compare and both are very useful
12946 // on Intel which has so few registers.
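//
// Worked example (illustration only) of the reg-reg LT/GE flavor below
// (cmpL_reg_flags_LTGE): comparing src1 = -1 (hi:lo = 0xFFFFFFFF:0xFFFFFFFF)
// against src2 = 0 with
//   CMP src1.lo,src2.lo            // 0xFFFFFFFF - 0, no borrow (CF = 0)
//   MOV tmp,src1.hi
//   SBB tmp,src2.hi                // 0xFFFFFFFF - 0 - 0 = 0xFFFFFFFF
// leaves SF = 1 and OF = 0, so the signed "less" condition (SF != OF) holds and
// -1 < 0 is reported, just as a 32-bit signed compare would.  The EQ/NE test
// against zero instead ORs the low and high halves into a temp and reads ZF.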
12947 
12948 // Manifest a CmpL result in an integer register.  Very painful.
12949 // This is the test to avoid.
12950 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12951   match(Set dst (CmpL3 src1 src2));
12952   effect( KILL flags );
12953   ins_cost(1000);
12954   format %{ "XOR    $dst,$dst\n\t"
12955             "CMP    $src1.hi,$src2.hi\n\t"
12956             "JLT,s  m_one\n\t"
12957             "JGT,s  p_one\n\t"
12958             "CMP    $src1.lo,$src2.lo\n\t"
12959             "JB,s   m_one\n\t"
12960             "JEQ,s  done\n"
12961     "p_one:\tINC    $dst\n\t"
12962             "JMP,s  done\n"
12963     "m_one:\tDEC    $dst\n"
12964      "done:" %}
12965   ins_encode %{
12966     Label p_one, m_one, done;
12967     __ xorptr($dst$$Register, $dst$$Register);
12968     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12969     __ jccb(Assembler::less,    m_one);
12970     __ jccb(Assembler::greater, p_one);
12971     __ cmpl($src1$$Register, $src2$$Register);
12972     __ jccb(Assembler::below,   m_one);
12973     __ jccb(Assembler::equal,   done);
12974     __ bind(p_one);
12975     __ incrementl($dst$$Register);
12976     __ jmpb(done);
12977     __ bind(m_one);
12978     __ decrementl($dst$$Register);
12979     __ bind(done);
12980   %}
12981   ins_pipe( pipe_slow );
12982 %}
12983 
12984 //======
12985 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12986 // compares.  Can be used for LE or GT compares by reversing arguments.
12987 // NOT GOOD FOR EQ/NE tests.
12988 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12989   match( Set flags (CmpL src zero ));
12990   ins_cost(100);
12991   format %{ "TEST   $src.hi,$src.hi" %}
12992   opcode(0x85);
12993   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12994   ins_pipe( ialu_cr_reg_reg );
12995 %}
12996 
12997 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12998 // compares.  Can be used for LE or GT compares by reversing arguments.
12999 // NOT GOOD FOR EQ/NE tests.
13000 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13001   match( Set flags (CmpL src1 src2 ));
13002   effect( TEMP tmp );
13003   ins_cost(300);
13004   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13005             "MOV    $tmp,$src1.hi\n\t"
13006             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13007   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13008   ins_pipe( ialu_cr_reg_reg );
13009 %}
13010 
// Long compares reg < zero/reg OR reg >= zero/reg.
13012 // Just a wrapper for a normal branch, plus the predicate test.
13013 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13014   match(If cmp flags);
13015   effect(USE labl);
13016   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13017   expand %{
13018     jmpCon(cmp,flags,labl);    // JLT or JGE...
13019   %}
13020 %}
13021 
13022 //======
13023 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13024 // compares.  Can be used for LE or GT compares by reversing arguments.
13025 // NOT GOOD FOR EQ/NE tests.
13026 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13027   match(Set flags (CmpUL src zero));
13028   ins_cost(100);
13029   format %{ "TEST   $src.hi,$src.hi" %}
13030   opcode(0x85);
13031   ins_encode(OpcP, RegReg_Hi2(src, src));
13032   ins_pipe(ialu_cr_reg_reg);
13033 %}
13034 
13035 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13036 // compares.  Can be used for LE or GT compares by reversing arguments.
13037 // NOT GOOD FOR EQ/NE tests.
13038 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13039   match(Set flags (CmpUL src1 src2));
13040   effect(TEMP tmp);
13041   ins_cost(300);
13042   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13043             "MOV    $tmp,$src1.hi\n\t"
13044             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13045   ins_encode(long_cmp_flags2(src1, src2, tmp));
13046   ins_pipe(ialu_cr_reg_reg);
13047 %}
13048 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13050 // Just a wrapper for a normal branch, plus the predicate test.
13051 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13052   match(If cmp flags);
13053   effect(USE labl);
13054   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13055   expand %{
13056     jmpCon(cmp, flags, labl);    // JLT or JGE...
13057   %}
13058 %}
13059 
13060 // Compare 2 longs and CMOVE longs.
13061 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13062   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13063   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13064   ins_cost(400);
13065   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13066             "CMOV$cmp $dst.hi,$src.hi" %}
13067   opcode(0x0F,0x40);
13068   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13069   ins_pipe( pipe_cmov_reg_long );
13070 %}
13071 
13072 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13073   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13074   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13075   ins_cost(500);
13076   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13077             "CMOV$cmp $dst.hi,$src.hi" %}
13078   opcode(0x0F,0x40);
13079   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13080   ins_pipe( pipe_cmov_reg_long );
13081 %}
13082 
13083 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13084   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13085   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13086   ins_cost(400);
13087   expand %{
13088     cmovLL_reg_LTGE(cmp, flags, dst, src);
13089   %}
13090 %}
13091 
13092 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13093   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13094   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13095   ins_cost(500);
13096   expand %{
13097     cmovLL_mem_LTGE(cmp, flags, dst, src);
13098   %}
13099 %}
13100 
13101 // Compare 2 longs and CMOVE ints.
13102 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13103   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13104   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13105   ins_cost(200);
13106   format %{ "CMOV$cmp $dst,$src" %}
13107   opcode(0x0F,0x40);
13108   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13109   ins_pipe( pipe_cmov_reg );
13110 %}
13111 
13112 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13113   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13114   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13115   ins_cost(250);
13116   format %{ "CMOV$cmp $dst,$src" %}
13117   opcode(0x0F,0x40);
13118   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13119   ins_pipe( pipe_cmov_mem );
13120 %}
13121 
13122 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13123   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13124   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13125   ins_cost(200);
13126   expand %{
13127     cmovII_reg_LTGE(cmp, flags, dst, src);
13128   %}
13129 %}
13130 
13131 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13133   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13134   ins_cost(250);
13135   expand %{
13136     cmovII_mem_LTGE(cmp, flags, dst, src);
13137   %}
13138 %}
13139 
13140 // Compare 2 longs and CMOVE ptrs.
13141 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13142   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13143   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13144   ins_cost(200);
13145   format %{ "CMOV$cmp $dst,$src" %}
13146   opcode(0x0F,0x40);
13147   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13148   ins_pipe( pipe_cmov_reg );
13149 %}
13150 
13151 // Compare 2 unsigned longs and CMOVE ptrs.
13152 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13153   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13154   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13155   ins_cost(200);
13156   expand %{
13157     cmovPP_reg_LTGE(cmp,flags,dst,src);
13158   %}
13159 %}
13160 
13161 // Compare 2 longs and CMOVE doubles
13162 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13164   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13165   ins_cost(200);
13166   expand %{
13167     fcmovDPR_regS(cmp,flags,dst,src);
13168   %}
13169 %}
13170 
13171 // Compare 2 longs and CMOVE doubles
13172 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13174   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13175   ins_cost(200);
13176   expand %{
13177     fcmovD_regS(cmp,flags,dst,src);
13178   %}
13179 %}
13180 
13181 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13183   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13184   ins_cost(200);
13185   expand %{
13186     fcmovFPR_regS(cmp,flags,dst,src);
13187   %}
13188 %}
13189 
13190 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13192   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13193   ins_cost(200);
13194   expand %{
13195     fcmovF_regS(cmp,flags,dst,src);
13196   %}
13197 %}
13198 
13199 //======
13200 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13201 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13202   match( Set flags (CmpL src zero ));
13203   effect(TEMP tmp);
13204   ins_cost(200);
13205   format %{ "MOV    $tmp,$src.lo\n\t"
13206             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13207   ins_encode( long_cmp_flags0( src, tmp ) );
13208   ins_pipe( ialu_reg_reg_long );
13209 %}
13210 
13211 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13212 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13213   match( Set flags (CmpL src1 src2 ));
13214   ins_cost(200+300);
13215   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13216             "JNE,s  skip\n\t"
13217             "CMP    $src1.hi,$src2.hi\n\t"
13218      "skip:\t" %}
13219   ins_encode( long_cmp_flags1( src1, src2 ) );
13220   ins_pipe( ialu_cr_reg_reg );
13221 %}
13222 
13223 // Long compare reg == zero/reg OR reg != zero/reg
13224 // Just a wrapper for a normal branch, plus the predicate test.
13225 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13226   match(If cmp flags);
13227   effect(USE labl);
13228   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13229   expand %{
13230     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13231   %}
13232 %}
13233 
13234 //======
13235 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13236 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13237   match(Set flags (CmpUL src zero));
13238   effect(TEMP tmp);
13239   ins_cost(200);
13240   format %{ "MOV    $tmp,$src.lo\n\t"
13241             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13242   ins_encode(long_cmp_flags0(src, tmp));
13243   ins_pipe(ialu_reg_reg_long);
13244 %}
13245 
13246 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13247 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13248   match(Set flags (CmpUL src1 src2));
13249   ins_cost(200+300);
13250   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13251             "JNE,s  skip\n\t"
13252             "CMP    $src1.hi,$src2.hi\n\t"
13253      "skip:\t" %}
13254   ins_encode(long_cmp_flags1(src1, src2));
13255   ins_pipe(ialu_cr_reg_reg);
13256 %}
13257 
13258 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13259 // Just a wrapper for a normal branch, plus the predicate test.
13260 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13261   match(If cmp flags);
13262   effect(USE labl);
13263   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13264   expand %{
13265     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13266   %}
13267 %}
13268 
13269 // Compare 2 longs and CMOVE longs.
13270 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13271   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13272   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13273   ins_cost(400);
13274   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13275             "CMOV$cmp $dst.hi,$src.hi" %}
13276   opcode(0x0F,0x40);
13277   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13278   ins_pipe( pipe_cmov_reg_long );
13279 %}
13280 
13281 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13282   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   ins_cost(500);
13285   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13286             "CMOV$cmp $dst.hi,$src.hi" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13289   ins_pipe( pipe_cmov_reg_long );
13290 %}
13291 
13292 // Compare 2 longs and CMOVE ints.
13293 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13294   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13295   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13296   ins_cost(200);
13297   format %{ "CMOV$cmp $dst,$src" %}
13298   opcode(0x0F,0x40);
13299   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13300   ins_pipe( pipe_cmov_reg );
13301 %}
13302 
13303 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13304   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13305   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13306   ins_cost(250);
13307   format %{ "CMOV$cmp $dst,$src" %}
13308   opcode(0x0F,0x40);
13309   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13310   ins_pipe( pipe_cmov_mem );
13311 %}
13312 
13313 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13314   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13315   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13316   ins_cost(200);
13317   expand %{
13318     cmovII_reg_EQNE(cmp, flags, dst, src);
13319   %}
13320 %}
13321 
13322 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13323   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13324   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13325   ins_cost(250);
13326   expand %{
13327     cmovII_mem_EQNE(cmp, flags, dst, src);
13328   %}
13329 %}
13330 
13331 // Compare 2 longs and CMOVE ptrs.
13332 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13333   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13334   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13335   ins_cost(200);
13336   format %{ "CMOV$cmp $dst,$src" %}
13337   opcode(0x0F,0x40);
13338   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13339   ins_pipe( pipe_cmov_reg );
13340 %}
13341 
13342 // Compare 2 unsigned longs and CMOVE ptrs.
13343 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13344   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13345   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13346   ins_cost(200);
13347   expand %{
13348     cmovPP_reg_EQNE(cmp,flags,dst,src);
13349   %}
13350 %}
13351 
13352 // Compare 2 longs and CMOVE doubles
13353 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13355   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13356   ins_cost(200);
13357   expand %{
13358     fcmovDPR_regS(cmp,flags,dst,src);
13359   %}
13360 %}
13361 
13362 // Compare 2 longs and CMOVE doubles
13363 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13365   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13366   ins_cost(200);
13367   expand %{
13368     fcmovD_regS(cmp,flags,dst,src);
13369   %}
13370 %}
13371 
13372 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13374   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13375   ins_cost(200);
13376   expand %{
13377     fcmovFPR_regS(cmp,flags,dst,src);
13378   %}
13379 %}
13380 
13381 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13383   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13384   ins_cost(200);
13385   expand %{
13386     fcmovF_regS(cmp,flags,dst,src);
13387   %}
13388 %}
13389 
13390 //======
13391 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13392 // Same as cmpL_reg_flags_LEGT except must negate src
13393 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13394   match( Set flags (CmpL src zero ));
13395   effect( TEMP tmp );
13396   ins_cost(300);
13397   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13398             "CMP    $tmp,$src.lo\n\t"
13399             "SBB    $tmp,$src.hi\n\t" %}
13400   ins_encode( long_cmp_flags3(src, tmp) );
13401   ins_pipe( ialu_reg_reg_long );
13402 %}
13403 
13404 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13405 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13406 // requires a commuted test to get the same result.
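//
// Illustration: to evaluate src1 > src2 the flags are computed for src2 - src1
// (as in the format below) and the branch (cmpL_LEGT, via cmpOp_commute) then
// tests the swapped condition, e.g. "less" where the source asked for
// "greater".  With src1 = 5 and src2 = 3, src2 - src1 = -2, "less" holds, and
// 5 > 3 is correctly reported.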
13407 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13408   match( Set flags (CmpL src1 src2 ));
13409   effect( TEMP tmp );
13410   ins_cost(300);
13411   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13412             "MOV    $tmp,$src2.hi\n\t"
13413             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13414   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13415   ins_pipe( ialu_cr_reg_reg );
13416 %}
13417 
// Long compares reg < zero/reg OR reg >= zero/reg.
13419 // Just a wrapper for a normal branch, plus the predicate test
13420 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13421   match(If cmp flags);
13422   effect(USE labl);
13423   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13424   ins_cost(300);
13425   expand %{
13426     jmpCon(cmp,flags,labl);    // JGT or JLE...
13427   %}
13428 %}
13429 
13430 //======
13431 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13432 // Same as cmpUL_reg_flags_LEGT except must negate src
13433 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13434   match(Set flags (CmpUL src zero));
13435   effect(TEMP tmp);
13436   ins_cost(300);
13437   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13438             "CMP    $tmp,$src.lo\n\t"
13439             "SBB    $tmp,$src.hi\n\t" %}
13440   ins_encode(long_cmp_flags3(src, tmp));
13441   ins_pipe(ialu_reg_reg_long);
13442 %}
13443 
13444 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13445 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13446 // requires a commuted test to get the same result.
13447 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13448   match(Set flags (CmpUL src1 src2));
13449   effect(TEMP tmp);
13450   ins_cost(300);
13451   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13452             "MOV    $tmp,$src2.hi\n\t"
13453             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13454   ins_encode(long_cmp_flags2( src2, src1, tmp));
13455   ins_pipe(ialu_cr_reg_reg);
13456 %}
13457 
13458 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13459 // Just a wrapper for a normal branch, plus the predicate test
13460 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13461   match(If cmp flags);
13462   effect(USE labl);
13463   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13464   ins_cost(300);
13465   expand %{
13466     jmpCon(cmp, flags, labl);    // JGT or JLE...
13467   %}
13468 %}
13469 
13470 // Compare 2 longs and CMOVE longs.
13471 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13472   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13473   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13474   ins_cost(400);
13475   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13476             "CMOV$cmp $dst.hi,$src.hi" %}
13477   opcode(0x0F,0x40);
13478   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13479   ins_pipe( pipe_cmov_reg_long );
13480 %}
13481 
13482 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13483   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13484   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13485   ins_cost(500);
13486   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13487             "CMOV$cmp $dst.hi,$src.hi+4" %}
13488   opcode(0x0F,0x40);
13489   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13490   ins_pipe( pipe_cmov_reg_long );
13491 %}
13492 
13493 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13494   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13495   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13496   ins_cost(400);
13497   expand %{
13498     cmovLL_reg_LEGT(cmp, flags, dst, src);
13499   %}
13500 %}
13501 
13502 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13503   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13504   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13505   ins_cost(500);
13506   expand %{
13507     cmovLL_mem_LEGT(cmp, flags, dst, src);
13508   %}
13509 %}
13510 
13511 // Compare 2 longs and CMOVE ints.
13512 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13513   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13514   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
13516   format %{ "CMOV$cmp $dst,$src" %}
13517   opcode(0x0F,0x40);
13518   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13519   ins_pipe( pipe_cmov_reg );
13520 %}
13521 
13522 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13523   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13524   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13525   ins_cost(250);
13526   format %{ "CMOV$cmp $dst,$src" %}
13527   opcode(0x0F,0x40);
13528   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13529   ins_pipe( pipe_cmov_mem );
13530 %}
13531 
13532 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13533   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13534   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13535   ins_cost(200);
13536   expand %{
13537     cmovII_reg_LEGT(cmp, flags, dst, src);
13538   %}
13539 %}
13540 
13541 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13542   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13543   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13544   ins_cost(250);
13545   expand %{
13546     cmovII_mem_LEGT(cmp, flags, dst, src);
13547   %}
13548 %}
13549 
13550 // Compare 2 longs and CMOVE ptrs.
13551 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13552   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13553   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13554   ins_cost(200);
13555   format %{ "CMOV$cmp $dst,$src" %}
13556   opcode(0x0F,0x40);
13557   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13558   ins_pipe( pipe_cmov_reg );
13559 %}
13560 
13561 // Compare 2 unsigned longs and CMOVE ptrs.
13562 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13563   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13564   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13565   ins_cost(200);
13566   expand %{
13567     cmovPP_reg_LEGT(cmp,flags,dst,src);
13568   %}
13569 %}
13570 
13571 // Compare 2 longs and CMOVE doubles
13572 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13573   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13574   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13575   ins_cost(200);
13576   expand %{
13577     fcmovDPR_regS(cmp,flags,dst,src);
13578   %}
13579 %}
13580 
13581 // Compare 2 longs and CMOVE doubles
13582 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13583   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13584   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13585   ins_cost(200);
13586   expand %{
13587     fcmovD_regS(cmp,flags,dst,src);
13588   %}
13589 %}
13590 
13591 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13592   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13593   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13594   ins_cost(200);
13595   expand %{
13596     fcmovFPR_regS(cmp,flags,dst,src);
13597   %}
13598 %}
13599 
13600 
13601 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13602   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13603   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13604   ins_cost(200);
13605   expand %{
13606     fcmovF_regS(cmp,flags,dst,src);
13607   %}
13608 %}
13609 
13610 
13611 // ============================================================================
13612 // Procedure Call/Return Instructions
13613 // Call Java Static Instruction
13614 // Note: If this code changes, the corresponding ret_addr_offset() and
13615 //       compute_padding() functions will have to be adjusted.
13616 instruct CallStaticJavaDirect(method meth) %{
13617   match(CallStaticJava);
13618   effect(USE meth);
13619 
13620   ins_cost(300);
13621   format %{ "CALL,static " %}
13622   opcode(0xE8); /* E8 cd */
13623   ins_encode( pre_call_resets,
13624               Java_Static_Call( meth ),
13625               call_epilog,
13626               post_call_FPU );
13627   ins_pipe( pipe_slow );
13628   ins_alignment(4);
13629 %}
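// For reference: "E8 cd" is CALL rel32, a five-byte instruction whose return address
// is the byte immediately following it.  ret_addr_offset() and compute_padding()
// (see the note above) recompute that offset from the bytes emitted here, including
// whatever pre_call_resets produces, so all three must stay in sync.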
13630 
13631 // Call Java Dynamic Instruction
13632 // Note: If this code changes, the corresponding ret_addr_offset() and
13633 //       compute_padding() functions will have to be adjusted.
13634 instruct CallDynamicJavaDirect(method meth) %{
13635   match(CallDynamicJava);
13636   effect(USE meth);
13637 
13638   ins_cost(300);
13639   format %{ "MOV    EAX,(oop)-1\n\t"
13640             "CALL,dynamic" %}
13641   opcode(0xE8); /* E8 cd */
13642   ins_encode( pre_call_resets,
13643               Java_Dynamic_Call( meth ),
13644               call_epilog,
13645               post_call_FPU );
13646   ins_pipe( pipe_slow );
13647   ins_alignment(4);
13648 %}
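// The MOV seeds the inline-cache register (EAX on this port) with the (-1) placeholder
// shown in the format; the inline-cache machinery patches it at run time.  As with the
// static call above, ret_addr_offset() and compute_padding() assume this exact layout.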
13649 
13650 // Call Runtime Instruction
13651 instruct CallRuntimeDirect(method meth) %{
13652   match(CallRuntime );
13653   effect(USE meth);
13654 
13655   ins_cost(300);
13656   format %{ "CALL,runtime " %}
13657   opcode(0xE8); /* E8 cd */
13658   // Use FFREEs to clear entries in float stack
13659   ins_encode( pre_call_resets,
13660               FFree_Float_Stack_All,
13661               Java_To_Runtime( meth ),
13662               post_call_FPU );
13663   ins_pipe( pipe_slow );
13664 %}
13665 
13666 // Call runtime without safepoint
13667 instruct CallLeafDirect(method meth) %{
13668   match(CallLeaf);
13669   effect(USE meth);
13670 
13671   ins_cost(300);
13672   format %{ "CALL_LEAF,runtime " %}
13673   opcode(0xE8); /* E8 cd */
13674   ins_encode( pre_call_resets,
13675               FFree_Float_Stack_All,
13676               Java_To_Runtime( meth ),
13677               Verify_FPU_For_Leaf, post_call_FPU );
13678   ins_pipe( pipe_slow );
13679 %}
13680 
13681 instruct CallLeafNoFPDirect(method meth) %{
13682   match(CallLeafNoFP);
13683   effect(USE meth);
13684 
13685   ins_cost(300);
13686   format %{ "CALL_LEAF_NOFP,runtime " %}
13687   opcode(0xE8); /* E8 cd */
13688   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13689   ins_pipe( pipe_slow );
13690 %}
13691 
13692 
13693 // Return Instruction
13694 // Remove the return address & jump to it.
13695 instruct Ret() %{
13696   match(Return);
13697   format %{ "RET" %}
13698   opcode(0xC3);
13699   ins_encode(OpcP);
13700   ins_pipe( pipe_jmp );
13701 %}
13702 
13703 // Tail Call; Jump from runtime stub to Java code.
13704 // Also known as an 'interprocedural jump'.
13705 // Target of jump will eventually return to caller.
13706 // TailJump below removes the return address.
13707 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13708   match(TailCall jump_target method_ptr);
13709   ins_cost(300);
13710   format %{ "JMP    $jump_target \t# EBX holds method" %}
13711   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13712   ins_encode( OpcP, RegOpc(jump_target) );
13713   ins_pipe( pipe_jmp );
13714 %}
13715 
13716 
13717 // Tail Jump; remove the return address; jump to target.
13718 // TailCall above leaves the return address around.
13719 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13720   match( TailJump jump_target ex_oop );
13721   ins_cost(300);
13722   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13723             "JMP    $jump_target " %}
13724   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13725   ins_encode( enc_pop_rdx,
13726               OpcP, RegOpc(jump_target) );
13727   ins_pipe( pipe_jmp );
13728 %}
13729 
13730 // Create exception oop: created by stack-crawling runtime code.
13731 // Created exception is now available to this handler, and is setup
13732 // just prior to jumping to this handler.  No code emitted.
13733 instruct CreateException( eAXRegP ex_oop )
13734 %{
13735   match(Set ex_oop (CreateEx));
13736 
13737   size(0);
13738   // use the following format syntax
13739   format %{ "# exception oop is in EAX; no code emitted" %}
13740   ins_encode();
13741   ins_pipe( empty );
13742 %}
13743 
13744 
13745 // Rethrow exception:
13746 // The exception oop will come in the first argument position.
13747 // Then JUMP (not call) to the rethrow stub code.
13748 instruct RethrowException()
13749 %{
13750   match(Rethrow);
13751 
13752   // use the following format syntax
13753   format %{ "JMP    rethrow_stub" %}
13754   ins_encode(enc_rethrow);
13755   ins_pipe( pipe_jmp );
13756 %}
13757 
13758 // inlined locking and unlocking
13759 
13760 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13761   predicate(Compile::current()->use_rtm());
13762   match(Set cr (FastLock object box));
13763   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13764   ins_cost(300);
13765   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13766   ins_encode %{
13767     __ get_thread($thread$$Register);
13768     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13769                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13770                  _rtm_counters, _stack_rtm_counters,
13771                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13772                  true, ra_->C->profile_rtm());
13773   %}
13774   ins_pipe(pipe_slow);
13775 %}
13776 
13777 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13778   predicate(!Compile::current()->use_rtm());
13779   match(Set cr (FastLock object box));
13780   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13781   ins_cost(300);
13782   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13783   ins_encode %{
13784     __ get_thread($thread$$Register);
13785     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13786                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13787   %}
13788   ins_pipe(pipe_slow);
13789 %}
13790 
13791 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13792   match(Set cr (FastUnlock object box));
13793   effect(TEMP tmp, USE_KILL box);
13794   ins_cost(300);
13795   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13796   ins_encode %{
13797     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13798   %}
13799   ins_pipe(pipe_slow);
13800 %}
13801 
13802 instruct mask_all_evexL_LE32(kReg dst, eRegL src) %{
13803   predicate(Matcher::vector_length(n) <= 32);
13804   match(Set dst (MaskAll src));
13805   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13806   ins_encode %{
13807     int mask_len = Matcher::vector_length(this);
13808     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13809   %}
13810   ins_pipe( pipe_slow );
13811 %}
13812 
13813 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13814   predicate(Matcher::vector_length(n) > 32);
13815   match(Set dst (MaskAll src));
13816   effect(TEMP ktmp);
13817   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13818   ins_encode %{
13819     int mask_len = Matcher::vector_length(this);
13820     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13821   %}
13822   ins_pipe( pipe_slow );
13823 %}
13824 
13825 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13826   predicate(Matcher::vector_length(n) > 32);
13827   match(Set dst (MaskAll src));
13828   effect(TEMP ktmp);
13829   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13830   ins_encode %{
13831     int mask_len = Matcher::vector_length(this);
13832     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13833   %}
13834   ins_pipe( pipe_slow );
13835 %}
13836 
13837 // ============================================================================
13838 // Safepoint Instruction
13839 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13840   match(SafePoint poll);
13841   effect(KILL cr, USE poll);
13842 
13843   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13844   ins_cost(125);
13845   // EBP would need size(3)
13846   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13847   ins_encode %{
13848     __ relocate(relocInfo::poll_type);
13849     address pre_pc = __ pc();
13850     __ testl(rax, Address($poll$$Register, 0));
13851     address post_pc = __ pc();
13852     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13853   %}
13854   ins_pipe(ialu_reg_mem);
13855 %}
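// Encoding note: TEST r32,r/m32 is opcode 0x85 plus a ModRM byte, so with a plain
// register base and no displacement the poll is exactly two bytes -- which is what
// size(2) and the 0x85 guarantee above rely on.  EBP cannot be encoded as a base
// register without a displacement byte, hence the size(3) remark and the
// eRegP_no_EBP operand.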
13856 
13857 
13858 // ============================================================================
13859 // This name is KNOWN by the ADLC and cannot be changed.
13860 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13861 // for this node.
13862 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13863   match(Set dst (ThreadLocal));
13864   effect(DEF dst, KILL cr);
13865 
13866   format %{ "MOV    $dst, Thread::current()" %}
13867   ins_encode %{
13868     Register dstReg = as_Register($dst$$reg);
13869     __ get_thread(dstReg);
13870   %}
13871   ins_pipe( ialu_reg_fat );
13872 %}
13873 
13874 
13875 
13876 //----------PEEPHOLE RULES-----------------------------------------------------
13877 // These must follow all instruction definitions as they use the names
13878 // defined in the instructions definitions.
13879 //
13880 // peepmatch ( root_instr_name [preceding_instruction]* );
13881 //
13882 // peepconstraint %{
13883 // (instruction_number.operand_name relational_op instruction_number.operand_name
13884 //  [, ...] );
13885 // // instruction numbers are zero-based using left to right order in peepmatch
13886 //
13887 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13888 // // provide an instruction_number.operand_name for each operand that appears
13889 // // in the replacement instruction's match rule
13890 //
13891 // ---------VM FLAGS---------------------------------------------------------
13892 //
13893 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13894 //
13895 // Each peephole rule is given an identifying number starting with zero and
13896 // increasing by one in the order seen by the parser.  An individual peephole
13897 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13898 // on the command-line.
13899 //
13900 // ---------CURRENT LIMITATIONS----------------------------------------------
13901 //
13902 // Only match adjacent instructions in same basic block
13903 // Only equality constraints
13904 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13905 // Only one replacement instruction
13906 //
13907 // ---------EXAMPLE----------------------------------------------------------
13908 //
13909 // // pertinent parts of existing instructions in architecture description
13910 // instruct movI(rRegI dst, rRegI src) %{
13911 //   match(Set dst (CopyI src));
13912 // %}
13913 //
13914 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13915 //   match(Set dst (AddI dst src));
13916 //   effect(KILL cr);
13917 // %}
13918 //
13919 // // Change (inc mov) to lea
13920 // peephole %{
13921 //   // increment preceded by register-register move
13922 //   peepmatch ( incI_eReg movI );
13923 //   // require that the destination register of the increment
13924 //   // match the destination register of the move
13925 //   peepconstraint ( 0.dst == 1.dst );
13926 //   // construct a replacement instruction that sets
13927 //   // the destination to ( move's source register + one )
13928 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13929 // %}
13930 //
13931 // Implementation no longer uses movX instructions since
13932 // machine-independent system no longer uses CopyX nodes.
13933 //
13934 // peephole %{
13935 //   peepmatch ( incI_eReg movI );
13936 //   peepconstraint ( 0.dst == 1.dst );
13937 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13938 // %}
13939 //
13940 // peephole %{
13941 //   peepmatch ( decI_eReg movI );
13942 //   peepconstraint ( 0.dst == 1.dst );
13943 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13944 // %}
13945 //
13946 // peephole %{
13947 //   peepmatch ( addI_eReg_imm movI );
13948 //   peepconstraint ( 0.dst == 1.dst );
13949 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13950 // %}
13951 //
13952 // peephole %{
13953 //   peepmatch ( addP_eReg_imm movP );
13954 //   peepconstraint ( 0.dst == 1.dst );
13955 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13956 // %}
13957 
13958 // // Change load of spilled value to only a spill
13959 // instruct storeI(memory mem, rRegI src) %{
13960 //   match(Set mem (StoreI mem src));
13961 // %}
13962 //
13963 // instruct loadI(rRegI dst, memory mem) %{
13964 //   match(Set dst (LoadI mem));
13965 // %}
13966 //
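// The active rule below drops a load that immediately re-reads the value just stored
// to the same address, keeping only the store: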
13967 peephole %{
13968   peepmatch ( loadI storeI );
13969   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13970   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13971 %}
13972 
13973 //----------SMARTSPILL RULES---------------------------------------------------
13974 // These must follow all instruction definitions as they use the names
13975 // defined in the instructions definitions.