1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
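       // For example, in the reg_def entries below EAX carries encoding 0 and ECX
       // encoding 1, matching the hardware register numbers placed in ModRM/SIB bytes.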
   61 
   62 // General Registers
    63 // EBX, ESI, and EDI were previously set as save-on-entry for java code.
    64 // SOE was then turned off in java code due to frequent use of uncommon-traps.
    65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
    81 // Ok, so here's the trick: FPR1 is really st(0), except in the midst
    82 // of emission of assembly for a machnode. During the emission the fpu stack
    83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
    84 // the stack will not have this element, so FPR1 == st(0) from the
    85 // oopMap viewpoint. This same weirdness with numbering causes the
    86 // instruction encoding to have to play games with the register
    87 // encoding to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
    88 // where it does flt->flt moves, for an example.
    89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
   213 // Class of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
   220 // Not AX or DX (and not EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
   226 // FPR0 is never allocated; we use clever encodings to fake
   227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
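       // For example, the long pair EAX:EDX keeps its low half in EAX (encoding 0);
       // HIGH_FROM_LOW_ENC(0) == 2, the encoding of EDX (see the eadx_reg pair above).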
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
   267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
   268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
   270   // for the 128-bit operands of SSE instructions.
   271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
   272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
   278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
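       // These pools are used as 128-bit memory operands: AbsF/AbsD AND with a signmask
       // entry to clear the sign bit, while NegF/NegD XOR with a signflip entry to flip it.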
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
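       // Size notes: FLDCW with a 32-bit absolute address encodes as D9 /5 plus a 4-byte
       // displacement (6 bytes); VZEROUPPER is the 3-byte VEX encoding C5 F8 77.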
  299 
   300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
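       // A direct call is E8 plus a 4-byte displacement (5 bytes); the dynamic call is
       // preceded by a 5-byte MOV that loads the inline cache register, hence 10 bytes
       // (see the 'skip MOV instruction' adjustment in compute_padding below).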
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
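       // In both cases current_offset is advanced past the call opcode byte (and the MOV,
       // for the dynamic call) before aligning, so it is the 4-byte call displacement that
       // ends up aligned and can be patched atomically.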
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
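       // The byte emitted here serves as either a ModRM or a SIB byte: f1 is the 2-bit
       // mod (or scale) field, f2 the 3-bit reg (or index) field, and f3 the 3-bit r/m (or base) field.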
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
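       // For example, FILD DWORD [ESP+8] (opcode 0xDB, rm_field 0) comes out as DB 44 24 08:
       // ModRM 0x44 (mod=01, reg=000, rm=100=SIB follows), SIB 0x24 (index=none, base=ESP), disp8 8.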
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
   412   // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
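       // 0x8B is MOV r32,r/m32; with mod 0x3 the r/m field names a register directly,
       // so this emits a plain register-to-register move of src into dst.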
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
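       // Net effect: dst is -1 for less-than or unordered (PF set), 0 for equal, +1 for greater.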
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
  619     // NOTE: We set the table base offset here because users might be
  620     // emitted before MachConstantBaseNode.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   640   // Remove two words for return addr and rbp.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   680   // Remove two words for return addr and rbp.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
  694 
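         // 0x58 | reg is the one-byte POP r32 encoding; with EBP_enc == 5 this emits 0x5D,
         // i.e. POP EBP, restoring the saved frame pointer.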
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ relocate(relocInfo::poll_return_type);
  713     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  714   }
  715 }
  716 
  717 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  718   return MachNode::size(ra_); // too many variables; just compute it
  719                               // the hard way
  720 }
  721 
  722 int MachEpilogNode::reloc() const {
  723   return 0; // a large enough number
  724 }
  725 
  726 const Pipeline * MachEpilogNode::pipeline() const {
  727   return MachNode::pipeline_class();
  728 }
  729 
  730 //=============================================================================
  731 
  732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  733 static enum RC rc_class( OptoReg::Name reg ) {
  734 
  735   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  736   if (OptoReg::is_stack(reg)) return rc_stack;
  737 
  738   VMReg r = OptoReg::as_VMReg(reg);
  739   if (r->is_Register()) return rc_int;
  740   if (r->is_FloatRegister()) {
  741     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  742     return rc_float;
  743   }
  744   if (r->is_KRegister()) return rc_kreg;
  745   assert(r->is_XMMRegister(), "must be");
  746   return rc_xmm;
  747 }
  748 
  749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  750                         int opcode, const char *op_str, int size, outputStream* st ) {
  751   if( cbuf ) {
  752     emit_opcode  (*cbuf, opcode );
  753     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754 #ifndef PRODUCT
  755   } else if( !do_size ) {
  756     if( size != 0 ) st->print("\n\t");
  757     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  758       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  759       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  760     } else { // FLD, FST, PUSH, POP
  761       st->print("%s [ESP + #%d]",op_str,offset);
  762     }
  763 #endif
  764   }
  765   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  766   return size+3+offset_size;
  767 }
  768 
  769 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  770 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  771                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  772   int in_size_in_bits = Assembler::EVEX_32bit;
  773   int evex_encoding = 0;
  774   if (reg_lo+1 == reg_hi) {
  775     in_size_in_bits = Assembler::EVEX_64bit;
  776     evex_encoding = Assembler::VEX_W;
  777   }
  778   if (cbuf) {
  779     MacroAssembler _masm(cbuf);
   780     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
   781     //                          as it maps more cases to a single-byte displacement
  782     _masm.set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (cbuf) {
  832     MacroAssembler _masm(cbuf);
   833     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  834     _masm.set_managed();
  835     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  836       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  837                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  838     } else {
  839       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     }
  842 #ifndef PRODUCT
  843   } else if (!do_size) {
  844     if (size != 0) st->print("\n\t");
  845     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  846       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       } else {
  849         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       }
  851     } else {
  852       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  853         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       } else {
  855         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       }
  857     }
  858 #endif
  859   }
  860   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  861   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  862   int sz = (UseAVX > 2) ? 6 : 4;
  863   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  864       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  865   return size + sz;
  866 }
  867 
  868 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  869                             int src_hi, int dst_hi, int size, outputStream* st ) {
  870   // 32-bit
  871   if (cbuf) {
  872     MacroAssembler _masm(cbuf);
   873     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  874     _masm.set_managed();
  875     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  876              as_Register(Matcher::_regEncode[src_lo]));
  877 #ifndef PRODUCT
  878   } else if (!do_size) {
  879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  880 #endif
  881   }
  882   return (UseAVX> 2) ? 6 : 4;
  883 }
  884 
  885 
  886 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  887                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  888   // 32-bit
  889   if (cbuf) {
  890     MacroAssembler _masm(cbuf);
   891     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  892     _masm.set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( cbuf ) {
  905     emit_opcode(*cbuf, 0x8B );
  906     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( cbuf ) {
  920       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (cbuf) {
  956     MacroAssembler _masm(cbuf);
  957     switch (ireg) {
  958     case Op_VecS:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       break;
  962     case Op_VecD:
  963       __ pushl(Address(rsp, src_offset));
  964       __ popl (Address(rsp, dst_offset));
  965       __ pushl(Address(rsp, src_offset+4));
  966       __ popl (Address(rsp, dst_offset+4));
  967       break;
  968     case Op_VecX:
  969       __ movdqu(Address(rsp, -16), xmm0);
  970       __ movdqu(xmm0, Address(rsp, src_offset));
  971       __ movdqu(Address(rsp, dst_offset), xmm0);
  972       __ movdqu(xmm0, Address(rsp, -16));
  973       break;
  974     case Op_VecY:
  975       __ vmovdqu(Address(rsp, -32), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, src_offset));
  977       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  978       __ vmovdqu(xmm0, Address(rsp, -32));
  979       break;
  980     case Op_VecZ:
  981       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  983       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  984       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  985       break;
  986     default:
  987       ShouldNotReachHere();
  988     }
  989 #ifndef PRODUCT
  990   } else {
  991     switch (ireg) {
  992     case Op_VecS:
  993       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  994                 "popl    [rsp + #%d]",
  995                 src_offset, dst_offset);
  996       break;
  997     case Op_VecD:
  998       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  999                 "popq    [rsp + #%d]\n\t"
 1000                 "pushl   [rsp + #%d]\n\t"
 1001                 "popq    [rsp + #%d]",
 1002                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1003       break;
 1004      case Op_VecX:
 1005       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1006                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1007                 "movdqu  [rsp + #%d], xmm0\n\t"
 1008                 "movdqu  xmm0, [rsp - #16]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecY:
 1012       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1013                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1014                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1015                 "vmovdqu xmm0, [rsp - #32]",
 1016                 src_offset, dst_offset);
 1017       break;
 1018     case Op_VecZ:
 1019       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1020                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1021                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1022                 "vmovdqu xmm0, [rsp - #64]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     default:
 1026       ShouldNotReachHere();
 1027     }
 1028 #endif
 1029   }
 1030 }
 1031 
 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1033   // Get registers to move
 1034   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1035   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1036   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1037   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1038 
 1039   enum RC src_second_rc = rc_class(src_second);
 1040   enum RC src_first_rc = rc_class(src_first);
 1041   enum RC dst_second_rc = rc_class(dst_second);
 1042   enum RC dst_first_rc = rc_class(dst_first);
 1043 
 1044   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1045 
 1046   // Generate spill code!
 1047   int size = 0;
 1048 
 1049   if( src_first == dst_first && src_second == dst_second )
 1050     return size;            // Self copy, no move
 1051 
 1052   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1053     uint ireg = ideal_reg();
 1054     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1055     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1056     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1057     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1058       // mem -> mem
 1059       int src_offset = ra_->reg2offset(src_first);
 1060       int dst_offset = ra_->reg2offset(dst_first);
 1061       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1063       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1064     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1065       int stack_offset = ra_->reg2offset(dst_first);
 1066       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1067     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1068       int stack_offset = ra_->reg2offset(src_first);
 1069       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1070     } else {
 1071       ShouldNotReachHere();
 1072     }
 1073     return 0;
 1074   }
 1075 
 1076   // --------------------------------------
 1077   // Check for mem-mem move.  push/pop to move.
 1078   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1079     if( src_second == dst_first ) { // overlapping stack copy ranges
 1080       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1083       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1084     }
 1085     // move low bits
 1086     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1087     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1088     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1089       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1090       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1091     }
 1092     return size;
 1093   }
 1094 
 1095   // --------------------------------------
 1096   // Check for integer reg-reg copy
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1098     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1099 
 1100   // Check for integer store
 1101   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1102     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1103 
 1104   // Check for integer load
 1105   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1106     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1107 
 1108   // Check for integer reg-xmm reg copy
 1109   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1110     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1111             "no 64 bit integer-float reg moves" );
 1112     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1113   }
 1114   // --------------------------------------
 1115   // Check for float reg-reg copy
 1116   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1117     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1118             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1119     if( cbuf ) {
 1120 
 1121       // Note the mucking with the register encode to compensate for the 0/1
 1122       // indexing issue mentioned in a comment in the reg_def sections
 1123       // for FPR registers many lines above here.
 1124 
 1125       if( src_first != FPR1L_num ) {
 1126         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1127         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1128         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1129         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1130      } else {
 1131         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1132         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1133      }
 1134 #ifndef PRODUCT
 1135     } else if( !do_size ) {
 1136       if( size != 0 ) st->print("\n\t");
 1137       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1138       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1139 #endif
 1140     }
 1141     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1142   }
 1143 
 1144   // Check for float store
 1145   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1146     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1147   }
 1148 
 1149   // Check for float load
 1150   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1151     int offset = ra_->reg2offset(src_first);
 1152     const char *op_str;
 1153     int op;
 1154     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1155       op_str = "FLD_D";
 1156       op = 0xDD;
 1157     } else {                   // 32-bit load
 1158       op_str = "FLD_S";
 1159       op = 0xD9;
 1160       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1161     }
 1162     if( cbuf ) {
 1163       emit_opcode  (*cbuf, op );
 1164       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1167 #ifndef PRODUCT
 1168     } else if( !do_size ) {
 1169       if( size != 0 ) st->print("\n\t");
 1170       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1171 #endif
 1172     }
 1173     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1174     return size + 3+offset_size+2;
 1175   }
 1176 
 1177   // Check for xmm reg-reg copy
 1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1180             (src_first+1 == src_second && dst_first+1 == dst_second),
 1181             "no non-adjacent float-moves" );
 1182     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1183   }
 1184 
 1185   // Check for xmm reg-integer reg copy
 1186   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1187     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1188             "no 64 bit float-integer reg moves" );
 1189     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1190   }
 1191 
 1192   // Check for xmm store
 1193   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1194     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1195   }
 1196 
 1197   // Check for float xmm load
 1198   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1199     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1200   }
 1201 
 1202   // Copy from float reg to xmm reg
 1203   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1204     // copy to the top of stack from floating point reg
 1205     // and use LEA to preserve flags
 1206     if( cbuf ) {
 1207       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1208       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1209       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1210       emit_d8(*cbuf,0xF8);
 1211 #ifndef PRODUCT
 1212     } else if( !do_size ) {
 1213       if( size != 0 ) st->print("\n\t");
 1214       st->print("LEA    ESP,[ESP-8]");
 1215 #endif
 1216     }
 1217     size += 4;
 1218 
 1219     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1220 
 1221     // Copy from the temp memory to the xmm reg.
 1222     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1223 
 1224     if( cbuf ) {
 1225       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1226       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1227       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1228       emit_d8(*cbuf,0x08);
 1229 #ifndef PRODUCT
 1230     } else if( !do_size ) {
 1231       if( size != 0 ) st->print("\n\t");
 1232       st->print("LEA    ESP,[ESP+8]");
 1233 #endif
 1234     }
 1235     size += 4;
 1236     return size;
 1237   }
 1238 
 1239   // AVX-512 opmask specific spilling.
 1240   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1241     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1242     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1243     int offset = ra_->reg2offset(src_first);
 1244     if (cbuf != nullptr) {
 1245       MacroAssembler _masm(cbuf);
 1246       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247 #ifndef PRODUCT
 1248     } else {
 1249       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1250 #endif
 1251     }
 1252     return 0;
 1253   }
 1254 
 1255   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1256     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1257     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1258     int offset = ra_->reg2offset(dst_first);
 1259     if (cbuf != nullptr) {
 1260       MacroAssembler _masm(cbuf);
 1261       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1262 #ifndef PRODUCT
 1263     } else {
 1264       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1265 #endif
 1266     }
 1267     return 0;
 1268   }
 1269 
 1270   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1271     Unimplemented();
 1272     return 0;
 1273   }
 1274 
 1275   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1276     Unimplemented();
 1277     return 0;
 1278   }
 1279 
 1280   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1281     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1282     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1283     if (cbuf != nullptr) {
 1284       MacroAssembler _masm(cbuf);
 1285       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1286 #ifndef PRODUCT
 1287     } else {
 1288       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1289 #endif
 1290     }
 1291     return 0;
 1292   }
 1293 
 1294   assert( size > 0, "missed a case" );
 1295 
 1296   // --------------------------------------------------------------------
  // Check whether the second (high) word still needs to be moved.
 1298   if( src_second == dst_second )
 1299     return size;               // Self copy; no move
 1300   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1301 
 1302   // Check for second word int-int move
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1304     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1305 
 1306   // Check for second word integer store
 1307   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1309 
 1310   // Check for second word integer load
 1311   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1312     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1313 
 1314   Unimplemented();
 1315   return 0; // Mute compiler
 1316 }
 1317 
 1318 #ifndef PRODUCT
 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1320   implementation( nullptr, ra_, false, st );
 1321 }
 1322 #endif
 1323 
 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1325   implementation( &cbuf, ra_, false, nullptr );
 1326 }
 1327 
 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1329   return MachNode::size(ra_);
 1330 }
 1331 
 1332 
 1333 //=============================================================================
 1334 #ifndef PRODUCT
 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1337   int reg = ra_->get_reg_first(this);
 1338   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1339 }
 1340 #endif
 1341 
 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   int reg = ra_->get_encode(this);
 1345   if( offset >= 128 ) {
 1346     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1347     emit_rm(cbuf, 0x2, reg, 0x04);
 1348     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1349     emit_d32(cbuf, offset);
 1350   }
 1351   else {
 1352     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1353     emit_rm(cbuf, 0x1, reg, 0x04);
 1354     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1355     emit_d8(cbuf, offset);
 1356   }
 1357 }
 1358 
 1359 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1360   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
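  // LEA reg,[ESP+disp] is 7 bytes with a 32-bit displacement
  // (opcode, ModRM, SIB, disp32) and 4 bytes with an 8-bit one.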
 1361   if( offset >= 128 ) {
 1362     return 7;
 1363   }
 1364   else {
 1365     return 4;
 1366   }
 1367 }
 1368 
 1369 //=============================================================================
 1370 #ifndef PRODUCT
 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1372   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1373   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1374   st->print_cr("\tNOP");
 1375   st->print_cr("\tNOP");
 1376   if( !OptoBreakpoint )
 1377     st->print_cr("\tNOP");
 1378 }
 1379 #endif
 1380 
 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1382   MacroAssembler masm(&cbuf);
 1383 #ifdef ASSERT
 1384   uint insts_size = cbuf.insts_size();
 1385 #endif
 1386   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1387   masm.jump_cc(Assembler::notEqual,
 1388                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1391   int nops_cnt = 2;
 1392   if( !OptoBreakpoint ) // Leave space for int3
 1393      nops_cnt += 1;
 1394   masm.nop(nops_cnt);
 1395 
 1396   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1397 }
 1398 
 1399 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
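  // CMP (3 bytes) + JNE rel32 (6 bytes) + 2 or 3 NOPs emitted above.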
 1400   return OptoBreakpoint ? 11 : 12;
 1401 }
 1402 
 1403 
 1404 //=============================================================================
 1405 
 1406 // Vector calling convention not supported.
 1407 bool Matcher::supports_vector_calling_convention() {
 1408   return false;
 1409 }
 1410 
 1411 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1412   Unimplemented();
 1413   return OptoRegPair(0, 0);
 1414 }
 1415 
 1416 // Is this branch offset short enough that a short branch can be used?
 1417 //
 1418 // NOTE: If the platform does not provide any short branch variants, then
 1419 //       this method should return false for offset 0.
 1420 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1421   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1424   offset -= br_size;
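  // For example, with br_size == 2 a raw offset of 129 becomes 127 after
  // the adjustment and still fits in a signed 8-bit displacement.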
 1425 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
 1428   if (rule == jmpConUCF2_rule)
 1429     return (-126 <= offset && offset <= 125);
 1430   return (-128 <= offset && offset <= 127);
 1431 }
 1432 
 1433 // Return whether or not this register is ever used as an argument.  This
 1434 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1435 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1437 bool Matcher::can_be_java_arg( int reg ) {
 1438   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1439   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1440   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1441   return false;
 1442 }
 1443 
 1444 bool Matcher::is_spillable_arg( int reg ) {
 1445   return can_be_java_arg(reg);
 1446 }
 1447 
 1448 uint Matcher::int_pressure_limit()
 1449 {
 1450   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1451 }
 1452 
 1453 uint Matcher::float_pressure_limit()
 1454 {
 1455   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1456 }
 1457 
 1458 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // code that uses a multiply.  Only do so when the constant divisor
  // fits into 32 bits (min_jint is excluded so that negating a negative
  // divisor still yields a correct positive 32-bit value).
 1464   return VM_Version::has_fast_idiv() &&
 1465          (divisor == (int)divisor && divisor != min_jint);
 1466 }
 1467 
 1468 // Register for DIVI projection of divmodI
 1469 RegMask Matcher::divI_proj_mask() {
 1470   return EAX_REG_mask();
 1471 }
 1472 
 1473 // Register for MODI projection of divmodI
 1474 RegMask Matcher::modI_proj_mask() {
 1475   return EDX_REG_mask();
 1476 }
 1477 
 1478 // Register for DIVL projection of divmodL
 1479 RegMask Matcher::divL_proj_mask() {
 1480   ShouldNotReachHere();
 1481   return RegMask();
 1482 }
 1483 
 1484 // Register for MODL projection of divmodL
 1485 RegMask Matcher::modL_proj_mask() {
 1486   ShouldNotReachHere();
 1487   return RegMask();
 1488 }
 1489 
 1490 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1491   return NO_REG_mask();
 1492 }
 1493 
// Returns true if the high 32 bits of the value are known to be zero.
 1495 bool is_operand_hi32_zero(Node* n) {
 1496   int opc = n->Opcode();
 1497   if (opc == Op_AndL) {
 1498     Node* o2 = n->in(2);
 1499     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1500       return true;
 1501     }
 1502   }
 1503   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1504     return true;
 1505   }
 1506   return false;
 1507 }
 1508 
 1509 %}
 1510 
 1511 //----------ENCODING BLOCK-----------------------------------------------------
 1512 // This block specifies the encoding classes used by the compiler to output
 1513 // byte streams.  Encoding classes generate functions which are called by
 1514 // Machine Instruction Nodes in order to generate the bit encoding of the
 1515 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1517 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1518 // operand to generate a function which returns its register number when
 1519 // queried.   CONST_INTER causes an operand to generate a function which
 1520 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1521 // operand to generate four functions which return the Base Register, the
 1522 // Index Register, the Scale Value, and the Offset Value of the operand when
 1523 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1525 // associated with each basic boolean condition for a conditional instruction.
 1526 // Instructions specify two basic values for encoding.  They use the
 1527 // ins_encode keyword to specify their encoding class (which must be one of
 1528 // the class names specified in the encoding block), and they use the
 1529 // opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections that a particular instruction
// needs for its encoding have to be specified.
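//
// As an illustrative sketch (not copied verbatim from the rules later in
// this file), an integer add instruction might combine these pieces as:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                         // primary opcode: ADD r32,r/m32
//     ins_encode(OpcP, RegReg(dst, src));   // emit primary opcode, then ModRM
//     ins_pipe(ialu_reg_reg);
//   %}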
 1532 encode %{
 1533   // Build emit functions for each basic byte or larger field in the intel
 1534   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1535   // code in the enc_class source block.  Emit functions will live in the
 1536   // main source block for now.  In future, we can generalize this by
 1537   // adding a syntax that specifies the sizes of fields in an order,
 1538   // so that the adlc can build the emit functions automagically
 1539 
 1540   // Emit primary opcode
 1541   enc_class OpcP %{
 1542     emit_opcode(cbuf, $primary);
 1543   %}
 1544 
 1545   // Emit secondary opcode
 1546   enc_class OpcS %{
 1547     emit_opcode(cbuf, $secondary);
 1548   %}
 1549 
 1550   // Emit opcode directly
 1551   enc_class Opcode(immI d8) %{
 1552     emit_opcode(cbuf, $d8$$constant);
 1553   %}
 1554 
 1555   enc_class SizePrefix %{
 1556     emit_opcode(cbuf,0x66);
 1557   %}
 1558 
 1559   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1560     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1561   %}
 1562 
 1563   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1564     emit_opcode(cbuf,$opcode$$constant);
 1565     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1566   %}
 1567 
 1568   enc_class mov_r32_imm0( rRegI dst ) %{
 1569     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1570     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1571   %}
 1572 
 1573   enc_class cdq_enc %{
 1574     // Full implementation of Java idiv and irem; checks for
 1575     // special case as described in JVM spec., p.243 & p.271.
 1576     //
 1577     //         normal case                           special case
 1578     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
 1586     //
 1587     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1588     //  0F 85 0B 00 00 00    jne         normal_case
 1589     //  33 D2                xor         rdx,edx
 1590     //  83 F9 FF             cmp         rcx,0FFh
 1591     //  0F 84 03 00 00 00    je          done
 1592     //                  normal_case:
 1593     //  99                   cdq
 1594     //  F7 F9                idiv        rax,ecx
 1595     //                  done:
 1596     //
 1597     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1598     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1599     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1600     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1601     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1603     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1604     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1605     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1606     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1607     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1608     // normal_case:
 1609     emit_opcode(cbuf,0x99);                                         // cdq
 1610     // idiv (note: must be emitted by the user of this rule)
    // done:
 1612   %}
 1613 
 1614   // Dense encoding for older common ops
 1615   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1616     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1617   %}
 1618 
 1619 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1621   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1622     // Check for 8-bit immediate, and set sign extend bit in opcode
 1623     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1624       emit_opcode(cbuf, $primary | 0x02);
 1625     }
 1626     else {                          // If 32-bit immediate
 1627       emit_opcode(cbuf, $primary);
 1628     }
 1629   %}
 1630 
 1631   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1632     // Emit primary opcode and set sign-extend bit
 1633     // Check for 8-bit immediate, and set sign extend bit in opcode
 1634     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1636     else {                          // If 32-bit immediate
 1637       emit_opcode(cbuf, $primary);
 1638     }
 1639     // Emit r/m byte with secondary opcode, after primary opcode.
 1640     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1641   %}
 1642 
 1643   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1644     // Check for 8-bit immediate, and set sign extend bit in opcode
 1645     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1646       $$$emit8$imm$$constant;
 1647     }
 1648     else {                          // If 32-bit immediate
 1649       // Output immediate
 1650       $$$emit32$imm$$constant;
 1651     }
 1652   %}
 1653 
 1654   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1655     // Emit primary opcode and set sign-extend bit
 1656     // Check for 8-bit immediate, and set sign extend bit in opcode
 1657     int con = (int)$imm$$constant; // Throw away top bits
 1658     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1659     // Emit r/m byte with secondary opcode, after primary opcode.
 1660     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1661     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1662     else                               emit_d32(cbuf,con);
 1663   %}
 1664 
 1665   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1666     // Emit primary opcode and set sign-extend bit
 1667     // Check for 8-bit immediate, and set sign extend bit in opcode
 1668     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1669     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1670     // Emit r/m byte with tertiary opcode, after primary opcode.
 1671     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1672     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1673     else                               emit_d32(cbuf,con);
 1674   %}
 1675 
 1676   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1677     emit_cc(cbuf, $secondary, $dst$$reg );
 1678   %}
 1679 
 1680   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1681     int destlo = $dst$$reg;
 1682     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1683     // bswap lo
 1684     emit_opcode(cbuf, 0x0F);
 1685     emit_cc(cbuf, 0xC8, destlo);
 1686     // bswap hi
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, desthi);
 1689     // xchg lo and hi
 1690     emit_opcode(cbuf, 0x87);
 1691     emit_rm(cbuf, 0x3, destlo, desthi);
 1692   %}
 1693 
 1694   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1695     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1696   %}
 1697 
 1698   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1699     $$$emit8$primary;
 1700     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1701   %}
 1702 
 1703   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1704     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1705     emit_d8(cbuf, op >> 8 );
 1706     emit_d8(cbuf, op & 255);
 1707   %}
 1708 
 1709   // emulate a CMOV with a conditional branch around a MOV
 1710   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1711     // Invert sense of branch from sense of CMOV
 1712     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1713     emit_d8( cbuf, $brOffs$$constant );
 1714   %}
 1715 
 1716   enc_class enc_PartialSubtypeCheck( ) %{
 1717     Register Redi = as_Register(EDI_enc); // result register
 1718     Register Reax = as_Register(EAX_enc); // super class
 1719     Register Recx = as_Register(ECX_enc); // killed
 1720     Register Resi = as_Register(ESI_enc); // sub class
 1721     Label miss;
 1722 
 1723     MacroAssembler _masm(&cbuf);
 1724     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1725                                      nullptr, &miss,
 1726                                      /*set_cond_codes:*/ true);
 1727     if ($primary) {
 1728       __ xorptr(Redi, Redi);
 1729     }
 1730     __ bind(miss);
 1731   %}
 1732 
 1733   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1734     MacroAssembler masm(&cbuf);
 1735     int start = masm.offset();
 1736     if (UseSSE >= 2) {
 1737       if (VerifyFPU) {
 1738         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1739       }
 1740     } else {
 1741       // External c_calling_convention expects the FPU stack to be 'clean'.
 1742       // Compiled code leaves it dirty.  Do cleanup now.
 1743       masm.empty_FPU_stack();
 1744     }
 1745     if (sizeof_FFree_Float_Stack_All == -1) {
 1746       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1747     } else {
 1748       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1749     }
 1750   %}
 1751 
 1752   enc_class Verify_FPU_For_Leaf %{
 1753     if( VerifyFPU ) {
 1754       MacroAssembler masm(&cbuf);
 1755       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1756     }
 1757   %}
 1758 
 1759   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1760     // This is the instruction starting address for relocation info.
 1761     MacroAssembler _masm(&cbuf);
 1762     cbuf.set_insts_mark();
 1763     $$$emit8$primary;
 1764     // CALL directly to the runtime
 1765     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1766                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1767     __ post_call_nop();
 1768 
 1769     if (UseSSE >= 2) {
 1770       MacroAssembler _masm(&cbuf);
 1771       BasicType rt = tf()->return_type();
 1772 
 1773       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1774         // A C runtime call where the return value is unused.  In SSE2+
 1775         // mode the result needs to be removed from the FPU stack.  It's
 1776         // likely that this function call could be removed by the
 1777         // optimizer if the C function is a pure function.
 1778         __ ffree(0);
 1779       } else if (rt == T_FLOAT) {
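        // Move the x87 float result (ST0) into xmm0 through a stack temp;
        // in SSE2+ mode the caller expects float/double results in xmm0.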
 1780         __ lea(rsp, Address(rsp, -4));
 1781         __ fstp_s(Address(rsp, 0));
 1782         __ movflt(xmm0, Address(rsp, 0));
 1783         __ lea(rsp, Address(rsp,  4));
 1784       } else if (rt == T_DOUBLE) {
 1785         __ lea(rsp, Address(rsp, -8));
 1786         __ fstp_d(Address(rsp, 0));
 1787         __ movdbl(xmm0, Address(rsp, 0));
 1788         __ lea(rsp, Address(rsp,  8));
 1789       }
 1790     }
 1791   %}
 1792 
 1793   enc_class pre_call_resets %{
    // If the method changed the FPU control word (24-bit mode), restore the standard one here
 1795     debug_only(int off0 = cbuf.insts_size());
 1796     if (ra_->C->in_24_bit_fp_mode()) {
 1797       MacroAssembler _masm(&cbuf);
 1798       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1799     }
 1800     // Clear upper bits of YMM registers when current compiled code uses
 1801     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1802     MacroAssembler _masm(&cbuf);
 1803     __ vzeroupper();
 1804     debug_only(int off1 = cbuf.insts_size());
 1805     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1806   %}
 1807 
 1808   enc_class post_call_FPU %{
    // If the method changed the FPU control word (24-bit mode), reinstall it here after the call
 1810     if (Compile::current()->in_24_bit_fp_mode()) {
 1811       MacroAssembler masm(&cbuf);
 1812       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1813     }
 1814   %}
 1815 
 1816   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1817     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1818     // who we intended to call.
 1819     MacroAssembler _masm(&cbuf);
 1820     cbuf.set_insts_mark();
 1821     $$$emit8$primary;
 1822 
 1823     if (!_method) {
 1824       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1825                      runtime_call_Relocation::spec(),
 1826                      RELOC_IMM32);
 1827       __ post_call_nop();
 1828     } else {
 1829       int method_index = resolved_method_index(cbuf);
 1830       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1831                                                   : static_call_Relocation::spec(method_index);
 1832       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1833                      rspec, RELOC_DISP32);
 1834       __ post_call_nop();
 1835       address mark = cbuf.insts_mark();
 1836       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1837         // Calls of the same statically bound method can share
 1838         // a stub to the interpreter.
 1839         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1840       } else {
 1841         // Emit stubs for static call.
 1842         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1843         if (stub == nullptr) {
 1844           ciEnv::current()->record_failure("CodeCache is full");
 1845           return;
 1846         }
 1847       }
 1848     }
 1849   %}
 1850 
 1851   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1852     MacroAssembler _masm(&cbuf);
 1853     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1854     __ post_call_nop();
 1855   %}
 1856 
 1857   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1858     int disp = in_bytes(Method::from_compiled_offset());
 1859     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1860 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1862     MacroAssembler _masm(&cbuf);
 1863     cbuf.set_insts_mark();
 1864     $$$emit8$primary;
 1865     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1866     emit_d8(cbuf, disp);             // Displacement
 1867     __ post_call_nop();
 1868   %}
 1869 
 1870 //   Following encoding is no longer used, but may be restored if calling
 1871 //   convention changes significantly.
 1872 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1873 //
 1874 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1875 //     // int ic_reg     = Matcher::inline_cache_reg();
 1876 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1877 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1878 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1879 //
 1880 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1881 //     // // so we load it immediately before the call
 1882 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1883 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1884 //
 1885 //     // xor rbp,ebp
 1886 //     emit_opcode(cbuf, 0x33);
 1887 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1888 //
 1889 //     // CALL to interpreter.
 1890 //     cbuf.set_insts_mark();
 1891 //     $$$emit8$primary;
 1892 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1893 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1894 //   %}
 1895 
 1896   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1897     $$$emit8$primary;
 1898     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1899     $$$emit8$shift$$constant;
 1900   %}
 1901 
 1902   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1903     // Load immediate does not have a zero or sign extended version
 1904     // for 8-bit immediates
 1905     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1906     $$$emit32$src$$constant;
 1907   %}
 1908 
 1909   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1910     // Load immediate does not have a zero or sign extended version
 1911     // for 8-bit immediates
 1912     emit_opcode(cbuf, $primary + $dst$$reg);
 1913     $$$emit32$src$$constant;
 1914   %}
 1915 
 1916   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1917     // Load immediate does not have a zero or sign extended version
 1918     // for 8-bit immediates
 1919     int dst_enc = $dst$$reg;
 1920     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1921     if (src_con == 0) {
 1922       // xor dst, dst
 1923       emit_opcode(cbuf, 0x33);
 1924       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1925     } else {
 1926       emit_opcode(cbuf, $primary + dst_enc);
 1927       emit_d32(cbuf, src_con);
 1928     }
 1929   %}
 1930 
 1931   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1932     // Load immediate does not have a zero or sign extended version
 1933     // for 8-bit immediates
 1934     int dst_enc = $dst$$reg + 2;
 1935     int src_con = ((julong)($src$$constant)) >> 32;
 1936     if (src_con == 0) {
 1937       // xor dst, dst
 1938       emit_opcode(cbuf, 0x33);
 1939       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1940     } else {
 1941       emit_opcode(cbuf, $primary + dst_enc);
 1942       emit_d32(cbuf, src_con);
 1943     }
 1944   %}
 1945 
 1946 
 1947   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1948   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1949     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1950   %}
 1951 
 1952   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1953     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1954   %}
 1955 
 1956   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1957     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1958   %}
 1959 
 1960   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1961     $$$emit8$primary;
 1962     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1963   %}
 1964 
 1965   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1966     $$$emit8$secondary;
 1967     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1968   %}
 1969 
 1970   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1971     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1972   %}
 1973 
 1974   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1975     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1976   %}
 1977 
 1978   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1979     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1980   %}
 1981 
 1982   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1983     // Output immediate
 1984     $$$emit32$src$$constant;
 1985   %}
 1986 
 1987   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1988     // Output Float immediate bits
 1989     jfloat jf = $src$$constant;
 1990     int    jf_as_bits = jint_cast( jf );
 1991     emit_d32(cbuf, jf_as_bits);
 1992   %}
 1993 
 1994   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1995     // Output Float immediate bits
 1996     jfloat jf = $src$$constant;
 1997     int    jf_as_bits = jint_cast( jf );
 1998     emit_d32(cbuf, jf_as_bits);
 1999   %}
 2000 
 2001   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2002     // Output immediate
 2003     $$$emit16$src$$constant;
 2004   %}
 2005 
 2006   enc_class Con_d32(immI src) %{
 2007     emit_d32(cbuf,$src$$constant);
 2008   %}
 2009 
 2010   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2011     // Output immediate memory reference
 2012     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2013     emit_d32(cbuf, 0x00);
 2014   %}
 2015 
 2016   enc_class lock_prefix( ) %{
 2017     emit_opcode(cbuf,0xF0);         // [Lock]
 2018   %}
 2019 
 2020   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2025   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2026 
 2027     // XCHG  rbx,ecx
 2028     emit_opcode(cbuf,0x87);
 2029     emit_opcode(cbuf,0xD9);
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032     // CMPXCHG8 [Eptr]
 2033     emit_opcode(cbuf,0x0F);
 2034     emit_opcode(cbuf,0xC7);
 2035     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2036     // XCHG  rbx,ecx
 2037     emit_opcode(cbuf,0x87);
 2038     emit_opcode(cbuf,0xD9);
 2039   %}
 2040 
 2041   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2042     // [Lock]
 2043     emit_opcode(cbuf,0xF0);
 2044 
 2045     // CMPXCHG [Eptr]
 2046     emit_opcode(cbuf,0x0F);
 2047     emit_opcode(cbuf,0xB1);
 2048     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2049   %}
 2050 
 2051   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2052     // [Lock]
 2053     emit_opcode(cbuf,0xF0);
 2054 
 2055     // CMPXCHGB [Eptr]
 2056     emit_opcode(cbuf,0x0F);
 2057     emit_opcode(cbuf,0xB0);
 2058     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2059   %}
 2060 
 2061   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2062     // [Lock]
 2063     emit_opcode(cbuf,0xF0);
 2064 
    // operand-size prefix for the 16-bit operation
 2066     emit_opcode(cbuf, 0x66);
 2067 
 2068     // CMPXCHGW [Eptr]
 2069     emit_opcode(cbuf,0x0F);
 2070     emit_opcode(cbuf,0xB1);
 2071     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2072   %}
 2073 
 2074   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2075     int res_encoding = $res$$reg;
 2076 
 2077     // MOV  res,0
 2078     emit_opcode( cbuf, 0xB8 + res_encoding);
 2079     emit_d32( cbuf, 0 );
 2080     // JNE,s  fail
 2081     emit_opcode(cbuf,0x75);
 2082     emit_d8(cbuf, 5 );
 2083     // MOV  res,1
 2084     emit_opcode( cbuf, 0xB8 + res_encoding);
 2085     emit_d32( cbuf, 1 );
 2086     // fail:
 2087   %}
 2088 
 2089   enc_class set_instruction_start( ) %{
 2090     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2091   %}
 2092 
 2093   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2094     int reg_encoding = $ereg$$reg;
 2095     int base  = $mem$$base;
 2096     int index = $mem$$index;
 2097     int scale = $mem$$scale;
 2098     int displace = $mem$$disp;
 2099     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2100     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2101   %}
 2102 
 2103   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2104     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2105     int base  = $mem$$base;
 2106     int index = $mem$$index;
 2107     int scale = $mem$$scale;
 2108     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2109     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2110     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2111   %}
 2112 
 2113   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
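    // $tertiary selects the double shift: 0x0F,0xA4 is SHLD (shift the high
    // half left, filling from the low half), otherwise SHRD; r1/r2 are ordered
    // to match, and $primary/$secondary then shift the remaining half by the
    // same count.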
 2114     int r1, r2;
 2115     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2116     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2117     emit_opcode(cbuf,0x0F);
 2118     emit_opcode(cbuf,$tertiary);
 2119     emit_rm(cbuf, 0x3, r1, r2);
 2120     emit_d8(cbuf,$cnt$$constant);
 2121     emit_d8(cbuf,$primary);
 2122     emit_rm(cbuf, 0x3, $secondary, r1);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124   %}
 2125 
 2126   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2127     emit_opcode( cbuf, 0x8B ); // Move
 2128     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2129     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2130       emit_d8(cbuf,$primary);
 2131       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2132       emit_d8(cbuf,$cnt$$constant-32);
 2133     }
 2134     emit_d8(cbuf,$primary);
 2135     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2136     emit_d8(cbuf,31);
 2137   %}
 2138 
 2139   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2140     int r1, r2;
 2141     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2142     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2143 
 2144     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2145     emit_rm(cbuf, 0x3, r1, r2);
 2146     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2147       emit_opcode(cbuf,$primary);
 2148       emit_rm(cbuf, 0x3, $secondary, r1);
 2149       emit_d8(cbuf,$cnt$$constant-32);
 2150     }
 2151     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2152     emit_rm(cbuf, 0x3, r2, r2);
 2153   %}
 2154 
 2155   // Clone of RegMem but accepts an extra parameter to access each
 2156   // half of a double in memory; it never needs relocation info.
 2157   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2158     emit_opcode(cbuf,$opcode$$constant);
 2159     int reg_encoding = $rm_reg$$reg;
 2160     int base     = $mem$$base;
 2161     int index    = $mem$$index;
 2162     int scale    = $mem$$scale;
 2163     int displace = $mem$$disp + $disp_for_half$$constant;
 2164     relocInfo::relocType disp_reloc = relocInfo::none;
 2165     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2166   %}
 2167 
 2168   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2169   //
 2170   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2171   // and it never needs relocation information.
 2172   // Frequently used to move data between FPU's Stack Top and memory.
 2173   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2174     int rm_byte_opcode = $rm_opcode$$constant;
 2175     int base     = $mem$$base;
 2176     int index    = $mem$$index;
 2177     int scale    = $mem$$scale;
 2178     int displace = $mem$$disp;
 2179     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2180     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2181   %}
 2182 
 2183   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2184     int rm_byte_opcode = $rm_opcode$$constant;
 2185     int base     = $mem$$base;
 2186     int index    = $mem$$index;
 2187     int scale    = $mem$$scale;
 2188     int displace = $mem$$disp;
 2189     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2190     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2191   %}
 2192 
 2193   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2194     int reg_encoding = $dst$$reg;
 2195     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2196     int index        = 0x04;            // 0x04 indicates no index
 2197     int scale        = 0x00;            // 0x00 indicates no scale
 2198     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2199     relocInfo::relocType disp_reloc = relocInfo::none;
 2200     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2201   %}
 2202 
 2203   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2204     // Compare dst,src
 2205     emit_opcode(cbuf,0x3B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207     // jmp dst < src around move
 2208     emit_opcode(cbuf,0x7C);
 2209     emit_d8(cbuf,2);
 2210     // move dst,src
 2211     emit_opcode(cbuf,0x8B);
 2212     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2213   %}
 2214 
 2215   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2216     // Compare dst,src
 2217     emit_opcode(cbuf,0x3B);
 2218     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2219     // jmp dst > src around move
 2220     emit_opcode(cbuf,0x7F);
 2221     emit_d8(cbuf,2);
 2222     // move dst,src
 2223     emit_opcode(cbuf,0x8B);
 2224     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2225   %}
 2226 
 2227   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2228     // If src is FPR1, we can just FST to store it.
 2229     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2230     int reg_encoding = 0x2; // Just store
 2231     int base  = $mem$$base;
 2232     int index = $mem$$index;
 2233     int scale = $mem$$scale;
 2234     int displace = $mem$$disp;
 2235     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2236     if( $src$$reg != FPR1L_enc ) {
 2237       reg_encoding = 0x3;  // Store & pop
 2238       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2239       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2240     }
 2241     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2242     emit_opcode(cbuf,$primary);
 2243     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2244   %}
 2245 
 2246   enc_class neg_reg(rRegI dst) %{
 2247     // NEG $dst
 2248     emit_opcode(cbuf,0xF7);
 2249     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2250   %}
 2251 
 2252   enc_class setLT_reg(eCXRegI dst) %{
 2253     // SETLT $dst
 2254     emit_opcode(cbuf,0x0F);
 2255     emit_opcode(cbuf,0x9C);
 2256     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2257   %}
 2258 
 2259   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2260     int tmpReg = $tmp$$reg;
 2261 
 2262     // SUB $p,$q
 2263     emit_opcode(cbuf,0x2B);
 2264     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2265     // SBB $tmp,$tmp
 2266     emit_opcode(cbuf,0x1B);
 2267     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2268     // AND $tmp,$y
 2269     emit_opcode(cbuf,0x23);
 2270     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2271     // ADD $p,$tmp
 2272     emit_opcode(cbuf,0x03);
 2273     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2274   %}
 2275 
 2276   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2277     // TEST shift,32
 2278     emit_opcode(cbuf,0xF7);
 2279     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2280     emit_d32(cbuf,0x20);
 2281     // JEQ,s small
 2282     emit_opcode(cbuf, 0x74);
 2283     emit_d8(cbuf, 0x04);
 2284     // MOV    $dst.hi,$dst.lo
 2285     emit_opcode( cbuf, 0x8B );
 2286     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2287     // CLR    $dst.lo
 2288     emit_opcode(cbuf, 0x33);
 2289     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2290 // small:
 2291     // SHLD   $dst.hi,$dst.lo,$shift
 2292     emit_opcode(cbuf,0x0F);
 2293     emit_opcode(cbuf,0xA5);
 2294     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2296     emit_opcode(cbuf,0xD3);
 2297     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2298   %}
 2299 
 2300   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2301     // TEST shift,32
 2302     emit_opcode(cbuf,0xF7);
 2303     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2304     emit_d32(cbuf,0x20);
 2305     // JEQ,s small
 2306     emit_opcode(cbuf, 0x74);
 2307     emit_d8(cbuf, 0x04);
 2308     // MOV    $dst.lo,$dst.hi
 2309     emit_opcode( cbuf, 0x8B );
 2310     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2311     // CLR    $dst.hi
 2312     emit_opcode(cbuf, 0x33);
 2313     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2314 // small:
 2315     // SHRD   $dst.lo,$dst.hi,$shift
 2316     emit_opcode(cbuf,0x0F);
 2317     emit_opcode(cbuf,0xAD);
 2318     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2320     emit_opcode(cbuf,0xD3);
 2321     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2322   %}
 2323 
 2324   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2325     // TEST shift,32
 2326     emit_opcode(cbuf,0xF7);
 2327     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2328     emit_d32(cbuf,0x20);
 2329     // JEQ,s small
 2330     emit_opcode(cbuf, 0x74);
 2331     emit_d8(cbuf, 0x05);
 2332     // MOV    $dst.lo,$dst.hi
 2333     emit_opcode( cbuf, 0x8B );
 2334     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2335     // SAR    $dst.hi,31
 2336     emit_opcode(cbuf, 0xC1);
 2337     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     emit_d8(cbuf, 0x1F );
 2339 // small:
 2340     // SHRD   $dst.lo,$dst.hi,$shift
 2341     emit_opcode(cbuf,0x0F);
 2342     emit_opcode(cbuf,0xAD);
 2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2345     emit_opcode(cbuf,0xD3);
 2346     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2347   %}
 2348 
 2349 
 2350   // ----------------- Encodings for floating point unit -----------------
 2351   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2352   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2353     $$$emit8$primary;
 2354     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2355   %}
 2356 
 2357   // Pop argument in FPR0 with FSTP ST(0)
 2358   enc_class PopFPU() %{
 2359     emit_opcode( cbuf, 0xDD );
 2360     emit_d8( cbuf, 0xD8 );
 2361   %}
 2362 
 2363   // !!!!! equivalent to Pop_Reg_F
 2364   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2365     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2366     emit_d8( cbuf, 0xD8+$dst$$reg );
 2367   %}
 2368 
 2369   enc_class Push_Reg_DPR( regDPR dst ) %{
 2370     emit_opcode( cbuf, 0xD9 );
 2371     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2372   %}
 2373 
 2374   enc_class strictfp_bias1( regDPR dst ) %{
 2375     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2376     emit_opcode( cbuf, 0x2D );
 2377     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2378     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2379     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2380   %}
 2381 
 2382   enc_class strictfp_bias2( regDPR dst ) %{
 2383     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2384     emit_opcode( cbuf, 0x2D );
 2385     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2386     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2387     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2388   %}
 2389 
 2390   // Special case for moving an integer register to a stack slot.
 2391   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2392     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2393   %}
 2394 
 2395   // Special case for moving a register to a stack slot.
 2396   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2397     // Opcode already emitted
 2398     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2399     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2400     emit_d32(cbuf, $dst$$disp);   // Displacement
 2401   %}
 2402 
 2403   // Push the integer in stackSlot 'src' onto FP-stack
 2404   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2405     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2406   %}
 2407 
 2408   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2409   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2410     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2411   %}
 2412 
 2413   // Same as Pop_Mem_F except for opcode
 2414   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2415   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2416     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2417   %}
 2418 
 2419   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2420     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2421     emit_d8( cbuf, 0xD8+$dst$$reg );
 2422   %}
 2423 
 2424   enc_class Push_Reg_FPR( regFPR dst ) %{
 2425     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2426     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2427   %}
 2428 
 2429   // Push FPU's float to a stack-slot, and pop FPU-stack
 2430   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2431     int pop = 0x02;
 2432     if ($src$$reg != FPR1L_enc) {
 2433       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2434       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2435       pop = 0x03;
 2436     }
 2437     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2438   %}
 2439 
 2440   // Push FPU's double to a stack-slot, and pop FPU-stack
 2441   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2442     int pop = 0x02;
 2443     if ($src$$reg != FPR1L_enc) {
 2444       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2445       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2446       pop = 0x03;
 2447     }
 2448     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2449   %}
 2450 
 2451   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2452   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2453     int pop = 0xD0 - 1; // -1 since we skip FLD
 2454     if ($src$$reg != FPR1L_enc) {
 2455       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2456       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2457       pop = 0xD8;
 2458     }
 2459     emit_opcode( cbuf, 0xDD );
 2460     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2461   %}
 2462 
 2463 
 2464   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2465     // load dst in FPR0
 2466     emit_opcode( cbuf, 0xD9 );
 2467     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2468     if ($src$$reg != FPR1L_enc) {
 2469       // fincstp
 2470       emit_opcode (cbuf, 0xD9);
 2471       emit_opcode (cbuf, 0xF7);
 2472       // swap src with FPR1:
 2473       // FXCH FPR1 with src
 2474       emit_opcode(cbuf, 0xD9);
 2475       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2476       // fdecstp
 2477       emit_opcode (cbuf, 0xD9);
 2478       emit_opcode (cbuf, 0xF6);
 2479     }
 2480   %}
 2481 
 2482   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ subptr(rsp, 8);
 2485     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2486     __ fld_d(Address(rsp, 0));
 2487     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2488     __ fld_d(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2492     MacroAssembler _masm(&cbuf);
 2493     __ subptr(rsp, 4);
 2494     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2495     __ fld_s(Address(rsp, 0));
 2496     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2497     __ fld_s(Address(rsp, 0));
 2498   %}
 2499 
 2500   enc_class Push_ResultD(regD dst) %{
 2501     MacroAssembler _masm(&cbuf);
 2502     __ fstp_d(Address(rsp, 0));
 2503     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2504     __ addptr(rsp, 8);
 2505   %}
 2506 
 2507   enc_class Push_ResultF(regF dst, immI d8) %{
 2508     MacroAssembler _masm(&cbuf);
 2509     __ fstp_s(Address(rsp, 0));
 2510     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2511     __ addptr(rsp, $d8$$constant);
 2512   %}
 2513 
 2514   enc_class Push_SrcD(regD src) %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ subptr(rsp, 8);
 2517     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2518     __ fld_d(Address(rsp, 0));
 2519   %}
 2520 
 2521   enc_class push_stack_temp_qword() %{
 2522     MacroAssembler _masm(&cbuf);
 2523     __ subptr(rsp, 8);
 2524   %}
 2525 
 2526   enc_class pop_stack_temp_qword() %{
 2527     MacroAssembler _masm(&cbuf);
 2528     __ addptr(rsp, 8);
 2529   %}
 2530 
 2531   enc_class push_xmm_to_fpr1(regD src) %{
 2532     MacroAssembler _masm(&cbuf);
 2533     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2534     __ fld_d(Address(rsp, 0));
 2535   %}
 2536 
 2537   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2538     if ($src$$reg != FPR1L_enc) {
 2539       // fincstp
 2540       emit_opcode (cbuf, 0xD9);
 2541       emit_opcode (cbuf, 0xF7);
 2542       // FXCH FPR1 with src
 2543       emit_opcode(cbuf, 0xD9);
 2544       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2545       // fdecstp
 2546       emit_opcode (cbuf, 0xD9);
 2547       emit_opcode (cbuf, 0xF6);
 2548     }
 2549     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2550     // // FSTP   FPR$dst$$reg
 2551     // emit_opcode( cbuf, 0xDD );
 2552     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2553   %}
 2554 
 2555   enc_class fnstsw_sahf_skip_parity() %{
 2556     // fnstsw ax
 2557     emit_opcode( cbuf, 0xDF );
 2558     emit_opcode( cbuf, 0xE0 );
 2559     // sahf
 2560     emit_opcode( cbuf, 0x9E );
 2561     // jnp  ::skip
 2562     emit_opcode( cbuf, 0x7B );
 2563     emit_opcode( cbuf, 0x05 );
 2564   %}
 2565 
 2566   enc_class emitModDPR() %{
 2567     // fprem must be iterative
 2568     // :: loop
 2569     // fprem
 2570     emit_opcode( cbuf, 0xD9 );
 2571     emit_opcode( cbuf, 0xF8 );
 2572     // wait
 2573     emit_opcode( cbuf, 0x9b );
 2574     // fnstsw ax
 2575     emit_opcode( cbuf, 0xDF );
 2576     emit_opcode( cbuf, 0xE0 );
 2577     // sahf
 2578     emit_opcode( cbuf, 0x9E );
 2579     // jp  ::loop
 2580     emit_opcode( cbuf, 0x0F );
 2581     emit_opcode( cbuf, 0x8A );
 2582     emit_opcode( cbuf, 0xF4 );
 2583     emit_opcode( cbuf, 0xFF );
 2584     emit_opcode( cbuf, 0xFF );
 2585     emit_opcode( cbuf, 0xFF );
 2586   %}
 2587 
 2588   enc_class fpu_flags() %{
 2589     // fnstsw_ax
 2590     emit_opcode( cbuf, 0xDF);
 2591     emit_opcode( cbuf, 0xE0);
 2592     // test ax,0x0400
 2593     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2594     emit_opcode( cbuf, 0xA9 );
 2595     emit_d16   ( cbuf, 0x0400 );
 2596     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2597     // // test rax,0x0400
 2598     // emit_opcode( cbuf, 0xA9 );
 2599     // emit_d32   ( cbuf, 0x00000400 );
 2600     //
 2601     // jz exit (no unordered comparison)
 2602     emit_opcode( cbuf, 0x74 );
 2603     emit_d8    ( cbuf, 0x02 );
 2604     // mov ah,1 - treat as LT case (set carry flag)
 2605     emit_opcode( cbuf, 0xB4 );
 2606     emit_d8    ( cbuf, 0x01 );
 2607     // sahf
 2608     emit_opcode( cbuf, 0x9E);
 2609   %}
 2610 
 2611   enc_class cmpF_P6_fixup() %{
 2612     // Fixup the integer flags in case comparison involved a NaN
 2613     //
 2614     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2615     emit_opcode( cbuf, 0x7B );
 2616     emit_d8    ( cbuf, 0x03 );
 2617     // MOV AH,1 - treat as LT case (set carry flag)
 2618     emit_opcode( cbuf, 0xB4 );
 2619     emit_d8    ( cbuf, 0x01 );
 2620     // SAHF
 2621     emit_opcode( cbuf, 0x9E);
 2622     // NOP     // target for branch to avoid branch to branch
 2623     emit_opcode( cbuf, 0x90);
 2624   %}
 2625 
 2626 //     fnstsw_ax();
 2627 //     sahf();
 2628 //     movl(dst, nan_result);
 2629 //     jcc(Assembler::parity, exit);
 2630 //     movl(dst, less_result);
 2631 //     jcc(Assembler::below, exit);
 2632 //     movl(dst, equal_result);
 2633 //     jcc(Assembler::equal, exit);
 2634 //     movl(dst, greater_result);
 2635 
 2636 // less_result     =  1;
 2637 // greater_result  = -1;
 2638 // equal_result    = 0;
 2639 // nan_result      = -1;
 2640 
 2641   enc_class CmpF_Result(rRegI dst) %{
 2642     // fnstsw_ax();
 2643     emit_opcode( cbuf, 0xDF);
 2644     emit_opcode( cbuf, 0xE0);
 2645     // sahf
 2646     emit_opcode( cbuf, 0x9E);
 2647     // movl(dst, nan_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, -1 );
 2650     // jcc(Assembler::parity, exit);
 2651     emit_opcode( cbuf, 0x7A );
 2652     emit_d8    ( cbuf, 0x13 );
 2653     // movl(dst, less_result);
 2654     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2655     emit_d32( cbuf, -1 );
 2656     // jcc(Assembler::below, exit);
 2657     emit_opcode( cbuf, 0x72 );
 2658     emit_d8    ( cbuf, 0x0C );
 2659     // movl(dst, equal_result);
 2660     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2661     emit_d32( cbuf, 0 );
 2662     // jcc(Assembler::equal, exit);
 2663     emit_opcode( cbuf, 0x74 );
 2664     emit_d8    ( cbuf, 0x05 );
 2665     // movl(dst, greater_result);
 2666     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2667     emit_d32( cbuf, 1 );
 2668   %}
 2669 
 2670 
 2671   // Compare the longs and set flags
 2672   // BROKEN!  Do Not use as-is
 2673   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2674     // CMP    $src1.hi,$src2.hi
 2675     emit_opcode( cbuf, 0x3B );
 2676     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2677     // JNE,s  done
 2678     emit_opcode(cbuf,0x75);
 2679     emit_d8(cbuf, 2 );
 2680     // CMP    $src1.lo,$src2.lo
 2681     emit_opcode( cbuf, 0x3B );
 2682     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2683 // done:
 2684   %}
 2685 
 2686   enc_class convert_int_long( regL dst, rRegI src ) %{
 2687     // mov $dst.lo,$src
 2688     int dst_encoding = $dst$$reg;
 2689     int src_encoding = $src$$reg;
 2690     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2691     // mov $dst.hi,$src
 2692     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2693     // sar $dst.hi,31
 2694     emit_opcode( cbuf, 0xC1 );
 2695     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2696     emit_d8(cbuf, 0x1F );
 2697   %}
 2698 
 2699   enc_class convert_long_double( eRegL src ) %{
 2700     // push $src.hi
 2701     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2702     // push $src.lo
 2703     emit_opcode(cbuf, 0x50+$src$$reg  );
 2704     // fild 64-bits at [SP]
 2705     emit_opcode(cbuf,0xdf);
 2706     emit_d8(cbuf, 0x6C);
 2707     emit_d8(cbuf, 0x24);
 2708     emit_d8(cbuf, 0x00);
 2709     // pop stack
 2710     emit_opcode(cbuf, 0x83); // add  SP, #8
 2711     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2712     emit_d8(cbuf, 0x8);
 2713   %}
 2714 
 2715   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2716     // IMUL   EDX:EAX,$src1
 2717     emit_opcode( cbuf, 0xF7 );
 2718     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2719     // SAR    EDX,$cnt-32
 2720     int shift_count = ((int)$cnt$$constant) - 32;
 2721     if (shift_count > 0) {
 2722       emit_opcode(cbuf, 0xC1);
 2723       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2724       emit_d8(cbuf, shift_count);
 2725     }
 2726   %}
 2727 
 2728   // this version doesn't have add sp, 8
 2729   enc_class convert_long_double2( eRegL src ) %{
 2730     // push $src.hi
 2731     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2732     // push $src.lo
 2733     emit_opcode(cbuf, 0x50+$src$$reg  );
 2734     // fild 64-bits at [SP]
 2735     emit_opcode(cbuf,0xdf);
 2736     emit_d8(cbuf, 0x6C);
 2737     emit_d8(cbuf, 0x24);
 2738     emit_d8(cbuf, 0x00);
 2739   %}
 2740 
 2741   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2742     // Basic idea: long = (long)int * (long)int
 2743     // IMUL EDX:EAX, src
 2744     emit_opcode( cbuf, 0xF7 );
 2745     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2746   %}
 2747 
 2748   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2749     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2750     // MUL EDX:EAX, src
 2751     emit_opcode( cbuf, 0xF7 );
 2752     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2753   %}
 2754 
 2755   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2756     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2757     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2758     // MOV    $tmp,$src.lo
 2759     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2760     // IMUL   $tmp,EDX
 2761     emit_opcode( cbuf, 0x0F );
 2762     emit_opcode( cbuf, 0xAF );
 2763     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2764     // MOV    EDX,$src.hi
 2765     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2766     // IMUL   EDX,EAX
 2767     emit_opcode( cbuf, 0x0F );
 2768     emit_opcode( cbuf, 0xAF );
 2769     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2770     // ADD    $tmp,EDX
 2771     emit_opcode( cbuf, 0x03 );
 2772     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2773     // MUL   EDX:EAX,$src.lo
 2774     emit_opcode( cbuf, 0xF7 );
 2775     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2776     // ADD    EDX,ESI
 2777     emit_opcode( cbuf, 0x03 );
 2778     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2779   %}
 2780 
 2781   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2782     // Basic idea: lo(result) = lo(src * y_lo)
 2783     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2784     // IMUL   $tmp,EDX,$src
 2785     emit_opcode( cbuf, 0x6B );
 2786     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2787     emit_d8( cbuf, (int)$src$$constant );
 2788     // MOV    EDX,$src
 2789     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2790     emit_d32( cbuf, (int)$src$$constant );
 2791     // MUL   EDX:EAX,EDX
 2792     emit_opcode( cbuf, 0xF7 );
 2793     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2794     // ADD    EDX,ESI
 2795     emit_opcode( cbuf, 0x03 );
 2796     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2797   %}
 2798 
 2799   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2800     // PUSH src1.hi
 2801     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2802     // PUSH src1.lo
 2803     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2804     // PUSH src2.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2806     // PUSH src2.lo
 2807     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2808     // CALL directly to the runtime
 2809     MacroAssembler _masm(&cbuf);
 2810     cbuf.set_insts_mark();
 2811     emit_opcode(cbuf,0xE8);       // Call into runtime
 2812     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2813     __ post_call_nop();
 2814     // Restore stack
 2815     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2816     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2817     emit_d8(cbuf, 4*4);
 2818   %}
 2819 
 2820   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2821     // PUSH src1.hi
 2822     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2823     // PUSH src1.lo
 2824     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2825     // PUSH src2.hi
 2826     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2827     // PUSH src2.lo
 2828     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2829     // CALL directly to the runtime
 2830     MacroAssembler _masm(&cbuf);
 2831     cbuf.set_insts_mark();
 2832     emit_opcode(cbuf,0xE8);       // Call into runtime
 2833     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2834     __ post_call_nop();
 2835     // Restore stack
 2836     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2837     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2838     emit_d8(cbuf, 4*4);
 2839   %}
 2840 
 2841   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2842     // MOV   $tmp,$src.lo
 2843     emit_opcode(cbuf, 0x8B);
 2844     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2845     // OR    $tmp,$src.hi
 2846     emit_opcode(cbuf, 0x0B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2848   %}
 2849 
 2850   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2851     // CMP    $src1.lo,$src2.lo
 2852     emit_opcode( cbuf, 0x3B );
 2853     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2854     // JNE,s  skip
 2855     emit_cc(cbuf, 0x70, 0x5);
 2856     emit_d8(cbuf,2);
 2857     // CMP    $src1.hi,$src2.hi
 2858     emit_opcode( cbuf, 0x3B );
 2859     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2860   %}
 2861 
 2862   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2863     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2864     emit_opcode( cbuf, 0x3B );
 2865     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2866     // MOV    $tmp,$src1.hi
 2867     emit_opcode( cbuf, 0x8B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2869     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2872   %}
 2873 
 2874   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2875     // XOR    $tmp,$tmp
 2876     emit_opcode(cbuf,0x33);  // XOR
 2877     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2878     // CMP    $tmp,$src.lo
 2879     emit_opcode( cbuf, 0x3B );
 2880     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2881     // SBB    $tmp,$src.hi
 2882     emit_opcode( cbuf, 0x1B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2884   %}
 2885 
 2886  // Sniff, sniff... smells like Gnu Superoptimizer
 2887   enc_class neg_long( eRegL dst ) %{
 2888     emit_opcode(cbuf,0xF7);    // NEG hi
 2889     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2890     emit_opcode(cbuf,0xF7);    // NEG lo
 2891     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2892     emit_opcode(cbuf,0x83);    // SBB hi,0
 2893     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2894     emit_d8    (cbuf,0 );
 2895   %}
 2896 
 2897   enc_class enc_pop_rdx() %{
 2898     emit_opcode(cbuf,0x5A);
 2899   %}
 2900 
 2901   enc_class enc_rethrow() %{
 2902     MacroAssembler _masm(&cbuf);
 2903     cbuf.set_insts_mark();
 2904     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2905     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2906                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2907     __ post_call_nop();
 2908   %}
 2909 
 2910 
 2911   // Convert a double to an int.  Java semantics require we do complex
 2912   // manglelations in the corner cases.  So we set the rounding mode to
 2913   // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware stores down a flag value
  // (0x80000000) in the corner cases; we test for it below and call a
  // runtime wrapper on the slow path to produce the correct Java result.
 2916   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
 2920     // However, I2C adapters and other float-stack manglers leave pending
 2921     // invalid-op exceptions hanging.  We would have to clear them before
 2922     // enabling them and that is more expensive than just testing for the
 2923     // invalid value Intel stores down in the corner cases.
 2924     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2925     emit_opcode(cbuf,0x2D);
 2926     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2927     // Allocate a word
 2928     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2929     emit_opcode(cbuf,0xEC);
 2930     emit_d8(cbuf,0x04);
 2931     // Encoding assumes a double has been pushed into FPR0.
 2932     // Store down the double as an int, popping the FPU stack
 2933     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2934     emit_opcode(cbuf,0x1C);
 2935     emit_d8(cbuf,0x24);
 2936     // Restore the rounding mode; mask the exception
 2937     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2938     emit_opcode(cbuf,0x2D);
 2939     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2940         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2941         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2942 
 2943     // Load the converted int; adjust CPU stack
 2944     emit_opcode(cbuf,0x58);       // POP EAX
 2945     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2946     emit_d32   (cbuf,0x80000000); //         0x80000000
 2947     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2948     emit_d8    (cbuf,0x07);       // Size of slow_call
 2949     // Push src onto stack slow-path
 2950     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2951     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2952     // CALL directly to the runtime
 2953     MacroAssembler _masm(&cbuf);
 2954     cbuf.set_insts_mark();
 2955     emit_opcode(cbuf,0xE8);       // Call into runtime
 2956     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2957     __ post_call_nop();
 2958     // Carry on here...
 2959   %}
 2960 
 2961   enc_class DPR2L_encoding( regDPR src ) %{
 2962     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2963     emit_opcode(cbuf,0x2D);
 2964     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2966     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2967     emit_opcode(cbuf,0xEC);
 2968     emit_d8(cbuf,0x08);
 2969     // Encoding assumes a double has been pushed into FPR0.
 2970     // Store down the double as a long, popping the FPU stack
 2971     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2972     emit_opcode(cbuf,0x3C);
 2973     emit_d8(cbuf,0x24);
 2974     // Restore the rounding mode; mask the exception
 2975     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2976     emit_opcode(cbuf,0x2D);
 2977     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2978         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2979         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2980 
    // Load the converted long; adjust CPU stack
 2982     emit_opcode(cbuf,0x58);       // POP EAX
 2983     emit_opcode(cbuf,0x5A);       // POP EDX
 2984     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2985     emit_d8    (cbuf,0xFA);       // rdx
 2986     emit_d32   (cbuf,0x80000000); //         0x80000000
 2987     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2988     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2989     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2990     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2991     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2992     emit_d8    (cbuf,0x07);       // Size of slow_call
 2993     // Push src onto stack slow-path
 2994     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2995     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2996     // CALL directly to the runtime
 2997     MacroAssembler _masm(&cbuf);
 2998     cbuf.set_insts_mark();
 2999     emit_opcode(cbuf,0xE8);       // Call into runtime
 3000     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3001     __ post_call_nop();
 3002     // Carry on here...
 3003   %}
 3004 
 3005   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3006     // Operand was loaded from memory into fp ST (stack top)
 3007     // FMUL   ST,$src  /* D8 C8+i */
 3008     emit_opcode(cbuf, 0xD8);
 3009     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3010   %}
 3011 
 3012   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3014     emit_opcode(cbuf, 0xD8);
 3015     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3016     //could use FADDP  src2,fpST  /* DE C0+i */
 3017   %}
 3018 
 3019   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3020     // FADDP  src2,ST  /* DE C0+i */
 3021     emit_opcode(cbuf, 0xDE);
 3022     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3023   %}
 3024 
 3025   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3026     // Operand has been loaded into fp ST (stack top)
 3027       // FSUB   ST,$src1
 3028       emit_opcode(cbuf, 0xD8);
 3029       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3030 
 3031       // FDIV
 3032       emit_opcode(cbuf, 0xD8);
 3033       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3034   %}
 3035 
 3036   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3037     // Operand was loaded from memory into fp ST (stack top)
 3038     // FADD   ST,$src  /* D8 C0+i */
 3039     emit_opcode(cbuf, 0xD8);
 3040     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3041 
    // FMUL  ST,src2  /* D8 C8+i */
 3043     emit_opcode(cbuf, 0xD8);
 3044     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3045   %}
 3046 
 3047 
 3048   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3049     // Operand was loaded from memory into fp ST (stack top)
 3050     // FADD   ST,$src  /* D8 C0+i */
 3051     emit_opcode(cbuf, 0xD8);
 3052     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3053 
 3054     // FMULP  src2,ST  /* DE C8+i */
 3055     emit_opcode(cbuf, 0xDE);
 3056     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3057   %}
 3058 
 3059   // Atomically load the volatile long
 3060   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3061     emit_opcode(cbuf,0xDF);
 3062     int rm_byte_opcode = 0x05;
 3063     int base     = $mem$$base;
 3064     int index    = $mem$$index;
 3065     int scale    = $mem$$scale;
 3066     int displace = $mem$$disp;
 3067     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3068     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3069     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3070   %}
 3071 
 3072   // Volatile Store Long.  Must be atomic, so move it into
 3073   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3074   // target address before the store (for null-ptr checks)
 3075   // so the memory operand is used twice in the encoding.
 3076   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3077     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3078     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3079     emit_opcode(cbuf,0xDF);
 3080     int rm_byte_opcode = 0x07;
 3081     int base     = $mem$$base;
 3082     int index    = $mem$$index;
 3083     int scale    = $mem$$scale;
 3084     int displace = $mem$$disp;
 3085     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3086     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3087   %}
 3088 
 3089 %}
 3090 
 3091 
 3092 //----------FRAME--------------------------------------------------------------
 3093 // Definition of frame structure and management information.
 3094 //
 3095 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3096 //                             |   (to get allocators register number
 3097 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3098 //  r   CALLER     |        |
 3099 //  o     |        +--------+      pad to even-align allocators stack-slot
 3100 //  w     V        |  pad0  |        numbers; owned by CALLER
 3101 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3102 //  h     ^        |   in   |  5
 3103 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3104 //  |     |        |        |  3
 3105 //  |     |        +--------+
 3106 //  V     |        | old out|      Empty on Intel, window on Sparc
 3107 //        |    old |preserve|      Must be even aligned.
 3108 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3109 //        |        |   in   |  3   area for Intel ret address
 3110 //     Owned by    |preserve|      Empty on Sparc.
 3111 //       SELF      +--------+
 3112 //        |        |  pad2  |  2   pad to align old SP
 3113 //        |        +--------+  1
 3114 //        |        | locks  |  0
 3115 //        |        +--------+----> OptoReg::stack0(), even aligned
 3116 //        |        |  pad1  | 11   pad to align new SP
 3117 //        |        +--------+
 3118 //        |        |        | 10
 3119 //        |        | spills |  9   spills
 3120 //        V        |        |  8   (pad0 slot for callee)
 3121 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3122 //        ^        |  out   |  7
 3123 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3124 //     Owned by    +--------+
 3125 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3126 //        |    new |preserve|      Must be even-aligned.
 3127 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3128 //        |        |        |
 3129 //
 3130 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3131 //         known from SELF's arguments and the Java calling convention.
 3132 //         Region 6-7 is determined per call site.
 3133 // Note 2: If the calling convention leaves holes in the incoming argument
 3134 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3135 //         are owned by the CALLEE.  Holes should not be necessary in the
 3136 //         incoming area, as the Java calling convention is completely under
 3137 //         the control of the AD file.  Doubles can be sorted and packed to
 3138 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3139 //         varargs C calling conventions.
 3140 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3141 //         even aligned with pad0 as needed.
 3142 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3143 //         region 6-11 is even aligned; it may be padded out more so that
 3144 //         the region from SP to FP meets the minimum stack alignment.
 3145 
 3146 frame %{
 3147   // These three registers define part of the calling convention
 3148   // between compiled code and the interpreter.
 3149   inline_cache_reg(EAX);                // Inline Cache Register
 3150 
 3151   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3152   cisc_spilling_operand_name(indOffset32);
 3153 
 3154   // Number of stack slots consumed by locking an object
 3155   sync_stack_slots(1);
 3156 
 3157   // Compiled code's Frame Pointer
 3158   frame_pointer(ESP);
 3159   // Interpreter stores its frame pointer in a register which is
 3160   // stored to the stack by I2CAdaptors.
 3161   // I2CAdaptors convert from interpreted java to compiled java.
 3162   interpreter_frame_pointer(EBP);
 3163 
 3164   // Stack alignment requirement
 3165   // Alignment size in bytes (128-bit -> 16 bytes)
 3166   stack_alignment(StackAlignmentInBytes);
 3167 
 3168   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3169   // for calls to C.  Supports the var-args backing area for register parms.
 3170   varargs_C_out_slots_killed(0);
 3171 
 3172   // The after-PROLOG location of the return address.  Location of
 3173   // return address specifies a type (REG or STACK) and a number
 3174   // representing the register number (i.e. - use a register name) or
 3175   // stack slot.
 3176   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3177   // Otherwise, it is above the locks and verification slot and alignment word
 3178   return_addr(STACK - 1 +
 3179               align_up((Compile::current()->in_preserve_stack_slots() +
 3180                         Compile::current()->fixed_slots()),
 3181                        stack_alignment_in_slots()));
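
  // For instance (purely illustrative numbers): if in_preserve_stack_slots()
  // plus fixed_slots() came to 3 and stack_alignment_in_slots() were 4, then
  // align_up(3, 4) == 4 and the return address would sit in stack slot 3,
  // i.e. STACK - 1 + 4.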
 3182 
 3183   // Location of C & interpreter return values
 3184   c_return_value %{
 3185     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3186     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3187     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3188 
 3189     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3190     // that C functions return float and double results in XMM0.
 3191     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3192       return OptoRegPair(XMM0b_num,XMM0_num);
 3193     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3194       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3195 
 3196     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3197   %}
 3198 
 3199   // Location of return values
 3200   return_value %{
 3201     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3202     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3203     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3204     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3205       return OptoRegPair(XMM0b_num,XMM0_num);
 3206     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3207       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3208     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3209   %}
 3210 
 3211 %}
 3212 
 3213 //----------ATTRIBUTES---------------------------------------------------------
 3214 //----------Operand Attributes-------------------------------------------------
 3215 op_attrib op_cost(0);        // Required cost attribute
 3216 
 3217 //----------Instruction Attributes---------------------------------------------
 3218 ins_attrib ins_cost(100);       // Required cost attribute
 3219 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3220 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3221                                 // non-matching short branch variant of some
                                // long branch?
 3223 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3224                                 // specifies the alignment that some part of the instruction (not
 3225                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3226                                 // function must be provided for the instruction
 3227 
 3228 //----------OPERANDS-----------------------------------------------------------
 3229 // Operand definitions must precede instruction definitions for correct parsing
 3230 // in the ADLC because operands constitute user defined types which are used in
 3231 // instruction definitions.
 3232 
 3233 //----------Simple Operands----------------------------------------------------
 3234 // Immediate Operands
 3235 // Integer Immediate
 3236 operand immI() %{
 3237   match(ConI);
 3238 
 3239   op_cost(10);
 3240   format %{ %}
 3241   interface(CONST_INTER);
 3242 %}
 3243 
 3244 // Constant for test vs zero
 3245 operand immI_0() %{
 3246   predicate(n->get_int() == 0);
 3247   match(ConI);
 3248 
 3249   op_cost(0);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 // Constant for increment
 3255 operand immI_1() %{
 3256   predicate(n->get_int() == 1);
 3257   match(ConI);
 3258 
 3259   op_cost(0);
 3260   format %{ %}
 3261   interface(CONST_INTER);
 3262 %}
 3263 
 3264 // Constant for decrement
 3265 operand immI_M1() %{
 3266   predicate(n->get_int() == -1);
 3267   match(ConI);
 3268 
 3269   op_cost(0);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 // Valid scale values for addressing modes
 3275 operand immI2() %{
 3276   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3277   match(ConI);
 3278 
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immI8() %{
 3284   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3285   match(ConI);
 3286 
 3287   op_cost(5);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 operand immU8() %{
 3293   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3294   match(ConI);
 3295 
 3296   op_cost(5);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 operand immI16() %{
 3302   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3303   match(ConI);
 3304 
 3305   op_cost(10);
 3306   format %{ %}
 3307   interface(CONST_INTER);
 3308 %}
 3309 
 3310 // Int Immediate non-negative
 3311 operand immU31()
 3312 %{
 3313   predicate(n->get_int() >= 0);
 3314   match(ConI);
 3315 
 3316   op_cost(0);
 3317   format %{ %}
 3318   interface(CONST_INTER);
 3319 %}
 3320 
 3321 // Constant for long shifts
 3322 operand immI_32() %{
 3323   predicate( n->get_int() == 32 );
 3324   match(ConI);
 3325 
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_1_31() %{
 3332   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3333   match(ConI);
 3334 
 3335   op_cost(0);
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_32_63() %{
 3341   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3342   match(ConI);
 3343   op_cost(0);
 3344 
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_2() %{
 3350   predicate( n->get_int() == 2 );
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 operand immI_3() %{
 3359   predicate( n->get_int() == 3 );
 3360   match(ConI);
 3361 
 3362   op_cost(0);
 3363   format %{ %}
 3364   interface(CONST_INTER);
 3365 %}
 3366 
 3367 operand immI_4()
 3368 %{
 3369   predicate(n->get_int() == 4);
 3370   match(ConI);
 3371 
 3372   op_cost(0);
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 operand immI_8()
 3378 %{
 3379   predicate(n->get_int() == 8);
 3380   match(ConI);
 3381 
 3382   op_cost(0);
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Pointer Immediate
 3388 operand immP() %{
 3389   match(ConP);
 3390 
 3391   op_cost(10);
 3392   format %{ %}
 3393   interface(CONST_INTER);
 3394 %}
 3395 
 3396 // nullptr Pointer Immediate
 3397 operand immP0() %{
 3398   predicate( n->get_ptr() == 0 );
 3399   match(ConP);
 3400   op_cost(0);
 3401 
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long Immediate
 3407 operand immL() %{
 3408   match(ConL);
 3409 
 3410   op_cost(20);
 3411   format %{ %}
 3412   interface(CONST_INTER);
 3413 %}
 3414 
 3415 // Long Immediate zero
 3416 operand immL0() %{
 3417   predicate( n->get_long() == 0L );
 3418   match(ConL);
 3419   op_cost(0);
 3420 
 3421   format %{ %}
 3422   interface(CONST_INTER);
 3423 %}
 3424 
// Long Immediate minus one
 3426 operand immL_M1() %{
 3427   predicate( n->get_long() == -1L );
 3428   match(ConL);
 3429   op_cost(0);
 3430 
 3431   format %{ %}
 3432   interface(CONST_INTER);
 3433 %}
 3434 
 3435 // Long immediate from 0 to 127.
 3436 // Used for a shorter form of long mul by 10.
 3437 operand immL_127() %{
 3438   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3439   match(ConL);
 3440   op_cost(0);
 3441 
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
 3446 // Long Immediate: low 32-bit mask
 3447 operand immL_32bits() %{
 3448   predicate(n->get_long() == 0xFFFFFFFFL);
 3449   match(ConL);
 3450   op_cost(0);
 3451 
 3452   format %{ %}
 3453   interface(CONST_INTER);
 3454 %}
 3455 
// Long Immediate: value fits in signed 32 bits
 3457 operand immL32() %{
 3458   predicate(n->get_long() == (int)(n->get_long()));
 3459   match(ConL);
 3460   op_cost(20);
 3461 
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
// Double Immediate zero
 3467 operand immDPR0() %{
 3468   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3469   // bug that generates code such that NaNs compare equal to 0.0
 3470   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3471   match(ConD);
 3472 
 3473   op_cost(5);
 3474   format %{ %}
 3475   interface(CONST_INTER);
 3476 %}
 3477 
 3478 // Double Immediate one
 3479 operand immDPR1() %{
 3480   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3481   match(ConD);
 3482 
 3483   op_cost(5);
 3484   format %{ %}
 3485   interface(CONST_INTER);
 3486 %}
 3487 
 3488 // Double Immediate
 3489 operand immDPR() %{
 3490   predicate(UseSSE<=1);
 3491   match(ConD);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 operand immD() %{
 3499   predicate(UseSSE>=2);
 3500   match(ConD);
 3501 
 3502   op_cost(5);
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Double Immediate zero
 3508 operand immD0() %{
 3509   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3510   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3511   // compare equal to -0.0.
 3512   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3513   match(ConD);
 3514 
 3515   format %{ %}
 3516   interface(CONST_INTER);
 3517 %}
 3518 
 3519 // Float Immediate zero
 3520 operand immFPR0() %{
 3521   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3522   match(ConF);
 3523 
 3524   op_cost(5);
 3525   format %{ %}
 3526   interface(CONST_INTER);
 3527 %}
 3528 
 3529 // Float Immediate one
 3530 operand immFPR1() %{
 3531   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3532   match(ConF);
 3533 
 3534   op_cost(5);
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Float Immediate
 3540 operand immFPR() %{
 3541   predicate( UseSSE == 0 );
 3542   match(ConF);
 3543 
 3544   op_cost(5);
 3545   format %{ %}
 3546   interface(CONST_INTER);
 3547 %}
 3548 
 3549 // Float Immediate
 3550 operand immF() %{
 3551   predicate(UseSSE >= 1);
 3552   match(ConF);
 3553 
 3554   op_cost(5);
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Float Immediate zero.  Zero and not -0.0
 3560 operand immF0() %{
 3561   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3562   match(ConF);
 3563 
 3564   op_cost(5);
 3565   format %{ %}
 3566   interface(CONST_INTER);
 3567 %}
 3568 
 3569 // Immediates for special shifts (sign extend)
 3570 
 3571 // Constants for increment
 3572 operand immI_16() %{
 3573   predicate( n->get_int() == 16 );
 3574   match(ConI);
 3575 
 3576   format %{ %}
 3577   interface(CONST_INTER);
 3578 %}
 3579 
 3580 operand immI_24() %{
 3581   predicate( n->get_int() == 24 );
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 // Constant for byte-wide masking
 3589 operand immI_255() %{
 3590   predicate( n->get_int() == 255 );
 3591   match(ConI);
 3592 
 3593   format %{ %}
 3594   interface(CONST_INTER);
 3595 %}
 3596 
 3597 // Constant for short-wide masking
 3598 operand immI_65535() %{
 3599   predicate(n->get_int() == 65535);
 3600   match(ConI);
 3601 
 3602   format %{ %}
 3603   interface(CONST_INTER);
 3604 %}
 3605 
 3606 operand kReg()
 3607 %{
 3608   constraint(ALLOC_IN_RC(vectmask_reg));
 3609   match(RegVectMask);
 3610   format %{%}
 3611   interface(REG_INTER);
 3612 %}
 3613 
 3614 operand kReg_K1()
 3615 %{
 3616   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3617   match(RegVectMask);
 3618   format %{%}
 3619   interface(REG_INTER);
 3620 %}
 3621 
 3622 operand kReg_K2()
 3623 %{
 3624   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3625   match(RegVectMask);
 3626   format %{%}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 // Special Registers
 3631 operand kReg_K3()
 3632 %{
 3633   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3634   match(RegVectMask);
 3635   format %{%}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand kReg_K4()
 3640 %{
 3641   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3642   match(RegVectMask);
 3643   format %{%}
 3644   interface(REG_INTER);
 3645 %}
 3646 
 3647 operand kReg_K5()
 3648 %{
 3649   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3650   match(RegVectMask);
 3651   format %{%}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 operand kReg_K6()
 3656 %{
 3657   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3658   match(RegVectMask);
 3659   format %{%}
 3660   interface(REG_INTER);
 3661 %}
 3662 
 3663 // Special Registers
 3664 operand kReg_K7()
 3665 %{
 3666   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3667   match(RegVectMask);
 3668   format %{%}
 3669   interface(REG_INTER);
 3670 %}
 3671 
 3672 // Register Operands
 3673 // Integer Register
 3674 operand rRegI() %{
 3675   constraint(ALLOC_IN_RC(int_reg));
 3676   match(RegI);
 3677   match(xRegI);
 3678   match(eAXRegI);
 3679   match(eBXRegI);
 3680   match(eCXRegI);
 3681   match(eDXRegI);
 3682   match(eDIRegI);
 3683   match(eSIRegI);
 3684 
 3685   format %{ %}
 3686   interface(REG_INTER);
 3687 %}
 3688 
 3689 // Subset of Integer Register
 3690 operand xRegI(rRegI reg) %{
 3691   constraint(ALLOC_IN_RC(int_x_reg));
 3692   match(reg);
 3693   match(eAXRegI);
 3694   match(eBXRegI);
 3695   match(eCXRegI);
 3696   match(eDXRegI);
 3697 
 3698   format %{ %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 // Special Registers
 3703 operand eAXRegI(xRegI reg) %{
 3704   constraint(ALLOC_IN_RC(eax_reg));
 3705   match(reg);
 3706   match(rRegI);
 3707 
 3708   format %{ "EAX" %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 // Special Registers
 3713 operand eBXRegI(xRegI reg) %{
 3714   constraint(ALLOC_IN_RC(ebx_reg));
 3715   match(reg);
 3716   match(rRegI);
 3717 
 3718   format %{ "EBX" %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand eCXRegI(xRegI reg) %{
 3723   constraint(ALLOC_IN_RC(ecx_reg));
 3724   match(reg);
 3725   match(rRegI);
 3726 
 3727   format %{ "ECX" %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand eDXRegI(xRegI reg) %{
 3732   constraint(ALLOC_IN_RC(edx_reg));
 3733   match(reg);
 3734   match(rRegI);
 3735 
 3736   format %{ "EDX" %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 operand eDIRegI(xRegI reg) %{
 3741   constraint(ALLOC_IN_RC(edi_reg));
 3742   match(reg);
 3743   match(rRegI);
 3744 
 3745   format %{ "EDI" %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 operand naxRegI() %{
 3750   constraint(ALLOC_IN_RC(nax_reg));
 3751   match(RegI);
 3752   match(eCXRegI);
 3753   match(eDXRegI);
 3754   match(eSIRegI);
 3755   match(eDIRegI);
 3756 
 3757   format %{ %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand nadxRegI() %{
 3762   constraint(ALLOC_IN_RC(nadx_reg));
 3763   match(RegI);
 3764   match(eBXRegI);
 3765   match(eCXRegI);
 3766   match(eSIRegI);
 3767   match(eDIRegI);
 3768 
 3769   format %{ %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 operand ncxRegI() %{
 3774   constraint(ALLOC_IN_RC(ncx_reg));
 3775   match(RegI);
 3776   match(eAXRegI);
 3777   match(eDXRegI);
 3778   match(eSIRegI);
 3779   match(eDIRegI);
 3780 
 3781   format %{ %}
 3782   interface(REG_INTER);
 3783 %}
 3784 
 3785 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3786 // //
 3787 operand eSIRegI(xRegI reg) %{
 3788    constraint(ALLOC_IN_RC(esi_reg));
 3789    match(reg);
 3790    match(rRegI);
 3791 
 3792    format %{ "ESI" %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Pointer Register
 3797 operand anyRegP() %{
 3798   constraint(ALLOC_IN_RC(any_reg));
 3799   match(RegP);
 3800   match(eAXRegP);
 3801   match(eBXRegP);
 3802   match(eCXRegP);
 3803   match(eDIRegP);
 3804   match(eRegP);
 3805 
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand eRegP() %{
 3811   constraint(ALLOC_IN_RC(int_reg));
 3812   match(RegP);
 3813   match(eAXRegP);
 3814   match(eBXRegP);
 3815   match(eCXRegP);
 3816   match(eDIRegP);
 3817 
 3818   format %{ %}
 3819   interface(REG_INTER);
 3820 %}
 3821 
 3822 operand rRegP() %{
 3823   constraint(ALLOC_IN_RC(int_reg));
 3824   match(RegP);
 3825   match(eAXRegP);
 3826   match(eBXRegP);
 3827   match(eCXRegP);
 3828   match(eDIRegP);
 3829 
 3830   format %{ %}
 3831   interface(REG_INTER);
 3832 %}
 3833 
 3834 // On windows95, EBP is not safe to use for implicit null tests.
 3835 operand eRegP_no_EBP() %{
 3836   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3837   match(RegP);
 3838   match(eAXRegP);
 3839   match(eBXRegP);
 3840   match(eCXRegP);
 3841   match(eDIRegP);
 3842 
 3843   op_cost(100);
 3844   format %{ %}
 3845   interface(REG_INTER);
 3846 %}
 3847 
 3848 operand naxRegP() %{
 3849   constraint(ALLOC_IN_RC(nax_reg));
 3850   match(RegP);
 3851   match(eBXRegP);
 3852   match(eDXRegP);
 3853   match(eCXRegP);
 3854   match(eSIRegP);
 3855   match(eDIRegP);
 3856 
 3857   format %{ %}
 3858   interface(REG_INTER);
 3859 %}
 3860 
 3861 operand nabxRegP() %{
 3862   constraint(ALLOC_IN_RC(nabx_reg));
 3863   match(RegP);
 3864   match(eCXRegP);
 3865   match(eDXRegP);
 3866   match(eSIRegP);
 3867   match(eDIRegP);
 3868 
 3869   format %{ %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 operand pRegP() %{
 3874   constraint(ALLOC_IN_RC(p_reg));
 3875   match(RegP);
 3876   match(eBXRegP);
 3877   match(eDXRegP);
 3878   match(eSIRegP);
 3879   match(eDIRegP);
 3880 
 3881   format %{ %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Special Registers
 3886 // Return a pointer value
 3887 operand eAXRegP(eRegP reg) %{
 3888   constraint(ALLOC_IN_RC(eax_reg));
 3889   match(reg);
 3890   format %{ "EAX" %}
 3891   interface(REG_INTER);
 3892 %}
 3893 
 3894 // Used in AtomicAdd
 3895 operand eBXRegP(eRegP reg) %{
 3896   constraint(ALLOC_IN_RC(ebx_reg));
 3897   match(reg);
 3898   format %{ "EBX" %}
 3899   interface(REG_INTER);
 3900 %}
 3901 
 3902 // Tail-call (interprocedural jump) to interpreter
 3903 operand eCXRegP(eRegP reg) %{
 3904   constraint(ALLOC_IN_RC(ecx_reg));
 3905   match(reg);
 3906   format %{ "ECX" %}
 3907   interface(REG_INTER);
 3908 %}
 3909 
 3910 operand eDXRegP(eRegP reg) %{
 3911   constraint(ALLOC_IN_RC(edx_reg));
 3912   match(reg);
 3913   format %{ "EDX" %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 operand eSIRegP(eRegP reg) %{
 3918   constraint(ALLOC_IN_RC(esi_reg));
 3919   match(reg);
 3920   format %{ "ESI" %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 // Used in rep stosw
 3925 operand eDIRegP(eRegP reg) %{
 3926   constraint(ALLOC_IN_RC(edi_reg));
 3927   match(reg);
 3928   format %{ "EDI" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand eRegL() %{
 3933   constraint(ALLOC_IN_RC(long_reg));
 3934   match(RegL);
 3935   match(eADXRegL);
 3936 
 3937   format %{ %}
 3938   interface(REG_INTER);
 3939 %}
 3940 
 3941 operand eADXRegL( eRegL reg ) %{
 3942   constraint(ALLOC_IN_RC(eadx_reg));
 3943   match(reg);
 3944 
 3945   format %{ "EDX:EAX" %}
 3946   interface(REG_INTER);
 3947 %}
 3948 
 3949 operand eBCXRegL( eRegL reg ) %{
 3950   constraint(ALLOC_IN_RC(ebcx_reg));
 3951   match(reg);
 3952 
 3953   format %{ "EBX:ECX" %}
 3954   interface(REG_INTER);
 3955 %}
 3956 
 3957 operand eBDPRegL( eRegL reg ) %{
 3958   constraint(ALLOC_IN_RC(ebpd_reg));
 3959   match(reg);
 3960 
 3961   format %{ "EBP:EDI" %}
 3962   interface(REG_INTER);
 3963 %}
 3964 // Special case for integer high multiply
 3965 operand eADXRegL_low_only() %{
 3966   constraint(ALLOC_IN_RC(eadx_reg));
 3967   match(RegL);
 3968 
 3969   format %{ "EAX" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Flags register, used as output of compare instructions
 3974 operand rFlagsReg() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977 
 3978   format %{ "EFLAGS" %}
 3979   interface(REG_INTER);
 3980 %}
 3981 
 3982 // Flags register, used as output of compare instructions
 3983 operand eFlagsReg() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986 
 3987   format %{ "EFLAGS" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Flags register, used as output of FLOATING POINT compare instructions
 3992 operand eFlagsRegU() %{
 3993   constraint(ALLOC_IN_RC(int_flags));
 3994   match(RegFlags);
 3995 
 3996   format %{ "EFLAGS_U" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 
 4000 operand eFlagsRegUCF() %{
 4001   constraint(ALLOC_IN_RC(int_flags));
 4002   match(RegFlags);
 4003   predicate(false);
 4004 
 4005   format %{ "EFLAGS_U_CF" %}
 4006   interface(REG_INTER);
 4007 %}
 4008 
 4009 // Condition Code Register used by long compare
 4010 operand flagsReg_long_LTGE() %{
 4011   constraint(ALLOC_IN_RC(int_flags));
 4012   match(RegFlags);
 4013   format %{ "FLAGS_LTGE" %}
 4014   interface(REG_INTER);
 4015 %}
 4016 operand flagsReg_long_EQNE() %{
 4017   constraint(ALLOC_IN_RC(int_flags));
 4018   match(RegFlags);
 4019   format %{ "FLAGS_EQNE" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 operand flagsReg_long_LEGT() %{
 4023   constraint(ALLOC_IN_RC(int_flags));
 4024   match(RegFlags);
 4025   format %{ "FLAGS_LEGT" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
 4029 // Condition Code Register used by unsigned long compare
 4030 operand flagsReg_ulong_LTGE() %{
 4031   constraint(ALLOC_IN_RC(int_flags));
 4032   match(RegFlags);
 4033   format %{ "FLAGS_U_LTGE" %}
 4034   interface(REG_INTER);
 4035 %}
 4036 operand flagsReg_ulong_EQNE() %{
 4037   constraint(ALLOC_IN_RC(int_flags));
 4038   match(RegFlags);
 4039   format %{ "FLAGS_U_EQNE" %}
 4040   interface(REG_INTER);
 4041 %}
 4042 operand flagsReg_ulong_LEGT() %{
 4043   constraint(ALLOC_IN_RC(int_flags));
 4044   match(RegFlags);
 4045   format %{ "FLAGS_U_LEGT" %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 // Float register operands
 4050 operand regDPR() %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4053   match(RegD);
 4054   match(regDPR1);
 4055   match(regDPR2);
 4056   format %{ %}
 4057   interface(REG_INTER);
 4058 %}
 4059 
 4060 operand regDPR1(regDPR reg) %{
 4061   predicate( UseSSE < 2 );
 4062   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4063   match(reg);
 4064   format %{ "FPR1" %}
 4065   interface(REG_INTER);
 4066 %}
 4067 
 4068 operand regDPR2(regDPR reg) %{
 4069   predicate( UseSSE < 2 );
 4070   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4071   match(reg);
 4072   format %{ "FPR2" %}
 4073   interface(REG_INTER);
 4074 %}
 4075 
 4076 operand regnotDPR1(regDPR reg) %{
 4077   predicate( UseSSE < 2 );
 4078   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4079   match(reg);
 4080   format %{ %}
 4081   interface(REG_INTER);
 4082 %}
 4083 
 4084 // Float register operands
 4085 operand regFPR() %{
 4086   predicate( UseSSE < 2 );
 4087   constraint(ALLOC_IN_RC(fp_flt_reg));
 4088   match(RegF);
 4089   match(regFPR1);
 4090   format %{ %}
 4091   interface(REG_INTER);
 4092 %}
 4093 
 4094 // Float register operands
 4095 operand regFPR1(regFPR reg) %{
 4096   predicate( UseSSE < 2 );
 4097   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4098   match(reg);
 4099   format %{ "FPR1" %}
 4100   interface(REG_INTER);
 4101 %}
 4102 
 4103 // XMM Float register operands
 4104 operand regF() %{
 4105   predicate( UseSSE>=1 );
 4106   constraint(ALLOC_IN_RC(float_reg_legacy));
 4107   match(RegF);
 4108   format %{ %}
 4109   interface(REG_INTER);
 4110 %}
 4111 
 4112 operand legRegF() %{
 4113   predicate( UseSSE>=1 );
 4114   constraint(ALLOC_IN_RC(float_reg_legacy));
 4115   match(RegF);
 4116   format %{ %}
 4117   interface(REG_INTER);
 4118 %}
 4119 
 4120 // Float register operands
 4121 operand vlRegF() %{
 4122    constraint(ALLOC_IN_RC(float_reg_vl));
 4123    match(RegF);
 4124 
 4125    format %{ %}
 4126    interface(REG_INTER);
 4127 %}
 4128 
 4129 // XMM Double register operands
 4130 operand regD() %{
 4131   predicate( UseSSE>=2 );
 4132   constraint(ALLOC_IN_RC(double_reg_legacy));
 4133   match(RegD);
 4134   format %{ %}
 4135   interface(REG_INTER);
 4136 %}
 4137 
 4138 // Double register operands
 4139 operand legRegD() %{
 4140   predicate( UseSSE>=2 );
 4141   constraint(ALLOC_IN_RC(double_reg_legacy));
 4142   match(RegD);
 4143   format %{ %}
 4144   interface(REG_INTER);
 4145 %}
 4146 
 4147 operand vlRegD() %{
 4148    constraint(ALLOC_IN_RC(double_reg_vl));
 4149    match(RegD);
 4150 
 4151    format %{ %}
 4152    interface(REG_INTER);
 4153 %}
 4154 
 4155 //----------Memory Operands----------------------------------------------------
 4156 // Direct Memory Operand
 4157 operand direct(immP addr) %{
 4158   match(addr);
 4159 
 4160   format %{ "[$addr]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base(0xFFFFFFFF);
 4163     index(0x4);
 4164     scale(0x0);
 4165     disp($addr);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Operand
 4170 operand indirect(eRegP reg) %{
 4171   constraint(ALLOC_IN_RC(int_reg));
 4172   match(reg);
 4173 
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);
 4178     scale(0x0);
 4179     disp(0x0);
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Short Offset Operand
 4184 operand indOffset8(eRegP reg, immI8 off) %{
 4185   match(AddP reg off);
 4186 
 4187   format %{ "[$reg + $off]" %}
 4188   interface(MEMORY_INTER) %{
 4189     base($reg);
 4190     index(0x4);
 4191     scale(0x0);
 4192     disp($off);
 4193   %}
 4194 %}
 4195 
 4196 // Indirect Memory Plus Long Offset Operand
 4197 operand indOffset32(eRegP reg, immI off) %{
 4198   match(AddP reg off);
 4199 
 4200   format %{ "[$reg + $off]" %}
 4201   interface(MEMORY_INTER) %{
 4202     base($reg);
 4203     index(0x4);
 4204     scale(0x0);
 4205     disp($off);
 4206   %}
 4207 %}
 4208 
 4209 // Indirect Memory Plus Long Offset Operand
 4210 operand indOffset32X(rRegI reg, immP off) %{
 4211   match(AddP off reg);
 4212 
 4213   format %{ "[$reg + $off]" %}
 4214   interface(MEMORY_INTER) %{
 4215     base($reg);
 4216     index(0x4);
 4217     scale(0x0);
 4218     disp($off);
 4219   %}
 4220 %}
 4221 
 4222 // Indirect Memory Plus Index Register Plus Offset Operand
 4223 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4224   match(AddP (AddP reg ireg) off);
 4225 
 4226   op_cost(10);
 4227   format %{"[$reg + $off + $ireg]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base($reg);
 4230     index($ireg);
 4231     scale(0x0);
 4232     disp($off);
 4233   %}
 4234 %}
 4235 
 4236 // Indirect Memory Plus Index Register Plus Offset Operand
 4237 operand indIndex(eRegP reg, rRegI ireg) %{
 4238   match(AddP reg ireg);
 4239 
 4240   op_cost(10);
 4241   format %{"[$reg + $ireg]" %}
 4242   interface(MEMORY_INTER) %{
 4243     base($reg);
 4244     index($ireg);
 4245     scale(0x0);
 4246     disp(0x0);
 4247   %}
 4248 %}
 4249 
 4250 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4252 // // -------------------------------------------------------------------------
 4253 // // Scaled Memory Operands
 4254 // // Indirect Memory Times Scale Plus Offset Operand
 4255 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4256 //   match(AddP off (LShiftI ireg scale));
 4257 //
 4258 //   op_cost(10);
 4259 //   format %{"[$off + $ireg << $scale]" %}
 4260 //   interface(MEMORY_INTER) %{
 4261 //     base(0x4);
 4262 //     index($ireg);
 4263 //     scale($scale);
 4264 //     disp($off);
 4265 //   %}
 4266 // %}
 4267 
 4268 // Indirect Memory Times Scale Plus Index Register
 4269 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4270   match(AddP reg (LShiftI ireg scale));
 4271 
 4272   op_cost(10);
 4273   format %{"[$reg + $ireg << $scale]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index($ireg);
 4277     scale($scale);
 4278     disp(0x0);
 4279   %}
 4280 %}
 4281 
 4282 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4283 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4284   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4285 
 4286   op_cost(10);
 4287   format %{"[$reg + $off + $ireg << $scale]" %}
 4288   interface(MEMORY_INTER) %{
 4289     base($reg);
 4290     index($ireg);
 4291     scale($scale);
 4292     disp($off);
 4293   %}
 4294 %}
 4295 
 4296 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4298 // the first word of the long.  If the load-long destination overlaps with
 4299 // registers used in the addressing expression, the 2nd half will be loaded
 4300 // from a clobbered address.  Fix this by requiring that load-long use
 4301 // address registers that do not overlap with the load-long target.
 4302 
 4303 // load-long support
 4304 operand load_long_RegP() %{
 4305   constraint(ALLOC_IN_RC(esi_reg));
 4306   match(RegP);
 4307   match(eSIRegP);
 4308   op_cost(100);
 4309   format %{  %}
 4310   interface(REG_INTER);
 4311 %}
 4312 
 4313 // Indirect Memory Operand Long
 4314 operand load_long_indirect(load_long_RegP reg) %{
 4315   constraint(ALLOC_IN_RC(esi_reg));
 4316   match(reg);
 4317 
 4318   format %{ "[$reg]" %}
 4319   interface(MEMORY_INTER) %{
 4320     base($reg);
 4321     index(0x4);
 4322     scale(0x0);
 4323     disp(0x0);
 4324   %}
 4325 %}
 4326 
 4327 // Indirect Memory Plus Long Offset Operand
 4328 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4329   match(AddP reg off);
 4330 
 4331   format %{ "[$reg + $off]" %}
 4332   interface(MEMORY_INTER) %{
 4333     base($reg);
 4334     index(0x4);
 4335     scale(0x0);
 4336     disp($off);
 4337   %}
 4338 %}
 4339 
 4340 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
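
// For instance, loading the long at [ESI+disp] into a register pair that
// included ESI would overwrite ESI with the first word and then read the
// second word from a clobbered address; pinning the address to ESI and
// keeping the destination disjoint from it avoids that overlap.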
 4341 
 4342 
 4343 //----------Special Memory Operands--------------------------------------------
 4344 // Stack Slot Operand - This operand is used for loading and storing temporary
 4345 //                      values on the stack where a match requires a value to
 4346 //                      flow through memory.
 4347 operand stackSlotP(sRegP reg) %{
 4348   constraint(ALLOC_IN_RC(stack_slots));
 4349   // No match rule because this operand is only generated in matching
 4350   format %{ "[$reg]" %}
 4351   interface(MEMORY_INTER) %{
 4352     base(0x4);   // ESP
 4353     index(0x4);  // No Index
 4354     scale(0x0);  // No Scale
 4355     disp($reg);  // Stack Offset
 4356   %}
 4357 %}
 4358 
 4359 operand stackSlotI(sRegI reg) %{
 4360   constraint(ALLOC_IN_RC(stack_slots));
 4361   // No match rule because this operand is only generated in matching
 4362   format %{ "[$reg]" %}
 4363   interface(MEMORY_INTER) %{
 4364     base(0x4);   // ESP
 4365     index(0x4);  // No Index
 4366     scale(0x0);  // No Scale
 4367     disp($reg);  // Stack Offset
 4368   %}
 4369 %}
 4370 
 4371 operand stackSlotF(sRegF reg) %{
 4372   constraint(ALLOC_IN_RC(stack_slots));
 4373   // No match rule because this operand is only generated in matching
 4374   format %{ "[$reg]" %}
 4375   interface(MEMORY_INTER) %{
 4376     base(0x4);   // ESP
 4377     index(0x4);  // No Index
 4378     scale(0x0);  // No Scale
 4379     disp($reg);  // Stack Offset
 4380   %}
 4381 %}
 4382 
 4383 operand stackSlotD(sRegD reg) %{
 4384   constraint(ALLOC_IN_RC(stack_slots));
 4385   // No match rule because this operand is only generated in matching
 4386   format %{ "[$reg]" %}
 4387   interface(MEMORY_INTER) %{
 4388     base(0x4);   // ESP
 4389     index(0x4);  // No Index
 4390     scale(0x0);  // No Scale
 4391     disp($reg);  // Stack Offset
 4392   %}
 4393 %}
 4394 
 4395 operand stackSlotL(sRegL reg) %{
 4396   constraint(ALLOC_IN_RC(stack_slots));
 4397   // No match rule because this operand is only generated in matching
 4398   format %{ "[$reg]" %}
 4399   interface(MEMORY_INTER) %{
 4400     base(0x4);   // ESP
 4401     index(0x4);  // No Index
 4402     scale(0x0);  // No Scale
 4403     disp($reg);  // Stack Offset
 4404   %}
 4405 %}
 4406 
 4407 //----------Conditional Branch Operands----------------------------------------
 4408 // Comparison Op  - This is the operation of the comparison, and is limited to
 4409 //                  the following set of codes:
 4410 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4411 //
 4412 // Other attributes of the comparison, such as unsignedness, are specified
 4413 // by the comparison instruction that sets a condition code flags register.
 4414 // That result is represented by a flags operand whose subtype is appropriate
 4415 // to the unsignedness (etc.) of the comparison.
 4416 //
 4417 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4418 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4419 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4420 
 4421 // Comparison Code
 4422 operand cmpOp() %{
 4423   match(Bool);
 4424 
 4425   format %{ "" %}
 4426   interface(COND_INTER) %{
 4427     equal(0x4, "e");
 4428     not_equal(0x5, "ne");
 4429     less(0xC, "l");
 4430     greater_equal(0xD, "ge");
 4431     less_equal(0xE, "le");
 4432     greater(0xF, "g");
 4433     overflow(0x0, "o");
 4434     no_overflow(0x1, "no");
 4435   %}
 4436 %}
 4437 
 4438 // Comparison Code, unsigned compare.  Used by FP also, with
 4439 // C2 (unordered) turned into GT or LT already.  The other bits
 4440 // C0 and C3 are turned into Carry & Zero flags.
 4441 operand cmpOpU() %{
 4442   match(Bool);
 4443 
 4444   format %{ "" %}
 4445   interface(COND_INTER) %{
 4446     equal(0x4, "e");
 4447     not_equal(0x5, "ne");
 4448     less(0x2, "b");
 4449     greater_equal(0x3, "nb");
 4450     less_equal(0x6, "be");
 4451     greater(0x7, "nbe");
 4452     overflow(0x0, "o");
 4453     no_overflow(0x1, "no");
 4454   %}
 4455 %}
 4456 
 4457 // Floating comparisons that don't require any fixup for the unordered case
 4458 operand cmpOpUCF() %{
 4459   match(Bool);
 4460   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4461             n->as_Bool()->_test._test == BoolTest::ge ||
 4462             n->as_Bool()->_test._test == BoolTest::le ||
 4463             n->as_Bool()->_test._test == BoolTest::gt);
 4464   format %{ "" %}
 4465   interface(COND_INTER) %{
 4466     equal(0x4, "e");
 4467     not_equal(0x5, "ne");
 4468     less(0x2, "b");
 4469     greater_equal(0x3, "nb");
 4470     less_equal(0x6, "be");
 4471     greater(0x7, "nbe");
 4472     overflow(0x0, "o");
 4473     no_overflow(0x1, "no");
 4474   %}
 4475 %}
 4476 
 4477 
 4478 // Floating comparisons that can be fixed up with extra conditional jumps
 4479 operand cmpOpUCF2() %{
 4480   match(Bool);
 4481   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4482             n->as_Bool()->_test._test == BoolTest::eq);
 4483   format %{ "" %}
 4484   interface(COND_INTER) %{
 4485     equal(0x4, "e");
 4486     not_equal(0x5, "ne");
 4487     less(0x2, "b");
 4488     greater_equal(0x3, "nb");
 4489     less_equal(0x6, "be");
 4490     greater(0x7, "nbe");
 4491     overflow(0x0, "o");
 4492     no_overflow(0x1, "no");
 4493   %}
 4494 %}
 4495 
 4496 // Comparison Code for FP conditional move
 4497 operand cmpOp_fcmov() %{
 4498   match(Bool);
 4499 
 4500   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4501             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4502   format %{ "" %}
 4503   interface(COND_INTER) %{
 4504     equal        (0x0C8);
 4505     not_equal    (0x1C8);
 4506     less         (0x0C0);
 4507     greater_equal(0x1C0);
 4508     less_equal   (0x0D0);
 4509     greater      (0x1D0);
 4510     overflow(0x0, "o"); // not really supported by the instruction
 4511     no_overflow(0x1, "no"); // not really supported by the instruction
 4512   %}
 4513 %}
 4514 
 4515 // Comparison Code used in long compares
 4516 operand cmpOp_commute() %{
 4517   match(Bool);
 4518 
 4519   format %{ "" %}
 4520   interface(COND_INTER) %{
 4521     equal(0x4, "e");
 4522     not_equal(0x5, "ne");
 4523     less(0xF, "g");
 4524     greater_equal(0xE, "le");
 4525     less_equal(0xD, "ge");
 4526     greater(0xC, "l");
 4527     overflow(0x0, "o");
 4528     no_overflow(0x1, "no");
 4529   %}
 4530 %}
 4531 
 4532 // Comparison Code used in unsigned long compares
 4533 operand cmpOpU_commute() %{
 4534   match(Bool);
 4535 
 4536   format %{ "" %}
 4537   interface(COND_INTER) %{
 4538     equal(0x4, "e");
 4539     not_equal(0x5, "ne");
 4540     less(0x7, "nbe");
 4541     greater_equal(0x6, "be");
 4542     less_equal(0x3, "nb");
 4543     greater(0x2, "b");
 4544     overflow(0x0, "o");
 4545     no_overflow(0x1, "no");
 4546   %}
 4547 %}
 4548 
 4549 //----------OPERAND CLASSES----------------------------------------------------
 4550 // Operand Classes are groups of operands that are used to simplify
 4551 // instruction definitions by not requiring the AD writer to specify separate
 4552 // instructions for every form of operand when the instruction accepts
 4553 // multiple operand types with the same basic encoding and format.  The classic
 4554 // case of this is memory operands.
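      //
      // For example, a single rule such as loadI(rRegI dst, memory mem) later in
      // this file matches register-indirect, offset, and scaled-index addressing
      // forms through the one "memory" opclass below, rather than needing a
      // separate instruct per addressing mode.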
 4555 
 4556 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4557                indIndex, indIndexScale, indIndexScaleOffset);
 4558 
 4559 // Long memory operations are encoded as 2 instructions, the second using a +4 offset.
 4560 // This means some kind of offset is always required, so an oop cannot be used
 4561 // as the offset (as is done when working on static globals).
 4562 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4563                     indIndex, indIndexScale, indIndexScaleOffset);
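      // For illustration, a long load through this opclass expands to a pair such
      // as "MOV dst.lo,[mem]; MOV dst.hi,[mem+4]" (see loadL below), which is why
      // an offset must always be expressible.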
 4564 
 4565 
 4566 //----------PIPELINE-----------------------------------------------------------
 4567 // Rules which define the behavior of the target architecture's pipeline.
 4568 pipeline %{
 4569 
 4570 //----------ATTRIBUTES---------------------------------------------------------
 4571 attributes %{
 4572   variable_size_instructions;        // Variable-size instructions
 4573   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4574   instruction_unit_size = 1;         // An instruction is 1 byte long
 4575   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4576   instruction_fetch_units = 1;       // of 16 bytes
 4577 
 4578   // List of nop instructions
 4579   nops( MachNop );
 4580 %}
 4581 
 4582 //----------RESOURCES----------------------------------------------------------
 4583 // Resources are the functional units available to the machine
 4584 
 4585 // Generic P2/P3 pipeline
 4586 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4587 // 3 instructions decoded per cycle.
 4588 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4589 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4590 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4591            MS0, MS1, MEM = MS0 | MS1,
 4592            BR, FPU,
 4593            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4594 
 4595 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4596 // Pipeline Description specifies the stages in the machine's pipeline
 4597 
 4598 // Generic P2/P3 pipeline
 4599 pipe_desc(S0, S1, S2, S3, S4, S5);
 4600 
 4601 //----------PIPELINE CLASSES---------------------------------------------------
 4602 // Pipeline Classes describe the stages in which input and output are
 4603 // referenced by the hardware pipeline.
 4604 
 4605 // Naming convention: ialu or fpu
 4606 // Then: _reg
 4607 // Then: _reg if there is a 2nd register
 4608 // Then: _long if it's a pair of instructions implementing a long
 4609 // Then: _fat if it requires the big decoder
 4610 //   Or: _mem if it requires the big decoder and a memory unit.
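      //
      // Example reading of a name under this convention: ialu_reg_long_fat is an
      // integer ALU operation whose destination is a register, expanded as an
      // instruction pair for a long, and requiring the big decoder.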
 4611 
 4612 // Integer ALU reg operation
 4613 pipe_class ialu_reg(rRegI dst) %{
 4614     single_instruction;
 4615     dst    : S4(write);
 4616     dst    : S3(read);
 4617     DECODE : S0;        // any decoder
 4618     ALU    : S3;        // any alu
 4619 %}
 4620 
 4621 // Long ALU reg operation
 4622 pipe_class ialu_reg_long(eRegL dst) %{
 4623     instruction_count(2);
 4624     dst    : S4(write);
 4625     dst    : S3(read);
 4626     DECODE : S0(2);     // any 2 decoders
 4627     ALU    : S3(2);     // both alus
 4628 %}
 4629 
 4630 // Integer ALU reg operation using big decoder
 4631 pipe_class ialu_reg_fat(rRegI dst) %{
 4632     single_instruction;
 4633     dst    : S4(write);
 4634     dst    : S3(read);
 4635     D0     : S0;        // big decoder only
 4636     ALU    : S3;        // any alu
 4637 %}
 4638 
 4639 // Long ALU reg operation using big decoder
 4640 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4641     instruction_count(2);
 4642     dst    : S4(write);
 4643     dst    : S3(read);
 4644     D0     : S0(2);     // big decoder only; twice
 4645     ALU    : S3(2);     // any 2 alus
 4646 %}
 4647 
 4648 // Integer ALU reg-reg operation
 4649 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4650     single_instruction;
 4651     dst    : S4(write);
 4652     src    : S3(read);
 4653     DECODE : S0;        // any decoder
 4654     ALU    : S3;        // any alu
 4655 %}
 4656 
 4657 // Long ALU reg-reg operation
 4658 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4659     instruction_count(2);
 4660     dst    : S4(write);
 4661     src    : S3(read);
 4662     DECODE : S0(2);     // any 2 decoders
 4663     ALU    : S3(2);     // both alus
 4664 %}
 4665 
 4666 // Integer ALU reg-reg operation using big decoder
 4667 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4668     single_instruction;
 4669     dst    : S4(write);
 4670     src    : S3(read);
 4671     D0     : S0;        // big decoder only
 4672     ALU    : S3;        // any alu
 4673 %}
 4674 
 4675 // Long ALU reg-reg operation using big decoder
 4676 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4677     instruction_count(2);
 4678     dst    : S4(write);
 4679     src    : S3(read);
 4680     D0     : S0(2);     // big decoder only; twice
 4681     ALU    : S3(2);     // both alus
 4682 %}
 4683 
 4684 // Integer ALU reg-mem operation
 4685 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4686     single_instruction;
 4687     dst    : S5(write);
 4688     mem    : S3(read);
 4689     D0     : S0;        // big decoder only
 4690     ALU    : S4;        // any alu
 4691     MEM    : S3;        // any mem
 4692 %}
 4693 
 4694 // Long ALU reg-mem operation
 4695 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4696     instruction_count(2);
 4697     dst    : S5(write);
 4698     mem    : S3(read);
 4699     D0     : S0(2);     // big decoder only; twice
 4700     ALU    : S4(2);     // any 2 alus
 4701     MEM    : S3(2);     // both mems
 4702 %}
 4703 
 4704 // Integer mem operation (prefetch)
 4705 pipe_class ialu_mem(memory mem)
 4706 %{
 4707     single_instruction;
 4708     mem    : S3(read);
 4709     D0     : S0;        // big decoder only
 4710     MEM    : S3;        // any mem
 4711 %}
 4712 
 4713 // Integer Store to Memory
 4714 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4715     single_instruction;
 4716     mem    : S3(read);
 4717     src    : S5(read);
 4718     D0     : S0;        // big decoder only
 4719     ALU    : S4;        // any alu
 4720     MEM    : S3;
 4721 %}
 4722 
 4723 // Long Store to Memory
 4724 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4725     instruction_count(2);
 4726     mem    : S3(read);
 4727     src    : S5(read);
 4728     D0     : S0(2);     // big decoder only; twice
 4729     ALU    : S4(2);     // any 2 alus
 4730     MEM    : S3(2);     // Both mems
 4731 %}
 4732 
 4733 // Integer Store to Memory
 4734 pipe_class ialu_mem_imm(memory mem) %{
 4735     single_instruction;
 4736     mem    : S3(read);
 4737     D0     : S0;        // big decoder only
 4738     ALU    : S4;        // any alu
 4739     MEM    : S3;
 4740 %}
 4741 
 4742 // Integer ALU0 reg-reg operation
 4743 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4744     single_instruction;
 4745     dst    : S4(write);
 4746     src    : S3(read);
 4747     D0     : S0;        // Big decoder only
 4748     ALU0   : S3;        // only alu0
 4749 %}
 4750 
 4751 // Integer ALU0 reg-mem operation
 4752 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4753     single_instruction;
 4754     dst    : S5(write);
 4755     mem    : S3(read);
 4756     D0     : S0;        // big decoder only
 4757     ALU0   : S4;        // ALU0 only
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Integer ALU reg-reg operation
 4762 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4763     single_instruction;
 4764     cr     : S4(write);
 4765     src1   : S3(read);
 4766     src2   : S3(read);
 4767     DECODE : S0;        // any decoder
 4768     ALU    : S3;        // any alu
 4769 %}
 4770 
 4771 // Integer ALU reg-imm operation
 4772 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4773     single_instruction;
 4774     cr     : S4(write);
 4775     src1   : S3(read);
 4776     DECODE : S0;        // any decoder
 4777     ALU    : S3;        // any alu
 4778 %}
 4779 
 4780 // Integer ALU reg-mem operation
 4781 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4782     single_instruction;
 4783     cr     : S4(write);
 4784     src1   : S3(read);
 4785     src2   : S3(read);
 4786     D0     : S0;        // big decoder only
 4787     ALU    : S4;        // any alu
 4788     MEM    : S3;
 4789 %}
 4790 
 4791 // Conditional move reg-reg
 4792 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4793     instruction_count(4);
 4794     y      : S4(read);
 4795     q      : S3(read);
 4796     p      : S3(read);
 4797     DECODE : S0(4);     // any 4 decoders
 4798 %}
 4799 
 4800 // Conditional move reg-reg
 4801 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4802     single_instruction;
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     cr     : S3(read);
 4806     DECODE : S0;        // any decoder
 4807 %}
 4808 
 4809 // Conditional move reg-mem
 4810 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4811     single_instruction;
 4812     dst    : S4(write);
 4813     src    : S3(read);
 4814     cr     : S3(read);
 4815     DECODE : S0;        // any decoder
 4816     MEM    : S3;
 4817 %}
 4818 
 4819 // Conditional move reg-reg long
 4820 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4821     single_instruction;
 4822     dst    : S4(write);
 4823     src    : S3(read);
 4824     cr     : S3(read);
 4825     DECODE : S0(2);     // any 2 decoders
 4826 %}
 4827 
 4828 // Conditional move double reg-reg
 4829 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4830     single_instruction;
 4831     dst    : S4(write);
 4832     src    : S3(read);
 4833     cr     : S3(read);
 4834     DECODE : S0;        // any decoder
 4835 %}
 4836 
 4837 // Float reg-reg operation
 4838 pipe_class fpu_reg(regDPR dst) %{
 4839     instruction_count(2);
 4840     dst    : S3(read);
 4841     DECODE : S0(2);     // any 2 decoders
 4842     FPU    : S3;
 4843 %}
 4844 
 4845 // Float reg-reg operation
 4846 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4847     instruction_count(2);
 4848     dst    : S4(write);
 4849     src    : S3(read);
 4850     DECODE : S0(2);     // any 2 decoders
 4851     FPU    : S3;
 4852 %}
 4853 
 4854 // Float reg-reg operation
 4855 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4856     instruction_count(3);
 4857     dst    : S4(write);
 4858     src1   : S3(read);
 4859     src2   : S3(read);
 4860     DECODE : S0(3);     // any 3 decoders
 4861     FPU    : S3(2);
 4862 %}
 4863 
 4864 // Float reg-reg operation
 4865 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4866     instruction_count(4);
 4867     dst    : S4(write);
 4868     src1   : S3(read);
 4869     src2   : S3(read);
 4870     src3   : S3(read);
 4871     DECODE : S0(4);     // any 4 decoders
 4872     FPU    : S3(2);
 4873 %}
 4874 
 4875 // Float reg-reg operation
 4876 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4877     instruction_count(4);
 4878     dst    : S4(write);
 4879     src1   : S3(read);
 4880     src2   : S3(read);
 4881     src3   : S3(read);
 4882     DECODE : S1(3);     // any 3 decoders
 4883     D0     : S0;        // Big decoder only
 4884     FPU    : S3(2);
 4885     MEM    : S3;
 4886 %}
 4887 
 4888 // Float reg-mem operation
 4889 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4890     instruction_count(2);
 4891     dst    : S5(write);
 4892     mem    : S3(read);
 4893     D0     : S0;        // big decoder only
 4894     DECODE : S1;        // any decoder for FPU POP
 4895     FPU    : S4;
 4896     MEM    : S3;        // any mem
 4897 %}
 4898 
 4899 // Float reg-mem operation
 4900 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4901     instruction_count(3);
 4902     dst    : S5(write);
 4903     src1   : S3(read);
 4904     mem    : S3(read);
 4905     D0     : S0;        // big decoder only
 4906     DECODE : S1(2);     // any 2 decoders for FPU POP
 4907     FPU    : S4;
 4908     MEM    : S3;        // any mem
 4909 %}
 4910 
 4911 // Float mem-reg operation
 4912 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4913     instruction_count(2);
 4914     src    : S5(read);
 4915     mem    : S3(read);
 4916     DECODE : S0;        // any decoder for FPU PUSH
 4917     D0     : S1;        // big decoder only
 4918     FPU    : S4;
 4919     MEM    : S3;        // any mem
 4920 %}
 4921 
 4922 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4923     instruction_count(3);
 4924     src1   : S3(read);
 4925     src2   : S3(read);
 4926     mem    : S3(read);
 4927     DECODE : S0(2);     // any 2 decoders for FPU PUSH
 4928     D0     : S1;        // big decoder only
 4929     FPU    : S4;
 4930     MEM    : S3;        // any mem
 4931 %}
 4932 
 4933 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4934     instruction_count(3);
 4935     src1   : S3(read);
 4936     src2   : S3(read);
 4937     mem    : S4(read);
 4938     DECODE : S0;        // any decoder for FPU PUSH
 4939     D0     : S0(2);     // big decoder only
 4940     FPU    : S4;
 4941     MEM    : S3(2);     // any mem
 4942 %}
 4943 
 4944 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4945     instruction_count(2);
 4946     src1   : S3(read);
 4947     dst    : S4(read);
 4948     D0     : S0(2);     // big decoder only
 4949     MEM    : S3(2);     // any mem
 4950 %}
 4951 
 4952 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4953     instruction_count(3);
 4954     src1   : S3(read);
 4955     src2   : S3(read);
 4956     dst    : S4(read);
 4957     D0     : S0(3);     // big decoder only
 4958     FPU    : S4;
 4959     MEM    : S3(3);     // any mem
 4960 %}
 4961 
 4962 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4963     instruction_count(3);
 4964     src1   : S4(read);
 4965     mem    : S4(read);
 4966     DECODE : S0;        // any decoder for FPU PUSH
 4967     D0     : S0(2);     // big decoder only
 4968     FPU    : S4;
 4969     MEM    : S3(2);     // any mem
 4970 %}
 4971 
 4972 // Float load constant
 4973 pipe_class fpu_reg_con(regDPR dst) %{
 4974     instruction_count(2);
 4975     dst    : S5(write);
 4976     D0     : S0;        // big decoder only for the load
 4977     DECODE : S1;        // any decoder for FPU POP
 4978     FPU    : S4;
 4979     MEM    : S3;        // any mem
 4980 %}
 4981 
 4982 // Float load constant
 4983 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4984     instruction_count(3);
 4985     dst    : S5(write);
 4986     src    : S3(read);
 4987     D0     : S0;        // big decoder only for the load
 4988     DECODE : S1(2);     // any 2 decoders for FPU POP
 4989     FPU    : S4;
 4990     MEM    : S3;        // any mem
 4991 %}
 4992 
 4993 // UnConditional branch
 4994 pipe_class pipe_jmp( label labl ) %{
 4995     single_instruction;
 4996     BR   : S3;
 4997 %}
 4998 
 4999 // Conditional branch
 5000 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5001     single_instruction;
 5002     cr    : S1(read);
 5003     BR    : S3;
 5004 %}
 5005 
 5006 // Allocation idiom
 5007 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5008     instruction_count(1); force_serialization;
 5009     fixed_latency(6);
 5010     heap_ptr : S3(read);
 5011     DECODE   : S0(3);
 5012     D0       : S2;
 5013     MEM      : S3;
 5014     ALU      : S3(2);
 5015     dst      : S5(write);
 5016     BR       : S5;
 5017 %}
 5018 
 5019 // Generic big/slow expanded idiom
 5020 pipe_class pipe_slow(  ) %{
 5021     instruction_count(10); multiple_bundles; force_serialization;
 5022     fixed_latency(100);
 5023     D0  : S0(2);
 5024     MEM : S3(2);
 5025 %}
 5026 
 5027 // The real do-nothing guy
 5028 pipe_class empty( ) %{
 5029     instruction_count(0);
 5030 %}
 5031 
 5032 // Define the class for the Nop node
 5033 define %{
 5034    MachNop = empty;
 5035 %}
 5036 
 5037 %}
 5038 
 5039 //----------INSTRUCTIONS-------------------------------------------------------
 5040 //
 5041 // match      -- States which machine-independent subtree may be replaced
 5042 //               by this instruction.
 5043 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5044 //               selection to identify a minimum cost tree of machine
 5045 //               instructions that matches a tree of machine-independent
 5046 //               instructions.
 5047 // format     -- A string providing the disassembly for this instruction.
 5048 //               The value of an instruction's operand may be inserted
 5049 //               by referring to it with a '$' prefix.
 5050 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5051 //               to within an encode class as $primary, $secondary, and $tertiary
 5052 //               respectively.  The primary opcode is commonly used to
 5053 //               indicate the type of machine instruction, while secondary
 5054 //               and tertiary are often used for prefix options or addressing
 5055 //               modes.
 5056 // ins_encode -- A list of encode classes with parameters. The encode class
 5057 //               name must have been defined in an 'enc_class' specification
 5058 //               in the encode section of the architecture description.
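      //
      // A minimal sketch of how these parts combine (illustration only, abridged
      // from the loadI rule that appears later in this section):
      //
      //   instruct loadI(rRegI dst, memory mem) %{
      //     match(Set dst (LoadI mem));        // ideal subtree this rule replaces
      //     ins_cost(125);                     // relative cost used by selection
      //     format %{ "MOV    $dst,$mem" %}    // disassembly string
      //     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
      //     ins_pipe(ialu_reg_mem);            // pipeline class for scheduling
      //   %}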
 5059 
 5060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5061 // Load Float
 5062 instruct MoveF2LEG(legRegF dst, regF src) %{
 5063   match(Set dst src);
 5064   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5065   ins_encode %{
 5066     ShouldNotReachHere();
 5067   %}
 5068   ins_pipe( fpu_reg_reg );
 5069 %}
 5070 
 5071 // Load Float
 5072 instruct MoveLEG2F(regF dst, legRegF src) %{
 5073   match(Set dst src);
 5074   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5075   ins_encode %{
 5076     ShouldNotReachHere();
 5077   %}
 5078   ins_pipe( fpu_reg_reg );
 5079 %}
 5080 
 5081 // Load Float
 5082 instruct MoveF2VL(vlRegF dst, regF src) %{
 5083   match(Set dst src);
 5084   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5085   ins_encode %{
 5086     ShouldNotReachHere();
 5087   %}
 5088   ins_pipe( fpu_reg_reg );
 5089 %}
 5090 
 5091 // Load Float
 5092 instruct MoveVL2F(regF dst, vlRegF src) %{
 5093   match(Set dst src);
 5094   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5095   ins_encode %{
 5096     ShouldNotReachHere();
 5097   %}
 5098   ins_pipe( fpu_reg_reg );
 5099 %}
 5100 
 5101 
 5102 
 5103 // Load Double
 5104 instruct MoveD2LEG(legRegD dst, regD src) %{
 5105   match(Set dst src);
 5106   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5107   ins_encode %{
 5108     ShouldNotReachHere();
 5109   %}
 5110   ins_pipe( fpu_reg_reg );
 5111 %}
 5112 
 5113 // Load Double
 5114 instruct MoveLEG2D(regD dst, legRegD src) %{
 5115   match(Set dst src);
 5116   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5117   ins_encode %{
 5118     ShouldNotReachHere();
 5119   %}
 5120   ins_pipe( fpu_reg_reg );
 5121 %}
 5122 
 5123 // Load Double
 5124 instruct MoveD2VL(vlRegD dst, regD src) %{
 5125   match(Set dst src);
 5126   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5127   ins_encode %{
 5128     ShouldNotReachHere();
 5129   %}
 5130   ins_pipe( fpu_reg_reg );
 5131 %}
 5132 
 5133 // Load Double
 5134 instruct MoveVL2D(regD dst, vlRegD src) %{
 5135   match(Set dst src);
 5136   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5137   ins_encode %{
 5138     ShouldNotReachHere();
 5139   %}
 5140   ins_pipe( fpu_reg_reg );
 5141 %}
 5142 
 5143 //----------BSWAP-Instruction--------------------------------------------------
 5144 instruct bytes_reverse_int(rRegI dst) %{
 5145   match(Set dst (ReverseBytesI dst));
 5146 
 5147   format %{ "BSWAP  $dst" %}
 5148   opcode(0x0F, 0xC8);
 5149   ins_encode( OpcP, OpcSReg(dst) );
 5150   ins_pipe( ialu_reg );
 5151 %}
 5152 
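      // Reversing the bytes of a 64-bit value is done by byte-reversing each
      // 32-bit half and then exchanging the halves, hence the BSWAP/BSWAP/XCHG
      // sequence below.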
 5153 instruct bytes_reverse_long(eRegL dst) %{
 5154   match(Set dst (ReverseBytesL dst));
 5155 
 5156   format %{ "BSWAP  $dst.lo\n\t"
 5157             "BSWAP  $dst.hi\n\t"
 5158             "XCHG   $dst.lo $dst.hi" %}
 5159 
 5160   ins_cost(125);
 5161   ins_encode( bswap_long_bytes(dst) );
 5162   ins_pipe( ialu_reg_reg);
 5163 %}
 5164 
 5165 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5166   match(Set dst (ReverseBytesUS dst));
 5167   effect(KILL cr);
 5168 
 5169   format %{ "BSWAP  $dst\n\t"
 5170             "SHR    $dst,16\n\t" %}
 5171   ins_encode %{
 5172     __ bswapl($dst$$Register);
 5173     __ shrl($dst$$Register, 16);
 5174   %}
 5175   ins_pipe( ialu_reg );
 5176 %}
 5177 
 5178 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5179   match(Set dst (ReverseBytesS dst));
 5180   effect(KILL cr);
 5181 
 5182   format %{ "BSWAP  $dst\n\t"
 5183             "SAR    $dst,16\n\t" %}
 5184   ins_encode %{
 5185     __ bswapl($dst$$Register);
 5186     __ sarl($dst$$Register, 16);
 5187   %}
 5188   ins_pipe( ialu_reg );
 5189 %}
 5190 
 5191 
 5192 //---------- Zeros Count Instructions ------------------------------------------
 5193 
 5194 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5195   predicate(UseCountLeadingZerosInstruction);
 5196   match(Set dst (CountLeadingZerosI src));
 5197   effect(KILL cr);
 5198 
 5199   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5200   ins_encode %{
 5201     __ lzcntl($dst$$Register, $src$$Register);
 5202   %}
 5203   ins_pipe(ialu_reg);
 5204 %}
 5205 
 5206 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5207   predicate(!UseCountLeadingZerosInstruction);
 5208   match(Set dst (CountLeadingZerosI src));
 5209   effect(KILL cr);
 5210 
 5211   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5212             "JNZ    skip\n\t"
 5213             "MOV    $dst, -1\n"
 5214       "skip:\n\t"
 5215             "NEG    $dst\n\t"
 5216             "ADD    $dst, 31" %}
 5217   ins_encode %{
 5218     Register Rdst = $dst$$Register;
 5219     Register Rsrc = $src$$Register;
 5220     Label skip;
 5221     __ bsrl(Rdst, Rsrc);
 5222     __ jccb(Assembler::notZero, skip);
 5223     __ movl(Rdst, -1);
 5224     __ bind(skip);
 5225     __ negl(Rdst);
 5226     __ addl(Rdst, BitsPerInt - 1);
 5227   %}
 5228   ins_pipe(ialu_reg);
 5229 %}
 5230 
 5231 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5232   predicate(UseCountLeadingZerosInstruction);
 5233   match(Set dst (CountLeadingZerosL src));
 5234   effect(TEMP dst, KILL cr);
 5235 
 5236   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5237             "JNC    done\n\t"
 5238             "LZCNT  $dst, $src.lo\n\t"
 5239             "ADD    $dst, 32\n"
 5240       "done:" %}
 5241   ins_encode %{
 5242     Register Rdst = $dst$$Register;
 5243     Register Rsrc = $src$$Register;
 5244     Label done;
 5245     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5246     __ jccb(Assembler::carryClear, done);
 5247     __ lzcntl(Rdst, Rsrc);
 5248     __ addl(Rdst, BitsPerInt);
 5249     __ bind(done);
 5250   %}
 5251   ins_pipe(ialu_reg);
 5252 %}
 5253 
 5254 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5255   predicate(!UseCountLeadingZerosInstruction);
 5256   match(Set dst (CountLeadingZerosL src));
 5257   effect(TEMP dst, KILL cr);
 5258 
 5259   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5260             "JZ     msw_is_zero\n\t"
 5261             "ADD    $dst, 32\n\t"
 5262             "JMP    not_zero\n"
 5263       "msw_is_zero:\n\t"
 5264             "BSR    $dst, $src.lo\n\t"
 5265             "JNZ    not_zero\n\t"
 5266             "MOV    $dst, -1\n"
 5267       "not_zero:\n\t"
 5268             "NEG    $dst\n\t"
 5269             "ADD    $dst, 63\n" %}
 5270  ins_encode %{
 5271     Register Rdst = $dst$$Register;
 5272     Register Rsrc = $src$$Register;
 5273     Label msw_is_zero;
 5274     Label not_zero;
 5275     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5276     __ jccb(Assembler::zero, msw_is_zero);
 5277     __ addl(Rdst, BitsPerInt);
 5278     __ jmpb(not_zero);
 5279     __ bind(msw_is_zero);
 5280     __ bsrl(Rdst, Rsrc);
 5281     __ jccb(Assembler::notZero, not_zero);
 5282     __ movl(Rdst, -1);
 5283     __ bind(not_zero);
 5284     __ negl(Rdst);
 5285     __ addl(Rdst, BitsPerLong - 1);
 5286   %}
 5287   ins_pipe(ialu_reg);
 5288 %}
 5289 
 5290 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5291   predicate(UseCountTrailingZerosInstruction);
 5292   match(Set dst (CountTrailingZerosI src));
 5293   effect(KILL cr);
 5294 
 5295   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5296   ins_encode %{
 5297     __ tzcntl($dst$$Register, $src$$Register);
 5298   %}
 5299   ins_pipe(ialu_reg);
 5300 %}
 5301 
 5302 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5303   predicate(!UseCountTrailingZerosInstruction);
 5304   match(Set dst (CountTrailingZerosI src));
 5305   effect(KILL cr);
 5306 
 5307   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5308             "JNZ    done\n\t"
 5309             "MOV    $dst, 32\n"
 5310       "done:" %}
 5311   ins_encode %{
 5312     Register Rdst = $dst$$Register;
 5313     Label done;
 5314     __ bsfl(Rdst, $src$$Register);
 5315     __ jccb(Assembler::notZero, done);
 5316     __ movl(Rdst, BitsPerInt);
 5317     __ bind(done);
 5318   %}
 5319   ins_pipe(ialu_reg);
 5320 %}
 5321 
 5322 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5323   predicate(UseCountTrailingZerosInstruction);
 5324   match(Set dst (CountTrailingZerosL src));
 5325   effect(TEMP dst, KILL cr);
 5326 
 5327   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5328             "JNC    done\n\t"
 5329             "TZCNT  $dst, $src.hi\n\t"
 5330             "ADD    $dst, 32\n"
 5331       "done:" %}
 5332   ins_encode %{
 5333     Register Rdst = $dst$$Register;
 5334     Register Rsrc = $src$$Register;
 5335     Label done;
 5336     __ tzcntl(Rdst, Rsrc);
 5337     __ jccb(Assembler::carryClear, done);
 5338     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5339     __ addl(Rdst, BitsPerInt);
 5340     __ bind(done);
 5341   %}
 5342   ins_pipe(ialu_reg);
 5343 %}
 5344 
 5345 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5346   predicate(!UseCountTrailingZerosInstruction);
 5347   match(Set dst (CountTrailingZerosL src));
 5348   effect(TEMP dst, KILL cr);
 5349 
 5350   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5351             "JNZ    done\n\t"
 5352             "BSF    $dst, $src.hi\n\t"
 5353             "JNZ    msw_not_zero\n\t"
 5354             "MOV    $dst, 32\n"
 5355       "msw_not_zero:\n\t"
 5356             "ADD    $dst, 32\n"
 5357       "done:" %}
 5358   ins_encode %{
 5359     Register Rdst = $dst$$Register;
 5360     Register Rsrc = $src$$Register;
 5361     Label msw_not_zero;
 5362     Label done;
 5363     __ bsfl(Rdst, Rsrc);
 5364     __ jccb(Assembler::notZero, done);
 5365     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5366     __ jccb(Assembler::notZero, msw_not_zero);
 5367     __ movl(Rdst, BitsPerInt);
 5368     __ bind(msw_not_zero);
 5369     __ addl(Rdst, BitsPerInt);
 5370     __ bind(done);
 5371   %}
 5372   ins_pipe(ialu_reg);
 5373 %}
 5374 
 5375 
 5376 //---------- Population Count Instructions -------------------------------------
 5377 
 5378 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5379   predicate(UsePopCountInstruction);
 5380   match(Set dst (PopCountI src));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "POPCNT $dst, $src" %}
 5384   ins_encode %{
 5385     __ popcntl($dst$$Register, $src$$Register);
 5386   %}
 5387   ins_pipe(ialu_reg);
 5388 %}
 5389 
 5390 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5391   predicate(UsePopCountInstruction);
 5392   match(Set dst (PopCountI (LoadI mem)));
 5393   effect(KILL cr);
 5394 
 5395   format %{ "POPCNT $dst, $mem" %}
 5396   ins_encode %{
 5397     __ popcntl($dst$$Register, $mem$$Address);
 5398   %}
 5399   ins_pipe(ialu_reg);
 5400 %}
 5401 
 5402 // Note: Long.bitCount(long) returns an int.
 5403 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5404   predicate(UsePopCountInstruction);
 5405   match(Set dst (PopCountL src));
 5406   effect(KILL cr, TEMP tmp, TEMP dst);
 5407 
 5408   format %{ "POPCNT $dst, $src.lo\n\t"
 5409             "POPCNT $tmp, $src.hi\n\t"
 5410             "ADD    $dst, $tmp" %}
 5411   ins_encode %{
 5412     __ popcntl($dst$$Register, $src$$Register);
 5413     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5414     __ addl($dst$$Register, $tmp$$Register);
 5415   %}
 5416   ins_pipe(ialu_reg);
 5417 %}
 5418 
 5419 // Note: Long.bitCount(long) returns an int.
 5420 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5421   predicate(UsePopCountInstruction);
 5422   match(Set dst (PopCountL (LoadL mem)));
 5423   effect(KILL cr, TEMP tmp, TEMP dst);
 5424 
 5425   format %{ "POPCNT $dst, $mem\n\t"
 5426             "POPCNT $tmp, $mem+4\n\t"
 5427             "ADD    $dst, $tmp" %}
 5428   ins_encode %{
 5429     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5430     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5431     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5432     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5433     __ addl($dst$$Register, $tmp$$Register);
 5434   %}
 5435   ins_pipe(ialu_reg);
 5436 %}
 5437 
 5438 
 5439 //----------Load/Store/Move Instructions---------------------------------------
 5440 //----------Load Instructions--------------------------------------------------
 5441 // Load Byte (8bit signed)
 5442 instruct loadB(xRegI dst, memory mem) %{
 5443   match(Set dst (LoadB mem));
 5444 
 5445   ins_cost(125);
 5446   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5447 
 5448   ins_encode %{
 5449     __ movsbl($dst$$Register, $mem$$Address);
 5450   %}
 5451 
 5452   ins_pipe(ialu_reg_mem);
 5453 %}
 5454 
 5455 // Load Byte (8bit signed) into Long Register
 5456 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5457   match(Set dst (ConvI2L (LoadB mem)));
 5458   effect(KILL cr);
 5459 
 5460   ins_cost(375);
 5461   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5462             "MOV    $dst.hi,$dst.lo\n\t"
 5463             "SAR    $dst.hi,7" %}
 5464 
 5465   ins_encode %{
 5466     __ movsbl($dst$$Register, $mem$$Address);
 5467     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5468     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
 5469   %}
 5470 
 5471   ins_pipe(ialu_reg_mem);
 5472 %}
 5473 
 5474 // Load Unsigned Byte (8bit UNsigned)
 5475 instruct loadUB(xRegI dst, memory mem) %{
 5476   match(Set dst (LoadUB mem));
 5477 
 5478   ins_cost(125);
 5479   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5480 
 5481   ins_encode %{
 5482     __ movzbl($dst$$Register, $mem$$Address);
 5483   %}
 5484 
 5485   ins_pipe(ialu_reg_mem);
 5486 %}
 5487 
 5488 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5489 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5490   match(Set dst (ConvI2L (LoadUB mem)));
 5491   effect(KILL cr);
 5492 
 5493   ins_cost(250);
 5494   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5495             "XOR    $dst.hi,$dst.hi" %}
 5496 
 5497   ins_encode %{
 5498     Register Rdst = $dst$$Register;
 5499     __ movzbl(Rdst, $mem$$Address);
 5500     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5501   %}
 5502 
 5503   ins_pipe(ialu_reg_mem);
 5504 %}
 5505 
 5506 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5507 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5508   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5509   effect(KILL cr);
 5510 
 5511   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5512             "XOR    $dst.hi,$dst.hi\n\t"
 5513             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5514   ins_encode %{
 5515     Register Rdst = $dst$$Register;
 5516     __ movzbl(Rdst, $mem$$Address);
 5517     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5518     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5519   %}
 5520   ins_pipe(ialu_reg_mem);
 5521 %}
 5522 
 5523 // Load Short (16bit signed)
 5524 instruct loadS(rRegI dst, memory mem) %{
 5525   match(Set dst (LoadS mem));
 5526 
 5527   ins_cost(125);
 5528   format %{ "MOVSX  $dst,$mem\t# short" %}
 5529 
 5530   ins_encode %{
 5531     __ movswl($dst$$Register, $mem$$Address);
 5532   %}
 5533 
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Short (16 bit signed) to Byte (8 bit signed)
 5538 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5539   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5540 
 5541   ins_cost(125);
 5542   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5543   ins_encode %{
 5544     __ movsbl($dst$$Register, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Short (16bit signed) into Long Register
 5550 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5551   match(Set dst (ConvI2L (LoadS mem)));
 5552   effect(KILL cr);
 5553 
 5554   ins_cost(375);
 5555   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5556             "MOV    $dst.hi,$dst.lo\n\t"
 5557             "SAR    $dst.hi,15" %}
 5558 
 5559   ins_encode %{
 5560     __ movswl($dst$$Register, $mem$$Address);
 5561     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5562     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
 5563   %}
 5564 
 5565   ins_pipe(ialu_reg_mem);
 5566 %}
 5567 
 5568 // Load Unsigned Short/Char (16bit unsigned)
 5569 instruct loadUS(rRegI dst, memory mem) %{
 5570   match(Set dst (LoadUS mem));
 5571 
 5572   ins_cost(125);
 5573   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5574 
 5575   ins_encode %{
 5576     __ movzwl($dst$$Register, $mem$$Address);
 5577   %}
 5578 
 5579   ins_pipe(ialu_reg_mem);
 5580 %}
 5581 
 5582 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5583 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5584   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5585 
 5586   ins_cost(125);
 5587   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5588   ins_encode %{
 5589     __ movsbl($dst$$Register, $mem$$Address);
 5590   %}
 5591   ins_pipe(ialu_reg_mem);
 5592 %}
 5593 
 5594 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5595 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5596   match(Set dst (ConvI2L (LoadUS mem)));
 5597   effect(KILL cr);
 5598 
 5599   ins_cost(250);
 5600   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5601             "XOR    $dst.hi,$dst.hi" %}
 5602 
 5603   ins_encode %{
 5604     __ movzwl($dst$$Register, $mem$$Address);
 5605     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5606   %}
 5607 
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5612 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movzbl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5627 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5628   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5629   effect(KILL cr);
 5630 
 5631   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5632             "XOR    $dst.hi,$dst.hi\n\t"
 5633             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5634   ins_encode %{
 5635     Register Rdst = $dst$$Register;
 5636     __ movzwl(Rdst, $mem$$Address);
 5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5638     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Integer
 5644 instruct loadI(rRegI dst, memory mem) %{
 5645   match(Set dst (LoadI mem));
 5646 
 5647   ins_cost(125);
 5648   format %{ "MOV    $dst,$mem\t# int" %}
 5649 
 5650   ins_encode %{
 5651     __ movl($dst$$Register, $mem$$Address);
 5652   %}
 5653 
 5654   ins_pipe(ialu_reg_mem);
 5655 %}
 5656 
 5657 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5658 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5659   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5660 
 5661   ins_cost(125);
 5662   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5663   ins_encode %{
 5664     __ movsbl($dst$$Register, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_mem);
 5667 %}
 5668 
 5669 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5670 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5671   match(Set dst (AndI (LoadI mem) mask));
 5672 
 5673   ins_cost(125);
 5674   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5675   ins_encode %{
 5676     __ movzbl($dst$$Register, $mem$$Address);
 5677   %}
 5678   ins_pipe(ialu_reg_mem);
 5679 %}
 5680 
 5681 // Load Integer (32 bit signed) to Short (16 bit signed)
 5682 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5683   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5684 
 5685   ins_cost(125);
 5686   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5687   ins_encode %{
 5688     __ movswl($dst$$Register, $mem$$Address);
 5689   %}
 5690   ins_pipe(ialu_reg_mem);
 5691 %}
 5692 
 5693 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5694 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5695   match(Set dst (AndI (LoadI mem) mask));
 5696 
 5697   ins_cost(125);
 5698   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5699   ins_encode %{
 5700     __ movzwl($dst$$Register, $mem$$Address);
 5701   %}
 5702   ins_pipe(ialu_reg_mem);
 5703 %}
 5704 
 5705 // Load Integer into Long Register
 5706 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5707   match(Set dst (ConvI2L (LoadI mem)));
 5708   effect(KILL cr);
 5709 
 5710   ins_cost(375);
 5711   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5712             "MOV    $dst.hi,$dst.lo\n\t"
 5713             "SAR    $dst.hi,31" %}
 5714 
 5715   ins_encode %{
 5716     __ movl($dst$$Register, $mem$$Address);
 5717     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5718     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5719   %}
 5720 
 5721   ins_pipe(ialu_reg_mem);
 5722 %}
 5723 
 5724 // Load Integer with mask 0xFF into Long Register
 5725 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5726   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5727   effect(KILL cr);
 5728 
 5729   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5730             "XOR    $dst.hi,$dst.hi" %}
 5731   ins_encode %{
 5732     Register Rdst = $dst$$Register;
 5733     __ movzbl(Rdst, $mem$$Address);
 5734     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5735   %}
 5736   ins_pipe(ialu_reg_mem);
 5737 %}
 5738 
 5739 // Load Integer with mask 0xFFFF into Long Register
 5740 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5741   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5742   effect(KILL cr);
 5743 
 5744   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5745             "XOR    $dst.hi,$dst.hi" %}
 5746   ins_encode %{
 5747     Register Rdst = $dst$$Register;
 5748     __ movzwl(Rdst, $mem$$Address);
 5749     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5750   %}
 5751   ins_pipe(ialu_reg_mem);
 5752 %}
 5753 
 5754 // Load Integer with 31-bit mask into Long Register
 5755 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5756   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5757   effect(KILL cr);
 5758 
 5759   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5760             "XOR    $dst.hi,$dst.hi\n\t"
 5761             "AND    $dst.lo,$mask" %}
 5762   ins_encode %{
 5763     Register Rdst = $dst$$Register;
 5764     __ movl(Rdst, $mem$$Address);
 5765     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5766     __ andl(Rdst, $mask$$constant);
 5767   %}
 5768   ins_pipe(ialu_reg_mem);
 5769 %}
 5770 
 5771 // Load Unsigned Integer into Long Register
 5772 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5773   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5774   effect(KILL cr);
 5775 
 5776   ins_cost(250);
 5777   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5778             "XOR    $dst.hi,$dst.hi" %}
 5779 
 5780   ins_encode %{
 5781     __ movl($dst$$Register, $mem$$Address);
 5782     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5783   %}
 5784 
 5785   ins_pipe(ialu_reg_mem);
 5786 %}
 5787 
 5788 // Load Long.  The two MOVs must not clobber the address while loading,
 5789 // so restrict the address register to ESI.
 5790 instruct loadL(eRegL dst, load_long_memory mem) %{
 5791   predicate(!((LoadLNode*)n)->require_atomic_access());
 5792   match(Set dst (LoadL mem));
 5793 
 5794   ins_cost(250);
 5795   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5796             "MOV    $dst.hi,$mem+4" %}
 5797 
 5798   ins_encode %{
 5799     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5800     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5801     __ movl($dst$$Register, Amemlo);
 5802     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5803   %}
 5804 
 5805   ins_pipe(ialu_reg_long_mem);
 5806 %}
 5807 
 5808 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5809 // then store it down to the stack and reload on the int
 5810 // side.
 5811 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5812   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5813   match(Set dst (LoadL mem));
 5814 
 5815   ins_cost(200);
 5816   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5817             "FISTp  $dst" %}
 5818   ins_encode(enc_loadL_volatile(mem,dst));
 5819   ins_pipe( fpu_reg_mem );
 5820 %}
 5821 
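      // With SSE2 (UseSSE>=2), the same atomic 64-bit load is done below with a
      // single MOVSD through an XMM temporary instead of the FPU.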
 5822 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5823   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5824   match(Set dst (LoadL mem));
 5825   effect(TEMP tmp);
 5826   ins_cost(180);
 5827   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5828             "MOVSD  $dst,$tmp" %}
 5829   ins_encode %{
 5830     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5831     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5832   %}
 5833   ins_pipe( pipe_slow );
 5834 %}
 5835 
 5836 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5837   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5838   match(Set dst (LoadL mem));
 5839   effect(TEMP tmp);
 5840   ins_cost(160);
 5841   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5842             "MOVD   $dst.lo,$tmp\n\t"
 5843             "PSRLQ  $tmp,32\n\t"
 5844             "MOVD   $dst.hi,$tmp" %}
 5845   ins_encode %{
 5846     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5847     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5848     __ psrlq($tmp$$XMMRegister, 32);
 5849     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5850   %}
 5851   ins_pipe( pipe_slow );
 5852 %}
 5853 
 5854 // Load Range
 5855 instruct loadRange(rRegI dst, memory mem) %{
 5856   match(Set dst (LoadRange mem));
 5857 
 5858   ins_cost(125);
 5859   format %{ "MOV    $dst,$mem" %}
 5860   opcode(0x8B);
 5861   ins_encode( OpcP, RegMem(dst,mem));
 5862   ins_pipe( ialu_reg_mem );
 5863 %}
 5864 
 5865 
 5866 // Load Pointer
 5867 instruct loadP(eRegP dst, memory mem) %{
 5868   match(Set dst (LoadP mem));
 5869 
 5870   ins_cost(125);
 5871   format %{ "MOV    $dst,$mem" %}
 5872   opcode(0x8B);
 5873   ins_encode( OpcP, RegMem(dst,mem));
 5874   ins_pipe( ialu_reg_mem );
 5875 %}
 5876 
 5877 // Load Klass Pointer
 5878 instruct loadKlass(eRegP dst, memory mem) %{
 5879   match(Set dst (LoadKlass mem));
 5880 
 5881   ins_cost(125);
 5882   format %{ "MOV    $dst,$mem" %}
 5883   opcode(0x8B);
 5884   ins_encode( OpcP, RegMem(dst,mem));
 5885   ins_pipe( ialu_reg_mem );
 5886 %}
 5887 
 5888 // Load Double
 5889 instruct loadDPR(regDPR dst, memory mem) %{
 5890   predicate(UseSSE<=1);
 5891   match(Set dst (LoadD mem));
 5892 
 5893   ins_cost(150);
 5894   format %{ "FLD_D  ST,$mem\n\t"
 5895             "FSTP   $dst" %}
 5896   opcode(0xDD);               /* DD /0 */
 5897   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5898               Pop_Reg_DPR(dst) );
 5899   ins_pipe( fpu_reg_mem );
 5900 %}
 5901 
 5902 // Load Double to XMM
 5903 instruct loadD(regD dst, memory mem) %{
 5904   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5905   match(Set dst (LoadD mem));
 5906   ins_cost(145);
 5907   format %{ "MOVSD  $dst,$mem" %}
 5908   ins_encode %{
 5909     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5910   %}
 5911   ins_pipe( pipe_slow );
 5912 %}
 5913 
 5914 instruct loadD_partial(regD dst, memory mem) %{
 5915   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5916   match(Set dst (LoadD mem));
 5917   ins_cost(145);
 5918   format %{ "MOVLPD $dst,$mem" %}
 5919   ins_encode %{
 5920     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5921   %}
 5922   ins_pipe( pipe_slow );
 5923 %}
 5924 
 5925 // Load to XMM register (single-precision floating point)
 5926 // MOVSS instruction
 5927 instruct loadF(regF dst, memory mem) %{
 5928   predicate(UseSSE>=1);
 5929   match(Set dst (LoadF mem));
 5930   ins_cost(145);
 5931   format %{ "MOVSS  $dst,$mem" %}
 5932   ins_encode %{
 5933     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5934   %}
 5935   ins_pipe( pipe_slow );
 5936 %}
 5937 
 5938 // Load Float
 5939 instruct loadFPR(regFPR dst, memory mem) %{
 5940   predicate(UseSSE==0);
 5941   match(Set dst (LoadF mem));
 5942 
 5943   ins_cost(150);
 5944   format %{ "FLD_S  ST,$mem\n\t"
 5945             "FSTP   $dst" %}
 5946   opcode(0xD9);               /* D9 /0 */
 5947   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5948               Pop_Reg_FPR(dst) );
 5949   ins_pipe( fpu_reg_mem );
 5950 %}
 5951 
 5952 // Load Effective Address
 5953 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5954   match(Set dst mem);
 5955 
 5956   ins_cost(110);
 5957   format %{ "LEA    $dst,$mem" %}
 5958   opcode(0x8D);
 5959   ins_encode( OpcP, RegMem(dst,mem));
 5960   ins_pipe( ialu_reg_reg_fat );
 5961 %}
 5962 
 5963 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5964   match(Set dst mem);
 5965 
 5966   ins_cost(110);
 5967   format %{ "LEA    $dst,$mem" %}
 5968   opcode(0x8D);
 5969   ins_encode( OpcP, RegMem(dst,mem));
 5970   ins_pipe( ialu_reg_reg_fat );
 5971 %}
 5972 
 5973 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5974   match(Set dst mem);
 5975 
 5976   ins_cost(110);
 5977   format %{ "LEA    $dst,$mem" %}
 5978   opcode(0x8D);
 5979   ins_encode( OpcP, RegMem(dst,mem));
 5980   ins_pipe( ialu_reg_reg_fat );
 5981 %}
 5982 
 5983 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5984   match(Set dst mem);
 5985 
 5986   ins_cost(110);
 5987   format %{ "LEA    $dst,$mem" %}
 5988   opcode(0x8D);
 5989   ins_encode( OpcP, RegMem(dst,mem));
 5990   ins_pipe( ialu_reg_reg_fat );
 5991 %}
 5992 
 5993 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5994   match(Set dst mem);
 5995 
 5996   ins_cost(110);
 5997   format %{ "LEA    $dst,$mem" %}
 5998   opcode(0x8D);
 5999   ins_encode( OpcP, RegMem(dst,mem));
 6000   ins_pipe( ialu_reg_reg_fat );
 6001 %}
 6002 
 6003 // Load Constant
 6004 instruct loadConI(rRegI dst, immI src) %{
 6005   match(Set dst src);
 6006 
 6007   format %{ "MOV    $dst,$src" %}
 6008   ins_encode( LdImmI(dst, src) );
 6009   ins_pipe( ialu_reg_fat );
 6010 %}
 6011 
 6012 // Load Constant zero
 6013 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6014   match(Set dst src);
 6015   effect(KILL cr);
 6016 
 6017   ins_cost(50);
 6018   format %{ "XOR    $dst,$dst" %}
 6019   opcode(0x33);  /* XOR r,r/m */
 6020   ins_encode( OpcP, RegReg( dst, dst ) );
 6021   ins_pipe( ialu_reg );
 6022 %}
 6023 
 6024 instruct loadConP(eRegP dst, immP src) %{
 6025   match(Set dst src);
 6026 
 6027   format %{ "MOV    $dst,$src" %}
 6028   opcode(0xB8);  /* + rd */
 6029   ins_encode( LdImmP(dst, src) );
 6030   ins_pipe( ialu_reg_fat );
 6031 %}
 6032 
 6033 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6034   match(Set dst src);
 6035   effect(KILL cr);
 6036   ins_cost(200);
 6037   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6038             "MOV    $dst.hi,$src.hi" %}
 6039   opcode(0xB8);
 6040   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6041   ins_pipe( ialu_reg_long_fat );
 6042 %}
 6043 
 6044 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6045   match(Set dst src);
 6046   effect(KILL cr);
 6047   ins_cost(150);
 6048   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6049             "XOR    $dst.hi,$dst.hi" %}
 6050   opcode(0x33,0x33);
 6051   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6052   ins_pipe( ialu_reg_long );
 6053 %}
 6054 
 6055 // The instruction usage is guarded by predicate in operand immFPR().
 6056 instruct loadConFPR(regFPR dst, immFPR con) %{
 6057   match(Set dst con);
 6058   ins_cost(125);
 6059   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6060             "FSTP   $dst" %}
 6061   ins_encode %{
 6062     __ fld_s($constantaddress($con));
 6063     __ fstp_d($dst$$reg);
 6064   %}
 6065   ins_pipe(fpu_reg_con);
 6066 %}
 6067 
 6068 // The instruction usage is guarded by predicate in operand immFPR0().
 6069 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6070   match(Set dst con);
 6071   ins_cost(125);
 6072   format %{ "FLDZ   ST\n\t"
 6073             "FSTP   $dst" %}
 6074   ins_encode %{
 6075     __ fldz();
 6076     __ fstp_d($dst$$reg);
 6077   %}
 6078   ins_pipe(fpu_reg_con);
 6079 %}
 6080 
 6081 // The instruction usage is guarded by predicate in operand immFPR1().
 6082 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6083   match(Set dst con);
 6084   ins_cost(125);
 6085   format %{ "FLD1   ST\n\t"
 6086             "FSTP   $dst" %}
 6087   ins_encode %{
 6088     __ fld1();
 6089     __ fstp_d($dst$$reg);
 6090   %}
 6091   ins_pipe(fpu_reg_con);
 6092 %}
 6093 
 6094 // The instruction usage is guarded by predicate in operand immF().
 6095 instruct loadConF(regF dst, immF con) %{
 6096   match(Set dst con);
 6097   ins_cost(125);
 6098   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6099   ins_encode %{
 6100     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6101   %}
 6102   ins_pipe(pipe_slow);
 6103 %}
 6104 
 6105 // The instruction usage is guarded by predicate in operand immF0().
 6106 instruct loadConF0(regF dst, immF0 src) %{
 6107   match(Set dst src);
 6108   ins_cost(100);
 6109   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6110   ins_encode %{
 6111     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6112   %}
 6113   ins_pipe(pipe_slow);
 6114 %}
 6115 
 6116 // The instruction usage is guarded by predicate in operand immDPR().
 6117 instruct loadConDPR(regDPR dst, immDPR con) %{
 6118   match(Set dst con);
 6119   ins_cost(125);
 6120 
 6121   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6122             "FSTP   $dst" %}
 6123   ins_encode %{
 6124     __ fld_d($constantaddress($con));
 6125     __ fstp_d($dst$$reg);
 6126   %}
 6127   ins_pipe(fpu_reg_con);
 6128 %}
 6129 
 6130 // The instruction usage is guarded by predicate in operand immDPR0().
 6131 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6132   match(Set dst con);
 6133   ins_cost(125);
 6134 
 6135   format %{ "FLDZ   ST\n\t"
 6136             "FSTP   $dst" %}
 6137   ins_encode %{
 6138     __ fldz();
 6139     __ fstp_d($dst$$reg);
 6140   %}
 6141   ins_pipe(fpu_reg_con);
 6142 %}
 6143 
 6144 // The instruction usage is guarded by predicate in operand immDPR1().
 6145 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6146   match(Set dst con);
 6147   ins_cost(125);
 6148 
 6149   format %{ "FLD1   ST\n\t"
 6150             "FSTP   $dst" %}
 6151   ins_encode %{
 6152     __ fld1();
 6153     __ fstp_d($dst$$reg);
 6154   %}
 6155   ins_pipe(fpu_reg_con);
 6156 %}
 6157 
 6158 // The instruction usage is guarded by predicate in operand immD().
 6159 instruct loadConD(regD dst, immD con) %{
 6160   match(Set dst con);
 6161   ins_cost(125);
 6162   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6163   ins_encode %{
 6164     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6165   %}
 6166   ins_pipe(pipe_slow);
 6167 %}
 6168 
 6169 // The instruction usage is guarded by predicate in operand immD0().
 6170 instruct loadConD0(regD dst, immD0 src) %{
 6171   match(Set dst src);
 6172   ins_cost(100);
 6173   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6174   ins_encode %{
 6175     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6176   %}
 6177   ins_pipe( pipe_slow );
 6178 %}
 6179 
 6180 // Load Stack Slot
 6181 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6182   match(Set dst src);
 6183   ins_cost(125);
 6184 
 6185   format %{ "MOV    $dst,$src" %}
 6186   opcode(0x8B);
 6187   ins_encode( OpcP, RegMem(dst,src));
 6188   ins_pipe( ialu_reg_mem );
 6189 %}
 6190 
 6191 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6192   match(Set dst src);
 6193 
 6194   ins_cost(200);
 6195   format %{ "MOV    $dst,$src.lo\n\t"
 6196             "MOV    $dst+4,$src.hi" %}
 6197   opcode(0x8B, 0x8B);
 6198   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6199   ins_pipe( ialu_mem_long_reg );
 6200 %}
 6201 
 6202 // Load Stack Slot
 6203 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6204   match(Set dst src);
 6205   ins_cost(125);
 6206 
 6207   format %{ "MOV    $dst,$src" %}
 6208   opcode(0x8B);
 6209   ins_encode( OpcP, RegMem(dst,src));
 6210   ins_pipe( ialu_reg_mem );
 6211 %}
 6212 
 6213 // Load Stack Slot
 6214 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6215   match(Set dst src);
 6216   ins_cost(125);
 6217 
 6218   format %{ "FLD_S  $src\n\t"
 6219             "FSTP   $dst" %}
 6220   opcode(0xD9);               /* D9 /0, FLD m32real */
 6221   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6222               Pop_Reg_FPR(dst) );
 6223   ins_pipe( fpu_reg_mem );
 6224 %}
 6225 
 6226 // Load Stack Slot
 6227 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6228   match(Set dst src);
 6229   ins_cost(125);
 6230 
 6231   format %{ "FLD_D  $src\n\t"
 6232             "FSTP   $dst" %}
 6233   opcode(0xDD);               /* DD /0, FLD m64real */
 6234   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6235               Pop_Reg_DPR(dst) );
 6236   ins_pipe( fpu_reg_mem );
 6237 %}
 6238 
 6239 // Prefetch instructions for allocation.
 6240 // Must be safe to execute with invalid address (cannot fault).
 6241 
 6242 instruct prefetchAlloc0( memory mem ) %{
 6243   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6244   match(PrefetchAllocation mem);
 6245   ins_cost(0);
 6246   size(0);
 6247   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6248   ins_encode();
 6249   ins_pipe(empty);
 6250 %}
 6251 
 6252 instruct prefetchAlloc( memory mem ) %{
 6253   predicate(AllocatePrefetchInstr==3);
 6254   match( PrefetchAllocation mem );
 6255   ins_cost(100);
 6256 
 6257   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6258   ins_encode %{
 6259     __ prefetchw($mem$$Address);
 6260   %}
 6261   ins_pipe(ialu_mem);
 6262 %}
 6263 
 6264 instruct prefetchAllocNTA( memory mem ) %{
 6265   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6266   match(PrefetchAllocation mem);
 6267   ins_cost(100);
 6268 
 6269   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6270   ins_encode %{
 6271     __ prefetchnta($mem$$Address);
 6272   %}
 6273   ins_pipe(ialu_mem);
 6274 %}
 6275 
 6276 instruct prefetchAllocT0( memory mem ) %{
 6277   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6278   match(PrefetchAllocation mem);
 6279   ins_cost(100);
 6280 
 6281   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6282   ins_encode %{
 6283     __ prefetcht0($mem$$Address);
 6284   %}
 6285   ins_pipe(ialu_mem);
 6286 %}
 6287 
 6288 instruct prefetchAllocT2( memory mem ) %{
 6289   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6290   match(PrefetchAllocation mem);
 6291   ins_cost(100);
 6292 
 6293   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6294   ins_encode %{
 6295     __ prefetcht2($mem$$Address);
 6296   %}
 6297   ins_pipe(ialu_mem);
 6298 %}
 6299 
 6300 //----------Store Instructions-------------------------------------------------
 6301 
 6302 // Store Byte
 6303 instruct storeB(memory mem, xRegI src) %{
 6304   match(Set mem (StoreB mem src));
 6305 
 6306   ins_cost(125);
 6307   format %{ "MOV8   $mem,$src" %}
 6308   opcode(0x88);
 6309   ins_encode( OpcP, RegMem( src, mem ) );
 6310   ins_pipe( ialu_mem_reg );
 6311 %}
 6312 
 6313 // Store Char/Short
 6314 instruct storeC(memory mem, rRegI src) %{
 6315   match(Set mem (StoreC mem src));
 6316 
 6317   ins_cost(125);
 6318   format %{ "MOV16  $mem,$src" %}
 6319   opcode(0x89, 0x66);
 6320   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6321   ins_pipe( ialu_mem_reg );
 6322 %}
 6323 
 6324 // Store Integer
 6325 instruct storeI(memory mem, rRegI src) %{
 6326   match(Set mem (StoreI mem src));
 6327 
 6328   ins_cost(125);
 6329   format %{ "MOV    $mem,$src" %}
 6330   opcode(0x89);
 6331   ins_encode( OpcP, RegMem( src, mem ) );
 6332   ins_pipe( ialu_mem_reg );
 6333 %}
 6334 
 6335 // Store Long
 6336 instruct storeL(long_memory mem, eRegL src) %{
 6337   predicate(!((StoreLNode*)n)->require_atomic_access());
 6338   match(Set mem (StoreL mem src));
 6339 
 6340   ins_cost(200);
 6341   format %{ "MOV    $mem,$src.lo\n\t"
 6342             "MOV    $mem+4,$src.hi" %}
 6343   opcode(0x89, 0x89);
 6344   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6345   ins_pipe( ialu_mem_long_reg );
 6346 %}
 6347 
 6348 // Store Long to Integer
 6349 instruct storeL2I(memory mem, eRegL src) %{
 6350   match(Set mem (StoreI mem (ConvL2I src)));
 6351 
 6352   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6353   ins_encode %{
 6354     __ movl($mem$$Address, $src$$Register);
 6355   %}
 6356   ins_pipe(ialu_mem_reg);
 6357 %}
 6358 
 6359 // Volatile Store Long.  Must be atomic, so move it into
 6360 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6361 // target address before the store (for null-ptr checks)
 6362 // so the memory operand is used twice in the encoding.
 6363 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6364   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6365   match(Set mem (StoreL mem src));
 6366   effect( KILL cr );
 6367   ins_cost(400);
 6368   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6369             "FILD   $src\n\t"
 6370             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6371   opcode(0x3B);
 6372   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6373   ins_pipe( fpu_reg_mem );
 6374 %}
 6375 
 6376 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6377   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6378   match(Set mem (StoreL mem src));
 6379   effect( TEMP tmp, KILL cr );
 6380   ins_cost(380);
 6381   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6382             "MOVSD  $tmp,$src\n\t"
 6383             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6384   ins_encode %{
 6385     __ cmpl(rax, $mem$$Address);
 6386     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6387     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6388   %}
 6389   ins_pipe( pipe_slow );
 6390 %}
 6391 
 6392 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6393   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6394   match(Set mem (StoreL mem src));
 6395   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6396   ins_cost(360);
 6397   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6398             "MOVD   $tmp,$src.lo\n\t"
 6399             "MOVD   $tmp2,$src.hi\n\t"
 6400             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6401             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6402   ins_encode %{
 6403     __ cmpl(rax, $mem$$Address);
 6404     __ movdl($tmp$$XMMRegister, $src$$Register);
 6405     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6406     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6407     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6408   %}
 6409   ins_pipe( pipe_slow );
 6410 %}
 6411 
 6412 // Store Pointer; for storing unknown oops and raw pointers
 6413 instruct storeP(memory mem, anyRegP src) %{
 6414   match(Set mem (StoreP mem src));
 6415 
 6416   ins_cost(125);
 6417   format %{ "MOV    $mem,$src" %}
 6418   opcode(0x89);
 6419   ins_encode( OpcP, RegMem( src, mem ) );
 6420   ins_pipe( ialu_mem_reg );
 6421 %}
 6422 
 6423 // Store Integer Immediate
 6424 instruct storeImmI(memory mem, immI src) %{
 6425   match(Set mem (StoreI mem src));
 6426 
 6427   ins_cost(150);
 6428   format %{ "MOV    $mem,$src" %}
 6429   opcode(0xC7);               /* C7 /0 */
 6430   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6431   ins_pipe( ialu_mem_imm );
 6432 %}
 6433 
 6434 // Store Short/Char Immediate
 6435 instruct storeImmI16(memory mem, immI16 src) %{
 6436   predicate(UseStoreImmI16);
 6437   match(Set mem (StoreC mem src));
 6438 
 6439   ins_cost(150);
 6440   format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0, same as the 32-bit store immediate but with a size prefix */
 6442   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6443   ins_pipe( ialu_mem_imm );
 6444 %}
 6445 
 6446 // Store Pointer Immediate; null pointers or constant oops that do not
 6447 // need card-mark barriers.
 6448 instruct storeImmP(memory mem, immP src) %{
 6449   match(Set mem (StoreP mem src));
 6450 
 6451   ins_cost(150);
 6452   format %{ "MOV    $mem,$src" %}
 6453   opcode(0xC7);               /* C7 /0 */
 6454   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6455   ins_pipe( ialu_mem_imm );
 6456 %}
 6457 
 6458 // Store Byte Immediate
 6459 instruct storeImmB(memory mem, immI8 src) %{
 6460   match(Set mem (StoreB mem src));
 6461 
 6462   ins_cost(150);
 6463   format %{ "MOV8   $mem,$src" %}
 6464   opcode(0xC6);               /* C6 /0 */
 6465   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6466   ins_pipe( ialu_mem_imm );
 6467 %}
 6468 
 6469 // Store CMS card-mark Immediate
 6470 instruct storeImmCM(memory mem, immI8 src) %{
 6471   match(Set mem (StoreCM mem src));
 6472 
 6473   ins_cost(150);
 6474   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6475   opcode(0xC6);               /* C6 /0 */
 6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6477   ins_pipe( ialu_mem_imm );
 6478 %}
 6479 
 6480 // Store Double
 6481 instruct storeDPR( memory mem, regDPR1 src) %{
 6482   predicate(UseSSE<=1);
 6483   match(Set mem (StoreD mem src));
 6484 
 6485   ins_cost(100);
 6486   format %{ "FST_D  $mem,$src" %}
 6487   opcode(0xDD);       /* DD /2 */
 6488   ins_encode( enc_FPR_store(mem,src) );
 6489   ins_pipe( fpu_mem_reg );
 6490 %}
 6491 
 6492 // Store double does rounding on x86
 6493 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6494   predicate(UseSSE<=1);
 6495   match(Set mem (StoreD mem (RoundDouble src)));
 6496 
 6497   ins_cost(100);
 6498   format %{ "FST_D  $mem,$src\t# round" %}
 6499   opcode(0xDD);       /* DD /2 */
 6500   ins_encode( enc_FPR_store(mem,src) );
 6501   ins_pipe( fpu_mem_reg );
 6502 %}
 6503 
// Store XMM register to memory (double-precision floating point)
 6505 // MOVSD instruction
 6506 instruct storeD(memory mem, regD src) %{
 6507   predicate(UseSSE>=2);
 6508   match(Set mem (StoreD mem src));
 6509   ins_cost(95);
 6510   format %{ "MOVSD  $mem,$src" %}
 6511   ins_encode %{
 6512     __ movdbl($mem$$Address, $src$$XMMRegister);
 6513   %}
 6514   ins_pipe( pipe_slow );
 6515 %}
 6516 
 6517 // Store XMM register to memory (single-precision floating point)
 6518 // MOVSS instruction
 6519 instruct storeF(memory mem, regF src) %{
 6520   predicate(UseSSE>=1);
 6521   match(Set mem (StoreF mem src));
 6522   ins_cost(95);
 6523   format %{ "MOVSS  $mem,$src" %}
 6524   ins_encode %{
 6525     __ movflt($mem$$Address, $src$$XMMRegister);
 6526   %}
 6527   ins_pipe( pipe_slow );
 6528 %}
 6529 
 6530 
 6531 // Store Float
 6532 instruct storeFPR( memory mem, regFPR1 src) %{
 6533   predicate(UseSSE==0);
 6534   match(Set mem (StoreF mem src));
 6535 
 6536   ins_cost(100);
 6537   format %{ "FST_S  $mem,$src" %}
 6538   opcode(0xD9);       /* D9 /2 */
 6539   ins_encode( enc_FPR_store(mem,src) );
 6540   ins_pipe( fpu_mem_reg );
 6541 %}
 6542 
 6543 // Store Float does rounding on x86
 6544 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6545   predicate(UseSSE==0);
 6546   match(Set mem (StoreF mem (RoundFloat src)));
 6547 
 6548   ins_cost(100);
 6549   format %{ "FST_S  $mem,$src\t# round" %}
 6550   opcode(0xD9);       /* D9 /2 */
 6551   ins_encode( enc_FPR_store(mem,src) );
 6552   ins_pipe( fpu_mem_reg );
 6553 %}
 6554 
// Store a Double as a Float (ConvD2F); the single-precision store does the rounding on x86
 6556 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6557   predicate(UseSSE<=1);
 6558   match(Set mem (StoreF mem (ConvD2F src)));
 6559 
 6560   ins_cost(100);
 6561   format %{ "FST_S  $mem,$src\t# D-round" %}
 6562   opcode(0xD9);       /* D9 /2 */
 6563   ins_encode( enc_FPR_store(mem,src) );
 6564   ins_pipe( fpu_mem_reg );
 6565 %}
 6566 
 6567 // Store immediate Float value (it is faster than store from FPU register)
 6568 // The instruction usage is guarded by predicate in operand immFPR().
 6569 instruct storeFPR_imm( memory mem, immFPR src) %{
 6570   match(Set mem (StoreF mem src));
 6571 
 6572   ins_cost(50);
 6573   format %{ "MOV    $mem,$src\t# store float" %}
 6574   opcode(0xC7);               /* C7 /0 */
 6575   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6576   ins_pipe( ialu_mem_imm );
 6577 %}
 6578 
 6579 // Store immediate Float value (it is faster than store from XMM register)
 6580 // The instruction usage is guarded by predicate in operand immF().
 6581 instruct storeF_imm( memory mem, immF src) %{
 6582   match(Set mem (StoreF mem src));
 6583 
 6584   ins_cost(50);
 6585   format %{ "MOV    $mem,$src\t# store float" %}
 6586   opcode(0xC7);               /* C7 /0 */
 6587   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6588   ins_pipe( ialu_mem_imm );
 6589 %}
 6590 
 6591 // Store Integer to stack slot
 6592 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6593   match(Set dst src);
 6594 
 6595   ins_cost(100);
 6596   format %{ "MOV    $dst,$src" %}
 6597   opcode(0x89);
 6598   ins_encode( OpcPRegSS( dst, src ) );
 6599   ins_pipe( ialu_mem_reg );
 6600 %}
 6601 
// Store Pointer to stack slot
 6603 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6604   match(Set dst src);
 6605 
 6606   ins_cost(100);
 6607   format %{ "MOV    $dst,$src" %}
 6608   opcode(0x89);
 6609   ins_encode( OpcPRegSS( dst, src ) );
 6610   ins_pipe( ialu_mem_reg );
 6611 %}
 6612 
 6613 // Store Long to stack slot
 6614 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6615   match(Set dst src);
 6616 
 6617   ins_cost(200);
 6618   format %{ "MOV    $dst,$src.lo\n\t"
 6619             "MOV    $dst+4,$src.hi" %}
 6620   opcode(0x89, 0x89);
 6621   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6622   ins_pipe( ialu_mem_long_reg );
 6623 %}
 6624 
 6625 //----------MemBar Instructions-----------------------------------------------
 6626 // Memory barrier flavors
 6627 
 6628 instruct membar_acquire() %{
 6629   match(MemBarAcquire);
 6630   match(LoadFence);
 6631   ins_cost(400);
 6632 
 6633   size(0);
 6634   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6635   ins_encode();
 6636   ins_pipe(empty);
 6637 %}
 6638 
 6639 instruct membar_acquire_lock() %{
 6640   match(MemBarAcquireLock);
 6641   ins_cost(0);
 6642 
 6643   size(0);
 6644   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6645   ins_encode( );
 6646   ins_pipe(empty);
 6647 %}
 6648 
 6649 instruct membar_release() %{
 6650   match(MemBarRelease);
 6651   match(StoreFence);
 6652   ins_cost(400);
 6653 
 6654   size(0);
 6655   format %{ "MEMBAR-release ! (empty encoding)" %}
 6656   ins_encode( );
 6657   ins_pipe(empty);
 6658 %}
 6659 
 6660 instruct membar_release_lock() %{
 6661   match(MemBarReleaseLock);
 6662   ins_cost(0);
 6663 
 6664   size(0);
 6665   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6666   ins_encode( );
 6667   ins_pipe(empty);
 6668 %}
 6669 
 6670 instruct membar_volatile(eFlagsReg cr) %{
 6671   match(MemBarVolatile);
 6672   effect(KILL cr);
 6673   ins_cost(400);
 6674 
 6675   format %{
 6676     $$template
 6677     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6678   %}
 6679   ins_encode %{
 6680     __ membar(Assembler::StoreLoad);
 6681   %}
 6682   ins_pipe(pipe_slow);
 6683 %}
 6684 
 6685 instruct unnecessary_membar_volatile() %{
 6686   match(MemBarVolatile);
 6687   predicate(Matcher::post_store_load_barrier(n));
 6688   ins_cost(0);
 6689 
 6690   size(0);
 6691   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6692   ins_encode( );
 6693   ins_pipe(empty);
 6694 %}
 6695 
 6696 instruct membar_storestore() %{
 6697   match(MemBarStoreStore);
 6698   match(StoreStoreFence);
 6699   ins_cost(0);
 6700 
 6701   size(0);
 6702   format %{ "MEMBAR-storestore (empty encoding)" %}
 6703   ins_encode( );
 6704   ins_pipe(empty);
 6705 %}
 6706 
 6707 //----------Move Instructions--------------------------------------------------
 6708 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6709   match(Set dst (CastX2P src));
 6710   format %{ "# X2P  $dst, $src" %}
 6711   ins_encode( /*empty encoding*/ );
 6712   ins_cost(0);
 6713   ins_pipe(empty);
 6714 %}
 6715 
 6716 instruct castP2X(rRegI dst, eRegP src ) %{
 6717   match(Set dst (CastP2X src));
 6718   ins_cost(50);
 6719   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6720   ins_encode( enc_Copy( dst, src) );
 6721   ins_pipe( ialu_reg_reg );
 6722 %}
 6723 
 6724 //----------Conditional Move---------------------------------------------------
 6725 // Conditional move
 6726 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6727   predicate(!VM_Version::supports_cmov() );
 6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6729   ins_cost(200);
 6730   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6731             "MOV    $dst,$src\n"
 6732       "skip:" %}
 6733   ins_encode %{
 6734     Label Lskip;
 6735     // Invert sense of branch from sense of CMOV
 6736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6737     __ movl($dst$$Register, $src$$Register);
 6738     __ bind(Lskip);
 6739   %}
 6740   ins_pipe( pipe_cmov_reg );
 6741 %}
 6742 
 6743 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6744   predicate(!VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6748             "MOV    $dst,$src\n"
 6749       "skip:" %}
 6750   ins_encode %{
 6751     Label Lskip;
 6752     // Invert sense of branch from sense of CMOV
 6753     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6754     __ movl($dst$$Register, $src$$Register);
 6755     __ bind(Lskip);
 6756   %}
 6757   ins_pipe( pipe_cmov_reg );
 6758 %}
 6759 
 6760 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6761   predicate(VM_Version::supports_cmov() );
 6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6763   ins_cost(200);
 6764   format %{ "CMOV$cop $dst,$src" %}
 6765   opcode(0x0F,0x40);
 6766   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6767   ins_pipe( pipe_cmov_reg );
 6768 %}
 6769 
 6770 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6771   predicate(VM_Version::supports_cmov() );
 6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6773   ins_cost(200);
 6774   format %{ "CMOV$cop $dst,$src" %}
 6775   opcode(0x0F,0x40);
 6776   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6777   ins_pipe( pipe_cmov_reg );
 6778 %}
 6779 
 6780 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6781   predicate(VM_Version::supports_cmov() );
 6782   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6783   ins_cost(200);
 6784   expand %{
 6785     cmovI_regU(cop, cr, dst, src);
 6786   %}
 6787 %}
 6788 
 6789 // Conditional move
 6790 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6791   predicate(VM_Version::supports_cmov() );
 6792   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6793   ins_cost(250);
 6794   format %{ "CMOV$cop $dst,$src" %}
 6795   opcode(0x0F,0x40);
 6796   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6797   ins_pipe( pipe_cmov_mem );
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6802   predicate(VM_Version::supports_cmov() );
 6803   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6804   ins_cost(250);
 6805   format %{ "CMOV$cop $dst,$src" %}
 6806   opcode(0x0F,0x40);
 6807   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6808   ins_pipe( pipe_cmov_mem );
 6809 %}
 6810 
 6811 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6812   predicate(VM_Version::supports_cmov() );
 6813   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6814   ins_cost(250);
 6815   expand %{
 6816     cmovI_memU(cop, cr, dst, src);
 6817   %}
 6818 %}
 6819 
 6820 // Conditional move
 6821 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6822   predicate(VM_Version::supports_cmov() );
 6823   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6824   ins_cost(200);
 6825   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6826   opcode(0x0F,0x40);
 6827   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6828   ins_pipe( pipe_cmov_reg );
 6829 %}
 6830 
 6831 // Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a CMOV here.
 6835 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6836   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6837   ins_cost(300);
 6838   format %{ "Jn$cop   skip\n\t"
 6839           "MOV    $dst,$src\t# pointer\n"
 6840       "skip:" %}
 6841   opcode(0x8b);
 6842   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6843   ins_pipe( pipe_cmov_reg );
 6844 %}
 6845 
 6846 // Conditional move
 6847 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6848   predicate(VM_Version::supports_cmov() );
 6849   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6850   ins_cost(200);
 6851   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6852   opcode(0x0F,0x40);
 6853   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6854   ins_pipe( pipe_cmov_reg );
 6855 %}
 6856 
 6857 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6858   predicate(VM_Version::supports_cmov() );
 6859   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6860   ins_cost(200);
 6861   expand %{
 6862     cmovP_regU(cop, cr, dst, src);
 6863   %}
 6864 %}
 6865 
 6866 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6867 // correctly meets the two pointer arguments; one is an incoming
 6868 // register but the other is a memory operand.  ALSO appears to
 6869 // be buggy with implicit null checks.
 6870 //
 6871 //// Conditional move
 6872 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6873 //  predicate(VM_Version::supports_cmov() );
 6874 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6875 //  ins_cost(250);
 6876 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6877 //  opcode(0x0F,0x40);
 6878 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6879 //  ins_pipe( pipe_cmov_mem );
 6880 //%}
 6881 //
 6882 //// Conditional move
 6883 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6884 //  predicate(VM_Version::supports_cmov() );
 6885 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6886 //  ins_cost(250);
 6887 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6888 //  opcode(0x0F,0x40);
 6889 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6890 //  ins_pipe( pipe_cmov_mem );
 6891 //%}
 6892 
 6893 // Conditional move
 6894 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6895   predicate(UseSSE<=1);
 6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6897   ins_cost(200);
 6898   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6899   opcode(0xDA);
 6900   ins_encode( enc_cmov_dpr(cop,src) );
 6901   ins_pipe( pipe_cmovDPR_reg );
 6902 %}
 6903 
 6904 // Conditional move
 6905 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6906   predicate(UseSSE==0);
 6907   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6908   ins_cost(200);
 6909   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6910   opcode(0xDA);
 6911   ins_encode( enc_cmov_dpr(cop,src) );
 6912   ins_pipe( pipe_cmovDPR_reg );
 6913 %}
 6914 
 6915 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6916 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6917   predicate(UseSSE<=1);
 6918   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6919   ins_cost(200);
 6920   format %{ "Jn$cop   skip\n\t"
 6921             "MOV    $dst,$src\t# double\n"
 6922       "skip:" %}
 6923   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6924   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6925   ins_pipe( pipe_cmovDPR_reg );
 6926 %}
 6927 
 6928 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6929 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6930   predicate(UseSSE==0);
 6931   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6932   ins_cost(200);
 6933   format %{ "Jn$cop    skip\n\t"
 6934             "MOV    $dst,$src\t# float\n"
 6935       "skip:" %}
 6936   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6937   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6938   ins_pipe( pipe_cmovDPR_reg );
 6939 %}
 6940 
// There is no CMOV for SSE/SSE2 registers, so emulate one with a branch around a move.
 6942 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6943   predicate (UseSSE>=1);
 6944   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6945   ins_cost(200);
 6946   format %{ "Jn$cop   skip\n\t"
 6947             "MOVSS  $dst,$src\t# float\n"
 6948       "skip:" %}
 6949   ins_encode %{
 6950     Label skip;
 6951     // Invert sense of branch from sense of CMOV
 6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6953     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6954     __ bind(skip);
 6955   %}
 6956   ins_pipe( pipe_slow );
 6957 %}
 6958 
// There is no CMOV for SSE/SSE2 registers, so emulate one with a branch around a move.
 6960 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6961   predicate (UseSSE>=2);
 6962   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6963   ins_cost(200);
 6964   format %{ "Jn$cop   skip\n\t"
 6965             "MOVSD  $dst,$src\t# float\n"
 6966       "skip:" %}
 6967   ins_encode %{
 6968     Label skip;
 6969     // Invert sense of branch from sense of CMOV
 6970     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6971     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6972     __ bind(skip);
 6973   %}
 6974   ins_pipe( pipe_slow );
 6975 %}
 6976 
 6977 // unsigned version
 6978 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6979   predicate (UseSSE>=1);
 6980   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6981   ins_cost(200);
 6982   format %{ "Jn$cop   skip\n\t"
 6983             "MOVSS  $dst,$src\t# float\n"
 6984       "skip:" %}
 6985   ins_encode %{
 6986     Label skip;
 6987     // Invert sense of branch from sense of CMOV
 6988     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6989     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6990     __ bind(skip);
 6991   %}
 6992   ins_pipe( pipe_slow );
 6993 %}
 6994 
 6995 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6996   predicate (UseSSE>=1);
 6997   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6998   ins_cost(200);
 6999   expand %{
 7000     fcmovF_regU(cop, cr, dst, src);
 7001   %}
 7002 %}
 7003 
 7004 // unsigned version
 7005 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7006   predicate (UseSSE>=2);
 7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7008   ins_cost(200);
 7009   format %{ "Jn$cop   skip\n\t"
 7010             "MOVSD  $dst,$src\t# float\n"
 7011       "skip:" %}
 7012   ins_encode %{
 7013     Label skip;
 7014     // Invert sense of branch from sense of CMOV
 7015     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7016     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7017     __ bind(skip);
 7018   %}
 7019   ins_pipe( pipe_slow );
 7020 %}
 7021 
 7022 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7023   predicate (UseSSE>=2);
 7024   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   expand %{
 7027     fcmovD_regU(cop, cr, dst, src);
 7028   %}
 7029 %}
 7030 
 7031 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7032   predicate(VM_Version::supports_cmov() );
 7033   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7034   ins_cost(200);
 7035   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7036             "CMOV$cop $dst.hi,$src.hi" %}
 7037   opcode(0x0F,0x40);
 7038   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7039   ins_pipe( pipe_cmov_reg_long );
 7040 %}
 7041 
 7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7043   predicate(VM_Version::supports_cmov() );
 7044   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7045   ins_cost(200);
 7046   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7047             "CMOV$cop $dst.hi,$src.hi" %}
 7048   opcode(0x0F,0x40);
 7049   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7050   ins_pipe( pipe_cmov_reg_long );
 7051 %}
 7052 
 7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7054   predicate(VM_Version::supports_cmov() );
 7055   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7056   ins_cost(200);
 7057   expand %{
 7058     cmovL_regU(cop, cr, dst, src);
 7059   %}
 7060 %}
 7061 
 7062 //----------Arithmetic Instructions--------------------------------------------
 7063 //----------Addition Instructions----------------------------------------------
 7064 
 7065 // Integer Addition Instructions
 7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7067   match(Set dst (AddI dst src));
 7068   effect(KILL cr);
 7069 
 7070   size(2);
 7071   format %{ "ADD    $dst,$src" %}
 7072   opcode(0x03);
 7073   ins_encode( OpcP, RegReg( dst, src) );
 7074   ins_pipe( ialu_reg_reg );
 7075 %}
 7076 
 7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7078   match(Set dst (AddI dst src));
 7079   effect(KILL cr);
 7080 
 7081   format %{ "ADD    $dst,$src" %}
 7082   opcode(0x81, 0x00); /* /0 id */
 7083   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7084   ins_pipe( ialu_reg );
 7085 %}
 7086 
 7087 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7088   predicate(UseIncDec);
 7089   match(Set dst (AddI dst src));
 7090   effect(KILL cr);
 7091 
 7092   size(1);
 7093   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd => INC r32 */
 7095   ins_encode( Opc_plus( primary, dst ) );
 7096   ins_pipe( ialu_reg );
 7097 %}
 7098 
 7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7100   match(Set dst (AddI src0 src1));
 7101   ins_cost(110);
 7102 
 7103   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7104   opcode(0x8D); /* 0x8D /r */
 7105   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7106   ins_pipe( ialu_reg_reg );
 7107 %}
 7108 
 7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7110   match(Set dst (AddP src0 src1));
 7111   ins_cost(110);
 7112 
 7113   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7114   opcode(0x8D); /* 0x8D /r */
 7115   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7116   ins_pipe( ialu_reg_reg );
 7117 %}
 7118 
 7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7120   predicate(UseIncDec);
 7121   match(Set dst (AddI dst src));
 7122   effect(KILL cr);
 7123 
 7124   size(1);
 7125   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd => DEC r32 */
 7127   ins_encode( Opc_plus( primary, dst ) );
 7128   ins_pipe( ialu_reg );
 7129 %}
 7130 
 7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7132   match(Set dst (AddP dst src));
 7133   effect(KILL cr);
 7134 
 7135   size(2);
 7136   format %{ "ADD    $dst,$src" %}
 7137   opcode(0x03);
 7138   ins_encode( OpcP, RegReg( dst, src) );
 7139   ins_pipe( ialu_reg_reg );
 7140 %}
 7141 
 7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7143   match(Set dst (AddP dst src));
 7144   effect(KILL cr);
 7145 
 7146   format %{ "ADD    $dst,$src" %}
 7147   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7148   // ins_encode( RegImm( dst, src) );
 7149   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7150   ins_pipe( ialu_reg );
 7151 %}
 7152 
 7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7154   match(Set dst (AddI dst (LoadI src)));
 7155   effect(KILL cr);
 7156 
 7157   ins_cost(150);
 7158   format %{ "ADD    $dst,$src" %}
 7159   opcode(0x03);
 7160   ins_encode( OpcP, RegMem( dst, src) );
 7161   ins_pipe( ialu_reg_mem );
 7162 %}
 7163 
 7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7165   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7166   effect(KILL cr);
 7167 
 7168   ins_cost(150);
 7169   format %{ "ADD    $dst,$src" %}
 7170   opcode(0x01);  /* Opcode 01 /r */
 7171   ins_encode( OpcP, RegMem( src, dst ) );
 7172   ins_pipe( ialu_mem_reg );
 7173 %}
 7174 
 7175 // Add Memory with Immediate
 7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7178   effect(KILL cr);
 7179 
 7180   ins_cost(125);
 7181   format %{ "ADD    $dst,$src" %}
 7182   opcode(0x81);               /* Opcode 81 /0 id */
 7183   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7184   ins_pipe( ialu_mem_imm );
 7185 %}
 7186 
 7187 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7189   effect(KILL cr);
 7190 
 7191   ins_cost(125);
 7192   format %{ "INC    $dst" %}
 7193   opcode(0xFF);               /* Opcode FF /0 */
 7194   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7195   ins_pipe( ialu_mem_imm );
 7196 %}
 7197 
 7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7199   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7200   effect(KILL cr);
 7201 
 7202   ins_cost(125);
 7203   format %{ "DEC    $dst" %}
 7204   opcode(0xFF);               /* Opcode FF /1 */
 7205   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7206   ins_pipe( ialu_mem_imm );
 7207 %}
 7208 
 7209 
 7210 instruct checkCastPP( eRegP dst ) %{
 7211   match(Set dst (CheckCastPP dst));
 7212 
 7213   size(0);
 7214   format %{ "#checkcastPP of $dst" %}
 7215   ins_encode( /*empty encoding*/ );
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castPP( eRegP dst ) %{
 7220   match(Set dst (CastPP dst));
 7221   format %{ "#castPP of $dst" %}
 7222   ins_encode( /*empty encoding*/ );
 7223   ins_pipe( empty );
 7224 %}
 7225 
 7226 instruct castII( rRegI dst ) %{
 7227   match(Set dst (CastII dst));
 7228   format %{ "#castII of $dst" %}
 7229   ins_encode( /*empty encoding*/ );
 7230   ins_cost(0);
 7231   ins_pipe( empty );
 7232 %}
 7233 
 7234 instruct castLL( eRegL dst ) %{
 7235   match(Set dst (CastLL dst));
 7236   format %{ "#castLL of $dst" %}
 7237   ins_encode( /*empty encoding*/ );
 7238   ins_cost(0);
 7239   ins_pipe( empty );
 7240 %}
 7241 
 7242 instruct castFF( regF dst ) %{
 7243   predicate(UseSSE >= 1);
 7244   match(Set dst (CastFF dst));
 7245   format %{ "#castFF of $dst" %}
 7246   ins_encode( /*empty encoding*/ );
 7247   ins_cost(0);
 7248   ins_pipe( empty );
 7249 %}
 7250 
 7251 instruct castDD( regD dst ) %{
 7252   predicate(UseSSE >= 2);
 7253   match(Set dst (CastDD dst));
 7254   format %{ "#castDD of $dst" %}
 7255   ins_encode( /*empty encoding*/ );
 7256   ins_cost(0);
 7257   ins_pipe( empty );
 7258 %}
 7259 
 7260 instruct castFF_PR( regFPR dst ) %{
 7261   predicate(UseSSE < 1);
 7262   match(Set dst (CastFF dst));
 7263   format %{ "#castFF of $dst" %}
 7264   ins_encode( /*empty encoding*/ );
 7265   ins_cost(0);
 7266   ins_pipe( empty );
 7267 %}
 7268 
 7269 instruct castDD_PR( regDPR dst ) %{
 7270   predicate(UseSSE < 2);
 7271   match(Set dst (CastDD dst));
 7272   format %{ "#castDD of $dst" %}
 7273   ins_encode( /*empty encoding*/ );
 7274   ins_cost(0);
 7275   ins_pipe( empty );
 7276 %}
 7277 
 7278 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7279 
 7280 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7281   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7282   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7283   effect(KILL cr, KILL oldval);
 7284   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7285             "MOV    $res,0\n\t"
 7286             "JNE,s  fail\n\t"
 7287             "MOV    $res,1\n"
 7288           "fail:" %}
 7289   ins_encode( enc_cmpxchg8(mem_ptr),
 7290               enc_flags_ne_to_boolean(res) );
 7291   ins_pipe( pipe_cmpxchg );
 7292 %}
 7293 
 7294 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7295   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7296   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7297   effect(KILL cr, KILL oldval);
 7298   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7299             "MOV    $res,0\n\t"
 7300             "JNE,s  fail\n\t"
 7301             "MOV    $res,1\n"
 7302           "fail:" %}
 7303   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7304   ins_pipe( pipe_cmpxchg );
 7305 %}
 7306 
 7307 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7308   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7309   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7310   effect(KILL cr, KILL oldval);
 7311   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7312             "MOV    $res,0\n\t"
 7313             "JNE,s  fail\n\t"
 7314             "MOV    $res,1\n"
 7315           "fail:" %}
 7316   ins_encode( enc_cmpxchgb(mem_ptr),
 7317               enc_flags_ne_to_boolean(res) );
 7318   ins_pipe( pipe_cmpxchg );
 7319 %}
 7320 
 7321 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7322   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7323   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7324   effect(KILL cr, KILL oldval);
 7325   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7326             "MOV    $res,0\n\t"
 7327             "JNE,s  fail\n\t"
 7328             "MOV    $res,1\n"
 7329           "fail:" %}
 7330   ins_encode( enc_cmpxchgw(mem_ptr),
 7331               enc_flags_ne_to_boolean(res) );
 7332   ins_pipe( pipe_cmpxchg );
 7333 %}
 7334 
 7335 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7336   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7337   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7338   effect(KILL cr, KILL oldval);
 7339   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7340             "MOV    $res,0\n\t"
 7341             "JNE,s  fail\n\t"
 7342             "MOV    $res,1\n"
 7343           "fail:" %}
 7344   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7345   ins_pipe( pipe_cmpxchg );
 7346 %}
 7347 
 7348 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7349   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7350   effect(KILL cr);
 7351   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7352   ins_encode( enc_cmpxchg8(mem_ptr) );
 7353   ins_pipe( pipe_cmpxchg );
 7354 %}
 7355 
 7356 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7357   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7358   effect(KILL cr);
 7359   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7360   ins_encode( enc_cmpxchg(mem_ptr) );
 7361   ins_pipe( pipe_cmpxchg );
 7362 %}
 7363 
 7364 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7365   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7366   effect(KILL cr);
 7367   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7368   ins_encode( enc_cmpxchgb(mem_ptr) );
 7369   ins_pipe( pipe_cmpxchg );
 7370 %}
 7371 
 7372 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7373   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7374   effect(KILL cr);
 7375   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7376   ins_encode( enc_cmpxchgw(mem_ptr) );
 7377   ins_pipe( pipe_cmpxchg );
 7378 %}
 7379 
 7380 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7381   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7382   effect(KILL cr);
 7383   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7384   ins_encode( enc_cmpxchg(mem_ptr) );
 7385   ins_pipe( pipe_cmpxchg );
 7386 %}
 7387 
 7388 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7389   predicate(n->as_LoadStore()->result_not_used());
 7390   match(Set dummy (GetAndAddB mem add));
 7391   effect(KILL cr);
 7392   format %{ "ADDB  [$mem],$add" %}
 7393   ins_encode %{
 7394     __ lock();
 7395     __ addb($mem$$Address, $add$$constant);
 7396   %}
 7397   ins_pipe( pipe_cmpxchg );
 7398 %}
 7399 
 7400 // Important to match to xRegI: only 8-bit regs.
 7401 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7402   match(Set newval (GetAndAddB mem newval));
 7403   effect(KILL cr);
 7404   format %{ "XADDB  [$mem],$newval" %}
 7405   ins_encode %{
 7406     __ lock();
 7407     __ xaddb($mem$$Address, $newval$$Register);
 7408   %}
 7409   ins_pipe( pipe_cmpxchg );
 7410 %}
 7411 
 7412 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7413   predicate(n->as_LoadStore()->result_not_used());
 7414   match(Set dummy (GetAndAddS mem add));
 7415   effect(KILL cr);
 7416   format %{ "ADDS  [$mem],$add" %}
 7417   ins_encode %{
 7418     __ lock();
 7419     __ addw($mem$$Address, $add$$constant);
 7420   %}
 7421   ins_pipe( pipe_cmpxchg );
 7422 %}
 7423 
 7424 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7425   match(Set newval (GetAndAddS mem newval));
 7426   effect(KILL cr);
 7427   format %{ "XADDS  [$mem],$newval" %}
 7428   ins_encode %{
 7429     __ lock();
 7430     __ xaddw($mem$$Address, $newval$$Register);
 7431   %}
 7432   ins_pipe( pipe_cmpxchg );
 7433 %}
 7434 
 7435 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7436   predicate(n->as_LoadStore()->result_not_used());
 7437   match(Set dummy (GetAndAddI mem add));
 7438   effect(KILL cr);
 7439   format %{ "ADDL  [$mem],$add" %}
 7440   ins_encode %{
 7441     __ lock();
 7442     __ addl($mem$$Address, $add$$constant);
 7443   %}
 7444   ins_pipe( pipe_cmpxchg );
 7445 %}
 7446 
 7447 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7448   match(Set newval (GetAndAddI mem newval));
 7449   effect(KILL cr);
 7450   format %{ "XADDL  [$mem],$newval" %}
 7451   ins_encode %{
 7452     __ lock();
 7453     __ xaddl($mem$$Address, $newval$$Register);
 7454   %}
 7455   ins_pipe( pipe_cmpxchg );
 7456 %}
 7457 
 7458 // Important to match to xRegI: only 8-bit regs.
 7459 instruct xchgB( memory mem, xRegI newval) %{
 7460   match(Set newval (GetAndSetB mem newval));
 7461   format %{ "XCHGB  $newval,[$mem]" %}
 7462   ins_encode %{
 7463     __ xchgb($newval$$Register, $mem$$Address);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
 7467 
 7468 instruct xchgS( memory mem, rRegI newval) %{
 7469   match(Set newval (GetAndSetS mem newval));
 7470   format %{ "XCHGW  $newval,[$mem]" %}
 7471   ins_encode %{
 7472     __ xchgw($newval$$Register, $mem$$Address);
 7473   %}
 7474   ins_pipe( pipe_cmpxchg );
 7475 %}
 7476 
 7477 instruct xchgI( memory mem, rRegI newval) %{
 7478   match(Set newval (GetAndSetI mem newval));
 7479   format %{ "XCHGL  $newval,[$mem]" %}
 7480   ins_encode %{
 7481     __ xchgl($newval$$Register, $mem$$Address);
 7482   %}
 7483   ins_pipe( pipe_cmpxchg );
 7484 %}
 7485 
 7486 instruct xchgP( memory mem, pRegP newval) %{
 7487   match(Set newval (GetAndSetP mem newval));
 7488   format %{ "XCHGL  $newval,[$mem]" %}
 7489   ins_encode %{
 7490     __ xchgl($newval$$Register, $mem$$Address);
 7491   %}
 7492   ins_pipe( pipe_cmpxchg );
 7493 %}
 7494 
 7495 //----------Subtraction Instructions-------------------------------------------
 7496 
 7497 // Integer Subtraction Instructions
 7498 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7499   match(Set dst (SubI dst src));
 7500   effect(KILL cr);
 7501 
 7502   size(2);
 7503   format %{ "SUB    $dst,$src" %}
 7504   opcode(0x2B);
 7505   ins_encode( OpcP, RegReg( dst, src) );
 7506   ins_pipe( ialu_reg_reg );
 7507 %}
 7508 
 7509 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7510   match(Set dst (SubI dst src));
 7511   effect(KILL cr);
 7512 
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7515   // ins_encode( RegImm( dst, src) );
 7516   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7517   ins_pipe( ialu_reg );
 7518 %}
 7519 
 7520 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7521   match(Set dst (SubI dst (LoadI src)));
 7522   effect(KILL cr);
 7523 
 7524   ins_cost(150);
 7525   format %{ "SUB    $dst,$src" %}
 7526   opcode(0x2B);
 7527   ins_encode( OpcP, RegMem( dst, src) );
 7528   ins_pipe( ialu_reg_mem );
 7529 %}
 7530 
 7531 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7532   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7533   effect(KILL cr);
 7534 
 7535   ins_cost(150);
 7536   format %{ "SUB    $dst,$src" %}
 7537   opcode(0x29);  /* Opcode 29 /r */
 7538   ins_encode( OpcP, RegMem( src, dst ) );
 7539   ins_pipe( ialu_mem_reg );
 7540 %}
 7541 
 7542 // Subtract from a pointer
 7543 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7544   match(Set dst (AddP dst (SubI zero src)));
 7545   effect(KILL cr);
 7546 
 7547   size(2);
 7548   format %{ "SUB    $dst,$src" %}
 7549   opcode(0x2B);
 7550   ins_encode( OpcP, RegReg( dst, src) );
 7551   ins_pipe( ialu_reg_reg );
 7552 %}
 7553 
 7554 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7555   match(Set dst (SubI zero dst));
 7556   effect(KILL cr);
 7557 
 7558   size(2);
 7559   format %{ "NEG    $dst" %}
 7560   opcode(0xF7,0x03);  // Opcode F7 /3
 7561   ins_encode( OpcP, RegOpc( dst ) );
 7562   ins_pipe( ialu_reg );
 7563 %}
 7564 
 7565 //----------Multiplication/Division Instructions-------------------------------
 7566 // Integer Multiplication Instructions
 7567 // Multiply Register
 7568 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7569   match(Set dst (MulI dst src));
 7570   effect(KILL cr);
 7571 
 7572   size(3);
 7573   ins_cost(300);
 7574   format %{ "IMUL   $dst,$src" %}
 7575   opcode(0xAF, 0x0F);
 7576   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7577   ins_pipe( ialu_reg_reg_alu0 );
 7578 %}
 7579 
 7580 // Multiply 32-bit Immediate
 7581 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7582   match(Set dst (MulI src imm));
 7583   effect(KILL cr);
 7584 
 7585   ins_cost(300);
 7586   format %{ "IMUL   $dst,$src,$imm" %}
 7587   opcode(0x69);  /* 69 /r id */
 7588   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7589   ins_pipe( ialu_reg_reg_alu0 );
 7590 %}
 7591 
 7592 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7593   match(Set dst src);
 7594   effect(KILL cr);
 7595 
 7596   // Note that this is artificially increased to make it more expensive than loadConL
 7597   ins_cost(250);
 7598   format %{ "MOV    EAX,$src\t// low word only" %}
 7599   opcode(0xB8);
 7600   ins_encode( LdImmL_Lo(dst, src) );
 7601   ins_pipe( ialu_reg_fat );
 7602 %}
 7603 
 7604 // Multiply by 32-bit Immediate, taking the shifted high order results
 7605 //  (special case for shift by 32)
 7606 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7607   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7608   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7609              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7610              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7611   effect(USE src1, KILL cr);
 7612 
 7613   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7614   ins_cost(0*100 + 1*400 - 150);
 7615   format %{ "IMUL   EDX:EAX,$src1" %}
 7616   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7617   ins_pipe( pipe_slow );
 7618 %}
 7619 
 7620 // Multiply by 32-bit Immediate, taking the shifted high order results
 7621 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7622   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7623   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7624              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7625              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7626   effect(USE src1, KILL cr);
 7627 
 7628   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7629   ins_cost(1*100 + 1*400 - 150);
 7630   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7631             "SAR    EDX,$cnt-32" %}
 7632   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7633   ins_pipe( pipe_slow );
 7634 %}
 7635 
 7636 // Multiply Memory 32-bit Immediate
 7637 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7638   match(Set dst (MulI (LoadI src) imm));
 7639   effect(KILL cr);
 7640 
 7641   ins_cost(300);
 7642   format %{ "IMUL   $dst,$src,$imm" %}
 7643   opcode(0x69);  /* 69 /r id */
 7644   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7645   ins_pipe( ialu_reg_mem_alu0 );
 7646 %}
 7647 
 7648 // Multiply Memory
 7649 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7650   match(Set dst (MulI dst (LoadI src)));
 7651   effect(KILL cr);
 7652 
 7653   ins_cost(350);
 7654   format %{ "IMUL   $dst,$src" %}
 7655   opcode(0xAF, 0x0F);
 7656   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7657   ins_pipe( ialu_reg_mem_alu0 );
 7658 %}
 7659 
 7660 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7661 %{
 7662   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7663   effect(KILL cr, KILL src2);
 7664 
 7665   expand %{ mulI_eReg(dst, src1, cr);
 7666            mulI_eReg(src2, src3, cr);
 7667            addI_eReg(dst, src2, cr); %}
 7668 %}
 7669 
 7670 // Multiply Register Int to Long
 7671 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7672   // Basic Idea: long = (long)int * (long)int
 7673   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7674   effect(DEF dst, USE src, USE src1, KILL flags);
 7675 
 7676   ins_cost(300);
 7677   format %{ "IMUL   $dst,$src1" %}
 7678 
 7679   ins_encode( long_int_multiply( dst, src1 ) );
 7680   ins_pipe( ialu_reg_reg_alu0 );
 7681 %}
 7682 
 7683 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7684   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7685   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7686   effect(KILL flags);
 7687 
 7688   ins_cost(300);
 7689   format %{ "MUL    $dst,$src1" %}
 7690 
 7691   ins_encode( long_uint_multiply(dst, src1) );
 7692   ins_pipe( ialu_reg_reg_alu0 );
 7693 %}
 7694 
 7695 // Multiply Register Long
 7696 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7697   match(Set dst (MulL dst src));
 7698   effect(KILL cr, TEMP tmp);
 7699   ins_cost(4*100+3*400);
 7700 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7701 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7702   format %{ "MOV    $tmp,$src.lo\n\t"
 7703             "IMUL   $tmp,EDX\n\t"
 7704             "MOV    EDX,$src.hi\n\t"
 7705             "IMUL   EDX,EAX\n\t"
 7706             "ADD    $tmp,EDX\n\t"
 7707             "MUL    EDX:EAX,$src.lo\n\t"
 7708             "ADD    EDX,$tmp" %}
 7709   ins_encode( long_multiply( dst, src, tmp ) );
 7710   ins_pipe( pipe_slow );
 7711 %}
 7712 
 7713 // Multiply Register Long where the left operand's high 32 bits are zero
 7714 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7715   predicate(is_operand_hi32_zero(n->in(1)));
 7716   match(Set dst (MulL dst src));
 7717   effect(KILL cr, TEMP tmp);
 7718   ins_cost(2*100+2*400);
 7719 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7720 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7721   format %{ "MOV    $tmp,$src.hi\n\t"
 7722             "IMUL   $tmp,EAX\n\t"
 7723             "MUL    EDX:EAX,$src.lo\n\t"
 7724             "ADD    EDX,$tmp" %}
 7725   ins_encode %{
 7726     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7727     __ imull($tmp$$Register, rax);
 7728     __ mull($src$$Register);
 7729     __ addl(rdx, $tmp$$Register);
 7730   %}
 7731   ins_pipe( pipe_slow );
 7732 %}
 7733 
 7734 // Multiply Register Long where the right operand's high 32 bits are zero
 7735 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7736   predicate(is_operand_hi32_zero(n->in(2)));
 7737   match(Set dst (MulL dst src));
 7738   effect(KILL cr, TEMP tmp);
 7739   ins_cost(2*100+2*400);
 7740 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7741 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7742   format %{ "MOV    $tmp,$src.lo\n\t"
 7743             "IMUL   $tmp,EDX\n\t"
 7744             "MUL    EDX:EAX,$src.lo\n\t"
 7745             "ADD    EDX,$tmp" %}
 7746   ins_encode %{
 7747     __ movl($tmp$$Register, $src$$Register);
 7748     __ imull($tmp$$Register, rdx);
 7749     __ mull($src$$Register);
 7750     __ addl(rdx, $tmp$$Register);
 7751   %}
 7752   ins_pipe( pipe_slow );
 7753 %}
 7754 
 7755 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7756 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7757   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7758   match(Set dst (MulL dst src));
 7759   effect(KILL cr);
 7760   ins_cost(1*400);
 7761 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7762 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7763   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7764   ins_encode %{
 7765     __ mull($src$$Register);
 7766   %}
 7767   ins_pipe( pipe_slow );
 7768 %}
 7769 
 7770 // Multiply Register Long by small constant
 7771 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7772   match(Set dst (MulL dst src));
 7773   effect(KILL cr, TEMP tmp);
 7774   ins_cost(2*100+2*400);
 7775   size(12);
 7776 // Basic idea: lo(result) = lo(src * EAX)
 7777 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7778   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7779             "MOV    EDX,$src\n\t"
 7780             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7781             "ADD    EDX,$tmp" %}
 7782   ins_encode( long_multiply_con( dst, src, tmp ) );
 7783   ins_pipe( pipe_slow );
 7784 %}
 7785 
 7786 // Integer DIV with Register
 7787 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7788   match(Set rax (DivI rax div));
 7789   effect(KILL rdx, KILL cr);
 7790   size(26);
 7791   ins_cost(30*100+10*100);
 7792   format %{ "CMP    EAX,0x80000000\n\t"
 7793             "JNE,s  normal\n\t"
 7794             "XOR    EDX,EDX\n\t"
 7795             "CMP    ECX,-1\n\t"
 7796             "JE,s   done\n"
 7797     "normal: CDQ\n\t"
 7798             "IDIV   $div\n\t"
 7799     "done:"        %}
 7800   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7801   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7802   ins_pipe( ialu_reg_reg_alu0 );
 7803 %}
 7804 
 7805 // Divide Register Long
 7806 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7807   match(Set dst (DivL src1 src2));
 7808   effect(CALL);
 7809   ins_cost(10000);
 7810   format %{ "PUSH   $src1.hi\n\t"
 7811             "PUSH   $src1.lo\n\t"
 7812             "PUSH   $src2.hi\n\t"
 7813             "PUSH   $src2.lo\n\t"
 7814             "CALL   SharedRuntime::ldiv\n\t"
 7815             "ADD    ESP,16" %}
 7816   ins_encode( long_div(src1,src2) );
 7817   ins_pipe( pipe_slow );
 7818 %}
 7819 
 7820 // Integer DIVMOD with Register, both quotient and mod results
 7821 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7822   match(DivModI rax div);
 7823   effect(KILL cr);
 7824   size(26);
 7825   ins_cost(30*100+10*100);
 7826   format %{ "CMP    EAX,0x80000000\n\t"
 7827             "JNE,s  normal\n\t"
 7828             "XOR    EDX,EDX\n\t"
 7829             "CMP    ECX,-1\n\t"
 7830             "JE,s   done\n"
 7831     "normal: CDQ\n\t"
 7832             "IDIV   $div\n\t"
 7833     "done:"        %}
 7834   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7835   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7836   ins_pipe( pipe_slow );
 7837 %}
 7838 
 7839 // Integer MOD with Register
 7840 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7841   match(Set rdx (ModI rax div));
 7842   effect(KILL rax, KILL cr);
 7843 
 7844   size(26);
 7845   ins_cost(300);
 7846   format %{ "CDQ\n\t"
 7847             "IDIV   $div" %}
 7848   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7849   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7850   ins_pipe( ialu_reg_reg_alu0 );
 7851 %}
 7852 
 7853 // Remainder Register Long
 7854 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7855   match(Set dst (ModL src1 src2));
 7856   effect(CALL);
 7857   ins_cost(10000);
 7858   format %{ "PUSH   $src1.hi\n\t"
 7859             "PUSH   $src1.lo\n\t"
 7860             "PUSH   $src2.hi\n\t"
 7861             "PUSH   $src2.lo\n\t"
 7862             "CALL   SharedRuntime::lrem\n\t"
 7863             "ADD    ESP,16" %}
 7864   ins_encode( long_mod(src1,src2) );
 7865   ins_pipe( pipe_slow );
 7866 %}
 7867 
 7868 // Divide Register Long (no special case since divisor != -1)
 7869 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7870   match(Set dst (DivL dst imm));
 7871   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7872   ins_cost(1000);
 7873   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7874             "XOR    $tmp2,$tmp2\n\t"
 7875             "CMP    $tmp,EDX\n\t"
 7876             "JA,s   fast\n\t"
 7877             "MOV    $tmp2,EAX\n\t"
 7878             "MOV    EAX,EDX\n\t"
 7879             "MOV    EDX,0\n\t"
 7880             "JLE,s  pos\n\t"
 7881             "LNEG   EAX : $tmp2\n\t"
 7882             "DIV    $tmp # unsigned division\n\t"
 7883             "XCHG   EAX,$tmp2\n\t"
 7884             "DIV    $tmp\n\t"
 7885             "LNEG   $tmp2 : EAX\n\t"
 7886             "JMP,s  done\n"
 7887     "pos:\n\t"
 7888             "DIV    $tmp\n\t"
 7889             "XCHG   EAX,$tmp2\n"
 7890     "fast:\n\t"
 7891             "DIV    $tmp\n"
 7892     "done:\n\t"
 7893             "MOV    EDX,$tmp2\n\t"
 7894             "NEG    EDX:EAX # if $imm < 0" %}
 7895   ins_encode %{
 7896     int con = (int)$imm$$constant;
 7897     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7898     int pcon = (con > 0) ? con : -con;
 7899     Label Lfast, Lpos, Ldone;
 7900 
 7901     __ movl($tmp$$Register, pcon);
 7902     __ xorl($tmp2$$Register,$tmp2$$Register);
 7903     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7905 
 7906     __ movl($tmp2$$Register, $dst$$Register); // save
 7907     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7908     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7909     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7910 
 7911     // Negative dividend.
 7912     // convert value to positive to use unsigned division
 7913     __ lneg($dst$$Register, $tmp2$$Register);
 7914     __ divl($tmp$$Register);
 7915     __ xchgl($dst$$Register, $tmp2$$Register);
 7916     __ divl($tmp$$Register);
 7917     // revert result back to negative
 7918     __ lneg($tmp2$$Register, $dst$$Register);
 7919     __ jmpb(Ldone);
 7920 
 7921     __ bind(Lpos);
 7922     __ divl($tmp$$Register); // Use unsigned division
 7923     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32-bit hi result
 7925 
 7926     __ bind(Lfast);
 7927     // fast path: src is positive
 7928     __ divl($tmp$$Register); // Use unsigned division
 7929 
 7930     __ bind(Ldone);
 7931     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7932     if (con < 0) {
 7933       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7934     }
 7935   %}
 7936   ins_pipe( pipe_slow );
 7937 %}
 7938 
// Remainder Register Long (remainder fits into 32 bits)
 7940 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7941   match(Set dst (ModL dst imm));
 7942   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7943   ins_cost(1000);
 7944   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7945             "CMP    $tmp,EDX\n\t"
 7946             "JA,s   fast\n\t"
 7947             "MOV    $tmp2,EAX\n\t"
 7948             "MOV    EAX,EDX\n\t"
 7949             "MOV    EDX,0\n\t"
 7950             "JLE,s  pos\n\t"
 7951             "LNEG   EAX : $tmp2\n\t"
 7952             "DIV    $tmp # unsigned division\n\t"
 7953             "MOV    EAX,$tmp2\n\t"
 7954             "DIV    $tmp\n\t"
 7955             "NEG    EDX\n\t"
 7956             "JMP,s  done\n"
 7957     "pos:\n\t"
 7958             "DIV    $tmp\n\t"
 7959             "MOV    EAX,$tmp2\n"
 7960     "fast:\n\t"
 7961             "DIV    $tmp\n"
 7962     "done:\n\t"
 7963             "MOV    EAX,EDX\n\t"
 7964             "SAR    EDX,31\n\t" %}
 7965   ins_encode %{
 7966     int con = (int)$imm$$constant;
 7967     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7968     int pcon = (con > 0) ? con : -con;
 7969     Label  Lfast, Lpos, Ldone;
 7970 
 7971     __ movl($tmp$$Register, pcon);
 7972     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7974 
 7975     __ movl($tmp2$$Register, $dst$$Register); // save
 7976     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7977     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7978     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7979 
 7980     // Negative dividend.
 7981     // convert value to positive to use unsigned division
 7982     __ lneg($dst$$Register, $tmp2$$Register);
 7983     __ divl($tmp$$Register);
 7984     __ movl($dst$$Register, $tmp2$$Register);
 7985     __ divl($tmp$$Register);
 7986     // revert remainder back to negative
 7987     __ negl(HIGH_FROM_LOW($dst$$Register));
 7988     __ jmpb(Ldone);
 7989 
 7990     __ bind(Lpos);
 7991     __ divl($tmp$$Register);
 7992     __ movl($dst$$Register, $tmp2$$Register);
 7993 
 7994     __ bind(Lfast);
 7995     // fast path: src is positive
 7996     __ divl($tmp$$Register);
 7997 
 7998     __ bind(Ldone);
 7999     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8000     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8001 
 8002   %}
 8003   ins_pipe( pipe_slow );
 8004 %}
 8005 
 8006 // Integer Shift Instructions
 8007 // Shift Left by one
 8008 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8009   match(Set dst (LShiftI dst shift));
 8010   effect(KILL cr);
 8011 
 8012   size(2);
 8013   format %{ "SHL    $dst,$shift" %}
 8014   opcode(0xD1, 0x4);  /* D1 /4 */
 8015   ins_encode( OpcP, RegOpc( dst ) );
 8016   ins_pipe( ialu_reg );
 8017 %}
 8018 
 8019 // Shift Left by 8-bit immediate
 8020 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8021   match(Set dst (LShiftI dst shift));
 8022   effect(KILL cr);
 8023 
 8024   size(3);
 8025   format %{ "SHL    $dst,$shift" %}
 8026   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8027   ins_encode( RegOpcImm( dst, shift) );
 8028   ins_pipe( ialu_reg );
 8029 %}
 8030 
 8031 // Shift Left by variable
 8032 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8033   match(Set dst (LShiftI dst shift));
 8034   effect(KILL cr);
 8035 
 8036   size(2);
 8037   format %{ "SHL    $dst,$shift" %}
 8038   opcode(0xD3, 0x4);  /* D3 /4 */
 8039   ins_encode( OpcP, RegOpc( dst ) );
 8040   ins_pipe( ialu_reg_reg );
 8041 %}
 8042 
 8043 // Arithmetic shift right by one
 8044 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8045   match(Set dst (RShiftI dst shift));
 8046   effect(KILL cr);
 8047 
 8048   size(2);
 8049   format %{ "SAR    $dst,$shift" %}
 8050   opcode(0xD1, 0x7);  /* D1 /7 */
 8051   ins_encode( OpcP, RegOpc( dst ) );
 8052   ins_pipe( ialu_reg );
 8053 %}
 8054 
 8055 // Arithmetic shift right by one
 8056 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8057   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8058   effect(KILL cr);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xD1, 0x7);  /* D1 /7 */
 8061   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8062   ins_pipe( ialu_mem_imm );
 8063 %}
 8064 
 8065 // Arithmetic Shift Right by 8-bit immediate
 8066 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8067   match(Set dst (RShiftI dst shift));
 8068   effect(KILL cr);
 8069 
 8070   size(3);
 8071   format %{ "SAR    $dst,$shift" %}
 8072   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8073   ins_encode( RegOpcImm( dst, shift ) );
 8074   ins_pipe( ialu_mem_imm );
 8075 %}
 8076 
 8077 // Arithmetic Shift Right by 8-bit immediate
 8078 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8079   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8080   effect(KILL cr);
 8081 
 8082   format %{ "SAR    $dst,$shift" %}
 8083   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8084   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8085   ins_pipe( ialu_mem_imm );
 8086 %}
 8087 
 8088 // Arithmetic Shift Right by variable
 8089 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8090   match(Set dst (RShiftI dst shift));
 8091   effect(KILL cr);
 8092 
 8093   size(2);
 8094   format %{ "SAR    $dst,$shift" %}
 8095   opcode(0xD3, 0x7);  /* D3 /7 */
 8096   ins_encode( OpcP, RegOpc( dst ) );
 8097   ins_pipe( ialu_reg_reg );
 8098 %}
 8099 
 8100 // Logical shift right by one
 8101 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8102   match(Set dst (URShiftI dst shift));
 8103   effect(KILL cr);
 8104 
 8105   size(2);
 8106   format %{ "SHR    $dst,$shift" %}
 8107   opcode(0xD1, 0x5);  /* D1 /5 */
 8108   ins_encode( OpcP, RegOpc( dst ) );
 8109   ins_pipe( ialu_reg );
 8110 %}
 8111 
 8112 // Logical Shift Right by 8-bit immediate
 8113 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8114   match(Set dst (URShiftI dst shift));
 8115   effect(KILL cr);
 8116 
 8117   size(3);
 8118   format %{ "SHR    $dst,$shift" %}
 8119   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8120   ins_encode( RegOpcImm( dst, shift) );
 8121   ins_pipe( ialu_reg );
 8122 %}
 8123 
 8124 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8126 // This idiom is used by the compiler for the i2b bytecode.
 8127 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8128   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8129 
 8130   size(3);
 8131   format %{ "MOVSX  $dst,$src :8" %}
 8132   ins_encode %{
 8133     __ movsbl($dst$$Register, $src$$Register);
 8134   %}
 8135   ins_pipe(ialu_reg_reg);
 8136 %}
 8137 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8140 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8141   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8142 
 8143   size(3);
 8144   format %{ "MOVSX  $dst,$src :16" %}
 8145   ins_encode %{
 8146     __ movswl($dst$$Register, $src$$Register);
 8147   %}
 8148   ins_pipe(ialu_reg_reg);
 8149 %}
 8150 
 8151 
 8152 // Logical Shift Right by variable
 8153 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8154   match(Set dst (URShiftI dst shift));
 8155   effect(KILL cr);
 8156 
 8157   size(2);
 8158   format %{ "SHR    $dst,$shift" %}
 8159   opcode(0xD3, 0x5);  /* D3 /5 */
 8160   ins_encode( OpcP, RegOpc( dst ) );
 8161   ins_pipe( ialu_reg_reg );
 8162 %}
 8163 
 8164 
 8165 //----------Logical Instructions-----------------------------------------------
 8166 //----------Integer Logical Instructions---------------------------------------
 8167 // And Instructions
 8168 // And Register with Register
 8169 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8170   match(Set dst (AndI dst src));
 8171   effect(KILL cr);
 8172 
 8173   size(2);
 8174   format %{ "AND    $dst,$src" %}
 8175   opcode(0x23);
 8176   ins_encode( OpcP, RegReg( dst, src) );
 8177   ins_pipe( ialu_reg_reg );
 8178 %}
 8179 
 8180 // And Register with Immediate
 8181 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8182   match(Set dst (AndI dst src));
 8183   effect(KILL cr);
 8184 
 8185   format %{ "AND    $dst,$src" %}
 8186   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8187   // ins_encode( RegImm( dst, src) );
 8188   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8189   ins_pipe( ialu_reg );
 8190 %}
 8191 
 8192 // And Register with Memory
 8193 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8194   match(Set dst (AndI dst (LoadI src)));
 8195   effect(KILL cr);
 8196 
 8197   ins_cost(150);
 8198   format %{ "AND    $dst,$src" %}
 8199   opcode(0x23);
 8200   ins_encode( OpcP, RegMem( dst, src) );
 8201   ins_pipe( ialu_reg_mem );
 8202 %}
 8203 
 8204 // And Memory with Register
 8205 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8206   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8207   effect(KILL cr);
 8208 
 8209   ins_cost(150);
 8210   format %{ "AND    $dst,$src" %}
 8211   opcode(0x21);  /* Opcode 21 /r */
 8212   ins_encode( OpcP, RegMem( src, dst ) );
 8213   ins_pipe( ialu_mem_reg );
 8214 %}
 8215 
 8216 // And Memory with Immediate
 8217 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8218   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8219   effect(KILL cr);
 8220 
 8221   ins_cost(125);
 8222   format %{ "AND    $dst,$src" %}
 8223   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8224   // ins_encode( MemImm( dst, src) );
 8225   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8226   ins_pipe( ialu_mem_imm );
 8227 %}
 8228 
 8229 // BMI1 instructions
 8230 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8231   match(Set dst (AndI (XorI src1 minus_1) src2));
 8232   predicate(UseBMI1Instructions);
 8233   effect(KILL cr);
 8234 
 8235   format %{ "ANDNL  $dst, $src1, $src2" %}
 8236 
 8237   ins_encode %{
 8238     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8239   %}
 8240   ins_pipe(ialu_reg);
 8241 %}
 8242 
 8243 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8244   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8245   predicate(UseBMI1Instructions);
 8246   effect(KILL cr);
 8247 
 8248   ins_cost(125);
 8249   format %{ "ANDNL  $dst, $src1, $src2" %}
 8250 
 8251   ins_encode %{
 8252     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8253   %}
 8254   ins_pipe(ialu_reg_mem);
 8255 %}
 8256 
 8257 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8258   match(Set dst (AndI (SubI imm_zero src) src));
 8259   predicate(UseBMI1Instructions);
 8260   effect(KILL cr);
 8261 
 8262   format %{ "BLSIL  $dst, $src" %}
 8263 
 8264   ins_encode %{
 8265     __ blsil($dst$$Register, $src$$Register);
 8266   %}
 8267   ins_pipe(ialu_reg);
 8268 %}
 8269 
 8270 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8271   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8272   predicate(UseBMI1Instructions);
 8273   effect(KILL cr);
 8274 
 8275   ins_cost(125);
 8276   format %{ "BLSIL  $dst, $src" %}
 8277 
 8278   ins_encode %{
 8279     __ blsil($dst$$Register, $src$$Address);
 8280   %}
 8281   ins_pipe(ialu_reg_mem);
 8282 %}
 8283 
 8284 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8285 %{
 8286   match(Set dst (XorI (AddI src minus_1) src));
 8287   predicate(UseBMI1Instructions);
 8288   effect(KILL cr);
 8289 
 8290   format %{ "BLSMSKL $dst, $src" %}
 8291 
 8292   ins_encode %{
 8293     __ blsmskl($dst$$Register, $src$$Register);
 8294   %}
 8295 
 8296   ins_pipe(ialu_reg);
 8297 %}
 8298 
 8299 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8300 %{
 8301   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8302   predicate(UseBMI1Instructions);
 8303   effect(KILL cr);
 8304 
 8305   ins_cost(125);
 8306   format %{ "BLSMSKL $dst, $src" %}
 8307 
 8308   ins_encode %{
 8309     __ blsmskl($dst$$Register, $src$$Address);
 8310   %}
 8311 
 8312   ins_pipe(ialu_reg_mem);
 8313 %}
 8314 
 8315 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8316 %{
 8317   match(Set dst (AndI (AddI src minus_1) src) );
 8318   predicate(UseBMI1Instructions);
 8319   effect(KILL cr);
 8320 
 8321   format %{ "BLSRL  $dst, $src" %}
 8322 
 8323   ins_encode %{
 8324     __ blsrl($dst$$Register, $src$$Register);
 8325   %}
 8326 
 8327   ins_pipe(ialu_reg);
 8328 %}
 8329 
 8330 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8331 %{
 8332   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8333   predicate(UseBMI1Instructions);
 8334   effect(KILL cr);
 8335 
 8336   ins_cost(125);
 8337   format %{ "BLSRL  $dst, $src" %}
 8338 
 8339   ins_encode %{
 8340     __ blsrl($dst$$Register, $src$$Address);
 8341   %}
 8342 
 8343   ins_pipe(ialu_reg_mem);
 8344 %}
 8345 
 8346 // Or Instructions
 8347 // Or Register with Register
 8348 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8349   match(Set dst (OrI dst src));
 8350   effect(KILL cr);
 8351 
 8352   size(2);
 8353   format %{ "OR     $dst,$src" %}
 8354   opcode(0x0B);
 8355   ins_encode( OpcP, RegReg( dst, src) );
 8356   ins_pipe( ialu_reg_reg );
 8357 %}
 8358 
 8359 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8360   match(Set dst (OrI dst (CastP2X src)));
 8361   effect(KILL cr);
 8362 
 8363   size(2);
 8364   format %{ "OR     $dst,$src" %}
 8365   opcode(0x0B);
 8366   ins_encode( OpcP, RegReg( dst, src) );
 8367   ins_pipe( ialu_reg_reg );
 8368 %}
 8369 
 8370 
 8371 // Or Register with Immediate
 8372 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8373   match(Set dst (OrI dst src));
 8374   effect(KILL cr);
 8375 
 8376   format %{ "OR     $dst,$src" %}
 8377   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8378   // ins_encode( RegImm( dst, src) );
 8379   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8380   ins_pipe( ialu_reg );
 8381 %}
 8382 
 8383 // Or Register with Memory
 8384 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8385   match(Set dst (OrI dst (LoadI src)));
 8386   effect(KILL cr);
 8387 
 8388   ins_cost(150);
 8389   format %{ "OR     $dst,$src" %}
 8390   opcode(0x0B);
 8391   ins_encode( OpcP, RegMem( dst, src) );
 8392   ins_pipe( ialu_reg_mem );
 8393 %}
 8394 
 8395 // Or Memory with Register
 8396 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8397   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8398   effect(KILL cr);
 8399 
 8400   ins_cost(150);
 8401   format %{ "OR     $dst,$src" %}
 8402   opcode(0x09);  /* Opcode 09 /r */
 8403   ins_encode( OpcP, RegMem( src, dst ) );
 8404   ins_pipe( ialu_mem_reg );
 8405 %}
 8406 
 8407 // Or Memory with Immediate
 8408 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8409   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8410   effect(KILL cr);
 8411 
 8412   ins_cost(125);
 8413   format %{ "OR     $dst,$src" %}
 8414   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8415   // ins_encode( MemImm( dst, src) );
 8416   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8417   ins_pipe( ialu_mem_imm );
 8418 %}
 8419 
 8420 // ROL/ROR
 8421 // ROL expand
 8422 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8423   effect(USE_DEF dst, USE shift, KILL cr);
 8424 
 8425   format %{ "ROL    $dst, $shift" %}
 8426   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8427   ins_encode( OpcP, RegOpc( dst ));
 8428   ins_pipe( ialu_reg );
 8429 %}
 8430 
 8431 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8432   effect(USE_DEF dst, USE shift, KILL cr);
 8433 
 8434   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
 8436   ins_encode( RegOpcImm(dst, shift) );
 8437   ins_pipe(ialu_reg);
 8438 %}
 8439 
 8440 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8441   effect(USE_DEF dst, USE shift, KILL cr);
 8442 
 8443   format %{ "ROL    $dst, $shift" %}
 8444   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8445   ins_encode(OpcP, RegOpc(dst));
 8446   ins_pipe( ialu_reg_reg );
 8447 %}
 8448 // end of ROL expand
 8449 
 8450 // ROL 32bit by one once
 8451 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8452   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8453 
 8454   expand %{
 8455     rolI_eReg_imm1(dst, lshift, cr);
 8456   %}
 8457 %}
 8458 
 8459 // ROL 32bit var by imm8 once
 8460 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8461   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8462   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8463 
 8464   expand %{
 8465     rolI_eReg_imm8(dst, lshift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROL 32bit var by var once
 8470 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8471   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8472 
 8473   expand %{
 8474     rolI_eReg_CL(dst, shift, cr);
 8475   %}
 8476 %}
 8477 
 8478 // ROL 32bit var by var once
 8479 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8480   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8481 
 8482   expand %{
 8483     rolI_eReg_CL(dst, shift, cr);
 8484   %}
 8485 %}
 8486 
 8487 // ROR expand
 8488 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8489   effect(USE_DEF dst, USE shift, KILL cr);
 8490 
 8491   format %{ "ROR    $dst, $shift" %}
 8492   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8493   ins_encode( OpcP, RegOpc( dst ) );
 8494   ins_pipe( ialu_reg );
 8495 %}
 8496 
 8497 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
 8499 
 8500   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
 8502   ins_encode( RegOpcImm(dst, shift) );
 8503   ins_pipe( ialu_reg );
 8504 %}
 8505 
 8506 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8507   effect(USE_DEF dst, USE shift, KILL cr);
 8508 
 8509   format %{ "ROR    $dst, $shift" %}
 8510   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8511   ins_encode(OpcP, RegOpc(dst));
 8512   ins_pipe( ialu_reg_reg );
 8513 %}
 8514 // end of ROR expand
 8515 
 8516 // ROR right once
 8517 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8518   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8519 
 8520   expand %{
 8521     rorI_eReg_imm1(dst, rshift, cr);
 8522   %}
 8523 %}
 8524 
 8525 // ROR 32bit by immI8 once
 8526 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8527   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8528   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8529 
 8530   expand %{
 8531     rorI_eReg_imm8(dst, rshift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // ROR 32bit var by var once
 8536 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8537   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8538 
 8539   expand %{
 8540     rorI_eReg_CL(dst, shift, cr);
 8541   %}
 8542 %}
 8543 
 8544 // ROR 32bit var by var once
 8545 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8546   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8547 
 8548   expand %{
 8549     rorI_eReg_CL(dst, shift, cr);
 8550   %}
 8551 %}
 8552 
 8553 // Xor Instructions
 8554 // Xor Register with Register
 8555 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8556   match(Set dst (XorI dst src));
 8557   effect(KILL cr);
 8558 
 8559   size(2);
 8560   format %{ "XOR    $dst,$src" %}
 8561   opcode(0x33);
 8562   ins_encode( OpcP, RegReg( dst, src) );
 8563   ins_pipe( ialu_reg_reg );
 8564 %}
 8565 
 8566 // Xor Register with Immediate -1
 8567 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8568   match(Set dst (XorI dst imm));
 8569 
 8570   size(2);
 8571   format %{ "NOT    $dst" %}
 8572   ins_encode %{
 8573      __ notl($dst$$Register);
 8574   %}
 8575   ins_pipe( ialu_reg );
 8576 %}
 8577 
 8578 // Xor Register with Immediate
 8579 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8580   match(Set dst (XorI dst src));
 8581   effect(KILL cr);
 8582 
 8583   format %{ "XOR    $dst,$src" %}
 8584   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8585   // ins_encode( RegImm( dst, src) );
 8586   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8587   ins_pipe( ialu_reg );
 8588 %}
 8589 
 8590 // Xor Register with Memory
 8591 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8592   match(Set dst (XorI dst (LoadI src)));
 8593   effect(KILL cr);
 8594 
 8595   ins_cost(150);
 8596   format %{ "XOR    $dst,$src" %}
 8597   opcode(0x33);
 8598   ins_encode( OpcP, RegMem(dst, src) );
 8599   ins_pipe( ialu_reg_mem );
 8600 %}
 8601 
 8602 // Xor Memory with Register
 8603 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8604   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8605   effect(KILL cr);
 8606 
 8607   ins_cost(150);
 8608   format %{ "XOR    $dst,$src" %}
 8609   opcode(0x31);  /* Opcode 31 /r */
 8610   ins_encode( OpcP, RegMem( src, dst ) );
 8611   ins_pipe( ialu_mem_reg );
 8612 %}
 8613 
 8614 // Xor Memory with Immediate
 8615 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8616   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8617   effect(KILL cr);
 8618 
 8619   ins_cost(125);
 8620   format %{ "XOR    $dst,$src" %}
 8621   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8622   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8623   ins_pipe( ialu_mem_imm );
 8624 %}
 8625 
 8626 //----------Convert Int to Boolean---------------------------------------------
 8627 
 8628 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8629   effect( DEF dst, USE src );
 8630   format %{ "MOV    $dst,$src" %}
 8631   ins_encode( enc_Copy( dst, src) );
 8632   ins_pipe( ialu_reg_reg );
 8633 %}
 8634 
 8635 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8636   effect( USE_DEF dst, USE src, KILL cr );
 8637 
 8638   size(4);
 8639   format %{ "NEG    $dst\n\t"
 8640             "ADC    $dst,$src" %}
 8641   ins_encode( neg_reg(dst),
 8642               OpcRegReg(0x13,dst,src) );
 8643   ins_pipe( ialu_reg_reg_long );
 8644 %}
 8645 
 8646 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8647   match(Set dst (Conv2B src));
 8648 
 8649   expand %{
 8650     movI_nocopy(dst,src);
 8651     ci2b(dst,src,cr);
 8652   %}
 8653 %}
 8654 
 8655 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8656   effect( DEF dst, USE src );
 8657   format %{ "MOV    $dst,$src" %}
 8658   ins_encode( enc_Copy( dst, src) );
 8659   ins_pipe( ialu_reg_reg );
 8660 %}
 8661 
 8662 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8663   effect( USE_DEF dst, USE src, KILL cr );
 8664   format %{ "NEG    $dst\n\t"
 8665             "ADC    $dst,$src" %}
 8666   ins_encode( neg_reg(dst),
 8667               OpcRegReg(0x13,dst,src) );
 8668   ins_pipe( ialu_reg_reg_long );
 8669 %}
 8670 
 8671 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8672   match(Set dst (Conv2B src));
 8673 
 8674   expand %{
 8675     movP_nocopy(dst,src);
 8676     cp2b(dst,src,cr);
 8677   %}
 8678 %}
 8679 
 8680 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8681   match(Set dst (CmpLTMask p q));
 8682   effect(KILL cr);
 8683   ins_cost(400);
 8684 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8686   format %{ "XOR    $dst,$dst\n\t"
 8687             "CMP    $p,$q\n\t"
 8688             "SETlt  $dst\n\t"
 8689             "NEG    $dst" %}
 8690   ins_encode %{
 8691     Register Rp = $p$$Register;
 8692     Register Rq = $q$$Register;
 8693     Register Rd = $dst$$Register;
 8695     __ xorl(Rd, Rd);
 8696     __ cmpl(Rp, Rq);
 8697     __ setb(Assembler::less, Rd);
 8698     __ negl(Rd);
 8699   %}
 8700 
 8701   ins_pipe(pipe_slow);
 8702 %}
 8703 
 8704 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8705   match(Set dst (CmpLTMask dst zero));
 8706   effect(DEF dst, KILL cr);
 8707   ins_cost(100);
 8708 
 8709   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8710   ins_encode %{
    __ sarl($dst$$Register, 31);
 8712   %}
 8713   ins_pipe(ialu_reg);
 8714 %}
 8715 
 8716 /* better to save a register than avoid a branch */
 8717 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8718   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8719   effect(KILL cr);
 8720   ins_cost(400);
 8721   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8722             "JGE    done\n\t"
 8723             "ADD    $p,$y\n"
 8724             "done:  " %}
 8725   ins_encode %{
 8726     Register Rp = $p$$Register;
 8727     Register Rq = $q$$Register;
 8728     Register Ry = $y$$Register;
 8729     Label done;
 8730     __ subl(Rp, Rq);
 8731     __ jccb(Assembler::greaterEqual, done);
 8732     __ addl(Rp, Ry);
 8733     __ bind(done);
 8734   %}
 8735 
 8736   ins_pipe(pipe_cmplt);
 8737 %}
 8738 
 8739 /* better to save a register than avoid a branch */
 8740 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8741   match(Set y (AndI (CmpLTMask p q) y));
 8742   effect(KILL cr);
 8743 
 8744   ins_cost(300);
 8745 
 8746   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8747             "JLT      done\n\t"
 8748             "XORL     $y, $y\n"
 8749             "done:  " %}
 8750   ins_encode %{
 8751     Register Rp = $p$$Register;
 8752     Register Rq = $q$$Register;
 8753     Register Ry = $y$$Register;
 8754     Label done;
 8755     __ cmpl(Rp, Rq);
 8756     __ jccb(Assembler::less, done);
 8757     __ xorl(Ry, Ry);
 8758     __ bind(done);
 8759   %}
 8760 
 8761   ins_pipe(pipe_cmplt);
 8762 %}
 8763 
 8764 /* If I enable this, I encourage spilling in the inner loop of compress.
 8765 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8766   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8767 */
 8768 //----------Overflow Math Instructions-----------------------------------------
 8769 
 8770 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8771 %{
 8772   match(Set cr (OverflowAddI op1 op2));
 8773   effect(DEF cr, USE_KILL op1, USE op2);
 8774 
 8775   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8776 
 8777   ins_encode %{
 8778     __ addl($op1$$Register, $op2$$Register);
 8779   %}
 8780   ins_pipe(ialu_reg_reg);
 8781 %}
 8782 
 8783 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8784 %{
 8785   match(Set cr (OverflowAddI op1 op2));
 8786   effect(DEF cr, USE_KILL op1, USE op2);
 8787 
 8788   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8789 
 8790   ins_encode %{
 8791     __ addl($op1$$Register, $op2$$constant);
 8792   %}
 8793   ins_pipe(ialu_reg_reg);
 8794 %}
 8795 
 8796 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8797 %{
 8798   match(Set cr (OverflowSubI op1 op2));
 8799 
 8800   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8801   ins_encode %{
 8802     __ cmpl($op1$$Register, $op2$$Register);
 8803   %}
 8804   ins_pipe(ialu_reg_reg);
 8805 %}
 8806 
 8807 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8808 %{
 8809   match(Set cr (OverflowSubI op1 op2));
 8810 
 8811   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8812   ins_encode %{
 8813     __ cmpl($op1$$Register, $op2$$constant);
 8814   %}
 8815   ins_pipe(ialu_reg_reg);
 8816 %}
 8817 
 8818 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8819 %{
 8820   match(Set cr (OverflowSubI zero op2));
 8821   effect(DEF cr, USE_KILL op2);
 8822 
 8823   format %{ "NEG    $op2\t# overflow check int" %}
 8824   ins_encode %{
 8825     __ negl($op2$$Register);
 8826   %}
 8827   ins_pipe(ialu_reg_reg);
 8828 %}
 8829 
 8830 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8831 %{
 8832   match(Set cr (OverflowMulI op1 op2));
 8833   effect(DEF cr, USE_KILL op1, USE op2);
 8834 
 8835   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8836   ins_encode %{
 8837     __ imull($op1$$Register, $op2$$Register);
 8838   %}
 8839   ins_pipe(ialu_reg_reg_alu0);
 8840 %}
 8841 
 8842 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8843 %{
 8844   match(Set cr (OverflowMulI op1 op2));
 8845   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8846 
 8847   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8848   ins_encode %{
 8849     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8850   %}
 8851   ins_pipe(ialu_reg_reg_alu0);
 8852 %}
 8853 
 8854 // Integer Absolute Instructions
 8855 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8856 %{
 8857   match(Set dst (AbsI src));
 8858   effect(TEMP dst, TEMP tmp, KILL cr);
 8859   format %{ "movl $tmp, $src\n\t"
 8860             "sarl $tmp, 31\n\t"
 8861             "movl $dst, $src\n\t"
 8862             "xorl $dst, $tmp\n\t"
 8863             "subl $dst, $tmp\n"
 8864           %}
 8865   ins_encode %{
 8866     __ movl($tmp$$Register, $src$$Register);
 8867     __ sarl($tmp$$Register, 31);
 8868     __ movl($dst$$Register, $src$$Register);
 8869     __ xorl($dst$$Register, $tmp$$Register);
 8870     __ subl($dst$$Register, $tmp$$Register);
 8871   %}
 8872 
 8873   ins_pipe(ialu_reg_reg);
 8874 %}
 8875 
 8876 //----------Long Instructions------------------------------------------------
 8877 // Add Long Register with Register
 8878 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8879   match(Set dst (AddL dst src));
 8880   effect(KILL cr);
 8881   ins_cost(200);
 8882   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8883             "ADC    $dst.hi,$src.hi" %}
 8884   opcode(0x03, 0x13);
 8885   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8886   ins_pipe( ialu_reg_reg_long );
 8887 %}
 8888 
 8889 // Add Long Register with Immediate
 8890 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8891   match(Set dst (AddL dst src));
 8892   effect(KILL cr);
 8893   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8894             "ADC    $dst.hi,$src.hi" %}
 8895   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8896   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8897   ins_pipe( ialu_reg_long );
 8898 %}
 8899 
 8900 // Add Long Register with Memory
 8901 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8902   match(Set dst (AddL dst (LoadL mem)));
 8903   effect(KILL cr);
 8904   ins_cost(125);
 8905   format %{ "ADD    $dst.lo,$mem\n\t"
 8906             "ADC    $dst.hi,$mem+4" %}
 8907   opcode(0x03, 0x13);
 8908   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8909   ins_pipe( ialu_reg_long_mem );
 8910 %}
 8911 
 8912 // Subtract Long Register with Register.
 8913 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8914   match(Set dst (SubL dst src));
 8915   effect(KILL cr);
 8916   ins_cost(200);
 8917   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8918             "SBB    $dst.hi,$src.hi" %}
 8919   opcode(0x2B, 0x1B);
 8920   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8921   ins_pipe( ialu_reg_reg_long );
 8922 %}
 8923 
 8924 // Subtract Long Register with Immediate
 8925 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8926   match(Set dst (SubL dst src));
 8927   effect(KILL cr);
 8928   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8929             "SBB    $dst.hi,$src.hi" %}
 8930   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8931   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8932   ins_pipe( ialu_reg_long );
 8933 %}
 8934 
 8935 // Subtract Long Register with Memory
 8936 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8937   match(Set dst (SubL dst (LoadL mem)));
 8938   effect(KILL cr);
 8939   ins_cost(125);
 8940   format %{ "SUB    $dst.lo,$mem\n\t"
 8941             "SBB    $dst.hi,$mem+4" %}
 8942   opcode(0x2B, 0x1B);
 8943   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8944   ins_pipe( ialu_reg_long_mem );
 8945 %}
 8946 
 8947 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8948   match(Set dst (SubL zero dst));
 8949   effect(KILL cr);
 8950   ins_cost(300);
 8951   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8952   ins_encode( neg_long(dst) );
 8953   ins_pipe( ialu_reg_reg_long );
 8954 %}
 8955 
 8956 // And Long Register with Register
 8957 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8958   match(Set dst (AndL dst src));
 8959   effect(KILL cr);
 8960   format %{ "AND    $dst.lo,$src.lo\n\t"
 8961             "AND    $dst.hi,$src.hi" %}
 8962   opcode(0x23,0x23);
 8963   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8964   ins_pipe( ialu_reg_reg_long );
 8965 %}
 8966 
 8967 // And Long Register with Immediate
 8968 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8969   match(Set dst (AndL dst src));
 8970   effect(KILL cr);
 8971   format %{ "AND    $dst.lo,$src.lo\n\t"
 8972             "AND    $dst.hi,$src.hi" %}
 8973   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8974   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8975   ins_pipe( ialu_reg_long );
 8976 %}
 8977 
 8978 // And Long Register with Memory
 8979 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8980   match(Set dst (AndL dst (LoadL mem)));
 8981   effect(KILL cr);
 8982   ins_cost(125);
 8983   format %{ "AND    $dst.lo,$mem\n\t"
 8984             "AND    $dst.hi,$mem+4" %}
 8985   opcode(0x23, 0x23);
 8986   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8987   ins_pipe( ialu_reg_long_mem );
 8988 %}
 8989 
 8990 // BMI1 instructions
 8991 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8992   match(Set dst (AndL (XorL src1 minus_1) src2));
 8993   predicate(UseBMI1Instructions);
 8994   effect(KILL cr, TEMP dst);
 8995 
 8996   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8997             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8998          %}
 8999 
 9000   ins_encode %{
 9001     Register Rdst = $dst$$Register;
 9002     Register Rsrc1 = $src1$$Register;
 9003     Register Rsrc2 = $src2$$Register;
 9004     __ andnl(Rdst, Rsrc1, Rsrc2);
 9005     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9006   %}
 9007   ins_pipe(ialu_reg_reg_long);
 9008 %}
 9009 
 9010 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9011   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9012   predicate(UseBMI1Instructions);
 9013   effect(KILL cr, TEMP dst);
 9014 
 9015   ins_cost(125);
 9016   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9017             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9018          %}
 9019 
 9020   ins_encode %{
 9021     Register Rdst = $dst$$Register;
 9022     Register Rsrc1 = $src1$$Register;
 9023     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9024 
 9025     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9026     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9027   %}
 9028   ins_pipe(ialu_reg_mem);
 9029 %}
 9030 
 9031 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9032   match(Set dst (AndL (SubL imm_zero src) src));
 9033   predicate(UseBMI1Instructions);
 9034   effect(KILL cr, TEMP dst);
 9035 
 9036   format %{ "MOVL   $dst.hi, 0\n\t"
 9037             "BLSIL  $dst.lo, $src.lo\n\t"
 9038             "JNZ    done\n\t"
 9039             "BLSIL  $dst.hi, $src.hi\n"
 9040             "done:"
 9041          %}
 9042 
 9043   ins_encode %{
 9044     Label done;
 9045     Register Rdst = $dst$$Register;
 9046     Register Rsrc = $src$$Register;
 9047     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9048     __ blsil(Rdst, Rsrc);
 9049     __ jccb(Assembler::notZero, done);
 9050     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9051     __ bind(done);
 9052   %}
 9053   ins_pipe(ialu_reg);
 9054 %}
 9055 
 9056 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9057   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9058   predicate(UseBMI1Instructions);
 9059   effect(KILL cr, TEMP dst);
 9060 
 9061   ins_cost(125);
 9062   format %{ "MOVL   $dst.hi, 0\n\t"
 9063             "BLSIL  $dst.lo, $src\n\t"
 9064             "JNZ    done\n\t"
 9065             "BLSIL  $dst.hi, $src+4\n"
 9066             "done:"
 9067          %}
 9068 
 9069   ins_encode %{
 9070     Label done;
 9071     Register Rdst = $dst$$Register;
 9072     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9073 
 9074     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9075     __ blsil(Rdst, $src$$Address);
 9076     __ jccb(Assembler::notZero, done);
 9077     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9078     __ bind(done);
 9079   %}
 9080   ins_pipe(ialu_reg_mem);
 9081 %}
 9082 
 9083 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9084 %{
 9085   match(Set dst (XorL (AddL src minus_1) src));
 9086   predicate(UseBMI1Instructions);
 9087   effect(KILL cr, TEMP dst);
 9088 
 9089   format %{ "MOVL    $dst.hi, 0\n\t"
 9090             "BLSMSKL $dst.lo, $src.lo\n\t"
 9091             "JNC     done\n\t"
 9092             "BLSMSKL $dst.hi, $src.hi\n"
 9093             "done:"
 9094          %}
 9095 
 9096   ins_encode %{
 9097     Label done;
 9098     Register Rdst = $dst$$Register;
 9099     Register Rsrc = $src$$Register;
 9100     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9101     __ blsmskl(Rdst, Rsrc);
 9102     __ jccb(Assembler::carryClear, done);
 9103     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9104     __ bind(done);
 9105   %}
 9106 
 9107   ins_pipe(ialu_reg);
 9108 %}
 9109 
 9110 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9111 %{
 9112   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9113   predicate(UseBMI1Instructions);
 9114   effect(KILL cr, TEMP dst);
 9115 
 9116   ins_cost(125);
 9117   format %{ "MOVL    $dst.hi, 0\n\t"
 9118             "BLSMSKL $dst.lo, $src\n\t"
 9119             "JNC     done\n\t"
 9120             "BLSMSKL $dst.hi, $src+4\n"
 9121             "done:"
 9122          %}
 9123 
 9124   ins_encode %{
 9125     Label done;
 9126     Register Rdst = $dst$$Register;
 9127     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9128 
 9129     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9130     __ blsmskl(Rdst, $src$$Address);
 9131     __ jccb(Assembler::carryClear, done);
 9132     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9133     __ bind(done);
 9134   %}
 9135 
 9136   ins_pipe(ialu_reg_mem);
 9137 %}
 9138 
 9139 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9140 %{
 9141   match(Set dst (AndL (AddL src minus_1) src) );
 9142   predicate(UseBMI1Instructions);
 9143   effect(KILL cr, TEMP dst);
 9144 
 9145   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9146             "BLSRL  $dst.lo, $src.lo\n\t"
 9147             "JNC    done\n\t"
 9148             "BLSRL  $dst.hi, $src.hi\n"
 9149             "done:"
 9150   %}
 9151 
 9152   ins_encode %{
 9153     Label done;
 9154     Register Rdst = $dst$$Register;
 9155     Register Rsrc = $src$$Register;
 9156     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9157     __ blsrl(Rdst, Rsrc);
 9158     __ jccb(Assembler::carryClear, done);
 9159     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9160     __ bind(done);
 9161   %}
 9162 
 9163   ins_pipe(ialu_reg);
 9164 %}
 9165 
 9166 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9167 %{
 9168   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9169   predicate(UseBMI1Instructions);
 9170   effect(KILL cr, TEMP dst);
 9171 
 9172   ins_cost(125);
 9173   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9174             "BLSRL  $dst.lo, $src\n\t"
 9175             "JNC    done\n\t"
 9176             "BLSRL  $dst.hi, $src+4\n"
 9177             "done:"
 9178   %}
 9179 
 9180   ins_encode %{
 9181     Label done;
 9182     Register Rdst = $dst$$Register;
 9183     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9184     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9185     __ blsrl(Rdst, $src$$Address);
 9186     __ jccb(Assembler::carryClear, done);
 9187     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9188     __ bind(done);
 9189   %}
 9190 
 9191   ins_pipe(ialu_reg_mem);
 9192 %}
 9193 
 9194 // Or Long Register with Register
 9195 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9196   match(Set dst (OrL dst src));
 9197   effect(KILL cr);
 9198   format %{ "OR     $dst.lo,$src.lo\n\t"
 9199             "OR     $dst.hi,$src.hi" %}
 9200   opcode(0x0B,0x0B);
 9201   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9202   ins_pipe( ialu_reg_reg_long );
 9203 %}
 9204 
 9205 // Or Long Register with Immediate
 9206 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9207   match(Set dst (OrL dst src));
 9208   effect(KILL cr);
 9209   format %{ "OR     $dst.lo,$src.lo\n\t"
 9210             "OR     $dst.hi,$src.hi" %}
 9211   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9212   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9213   ins_pipe( ialu_reg_long );
 9214 %}
 9215 
 9216 // Or Long Register with Memory
 9217 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9218   match(Set dst (OrL dst (LoadL mem)));
 9219   effect(KILL cr);
 9220   ins_cost(125);
 9221   format %{ "OR     $dst.lo,$mem\n\t"
 9222             "OR     $dst.hi,$mem+4" %}
 9223   opcode(0x0B,0x0B);
 9224   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9225   ins_pipe( ialu_reg_long_mem );
 9226 %}
 9227 
 9228 // Xor Long Register with Register
 9229 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9230   match(Set dst (XorL dst src));
 9231   effect(KILL cr);
 9232   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9233             "XOR    $dst.hi,$src.hi" %}
 9234   opcode(0x33,0x33);
 9235   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9236   ins_pipe( ialu_reg_reg_long );
 9237 %}
 9238 
 9239 // Xor Long Register with Immediate -1
 9240 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9241   match(Set dst (XorL dst imm));
 9242   format %{ "NOT    $dst.lo\n\t"
 9243             "NOT    $dst.hi" %}
 9244   ins_encode %{
 9245      __ notl($dst$$Register);
 9246      __ notl(HIGH_FROM_LOW($dst$$Register));
 9247   %}
 9248   ins_pipe( ialu_reg_long );
 9249 %}
 9250 
 9251 // Xor Long Register with Immediate
 9252 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9253   match(Set dst (XorL dst src));
 9254   effect(KILL cr);
 9255   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9256             "XOR    $dst.hi,$src.hi" %}
 9257   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9258   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9259   ins_pipe( ialu_reg_long );
 9260 %}
 9261 
 9262 // Xor Long Register with Memory
 9263 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9264   match(Set dst (XorL dst (LoadL mem)));
 9265   effect(KILL cr);
 9266   ins_cost(125);
 9267   format %{ "XOR    $dst.lo,$mem\n\t"
 9268             "XOR    $dst.hi,$mem+4" %}
 9269   opcode(0x33,0x33);
 9270   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9271   ins_pipe( ialu_reg_long_mem );
 9272 %}
 9273 
 9274 // Shift Left Long by 1
 9275 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9276   predicate(UseNewLongLShift);
 9277   match(Set dst (LShiftL dst cnt));
 9278   effect(KILL cr);
 9279   ins_cost(100);
 9280   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9281             "ADC    $dst.hi,$dst.hi" %}
 9282   ins_encode %{
 9283     __ addl($dst$$Register,$dst$$Register);
 9284     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9285   %}
 9286   ins_pipe( ialu_reg_long );
 9287 %}
 9288 
 9289 // Shift Left Long by 2
 9290 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9291   predicate(UseNewLongLShift);
 9292   match(Set dst (LShiftL dst cnt));
 9293   effect(KILL cr);
 9294   ins_cost(100);
 9295   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9296             "ADC    $dst.hi,$dst.hi\n\t"
 9297             "ADD    $dst.lo,$dst.lo\n\t"
 9298             "ADC    $dst.hi,$dst.hi" %}
 9299   ins_encode %{
 9300     __ addl($dst$$Register,$dst$$Register);
 9301     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9302     __ addl($dst$$Register,$dst$$Register);
 9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9304   %}
 9305   ins_pipe( ialu_reg_long );
 9306 %}
 9307 
 9308 // Shift Left Long by 3
 9309 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9310   predicate(UseNewLongLShift);
 9311   match(Set dst (LShiftL dst cnt));
 9312   effect(KILL cr);
 9313   ins_cost(100);
 9314   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9315             "ADC    $dst.hi,$dst.hi\n\t"
 9316             "ADD    $dst.lo,$dst.lo\n\t"
 9317             "ADC    $dst.hi,$dst.hi\n\t"
 9318             "ADD    $dst.lo,$dst.lo\n\t"
 9319             "ADC    $dst.hi,$dst.hi" %}
 9320   ins_encode %{
 9321     __ addl($dst$$Register,$dst$$Register);
 9322     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9323     __ addl($dst$$Register,$dst$$Register);
 9324     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9325     __ addl($dst$$Register,$dst$$Register);
 9326     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9327   %}
 9328   ins_pipe( ialu_reg_long );
 9329 %}
 9330 
 9331 // Shift Left Long by 1-31
 9332 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9333   match(Set dst (LShiftL dst cnt));
 9334   effect(KILL cr);
 9335   ins_cost(200);
 9336   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9337             "SHL    $dst.lo,$cnt" %}
 9338   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9339   ins_encode( move_long_small_shift(dst,cnt) );
 9340   ins_pipe( ialu_reg_long );
 9341 %}
 9342 
 9343 // Shift Left Long by 32-63
 9344 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9345   match(Set dst (LShiftL dst cnt));
 9346   effect(KILL cr);
 9347   ins_cost(300);
 9348   format %{ "MOV    $dst.hi,$dst.lo\n"
 9349           "\tSHL    $dst.hi,$cnt-32\n"
 9350           "\tXOR    $dst.lo,$dst.lo" %}
 9351   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9352   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9353   ins_pipe( ialu_reg_long );
 9354 %}
 9355 
 9356 // Shift Left Long by variable
 9357 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9358   match(Set dst (LShiftL dst shift));
 9359   effect(KILL cr);
 9360   ins_cost(500+200);
 9361   size(17);
 9362   format %{ "TEST   $shift,32\n\t"
 9363             "JEQ,s  small\n\t"
 9364             "MOV    $dst.hi,$dst.lo\n\t"
 9365             "XOR    $dst.lo,$dst.lo\n"
 9366     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9367             "SHL    $dst.lo,$shift" %}
 9368   ins_encode( shift_left_long( dst, shift ) );
 9369   ins_pipe( pipe_slow );
 9370 %}
 9371 
 9372 // Shift Right Long by 1-31
 9373 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9374   match(Set dst (URShiftL dst cnt));
 9375   effect(KILL cr);
 9376   ins_cost(200);
 9377   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9378             "SHR    $dst.hi,$cnt" %}
 9379   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9380   ins_encode( move_long_small_shift(dst,cnt) );
 9381   ins_pipe( ialu_reg_long );
 9382 %}
 9383 
 9384 // Shift Right Long by 32-63
 9385 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9386   match(Set dst (URShiftL dst cnt));
 9387   effect(KILL cr);
 9388   ins_cost(300);
 9389   format %{ "MOV    $dst.lo,$dst.hi\n"
 9390           "\tSHR    $dst.lo,$cnt-32\n"
 9391           "\tXOR    $dst.hi,$dst.hi" %}
 9392   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9393   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9394   ins_pipe( ialu_reg_long );
 9395 %}
 9396 
 9397 // Shift Right Long by variable
 9398 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9399   match(Set dst (URShiftL dst shift));
 9400   effect(KILL cr);
 9401   ins_cost(600);
 9402   size(17);
 9403   format %{ "TEST   $shift,32\n\t"
 9404             "JEQ,s  small\n\t"
 9405             "MOV    $dst.lo,$dst.hi\n\t"
 9406             "XOR    $dst.hi,$dst.hi\n"
 9407     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9408             "SHR    $dst.hi,$shift" %}
 9409   ins_encode( shift_right_long( dst, shift ) );
 9410   ins_pipe( pipe_slow );
 9411 %}
 9412 
 9413 // Shift Right Long by 1-31
 9414 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9415   match(Set dst (RShiftL dst cnt));
 9416   effect(KILL cr);
 9417   ins_cost(200);
 9418   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9419             "SAR    $dst.hi,$cnt" %}
 9420   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9421   ins_encode( move_long_small_shift(dst,cnt) );
 9422   ins_pipe( ialu_reg_long );
 9423 %}
 9424 
 9425 // Shift Right Long by 32-63
 9426 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9427   match(Set dst (RShiftL dst cnt));
 9428   effect(KILL cr);
 9429   ins_cost(300);
 9430   format %{ "MOV    $dst.lo,$dst.hi\n"
 9431           "\tSAR    $dst.lo,$cnt-32\n"
 9432           "\tSAR    $dst.hi,31" %}
 9433   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9434   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9435   ins_pipe( ialu_reg_long );
 9436 %}
 9437 
 9438 // Shift Right arithmetic Long by variable
 9439 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9440   match(Set dst (RShiftL dst shift));
 9441   effect(KILL cr);
 9442   ins_cost(600);
 9443   size(18);
 9444   format %{ "TEST   $shift,32\n\t"
 9445             "JEQ,s  small\n\t"
 9446             "MOV    $dst.lo,$dst.hi\n\t"
 9447             "SAR    $dst.hi,31\n"
 9448     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9449             "SAR    $dst.hi,$shift" %}
 9450   ins_encode( shift_right_arith_long( dst, shift ) );
 9451   ins_pipe( pipe_slow );
 9452 %}
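
      // The arithmetic right-shift-by-variable form above differs from the logical
      // one only in how the count >= 32 case seeds the high word: it is filled with
      // copies of the sign bit (SAR $dst.hi,31) instead of being cleared with XOR,
      // and SAR rather than SHR is used for the final masked shift.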
 9453 
 9454 
 9455 //----------Double Instructions------------------------------------------------
 9456 // Double Math
 9457 
 9458 // Compare & branch
 9459 
 9460 // P6 version of float compare, sets condition codes in EFLAGS
 9461 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9462   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9463   match(Set cr (CmpD src1 src2));
 9464   effect(KILL rax);
 9465   ins_cost(150);
 9466   format %{ "FLD    $src1\n\t"
 9467             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9468             "JNP    exit\n\t"
 9469             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9470             "SAHF\n"
 9471      "exit:\tNOP               // avoid branch to branch" %}
 9472   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9473   ins_encode( Push_Reg_DPR(src1),
 9474               OpcP, RegOpc(src2),
 9475               cmpF_P6_fixup );
 9476   ins_pipe( pipe_slow );
 9477 %}
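
      // FUCOMIP sets ZF/PF/CF the same way UCOMISD does (unordered => ZF=PF=CF=1),
      // so cmpF_P6_fixup only has to patch up the NaN case: if PF is set it loads
      // AH=1 and executes SAHF, leaving CF=1 and ZF=PF=0, i.e. the unordered result
      // is reported to the following branch as "less than".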
 9478 
 9479 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9480   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9481   match(Set cr (CmpD src1 src2));
 9482   ins_cost(150);
 9483   format %{ "FLD    $src1\n\t"
 9484             "FUCOMIP ST,$src2  // P6 instruction" %}
 9485   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9486   ins_encode( Push_Reg_DPR(src1),
 9487               OpcP, RegOpc(src2));
 9488   ins_pipe( pipe_slow );
 9489 %}
 9490 
 9491 // Compare & branch
 9492 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9493   predicate(UseSSE<=1);
 9494   match(Set cr (CmpD src1 src2));
 9495   effect(KILL rax);
 9496   ins_cost(200);
 9497   format %{ "FLD    $src1\n\t"
 9498             "FCOMp  $src2\n\t"
 9499             "FNSTSW AX\n\t"
 9500             "TEST   AX,0x400\n\t"
 9501             "JZ,s   flags\n\t"
 9502             "MOV    AH,1\t# unordered treat as LT\n"
 9503     "flags:\tSAHF" %}
 9504   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9505   ins_encode( Push_Reg_DPR(src1),
 9506               OpcP, RegOpc(src2),
 9507               fpu_flags);
 9508   ins_pipe( pipe_slow );
 9509 %}
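
      // In this non-P6 form, FNSTSW copies the x87 status word into AX; bit 10
      // (0x400) is condition code C2, which FCOM sets only for an unordered compare.
      // If it is set, AH is forced to 1 before SAHF so NaN again reads as "less
      // than"; otherwise SAHF just translates C0/C2/C3 (now in AH) into CF/PF/ZF.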
 9510 
 9511 // Compare vs zero into -1,0,1
 9512 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9513   predicate(UseSSE<=1);
 9514   match(Set dst (CmpD3 src1 zero));
 9515   effect(KILL cr, KILL rax);
 9516   ins_cost(280);
 9517   format %{ "FTSTD  $dst,$src1" %}
 9518   opcode(0xE4, 0xD9);
 9519   ins_encode( Push_Reg_DPR(src1),
 9520               OpcS, OpcP, PopFPU,
 9521               CmpF_Result(dst));
 9522   ins_pipe( pipe_slow );
 9523 %}
 9524 
 9525 // Compare into -1,0,1
 9526 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9527   predicate(UseSSE<=1);
 9528   match(Set dst (CmpD3 src1 src2));
 9529   effect(KILL cr, KILL rax);
 9530   ins_cost(300);
 9531   format %{ "FCMPD  $dst,$src1,$src2" %}
 9532   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9533   ins_encode( Push_Reg_DPR(src1),
 9534               OpcP, RegOpc(src2),
 9535               CmpF_Result(dst));
 9536   ins_pipe( pipe_slow );
 9537 %}
 9538 
 9539 // float compare and set condition codes in EFLAGS by XMM regs
 9540 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9541   predicate(UseSSE>=2);
 9542   match(Set cr (CmpD src1 src2));
 9543   ins_cost(145);
 9544   format %{ "UCOMISD $src1,$src2\n\t"
 9545             "JNP,s   exit\n\t"
 9546             "PUSHF\t# saw NaN, set CF\n\t"
 9547             "AND     [rsp], #0xffffff2b\n\t"
 9548             "POPF\n"
 9549     "exit:" %}
 9550   ins_encode %{
 9551     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9552     emit_cmpfp_fixup(_masm);
 9553   %}
 9554   ins_pipe( pipe_slow );
 9555 %}
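
      // The constant used by emit_cmpfp_fixup is easiest to read in binary: ANDing
      // the saved EFLAGS with 0xffffff2b clears PF (bit 2), AF (bit 4), ZF (bit 6)
      // and SF (bit 7) while leaving CF (bit 0) alone.  Since UCOMISD reports
      // unordered as ZF=PF=CF=1, the PUSHF/AND/POPF sequence turns a NaN result
      // into plain CF=1, i.e. "below".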
 9556 
 9557 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9558   predicate(UseSSE>=2);
 9559   match(Set cr (CmpD src1 src2));
 9560   ins_cost(100);
 9561   format %{ "UCOMISD $src1,$src2" %}
 9562   ins_encode %{
 9563     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9564   %}
 9565   ins_pipe( pipe_slow );
 9566 %}
 9567 
 9568 // float compare and set condition codes in EFLAGS by XMM regs
 9569 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9570   predicate(UseSSE>=2);
 9571   match(Set cr (CmpD src1 (LoadD src2)));
 9572   ins_cost(145);
 9573   format %{ "UCOMISD $src1,$src2\n\t"
 9574             "JNP,s   exit\n\t"
 9575             "PUSHF\t# saw NaN, set CF\n\t"
 9576             "AND     [rsp], #0xffffff2b\n\t"
 9577             "POPF\n"
 9578     "exit:" %}
 9579   ins_encode %{
 9580     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9581     emit_cmpfp_fixup(_masm);
 9582   %}
 9583   ins_pipe( pipe_slow );
 9584 %}
 9585 
 9586 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9587   predicate(UseSSE>=2);
 9588   match(Set cr (CmpD src1 (LoadD src2)));
 9589   ins_cost(100);
 9590   format %{ "UCOMISD $src1,$src2" %}
 9591   ins_encode %{
 9592     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9593   %}
 9594   ins_pipe( pipe_slow );
 9595 %}
 9596 
 9597 // Compare into -1,0,1 in XMM
 9598 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9599   predicate(UseSSE>=2);
 9600   match(Set dst (CmpD3 src1 src2));
 9601   effect(KILL cr);
 9602   ins_cost(255);
 9603   format %{ "UCOMISD $src1, $src2\n\t"
 9604             "MOV     $dst, #-1\n\t"
 9605             "JP,s    done\n\t"
 9606             "JB,s    done\n\t"
 9607             "SETNE   $dst\n\t"
 9608             "MOVZB   $dst, $dst\n"
 9609     "done:" %}
 9610   ins_encode %{
 9611     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9612     emit_cmpfp3(_masm, $dst$$Register);
 9613   %}
 9614   ins_pipe( pipe_slow );
 9615 %}
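
      // The three-way compare reads UCOMISD's flag encoding directly: unordered
      // (PF=1) and "below" (CF=1) both keep the preloaded -1 via the JP/JB early
      // exits, "equal" leaves ZF=1 so SETNE yields 0, and "above" clears all three
      // flags so SETNE/MOVZB yield +1.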
 9616 
 9617 // Compare into -1,0,1 in XMM and memory
 9618 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9619   predicate(UseSSE>=2);
 9620   match(Set dst (CmpD3 src1 (LoadD src2)));
 9621   effect(KILL cr);
 9622   ins_cost(275);
 9623   format %{ "UCOMISD $src1, $src2\n\t"
 9624             "MOV     $dst, #-1\n\t"
 9625             "JP,s    done\n\t"
 9626             "JB,s    done\n\t"
 9627             "SETNE   $dst\n\t"
 9628             "MOVZB   $dst, $dst\n"
 9629     "done:" %}
 9630   ins_encode %{
 9631     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9632     emit_cmpfp3(_masm, $dst$$Register);
 9633   %}
 9634   ins_pipe( pipe_slow );
 9635 %}
 9636 
 9637 
 9638 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9639   predicate (UseSSE <=1);
 9640   match(Set dst (SubD dst src));
 9641 
 9642   format %{ "FLD    $src\n\t"
 9643             "DSUBp  $dst,ST" %}
 9644   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9645   ins_cost(150);
 9646   ins_encode( Push_Reg_DPR(src),
 9647               OpcP, RegOpc(dst) );
 9648   ins_pipe( fpu_reg_reg );
 9649 %}
 9650 
 9651 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9652   predicate (UseSSE <=1);
 9653   match(Set dst (RoundDouble (SubD src1 src2)));
 9654   ins_cost(250);
 9655 
 9656   format %{ "FLD    $src2\n\t"
 9657             "DSUB   ST,$src1\n\t"
 9658             "FSTP_D $dst\t# D-round" %}
 9659   opcode(0xD8, 0x5);
 9660   ins_encode( Push_Reg_DPR(src2),
 9661               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9662   ins_pipe( fpu_mem_reg_reg );
 9663 %}
 9664 
 9665 
 9666 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9667   predicate (UseSSE <=1);
 9668   match(Set dst (SubD dst (LoadD src)));
 9669   ins_cost(150);
 9670 
 9671   format %{ "FLD    $src\n\t"
 9672             "DSUBp  $dst,ST" %}
 9673   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9674   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9675               OpcP, RegOpc(dst) );
 9676   ins_pipe( fpu_reg_mem );
 9677 %}
 9678 
 9679 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9680   predicate (UseSSE<=1);
 9681   match(Set dst (AbsD src));
 9682   ins_cost(100);
 9683   format %{ "FABS" %}
 9684   opcode(0xE1, 0xD9);
 9685   ins_encode( OpcS, OpcP );
 9686   ins_pipe( fpu_reg_reg );
 9687 %}
 9688 
 9689 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9690   predicate(UseSSE<=1);
 9691   match(Set dst (NegD src));
 9692   ins_cost(100);
 9693   format %{ "FCHS" %}
 9694   opcode(0xE0, 0xD9);
 9695   ins_encode( OpcS, OpcP );
 9696   ins_pipe( fpu_reg_reg );
 9697 %}
 9698 
 9699 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9700   predicate(UseSSE<=1);
 9701   match(Set dst (AddD dst src));
 9702   format %{ "FLD    $src\n\t"
 9703             "DADD   $dst,ST" %}
 9704   size(4);
 9705   ins_cost(150);
 9706   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9707   ins_encode( Push_Reg_DPR(src),
 9708               OpcP, RegOpc(dst) );
 9709   ins_pipe( fpu_reg_reg );
 9710 %}
 9711 
 9712 
 9713 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9714   predicate(UseSSE<=1);
 9715   match(Set dst (RoundDouble (AddD src1 src2)));
 9716   ins_cost(250);
 9717 
 9718   format %{ "FLD    $src2\n\t"
 9719             "DADD   ST,$src1\n\t"
 9720             "FSTP_D $dst\t# D-round" %}
 9721   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9722   ins_encode( Push_Reg_DPR(src2),
 9723               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9724   ins_pipe( fpu_mem_reg_reg );
 9725 %}
 9726 
 9727 
 9728 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9729   predicate(UseSSE<=1);
 9730   match(Set dst (AddD dst (LoadD src)));
 9731   ins_cost(150);
 9732 
 9733   format %{ "FLD    $src\n\t"
 9734             "DADDp  $dst,ST" %}
 9735   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9736   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9737               OpcP, RegOpc(dst) );
 9738   ins_pipe( fpu_reg_mem );
 9739 %}
 9740 
 9741 // add-to-memory
 9742 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9743   predicate(UseSSE<=1);
 9744   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9745   ins_cost(150);
 9746 
 9747   format %{ "FLD_D  $dst\n\t"
 9748             "DADD   ST,$src\n\t"
 9749             "FST_D  $dst" %}
 9750   opcode(0xDD, 0x0);
 9751   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9752               Opcode(0xD8), RegOpc(src),
 9753               set_instruction_start,
 9754               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9755   ins_pipe( fpu_reg_mem );
 9756 %}
 9757 
 9758 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9759   predicate(UseSSE<=1);
 9760   match(Set dst (AddD dst con));
 9761   ins_cost(125);
 9762   format %{ "FLD1\n\t"
 9763             "DADDp  $dst,ST" %}
 9764   ins_encode %{
 9765     __ fld1();
 9766     __ faddp($dst$$reg);
 9767   %}
 9768   ins_pipe(fpu_reg);
 9769 %}
 9770 
 9771 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9772   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9773   match(Set dst (AddD dst con));
 9774   ins_cost(200);
 9775   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9776             "DADDp  $dst,ST" %}
 9777   ins_encode %{
 9778     __ fld_d($constantaddress($con));
 9779     __ faddp($dst$$reg);
 9780   %}
 9781   ins_pipe(fpu_reg_mem);
 9782 %}
 9783 
 9784 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9785   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9786   match(Set dst (RoundDouble (AddD src con)));
 9787   ins_cost(200);
 9788   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9789             "DADD   ST,$src\n\t"
 9790             "FSTP_D $dst\t# D-round" %}
 9791   ins_encode %{
 9792     __ fld_d($constantaddress($con));
 9793     __ fadd($src$$reg);
 9794     __ fstp_d(Address(rsp, $dst$$disp));
 9795   %}
 9796   ins_pipe(fpu_mem_reg_con);
 9797 %}
 9798 
 9799 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9800   predicate(UseSSE<=1);
 9801   match(Set dst (MulD dst src));
 9802   format %{ "FLD    $src\n\t"
 9803             "DMULp  $dst,ST" %}
 9804   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9805   ins_cost(150);
 9806   ins_encode( Push_Reg_DPR(src),
 9807               OpcP, RegOpc(dst) );
 9808   ins_pipe( fpu_reg_reg );
 9809 %}
 9810 
 9811 // Strict FP instruction biases argument before multiply then
 9812 // biases result to avoid double rounding of subnormals.
 9813 //
 9814 // scale arg1 by multiplying arg1 by 2^(-15360)
 9815 // load arg2
 9816 // multiply scaled arg1 by arg2
 9817 // rescale product by 2^(15360)
 9818 //
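      // 15360 is the difference between the extended-precision exponent bias (16383)
      // and the double-precision bias (1023).  The idea is that pre-scaling by
      // 2^(-15360) pushes a would-be subnormal double result down into the subnormal
      // range of the 80-bit format, so it is rounded to the right number of
      // significand bits in a single step; the exact power-of-two rescale by
      // 2^(+15360) then restores the magnitude.
      //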
 9819 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9820   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9821   match(Set dst (MulD dst src));
 9822   ins_cost(1);   // Select this instruction for all FP double multiplies
 9823 
 9824   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9825             "DMULp  $dst,ST\n\t"
 9826             "FLD    $src\n\t"
 9827             "DMULp  $dst,ST\n\t"
 9828             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9829             "DMULp  $dst,ST\n\t" %}
 9830   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9831   ins_encode( strictfp_bias1(dst),
 9832               Push_Reg_DPR(src),
 9833               OpcP, RegOpc(dst),
 9834               strictfp_bias2(dst) );
 9835   ins_pipe( fpu_reg_reg );
 9836 %}
 9837 
 9838 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9839   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9840   match(Set dst (MulD dst con));
 9841   ins_cost(200);
 9842   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9843             "DMULp  $dst,ST" %}
 9844   ins_encode %{
 9845     __ fld_d($constantaddress($con));
 9846     __ fmulp($dst$$reg);
 9847   %}
 9848   ins_pipe(fpu_reg_mem);
 9849 %}
 9850 
 9851 
 9852 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9853   predicate( UseSSE<=1 );
 9854   match(Set dst (MulD dst (LoadD src)));
 9855   ins_cost(200);
 9856   format %{ "FLD_D  $src\n\t"
 9857             "DMULp  $dst,ST" %}
 9858   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9859   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9860               OpcP, RegOpc(dst) );
 9861   ins_pipe( fpu_reg_mem );
 9862 %}
 9863 
 9864 //
 9865 // Cisc-alternate to reg-reg multiply
 9866 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9867   predicate( UseSSE<=1 );
 9868   match(Set dst (MulD src (LoadD mem)));
 9869   ins_cost(250);
 9870   format %{ "FLD_D  $mem\n\t"
 9871             "DMUL   ST,$src\n\t"
 9872             "FSTP_D $dst" %}
 9873   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9874   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9875               OpcReg_FPR(src),
 9876               Pop_Reg_DPR(dst) );
 9877   ins_pipe( fpu_reg_reg_mem );
 9878 %}
 9879 
 9880 
 9881 // MACRO3 -- addDPR a mulDPR
 9882 // This instruction is a '2-address' instruction in that the result goes
 9883 // back to src2.  This eliminates a move from the macro; possibly the
 9884 // register allocator will have to add it back (and maybe not).
 9885 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9886   predicate( UseSSE<=1 );
 9887   match(Set src2 (AddD (MulD src0 src1) src2));
 9888   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9889             "DMUL   ST,$src1\n\t"
 9890             "DADDp  $src2,ST" %}
 9891   ins_cost(250);
 9892   opcode(0xDD); /* LoadD DD /0 */
 9893   ins_encode( Push_Reg_FPR(src0),
 9894               FMul_ST_reg(src1),
 9895               FAddP_reg_ST(src2) );
 9896   ins_pipe( fpu_reg_reg_reg );
 9897 %}
 9898 
 9899 
 9900 // MACRO3 -- subDPR a mulDPR
 9901 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9902   predicate( UseSSE<=1 );
 9903   match(Set src2 (SubD (MulD src0 src1) src2));
 9904   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9905             "DMUL   ST,$src1\n\t"
 9906             "DSUBRp $src2,ST" %}
 9907   ins_cost(250);
 9908   ins_encode( Push_Reg_FPR(src0),
 9909               FMul_ST_reg(src1),
 9910               Opcode(0xDE), Opc_plus(0xE0,src2));
 9911   ins_pipe( fpu_reg_reg_reg );
 9912 %}
 9913 
 9914 
 9915 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9916   predicate( UseSSE<=1 );
 9917   match(Set dst (DivD dst src));
 9918 
 9919   format %{ "FLD    $src\n\t"
 9920             "FDIVp  $dst,ST" %}
 9921   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9922   ins_cost(150);
 9923   ins_encode( Push_Reg_DPR(src),
 9924               OpcP, RegOpc(dst) );
 9925   ins_pipe( fpu_reg_reg );
 9926 %}
 9927 
 9928 // Strict FP instruction biases argument before division then
 9929 // biases result, to avoid double rounding of subnormals.
 9930 //
 9931 // scale dividend by multiplying dividend by 2^(-15360)
 9932 // load divisor
 9933 // divide scaled dividend by divisor
 9934 // rescale quotient by 2^(15360)
 9935 //
 9936 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9937   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9938   match(Set dst (DivD dst src));
 9939   ins_cost(1);   // Select this instruction for all FP double divides
 9941 
 9942   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9943             "DMULp  $dst,ST\n\t"
 9944             "FLD    $src\n\t"
 9945             "FDIVp  $dst,ST\n\t"
 9946             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9947             "DMULp  $dst,ST\n\t" %}
 9948   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9949   ins_encode( strictfp_bias1(dst),
 9950               Push_Reg_DPR(src),
 9951               OpcP, RegOpc(dst),
 9952               strictfp_bias2(dst) );
 9953   ins_pipe( fpu_reg_reg );
 9954 %}
 9955 
 9956 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9957   predicate(UseSSE<=1);
 9958   match(Set dst (ModD dst src));
 9959   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9960 
 9961   format %{ "DMOD   $dst,$src" %}
 9962   ins_cost(250);
 9963   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9964               emitModDPR(),
 9965               Push_Result_Mod_DPR(src),
 9966               Pop_Reg_DPR(dst));
 9967   ins_pipe( pipe_slow );
 9968 %}
 9969 
 9970 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9971   predicate(UseSSE>=2);
 9972   match(Set dst (ModD src0 src1));
 9973   effect(KILL rax, KILL cr);
 9974 
 9975   format %{ "SUB    ESP,8\t # DMOD\n"
 9976           "\tMOVSD  [ESP+0],$src1\n"
 9977           "\tFLD_D  [ESP+0]\n"
 9978           "\tMOVSD  [ESP+0],$src0\n"
 9979           "\tFLD_D  [ESP+0]\n"
 9980      "loop:\tFPREM\n"
 9981           "\tFWAIT\n"
 9982           "\tFNSTSW AX\n"
 9983           "\tSAHF\n"
 9984           "\tJP     loop\n"
 9985           "\tFSTP_D [ESP+0]\n"
 9986           "\tMOVSD  $dst,[ESP+0]\n"
 9987           "\tADD    ESP,8\n"
 9988           "\tFSTP   ST0\t # Restore FPU Stack"
 9989     %}
 9990   ins_cost(250);
 9991   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9992   ins_pipe( pipe_slow );
 9993 %}
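
      // The loop above relies on FPREM computing only a *partial* remainder: when
      // the exponents differ widely it reduces the operand part-way and sets
      // status-word bit C2.  FNSTSW AX followed by SAHF moves C2 into PF, so
      // "JP loop" simply retries until the reduction is complete.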
 9994 
 9995 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9996   predicate (UseSSE<=1);
 9997   match(Set dst(AtanD dst src));
 9998   format %{ "DATA   $dst,$src" %}
 9999   opcode(0xD9, 0xF3);
10000   ins_encode( Push_Reg_DPR(src),
10001               OpcP, OpcS, RegOpc(dst) );
10002   ins_pipe( pipe_slow );
10003 %}
10004 
10005 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10006   predicate (UseSSE>=2);
10007   match(Set dst(AtanD dst src));
10008   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10009   format %{ "DATA   $dst,$src" %}
10010   opcode(0xD9, 0xF3);
10011   ins_encode( Push_SrcD(src),
10012               OpcP, OpcS, Push_ResultD(dst) );
10013   ins_pipe( pipe_slow );
10014 %}
10015 
10016 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10017   predicate (UseSSE<=1);
10018   match(Set dst (SqrtD src));
10019   format %{ "DSQRT  $dst,$src" %}
10020   opcode(0xFA, 0xD9);
10021   ins_encode( Push_Reg_DPR(src),
10022               OpcS, OpcP, Pop_Reg_DPR(dst) );
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 //-------------Float Instructions-------------------------------
10027 // Float Math
10028 
10029 // Code for float compare:
10030 //     fcompp();
10031 //     fwait(); fnstsw_ax();
10032 //     sahf();
10033 //     movl(dst, unordered_result);
10034 //     jcc(Assembler::parity, exit);
10035 //     movl(dst, less_result);
10036 //     jcc(Assembler::below, exit);
10037 //     movl(dst, equal_result);
10038 //     jcc(Assembler::equal, exit);
10039 //     movl(dst, greater_result);
10040 //   exit:
10041 
10042 // P6 version of float compare, sets condition codes in EFLAGS
10043 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10044   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10045   match(Set cr (CmpF src1 src2));
10046   effect(KILL rax);
10047   ins_cost(150);
10048   format %{ "FLD    $src1\n\t"
10049             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10050             "JNP    exit\n\t"
10051             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10052             "SAHF\n"
10053      "exit:\tNOP               // avoid branch to branch" %}
10054   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10055   ins_encode( Push_Reg_DPR(src1),
10056               OpcP, RegOpc(src2),
10057               cmpF_P6_fixup );
10058   ins_pipe( pipe_slow );
10059 %}
10060 
10061 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10062   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10063   match(Set cr (CmpF src1 src2));
10064   ins_cost(100);
10065   format %{ "FLD    $src1\n\t"
10066             "FUCOMIP ST,$src2  // P6 instruction" %}
10067   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10068   ins_encode( Push_Reg_DPR(src1),
10069               OpcP, RegOpc(src2));
10070   ins_pipe( pipe_slow );
10071 %}
10072 
10073 
10074 // Compare & branch
10075 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10076   predicate(UseSSE == 0);
10077   match(Set cr (CmpF src1 src2));
10078   effect(KILL rax);
10079   ins_cost(200);
10080   format %{ "FLD    $src1\n\t"
10081             "FCOMp  $src2\n\t"
10082             "FNSTSW AX\n\t"
10083             "TEST   AX,0x400\n\t"
10084             "JZ,s   flags\n\t"
10085             "MOV    AH,1\t# unordered treat as LT\n"
10086     "flags:\tSAHF" %}
10087   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10088   ins_encode( Push_Reg_DPR(src1),
10089               OpcP, RegOpc(src2),
10090               fpu_flags);
10091   ins_pipe( pipe_slow );
10092 %}
10093 
10094 // Compare vs zero into -1,0,1
10095 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10096   predicate(UseSSE == 0);
10097   match(Set dst (CmpF3 src1 zero));
10098   effect(KILL cr, KILL rax);
10099   ins_cost(280);
10100   format %{ "FTSTF  $dst,$src1" %}
10101   opcode(0xE4, 0xD9);
10102   ins_encode( Push_Reg_DPR(src1),
10103               OpcS, OpcP, PopFPU,
10104               CmpF_Result(dst));
10105   ins_pipe( pipe_slow );
10106 %}
10107 
10108 // Compare into -1,0,1
10109 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10110   predicate(UseSSE == 0);
10111   match(Set dst (CmpF3 src1 src2));
10112   effect(KILL cr, KILL rax);
10113   ins_cost(300);
10114   format %{ "FCMPF  $dst,$src1,$src2" %}
10115   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10116   ins_encode( Push_Reg_DPR(src1),
10117               OpcP, RegOpc(src2),
10118               CmpF_Result(dst));
10119   ins_pipe( pipe_slow );
10120 %}
10121 
10122 // float compare and set condition codes in EFLAGS by XMM regs
10123 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10124   predicate(UseSSE>=1);
10125   match(Set cr (CmpF src1 src2));
10126   ins_cost(145);
10127   format %{ "UCOMISS $src1,$src2\n\t"
10128             "JNP,s   exit\n\t"
10129             "PUSHF\t# saw NaN, set CF\n\t"
10130             "AND     [rsp], #0xffffff2b\n\t"
10131             "POPF\n"
10132     "exit:" %}
10133   ins_encode %{
10134     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10135     emit_cmpfp_fixup(_masm);
10136   %}
10137   ins_pipe( pipe_slow );
10138 %}
10139 
10140 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10141   predicate(UseSSE>=1);
10142   match(Set cr (CmpF src1 src2));
10143   ins_cost(100);
10144   format %{ "UCOMISS $src1,$src2" %}
10145   ins_encode %{
10146     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10147   %}
10148   ins_pipe( pipe_slow );
10149 %}
10150 
10151 // float compare and set condition codes in EFLAGS by XMM regs
10152 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10153   predicate(UseSSE>=1);
10154   match(Set cr (CmpF src1 (LoadF src2)));
10155   ins_cost(165);
10156   format %{ "UCOMISS $src1,$src2\n\t"
10157             "JNP,s   exit\n\t"
10158             "PUSHF\t# saw NaN, set CF\n\t"
10159             "AND     [rsp], #0xffffff2b\n\t"
10160             "POPF\n"
10161     "exit:" %}
10162   ins_encode %{
10163     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10164     emit_cmpfp_fixup(_masm);
10165   %}
10166   ins_pipe( pipe_slow );
10167 %}
10168 
10169 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10170   predicate(UseSSE>=1);
10171   match(Set cr (CmpF src1 (LoadF src2)));
10172   ins_cost(100);
10173   format %{ "UCOMISS $src1,$src2" %}
10174   ins_encode %{
10175     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10176   %}
10177   ins_pipe( pipe_slow );
10178 %}
10179 
10180 // Compare into -1,0,1 in XMM
10181 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10182   predicate(UseSSE>=1);
10183   match(Set dst (CmpF3 src1 src2));
10184   effect(KILL cr);
10185   ins_cost(255);
10186   format %{ "UCOMISS $src1, $src2\n\t"
10187             "MOV     $dst, #-1\n\t"
10188             "JP,s    done\n\t"
10189             "JB,s    done\n\t"
10190             "SETNE   $dst\n\t"
10191             "MOVZB   $dst, $dst\n"
10192     "done:" %}
10193   ins_encode %{
10194     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10195     emit_cmpfp3(_masm, $dst$$Register);
10196   %}
10197   ins_pipe( pipe_slow );
10198 %}
10199 
10200 // Compare into -1,0,1 in XMM and memory
10201 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10202   predicate(UseSSE>=1);
10203   match(Set dst (CmpF3 src1 (LoadF src2)));
10204   effect(KILL cr);
10205   ins_cost(275);
10206   format %{ "UCOMISS $src1, $src2\n\t"
10207             "MOV     $dst, #-1\n\t"
10208             "JP,s    done\n\t"
10209             "JB,s    done\n\t"
10210             "SETNE   $dst\n\t"
10211             "MOVZB   $dst, $dst\n"
10212     "done:" %}
10213   ins_encode %{
10214     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10215     emit_cmpfp3(_masm, $dst$$Register);
10216   %}
10217   ins_pipe( pipe_slow );
10218 %}
10219 
10220 // Spill to obtain 24-bit precision
10221 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10222   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10223   match(Set dst (SubF src1 src2));
10224 
10225   format %{ "FSUB   $dst,$src1 - $src2" %}
10226   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10227   ins_encode( Push_Reg_FPR(src1),
10228               OpcReg_FPR(src2),
10229               Pop_Mem_FPR(dst) );
10230   ins_pipe( fpu_mem_reg_reg );
10231 %}
10232 //
10233 // This instruction does not round to 24-bits
10234 instruct subFPR_reg(regFPR dst, regFPR src) %{
10235   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10236   match(Set dst (SubF dst src));
10237 
10238   format %{ "FSUB   $dst,$src" %}
10239   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10240   ins_encode( Push_Reg_FPR(src),
10241               OpcP, RegOpc(dst) );
10242   ins_pipe( fpu_reg_reg );
10243 %}
10244 
10245 // Spill to obtain 24-bit precision
10246 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10247   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10248   match(Set dst (AddF src1 src2));
10249 
10250   format %{ "FADD   $dst,$src1,$src2" %}
10251   opcode(0xD8, 0x0); /* D8 C0+i */
10252   ins_encode( Push_Reg_FPR(src2),
10253               OpcReg_FPR(src1),
10254               Pop_Mem_FPR(dst) );
10255   ins_pipe( fpu_mem_reg_reg );
10256 %}
10257 //
10258 // This instruction does not round to 24-bits
10259 instruct addFPR_reg(regFPR dst, regFPR src) %{
10260   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10261   match(Set dst (AddF dst src));
10262 
10263   format %{ "FLD    $src\n\t"
10264             "FADDp  $dst,ST" %}
10265   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10266   ins_encode( Push_Reg_FPR(src),
10267               OpcP, RegOpc(dst) );
10268   ins_pipe( fpu_reg_reg );
10269 %}
10270 
10271 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10272   predicate(UseSSE==0);
10273   match(Set dst (AbsF src));
10274   ins_cost(100);
10275   format %{ "FABS" %}
10276   opcode(0xE1, 0xD9);
10277   ins_encode( OpcS, OpcP );
10278   ins_pipe( fpu_reg_reg );
10279 %}
10280 
10281 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10282   predicate(UseSSE==0);
10283   match(Set dst (NegF src));
10284   ins_cost(100);
10285   format %{ "FCHS" %}
10286   opcode(0xE0, 0xD9);
10287   ins_encode( OpcS, OpcP );
10288   ins_pipe( fpu_reg_reg );
10289 %}
10290 
10291 // Cisc-alternate to addFPR_reg
10292 // Spill to obtain 24-bit precision
10293 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10294   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10295   match(Set dst (AddF src1 (LoadF src2)));
10296 
10297   format %{ "FLD    $src2\n\t"
10298             "FADD   ST,$src1\n\t"
10299             "FSTP_S $dst" %}
10300   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10301   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10302               OpcReg_FPR(src1),
10303               Pop_Mem_FPR(dst) );
10304   ins_pipe( fpu_mem_reg_mem );
10305 %}
10306 //
10307 // Cisc-alternate to addFPR_reg
10308 // This instruction does not round to 24-bits
10309 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10310   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10311   match(Set dst (AddF dst (LoadF src)));
10312 
10313   format %{ "FADD   $dst,$src" %}
10314   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10315   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10316               OpcP, RegOpc(dst) );
10317   ins_pipe( fpu_reg_mem );
10318 %}
10319 
10320 // Following two instructions for _222_mpegaudio
10321 // Spill to obtain 24-bit precision
10322 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10323   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10324   match(Set dst (AddF src1 src2));
10325 
10326   format %{ "FADD   $dst,$src1,$src2" %}
10327   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10328   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10329               OpcReg_FPR(src2),
10330               Pop_Mem_FPR(dst) );
10331   ins_pipe( fpu_mem_reg_mem );
10332 %}
10333 
10334 // Cisc-spill variant
10335 // Spill to obtain 24-bit precision
10336 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10337   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10338   match(Set dst (AddF src1 (LoadF src2)));
10339 
10340   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10341   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10342   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10343               set_instruction_start,
10344               OpcP, RMopc_Mem(secondary,src1),
10345               Pop_Mem_FPR(dst) );
10346   ins_pipe( fpu_mem_mem_mem );
10347 %}
10348 
10349 // Spill to obtain 24-bit precision
10350 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10351   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10352   match(Set dst (AddF src1 src2));
10353 
10354   format %{ "FADD   $dst,$src1,$src2" %}
10355   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10356   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10357               set_instruction_start,
10358               OpcP, RMopc_Mem(secondary,src1),
10359               Pop_Mem_FPR(dst) );
10360   ins_pipe( fpu_mem_mem_mem );
10361 %}
10362 
10363 
10364 // Spill to obtain 24-bit precision
10365 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10366   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10367   match(Set dst (AddF src con));
10368   format %{ "FLD    $src\n\t"
10369             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10370             "FSTP_S $dst"  %}
10371   ins_encode %{
10372     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10373     __ fadd_s($constantaddress($con));
10374     __ fstp_s(Address(rsp, $dst$$disp));
10375   %}
10376   ins_pipe(fpu_mem_reg_con);
10377 %}
10378 //
10379 // This instruction does not round to 24-bits
10380 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10381   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10382   match(Set dst (AddF src con));
10383   format %{ "FLD    $src\n\t"
10384             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10385             "FSTP   $dst"  %}
10386   ins_encode %{
10387     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10388     __ fadd_s($constantaddress($con));
10389     __ fstp_d($dst$$reg);
10390   %}
10391   ins_pipe(fpu_reg_reg_con);
10392 %}
10393 
10394 // Spill to obtain 24-bit precision
10395 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10396   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10397   match(Set dst (MulF src1 src2));
10398 
10399   format %{ "FLD    $src1\n\t"
10400             "FMUL   $src2\n\t"
10401             "FSTP_S $dst"  %}
10402   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10403   ins_encode( Push_Reg_FPR(src1),
10404               OpcReg_FPR(src2),
10405               Pop_Mem_FPR(dst) );
10406   ins_pipe( fpu_mem_reg_reg );
10407 %}
10408 //
10409 // This instruction does not round to 24-bits
10410 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10411   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10412   match(Set dst (MulF src1 src2));
10413 
10414   format %{ "FLD    $src1\n\t"
10415             "FMUL   $src2\n\t"
10416             "FSTP_S $dst"  %}
10417   opcode(0xD8, 0x1); /* D8 C8+i */
10418   ins_encode( Push_Reg_FPR(src2),
10419               OpcReg_FPR(src1),
10420               Pop_Reg_FPR(dst) );
10421   ins_pipe( fpu_reg_reg_reg );
10422 %}
10423 
10424 
10425 // Spill to obtain 24-bit precision
10426 // Cisc-alternate to reg-reg multiply
10427 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10428   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10429   match(Set dst (MulF src1 (LoadF src2)));
10430 
10431   format %{ "FLD_S  $src2\n\t"
10432             "FMUL   $src1\n\t"
10433             "FSTP_S $dst"  %}
10434   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10435   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10436               OpcReg_FPR(src1),
10437               Pop_Mem_FPR(dst) );
10438   ins_pipe( fpu_mem_reg_mem );
10439 %}
10440 //
10441 // This instruction does not round to 24-bits
10442 // Cisc-alternate to reg-reg multiply
10443 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10444   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10445   match(Set dst (MulF src1 (LoadF src2)));
10446 
10447   format %{ "FMUL   $dst,$src1,$src2" %}
10448   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10449   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10450               OpcReg_FPR(src1),
10451               Pop_Reg_FPR(dst) );
10452   ins_pipe( fpu_reg_reg_mem );
10453 %}
10454 
10455 // Spill to obtain 24-bit precision
10456 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10457   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10458   match(Set dst (MulF src1 src2));
10459 
10460   format %{ "FMUL   $dst,$src1,$src2" %}
10461   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10462   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10463               set_instruction_start,
10464               OpcP, RMopc_Mem(secondary,src1),
10465               Pop_Mem_FPR(dst) );
10466   ins_pipe( fpu_mem_mem_mem );
10467 %}
10468 
10469 // Spill to obtain 24-bit precision
10470 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10471   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10472   match(Set dst (MulF src con));
10473 
10474   format %{ "FLD    $src\n\t"
10475             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10476             "FSTP_S $dst"  %}
10477   ins_encode %{
10478     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10479     __ fmul_s($constantaddress($con));
10480     __ fstp_s(Address(rsp, $dst$$disp));
10481   %}
10482   ins_pipe(fpu_mem_reg_con);
10483 %}
10484 //
10485 // This instruction does not round to 24-bits
10486 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10487   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10488   match(Set dst (MulF src con));
10489 
10490   format %{ "FLD    $src\n\t"
10491             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10492             "FSTP   $dst"  %}
10493   ins_encode %{
10494     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10495     __ fmul_s($constantaddress($con));
10496     __ fstp_d($dst$$reg);
10497   %}
10498   ins_pipe(fpu_reg_reg_con);
10499 %}
10500 
10501 
10502 //
10503 // MACRO1 -- subsume unshared load into mulFPR
10504 // This instruction does not round to 24-bits
10505 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set dst (MulF (LoadF mem1) src));
10508 
10509   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10510             "FMUL   ST,$src\n\t"
10511             "FSTP   $dst" %}
10512   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10513   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10514               OpcReg_FPR(src),
10515               Pop_Reg_FPR(dst) );
10516   ins_pipe( fpu_reg_reg_mem );
10517 %}
10518 //
10519 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10520 // This instruction does not round to 24-bits
10521 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10522   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10523   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10524   ins_cost(95);
10525 
10526   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10527             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10528             "FADD   ST,$src2\n\t"
10529             "FSTP   $dst" %}
10530   opcode(0xD9); /* LoadF D9 /0 */
10531   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10532               FMul_ST_reg(src1),
10533               FAdd_ST_reg(src2),
10534               Pop_Reg_FPR(dst) );
10535   ins_pipe( fpu_reg_mem_reg_reg );
10536 %}
10537 
10538 // MACRO3 -- addFPR a mulFPR
10539 // This instruction does not round to 24-bits.  It is a '2-address'
10540 // instruction in that the result goes back to src2.  This eliminates
10541 // a move from the macro; possibly the register allocator will have
10542 // to add it back (and maybe not).
10543 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10544   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10545   match(Set src2 (AddF (MulF src0 src1) src2));
10546 
10547   format %{ "FLD    $src0     ===MACRO3===\n\t"
10548             "FMUL   ST,$src1\n\t"
10549             "FADDP  $src2,ST" %}
10550   opcode(0xD9); /* LoadF D9 /0 */
10551   ins_encode( Push_Reg_FPR(src0),
10552               FMul_ST_reg(src1),
10553               FAddP_reg_ST(src2) );
10554   ins_pipe( fpu_reg_reg_reg );
10555 %}
10556 
10557 // MACRO4 -- divFPR subFPR
10558 // This instruction does not round to 24-bits
10559 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10560   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10561   match(Set dst (DivF (SubF src2 src1) src3));
10562 
10563   format %{ "FLD    $src2   ===MACRO4===\n\t"
10564             "FSUB   ST,$src1\n\t"
10565             "FDIV   ST,$src3\n\t"
10566             "FSTP  $dst" %}
10567   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10568   ins_encode( Push_Reg_FPR(src2),
10569               subFPR_divFPR_encode(src1,src3),
10570               Pop_Reg_FPR(dst) );
10571   ins_pipe( fpu_reg_reg_reg_reg );
10572 %}
10573 
10574 // Spill to obtain 24-bit precision
10575 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10576   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10577   match(Set dst (DivF src1 src2));
10578 
10579   format %{ "FDIV   $dst,$src1,$src2" %}
10580   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10581   ins_encode( Push_Reg_FPR(src1),
10582               OpcReg_FPR(src2),
10583               Pop_Mem_FPR(dst) );
10584   ins_pipe( fpu_mem_reg_reg );
10585 %}
10586 //
10587 // This instruction does not round to 24-bits
10588 instruct divFPR_reg(regFPR dst, regFPR src) %{
10589   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10590   match(Set dst (DivF dst src));
10591 
10592   format %{ "FDIV   $dst,$src" %}
10593   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10594   ins_encode( Push_Reg_FPR(src),
10595               OpcP, RegOpc(dst) );
10596   ins_pipe( fpu_reg_reg );
10597 %}
10598 
10599 
10600 // Spill to obtain 24-bit precision
10601 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10602   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10603   match(Set dst (ModF src1 src2));
10604   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10605 
10606   format %{ "FMOD   $dst,$src1,$src2" %}
10607   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10608               emitModDPR(),
10609               Push_Result_Mod_DPR(src2),
10610               Pop_Mem_FPR(dst));
10611   ins_pipe( pipe_slow );
10612 %}
10613 //
10614 // This instruction does not round to 24-bits
10615 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10616   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10617   match(Set dst (ModF dst src));
10618   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10619 
10620   format %{ "FMOD   $dst,$src" %}
10621   ins_encode(Push_Reg_Mod_DPR(dst, src),
10622               emitModDPR(),
10623               Push_Result_Mod_DPR(src),
10624               Pop_Reg_FPR(dst));
10625   ins_pipe( pipe_slow );
10626 %}
10627 
10628 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10629   predicate(UseSSE>=1);
10630   match(Set dst (ModF src0 src1));
10631   effect(KILL rax, KILL cr);
10632   format %{ "SUB    ESP,4\t # FMOD\n"
10633           "\tMOVSS  [ESP+0],$src1\n"
10634           "\tFLD_S  [ESP+0]\n"
10635           "\tMOVSS  [ESP+0],$src0\n"
10636           "\tFLD_S  [ESP+0]\n"
10637      "loop:\tFPREM\n"
10638           "\tFWAIT\n"
10639           "\tFNSTSW AX\n"
10640           "\tSAHF\n"
10641           "\tJP     loop\n"
10642           "\tFSTP_S [ESP+0]\n"
10643           "\tMOVSS  $dst,[ESP+0]\n"
10644           "\tADD    ESP,4\n"
10645           "\tFSTP   ST0\t # Restore FPU Stack"
10646     %}
10647   ins_cost(250);
10648   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10649   ins_pipe( pipe_slow );
10650 %}
10651 
10652 
10653 //----------Arithmetic Conversion Instructions---------------------------------
10654 // The conversion operations are all Alpha sorted.  Please keep it that way!
10655 
10656 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10657   predicate(UseSSE==0);
10658   match(Set dst (RoundFloat src));
10659   ins_cost(125);
10660   format %{ "FST_S  $dst,$src\t# F-round" %}
10661   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10662   ins_pipe( fpu_mem_reg );
10663 %}
10664 
10665 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10666   predicate(UseSSE<=1);
10667   match(Set dst (RoundDouble src));
10668   ins_cost(125);
10669   format %{ "FST_D  $dst,$src\t# D-round" %}
10670   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10671   ins_pipe( fpu_mem_reg );
10672 %}
10673 
10674 // Force rounding to 24-bit precision and 8-bit exponent
10675 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10676   predicate(UseSSE==0);
10677   match(Set dst (ConvD2F src));
10678   format %{ "FST_S  $dst,$src\t# F-round" %}
10679   expand %{
10680     roundFloat_mem_reg(dst,src);
10681   %}
10682 %}
10683 
10684 // Force rounding to 24-bit precision and 8-bit exponent
10685 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10686   predicate(UseSSE==1);
10687   match(Set dst (ConvD2F src));
10688   effect( KILL cr );
10689   format %{ "SUB    ESP,4\n\t"
10690             "FST_S  [ESP],$src\t# F-round\n\t"
10691             "MOVSS  $dst,[ESP]\n\t"
10692             "ADD ESP,4" %}
10693   ins_encode %{
10694     __ subptr(rsp, 4);
10695     if ($src$$reg != FPR1L_enc) {
10696       __ fld_s($src$$reg-1);
10697       __ fstp_s(Address(rsp, 0));
10698     } else {
10699       __ fst_s(Address(rsp, 0));
10700     }
10701     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10702     __ addptr(rsp, 4);
10703   %}
10704   ins_pipe( pipe_slow );
10705 %}
10706 
10707 // Force rounding double precision to single precision
10708 instruct convD2F_reg(regF dst, regD src) %{
10709   predicate(UseSSE>=2);
10710   match(Set dst (ConvD2F src));
10711   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10712   ins_encode %{
10713     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10714   %}
10715   ins_pipe( pipe_slow );
10716 %}
10717 
10718 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10719   predicate(UseSSE==0);
10720   match(Set dst (ConvF2D src));
10721   format %{ "FST_S  $dst,$src\t# D-round" %}
10722   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10723   ins_pipe( fpu_reg_reg );
10724 %}
10725 
10726 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10727   predicate(UseSSE==1);
10728   match(Set dst (ConvF2D src));
10729   format %{ "FST_D  $dst,$src\t# D-round" %}
10730   expand %{
10731     roundDouble_mem_reg(dst,src);
10732   %}
10733 %}
10734 
10735 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10736   predicate(UseSSE==1);
10737   match(Set dst (ConvF2D src));
10738   effect( KILL cr );
10739   format %{ "SUB    ESP,4\n\t"
10740             "MOVSS  [ESP] $src\n\t"
10741             "FLD_S  [ESP]\n\t"
10742             "ADD    ESP,4\n\t"
10743             "FSTP   $dst\t# D-round" %}
10744   ins_encode %{
10745     __ subptr(rsp, 4);
10746     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10747     __ fld_s(Address(rsp, 0));
10748     __ addptr(rsp, 4);
10749     __ fstp_d($dst$$reg);
10750   %}
10751   ins_pipe( pipe_slow );
10752 %}
10753 
10754 instruct convF2D_reg(regD dst, regF src) %{
10755   predicate(UseSSE>=2);
10756   match(Set dst (ConvF2D src));
10757   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10758   ins_encode %{
10759     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10760   %}
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
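      // The CMP EAX,0x80000000 below works because both FIST and CVTTSD2SI write the
      // "integer indefinite" value 0x80000000 when the input is a NaN or is outside
      // int range; only then is the d2i_wrapper stub called to produce the result
      // Java requires (0 for NaN, saturation to MIN/MAX on overflow).  A legitimate
      // result of exactly 0x80000000 also takes the slow path but gets the same answer.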
10765 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10766   predicate(UseSSE<=1);
10767   match(Set dst (ConvD2I src));
10768   effect( KILL tmp, KILL cr );
10769   format %{ "FLD    $src\t# Convert double to int \n\t"
10770             "FLDCW  trunc mode\n\t"
10771             "SUB    ESP,4\n\t"
10772             "FISTp  [ESP + #0]\n\t"
10773             "FLDCW  std/24-bit mode\n\t"
10774             "POP    EAX\n\t"
10775             "CMP    EAX,0x80000000\n\t"
10776             "JNE,s  fast\n\t"
10777             "FLD_D  $src\n\t"
10778             "CALL   d2i_wrapper\n"
10779       "fast:" %}
10780   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10781   ins_pipe( pipe_slow );
10782 %}
10783 
10784 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10785 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10786   predicate(UseSSE>=2);
10787   match(Set dst (ConvD2I src));
10788   effect( KILL tmp, KILL cr );
10789   format %{ "CVTTSD2SI $dst, $src\n\t"
10790             "CMP    $dst,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "SUB    ESP, 8\n\t"
10793             "MOVSD  [ESP], $src\n\t"
10794             "FLD_D  [ESP]\n\t"
10795             "ADD    ESP, 8\n\t"
10796             "CALL   d2i_wrapper\n"
10797       "fast:" %}
10798   ins_encode %{
10799     Label fast;
10800     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10801     __ cmpl($dst$$Register, 0x80000000);
10802     __ jccb(Assembler::notEqual, fast);
10803     __ subptr(rsp, 8);
10804     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10805     __ fld_d(Address(rsp, 0));
10806     __ addptr(rsp, 8);
10807     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10808     __ post_call_nop();
10809     __ bind(fast);
10810   %}
10811   ins_pipe( pipe_slow );
10812 %}
10813 
10814 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10815   predicate(UseSSE<=1);
10816   match(Set dst (ConvD2L src));
10817   effect( KILL cr );
10818   format %{ "FLD    $src\t# Convert double to long\n\t"
10819             "FLDCW  trunc mode\n\t"
10820             "SUB    ESP,8\n\t"
10821             "FISTp  [ESP + #0]\n\t"
10822             "FLDCW  std/24-bit mode\n\t"
10823             "POP    EAX\n\t"
10824             "POP    EDX\n\t"
10825             "CMP    EDX,0x80000000\n\t"
10826             "JNE,s  fast\n\t"
10827             "TEST   EAX,EAX\n\t"
10828             "JNE,s  fast\n\t"
10829             "FLD    $src\n\t"
10830             "CALL   d2l_wrapper\n"
10831       "fast:" %}
10832   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10833   ins_pipe( pipe_slow );
10834 %}
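
      // For the long conversions the out-of-range/NaN sentinel is the 64-bit integer
      // indefinite 0x8000000000000000, which the FISTp leaves in memory as the pair
      // EDX = 0x80000000, EAX = 0; hence the two checks (CMP EDX / TEST EAX) before
      // falling back to the d2l_wrapper stub.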
10835 
10836 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10837 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10838   predicate (UseSSE>=2);
10839   match(Set dst (ConvD2L src));
10840   effect( KILL cr );
10841   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10842             "MOVSD  [ESP],$src\n\t"
10843             "FLD_D  [ESP]\n\t"
10844             "FLDCW  trunc mode\n\t"
10845             "FISTp  [ESP + #0]\n\t"
10846             "FLDCW  std/24-bit mode\n\t"
10847             "POP    EAX\n\t"
10848             "POP    EDX\n\t"
10849             "CMP    EDX,0x80000000\n\t"
10850             "JNE,s  fast\n\t"
10851             "TEST   EAX,EAX\n\t"
10852             "JNE,s  fast\n\t"
10853             "SUB    ESP,8\n\t"
10854             "MOVSD  [ESP],$src\n\t"
10855             "FLD_D  [ESP]\n\t"
10856             "ADD    ESP,8\n\t"
10857             "CALL   d2l_wrapper\n"
10858       "fast:" %}
10859   ins_encode %{
10860     Label fast;
10861     __ subptr(rsp, 8);
10862     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10863     __ fld_d(Address(rsp, 0));
10864     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10865     __ fistp_d(Address(rsp, 0));
10866     // Restore the rounding mode, mask the exception
10867     if (Compile::current()->in_24_bit_fp_mode()) {
10868       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10869     } else {
10870       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10871     }
10872     // Load the converted long, adjust CPU stack
10873     __ pop(rax);
10874     __ pop(rdx);
10875     __ cmpl(rdx, 0x80000000);
10876     __ jccb(Assembler::notEqual, fast);
10877     __ testl(rax, rax);
10878     __ jccb(Assembler::notEqual, fast);
10879     __ subptr(rsp, 8);
10880     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10881     __ fld_d(Address(rsp, 0));
10882     __ addptr(rsp, 8);
10883     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10884     __ post_call_nop();
10885     __ bind(fast);
10886   %}
10887   ins_pipe( pipe_slow );
10888 %}
10889 
10890 // Convert a double to an int.  Java semantics require we do complex
10891 // manglations in the corner cases.  So we set the rounding mode to
10892 // 'zero', store the darned double down as an int, and reset the
10893 // rounding mode to 'nearest'.  The hardware stores a flag value down
10894 // if we would overflow or convert a NAN; we check for this and
10895 // go the slow path if needed.
10896 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10897   predicate(UseSSE==0);
10898   match(Set dst (ConvF2I src));
10899   effect( KILL tmp, KILL cr );
10900   format %{ "FLD    $src\t# Convert float to int \n\t"
10901             "FLDCW  trunc mode\n\t"
10902             "SUB    ESP,4\n\t"
10903             "FISTp  [ESP + #0]\n\t"
10904             "FLDCW  std/24-bit mode\n\t"
10905             "POP    EAX\n\t"
10906             "CMP    EAX,0x80000000\n\t"
10907             "JNE,s  fast\n\t"
10908             "FLD    $src\n\t"
10909             "CALL   d2i_wrapper\n"
10910       "fast:" %}
10911   // DPR2I_encoding works for FPR2I
10912   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10913   ins_pipe( pipe_slow );
10914 %}
10915 
10916 // Convert a float in xmm to an int reg.
10917 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10918   predicate(UseSSE>=1);
10919   match(Set dst (ConvF2I src));
10920   effect( KILL tmp, KILL cr );
10921   format %{ "CVTTSS2SI $dst, $src\n\t"
10922             "CMP    $dst,0x80000000\n\t"
10923             "JNE,s  fast\n\t"
10924             "SUB    ESP, 4\n\t"
10925             "MOVSS  [ESP], $src\n\t"
10926             "FLD    [ESP]\n\t"
10927             "ADD    ESP, 4\n\t"
10928             "CALL   d2i_wrapper\n"
10929       "fast:" %}
10930   ins_encode %{
10931     Label fast;
10932     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10933     __ cmpl($dst$$Register, 0x80000000);
10934     __ jccb(Assembler::notEqual, fast);
10935     __ subptr(rsp, 4);
10936     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10937     __ fld_s(Address(rsp, 0));
10938     __ addptr(rsp, 4);
10939     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10940     __ post_call_nop();
10941     __ bind(fast);
10942   %}
10943   ins_pipe( pipe_slow );
10944 %}
10945 
10946 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10947   predicate(UseSSE==0);
10948   match(Set dst (ConvF2L src));
10949   effect( KILL cr );
10950   format %{ "FLD    $src\t# Convert float to long\n\t"
10951             "FLDCW  trunc mode\n\t"
10952             "SUB    ESP,8\n\t"
10953             "FISTp  [ESP + #0]\n\t"
10954             "FLDCW  std/24-bit mode\n\t"
10955             "POP    EAX\n\t"
10956             "POP    EDX\n\t"
10957             "CMP    EDX,0x80000000\n\t"
10958             "JNE,s  fast\n\t"
10959             "TEST   EAX,EAX\n\t"
10960             "JNE,s  fast\n\t"
10961             "FLD    $src\n\t"
10962             "CALL   d2l_wrapper\n"
10963       "fast:" %}
10964   // DPR2L_encoding works for FPR2L
10965   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10966   ins_pipe( pipe_slow );
10967 %}
10968 
10969 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10970 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10971   predicate (UseSSE>=1);
10972   match(Set dst (ConvF2L src));
10973   effect( KILL cr );
10974   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10975             "MOVSS  [ESP],$src\n\t"
10976             "FLD_S  [ESP]\n\t"
10977             "FLDCW  trunc mode\n\t"
10978             "FISTp  [ESP + #0]\n\t"
10979             "FLDCW  std/24-bit mode\n\t"
10980             "POP    EAX\n\t"
10981             "POP    EDX\n\t"
10982             "CMP    EDX,0x80000000\n\t"
10983             "JNE,s  fast\n\t"
10984             "TEST   EAX,EAX\n\t"
10985             "JNE,s  fast\n\t"
10986             "SUB    ESP,4\t# Convert float to long\n\t"
10987             "MOVSS  [ESP],$src\n\t"
10988             "FLD_S  [ESP]\n\t"
10989             "ADD    ESP,4\n\t"
10990             "CALL   d2l_wrapper\n"
10991       "fast:" %}
10992   ins_encode %{
10993     Label fast;
10994     __ subptr(rsp, 8);
10995     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10996     __ fld_s(Address(rsp, 0));
10997     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10998     __ fistp_d(Address(rsp, 0));
10999     // Restore the rounding mode, mask the exception
11000     if (Compile::current()->in_24_bit_fp_mode()) {
11001       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11002     } else {
11003       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11004     }
11005     // Load the converted long, adjust CPU stack
11006     __ pop(rax);
11007     __ pop(rdx);
11008     __ cmpl(rdx, 0x80000000);
11009     __ jccb(Assembler::notEqual, fast);
11010     __ testl(rax, rax);
11011     __ jccb(Assembler::notEqual, fast);
11012     __ subptr(rsp, 4);
11013     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11014     __ fld_s(Address(rsp, 0));
11015     __ addptr(rsp, 4);
11016     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11017     __ post_call_nop();
11018     __ bind(fast);
11019   %}
11020   ins_pipe( pipe_slow );
11021 %}
11022 
11023 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11024   predicate( UseSSE<=1 );
11025   match(Set dst (ConvI2D src));
11026   format %{ "FILD   $src\n\t"
11027             "FSTP   $dst" %}
11028   opcode(0xDB, 0x0);  /* DB /0 */
11029   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11030   ins_pipe( fpu_reg_mem );
11031 %}
11032 
11033 instruct convI2D_reg(regD dst, rRegI src) %{
11034   predicate( UseSSE>=2 && !UseXmmI2D );
11035   match(Set dst (ConvI2D src));
11036   format %{ "CVTSI2SD $dst,$src" %}
11037   ins_encode %{
11038     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11039   %}
11040   ins_pipe( pipe_slow );
11041 %}
11042 
11043 instruct convI2D_mem(regD dst, memory mem) %{
11044   predicate( UseSSE>=2 );
11045   match(Set dst (ConvI2D (LoadI mem)));
11046   format %{ "CVTSI2SD $dst,$mem" %}
11047   ins_encode %{
11048     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11049   %}
11050   ins_pipe( pipe_slow );
11051 %}
11052 
11053 instruct convXI2D_reg(regD dst, rRegI src)
11054 %{
11055   predicate( UseSSE>=2 && UseXmmI2D );
11056   match(Set dst (ConvI2D src));
11057 
11058   format %{ "MOVD  $dst,$src\n\t"
11059             "CVTDQ2PD $dst,$dst\t# i2d" %}
11060   ins_encode %{
11061     __ movdl($dst$$XMMRegister, $src$$Register);
11062     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11063   %}
11064   ins_pipe(pipe_slow); // XXX
11065 %}
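// A note on UseXmmI2D (used above) and UseXmmI2F (used by convXI2F_reg
// below): the MOVD + CVTDQ2PD/CVTDQ2PS form rewrites the whole XMM
// destination, whereas CVTSI2SD/CVTSI2SS merge into the existing register
// and therefore carry a dependency on its previous contents; which form is
// faster is CPU specific, hence the flags.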
11066 
11067 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11068   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11069   match(Set dst (ConvI2D (LoadI mem)));
11070   format %{ "FILD   $mem\n\t"
11071             "FSTP   $dst" %}
11072   opcode(0xDB);      /* DB /0 */
11073   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11074               Pop_Reg_DPR(dst));
11075   ins_pipe( fpu_reg_mem );
11076 %}
11077 
11078 // Convert a byte to a float; no rounding step needed.
11079 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11080   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11081   match(Set dst (ConvI2F src));
11082   format %{ "FILD   $src\n\t"
11083             "FSTP   $dst" %}
11084 
11085   opcode(0xDB, 0x0);  /* DB /0 */
11086   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11087   ins_pipe( fpu_reg_mem );
11088 %}
11089 
11090 // In 24-bit mode, force exponent rounding by storing back out
11091 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11092   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11093   match(Set dst (ConvI2F src));
11094   ins_cost(200);
11095   format %{ "FILD   $src\n\t"
11096             "FSTP_S $dst" %}
11097   opcode(0xDB, 0x0);  /* DB /0 */
11098   ins_encode( Push_Mem_I(src),
11099               Pop_Mem_FPR(dst));
11100   ins_pipe( fpu_mem_mem );
11101 %}
11102 
11103 // In 24-bit mode, force exponent rounding by storing back out
11104 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11105   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11106   match(Set dst (ConvI2F (LoadI mem)));
11107   ins_cost(200);
11108   format %{ "FILD   $mem\n\t"
11109             "FSTP_S $dst" %}
11110   opcode(0xDB);  /* DB /0 */
11111   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11112               Pop_Mem_FPR(dst));
11113   ins_pipe( fpu_mem_mem );
11114 %}
11115 
11116 // This instruction does not round to 24-bits
11117 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11118   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11119   match(Set dst (ConvI2F src));
11120   format %{ "FILD   $src\n\t"
11121             "FSTP   $dst" %}
11122   opcode(0xDB, 0x0);  /* DB /0 */
11123   ins_encode( Push_Mem_I(src),
11124               Pop_Reg_FPR(dst));
11125   ins_pipe( fpu_reg_mem );
11126 %}
11127 
11128 // This instruction does not round to 24-bits
11129 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11130   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11131   match(Set dst (ConvI2F (LoadI mem)));
11132   format %{ "FILD   $mem\n\t"
11133             "FSTP   $dst" %}
11134   opcode(0xDB);      /* DB /0 */
11135   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11136               Pop_Reg_FPR(dst));
11137   ins_pipe( fpu_reg_mem );
11138 %}
11139 
11140 // Convert an int to a float in xmm; no rounding step needed.
11141 instruct convI2F_reg(regF dst, rRegI src) %{
11142   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11143   match(Set dst (ConvI2F src));
11144   format %{ "CVTSI2SS $dst, $src" %}
11145   ins_encode %{
11146     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11147   %}
11148   ins_pipe( pipe_slow );
11149 %}
11150 
11151 instruct convXI2F_reg(regF dst, rRegI src)
11152 %{
11153   predicate( UseSSE>=2 && UseXmmI2F );
11154   match(Set dst (ConvI2F src));
11155 
11156   format %{ "MOVD  $dst,$src\n\t"
11157             "CVTDQ2PS $dst,$dst\t# i2f" %}
11158   ins_encode %{
11159     __ movdl($dst$$XMMRegister, $src$$Register);
11160     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11161   %}
11162   ins_pipe(pipe_slow); // XXX
11163 %}
11164 
11165 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11166   match(Set dst (ConvI2L src));
11167   effect(KILL cr);
11168   ins_cost(375);
11169   format %{ "MOV    $dst.lo,$src\n\t"
11170             "MOV    $dst.hi,$src\n\t"
11171             "SAR    $dst.hi,31" %}
11172   ins_encode(convert_int_long(dst,src));
11173   ins_pipe( ialu_reg_reg_long );
11174 %}
11175 
11176 // Zero-extend convert int to long
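// Matches e.g. Integer.toUnsignedLong(i), i.e. ((long)i & 0xFFFFFFFFL):
// immL_32bits is the 0xFFFFFFFF mask constant, so the AndL makes the sign
// extension of ConvI2L unnecessary and the high half can simply be cleared.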
11177 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11178   match(Set dst (AndL (ConvI2L src) mask) );
11179   effect( KILL flags );
11180   ins_cost(250);
11181   format %{ "MOV    $dst.lo,$src\n\t"
11182             "XOR    $dst.hi,$dst.hi" %}
11183   opcode(0x33); // XOR
11184   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11185   ins_pipe( ialu_reg_reg_long );
11186 %}
11187 
11188 // Zero-extend long
11189 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11190   match(Set dst (AndL src mask) );
11191   effect( KILL flags );
11192   ins_cost(250);
11193   format %{ "MOV    $dst.lo,$src.lo\n\t"
11194             "XOR    $dst.hi,$dst.hi" %}
11195   opcode(0x33); // XOR
11196   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11197   ins_pipe( ialu_reg_reg_long );
11198 %}
11199 
11200 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11201   predicate (UseSSE<=1);
11202   match(Set dst (ConvL2D src));
11203   effect( KILL cr );
11204   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11205             "PUSH   $src.lo\n\t"
11206             "FILD   ST,[ESP + #0]\n\t"
11207             "ADD    ESP,8\n\t"
11208             "FSTP_D $dst\t# D-round" %}
11209   opcode(0xDF, 0x5);  /* DF /5 */
11210   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11211   ins_pipe( pipe_slow );
11212 %}
11213 
11214 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11215   predicate (UseSSE>=2);
11216   match(Set dst (ConvL2D src));
11217   effect( KILL cr );
11218   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11219             "PUSH   $src.lo\n\t"
11220             "FILD_D [ESP]\n\t"
11221             "FSTP_D [ESP]\n\t"
11222             "MOVSD  $dst,[ESP]\n\t"
11223             "ADD    ESP,8" %}
11224   opcode(0xDF, 0x5);  /* DF /5 */
11225   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11226   ins_pipe( pipe_slow );
11227 %}
11228 
11229 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11230   predicate (UseSSE>=1);
11231   match(Set dst (ConvL2F src));
11232   effect( KILL cr );
11233   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11234             "PUSH   $src.lo\n\t"
11235             "FILD_D [ESP]\n\t"
11236             "FSTP_S [ESP]\n\t"
11237             "MOVSS  $dst,[ESP]\n\t"
11238             "ADD    ESP,8" %}
11239   opcode(0xDF, 0x5);  /* DF /5 */
11240   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11241   ins_pipe( pipe_slow );
11242 %}
11243 
11244 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11245   match(Set dst (ConvL2F src));
11246   effect( KILL cr );
11247   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11248             "PUSH   $src.lo\n\t"
11249             "FILD   ST,[ESP + #0]\n\t"
11250             "ADD    ESP,8\n\t"
11251             "FSTP_S $dst\t# F-round" %}
11252   opcode(0xDF, 0x5);  /* DF /5 */
11253   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11254   ins_pipe( pipe_slow );
11255 %}
11256 
11257 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11258   match(Set dst (ConvL2I src));
11259   effect( DEF dst, USE src );
11260   format %{ "MOV    $dst,$src.lo" %}
11261   ins_encode(enc_CopyL_Lo(dst,src));
11262   ins_pipe( ialu_reg_reg );
11263 %}
11264 
11265 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11266   match(Set dst (MoveF2I src));
11267   effect( DEF dst, USE src );
11268   ins_cost(100);
11269   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11270   ins_encode %{
11271     __ movl($dst$$Register, Address(rsp, $src$$disp));
11272   %}
11273   ins_pipe( ialu_reg_mem );
11274 %}
11275 
11276 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11277   predicate(UseSSE==0);
11278   match(Set dst (MoveF2I src));
11279   effect( DEF dst, USE src );
11280 
11281   ins_cost(125);
11282   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11283   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11284   ins_pipe( fpu_mem_reg );
11285 %}
11286 
11287 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11288   predicate(UseSSE>=1);
11289   match(Set dst (MoveF2I src));
11290   effect( DEF dst, USE src );
11291 
11292   ins_cost(95);
11293   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11294   ins_encode %{
11295     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11296   %}
11297   ins_pipe( pipe_slow );
11298 %}
11299 
11300 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11301   predicate(UseSSE>=2);
11302   match(Set dst (MoveF2I src));
11303   effect( DEF dst, USE src );
11304   ins_cost(85);
11305   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11306   ins_encode %{
11307     __ movdl($dst$$Register, $src$$XMMRegister);
11308   %}
11309   ins_pipe( pipe_slow );
11310 %}
11311 
11312 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11313   match(Set dst (MoveI2F src));
11314   effect( DEF dst, USE src );
11315 
11316   ins_cost(100);
11317   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11318   ins_encode %{
11319     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11320   %}
11321   ins_pipe( ialu_mem_reg );
11322 %}
11323 
11324 
11325 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11326   predicate(UseSSE==0);
11327   match(Set dst (MoveI2F src));
11328   effect(DEF dst, USE src);
11329 
11330   ins_cost(125);
11331   format %{ "FLD_S  $src\n\t"
11332             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11333   opcode(0xD9);               /* D9 /0, FLD m32real */
11334   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11335               Pop_Reg_FPR(dst) );
11336   ins_pipe( fpu_reg_mem );
11337 %}
11338 
11339 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11340   predicate(UseSSE>=1);
11341   match(Set dst (MoveI2F src));
11342   effect( DEF dst, USE src );
11343 
11344   ins_cost(95);
11345   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11346   ins_encode %{
11347     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11348   %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
11352 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11353   predicate(UseSSE>=2);
11354   match(Set dst (MoveI2F src));
11355   effect( DEF dst, USE src );
11356 
11357   ins_cost(85);
11358   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11359   ins_encode %{
11360     __ movdl($dst$$XMMRegister, $src$$Register);
11361   %}
11362   ins_pipe( pipe_slow );
11363 %}
11364 
11365 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11366   match(Set dst (MoveD2L src));
11367   effect(DEF dst, USE src);
11368 
11369   ins_cost(250);
11370   format %{ "MOV    $dst.lo,$src\n\t"
11371             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11372   opcode(0x8B, 0x8B);
11373   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11374   ins_pipe( ialu_mem_long_reg );
11375 %}
11376 
11377 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11378   predicate(UseSSE<=1);
11379   match(Set dst (MoveD2L src));
11380   effect(DEF dst, USE src);
11381 
11382   ins_cost(125);
11383   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11384   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11385   ins_pipe( fpu_mem_reg );
11386 %}
11387 
11388 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11389   predicate(UseSSE>=2);
11390   match(Set dst (MoveD2L src));
11391   effect(DEF dst, USE src);
11392   ins_cost(95);
11393   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11394   ins_encode %{
11395     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11396   %}
11397   ins_pipe( pipe_slow );
11398 %}
11399 
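// In the register-to-register form below, PSHUFLW with selector 0x4E
// (words 2,3,0,1) swaps the two 32-bit halves of the low quadword, so the
// second MOVD picks up bits 63:32 of the double for the high half of the
// long register pair (an explanatory note).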
11400 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11401   predicate(UseSSE>=2);
11402   match(Set dst (MoveD2L src));
11403   effect(DEF dst, USE src, TEMP tmp);
11404   ins_cost(85);
11405   format %{ "MOVD   $dst.lo,$src\n\t"
11406             "PSHUFLW $tmp,$src,0x4E\n\t"
11407             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11408   ins_encode %{
11409     __ movdl($dst$$Register, $src$$XMMRegister);
11410     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11411     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11412   %}
11413   ins_pipe( pipe_slow );
11414 %}
11415 
11416 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11417   match(Set dst (MoveL2D src));
11418   effect(DEF dst, USE src);
11419 
11420   ins_cost(200);
11421   format %{ "MOV    $dst,$src.lo\n\t"
11422             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11423   opcode(0x89, 0x89);
11424   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11425   ins_pipe( ialu_mem_long_reg );
11426 %}
11427 
11428 
11429 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11430   predicate(UseSSE<=1);
11431   match(Set dst (MoveL2D src));
11432   effect(DEF dst, USE src);
11433   ins_cost(125);
11434 
11435   format %{ "FLD_D  $src\n\t"
11436             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11437   opcode(0xDD);               /* DD /0, FLD m64real */
11438   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11439               Pop_Reg_DPR(dst) );
11440   ins_pipe( fpu_reg_mem );
11441 %}
11442 
11443 
11444 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11445   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11446   match(Set dst (MoveL2D src));
11447   effect(DEF dst, USE src);
11448 
11449   ins_cost(95);
11450   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11451   ins_encode %{
11452     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11453   %}
11454   ins_pipe( pipe_slow );
11455 %}
11456 
11457 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11458   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11459   match(Set dst (MoveL2D src));
11460   effect(DEF dst, USE src);
11461 
11462   ins_cost(95);
11463   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11464   ins_encode %{
11465     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11466   %}
11467   ins_pipe( pipe_slow );
11468 %}
11469 
11470 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11471   predicate(UseSSE>=2);
11472   match(Set dst (MoveL2D src));
11473   effect(TEMP dst, USE src, TEMP tmp);
11474   ins_cost(85);
11475   format %{ "MOVD   $dst,$src.lo\n\t"
11476             "MOVD   $tmp,$src.hi\n\t"
11477             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11478   ins_encode %{
11479     __ movdl($dst$$XMMRegister, $src$$Register);
11480     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11481     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11482   %}
11483   ins_pipe( pipe_slow );
11484 %}
11485 
11486 //----------------------------- CompressBits/ExpandBits ------------------------
11487 
11488 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11489   predicate(n->bottom_type()->isa_long());
11490   match(Set dst (CompressBits src mask));
11491   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11492   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11493   ins_encode %{
11494     Label exit, partial_result;
11495     // Extract the upper and lower 32 bits of the source in parallel into the
11496     // destination register pair, then merge them so that the upper results are
11497     // laid out contiguously after the lower results.
11498     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11499     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11500     __ popcntl($rtmp$$Register, $mask$$Register);
11501     // Skip merging if the bit count of the lower mask register equals 32 (the register width).
11502     __ cmpl($rtmp$$Register, 32);
11503     __ jccb(Assembler::equal, exit);
11504     // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
11505     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11506     // Shift the upper destination register left by the true bit count of the lower
11507     // mask register and merge it into the lower destination register.
11508     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11509     __ orl($dst$$Register, $rtmp$$Register);
11510     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11511     // Zero the upper destination register if the true bit count of the lower
11512     // 32-bit mask is zero, since its contents have already been merged into the
11513     // lower destination register.
11514     __ cmpl($rtmp$$Register, 0);
11515     __ jccb(Assembler::greater, partial_result);
11516     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11517     __ jmp(exit);
11518     __ bind(partial_result);
11519     // Shift the upper destination register right to drop the bits that were
11520     // already merged into the lower destination register.
11521     __ subl($rtmp$$Register, 32);
11522     __ negl($rtmp$$Register);
11523     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11524     __ bind(exit);
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
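// Worked example for the merge above (illustrative only): if popcnt(mask.lo)
// is 20 and popcnt(mask.hi) is 16, PEXT leaves 20 result bits in dst.lo and
// 16 in dst.hi; dst.hi << 20 supplies the top 12 bits of the low result word,
// and dst.hi >> (32 - 20) keeps the remaining 4 bits as the high result word,
// giving one contiguous 36-bit compressed value.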
11528 
11529 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11530   predicate(n->bottom_type()->isa_long());
11531   match(Set dst (ExpandBits src mask));
11532   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11533   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11534   ins_encode %{
11535     // The expansion (bit deposit) reads bits from the source register sequentially,
11536     // starting at the LSB, and lays them out in the destination register at the bit
11537     // positions of the true bits in the mask register.  Thus the number of source
11538     // bits consumed equals the combined true bit count of the mask register pair.
11539     Label exit, mask_clipping;
11540     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11541     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11542     __ popcntl($rtmp$$Register, $mask$$Register);
11543     // If the true bit count of the lower mask register is 32, then no bits of the
11544     // lower source register feed into the upper destination register.
11545     __ cmpl($rtmp$$Register, 32);
11546     __ jccb(Assembler::equal, exit);
11547     // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
11548     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11549     // Shift the lower source register right to discard the bits already consumed.
11550     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11551     // Deposit the remaining lower-source bits, starting from the LSB, under the
11552     // set bits of the upper mask register.
11553     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11554     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11555     __ subl($rtmp$$Register, 32);
11556     __ negl($rtmp$$Register);
11557     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11558     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11559     // Clear the set bits in the upper mask register that have already consumed
11560     // bits from the lower source register.
11561     __ bind(mask_clipping);
11562     __ blsrl($mask$$Register, $mask$$Register);
11563     __ decrementl($rtmp$$Register, 1);
11564     __ jccb(Assembler::greater, mask_clipping);
11565     // Starting from the LSB, deposit the bits of the upper source register under
11566     // the remaining set bits of the upper mask register.
11567     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11568     // Merge the partial results produced from the lower and upper source registers.
11569     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11570     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11571     __ bind(exit);
11572   %}
11573   ins_pipe( pipe_slow );
11574 %}
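// Worked example for the split above (illustrative only): if popcnt(mask.lo)
// is 20, the 20 low bits of src.lo are deposited into dst.lo; the remaining
// 12 bits of src.lo (src.lo >> 20) fill the 12 lowest set bits of mask.hi,
// those 12 mask bits are then cleared, and src.hi supplies whatever set bits
// of mask.hi remain.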
11575 
11576 // =======================================================================
11577 // fast clearing of an array
11578 // Small ClearArray non-AVX512.
11579 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11580   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11581   match(Set dummy (ClearArray cnt base));
11582   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11583 
11584   format %{ $$template
11585     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11586     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11587     $$emit$$"JG     LARGE\n\t"
11588     $$emit$$"SHL    ECX, 1\n\t"
11589     $$emit$$"DEC    ECX\n\t"
11590     $$emit$$"JS     DONE\t# Zero length\n\t"
11591     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11592     $$emit$$"DEC    ECX\n\t"
11593     $$emit$$"JGE    LOOP\n\t"
11594     $$emit$$"JMP    DONE\n\t"
11595     $$emit$$"# LARGE:\n\t"
11596     if (UseFastStosb) {
11597        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11598        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11599     } else if (UseXMMForObjInit) {
11600        $$emit$$"MOV     RDI,RAX\n\t"
11601        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11602        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11603        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11604        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11605        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11606        $$emit$$"ADD     0x40,RAX\n\t"
11607        $$emit$$"# L_zero_64_bytes:\n\t"
11608        $$emit$$"SUB     0x8,RCX\n\t"
11609        $$emit$$"JGE     L_loop\n\t"
11610        $$emit$$"ADD     0x4,RCX\n\t"
11611        $$emit$$"JL      L_tail\n\t"
11612        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11613        $$emit$$"ADD     0x20,RAX\n\t"
11614        $$emit$$"SUB     0x4,RCX\n\t"
11615        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11616        $$emit$$"ADD     0x4,RCX\n\t"
11617        $$emit$$"JLE     L_end\n\t"
11618        $$emit$$"DEC     RCX\n\t"
11619        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11620        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11621        $$emit$$"ADD     0x8,RAX\n\t"
11622        $$emit$$"DEC     RCX\n\t"
11623        $$emit$$"JGE     L_sloop\n\t"
11624        $$emit$$"# L_end:\n\t"
11625     } else {
11626        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11627        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11628     }
11629     $$emit$$"# DONE"
11630   %}
11631   ins_encode %{
11632     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11633                  $tmp$$XMMRegister, false, knoreg);
11634   %}
11635   ins_pipe( pipe_slow );
11636 %}
11637 
11638 // Small ClearArray AVX512 non-constant length.
11639 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11640   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11641   match(Set dummy (ClearArray cnt base));
11642   ins_cost(125);
11643   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11644 
11645   format %{ $$template
11646     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11647     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11648     $$emit$$"JG     LARGE\n\t"
11649     $$emit$$"SHL    ECX, 1\n\t"
11650     $$emit$$"DEC    ECX\n\t"
11651     $$emit$$"JS     DONE\t# Zero length\n\t"
11652     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11653     $$emit$$"DEC    ECX\n\t"
11654     $$emit$$"JGE    LOOP\n\t"
11655     $$emit$$"JMP    DONE\n\t"
11656     $$emit$$"# LARGE:\n\t"
11657     if (UseFastStosb) {
11658        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11659        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11660     } else if (UseXMMForObjInit) {
11661        $$emit$$"MOV     RDI,RAX\n\t"
11662        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11663        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11664        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11665        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11666        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11667        $$emit$$"ADD     0x40,RAX\n\t"
11668        $$emit$$"# L_zero_64_bytes:\n\t"
11669        $$emit$$"SUB     0x8,RCX\n\t"
11670        $$emit$$"JGE     L_loop\n\t"
11671        $$emit$$"ADD     0x4,RCX\n\t"
11672        $$emit$$"JL      L_tail\n\t"
11673        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11674        $$emit$$"ADD     0x20,RAX\n\t"
11675        $$emit$$"SUB     0x4,RCX\n\t"
11676        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11677        $$emit$$"ADD     0x4,RCX\n\t"
11678        $$emit$$"JLE     L_end\n\t"
11679        $$emit$$"DEC     RCX\n\t"
11680        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11681        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11682        $$emit$$"ADD     0x8,RAX\n\t"
11683        $$emit$$"DEC     RCX\n\t"
11684        $$emit$$"JGE     L_sloop\n\t"
11685        $$emit$$"# L_end:\n\t"
11686     } else {
11687        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11688        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11689     }
11690     $$emit$$"# DONE"
11691   %}
11692   ins_encode %{
11693     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11694                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11695   %}
11696   ins_pipe( pipe_slow );
11697 %}
11698 
11699 // Large ClearArray non-AVX512.
11700 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11701   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11702   match(Set dummy (ClearArray cnt base));
11703   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11704   format %{ $$template
11705     if (UseFastStosb) {
11706        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11707        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11708        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11709     } else if (UseXMMForObjInit) {
11710        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11711        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11712        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11713        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11714        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11715        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11716        $$emit$$"ADD     0x40,RAX\n\t"
11717        $$emit$$"# L_zero_64_bytes:\n\t"
11718        $$emit$$"SUB     0x8,RCX\n\t"
11719        $$emit$$"JGE     L_loop\n\t"
11720        $$emit$$"ADD     0x4,RCX\n\t"
11721        $$emit$$"JL      L_tail\n\t"
11722        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11723        $$emit$$"ADD     0x20,RAX\n\t"
11724        $$emit$$"SUB     0x4,RCX\n\t"
11725        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11726        $$emit$$"ADD     0x4,RCX\n\t"
11727        $$emit$$"JLE     L_end\n\t"
11728        $$emit$$"DEC     RCX\n\t"
11729        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11730        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11731        $$emit$$"ADD     0x8,RAX\n\t"
11732        $$emit$$"DEC     RCX\n\t"
11733        $$emit$$"JGE     L_sloop\n\t"
11734        $$emit$$"# L_end:\n\t"
11735     } else {
11736        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11737        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11738        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11739     }
11740     $$emit$$"# DONE"
11741   %}
11742   ins_encode %{
11743     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11744                  $tmp$$XMMRegister, true, knoreg);
11745   %}
11746   ins_pipe( pipe_slow );
11747 %}
11748 
11749 // Large ClearArray AVX512.
11750 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11751   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11752   match(Set dummy (ClearArray cnt base));
11753   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11754   format %{ $$template
11755     if (UseFastStosb) {
11756        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11757        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11758        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11759     } else if (UseXMMForObjInit) {
11760        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11761        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11762        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11763        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11764        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11765        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11766        $$emit$$"ADD     0x40,RAX\n\t"
11767        $$emit$$"# L_zero_64_bytes:\n\t"
11768        $$emit$$"SUB     0x8,RCX\n\t"
11769        $$emit$$"JGE     L_loop\n\t"
11770        $$emit$$"ADD     0x4,RCX\n\t"
11771        $$emit$$"JL      L_tail\n\t"
11772        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11773        $$emit$$"ADD     0x20,RAX\n\t"
11774        $$emit$$"SUB     0x4,RCX\n\t"
11775        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11776        $$emit$$"ADD     0x4,RCX\n\t"
11777        $$emit$$"JLE     L_end\n\t"
11778        $$emit$$"DEC     RCX\n\t"
11779        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11780        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11781        $$emit$$"ADD     0x8,RAX\n\t"
11782        $$emit$$"DEC     RCX\n\t"
11783        $$emit$$"JGE     L_sloop\n\t"
11784        $$emit$$"# L_end:\n\t"
11785     } else {
11786        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11787        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11788        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11789     }
11790     $$emit$$"# DONE"
11791   %}
11792   ins_encode %{
11793     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11794                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11795   %}
11796   ins_pipe( pipe_slow );
11797 %}
11798 
11799 // Small ClearArray AVX512 constant length.
11800 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11801 %{
11802   predicate(!((ClearArrayNode*)n)->is_large() &&
11803                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11804   match(Set dummy (ClearArray cnt base));
11805   ins_cost(100);
11806   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11807   format %{ "clear_mem_imm $base,$cnt" %}
11808   ins_encode %{
11809    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11810   %}
11811   ins_pipe(pipe_slow);
11812 %}
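// String compare: the LL/UU/LU/UL suffixes name the element encodings of
// (str1, str2) -- Latin-1 (one byte per element) or UTF-16 (two bytes per
// element).  Note that the UL variants below pass their operands to
// string_compare in swapped order (str2 before str1, cnt2 before cnt1).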
11813 
11814 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11815                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11816   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11817   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11818   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11819 
11820   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11821   ins_encode %{
11822     __ string_compare($str1$$Register, $str2$$Register,
11823                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11824                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11825   %}
11826   ins_pipe( pipe_slow );
11827 %}
11828 
11829 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11830                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11831   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11832   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11833   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11834 
11835   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11836   ins_encode %{
11837     __ string_compare($str1$$Register, $str2$$Register,
11838                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11839                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11840   %}
11841   ins_pipe( pipe_slow );
11842 %}
11843 
11844 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11845                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11846   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11847   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11848   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11849 
11850   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11851   ins_encode %{
11852     __ string_compare($str1$$Register, $str2$$Register,
11853                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11854                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11855   %}
11856   ins_pipe( pipe_slow );
11857 %}
11858 
11859 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11860                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11861   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11862   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11863   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11864 
11865   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11866   ins_encode %{
11867     __ string_compare($str1$$Register, $str2$$Register,
11868                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11869                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11870   %}
11871   ins_pipe( pipe_slow );
11872 %}
11873 
11874 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11875                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11876   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11877   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11878   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11879 
11880   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11881   ins_encode %{
11882     __ string_compare($str1$$Register, $str2$$Register,
11883                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11884                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11885   %}
11886   ins_pipe( pipe_slow );
11887 %}
11888 
11889 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11890                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11891   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11892   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11893   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11894 
11895   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11896   ins_encode %{
11897     __ string_compare($str1$$Register, $str2$$Register,
11898                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11899                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11900   %}
11901   ins_pipe( pipe_slow );
11902 %}
11903 
11904 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11905                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11906   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11907   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11908   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11909 
11910   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11911   ins_encode %{
11912     __ string_compare($str2$$Register, $str1$$Register,
11913                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11914                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11915   %}
11916   ins_pipe( pipe_slow );
11917 %}
11918 
11919 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11920                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11921   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11922   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11923   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11924 
11925   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11926   ins_encode %{
11927     __ string_compare($str2$$Register, $str1$$Register,
11928                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11929                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11930   %}
11931   ins_pipe( pipe_slow );
11932 %}
11933 
11934 // fast string equals
11935 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11936                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11937   predicate(!VM_Version::supports_avx512vlbw());
11938   match(Set result (StrEquals (Binary str1 str2) cnt));
11939   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11940 
11941   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11942   ins_encode %{
11943     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11944                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11945                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11946   %}
11947 
11948   ins_pipe( pipe_slow );
11949 %}
11950 
11951 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11952                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11953   predicate(VM_Version::supports_avx512vlbw());
11954   match(Set result (StrEquals (Binary str1 str2) cnt));
11955   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11956 
11957   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11958   ins_encode %{
11959     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11960                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11961                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11962   %}
11963 
11964   ins_pipe( pipe_slow );
11965 %}
11966 
11967 
11968 // fast search of substring with known size.
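// The constant-count fast path (string_indexofC8) is taken once the needle is
// long enough to fill one 128-bit load (icnt2 >= 16 for the Latin-1/Latin-1
// case, >= 8 otherwise); shorter needles use the general string_indexof,
// which may stage them on the stack when they would straddle a page boundary.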
11969 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11970                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11971   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11972   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11973   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11974 
11975   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11976   ins_encode %{
11977     int icnt2 = (int)$int_cnt2$$constant;
11978     if (icnt2 >= 16) {
11979       // IndexOf for constant substrings with size >= 16 elements
11980       // which don't need to be loaded through the stack.
11981       __ string_indexofC8($str1$$Register, $str2$$Register,
11982                           $cnt1$$Register, $cnt2$$Register,
11983                           icnt2, $result$$Register,
11984                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11985     } else {
11986       // Small strings are loaded through the stack if they cross a page boundary.
11987       __ string_indexof($str1$$Register, $str2$$Register,
11988                         $cnt1$$Register, $cnt2$$Register,
11989                         icnt2, $result$$Register,
11990                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11991     }
11992   %}
11993   ins_pipe( pipe_slow );
11994 %}
11995 
11996 // fast search of substring with known size.
11997 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11998                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11999   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12000   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12001   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12002 
12003   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12004   ins_encode %{
12005     int icnt2 = (int)$int_cnt2$$constant;
12006     if (icnt2 >= 8) {
12007       // IndexOf for constant substrings with size >= 8 elements
12008       // which don't need to be loaded through the stack.
12009       __ string_indexofC8($str1$$Register, $str2$$Register,
12010                           $cnt1$$Register, $cnt2$$Register,
12011                           icnt2, $result$$Register,
12012                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12013     } else {
12014       // Small strings are loaded through the stack if they cross a page boundary.
12015       __ string_indexof($str1$$Register, $str2$$Register,
12016                         $cnt1$$Register, $cnt2$$Register,
12017                         icnt2, $result$$Register,
12018                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12019     }
12020   %}
12021   ins_pipe( pipe_slow );
12022 %}
12023 
12024 // fast search of substring with known size.
12025 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12026                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12027   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12028   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12029   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12030 
12031   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12032   ins_encode %{
12033     int icnt2 = (int)$int_cnt2$$constant;
12034     if (icnt2 >= 8) {
12035       // IndexOf for constant substrings with size >= 8 elements
12036       // which don't need to be loaded through the stack.
12037       __ string_indexofC8($str1$$Register, $str2$$Register,
12038                           $cnt1$$Register, $cnt2$$Register,
12039                           icnt2, $result$$Register,
12040                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12041     } else {
12042       // Small strings are loaded through the stack if they cross a page boundary.
12043       __ string_indexof($str1$$Register, $str2$$Register,
12044                         $cnt1$$Register, $cnt2$$Register,
12045                         icnt2, $result$$Register,
12046                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12047     }
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
12052 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12053                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12054   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12055   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12056   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12057 
12058   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12059   ins_encode %{
12060     __ string_indexof($str1$$Register, $str2$$Register,
12061                       $cnt1$$Register, $cnt2$$Register,
12062                       (-1), $result$$Register,
12063                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12064   %}
12065   ins_pipe( pipe_slow );
12066 %}
12067 
12068 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12069                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12070   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12071   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12072   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12073 
12074   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12075   ins_encode %{
12076     __ string_indexof($str1$$Register, $str2$$Register,
12077                       $cnt1$$Register, $cnt2$$Register,
12078                       (-1), $result$$Register,
12079                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12080   %}
12081   ins_pipe( pipe_slow );
12082 %}
12083 
12084 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12085                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12086   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12087   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12088   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12089 
12090   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12091   ins_encode %{
12092     __ string_indexof($str1$$Register, $str2$$Register,
12093                       $cnt1$$Register, $cnt2$$Register,
12094                       (-1), $result$$Register,
12095                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12096   %}
12097   ins_pipe( pipe_slow );
12098 %}
12099 
12100 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12101                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12102   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12103   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12104   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12105   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12106   ins_encode %{
12107     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12108                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12109   %}
12110   ins_pipe( pipe_slow );
12111 %}
12112 
12113 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12114                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12115   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12116   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12117   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12118   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12119   ins_encode %{
12120     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12121                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12122   %}
12123   ins_pipe( pipe_slow );
12124 %}
12125 
12126 
12127 // fast array equals
12128 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12129                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12130 %{
12131   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12132   match(Set result (AryEq ary1 ary2));
12133   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12134   //ins_cost(300);
12135 
12136   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12137   ins_encode %{
12138     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12139                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12140                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12141   %}
12142   ins_pipe( pipe_slow );
12143 %}
12144 
12145 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12146                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12147 %{
12148   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12149   match(Set result (AryEq ary1 ary2));
12150   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12151   //ins_cost(300);
12152 
12153   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12154   ins_encode %{
12155     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12156                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12157                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12158   %}
12159   ins_pipe( pipe_slow );
12160 %}
12161 
12162 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12163                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12164 %{
12165   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12166   match(Set result (AryEq ary1 ary2));
12167   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12168   //ins_cost(300);
12169 
12170   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12171   ins_encode %{
12172     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12173                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12174                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12175   %}
12176   ins_pipe( pipe_slow );
12177 %}
12178 
12179 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12180                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12181 %{
12182   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12183   match(Set result (AryEq ary1 ary2));
12184   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12185   //ins_cost(300);
12186 
12187   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12188   ins_encode %{
12189     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12190                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12191                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12192   %}
12193   ins_pipe( pipe_slow );
12194 %}
12195 
12196 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12197                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12198 %{
12199   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12200   match(Set result (CountPositives ary1 len));
12201   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12202 
12203   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12204   ins_encode %{
12205     __ count_positives($ary1$$Register, $len$$Register,
12206                        $result$$Register, $tmp3$$Register,
12207                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12208   %}
12209   ins_pipe( pipe_slow );
12210 %}
12211 
12212 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12213                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12214 %{
12215   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12216   match(Set result (CountPositives ary1 len));
12217   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12218 
12219   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12220   ins_encode %{
12221     __ count_positives($ary1$$Register, $len$$Register,
12222                        $result$$Register, $tmp3$$Register,
12223                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12224   %}
12225   ins_pipe( pipe_slow );
12226 %}
12227 
12228 
12229 // fast char[] to byte[] compression
12230 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12231                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12232   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12233   match(Set result (StrCompressedCopy src (Binary dst len)));
12234   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12235 
12236   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12237   ins_encode %{
12238     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12239                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12240                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12241                            knoreg, knoreg);
12242   %}
12243   ins_pipe( pipe_slow );
12244 %}
12245 
12246 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12247                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12248   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12249   match(Set result (StrCompressedCopy src (Binary dst len)));
12250   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12251 
12252   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12253   ins_encode %{
12254     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12255                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12256                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12257                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12258   %}
12259   ins_pipe( pipe_slow );
12260 %}
12261 
12262 // fast byte[] to char[] inflation
12263 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12264                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12265   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12266   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12267   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12268 
12269   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12270   ins_encode %{
12271     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12272                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12273   %}
12274   ins_pipe( pipe_slow );
12275 %}
12276 
12277 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12278                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12279   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12280   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12281   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12282 
12283   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12284   ins_encode %{
12285     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12286                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12287   %}
12288   ins_pipe( pipe_slow );
12289 %}
12290 
12291 // encode char[] to byte[] in ISO_8859_1
12292 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12293                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12294                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12295   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12296   match(Set result (EncodeISOArray src (Binary dst len)));
12297   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12298 
12299   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12300   ins_encode %{
12301     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12302                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12303                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12304   %}
12305   ins_pipe( pipe_slow );
12306 %}
12307 
12308 // encode char[] to byte[] in ASCII
12309 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12310                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12311                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12312   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12313   match(Set result (EncodeISOArray src (Binary dst len)));
12314   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12315 
12316   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12317   ins_encode %{
12318     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12319                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12320                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12321   %}
12322   ins_pipe( pipe_slow );
12323 %}
12324 
12325 //----------Control Flow Instructions------------------------------------------
12326 // Signed compare Instructions
12327 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12328   match(Set cr (CmpI op1 op2));
12329   effect( DEF cr, USE op1, USE op2 );
12330   format %{ "CMP    $op1,$op2" %}
12331   opcode(0x3B);  /* Opcode 3B /r */
12332   ins_encode( OpcP, RegReg( op1, op2) );
12333   ins_pipe( ialu_cr_reg_reg );
12334 %}
12335 
12336 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12337   match(Set cr (CmpI op1 op2));
12338   effect( DEF cr, USE op1 );
12339   format %{ "CMP    $op1,$op2" %}
12340   opcode(0x81,0x07);  /* Opcode 81 /7 */
12341   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12342   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12343   ins_pipe( ialu_cr_reg_imm );
12344 %}
12345 
12346 // Cisc-spilled version of cmpI_eReg
12347 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12348   match(Set cr (CmpI op1 (LoadI op2)));
12349 
12350   format %{ "CMP    $op1,$op2" %}
12351   ins_cost(500);
12352   opcode(0x3B);  /* Opcode 3B /r */
12353   ins_encode( OpcP, RegMem( op1, op2) );
12354   ins_pipe( ialu_cr_reg_mem );
12355 %}
12356 
12357 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12358   match(Set cr (CmpI src zero));
12359   effect( DEF cr, USE src );
12360 
12361   format %{ "TEST   $src,$src" %}
12362   opcode(0x85);
12363   ins_encode( OpcP, RegReg( src, src ) );
12364   ins_pipe( ialu_cr_reg_imm );
12365 %}
12366 
12367 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12368   match(Set cr (CmpI (AndI src con) zero));
12369 
12370   format %{ "TEST   $src,$con" %}
12371   opcode(0xF7,0x00);
12372   ins_encode( OpcP, RegOpc(src), Con32(con) );
12373   ins_pipe( ialu_cr_reg_imm );
12374 %}
12375 
12376 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12377   match(Set cr (CmpI (AndI src mem) zero));
12378 
12379   format %{ "TEST   $src,$mem" %}
12380   opcode(0x85);
12381   ins_encode( OpcP, RegMem( src, mem ) );
12382   ins_pipe( ialu_cr_reg_mem );
12383 %}
12384 
12385 // Unsigned compare Instructions; really, same as signed except they
12386 // produce an eFlagsRegU instead of eFlagsReg.
12387 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12388   match(Set cr (CmpU op1 op2));
12389 
12390   format %{ "CMPu   $op1,$op2" %}
12391   opcode(0x3B);  /* Opcode 3B /r */
12392   ins_encode( OpcP, RegReg( op1, op2) );
12393   ins_pipe( ialu_cr_reg_reg );
12394 %}
12395 
12396 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12397   match(Set cr (CmpU op1 op2));
12398 
12399   format %{ "CMPu   $op1,$op2" %}
12400   opcode(0x81,0x07);  /* Opcode 81 /7 */
12401   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12402   ins_pipe( ialu_cr_reg_imm );
12403 %}
12404 
12405 // Cisc-spilled version of cmpU_eReg
12406 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12407   match(Set cr (CmpU op1 (LoadI op2)));
12408 
12409   format %{ "CMPu   $op1,$op2" %}
12410   ins_cost(500);
12411   opcode(0x3B);  /* Opcode 3B /r */
12412   ins_encode( OpcP, RegMem( op1, op2) );
12413   ins_pipe( ialu_cr_reg_mem );
12414 %}
12415 
12416 // // Cisc-spilled version of cmpU_eReg
12417 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12418 //  match(Set cr (CmpU (LoadI op1) op2));
12419 //
12420 //  format %{ "CMPu   $op1,$op2" %}
12421 //  ins_cost(500);
12422 //  opcode(0x39);  /* Opcode 39 /r */
12423 //  ins_encode( OpcP, RegMem( op1, op2) );
12424 //%}
12425 
12426 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12427   match(Set cr (CmpU src zero));
12428 
12429   format %{ "TESTu  $src,$src" %}
12430   opcode(0x85);
12431   ins_encode( OpcP, RegReg( src, src ) );
12432   ins_pipe( ialu_cr_reg_imm );
12433 %}
12434 
12435 // Unsigned pointer compare Instructions
12436 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12437   match(Set cr (CmpP op1 op2));
12438 
12439   format %{ "CMPu   $op1,$op2" %}
12440   opcode(0x3B);  /* Opcode 3B /r */
12441   ins_encode( OpcP, RegReg( op1, op2) );
12442   ins_pipe( ialu_cr_reg_reg );
12443 %}
12444 
12445 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12446   match(Set cr (CmpP op1 op2));
12447 
12448   format %{ "CMPu   $op1,$op2" %}
12449   opcode(0x81,0x07);  /* Opcode 81 /7 */
12450   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12451   ins_pipe( ialu_cr_reg_imm );
12452 %}
12453 
12454 // Cisc-spilled version of cmpP_eReg
12455 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12456   match(Set cr (CmpP op1 (LoadP op2)));
12457 
12458   format %{ "CMPu   $op1,$op2" %}
12459   ins_cost(500);
12460   opcode(0x3B);  /* Opcode 3B /r */
12461   ins_encode( OpcP, RegMem( op1, op2) );
12462   ins_pipe( ialu_cr_reg_mem );
12463 %}
12464 
12465 // // Cisc-spilled version of cmpP_eReg
12466 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12467 //  match(Set cr (CmpP (LoadP op1) op2));
12468 //
12469 //  format %{ "CMPu   $op1,$op2" %}
12470 //  ins_cost(500);
12471 //  opcode(0x39);  /* Opcode 39 /r */
12472 //  ins_encode( OpcP, RegMem( op1, op2) );
12473 //%}
12474 
12475 // Compare raw pointer (used in out-of-heap check).
12476 // Only works because non-oop pointers must be raw pointers
12477 // and raw pointers have no anti-dependencies.
12478 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12479   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12480   match(Set cr (CmpP op1 (LoadP op2)));
12481 
12482   format %{ "CMPu   $op1,$op2" %}
12483   opcode(0x3B);  /* Opcode 3B /r */
12484   ins_encode( OpcP, RegMem( op1, op2) );
12485   ins_pipe( ialu_cr_reg_mem );
12486 %}
12487 
12488 //
12489 // This will generate a signed flags result. This should be ok
12490 // since any compare against zero should be eq/neq.
12491 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12492   match(Set cr (CmpP src zero));
12493 
12494   format %{ "TEST   $src,$src" %}
12495   opcode(0x85);
12496   ins_encode( OpcP, RegReg( src, src ) );
12497   ins_pipe( ialu_cr_reg_imm );
12498 %}
12499 
12500 // Cisc-spilled version of testP_reg
12501 // This will generate a signed flags result. This should be ok
12502 // since any compare against zero should be eq/neq.
12503 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12504   match(Set cr (CmpP (LoadP op) zero));
12505 
12506   format %{ "TEST   $op,0xFFFFFFFF" %}
12507   ins_cost(500);
12508   opcode(0xF7);               /* Opcode F7 /0 */
12509   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12510   ins_pipe( ialu_cr_reg_imm );
12511 %}
12512 
12513 // Yanked all unsigned pointer compare operations.
12514 // Pointer compares are done with CmpP which is already unsigned.
12515 
12516 //----------Max and Min--------------------------------------------------------
12517 // Min Instructions
12518 ////
12519 //   *** Min and Max using the conditional move are slower than the
12520 //   *** branch version on a Pentium III.
12521 // // Conditional move for min
12522 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12523 //  effect( USE_DEF op2, USE op1, USE cr );
12524 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12525 //  opcode(0x4C,0x0F);
12526 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12527 //  ins_pipe( pipe_cmov_reg );
12528 //%}
12529 //
12530 //// Min Register with Register (P6 version)
12531 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12532 //  predicate(VM_Version::supports_cmov() );
12533 //  match(Set op2 (MinI op1 op2));
12534 //  ins_cost(200);
12535 //  expand %{
12536 //    eFlagsReg cr;
12537 //    compI_eReg(cr,op1,op2);
12538 //    cmovI_reg_lt(op2,op1,cr);
12539 //  %}
12540 //%}
12541 
12542 // Min Register with Register (generic version)
12543 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12544   match(Set dst (MinI dst src));
12545   effect(KILL flags);
12546   ins_cost(300);
12547 
12548   format %{ "MIN    $dst,$src" %}
12549   opcode(0xCC);
12550   ins_encode( min_enc(dst,src) );
12551   ins_pipe( pipe_slow );
12552 %}
12553 
12554 // Max Register with Register
12555 //   *** Min and Max using the conditional move are slower than the
12556 //   *** branch version on a Pentium III.
12557 // // Conditional move for max
12558 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12559 //  effect( USE_DEF op2, USE op1, USE cr );
12560 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12561 //  opcode(0x4F,0x0F);
12562 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12563 //  ins_pipe( pipe_cmov_reg );
12564 //%}
12565 //
12566 // // Max Register with Register (P6 version)
12567 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12568 //  predicate(VM_Version::supports_cmov() );
12569 //  match(Set op2 (MaxI op1 op2));
12570 //  ins_cost(200);
12571 //  expand %{
12572 //    eFlagsReg cr;
12573 //    compI_eReg(cr,op1,op2);
12574 //    cmovI_reg_gt(op2,op1,cr);
12575 //  %}
12576 //%}
12577 
12578 // Max Register with Register (generic version)
12579 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12580   match(Set dst (MaxI dst src));
12581   effect(KILL flags);
12582   ins_cost(300);
12583 
12584   format %{ "MAX    $dst,$src" %}
12585   opcode(0xCC);
12586   ins_encode( max_enc(dst,src) );
12587   ins_pipe( pipe_slow );
12588 %}
12589 
12590 // ============================================================================
12591 // Counted Loop limit node which represents the exact final iterator value.
12592 // Note: the resulting value should fit into the integer range, since
12593 // counted loops have a limit check for overflow.
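//
// A worked example (illustrative only, not taken from the encoding below):
// with init = 0, limit = 10 and stride = 3 the exact final value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3)  =  3 * (12 / 3)  =  12,
// i.e. the loop visits 0, 3, 6, 9 and the trip counter exits at 12, which
// still fits in the integer range as noted above.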
12594 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12595   match(Set limit (LoopLimit (Binary init limit) stride));
12596   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12597   ins_cost(300);
12598 
12599   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12600   ins_encode %{
12601     int strd = (int)$stride$$constant;
12602     assert(strd != 1 && strd != -1, "sanity");
12603     int m1 = (strd > 0) ? 1 : -1;
12604     // Convert limit to long (EAX:EDX)
12605     __ cdql();
12606     // Convert init to long (init:tmp)
12607     __ movl($tmp$$Register, $init$$Register);
12608     __ sarl($tmp$$Register, 31);
12609     // $limit - $init
12610     __ subl($limit$$Register, $init$$Register);
12611     __ sbbl($limit_hi$$Register, $tmp$$Register);
12612     // + ($stride - 1)
12613     if (strd > 0) {
12614       __ addl($limit$$Register, (strd - 1));
12615       __ adcl($limit_hi$$Register, 0);
12616       __ movl($tmp$$Register, strd);
12617     } else {
12618       __ addl($limit$$Register, (strd + 1));
12619       __ adcl($limit_hi$$Register, -1);
12620       __ lneg($limit_hi$$Register, $limit$$Register);
12621       __ movl($tmp$$Register, -strd);
12622     }
12623     // signed division: (EAX:EDX) / pos_stride
12624     __ idivl($tmp$$Register);
12625     if (strd < 0) {
12626       // restore sign
12627       __ negl($tmp$$Register);
12628     }
12629     // (EAX) * stride
12630     __ mull($tmp$$Register);
12631     // + init (ignore upper bits)
12632     __ addl($limit$$Register, $init$$Register);
12633   %}
12634   ins_pipe( pipe_slow );
12635 %}
12636 
12637 // ============================================================================
12638 // Branch Instructions
12639 // Jump Table
12640 instruct jumpXtnd(rRegI switch_val) %{
12641   match(Jump switch_val);
12642   ins_cost(350);
12643   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12644   ins_encode %{
12645     // Jump to Address(table_base + switch_reg)
12646     Address index(noreg, $switch_val$$Register, Address::times_1);
12647     __ jump(ArrayAddress($constantaddress, index), noreg);
12648   %}
12649   ins_pipe(pipe_jmp);
12650 %}
12651 
12652 // Jump Direct - Label defines a relative address from JMP+1
12653 instruct jmpDir(label labl) %{
12654   match(Goto);
12655   effect(USE labl);
12656 
12657   ins_cost(300);
12658   format %{ "JMP    $labl" %}
12659   size(5);
12660   ins_encode %{
12661     Label* L = $labl$$label;
12662     __ jmp(*L, false); // Always long jump
12663   %}
12664   ins_pipe( pipe_jmp );
12665 %}
12666 
12667 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12668 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12669   match(If cop cr);
12670   effect(USE labl);
12671 
12672   ins_cost(300);
12673   format %{ "J$cop    $labl" %}
12674   size(6);
12675   ins_encode %{
12676     Label* L = $labl$$label;
12677     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12678   %}
12679   ins_pipe( pipe_jcc );
12680 %}
12681 
12682 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12683 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12684   match(CountedLoopEnd cop cr);
12685   effect(USE labl);
12686 
12687   ins_cost(300);
12688   format %{ "J$cop    $labl\t# Loop end" %}
12689   size(6);
12690   ins_encode %{
12691     Label* L = $labl$$label;
12692     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12693   %}
12694   ins_pipe( pipe_jcc );
12695 %}
12696 
12697 // Jump Direct Conditional - using unsigned comparison
12698 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12699   match(If cop cmp);
12700   effect(USE labl);
12701 
12702   ins_cost(300);
12703   format %{ "J$cop,u  $labl" %}
12704   size(6);
12705   ins_encode %{
12706     Label* L = $labl$$label;
12707     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12708   %}
12709   ins_pipe(pipe_jcc);
12710 %}
12711 
12712 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12713   match(If cop cmp);
12714   effect(USE labl);
12715 
12716   ins_cost(200);
12717   format %{ "J$cop,u  $labl" %}
12718   size(6);
12719   ins_encode %{
12720     Label* L = $labl$$label;
12721     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12722   %}
12723   ins_pipe(pipe_jcc);
12724 %}
12725 
12726 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12727   match(If cop cmp);
12728   effect(USE labl);
12729 
12730   ins_cost(200);
12731   format %{ $$template
12732     if ($cop$$cmpcode == Assembler::notEqual) {
12733       $$emit$$"JP,u   $labl\n\t"
12734       $$emit$$"J$cop,u   $labl"
12735     } else {
12736       $$emit$$"JP,u   done\n\t"
12737       $$emit$$"J$cop,u   $labl\n\t"
12738       $$emit$$"done:"
12739     }
12740   %}
12741   ins_encode %{
12742     Label* l = $labl$$label;
12743     if ($cop$$cmpcode == Assembler::notEqual) {
12744       __ jcc(Assembler::parity, *l, false);
12745       __ jcc(Assembler::notEqual, *l, false);
12746     } else if ($cop$$cmpcode == Assembler::equal) {
12747       Label done;
12748       __ jccb(Assembler::parity, done);
12749       __ jcc(Assembler::equal, *l, false);
12750       __ bind(done);
12751     } else {
12752        ShouldNotReachHere();
12753     }
12754   %}
12755   ins_pipe(pipe_jcc);
12756 %}
12757 
12758 // ============================================================================
12759 // The second, slow half of a subtype check.  Scan the subklass's secondary
12760 // superklass array for an instance of the superklass.  Set a hidden internal
12761 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12762 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
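//
// Roughly equivalent C-like pseudocode for the scan below (an illustrative
// sketch only; the names are abbreviations, not the actual runtime fields):
//
//   Klass** p = sub->secondary_supers_data;     // EDI
//   int     n = sub->secondary_supers_length;   // ECX
//   while (n-- > 0) {
//     if (*p++ == super) {                      // REPNE SCASD against EAX
//       sub->secondary_super_cache = super;     // remember the hit
//       return 0;                               // zero / Z flag  => hit
//     }
//   }
//   return 1;                                   // non-zero / NZ  => miss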
12763 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12764   match(Set result (PartialSubtypeCheck sub super));
12765   effect( KILL rcx, KILL cr );
12766 
12767   ins_cost(1100);  // slightly larger than the next version
12768   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12769             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12770             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12771             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12772             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12773             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12774             "XOR    $result,$result\t# Hit: EDI zero\n\t"
12775      "miss:\t" %}
12776 
12777   opcode(0x1); // Force a XOR of EDI
12778   ins_encode( enc_PartialSubtypeCheck() );
12779   ins_pipe( pipe_slow );
12780 %}
12781 
12782 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12783   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12784   effect( KILL rcx, KILL result );
12785 
12786   ins_cost(1000);
12787   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12788             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12789             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12790             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12791             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12792             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12793      "miss:\t" %}
12794 
12795   opcode(0x0);  // No need to XOR EDI
12796   ins_encode( enc_PartialSubtypeCheck() );
12797   ins_pipe( pipe_slow );
12798 %}
12799 
12800 // ============================================================================
12801 // Branch Instructions -- short offset versions
12802 //
12803 // These instructions are used to replace jumps of a long offset (the default
12804 // match) with jumps of a shorter offset.  These instructions are all tagged
12805 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12806 // match rules in general matching.  Instead, the ADLC generates a conversion
12807 // method in the MachNode which can be used to do in-place replacement of the
12808 // long variant with the shorter variant.  Whether the short form can actually
12809 // be used is determined by the is_short_branch_offset() predicate in the
12810 // machine-specific code section of the file.
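//
// For example (plain x86 encoding facts, shown only for illustration): an
// unconditional JMP rel32 is 5 bytes (E9 xx xx xx xx) while JMP rel8 is only
// 2 bytes (EB xx), and Jcc rel32 is 6 bytes versus 2 bytes for Jcc rel8,
// which is why the long forms above declare size(5)/size(6) and the short
// forms below declare size(2).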
12811 
12812 // Jump Direct - Label defines a relative address from JMP+1
12813 instruct jmpDir_short(label labl) %{
12814   match(Goto);
12815   effect(USE labl);
12816 
12817   ins_cost(300);
12818   format %{ "JMP,s  $labl" %}
12819   size(2);
12820   ins_encode %{
12821     Label* L = $labl$$label;
12822     __ jmpb(*L);
12823   %}
12824   ins_pipe( pipe_jmp );
12825   ins_short_branch(1);
12826 %}
12827 
12828 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12829 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12830   match(If cop cr);
12831   effect(USE labl);
12832 
12833   ins_cost(300);
12834   format %{ "J$cop,s  $labl" %}
12835   size(2);
12836   ins_encode %{
12837     Label* L = $labl$$label;
12838     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12839   %}
12840   ins_pipe( pipe_jcc );
12841   ins_short_branch(1);
12842 %}
12843 
12844 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12845 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12846   match(CountedLoopEnd cop cr);
12847   effect(USE labl);
12848 
12849   ins_cost(300);
12850   format %{ "J$cop,s  $labl\t# Loop end" %}
12851   size(2);
12852   ins_encode %{
12853     Label* L = $labl$$label;
12854     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12855   %}
12856   ins_pipe( pipe_jcc );
12857   ins_short_branch(1);
12858 %}
12859 
12860 // Jump Direct Conditional - using unsigned comparison
12861 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12862   match(If cop cmp);
12863   effect(USE labl);
12864 
12865   ins_cost(300);
12866   format %{ "J$cop,us $labl" %}
12867   size(2);
12868   ins_encode %{
12869     Label* L = $labl$$label;
12870     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12871   %}
12872   ins_pipe( pipe_jcc );
12873   ins_short_branch(1);
12874 %}
12875 
12876 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12877   match(If cop cmp);
12878   effect(USE labl);
12879 
12880   ins_cost(300);
12881   format %{ "J$cop,us $labl" %}
12882   size(2);
12883   ins_encode %{
12884     Label* L = $labl$$label;
12885     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12886   %}
12887   ins_pipe( pipe_jcc );
12888   ins_short_branch(1);
12889 %}
12890 
12891 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12892   match(If cop cmp);
12893   effect(USE labl);
12894 
12895   ins_cost(300);
12896   format %{ $$template
12897     if ($cop$$cmpcode == Assembler::notEqual) {
12898       $$emit$$"JP,u,s   $labl\n\t"
12899       $$emit$$"J$cop,u,s   $labl"
12900     } else {
12901       $$emit$$"JP,u,s   done\n\t"
12902       $$emit$$"J$cop,u,s  $labl\n\t"
12903       $$emit$$"done:"
12904     }
12905   %}
12906   size(4);
12907   ins_encode %{
12908     Label* l = $labl$$label;
12909     if ($cop$$cmpcode == Assembler::notEqual) {
12910       __ jccb(Assembler::parity, *l);
12911       __ jccb(Assembler::notEqual, *l);
12912     } else if ($cop$$cmpcode == Assembler::equal) {
12913       Label done;
12914       __ jccb(Assembler::parity, done);
12915       __ jccb(Assembler::equal, *l);
12916       __ bind(done);
12917     } else {
12918        ShouldNotReachHere();
12919     }
12920   %}
12921   ins_pipe(pipe_jcc);
12922   ins_short_branch(1);
12923 %}
12924 
12925 // ============================================================================
12926 // Long Compare
12927 //
12928 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12929 // is tricky.  The flavor of compare used depends on whether we are testing
12930 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12931 // The GE test is the negated LT test.  The LE test can be had by commuting
12932 // the operands: the GE test on the swapped operands is exactly LE, and
12933 // negating that gives the GT test.  The EQ test is done by OR'ing the high
12934 // and low halves together (the OR sets the flags), and NE is negated from that.
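//
// A minimal illustrative sketch of the reg-reg LT case (hi:lo pairs a and b):
//   CMP  a.lo, b.lo        // compare the low halves, producing a borrow
//   MOV  tmp,  a.hi
//   SBB  tmp,  b.hi        // fold the borrow into the high-half subtraction
//   JL   a_lt_b            // signed less-than on the resulting flags
// The EQ-against-zero test simply ORs the two halves together and tests the
// result for zero.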
12935 
12936 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12937 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12938 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12939 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12940 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12941 // foo match ends up with the wrong leaf.  One fix is to not match both
12942 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12943 // both forms beat the trinary form of long-compare and both are very useful
12944 // on Intel which has so few registers.
12945 
12946 // Manifest a CmpL result in an integer register.  Very painful.
12947 // This is the test to avoid.
12948 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12949   match(Set dst (CmpL3 src1 src2));
12950   effect( KILL flags );
12951   ins_cost(1000);
12952   format %{ "XOR    $dst,$dst\n\t"
12953             "CMP    $src1.hi,$src2.hi\n\t"
12954             "JLT,s  m_one\n\t"
12955             "JGT,s  p_one\n\t"
12956             "CMP    $src1.lo,$src2.lo\n\t"
12957             "JB,s   m_one\n\t"
12958             "JEQ,s  done\n"
12959     "p_one:\tINC    $dst\n\t"
12960             "JMP,s  done\n"
12961     "m_one:\tDEC    $dst\n"
12962      "done:" %}
12963   ins_encode %{
12964     Label p_one, m_one, done;
12965     __ xorptr($dst$$Register, $dst$$Register);
12966     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12967     __ jccb(Assembler::less,    m_one);
12968     __ jccb(Assembler::greater, p_one);
12969     __ cmpl($src1$$Register, $src2$$Register);
12970     __ jccb(Assembler::below,   m_one);
12971     __ jccb(Assembler::equal,   done);
12972     __ bind(p_one);
12973     __ incrementl($dst$$Register);
12974     __ jmpb(done);
12975     __ bind(m_one);
12976     __ decrementl($dst$$Register);
12977     __ bind(done);
12978   %}
12979   ins_pipe( pipe_slow );
12980 %}
12981 
12982 //======
12983 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12984 // compares.  Can be used for LE or GT compares by reversing arguments.
12985 // NOT GOOD FOR EQ/NE tests.
12986 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12987   match( Set flags (CmpL src zero ));
12988   ins_cost(100);
12989   format %{ "TEST   $src.hi,$src.hi" %}
12990   opcode(0x85);
12991   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12992   ins_pipe( ialu_cr_reg_reg );
12993 %}
12994 
12995 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12996 // compares.  Can be used for LE or GT compares by reversing arguments.
12997 // NOT GOOD FOR EQ/NE tests.
12998 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12999   match( Set flags (CmpL src1 src2 ));
13000   effect( TEMP tmp );
13001   ins_cost(300);
13002   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13003             "MOV    $tmp,$src1.hi\n\t"
13004             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13005   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13006   ins_pipe( ialu_cr_reg_reg );
13007 %}
13008 
13009 // Long compares reg < zero/reg OR reg >= zero/reg.
13010 // Just a wrapper for a normal branch, plus the predicate test.
13011 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13012   match(If cmp flags);
13013   effect(USE labl);
13014   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13015   expand %{
13016     jmpCon(cmp,flags,labl);    // JLT or JGE...
13017   %}
13018 %}
13019 
13020 //======
13021 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13022 // compares.  Can be used for LE or GT compares by reversing arguments.
13023 // NOT GOOD FOR EQ/NE tests.
13024 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13025   match(Set flags (CmpUL src zero));
13026   ins_cost(100);
13027   format %{ "TEST   $src.hi,$src.hi" %}
13028   opcode(0x85);
13029   ins_encode(OpcP, RegReg_Hi2(src, src));
13030   ins_pipe(ialu_cr_reg_reg);
13031 %}
13032 
13033 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13034 // compares.  Can be used for LE or GT compares by reversing arguments.
13035 // NOT GOOD FOR EQ/NE tests.
13036 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13037   match(Set flags (CmpUL src1 src2));
13038   effect(TEMP tmp);
13039   ins_cost(300);
13040   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13041             "MOV    $tmp,$src1.hi\n\t"
13042             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13043   ins_encode(long_cmp_flags2(src1, src2, tmp));
13044   ins_pipe(ialu_cr_reg_reg);
13045 %}
13046 
13047 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13048 // Just a wrapper for a normal branch, plus the predicate test.
13049 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13050   match(If cmp flags);
13051   effect(USE labl);
13052   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13053   expand %{
13054     jmpCon(cmp, flags, labl);    // JLT or JGE...
13055   %}
13056 %}
13057 
13058 // Compare 2 longs and CMOVE longs.
13059 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13060   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13061   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13062   ins_cost(400);
13063   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13064             "CMOV$cmp $dst.hi,$src.hi" %}
13065   opcode(0x0F,0x40);
13066   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13067   ins_pipe( pipe_cmov_reg_long );
13068 %}
13069 
13070 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13071   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13072   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13073   ins_cost(500);
13074   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13075             "CMOV$cmp $dst.hi,$src.hi" %}
13076   opcode(0x0F,0x40);
13077   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13078   ins_pipe( pipe_cmov_reg_long );
13079 %}
13080 
13081 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13082   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13083   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13084   ins_cost(400);
13085   expand %{
13086     cmovLL_reg_LTGE(cmp, flags, dst, src);
13087   %}
13088 %}
13089 
13090 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13091   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13092   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13093   ins_cost(500);
13094   expand %{
13095     cmovLL_mem_LTGE(cmp, flags, dst, src);
13096   %}
13097 %}
13098 
13099 // Compare 2 longs and CMOVE ints.
13100 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13101   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13102   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13103   ins_cost(200);
13104   format %{ "CMOV$cmp $dst,$src" %}
13105   opcode(0x0F,0x40);
13106   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13107   ins_pipe( pipe_cmov_reg );
13108 %}
13109 
13110 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13111   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13112   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13113   ins_cost(250);
13114   format %{ "CMOV$cmp $dst,$src" %}
13115   opcode(0x0F,0x40);
13116   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13117   ins_pipe( pipe_cmov_mem );
13118 %}
13119 
13120 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13121   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13122   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13123   ins_cost(200);
13124   expand %{
13125     cmovII_reg_LTGE(cmp, flags, dst, src);
13126   %}
13127 %}
13128 
13129 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13130   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13131   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13132   ins_cost(250);
13133   expand %{
13134     cmovII_mem_LTGE(cmp, flags, dst, src);
13135   %}
13136 %}
13137 
13138 // Compare 2 longs and CMOVE ptrs.
13139 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13140   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13141   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13142   ins_cost(200);
13143   format %{ "CMOV$cmp $dst,$src" %}
13144   opcode(0x0F,0x40);
13145   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13146   ins_pipe( pipe_cmov_reg );
13147 %}
13148 
13149 // Compare 2 unsigned longs and CMOVE ptrs.
13150 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13151   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13152   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13153   ins_cost(200);
13154   expand %{
13155     cmovPP_reg_LTGE(cmp,flags,dst,src);
13156   %}
13157 %}
13158 
13159 // Compare 2 longs and CMOVE doubles
13160 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13161   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13162   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13163   ins_cost(200);
13164   expand %{
13165     fcmovDPR_regS(cmp,flags,dst,src);
13166   %}
13167 %}
13168 
13169 // Compare 2 longs and CMOVE doubles
13170 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13171   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13172   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13173   ins_cost(200);
13174   expand %{
13175     fcmovD_regS(cmp,flags,dst,src);
13176   %}
13177 %}
13178 
13179 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13180   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13181   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13182   ins_cost(200);
13183   expand %{
13184     fcmovFPR_regS(cmp,flags,dst,src);
13185   %}
13186 %}
13187 
13188 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13189   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13190   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13191   ins_cost(200);
13192   expand %{
13193     fcmovF_regS(cmp,flags,dst,src);
13194   %}
13195 %}
13196 
13197 //======
13198 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13199 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13200   match( Set flags (CmpL src zero ));
13201   effect(TEMP tmp);
13202   ins_cost(200);
13203   format %{ "MOV    $tmp,$src.lo\n\t"
13204             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13205   ins_encode( long_cmp_flags0( src, tmp ) );
13206   ins_pipe( ialu_reg_reg_long );
13207 %}
13208 
13209 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13210 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13211   match( Set flags (CmpL src1 src2 ));
13212   ins_cost(200+300);
13213   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13214             "JNE,s  skip\n\t"
13215             "CMP    $src1.hi,$src2.hi\n\t"
13216      "skip:\t" %}
13217   ins_encode( long_cmp_flags1( src1, src2 ) );
13218   ins_pipe( ialu_cr_reg_reg );
13219 %}
13220 
13221 // Long compare reg == zero/reg OR reg != zero/reg
13222 // Just a wrapper for a normal branch, plus the predicate test.
13223 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13224   match(If cmp flags);
13225   effect(USE labl);
13226   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13227   expand %{
13228     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13229   %}
13230 %}
13231 
13232 //======
13233 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13234 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13235   match(Set flags (CmpUL src zero));
13236   effect(TEMP tmp);
13237   ins_cost(200);
13238   format %{ "MOV    $tmp,$src.lo\n\t"
13239             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13240   ins_encode(long_cmp_flags0(src, tmp));
13241   ins_pipe(ialu_reg_reg_long);
13242 %}
13243 
13244 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13245 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13246   match(Set flags (CmpUL src1 src2));
13247   ins_cost(200+300);
13248   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13249             "JNE,s  skip\n\t"
13250             "CMP    $src1.hi,$src2.hi\n\t"
13251      "skip:\t" %}
13252   ins_encode(long_cmp_flags1(src1, src2));
13253   ins_pipe(ialu_cr_reg_reg);
13254 %}
13255 
13256 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13257 // Just a wrapper for a normal branch, plus the predicate test.
13258 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13259   match(If cmp flags);
13260   effect(USE labl);
13261   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13262   expand %{
13263     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13264   %}
13265 %}
13266 
13267 // Compare 2 longs and CMOVE longs.
13268 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13269   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13270   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13271   ins_cost(400);
13272   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13273             "CMOV$cmp $dst.hi,$src.hi" %}
13274   opcode(0x0F,0x40);
13275   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13276   ins_pipe( pipe_cmov_reg_long );
13277 %}
13278 
13279 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13280   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13281   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13282   ins_cost(500);
13283   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13284             "CMOV$cmp $dst.hi,$src.hi" %}
13285   opcode(0x0F,0x40);
13286   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13287   ins_pipe( pipe_cmov_reg_long );
13288 %}
13289 
13290 // Compare 2 longs and CMOVE ints.
13291 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13292   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13293   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13294   ins_cost(200);
13295   format %{ "CMOV$cmp $dst,$src" %}
13296   opcode(0x0F,0x40);
13297   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13298   ins_pipe( pipe_cmov_reg );
13299 %}
13300 
13301 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13302   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13303   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13304   ins_cost(250);
13305   format %{ "CMOV$cmp $dst,$src" %}
13306   opcode(0x0F,0x40);
13307   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13308   ins_pipe( pipe_cmov_mem );
13309 %}
13310 
13311 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13312   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13313   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13314   ins_cost(200);
13315   expand %{
13316     cmovII_reg_EQNE(cmp, flags, dst, src);
13317   %}
13318 %}
13319 
13320 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13321   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13322   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13323   ins_cost(250);
13324   expand %{
13325     cmovII_mem_EQNE(cmp, flags, dst, src);
13326   %}
13327 %}
13328 
13329 // Compare 2 longs and CMOVE ptrs.
13330 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13331   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13332   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13333   ins_cost(200);
13334   format %{ "CMOV$cmp $dst,$src" %}
13335   opcode(0x0F,0x40);
13336   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13337   ins_pipe( pipe_cmov_reg );
13338 %}
13339 
13340 // Compare 2 unsigned longs and CMOVE ptrs.
13341 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13342   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13343   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13344   ins_cost(200);
13345   expand %{
13346     cmovPP_reg_EQNE(cmp,flags,dst,src);
13347   %}
13348 %}
13349 
13350 // Compare 2 longs and CMOVE doubles
13351 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13352   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13353   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13354   ins_cost(200);
13355   expand %{
13356     fcmovDPR_regS(cmp,flags,dst,src);
13357   %}
13358 %}
13359 
13360 // Compare 2 longs and CMOVE doubles
13361 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13362   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13363   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13364   ins_cost(200);
13365   expand %{
13366     fcmovD_regS(cmp,flags,dst,src);
13367   %}
13368 %}
13369 
13370 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13371   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13372   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13373   ins_cost(200);
13374   expand %{
13375     fcmovFPR_regS(cmp,flags,dst,src);
13376   %}
13377 %}
13378 
13379 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13380   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13381   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13382   ins_cost(200);
13383   expand %{
13384     fcmovF_regS(cmp,flags,dst,src);
13385   %}
13386 %}
13387 
13388 //======
13389 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13390 // Same as cmpL_reg_flags_LEGT except must negate src
13391 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13392   match( Set flags (CmpL src zero ));
13393   effect( TEMP tmp );
13394   ins_cost(300);
13395   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13396             "CMP    $tmp,$src.lo\n\t"
13397             "SBB    $tmp,$src.hi\n\t" %}
13398   ins_encode( long_cmp_flags3(src, tmp) );
13399   ins_pipe( ialu_reg_reg_long );
13400 %}
13401 
13402 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13403 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13404 // requires a commuted test to get the same result.
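//
// For example (illustrative only): "src1 <= src2" is evaluated as
// "src2 >= src1", so the flags are computed from (src2 - src1) and the
// cmpOp_commute operand rewrites the LE condition into GE (and GT into LT)
// to match the swapped operands.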
13405 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13406   match( Set flags (CmpL src1 src2 ));
13407   effect( TEMP tmp );
13408   ins_cost(300);
13409   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13410             "MOV    $tmp,$src2.hi\n\t"
13411             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13412   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13413   ins_pipe( ialu_cr_reg_reg );
13414 %}
13415 
13416 // Long compares reg < zero/reg OR reg >= zero/reg.
13417 // Just a wrapper for a normal branch, plus the predicate test
13418 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13419   match(If cmp flags);
13420   effect(USE labl);
13421   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13422   ins_cost(300);
13423   expand %{
13424     jmpCon(cmp,flags,labl);    // JGT or JLE...
13425   %}
13426 %}
13427 
13428 //======
13429 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13430 // Same as cmpUL_reg_flags_LEGT except must negate src
13431 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13432   match(Set flags (CmpUL src zero));
13433   effect(TEMP tmp);
13434   ins_cost(300);
13435   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13436             "CMP    $tmp,$src.lo\n\t"
13437             "SBB    $tmp,$src.hi\n\t" %}
13438   ins_encode(long_cmp_flags3(src, tmp));
13439   ins_pipe(ialu_reg_reg_long);
13440 %}
13441 
13442 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13443 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13444 // requires a commuted test to get the same result.
13445 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13446   match(Set flags (CmpUL src1 src2));
13447   effect(TEMP tmp);
13448   ins_cost(300);
13449   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13450             "MOV    $tmp,$src2.hi\n\t"
13451             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13452   ins_encode(long_cmp_flags2( src2, src1, tmp));
13453   ins_pipe(ialu_cr_reg_reg);
13454 %}
13455 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13457 // Just a wrapper for a normal branch, plus the predicate test
13458 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13459   match(If cmp flags);
13460   effect(USE labl);
13461   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13462   ins_cost(300);
13463   expand %{
13464     jmpCon(cmp, flags, labl);    // JGT or JLE...
13465   %}
13466 %}
13467 
13468 // Compare 2 longs and CMOVE longs.
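// Note: CMOVcc leaves EFLAGS untouched, so the paired moves below can apply
// the identical condition to the low and high halves of the long.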
13469 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13470   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13471   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13472   ins_cost(400);
13473   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13474             "CMOV$cmp $dst.hi,$src.hi" %}
13475   opcode(0x0F,0x40);
13476   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13477   ins_pipe( pipe_cmov_reg_long );
13478 %}
13479 
13480 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13481   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13482   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13483   ins_cost(500);
13484   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13485             "CMOV$cmp $dst.hi,$src.hi+4" %}
13486   opcode(0x0F,0x40);
13487   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13488   ins_pipe( pipe_cmov_reg_long );
13489 %}
13490 
13491 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13492   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13493   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13494   ins_cost(400);
13495   expand %{
13496     cmovLL_reg_LEGT(cmp, flags, dst, src);
13497   %}
13498 %}
13499 
13500 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13501   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13503   ins_cost(500);
13504   expand %{
13505     cmovLL_mem_LEGT(cmp, flags, dst, src);
13506   %}
13507 %}
13508 
13509 // Compare 2 longs and CMOVE ints.
13510 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13511   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13512   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13513   ins_cost(200);
13514   format %{ "CMOV$cmp $dst,$src" %}
13515   opcode(0x0F,0x40);
13516   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13517   ins_pipe( pipe_cmov_reg );
13518 %}
13519 
13520 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13521   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13522   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13523   ins_cost(250);
13524   format %{ "CMOV$cmp $dst,$src" %}
13525   opcode(0x0F,0x40);
13526   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13527   ins_pipe( pipe_cmov_mem );
13528 %}
13529 
13530 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13531   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13532   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13533   ins_cost(200);
13534   expand %{
13535     cmovII_reg_LEGT(cmp, flags, dst, src);
13536   %}
13537 %}
13538 
13539 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13540   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13541   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13542   ins_cost(250);
13543   expand %{
13544     cmovII_mem_LEGT(cmp, flags, dst, src);
13545   %}
13546 %}
13547 
13548 // Compare 2 longs and CMOVE ptrs.
13549 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13550   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13551   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13552   ins_cost(200);
13553   format %{ "CMOV$cmp $dst,$src" %}
13554   opcode(0x0F,0x40);
13555   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13556   ins_pipe( pipe_cmov_reg );
13557 %}
13558 
13559 // Compare 2 unsigned longs and CMOVE ptrs.
13560 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13561   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13562   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13563   ins_cost(200);
13564   expand %{
13565     cmovPP_reg_LEGT(cmp,flags,dst,src);
13566   %}
13567 %}
13568 
13569 // Compare 2 longs and CMOVE doubles
13570 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13572   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13573   ins_cost(200);
13574   expand %{
13575     fcmovDPR_regS(cmp,flags,dst,src);
13576   %}
13577 %}
13578 
13579 // Compare 2 longs and CMOVE doubles
13580 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13582   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13583   ins_cost(200);
13584   expand %{
13585     fcmovD_regS(cmp,flags,dst,src);
13586   %}
13587 %}
13588 
13589 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13591   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13592   ins_cost(200);
13593   expand %{
13594     fcmovFPR_regS(cmp,flags,dst,src);
13595   %}
13596 %}
13597 
13598 
13599 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13601   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13602   ins_cost(200);
13603   expand %{
13604     fcmovF_regS(cmp,flags,dst,src);
13605   %}
13606 %}
13607 
13608 
13609 // ============================================================================
13610 // Procedure Call/Return Instructions
13611 // Call Java Static Instruction
13612 // Note: If this code changes, the corresponding ret_addr_offset() and
13613 //       compute_padding() functions will have to be adjusted.
13614 instruct CallStaticJavaDirect(method meth) %{
13615   match(CallStaticJava);
13616   effect(USE meth);
13617 
13618   ins_cost(300);
13619   format %{ "CALL,static " %}
13620   opcode(0xE8); /* E8 cd */
13621   ins_encode( pre_call_resets,
13622               Java_Static_Call( meth ),
13623               call_epilog,
13624               post_call_FPU );
13625   ins_pipe( pipe_slow );
13626   ins_alignment(4);
13627 %}
13628 
13629 // Call Java Dynamic Instruction
13630 // Note: If this code changes, the corresponding ret_addr_offset() and
13631 //       compute_padding() functions will have to be adjusted.
13632 instruct CallDynamicJavaDirect(method meth) %{
13633   match(CallDynamicJava);
13634   effect(USE meth);
13635 
13636   ins_cost(300);
13637   format %{ "MOV    EAX,(oop)-1\n\t"
13638             "CALL,dynamic" %}
13639   opcode(0xE8); /* E8 cd */
13640   ins_encode( pre_call_resets,
13641               Java_Dynamic_Call( meth ),
13642               call_epilog,
13643               post_call_FPU );
13644   ins_pipe( pipe_slow );
13645   ins_alignment(4);
13646 %}
13647 
13648 // Call Runtime Instruction
13649 instruct CallRuntimeDirect(method meth) %{
13650   match(CallRuntime );
13651   effect(USE meth);
13652 
13653   ins_cost(300);
13654   format %{ "CALL,runtime " %}
13655   opcode(0xE8); /* E8 cd */
13656   // Use FFREEs to clear entries in float stack
13657   ins_encode( pre_call_resets,
13658               FFree_Float_Stack_All,
13659               Java_To_Runtime( meth ),
13660               post_call_FPU );
13661   ins_pipe( pipe_slow );
13662 %}
13663 
13664 // Call runtime without safepoint
13665 instruct CallLeafDirect(method meth) %{
13666   match(CallLeaf);
13667   effect(USE meth);
13668 
13669   ins_cost(300);
13670   format %{ "CALL_LEAF,runtime " %}
13671   opcode(0xE8); /* E8 cd */
13672   ins_encode( pre_call_resets,
13673               FFree_Float_Stack_All,
13674               Java_To_Runtime( meth ),
13675               Verify_FPU_For_Leaf, post_call_FPU );
13676   ins_pipe( pipe_slow );
13677 %}
13678 
13679 instruct CallLeafNoFPDirect(method meth) %{
13680   match(CallLeafNoFP);
13681   effect(USE meth);
13682 
13683   ins_cost(300);
13684   format %{ "CALL_LEAF_NOFP,runtime " %}
13685   opcode(0xE8); /* E8 cd */
13686   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13687   ins_pipe( pipe_slow );
13688 %}
13689 
13690 
13691 // Return Instruction
13692 // Remove the return address & jump to it.
13693 instruct Ret() %{
13694   match(Return);
13695   format %{ "RET" %}
13696   opcode(0xC3);
13697   ins_encode(OpcP);
13698   ins_pipe( pipe_jmp );
13699 %}
13700 
13701 // Tail Call; Jump from runtime stub to Java code.
13702 // Also known as an 'interprocedural jump'.
13703 // Target of jump will eventually return to caller.
13704 // TailJump below removes the return address.
13705 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13706   match(TailCall jump_target method_ptr);
13707   ins_cost(300);
13708   format %{ "JMP    $jump_target \t# EBX holds method" %}
13709   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13710   ins_encode( OpcP, RegOpc(jump_target) );
13711   ins_pipe( pipe_jmp );
13712 %}
13713 
13714 
13715 // Tail Jump; remove the return address; jump to target.
13716 // TailCall above leaves the return address around.
13717 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13718   match( TailJump jump_target ex_oop );
13719   ins_cost(300);
13720   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13721             "JMP    $jump_target " %}
13722   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13723   ins_encode( enc_pop_rdx,
13724               OpcP, RegOpc(jump_target) );
13725   ins_pipe( pipe_jmp );
13726 %}
13727 
13728 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
13731 instruct CreateException( eAXRegP ex_oop )
13732 %{
13733   match(Set ex_oop (CreateEx));
13734 
13735   size(0);
13736   // use the following format syntax
13737   format %{ "# exception oop is in EAX; no code emitted" %}
13738   ins_encode();
13739   ins_pipe( empty );
13740 %}
13741 
13742 
13743 // Rethrow exception:
13744 // The exception oop will come in the first argument position.
13745 // Then JUMP (not call) to the rethrow stub code.
13746 instruct RethrowException()
13747 %{
13748   match(Rethrow);
13749 
13750   // use the following format syntax
13751   format %{ "JMP    rethrow_stub" %}
13752   ins_encode(enc_rethrow);
13753   ins_pipe( pipe_jmp );
13754 %}
13755 
13756 // inlined locking and unlocking
13757 
13758 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13759   predicate(Compile::current()->use_rtm());
13760   match(Set cr (FastLock object box));
13761   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13762   ins_cost(300);
13763   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13764   ins_encode %{
13765     __ get_thread($thread$$Register);
13766     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13767                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13768                  _rtm_counters, _stack_rtm_counters,
13769                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13770                  true, ra_->C->profile_rtm());
13771   %}
13772   ins_pipe(pipe_slow);
13773 %}
13774 
13775 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13776   predicate(!Compile::current()->use_rtm());
13777   match(Set cr (FastLock object box));
13778   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13779   ins_cost(300);
13780   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13781   ins_encode %{
13782     __ get_thread($thread$$Register);
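    // Non-RTM path: the trailing nullptr/false arguments are the RTM counters,
    // method data and RTM flags that only the RTM variant above makes use of.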
13783     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13784                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13785   %}
13786   ins_pipe(pipe_slow);
13787 %}
13788 
13789 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13790   match(Set cr (FastUnlock object box));
13791   effect(TEMP tmp, USE_KILL box);
13792   ins_cost(300);
13793   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13794   ins_encode %{
13795     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13796   %}
13797   ins_pipe(pipe_slow);
13798 %}
13799 
13800 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13801   predicate(Matcher::vector_length(n) <= 32);
13802   match(Set dst (MaskAll src));
13803   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13804   ins_encode %{
13805     int mask_len = Matcher::vector_length(this);
13806     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13807   %}
13808   ins_pipe( pipe_slow );
13809 %}
13810 
13811 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13812   predicate(Matcher::vector_length(n) > 32);
13813   match(Set dst (MaskAll src));
13814   effect(TEMP ktmp);
13815   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13816   ins_encode %{
13817     int mask_len = Matcher::vector_length(this);
13818     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13819   %}
13820   ins_pipe( pipe_slow );
13821 %}
13822 
13823 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13824   predicate(Matcher::vector_length(n) > 32);
13825   match(Set dst (MaskAll src));
13826   effect(TEMP ktmp);
13827   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13828   ins_encode %{
13829     int mask_len = Matcher::vector_length(this);
13830     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13831   %}
13832   ins_pipe( pipe_slow );
13833 %}
13834 
13835 // ============================================================================
13836 // Safepoint Instruction
13837 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13838   match(SafePoint poll);
13839   effect(KILL cr, USE poll);
13840 
13841   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13842   ins_cost(125);
13843   // EBP would need size(3)
13844   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13845   ins_encode %{
13846     __ relocate(relocInfo::poll_type);
13847     address pre_pc = __ pc();
13848     __ testl(rax, Address($poll$$Register, 0));
13849     address post_pc = __ pc();
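    // pre_pc marks the TEST instruction the poll relocation refers to; check
    // that the expected opcode (0x85) was actually emitted there.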
13850     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13851   %}
13852   ins_pipe(ialu_reg_mem);
13853 %}
13854 
13855 
13856 // ============================================================================
13857 // This name is KNOWN by the ADLC and cannot be changed.
13858 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
13860 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13861   match(Set dst (ThreadLocal));
13862   effect(DEF dst, KILL cr);
13863 
13864   format %{ "MOV    $dst, Thread::current()" %}
13865   ins_encode %{
13866     Register dstReg = as_Register($dst$$reg);
13867     __ get_thread(dstReg);
13868   %}
13869   ins_pipe( ialu_reg_fat );
13870 %}
13871 
13872 
13873 
13874 //----------PEEPHOLE RULES-----------------------------------------------------
13875 // These must follow all instruction definitions as they use the names
13876 // defined in the instructions definitions.
13877 //
13878 // peepmatch ( root_instr_name [preceding_instruction]* );
13879 //
13880 // peepconstraint %{
13881 // (instruction_number.operand_name relational_op instruction_number.operand_name
13882 //  [, ...] );
13883 // // instruction numbers are zero-based using left to right order in peepmatch
13884 //
13885 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13886 // // provide an instruction_number.operand_name for each operand that appears
13887 // // in the replacement instruction's match rule
13888 //
13889 // ---------VM FLAGS---------------------------------------------------------
13890 //
13891 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13892 //
13893 // Each peephole rule is given an identifying number starting with zero and
13894 // increasing by one in the order seen by the parser.  An individual peephole
13895 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13896 // on the command-line.
13897 //
13898 // ---------CURRENT LIMITATIONS----------------------------------------------
13899 //
13900 // Only match adjacent instructions in same basic block
13901 // Only equality constraints
13902 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13903 // Only one replacement instruction
13904 //
13905 // ---------EXAMPLE----------------------------------------------------------
13906 //
13907 // // pertinent parts of existing instructions in architecture description
13908 // instruct movI(rRegI dst, rRegI src) %{
13909 //   match(Set dst (CopyI src));
13910 // %}
13911 //
13912 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13913 //   match(Set dst (AddI dst src));
13914 //   effect(KILL cr);
13915 // %}
13916 //
13917 // // Change (inc mov) to lea
13918 // peephole %{
13919 //   // increment preceded by register-register move
13920 //   peepmatch ( incI_eReg movI );
13921 //   // require that the destination register of the increment
13922 //   // match the destination register of the move
13923 //   peepconstraint ( 0.dst == 1.dst );
13924 //   // construct a replacement instruction that sets
13925 //   // the destination to ( move's source register + one )
13926 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13927 // %}
13928 //
13929 // Implementation no longer uses movX instructions since
13930 // machine-independent system no longer uses CopyX nodes.
13931 //
13932 // peephole %{
13933 //   peepmatch ( incI_eReg movI );
13934 //   peepconstraint ( 0.dst == 1.dst );
13935 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13936 // %}
13937 //
13938 // peephole %{
13939 //   peepmatch ( decI_eReg movI );
13940 //   peepconstraint ( 0.dst == 1.dst );
13941 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13942 // %}
13943 //
13944 // peephole %{
13945 //   peepmatch ( addI_eReg_imm movI );
13946 //   peepconstraint ( 0.dst == 1.dst );
13947 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13948 // %}
13949 //
13950 // peephole %{
13951 //   peepmatch ( addP_eReg_imm movP );
13952 //   peepconstraint ( 0.dst == 1.dst );
13953 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13954 // %}
13955 
13956 // // Change load of spilled value to only a spill
13957 // instruct storeI(memory mem, rRegI src) %{
13958 //   match(Set mem (StoreI mem src));
13959 // %}
13960 //
13961 // instruct loadI(rRegI dst, memory mem) %{
13962 //   match(Set dst (LoadI mem));
13963 // %}
13964 //
13965 peephole %{
13966   peepmatch ( loadI storeI );
13967   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13968   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13969 %}
13970 
13971 //----------SMARTSPILL RULES---------------------------------------------------
13972 // These must follow all instruction definitions as they use the names
13973 // defined in the instructions definitions.