Old src/hotspot/cpu/x86/x86

    1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a MachNode. During the emission the FPU stack is pushed,
// making FPR1 == st(1) temporarily. However, at any safepoint the stack
// will not have this element, so FPR1 == st(0) from the oopMap viewpoint.
// This same weirdness with numbering forces the instruction encodings to
// play games with the register encoding to correct for this 0/1 issue.
// See MachSpillCopyNode::implementation, where it does flt->flt moves,
// for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is not ever allocated; we use clever encodings to fake
  227 // a 2-address instructions out of Intels FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ masm->
  256 
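// How to find the high register of a Long pair, given the low register:
// the high half's encoding is the low half's encoding plus 2
// (EAX(0)->EDX(2), ECX(1)->EBX(3), EBP(5)->EDI(7)).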
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  270   // of 128-bits operands for SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bits operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
  278 // Buffer for 128-bits masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all type of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   __ emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   __ emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(C2_MacroAssembler *masm, int code) {
  353   __ emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  358   __ relocate(__ inst_mark() + offset, reloc);
  359   emit_opcode(masm, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(C2_MacroAssembler *masm, int d8) {
  364   __ emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(C2_MacroAssembler *masm, int d16) {
  369   __ emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(C2_MacroAssembler *masm, int d32) {
  374   __ emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   __ relocate(__ inst_mark(), reloc, format);
  381   __ emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   __ relocate(__ inst_mark(), rspec, format);
  393   __ emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  398   emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (masm, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(masm, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(masm, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(masm, 0x1, reg_encoding, base);
  423         emit_d8(masm, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(masm, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(masm, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (masm, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(masm, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(masm, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (masm, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(masm, 0x0, reg_encoding, 0x4);
  450       emit_rm(masm, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(masm, 0x1, reg_encoding, 0x4);
  456         emit_rm(masm, scale, index, base);
  457         emit_d8(masm, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(masm, 0x2, reg_encoding, 0x4);
  462           emit_rm(masm, scale, index, 0x04);
  463         } else {
  464           emit_rm(masm, 0x2, reg_encoding, 0x4);
  465           emit_rm(masm, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(masm, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (masm, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( masm, 0x8B );
  483     emit_rm(masm, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler* masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612 
  613   int framesize = C->output()->frame_size_in_bytes();
  614   int bangsize = C->output()->bang_size_in_bytes();
  615 
  616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
  617 
  618   C->output()->set_frame_complete(__ offset());
  619 
  620   if (C->has_mach_constant_base_node()) {
  621     // NOTE: We set the table base offset here because users might be
  622     // emitted before MachConstantBaseNode.
  623     ConstantTable& constant_table = C->output()->constant_table();
  624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  625   }
  626 }
  627 
  628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  629   return MachNode::size(ra_); // too many variables; just compute it the hard way
  630 }
  631 
  632 int MachPrologNode::reloc() const {
  633   return 0; // a large enough number
  634 }
  635 
  636 //=============================================================================
  637 #ifndef PRODUCT
  638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  639   Compile *C = ra_->C;
  640   int framesize = C->output()->frame_size_in_bytes();
  641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  642   // Remove two words for return addr and rbp,
  643   framesize -= 2*wordSize;
  644 
  645   if (C->max_vector_size() > 16) {
  646     st->print("VZEROUPPER");
  647     st->cr(); st->print("\t");
  648   }
  649   if (C->in_24_bit_fp_mode()) {
  650     st->print("FLDCW  standard control word");
  651     st->cr(); st->print("\t");
  652   }
  653   if (framesize) {
  654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  655     st->cr(); st->print("\t");
  656   }
  657   st->print_cr("POPL   EBP"); st->print("\t");
  658   if (do_polling() && C->is_method_compilation()) {
  659     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  660               "JA      #safepoint_stub\t"
  661               "# Safepoint: poll for GC");
  662   }
  663 }
  664 #endif
  665 
  666 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  667   Compile *C = ra_->C;
  668 
  669   if (C->max_vector_size() > 16) {
  670     // Clear upper bits of YMM registers when current compiled code uses
  671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  672     __ vzeroupper();
  673   }
  674   // If method set FPU control word, restore to standard control word
  675   if (C->in_24_bit_fp_mode()) {
  676     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  677   }
  678 
  679   int framesize = C->output()->frame_size_in_bytes();
  680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  681   // Remove two words for return addr and rbp,
  682   framesize -= 2*wordSize;
  683 
  684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  685 
  686   if (framesize >= 128) {
  687     emit_opcode(masm, 0x81); // add  SP, #framesize
  688     emit_rm(masm, 0x3, 0x00, ESP_enc);
  689     emit_d32(masm, framesize);
  690   } else if (framesize) {
  691     emit_opcode(masm, 0x83); // add  SP, #framesize
  692     emit_rm(masm, 0x3, 0x00, ESP_enc);
  693     emit_d8(masm, framesize);
  694   }
  695 
  696   emit_opcode(masm, 0x58 | EBP_enc);
  697 
  698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  699     __ reserved_stack_check();
  700   }
  701 
  702   if (do_polling() && C->is_method_compilation()) {
  703     Register thread = as_Register(EBX_enc);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ set_inst_mark();
  713     __ relocate(relocInfo::poll_return_type);
  714     __ clear_inst_mark();
  715     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  716   }
  717 }
  718 
  719 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
        // The epilog emitted by MachEpilogNode::emit() varies with the frame size,
        // the reserved-stack check and the return poll, so no closed-form byte
        // count is kept here; the generic MachNode::size() is asked instead.
  720   return MachNode::size(ra_); // too many variables; just compute it
  721                               // the hard way
  722 }
  723 
  724 int MachEpilogNode::reloc() const {
        // Returns a fixed constant; presumably an estimate of the relocation
        // entries this node needs (inferred from the name - confirm).
        // NOTE(review): the value is 0 while the trailing comment calls it
        // "large enough", and emit() does record a poll_return relocation when
        // polling is enabled - confirm how callers consume this number.
  725   return 0; // a large enough number
  726 }
  727 
  728 const Pipeline * MachEpilogNode::pipeline() const {
        // The epilog carries no pipeline description of its own; it reports
        // whatever MachNode::pipeline_class() returns.
  729   return MachNode::pipeline_class();
  730 }
  731 
  732 //=============================================================================
  733 
  734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  735 // Classify an allocator register name by the kind of storage it denotes.
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737   if (!OptoReg::is_valid(reg)) { return rc_bad; }
  738   if (OptoReg::is_stack(reg))  { return rc_stack; }
  739   // Neither invalid nor a stack slot: decide by the kind of VMReg it maps to.
  740   const VMReg vmreg = OptoReg::as_VMReg(reg);
  741   if (vmreg->is_Register()) { return rc_int; }
  742   if (vmreg->is_FloatRegister()) {
  743     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  744     return rc_float;
  745   }
  746   if (vmreg->is_KRegister()) { return rc_kreg; }
  747   assert(vmreg->is_XMMRegister(), "must be");
  748   return rc_xmm;
  749 }
  750 
  751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
  752                         int opcode, const char *op_str, int size, outputStream* st ) {
        // Emits or prints one "opcode reg-field,[ESP+offset]" instruction and
        // returns the running byte count.
        //   masm    - non-null: emit the bytes; null: print to st unless do_size.
        //   is_load - only selects the operand order of the printed MOV forms.
        //   reg     - allocator register number; its encoding is passed to
        //             encode_RegMem as the register field.  For PUSH/POP/FST
        //             callers pass a register number picked for its encoding
        //             (e.g. ESI_num with 0xFF PUSH, EAX_num with 0x8F POP).
        //   size    - bytes accumulated so far; a non-zero value also makes the
        //             printer start a new line first.
        // Result: size + 3 + displacement bytes (0, 1 or 4).
  753   if( masm ) {
  754     masm->set_inst_mark();
  755     emit_opcode  (masm, opcode );
  756     encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757     masm->clear_inst_mark();
  758 #ifndef PRODUCT
  759   } else if( !do_size ) {
  760     if( size != 0 ) st->print("\n\t");
  761     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  762       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  763       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  764     } else { // FLD, FST, PUSH, POP
  765       st->print("%s [ESP + #%d]",op_str,offset);
  766     }
  767 #endif
  768   }
        // Displacement bytes: none for offset 0, one up to 127, otherwise four.
  769   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  770   return size+3+offset_size;
  771 }
  772 
  773 // Helper for XMM registers.  Extra opcode bits, limited syntax.
      // Moves a float (movflt) or, when reg_lo+1 == reg_hi, a double (movdbl)
      // between an XMM register and [rsp + offset]; is_load picks the direction.
      // With masm set the move is emitted; with masm null and !do_size it is
      // printed to st.  Returns size + 5 + displacement bytes, plus 2 more when
      // UseAVX > 2.
  774 static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
  775                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
        // Inputs for the compressed-displacement query made further down.
  776   int in_size_in_bits = Assembler::EVEX_32bit;
  777   int evex_encoding = 0;
  778   if (reg_lo+1 == reg_hi) {
  779     in_size_in_bits = Assembler::EVEX_64bit;
  780     evex_encoding = Assembler::VEX_W;
  781   }
  782   if (masm) {
  783     // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
  784     //                          it maps more cases to single byte displacement
  785     __ set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
        // Size bookkeeping.  For UseAVX > 2 the assembler is asked whether the
        // offset fits a compressed one-byte displacement; otherwise the plain
        // "<= 127" rule decides between one and four displacement bytes.
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
        // XMM register-to-register copy.  A double is moved (movdbl) when both
        // source and destination are adjacent lo/hi pairs, otherwise a float
        // (movflt).  With masm null and !do_size the move is printed instead.
        // Returns size plus the byte count chosen at the bottom.
  834   if (masm) {
  835     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  836     __ set_managed();
  837     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  838       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  839                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  840     } else {
  841       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  842                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  843     }
  844 #ifndef PRODUCT
  845   } else if (!do_size) {
  846     if (size != 0) st->print("\n\t");
          // The printed mnemonic follows UseXmmRegToRegMoveAll.
  847     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  848       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  849         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       } else {
  851         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  852       }
  853     } else {
  854       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  855         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       } else {
  857         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  858       }
  859     }
  860 #endif
  861   }
  862   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  863   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  864   int sz = (UseAVX > 2) ? 6 : 4;
        // Single-float copy via MOVAPS without AVX: one byte shorter.
  865   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  866       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  867   return size + sz;
  868 }
  869 
  870 static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  871                             int src_hi, int dst_hi, int size, outputStream* st ) {
        // Copies 32 bits from a general register into an XMM register (movdl),
        // or prints that move when masm is null and !do_size.
        // src_hi, dst_hi and size are accepted but not read here: the return
        // value is the fixed length of this one instruction (6 with UseAVX > 2,
        // else 4) and is not added to the incoming size.
  872   // 32-bit
  873   if (masm) {
  874     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  875     __ set_managed();
  876     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  877              as_Register(Matcher::_regEncode[src_lo]));
  878 #ifndef PRODUCT
  879   } else if (!do_size) {
  880     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  881 #endif
  882   }
  883   return (UseAVX> 2) ? 6 : 4;
  884 }
  885 
  886 
  887 static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  888                                  int src_hi, int dst_hi, int size, outputStream* st ) {
        // Copies 32 bits from an XMM register into a general register (movdl),
        // or prints that move when masm is null and !do_size.
        // src_hi, dst_hi and size are accepted but not read here: the return
        // value is the fixed length of this one instruction (6 with UseAVX > 2,
        // else 4) and is not added to the incoming size.
  889   // 32-bit
  890   if (masm) {
  891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  892     __ set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
        // Integer register-to-register copy: emits opcode 0x8B followed by a
        // register-direct ModRM byte (mod=3, dst, src), or prints "MOV dst,src"
        // when masm is null and !do_size.  Returns size + 2.
  904   if( masm ) {
  905     emit_opcode(masm, 0x8B );
  906     emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
        // Stores an x87 register to [ESP + offset].  If the source is not
        // FPR1L_num it is first pushed with FLD and then stored with the popping
        // form; otherwise the non-popping store is used.  A double store is
        // chosen when both src and dst are adjacent lo/hi pairs, else a 32-bit
        // store.  Returns the accumulated size from impl_helper.
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( masm ) {
  920       emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
        // st_op is a register number whose encoding impl_helper places in the
        // register field, selecting the store variant named in the comments.
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Forward declarations only: the next two methods are shared by the 32- and
      // 64-bit VM and are defined in x86.ad.
  947 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
        // Copies a vector value from one stack slot to another.
        //   masm       - non-null: emit the copy; null: print it to st
        //                (non-PRODUCT builds only).
        //   src_offset - rsp-relative offset of the source slot.
        //   dst_offset - rsp-relative offset of the destination slot.
        //   ireg       - ideal register kind (Op_VecS .. Op_VecZ) giving the width.
        // VecS/VecD go through 32-bit push/pop pairs.  The wider kinds borrow
        // xmm0 as a temporary: its old value is parked just below rsp and
        // reloaded once the copy is done.
  955   if (masm) {
  956     switch (ireg) {
  957     case Op_VecS:
  958       __ pushl(Address(rsp, src_offset));
  959       __ popl (Address(rsp, dst_offset));
  960       break;
  961     case Op_VecD:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       __ pushl(Address(rsp, src_offset+4));
  965       __ popl (Address(rsp, dst_offset+4));
  966       break;
  967     case Op_VecX:
  968       __ movdqu(Address(rsp, -16), xmm0);
  969       __ movdqu(xmm0, Address(rsp, src_offset));
  970       __ movdqu(Address(rsp, dst_offset), xmm0);
  971       __ movdqu(xmm0, Address(rsp, -16));
  972       break;
  973     case Op_VecY:
  974       __ vmovdqu(Address(rsp, -32), xmm0);
  975       __ vmovdqu(xmm0, Address(rsp, src_offset));
  976       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  977       __ vmovdqu(xmm0, Address(rsp, -32));
  978       break;
  979     case Op_VecZ:
  980       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  981       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  982       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  983       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  984       break;
  985     default:
  986       ShouldNotReachHere();
  987     }
  988 #ifndef PRODUCT
  989   } else {
          // Textual form of the sequences above; keep the two switches in step.
  990     switch (ireg) {
  991     case Op_VecS:
  992       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  993                 "popl    [rsp + #%d]",
  994                 src_offset, dst_offset);
  995       break;
  996     case Op_VecD:
            // The emitter uses popl for both halves, so print popl (not popq).
  997       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  998                 "popl    [rsp + #%d]\n\t"
  999                 "pushl   [rsp + #%d]\n\t"
 1000                 "popl    [rsp + #%d]",
 1001                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1002       break;
 1003     case Op_VecX:
 1004       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1005                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1006                 "movdqu  [rsp + #%d], xmm0\n\t"
 1007                 "movdqu  xmm0, [rsp - #16]",
 1008                 src_offset, dst_offset);
 1009       break;
 1010     case Op_VecY:
 1011       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1012                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1013                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1014                 "vmovdqu xmm0, [rsp - #32]",
 1015                 src_offset, dst_offset);
 1016       break;
 1017     case Op_VecZ:
 1018       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1019                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1020                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1021                 "vmovdqu xmm0, [rsp - #64]",
 1022                 src_offset, dst_offset);
 1023       break;
 1024     default:
 1025       ShouldNotReachHere();
 1026     }
 1027 #endif
 1028   }
 1029 }
 1030 
 1031 uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
        // Shared worker behind format() and emit() of a spill copy.
        //   masm    - non-null: emit the copy; null: print it to st (the scalar
        //             paths print only when !do_size).
        //   ra_     - register allocator, queried for the source/destination
        //             registers and their stack offsets.
        // Each value is described by a "first" and an optional "second" 32-bit
        // half; the halves are classified with rc_class() and the matching
        // helper is chosen from the (source class, destination class) pair.
        // Returns the byte count accumulated by the scalar helpers; the vector
        // and opmask (kreg) paths return 0.
 1032   // Get registers to move
 1033   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1034   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1035   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1036   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1037 
 1038   enum RC src_second_rc = rc_class(src_second);
 1039   enum RC src_first_rc = rc_class(src_first);
 1040   enum RC dst_second_rc = rc_class(dst_second);
 1041   enum RC dst_first_rc = rc_class(dst_first);
 1042 
 1043   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1044 
 1045   // Generate spill code!
 1046   int size = 0;
 1047 
 1048   if( src_first == dst_first && src_second == dst_second )
 1049     return size;            // Self copy, no move
 1050 
        // Vector values (other than vector masks) are handled entirely by the
        // vec_* helpers; this path does not compute a size.
 1051   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1052     uint ireg = ideal_reg();
 1053     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1054     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1055     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1056     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1057       // mem -> mem
 1058       int src_offset = ra_->reg2offset(src_first);
 1059       int dst_offset = ra_->reg2offset(dst_first);
 1060       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 1061     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1062       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 1063     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1064       int stack_offset = ra_->reg2offset(dst_first);
 1065       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 1066     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1067       int stack_offset = ra_->reg2offset(src_first);
 1068       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 1069     } else {
 1070       ShouldNotReachHere();
 1071     }
 1072     return 0;
 1073   }
 1074 
 1075   // --------------------------------------
 1076   // Check for mem-mem move.  push/pop to move.
 1077   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
          // If the source's second half lives where the first half will be
          // written, move the second half first so it is not clobbered.
 1078     if( src_second == dst_first ) { // overlapping stack copy ranges
 1079       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1080       size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1081       size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1082       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1083     }
 1084     // move low bits
 1085     size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1086     size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1087     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1088       size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1089       size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1090     }
 1091     return size;
 1092   }
 1093 
        // The three integer cases below do not return: they only move the first
        // half and fall through to the second-half handling at the bottom.
 1094   // --------------------------------------
 1095   // Check for integer reg-reg copy
 1096   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1097     size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
 1098 
 1099   // Check for integer store
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1101     size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1102 
 1103   // Check for integer load
 1104   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1105     size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1106 
 1107   // Check for integer reg-xmm reg copy
 1108   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1109     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1110             "no 64 bit integer-float reg moves" );
 1111     return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1112   }
 1113   // --------------------------------------
 1114   // Check for float reg-reg copy
 1115   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1116     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1117             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1118     if( masm ) {
 1119 
 1120       // Note the mucking with the register encode to compensate for the 0/1
 1121       // indexing issue mentioned in a comment in the reg_def sections
 1122       // for FPR registers many lines above here.
 1123 
 1124       if( src_first != FPR1L_num ) {
 1125         emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
 1126         emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
 1127         emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
 1128         emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
 1129      } else {
 1130         emit_opcode  (masm, 0xDD );           // FST    ST(i)
 1131         emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1132      }
 1133 #ifndef PRODUCT
 1134     } else if( !do_size ) {
 1135       if( size != 0 ) st->print("\n\t");
 1136       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1137       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1138 #endif
 1139     }
          // Two 2-byte instructions (FLD + FSTP) or a single 2-byte FST.
 1140     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1141   }
 1142 
 1143   // Check for float store
 1144   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1145     return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1146   }
 1147 
 1148   // Check for float load
 1149   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1150     int offset = ra_->reg2offset(src_first);
 1151     const char *op_str;
 1152     int op;
 1153     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1154       op_str = "FLD_D";
 1155       op = 0xDD;
 1156     } else {                   // 32-bit load
 1157       op_str = "FLD_S";
 1158       op = 0xD9;
 1159       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1160     }
          // Load from the stack slot, then pop the value into the target register.
 1161     if( masm ) {
 1162       masm->set_inst_mark();
 1163       emit_opcode  (masm, op );
 1164       encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
 1167       masm->clear_inst_mark();
 1168 #ifndef PRODUCT
 1169     } else if( !do_size ) {
 1170       if( size != 0 ) st->print("\n\t");
 1171       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1172 #endif
 1173     }
 1174     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1175     return size + 3+offset_size+2;
 1176   }
 1177 
 1178   // Check for xmm reg-reg copy
 1179   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1180     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1181             (src_first+1 == src_second && dst_first+1 == dst_second),
 1182             "no non-adjacent float-moves" );
 1183     return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1184   }
 1185 
 1186   // Check for xmm reg-integer reg copy
 1187   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1188     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1189             "no 64 bit float-integer reg moves" );
 1190     return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1191   }
 1192 
 1193   // Check for xmm store
 1194   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1195     return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1196   }
 1197 
 1198   // Check for float xmm load
 1199   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1200     return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1201   }
 1202 
 1203   // Copy from float reg to xmm reg
 1204   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1205     // copy to the top of stack from floating point reg
 1206     // and use LEA to preserve flags
          // Sequence: open 8 bytes of scratch below ESP, store the x87 value
          // there, load it into the XMM register, then release the scratch.
 1207     if( masm ) {
 1208       emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
 1209       emit_rm(masm, 0x1, ESP_enc, 0x04);
 1210       emit_rm(masm, 0x0, 0x04, ESP_enc);
 1211       emit_d8(masm,0xF8);
 1212 #ifndef PRODUCT
 1213     } else if( !do_size ) {
 1214       if( size != 0 ) st->print("\n\t");
 1215       st->print("LEA    ESP,[ESP-8]");
 1216 #endif
 1217     }
 1218     size += 4;
 1219 
 1220     size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1221 
 1222     // Copy from the temp memory to the xmm reg.
 1223     size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
 1224 
 1225     if( masm ) {
 1226       emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
 1227       emit_rm(masm, 0x1, ESP_enc, 0x04);
 1228       emit_rm(masm, 0x0, 0x04, ESP_enc);
 1229       emit_d8(masm,0x08);
 1230 #ifndef PRODUCT
 1231     } else if( !do_size ) {
 1232       if( size != 0 ) st->print("\n\t");
 1233       st->print("LEA    ESP,[ESP+8]");
 1234 #endif
 1235     }
 1236     size += 4;
 1237     return size;
 1238   }
 1239 
 1240   // AVX-512 opmask specific spilling.
        // These paths go through the macro assembler (kmov), print without
        // consulting do_size, and return 0 rather than a byte count.
 1241   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1242     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1243     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1244     int offset = ra_->reg2offset(src_first);
 1245     if (masm != nullptr) {
 1246       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247 #ifndef PRODUCT
 1248     } else {
 1249       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1250 #endif
 1251     }
 1252     return 0;
 1253   }
 1254 
 1255   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1256     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1257     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1258     int offset = ra_->reg2offset(dst_first);
 1259     if (masm != nullptr) {
 1260       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1261 #ifndef PRODUCT
 1262     } else {
 1263       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1264 #endif
 1265     }
 1266     return 0;
 1267   }
 1268 
        // Opmask <-> general register copies are not implemented here.
 1269   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1270     Unimplemented();
 1271     return 0;
 1272   }
 1273 
 1274   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1275     Unimplemented();
 1276     return 0;
 1277   }
 1278 
 1279   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1280     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1281     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1282     if (masm != nullptr) {
 1283       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1284 #ifndef PRODUCT
 1285     } else {
 1286       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1287 #endif
 1288     }
 1289     return 0;
 1290   }
 1291 
        // Only the three fall-through integer cases can reach this point, and
        // each of them added to size.
 1292   assert( size > 0, "missed a case" );
 1293 
 1294   // --------------------------------------------------------------------
 1295   // Check for second bits still needing moving.
 1296   if( src_second == dst_second )
 1297     return size;               // Self copy; no move
 1298   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1299 
 1300   // Check for second word int-int move
 1301   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1302     return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
 1303 
 1304   // Check for second word integer store
 1305   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1306     return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1307 
 1308   // Check for second word integer load
 1309   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1310     return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1311 
 1312   Unimplemented();
 1313   return 0; // Mute compiler
 1314 }
 1315 
 1316 #ifndef PRODUCT
 1317 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
        // Print-only use of implementation(): no assembler, do_size == false.
 1318   implementation( nullptr, ra_, false, st );
 1319 }
 1320 #endif
 1321 
 1322 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emit-only use of implementation(): no output stream is supplied and
        // the returned byte count is ignored.
 1323   implementation( masm, ra_, false, nullptr );
 1324 }
 1325 
 1326 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
        // Uses the generic MachNode::size() rather than the byte count that
        // implementation() returns (which is 0 on its vector and opmask paths).
 1327   return MachNode::size(ra_);
 1328 }
 1329 
 1330 
 1331 //=============================================================================
 1332 #ifndef PRODUCT
 1333 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
        // Prints the LEA that BoxLockNode::emit() generates: the destination is
        // this node's first register, the operand is the stack offset of the
        // first element of the input register mask.
 1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1335   int reg = ra_->get_reg_first(this);
 1336   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1337 }
 1338 #endif
 1339 
 1340 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1341   // Encode LEA reg,[ESP+disp], where disp is the stack offset of the first
 1342   // element of the input register mask and reg is this node's encoding.
 1343   const int disp    = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   const int dst_enc = ra_->get_encode(this);
 1345   // Offsets below 128 use the 8-bit displacement form, others the 32-bit one.
 1346   const bool wide_disp = (disp >= 128);
 1347   emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1348   emit_rm(masm, wide_disp ? 0x2 : 0x1, dst_enc, 0x04);
 1349   emit_rm(masm, 0x0, 0x04, ESP_enc);
 1350   if (wide_disp) {
 1351     emit_d32(masm, disp);
 1352   } else {
 1353     emit_d8(masm, disp);
 1354   }
 1355 }
 1356 
 1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1358   // Byte length of the LEA written by BoxLockNode::emit(): 4 bytes with an
 1359   // 8-bit displacement, 7 bytes with a 32-bit one.  The 128 threshold has to
 1360   // stay identical to the one used by emit().
 1361   const int disp = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1362   const uint lea_disp32_bytes = 7;
 1363   const uint lea_disp8_bytes  = 4;
 1364   return (disp >= 128) ? lea_disp32_bytes : lea_disp8_bytes;
 1365 }
 1366 
 1367 //=============================================================================
 1368 #ifndef PRODUCT
 1369 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
        // Prints a fixed textual sketch of the inline cache check, followed by
        // two NOPs and a third one unless OptoBreakpoint is set.
        // NOTE(review): emit() delegates to ic_check(), so this hand-written
        // text may not match the generated instructions - confirm.
 1370   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1371   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1372   st->print_cr("\tNOP");
 1373   st->print_cr("\tNOP");
 1374   if( !OptoBreakpoint )
 1375     st->print_cr("\tNOP");
 1376 }
 1377 #endif
 1378 
 1379 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // The whole entry check is produced by the macro assembler's ic_check(),
        // which is handed CodeEntryAlignment.
 1380   __ ic_check(CodeEntryAlignment);
 1381 }
 1382 
 1383 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
        // No hand-computed byte count is kept for the entry check; the generic
        // MachNode::size() is asked instead.
 1384   return MachNode::size(ra_); // too many variables; just compute it
 1385                               // the hard way
 1386 }
 1387 
 1388 
 1389 //=============================================================================
 1390 
 1391 // Vector calling convention not supported: this always answers false.
 1392 bool Matcher::supports_vector_calling_convention() {
 1393   return false;
 1394 }
 1395 
 1396 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Not implemented here, consistent with
        // supports_vector_calling_convention() returning false.  The pair below
        // only satisfies the return type after Unimplemented().
 1397   Unimplemented();
 1398   return OptoRegPair(0, 0);
 1399 }
 1400 
 1401 // Decides whether a branch can use its short form for the given offset.
 1402 //
 1403 // NOTE: A platform that provides no short branch variants should make this
 1404 //       method return false for offset 0.
 1405 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1406   // The caller measures the offset from the address of the branch itself,
 1407   // whereas x86 computes a branch displacement from the address of the next
 1408   // instruction; rebase before range-checking.
 1409   const int disp = offset - br_size;
 1410   // The short version of jmpConUCF2 contains multiple branches, which makes
 1411   // its reach slightly smaller than the plain signed-byte range.
 1412   const bool multi_branch = (rule == jmpConUCF2_rule);
 1413   const int lowest  = multi_branch ? -126 : -128;
 1414   const int highest = multi_branch ?  125 :  127;
 1415   return (lowest <= disp) && (disp <= highest);
 1416 }
 1417 
 1418 // Tells whether the given register is ever used to pass an argument.
 1419 // Consulted on startup while building the trampoline stubs in
 1420 // generateOptoStub: registers not listed here are killed by the VM call in
 1421 // the trampoline, so arguments in them would not be available to the callee.
 1422 bool Matcher::can_be_java_arg( int reg ) {
 1423   const bool gpr_arg    = (reg == ECX_num)   || (reg == EDX_num);
 1424   const bool xmm_slot   = (reg == XMM0_num)  || (reg == XMM1_num);
 1425   const bool xmm_b_slot = (reg == XMM0b_num) || (reg == XMM1b_num);
 1426   return gpr_arg || (xmm_slot && UseSSE >= 1) || (xmm_b_slot && UseSSE >= 2);
 1427 }
 1428 
 1429 bool Matcher::is_spillable_arg( int reg ) {
        // Same register set as can_be_java_arg().
 1430   return can_be_java_arg(reg);
 1431 }
 1432 
 1433 uint Matcher::int_pressure_limit() {
 1434   // INTPRESSURE == -1 selects the built-in limit of 6; any other value wins.
 1435   return (INTPRESSURE != -1) ? INTPRESSURE : 6;
 1436 }
 1437 
 1438 uint Matcher::float_pressure_limit() {
 1439   // FLOATPRESSURE == -1 selects the built-in limit of 6; any other value wins.
 1440   return (FLOATPRESSURE != -1) ? FLOATPRESSURE : 6;
 1441 }
 1442 
 1443 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1444   // Prefer the hardware integer DIV over the multiply-based code only where
 1445   // the CPU reports a fast idiv.
 1446   if (!VM_Version::has_fast_idiv()) { return false; }
 1447   // The constant divisor has to fit into 32 bits; min_jint is excluded so
 1448   // that negative divisors always yield a correct positive 32-bit value.
 1449   const bool fits_in_32_bits = (divisor == (int)divisor);
 1450   return fits_in_32_bits && (divisor != min_jint);
 1451 }
 1452 
 1453 // Register for DIVI projection of divmodI
 1454 RegMask Matcher::divI_proj_mask() {
 1455   return EAX_REG_mask();  // the quotient projection is pinned to the EAX register mask
 1456 }
 1457 
 1458 // Register for MODI projection of divmodI
 1459 RegMask Matcher::modI_proj_mask() {
 1460   return EDX_REG_mask();  // the remainder projection is pinned to the EDX register mask
 1461 }
 1462 
 1463 // Register for DIVL projection of divmodL
 1464 RegMask Matcher::divL_proj_mask() {
 1465   ShouldNotReachHere();  // this query is not expected to be made here
 1466   return RegMask();      // default-constructed mask placed after ShouldNotReachHere()
 1467 }
 1468 
 1469 // Register for MODL projection of divmodL
 1470 RegMask Matcher::modL_proj_mask() {
 1471   ShouldNotReachHere();  // this query is not expected to be made here
 1472   return RegMask();      // default-constructed mask placed after ShouldNotReachHere()
 1473 }
 1474 
 1475 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1476   return NO_REG_mask();  // returns the NO_REG mask, i.e. no register is named for saving SP
 1477 }
 1478 
 1479 // Returns true if the high 32 bits of the value is known to be zero.
 1480 bool is_operand_hi32_zero(Node* n) {
 1481   const julong hi32_mask = 0xFFFFFFFF00000000LL;
 1482   switch (n->Opcode()) {
 1483     case Op_AndL: {  // AND whose second input is a constant with a clear upper half
 1484       Node* mask_in = n->in(2);
 1485       return mask_in->is_Con() && (mask_in->get_long() & hi32_mask) == 0LL;
 1486     }
 1487     case Op_ConL:    // long constant: test the value itself
 1488       return (n->get_long() & hi32_mask) == 0LL;
 1489     default:         // anything else is not known to have a zero upper half
 1490       return false;
 1491   }
 1492 }
 1493 
 1494 %}
 1495 
 1496 //----------ENCODING BLOCK-----------------------------------------------------
 1497 // This block specifies the encoding classes used by the compiler to output
 1498 // byte streams.  Encoding classes generate functions which are called by
 1499 // Machine Instruction Nodes in order to generate the bit encoding of the
 1500 // instruction.  Operands specify their base encoding interface with the
 1501 // interface keyword.  Four interfaces are currently supported:
 1502 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1503 // operand to generate a function which returns its register number when
 1504 // queried.   CONST_INTER causes an operand to generate a function which
 1505 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1506 // operand to generate four functions which return the Base Register, the
 1507 // Index Register, the Scale Value, and the Offset Value of the operand when
 1508 // queried.  COND_INTER causes an operand to generate six functions which
 1509 // return the encoding code (ie - encoding bits for the instruction)
 1510 // associated with each basic boolean condition for a conditional instruction.
 1511 // Instructions specify two basic values for encoding.  They use the
 1512 // ins_encode keyword to specify their encoding class (which must be one of
 1513 // the class names specified in the encoding block), and they use the
 1514 // opcode keyword to specify, in order, their primary, secondary, and
 1515 // tertiary opcode.  Only the opcode sections which a particular instruction
 1516 // needs for encoding need to be specified.
 1517 encode %{
 1518   // Build emit functions for each basic byte or larger field in the intel
 1519   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1520   // code in the enc_class source block.  Emit functions will live in the
 1521   // main source block for now.  In future, we can generalize this by
 1522   // adding a syntax that specifies the sizes of fields in an order,
 1523   // so that the adlc can build the emit functions automagically
 1524 
 1525   // Set instruction mark in MacroAssembler. This is used only in
 1526   // instructions that emit bytes directly to the CodeBuffer wrapped
 1527   // in the MacroAssembler. Should go away once all "instruct" are
 1528   // patched to emit bytes only using methods in MacroAssembler.
 1529   enc_class SetInstMark %{    // Sets the instruction mark on the MacroAssembler
 1530     __ set_inst_mark();
 1531   %}
 1532 
 1533   enc_class ClearInstMark %{    // Clears the instruction mark set by SetInstMark
 1534     __ clear_inst_mark();
 1535   %}
 1536 
 1537   // Emit primary opcode
 1538   enc_class OpcP %{    // Emits the using instruction's $primary opcode, nothing else
 1539     emit_opcode(masm, $primary);
 1540   %}
 1541 
 1542   // Emit secondary opcode
 1543   enc_class OpcS %{    // Emits the using instruction's $secondary opcode, nothing else
 1544     emit_opcode(masm, $secondary);
 1545   %}
 1546 
 1547   // Emit opcode directly
 1548   enc_class Opcode(immI d8) %{    // Emits the constant value of operand d8 as the opcode
 1549     emit_opcode(masm, $d8$$constant);
 1550   %}
 1551 
 1552   enc_class SizePrefix %{    // Emits the single prefix byte 0x66
 1553     emit_opcode(masm,0x66);
 1554   %}
 1555 
 1556   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many): r/m byte only, no opcode
 1557     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);  // first field 0x3, then the dst and src encodings
 1558   %}
 1559 
 1560   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many): opcode constant, then r/m byte
 1561     emit_opcode(masm,$opcode$$constant);
 1562     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);  // first field 0x3, then the dst and src encodings
 1563   %}
 1564 
 1565   enc_class mov_r32_imm0( rRegI dst ) %{    // Loads the 32-bit constant zero into dst
 1566     emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1567     emit_d32   ( masm, 0x0  );             //                         imm32==0x0
 1568   %}
 1569 
 1570   enc_class cdq_enc %{
 1571     // Full implementation of Java idiv and irem; checks for the
 1572     // special case described in the JVM spec., p.243 & p.271.
 1573     //
 1574     //         normal case                           special case
 1575     //
 1576     // input : eax: dividend                         min_int
 1577     //         reg: divisor                          -1
 1578     //
 1579     // output: eax: quotient  (= eax idiv reg)       min_int
 1580     //         edx: remainder (= eax irem reg)       0
 1581     //
 1582     //  Code sequence:
 1583     //
 1584     //  81 F8 00 00 00 80    cmp         eax,80000000h
 1585     //  0F 85 0B 00 00 00    jne         normal_case
 1586     //  33 D2                xor         edx,edx
 1587     //  83 F9 FF             cmp         ecx,0FFh  (imm8; the divisor -1 of the special case)
 1588     //  0F 84 03 00 00 00    je          done
 1589     //                  normal_case:
 1590     //  99                   cdq
 1591     //  F7 F9                idiv        ecx
 1592     //                  done:
 1593     //
 1594     emit_opcode(masm,0x81); emit_d8(masm,0xF8);
 1595     emit_opcode(masm,0x00); emit_d8(masm,0x00);
 1596     emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp eax,80000000h
 1597     emit_opcode(masm,0x0F); emit_d8(masm,0x85);
 1598     emit_opcode(masm,0x0B); emit_d8(masm,0x00);
 1599     emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case (+0x0B skips xor, cmp and je)
 1600     emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor edx,edx
 1601     emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp ecx,0FFh
 1602     emit_opcode(masm,0x0F); emit_d8(masm,0x84);
 1603     emit_opcode(masm,0x03); emit_d8(masm,0x00);
 1604     emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done (+3 skips cdq and the 2-byte idiv)
 1605     // normal_case:
 1606     emit_opcode(masm,0x99);                                         // cdq
 1607     // idiv (note: must be emitted by the user of this rule)
 1608     // done:
 1609   %}
 1610 
 1611   // Dense encoding for older common ops
 1612   enc_class Opc_plus(immI opcode, rRegI reg) %{    // Opcode is the constant plus the register encoding
 1613     emit_opcode(masm, $opcode$$constant + $reg$$reg);
 1614   %}
 1615 
 1616 
 1617   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1618   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1619     // An immediate in [-128, 127] gets the opcode with bit 0x02 (sign extend) set.
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1621       emit_opcode(masm, $primary | 0x02);
 1622     }
 1623     else {                          // If 32-bit immediate: opcode unchanged
 1624       emit_opcode(masm, $primary);
 1625     }
 1626   %}    // only the opcode is emitted here, not the immediate
 1627 
 1628   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1629     // Emit primary opcode and set sign-extend bit
 1630     // An immediate in [-128, 127] gets the opcode with bit 0x02 (sign extend) set.
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       emit_opcode(masm, $primary | 0x02);    }
 1633     else {                          // If 32-bit immediate: opcode unchanged
 1634       emit_opcode(masm, $primary);
 1635     }
 1636     // Emit r/m byte with secondary opcode, after primary opcode.
 1637     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1638   %}    // the immediate itself is not emitted here
 1639 
 1640   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1641     // An immediate in [-128, 127] is output as 8 bits
 1642     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1643       $$$emit8$imm$$constant;
 1644     }
 1645     else {                          // If 32-bit immediate
 1646       // Output immediate as 32 bits
 1647       $$$emit32$imm$$constant;
 1648     }
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1652     // Applies the primary/secondary opcode to the low register of dst,
 1653     // using the low 32 bits of the immediate (8-bit form when they fit).
 1654     int con = (int)$imm$$constant; // Throw away top bits
 1655     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with secondary opcode, after primary opcode.
 1657     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1659     else                               emit_d32(masm,con);
 1660   %}
 1661 
 1662   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1663     // Applies the primary/tertiary opcode to the high register of dst,
 1664     // using the upper 32 bits of the immediate (8-bit form when they fit).
 1665     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1666     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1667     // Emit r/m byte with tertiary opcode, after primary opcode.
 1668     emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1669     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1670     else                               emit_d32(masm,con);
 1671   %}
 1672 
 1673   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1674     emit_cc(masm, $secondary, $dst$$reg );  // secondary opcode combined with the dst encoding via emit_cc
 1675   %}
 1676 
 1677   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP each half of dst, then exchange the halves
 1678     int destlo = $dst$$reg;
 1679     int desthi = HIGH_FROM_LOW_ENC(destlo);  // encoding of the high register of the pair
 1680     // bswap lo
 1681     emit_opcode(masm, 0x0F);
 1682     emit_cc(masm, 0xC8, destlo);
 1683     // bswap hi
 1684     emit_opcode(masm, 0x0F);
 1685     emit_cc(masm, 0xC8, desthi);
 1686     // xchg lo and hi
 1687     emit_opcode(masm, 0x87);
 1688     emit_rm(masm, 0x3, destlo, desthi);
 1689   %}
 1690 
 1691   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1692     emit_rm(masm, 0x3, $secondary, $div$$reg );  // r/m byte only: secondary opcode plus the register
 1693   %}
 1694 
 1695   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1696     $$$emit8$primary;                          // primary opcode byte first
 1697     emit_cc(masm, $secondary, $cop$$cmpcode);  // then secondary opcode combined with the condition code
 1698   %}
 1699 
 1700   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1701     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);  // 16-bit pattern built from condition and src encoding
 1702     emit_d8(masm, op >> 8 );   // upper byte first
 1703     emit_d8(masm, op & 255);   // then the lower byte
 1704   %}
 1705 
 1706   // emulate a CMOV with a conditional branch around a MOV
 1707   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1708     // Invert sense of branch from sense of CMOV
 1709     emit_cc( masm, 0x70, ($cop$$cmpcode^1) );  // base 0x70 combined with the inverted condition
 1710     emit_d8( masm, $brOffs$$constant );        // 8-bit branch offset supplied by the instruction
 1711   %}
 1712 
 1713   enc_class enc_PartialSubtypeCheck( ) %{    // Slow-path subtype check on fixed registers
 1714     Register Redi = as_Register(EDI_enc); // result register
 1715     Register Reax = as_Register(EAX_enc); // super class
 1716     Register Recx = as_Register(ECX_enc); // killed
 1717     Register Resi = as_Register(ESI_enc); // sub class
 1718     Label miss;
 1719 
 1720     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1721                                      nullptr, &miss,
 1722                                      /*set_cond_codes:*/ true);
 1723     if ($primary) {              // only when the instruction's primary opcode is non-zero
 1724       __ xorptr(Redi, Redi);     // zero the result register on the path that did not jump to miss
 1725     }
 1726     __ bind(miss);               // the miss path joins here, skipping the xor above
 1727   %}
 1728 
 1729   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1730     int start = __ offset();            // code offset before anything is emitted
 1731     if (UseSSE >= 2) {
 1732       if (VerifyFPU) {
 1733         __ verify_FPU(0, "must be empty in SSE2+ mode");
 1734       }
 1735     } else {
 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       __ empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {   // first use: remember how many bytes were emitted
 1741       sizeof_FFree_Float_Stack_All = __ offset() - start;
 1742     } else {                                    // later uses must emit exactly the remembered size
 1743       assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
 1747   enc_class Verify_FPU_For_Leaf %{    // Emits an FPU verification only when VerifyFPU is set
 1748     if( VerifyFPU ) {
 1749       __ verify_FPU( -3, "Returning from Runtime Leaf call");
 1750     }
 1751   %}
 1752 
 1753   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1754     // This is the instruction starting address for relocation info.
 1755     __ set_inst_mark();
 1756     $$$emit8$primary;
 1757     // CALL directly to the runtime; the displacement is relative to the end of the 4-byte field
 1758     emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1759                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1760     __ clear_inst_mark();
 1761     __ post_call_nop();
 1762 
 1763     if (UseSSE >= 2) {   // the rest only applies in SSE2+ mode
 1764       BasicType rt = tf()->return_type();
 1765 
 1766       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1767         // A C runtime call where the return value is unused.  In SSE2+
 1768         // mode the result needs to be removed from the FPU stack.  It's
 1769         // likely that this function call could be removed by the
 1770         // optimizer if the C function is a pure function.
 1771         __ ffree(0);
 1772       } else if (rt == T_FLOAT) {             // move a float result from the FPU stack to xmm0 via the stack
 1773         __ lea(rsp, Address(rsp, -4));        // reserve 4 bytes
 1774         __ fstp_s(Address(rsp, 0));
 1775         __ movflt(xmm0, Address(rsp, 0));
 1776         __ lea(rsp, Address(rsp,  4));        // release them again
 1777       } else if (rt == T_DOUBLE) {            // same for a double result, using 8 bytes
 1778         __ lea(rsp, Address(rsp, -8));
 1779         __ fstp_d(Address(rsp, 0));
 1780         __ movdbl(xmm0, Address(rsp, 0));
 1781         __ lea(rsp, Address(rsp,  8));
 1782       }
 1783     }
 1784   %}
 1785 
 1786   enc_class pre_call_resets %{
 1787     // If method sets FPU control word restore it here
 1788     debug_only(int off0 = __ offset());   // start offset, tracked for the size assert below
 1789     if (ra_->C->in_24_bit_fp_mode()) {
 1790       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1791     }
 1792     // Clear upper bits of YMM registers when current compiled code uses
 1793     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1794     __ vzeroupper();
 1795     debug_only(int off1 = __ offset());   // end offset
 1796     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1797   %}
 1798 
 1799   enc_class post_call_FPU %{
 1800     // If method sets FPU control word do it here also: reload the 24-bit control word
 1801     if (Compile::current()->in_24_bit_fp_mode()) {
 1802       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1803     }
 1804   %}
 1805 
 1806   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1807     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1808     // who we intended to call.
 1809     __ set_inst_mark();
 1810     $$$emit8$primary;
 1811 
 1812     if (!_method) {   // no target method recorded: emit as a runtime call, no interpreter stub
 1813       emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1814                      runtime_call_Relocation::spec(),
 1815                      RELOC_IMM32);
 1816       __ clear_inst_mark();
 1817       __ post_call_nop();
 1818     } else {
 1819       int method_index = resolved_method_index(masm);
 1820       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1821                                                   : static_call_Relocation::spec(method_index);
 1822       emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1823                      rspec, RELOC_DISP32);
 1824       __ post_call_nop();
 1825       address mark = __ inst_mark();   // read before the mark is cleared in either branch below
 1826       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1827         // Calls of the same statically bound method can share
 1828         // a stub to the interpreter.
 1829         __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
 1830         __ clear_inst_mark();
 1831       } else {
 1832         // Emit stubs for static call.
 1833         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 1834         __ clear_inst_mark();
 1835         if (stub == nullptr) {   // stub emission failed: record the failure and stop
 1836           ciEnv::current()->record_failure("CodeCache is full");
 1837           return;
 1838         }
 1839       }
 1840     }
 1841   %}
 1842 
 1843   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1844     __ ic_call((address)$meth$$method, resolved_method_index(masm));  // the call is emitted by ic_call()
 1845     __ post_call_nop();
 1846   %}
 1847 
 1848   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1849     int disp = in_bytes(Method::from_compiled_offset());
 1850     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");  // must fit the 8-bit displacement below
 1851 
 1852     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1853     __ set_inst_mark();
 1854     $$$emit8$primary;
 1855     emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
 1856     emit_d8(masm, disp);             // Displacement
 1857     __ clear_inst_mark();
 1858     __ post_call_nop();
 1859   %}
 1860 
 1861   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1862     $$$emit8$primary;                           // primary opcode byte
 1863     emit_rm(masm, 0x3, $secondary, $dst$$reg);  // r/m byte: secondary opcode plus dst
 1864     $$$emit8$shift$$constant;                   // 8-bit shift count
 1865   %}
 1866 
 1867   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1868     // Load immediate does not have a zero or sign extended version
 1869     // for 8-bit immediates, so the full 32-bit constant is always emitted
 1870     emit_opcode(masm, 0xB8 + $dst$$reg);
 1871     $$$emit32$src$$constant;
 1872   %}
 1873 
 1874   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1875     // Load immediate does not have a zero or sign extended version
 1876     // for 8-bit immediates; same as LdImmI except the base opcode is $primary
 1877     emit_opcode(masm, $primary + $dst$$reg);
 1878     $$$emit32$src$$constant;
 1879   %}
 1880 
 1881   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1882     // Loads the low 32 bits of the long constant into the low
 1883     // register of the dst pair.
 1884     int dst_enc = $dst$$reg;
 1885     int src_con = $src$$constant & 0x0FFFFFFFFL;   // keep only the low 32 bits
 1886     if (src_con == 0) {
 1887       // xor dst, dst
 1888       emit_opcode(masm, 0x33);
 1889       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1890     } else {
 1891       emit_opcode(masm, $primary + dst_enc);
 1892       emit_d32(masm, src_con);
 1893     }
 1894   %}
 1895 
 1896   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1897     // Loads the upper 32 bits of the long constant into the high
 1898     // register of the dst pair.
 1899     int dst_enc = HIGH_FROM_LOW_ENC($dst$$reg);    // high register, derived as in the other *_Hi encodings
 1900     int src_con = ((julong)($src$$constant)) >> 32; // unsigned shift keeps only the upper 32 bits
 1901     if (src_con == 0) {
 1902       // xor dst, dst
 1903       emit_opcode(masm, 0x33);
 1904       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1905     } else {
 1906       emit_opcode(masm, $primary + dst_enc);
 1907       emit_d32(masm, src_con);
 1908     }
 1909   %}
 1910 
 1911 
 1912   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1913   enc_class enc_Copy( rRegI dst, rRegI src ) %{    // delegates entirely to encode_Copy()
 1914     encode_Copy( masm, $dst$$reg, $src$$reg );
 1915   %}
 1916 
 1917   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{    // copies the register named by src's own encoding into dst
 1918     encode_Copy( masm, $dst$$reg, $src$$reg );
 1919   %}
 1920 
 1921   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many): r/m byte only, no opcode
 1922     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);  // first field 0x3, then the dst and src encodings
 1923   %}
 1924 
 1925   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many): primary opcode on the low registers
 1926     $$$emit8$primary;
 1927     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1928   %}
 1929 
 1930   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many): secondary opcode on the high registers
 1931     $$$emit8$secondary;
 1932     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1933   %}
 1934 
 1935   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many): r/m byte for the low registers, no opcode
 1936     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1937   %}
 1938 
 1939   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many): r/m byte for the high registers, no opcode
 1940     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1941   %}
 1942 
 1943   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{    // r/m byte pairing dst with the high register of src
 1944     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1945   %}
 1946 
 1947   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1948     // Output immediate as 32 bits
 1949     $$$emit32$src$$constant;
 1950   %}
 1951 
 1952   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1953     // Output Float immediate bits
 1954     jfloat jf = $src$$constant;
 1955     int    jf_as_bits = jint_cast( jf );   // convert the float to a jint via jint_cast
 1956     emit_d32(masm, jf_as_bits);
 1957   %}
 1958 
 1959   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1960     // Output Float immediate bits
 1961     jfloat jf = $src$$constant;
 1962     int    jf_as_bits = jint_cast( jf );   // convert the float to a jint via jint_cast
 1963     emit_d32(masm, jf_as_bits);
 1964   %}
 1965 
 1966   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1967     // Output immediate as 16 bits
 1968     $$$emit16$src$$constant;
 1969   %}
 1970 
 1971   enc_class Con_d32(immI src) %{    // Emits the constant through emit_d32()
 1972     emit_d32(masm,$src$$constant);
 1973   %}
 1974 
 1975   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1976     // Output immediate memory reference: r/m byte with t1, followed by a zero 32-bit field
 1977     emit_rm(masm, 0x00, $t1$$reg, 0x05 );
 1978     emit_d32(masm, 0x00);
 1979   %}
 1980 
 1981   enc_class lock_prefix( ) %{    // Emits only the prefix byte
 1982     emit_opcode(masm,0xF0);         // [Lock]
 1983   %}
 1984 
 1985   // Cmp-xchg long value.
 1986   // Note: we need to swap ebx and ecx before and after the
 1987   //       cmpxchg8 instruction because the instruction uses
 1988   //       ecx as the high order word of the new value to store but
 1989   //       our register encoding uses ebx.
 1990   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 1991     // Locked CMPXCHG8 on [mem_ptr], bracketed by two identical register exchanges.
 1992     // XCHG  ebx,ecx
 1993     emit_opcode(masm,0x87);
 1994     emit_opcode(masm,0xD9);
 1995     // [Lock]
 1996     emit_opcode(masm,0xF0);
 1997     // CMPXCHG8 [Eptr]
 1998     emit_opcode(masm,0x0F);
 1999     emit_opcode(masm,0xC7);
 2000     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
 2001     // XCHG  ebx,ecx (same bytes as above, undoing the first exchange)
 2002     emit_opcode(masm,0x87);
 2003     emit_opcode(masm,0xD9);
 2004   %}
 2005 
 2006   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{    // Locked CMPXCHG on [mem_ptr]
 2007     // [Lock]
 2008     emit_opcode(masm,0xF0);
 2009 
 2010     // CMPXCHG [Eptr]
 2011     emit_opcode(masm,0x0F);
 2012     emit_opcode(masm,0xB1);
 2013     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );   // register field is the fixed value 1
 2014   %}
 2015 
 2016   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{    // Locked byte-sized CMPXCHG on [mem_ptr]
 2017     // [Lock]
 2018     emit_opcode(masm,0xF0);
 2019 
 2020     // CMPXCHGB [Eptr]
 2021     emit_opcode(masm,0x0F);
 2022     emit_opcode(masm,0xB0);
 2023     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );   // register field is the fixed value 1
 2024   %}
 2025 
 2026   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{    // Locked 16-bit CMPXCHG on [mem_ptr]
 2027     // [Lock]
 2028     emit_opcode(masm,0xF0);
 2029 
 2030     // 16-bit mode (the same 0x66 byte that SizePrefix emits)
 2031     emit_opcode(masm, 0x66);
 2032 
 2033     // CMPXCHGW [Eptr]
 2034     emit_opcode(masm,0x0F);
 2035     emit_opcode(masm,0xB1);
 2036     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );   // register field is the fixed value 1
 2037   %}
 2038 
 2039   enc_class enc_flags_ne_to_boolean( iRegI res ) %{    // res = 0 if the NE condition holds, else 1
 2040     int res_encoding = $res$$reg;
 2041 
 2042     // MOV  res,0
 2043     emit_opcode( masm, 0xB8 + res_encoding);
 2044     emit_d32( masm, 0 );
 2045     // JNE,s  fail
 2046     emit_opcode(masm,0x75);
 2047     emit_d8(masm, 5 );           // skips the 5 bytes (opcode + d32) of the MOV below
 2048     // MOV  res,1
 2049     emit_opcode( masm, 0xB8 + res_encoding);
 2050     emit_d32( masm, 1 );
 2051     // fail:
 2052   %}
 2053 
 2054   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2055     int reg_encoding = $ereg$$reg;
 2056     int base  = $mem$$base;
 2057     int index = $mem$$index;
 2058     int scale = $mem$$scale;
 2059     int displace = $mem$$disp;
 2060     relocInfo::relocType disp_reloc = $mem->disp_reloc();   // relocation type of the displacement is passed through
 2061     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2062   %}
 2063 
 2064   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem for the high half of a long
 2065     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2066     int base  = $mem$$base;
 2067     int index = $mem$$index;
 2068     int scale = $mem$$scale;
 2069     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2070     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2071     encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
 2072   %}
 2073 
 2074   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{    // two-instruction shift of a register pair
 2075     int r1, r2;
 2076     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }  // r1 = low, r2 = high
 2077     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }  // r1 = high, r2 = low
 2078     emit_opcode(masm,0x0F);               // first instruction: 0x0F, $tertiary, r/m(r1, r2), count
 2079     emit_opcode(masm,$tertiary);
 2080     emit_rm(masm, 0x3, r1, r2);
 2081     emit_d8(masm,$cnt$$constant);
 2082     emit_d8(masm,$primary);               // second instruction: $primary, r/m($secondary, r1), count
 2083     emit_rm(masm, 0x3, $secondary, r1);
 2084     emit_d8(masm,$cnt$$constant);
 2085   %}
 2086 
 2087   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{    // shift by 32..63 with sign fill
 2088     emit_opcode( masm, 0x8B ); // Move: low register of dst <- high register of dst
 2089     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2090     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2091       emit_d8(masm,$primary);
 2092       emit_rm(masm, 0x3, $secondary, $dst$$reg);
 2093       emit_d8(masm,$cnt$$constant-32);   // the remaining count is applied to the low register
 2094     }
 2095     emit_d8(masm,$primary);              // finally shift the high register by 31
 2096     emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2097     emit_d8(masm,31);
 2098   %}
 2099 
 2100   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{    // shift by 32..63 with zero fill
 2101     int r1, r2;
 2102     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }  // r1 = low, r2 = high
 2103     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }  // r1 = high, r2 = low
 2104 
 2105     emit_opcode( masm, 0x8B ); // Move r1,r2
 2106     emit_rm(masm, 0x3, r1, r2);
 2107     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2108       emit_opcode(masm,$primary);
 2109       emit_rm(masm, 0x3, $secondary, r1);
 2110       emit_d8(masm,$cnt$$constant-32);   // the remaining count is applied to r1
 2111     }
 2112     emit_opcode(masm,0x33);  // XOR r2,r2
 2113     emit_rm(masm, 0x3, r2, r2);
 2114   %}
 2115 
 2116   // Clone of RegMem but accepts an extra parameter to access each
 2117   // half of a double in memory; it never needs relocation info.
 2118   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2119     emit_opcode(masm,$opcode$$constant);   // opcode is supplied by the instruction as a constant
 2120     int reg_encoding = $rm_reg$$reg;
 2121     int base     = $mem$$base;
 2122     int index    = $mem$$index;
 2123     int scale    = $mem$$scale;
 2124     int displace = $mem$$disp + $disp_for_half$$constant;   // selects which half is addressed
 2125     relocInfo::relocType disp_reloc = relocInfo::none;     // never relocated
 2126     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2127   %}
 2128 
 2129   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2130   //
 2131   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2132   // and it never needs relocation information.
 2133   // Frequently used to move data between FPU's Stack Top and memory.
 2134   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{    // memory form with a constant reg/opcode field
 2135     int rm_byte_opcode = $rm_opcode$$constant;
 2136     int base     = $mem$$base;
 2137     int index    = $mem$$index;
 2138     int scale    = $mem$$scale;
 2139     int displace = $mem$$disp;
 2140     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2141     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2142   %}
 2143 
 2144   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{    // like RMopc_Mem_no_oop, but passes the relocation type on
 2145     int rm_byte_opcode = $rm_opcode$$constant;
 2146     int base     = $mem$$base;
 2147     int index    = $mem$$index;
 2148     int scale    = $mem$$scale;
 2149     int displace = $mem$$disp;
 2150     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2151     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2152   %}
 2153 
 2154   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea: address is src0 + src1, no index
 2155     int reg_encoding = $dst$$reg;
 2156     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2157     int index        = 0x04;            // 0x04 indicates no index
 2158     int scale        = 0x00;            // 0x00 indicates no scale
 2159     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2160     relocInfo::relocType disp_reloc = relocInfo::none;
 2161     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2162   %}
 2163 
 2164   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN: dst = min(dst, src) via compare and short jump
 2165     // Compare dst,src
 2166     emit_opcode(masm,0x3B);
 2167     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2168     // jmp dst < src around move
 2169     emit_opcode(masm,0x7C);
 2170     emit_d8(masm,2);             // skips the 2-byte move below
 2171     // move dst,src
 2172     emit_opcode(masm,0x8B);
 2173     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2174   %}
 2175 
 2176   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX: dst = max(dst, src) via compare and short jump
 2177     // Compare dst,src
 2178     emit_opcode(masm,0x3B);
 2179     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2180     // jmp dst > src around move
 2181     emit_opcode(masm,0x7F);
 2182     emit_d8(masm,2);             // skips the 2-byte move below
 2183     // move dst,src
 2184     emit_opcode(masm,0x8B);
 2185     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2186   %}
 2187 
 2188   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2189     // If src is FPR1, we can just FST to store it.
 2190     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2191     int reg_encoding = 0x2; // Just store
 2192     int base  = $mem$$base;
 2193     int index = $mem$$index;
 2194     int scale = $mem$$scale;
 2195     int displace = $mem$$disp;
 2196     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2197     if( $src$$reg != FPR1L_enc ) {
 2198       reg_encoding = 0x3;  // Store & pop
 2199       emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
 2200       emit_d8( masm, 0xC0-1+$src$$reg );   // second FLD byte, selected by the src encoding
 2201     }
 2202     __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
 2203     emit_opcode(masm,$primary);
 2204     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2205     __ clear_inst_mark();
 2206   %}
 2207 
 2208   enc_class neg_reg(rRegI dst) %{
 2209     // NEG $dst
 2210     emit_opcode(masm,0xF7);
 2211     emit_rm(masm, 0x3, 0x03, $dst$$reg );   // opcode extension 0x03 in the register field
 2212   %}
 2213 
 2214   enc_class setLT_reg(eCXRegI dst) %{
 2215     // SETLT $dst
 2216     emit_opcode(masm,0x0F);
 2217     emit_opcode(masm,0x9C);
 2218     emit_rm( masm, 0x3, 0x4, $dst$$reg );   // register field is the fixed value 0x4
 2219   %}
 2220 
 2221   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2222     int tmpReg = $tmp$$reg;
 2223 
 2224     // SUB $p,$q
 2225     emit_opcode(masm,0x2B);
 2226     emit_rm(masm, 0x3, $p$$reg, $q$$reg);
 2227     // SBB $tmp,$tmp (subtracting tmp from itself with borrow leaves 0 or -1)
 2228     emit_opcode(masm,0x1B);
 2229     emit_rm(masm, 0x3, tmpReg, tmpReg);
 2230     // AND $tmp,$y
 2231     emit_opcode(masm,0x23);
 2232     emit_rm(masm, 0x3, tmpReg, $y$$reg);
 2233     // ADD $p,$tmp
 2234     emit_opcode(masm,0x03);
 2235     emit_rm(masm, 0x3, $p$$reg, tmpReg);
 2236   %}
 2237 
 2238   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ // Left shift of the dst.hi:dst.lo register pair by the count in ECX
 2239     // TEST shift,32  (F7 /0 with a 32-bit immediate; the register is hard-coded as ECX_enc)
 2240     emit_opcode(masm,0xF7);
 2241     emit_rm(masm, 0x3, 0, ECX_enc);
 2242     emit_d32(masm,0x20);
 2243     // JEQ,s small  (taken when bit 5 of the count is clear)
 2244     emit_opcode(masm, 0x74);
 2245     emit_d8(masm, 0x04);                  // rel8 = 4: the MOV (2 bytes) plus the XOR (2 bytes) below
 2246     // MOV    $dst.hi,$dst.lo  -- pre-shift by a whole 32-bit word
 2247     emit_opcode( masm, 0x8B );
 2248     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2249     // CLR    $dst.lo  (33 = XOR of the register with itself)
 2250     emit_opcode(masm, 0x33);
 2251     emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
 2252 // small:
 2253     // SHLD   $dst.hi,$dst.lo,$shift  (0F A5: count is taken from CL)
 2254     emit_opcode(masm,0x0F);
 2255     emit_opcode(masm,0xA5);
 2256     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2257     // SHL    $dst.lo,$shift  (D3 /4: count is taken from CL)
 2258     emit_opcode(masm,0xD3);
 2259     emit_rm(masm, 0x3, 0x4, $dst$$reg );
 2260   %}
 2261 
 2262   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ // Logical (zero-filling) right shift of the dst.hi:dst.lo pair by the count in ECX
 2263     // TEST shift,32  (F7 /0 with a 32-bit immediate; the register is hard-coded as ECX_enc)
 2264     emit_opcode(masm,0xF7);
 2265     emit_rm(masm, 0x3, 0, ECX_enc);
 2266     emit_d32(masm,0x20);
 2267     // JEQ,s small  (taken when bit 5 of the count is clear)
 2268     emit_opcode(masm, 0x74);
 2269     emit_d8(masm, 0x04);                  // rel8 = 4: the MOV (2 bytes) plus the XOR (2 bytes) below
 2270     // MOV    $dst.lo,$dst.hi  -- pre-shift by a whole 32-bit word
 2271     emit_opcode( masm, 0x8B );
 2272     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2273     // CLR    $dst.hi  (33 = XOR of the register with itself)
 2274     emit_opcode(masm, 0x33);
 2275     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2276 // small:
 2277     // SHRD   $dst.lo,$dst.hi,$shift  (0F AD: count is taken from CL)
 2278     emit_opcode(masm,0x0F);
 2279     emit_opcode(masm,0xAD);
 2280     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2281     // SHR    $dst.hi,$shift  (D3 /5: count is taken from CL)
 2282     emit_opcode(masm,0xD3);
 2283     emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2284   %}
 2285 
 2286   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ // Arithmetic (sign-filling) right shift of the dst.hi:dst.lo pair by the count in ECX
 2287     // TEST shift,32  (F7 /0 with a 32-bit immediate; the register is hard-coded as ECX_enc)
 2288     emit_opcode(masm,0xF7);
 2289     emit_rm(masm, 0x3, 0, ECX_enc);
 2290     emit_d32(masm,0x20);
 2291     // JEQ,s small  (taken when bit 5 of the count is clear)
 2292     emit_opcode(masm, 0x74);
 2293     emit_d8(masm, 0x05);                  // rel8 = 5: the MOV (2 bytes) plus the SAR-by-immediate (3 bytes) below
 2294     // MOV    $dst.lo,$dst.hi  -- pre-shift by a whole 32-bit word
 2295     emit_opcode( masm, 0x8B );
 2296     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2297     // SAR    $dst.hi,31  -- fill the high word with copies of the sign bit
 2298     emit_opcode(masm, 0xC1);
 2299     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2300     emit_d8(masm, 0x1F );
 2301 // small:
 2302     // SHRD   $dst.lo,$dst.hi,$shift  (0F AD: count is taken from CL)
 2303     emit_opcode(masm,0x0F);
 2304     emit_opcode(masm,0xAD);
 2305     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2306     // SAR    $dst.hi,$shift  (D3 /7: count is taken from CL)
 2307     emit_opcode(masm,0xD3);
 2308     emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2309   %}
 2310 
 2311 
 2312   // ----------------- Encodings for floating point unit -----------------
 2313   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2314   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV: one opcode byte from $primary, then a register-form operand byte
 2315     $$$emit8$primary;
 2316     emit_rm(masm, 0x3, $secondary, $src$$reg );   // $secondary is passed as the middle field, the src encoding as the last
 2317   %}
 2318 
 2319   // Pop argument in FPR0 with FSTP ST(0)
 2320   enc_class PopFPU() %{ // Emits the fixed two-byte sequence DD D8
 2321     emit_opcode( masm, 0xDD );   // DD D8+i is FSTP ST(i); with i = 0 this stores TOS onto itself and pops
 2322     emit_d8( masm, 0xD8 );
 2323   %}
 2324 
 2325   // !!!!! equivalent to Pop_Reg_FPR
 2326   enc_class Pop_Reg_DPR( regDPR dst ) %{ // Store the x87 top-of-stack into dst and pop
 2327     emit_opcode( masm, 0xDD );           // FSTP   ST(i)
 2328     emit_d8( masm, 0xD8+$dst$$reg );     // second byte D8+i, with i = the dst encoding
 2329   %}
 2330 
 2331   enc_class Push_Reg_DPR( regDPR dst ) %{ // Push a copy of dst onto the x87 stack
 2332     emit_opcode( masm, 0xD9 );           // D9 C0+i = FLD ST(i)
 2333     emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2334   %}
 2335 
 2336   enc_class strictfp_bias1( regDPR dst ) %{ // Multiply dst by the 80-bit constant stored at addr_fpu_subnormal_bias1()
 2337     emit_opcode( masm, 0xDB );           // FLD m80real
 2338     emit_opcode( masm, 0x2D );           // operand byte: extension /5 with an absolute 32-bit address following
 2339     emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); // the address is emitted as a 32-bit displacement
 2340     emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
 2341     emit_opcode( masm, 0xC8+$dst$$reg ); // DE C8+i: multiply into ST(i) and pop the constant just loaded
 2342   %}
 2343 
 2344   enc_class strictfp_bias2( regDPR dst ) %{ // Multiply dst by the 80-bit constant stored at addr_fpu_subnormal_bias2()
 2345     emit_opcode( masm, 0xDB );           // FLD m80real
 2346     emit_opcode( masm, 0x2D );           // operand byte: extension /5 with an absolute 32-bit address following
 2347     emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); // the address is emitted as a 32-bit displacement
 2348     emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
 2349     emit_opcode( masm, 0xC8+$dst$$reg ); // DE C8+i: multiply into ST(i) and pop the constant just loaded
 2350   %}
 2351 
 2352   // Special case for moving an integer register to a stack slot.
 2353   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS: $primary opcode applied to register src and stack slot dst
 2354     store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp ); // helper defined outside this chunk; only the displacement of dst is passed
 2355   %}
 2356 
 2357   // Special case for moving a register to a stack slot.
 2358   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS: emits only the operand bytes addressing [ESP + 32-bit displacement]
 2359     // Opcode already emitted (by an earlier encoding in the same instruction)
 2360     emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte: mode 2 (32-bit displacement), src in the register field
 2361     emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte, built with the same three-field helper
 2362     emit_d32(masm, $dst$$disp);   // Displacement
 2363   %}
 2364 
 2365   // Push the integer in stackSlot 'src' onto FP-stack
 2366   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]; opcode and extension come from $primary and $secondary
 2367     store_to_stackslot( masm, $primary, $secondary, $src$$disp ); // only the displacement of src is used
 2368   %}
 2369 
 2370   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2371   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2372     store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp ); // D9 /3: single-precision store with pop
 2373   %}
 2374 
 2375   // Same as Pop_Mem_FPR except for opcode
 2376   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2377   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2378     store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp ); // DD /3: double-precision store with pop
 2379   %}
 2380 
 2381   enc_class Pop_Reg_FPR( regFPR dst ) %{ // Store the x87 top-of-stack into dst and pop
 2382     emit_opcode( masm, 0xDD );           // FSTP   ST(i)
 2383     emit_d8( masm, 0xD8+$dst$$reg );     // second byte D8+i, with i = the dst encoding
 2384   %}
 2385 
 2386   enc_class Push_Reg_FPR( regFPR dst ) %{ // Push a copy of dst onto the x87 stack
 2387     emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
 2388     emit_d8( masm, 0xC0-1+$dst$$reg );   // second byte C0+(i-1), with i = the dst encoding
 2389   %}
 2390 
 2391   // Push FPU's float to a stack-slot, and pop FPU-stack
 2392   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ // Store float register src to stack slot dst
 2393     int pop = 0x02;                      // extension /2: plain store, used when src is FPR1
 2394     if ($src$$reg != FPR1L_enc) {
 2395       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2396       emit_d8( masm, 0xC0-1+$src$$reg );
 2397       pop = 0x03;                        // extension /3: store and pop, removing the copy pushed above
 2398     }
 2399     store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2400   %}
 2401 
 2402   // Push FPU's double to a stack-slot, and pop FPU-stack
 2403   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ // Store double register src to stack slot dst
 2404     int pop = 0x02;                      // extension /2: plain store, used when src is FPR1
 2405     if ($src$$reg != FPR1L_enc) {
 2406       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2407       emit_d8( masm, 0xC0-1+$src$$reg );
 2408       pop = 0x03;                        // extension /3: store and pop, removing the copy pushed above
 2409     }
 2410     store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2411   %}
 2412 
 2413   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2414   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ // Copy x87 register src into x87 register dst
 2415     int pop = 0xD0 - 1; // -1 since we skip FLD  (DD D0+i is the non-popping FST ST(i))
 2416     if ($src$$reg != FPR1L_enc) {
 2417       emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
 2418       emit_d8( masm, 0xC0-1+$src$$reg );
 2419       pop = 0xD8;                        // DD D8+i is FSTP ST(i): also pops the copy pushed above
 2420     }
 2421     emit_opcode( masm, 0xDD );
 2422     emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
 2423   %}
 2424 
 2425 
 2426   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ // Push dst, then (unless src is FPR1) exchange src with the slot just below the new top
 2427     // load dst in FPR0  (D9 C0+i = FLD)
 2428     emit_opcode( masm, 0xD9 );
 2429     emit_d8( masm, 0xC0-1+$dst$$reg );
 2430     if ($src$$reg != FPR1L_enc) {
 2431       // fincstp  -- step the stack-top pointer past the value just pushed
 2432       emit_opcode (masm, 0xD9);
 2433       emit_opcode (masm, 0xF7);
 2434       // swap src with FPR1:
 2435       // FXCH FPR1 with src  (D9 C8+i)
 2436       emit_opcode(masm, 0xD9);
 2437       emit_d8(masm, 0xC8-1+$src$$reg );
 2438       // fdecstp  -- step the stack-top pointer back so the pushed value is on top again
 2439       emit_opcode (masm, 0xD9);
 2440       emit_opcode (masm, 0xF6);
 2441     }
 2442   %}
 2443 
 2444   enc_class Push_ModD_encoding(regD src0, regD src1) %{ // Push XMM doubles src1 then src0 onto the x87 stack through an 8-byte stack temp
 2445     __ subptr(rsp, 8);                                 // reserve the temp; nothing in this encoding releases it
 2446     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2447     __ fld_d(Address(rsp, 0));                         // src1 is loaded first
 2448     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);    // the same slot is reused for src0
 2449     __ fld_d(Address(rsp, 0));                         // src0 is loaded last, so it ends up on top
 2450   %}
 2451 
 2452   enc_class Push_ModF_encoding(regF src0, regF src1) %{ // Push XMM floats src1 then src0 onto the x87 stack through a 4-byte stack temp
 2453     __ subptr(rsp, 4);                                 // reserve the temp; nothing in this encoding releases it
 2454     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2455     __ fld_s(Address(rsp, 0));                         // src1 is loaded first
 2456     __ movflt(Address(rsp, 0), $src0$$XMMRegister);    // the same slot is reused for src0
 2457     __ fld_s(Address(rsp, 0));                         // src0 is loaded last, so it ends up on top
 2458   %}
 2459 
 2460   enc_class Push_ResultD(regD dst) %{ // Move the x87 top-of-stack double into XMM register dst and release 8 bytes of stack
 2461     __ fstp_d(Address(rsp, 0));                        // store and pop into the slot at [rsp]
 2462     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2463     __ addptr(rsp, 8);                                 // assumes an earlier encoding reserved this slot -- confirm against the using instruct
 2464   %}
 2465 
 2466   enc_class Push_ResultF(regF dst, immI d8) %{ // Move the x87 top-of-stack float into XMM register dst and release d8 bytes of stack
 2467     __ fstp_s(Address(rsp, 0));                        // store and pop into the slot at [rsp]
 2468     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2469     __ addptr(rsp, $d8$$constant);                     // the amount released is chosen by the using instruct
 2470   %}
 2471 
 2472   enc_class Push_SrcD(regD src) %{ // Push XMM double src onto the x87 stack through an 8-byte stack temp
 2473     __ subptr(rsp, 8);                                 // reserve the temp; nothing in this encoding releases it
 2474     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2475     __ fld_d(Address(rsp, 0));
 2476   %}
 2477 
 2478   enc_class push_stack_temp_qword() %{ // Reserve an 8-byte temporary on the CPU stack
 2479     __ subptr(rsp, 8);                   // released by the matching pop_stack_temp_qword encoding
 2480   %}
 2481 
 2482   enc_class pop_stack_temp_qword() %{ // Release the 8-byte temporary reserved by push_stack_temp_qword
 2483     __ addptr(rsp, 8);
 2484   %}
 2485 
 2486   enc_class push_xmm_to_fpr1(regD src) %{ // Push XMM double src onto the x87 stack, bouncing it through [rsp]
 2487     __ movdbl(Address(rsp, 0), $src$$XMMRegister);     // overwrites 8 bytes at [rsp]; presumably reserved beforehand via push_stack_temp_qword -- confirm at the use site
 2488     __ fld_d(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_Result_Mod_DPR( regDPR src) %{ // Unless src is FPR1, exchange src with the slot just below the x87 top; emits nothing otherwise
 2492     if ($src$$reg != FPR1L_enc) {
 2493       // fincstp  -- step the stack-top pointer past the current top
 2494       emit_opcode (masm, 0xD9);
 2495       emit_opcode (masm, 0xF7);
 2496       // FXCH FPR1 with src  (D9 C8+i)
 2497       emit_opcode(masm, 0xD9);
 2498       emit_d8(masm, 0xC8-1+$src$$reg );
 2499       // fdecstp  -- step the stack-top pointer back
 2500       emit_opcode (masm, 0xD9);
 2501       emit_opcode (masm, 0xF6);
 2502     }
 2503   %}
 2504 
 2505   enc_class fnstsw_sahf_skip_parity() %{ // Copy the x87 status flags into EFLAGS, then branch over the next 5 bytes when parity is clear
 2506     // fnstsw ax
 2507     emit_opcode( masm, 0xDF );
 2508     emit_opcode( masm, 0xE0 );
 2509     // sahf
 2510     emit_opcode( masm, 0x9E );
 2511     // jnp  ::skip
 2512     emit_opcode( masm, 0x7B );
 2513     emit_opcode( masm, 0x05 );   // rel8 = 5; the bytes skipped are produced by whatever encoding follows this one
 2514   %}
 2515 
 2516   enc_class emitModDPR() %{ // Emit a loop that repeats FPREM until the status word reports the reduction complete
 2517     // fprem must be iterative
 2518     // :: loop
 2519     // fprem
 2520     emit_opcode( masm, 0xD9 );
 2521     emit_opcode( masm, 0xF8 );
 2522     // wait
 2523     emit_opcode( masm, 0x9b );
 2524     // fnstsw ax
 2525     emit_opcode( masm, 0xDF );
 2526     emit_opcode( masm, 0xE0 );
 2527     // sahf
 2528     emit_opcode( masm, 0x9E );
 2529     // jp  ::loop  (0F 8A = JP rel32; the four bytes below are the little-endian displacement 0xFFFFFFF4 = -12)
 2530     emit_opcode( masm, 0x0F );
 2531     emit_opcode( masm, 0x8A );
 2532     emit_opcode( masm, 0xF4 );   // -12 = fprem (2) + wait (1) + fnstsw (2) + sahf (1) + this jump (6)
 2533     emit_opcode( masm, 0xFF );
 2534     emit_opcode( masm, 0xFF );
 2535     emit_opcode( masm, 0xFF );
 2536   %}
 2537 
 2538   enc_class fpu_flags() %{ // Move the x87 compare status into EFLAGS, substituting the carry-set pattern when status bit 0x0400 is set
 2539     // fnstsw_ax
 2540     emit_opcode( masm, 0xDF);
 2541     emit_opcode( masm, 0xE0);
 2542     // test ax,0x0400  (bit 10 of the stored status word)
 2543     emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
 2544     emit_opcode( masm, 0xA9 );
 2545     emit_d16   ( masm, 0x0400 );
 2546     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2547     // // test rax,0x0400
 2548     // emit_opcode( masm, 0xA9 );
 2549     // emit_d32   ( masm, 0x00000400 );
 2550     //
 2551     // jz exit (no unordered comparison)
 2552     emit_opcode( masm, 0x74 );
 2553     emit_d8    ( masm, 0x02 );   // rel8 = 2 skips only the MOV AH,1 below; the SAHF is always executed
 2554     // mov ah,1 - treat as LT case (set carry flag)
 2555     emit_opcode( masm, 0xB4 );
 2556     emit_d8    ( masm, 0x01 );
 2557     // sahf
 2558     emit_opcode( masm, 0x9E);
 2559   %}
 2560 
 2561   enc_class cmpF_P6_fixup() %{ // After a flag-setting FP compare, rewrite EFLAGS to the carry-set pattern when the parity flag is set
 2562     // Fixup the integer flags in case comparison involved a NaN
 2563     //
 2564     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2565     emit_opcode( masm, 0x7B );
 2566     emit_d8    ( masm, 0x03 );   // rel8 = 3: MOV AH,1 (2 bytes) + SAHF (1 byte), landing on the NOP
 2567     // MOV AH,1 - treat as LT case (set carry flag)
 2568     emit_opcode( masm, 0xB4 );
 2569     emit_d8    ( masm, 0x01 );
 2570     // SAHF
 2571     emit_opcode( masm, 0x9E);
 2572     // NOP     // target for branch to avoid branch to branch
 2573     emit_opcode( masm, 0x90);
 2574   %}
 2575 
 2576 //     fnstsw_ax();
 2577 //     sahf();
 2578 //     movl(dst, nan_result);
 2579 //     jcc(Assembler::parity, exit);
 2580 //     movl(dst, less_result);
 2581 //     jcc(Assembler::below, exit);
 2582 //     movl(dst, equal_result);
 2583 //     jcc(Assembler::equal, exit);
 2584 //     movl(dst, greater_result);
 2585 
 2586 // less_result     = -1;
 2587 // greater_result  =  1;
 2588 // equal_result    =  0;
 2589 // nan_result      = -1;
 2590 
 2591   enc_class CmpF_Result(rRegI dst) %{ // Turn the x87 compare status into -1 / 0 / 1 in dst (unordered yields -1)
 2592     // fnstsw_ax();
 2593     emit_opcode( masm, 0xDF);
 2594     emit_opcode( masm, 0xE0);
 2595     // sahf
 2596     emit_opcode( masm, 0x9E);
 2597     // movl(dst, nan_result);  (B8+r = MOV r32,imm32, 5 bytes)
 2598     emit_opcode( masm, 0xB8 + $dst$$reg);
 2599     emit_d32( masm, -1 );
 2600     // jcc(Assembler::parity, exit);
 2601     emit_opcode( masm, 0x7A );
 2602     emit_d8    ( masm, 0x13 );   // rel8 = 19: three MOVs (5 bytes each) + two short jumps (2 bytes each)
 2603     // movl(dst, less_result);
 2604     emit_opcode( masm, 0xB8 + $dst$$reg);
 2605     emit_d32( masm, -1 );
 2606     // jcc(Assembler::below, exit);
 2607     emit_opcode( masm, 0x72 );
 2608     emit_d8    ( masm, 0x0C );   // rel8 = 12: two MOVs + one short jump
 2609     // movl(dst, equal_result);
 2610     emit_opcode( masm, 0xB8 + $dst$$reg);
 2611     emit_d32( masm, 0 );
 2612     // jcc(Assembler::equal, exit);
 2613     emit_opcode( masm, 0x74 );
 2614     emit_d8    ( masm, 0x05 );   // rel8 = 5: the final MOV
 2615     // movl(dst, greater_result);
 2616     emit_opcode( masm, 0xB8 + $dst$$reg);
 2617     emit_d32( masm, 1 );
 2618   %}
 2619 
 2620 
 2621   // Compare the longs and set flags
 2622   // BROKEN!  Do Not use as-is
 2623   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ // Compare high halves; compare low halves only when the high halves are equal
 2624     // CMP    $src1.hi,$src2.hi
 2625     emit_opcode( masm, 0x3B );
 2626     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2627     // JNE,s  done
 2628     emit_opcode(masm,0x75);
 2629     emit_d8(masm, 2 );           // rel8 = 2: the size of the low-half CMP below
 2630     // CMP    $src1.lo,$src2.lo
 2631     emit_opcode( masm, 0x3B );
 2632     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2633 // done:
 2634   %}
 2635 
 2636   enc_class convert_int_long( regL dst, rRegI src ) %{ // Sign-extend int src into the dst.hi:dst.lo register pair
 2637     // mov $dst.lo,$src
 2638     int dst_encoding = $dst$$reg;
 2639     int src_encoding = $src$$reg;
 2640     encode_Copy( masm, dst_encoding  , src_encoding );   // helper defined outside this chunk
 2641     // mov $dst.hi,$src
 2642     encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2643     // sar $dst.hi,31  (C1 /7 ib) -- leaves the high word filled with the sign bit
 2644     emit_opcode( masm, 0xC1 );
 2645     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2646     emit_d8(masm, 0x1F );
 2647   %}
 2648 
 2649   enc_class convert_long_double( eRegL src ) %{ // Load the 64-bit integer in src onto the x87 stack via the CPU stack, then release the 8 bytes
 2650     // push $src.hi  (50+r = PUSH r32)
 2651     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2652     // push $src.lo  -- pushed last so the low word sits at the lower address
 2653     emit_opcode(masm, 0x50+$src$$reg  );
 2654     // fild 64-bits at [SP]
 2655     emit_opcode(masm,0xdf);
 2656     emit_d8(masm, 0x6C);         // operand byte: extension /5, 8-bit displacement, SIB follows
 2657     emit_d8(masm, 0x24);         // SIB byte selecting ESP as the base
 2658     emit_d8(masm, 0x00);         // displacement 0
 2659     // pop stack
 2660     emit_opcode(masm, 0x83); // add  SP, #8
 2661     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2662     emit_d8(masm, 0x8);
 2663   %}
 2664 
 2665   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ // Signed widening multiply by src1, then shift the high word right by cnt-32
 2666     // IMUL   EDX:EAX,$src1  (F7 /5, one-operand form)
 2667     emit_opcode( masm, 0xF7 );
 2668     emit_rm( masm, 0x3, 0x5, $src1$$reg );
 2669     // SAR    EDX,$cnt-32  (C1 /7 ib)
 2670     int shift_count = ((int)$cnt$$constant) - 32;
 2671     if (shift_count > 0) {                 // when cnt is exactly 32 no shift instruction is emitted
 2672       emit_opcode(masm, 0xC1);
 2673       emit_rm(masm, 0x3, 7, $dst$$reg );
 2674       emit_d8(masm, shift_count);
 2675     }
 2676   %}
 2677 
 2678   // this version doesn't have add sp, 8
 2679   enc_class convert_long_double2( eRegL src ) %{ // Load the 64-bit integer in src onto the x87 stack; the 8 pushed bytes stay on the CPU stack
 2680     // push $src.hi  (50+r = PUSH r32)
 2681     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2682     // push $src.lo  -- pushed last so the low word sits at the lower address
 2683     emit_opcode(masm, 0x50+$src$$reg  );
 2684     // fild 64-bits at [SP]
 2685     emit_opcode(masm,0xdf);
 2686     emit_d8(masm, 0x6C);         // operand byte: extension /5, 8-bit displacement, SIB follows
 2687     emit_d8(masm, 0x24);         // SIB byte selecting ESP as the base
 2688     emit_d8(masm, 0x00);         // displacement 0
 2689   %}
 2690 
 2691   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ // Signed 32x32 -> 64 multiply using the one-operand IMUL
 2692     // Basic idea: long = (long)int * (long)int
 2693     // IMUL EDX:EAX, src  (F7 /5)
 2694     emit_opcode( masm, 0xF7 );
 2695     emit_rm( masm, 0x3, 0x5, $src$$reg);
 2696   %}
 2697 
 2698   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ // Unsigned 32x32 -> 64 multiply using MUL
 2699     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2700     // MUL EDX:EAX, src  (F7 /4)
 2701     emit_opcode( masm, 0xF7 );
 2702     emit_rm( masm, 0x3, 0x4, $src$$reg);
 2703   %}
 2704 
 2705   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ // 64x64 -> 64 multiply of dst by src; the comments call the halves of dst EDX (hi) and EAX (lo)
 2706     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2707     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2708     // MOV    $tmp,$src.lo
 2709     encode_Copy( masm, $tmp$$reg, $src$$reg );
 2710     // IMUL   $tmp,EDX  -- tmp = lo(src.lo * dst.hi)
 2711     emit_opcode( masm, 0x0F );
 2712     emit_opcode( masm, 0xAF );
 2713     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2714     // MOV    EDX,$src.hi
 2715     encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2716     // IMUL   EDX,EAX  -- EDX = lo(src.hi * dst.lo)
 2717     emit_opcode( masm, 0x0F );
 2718     emit_opcode( masm, 0xAF );
 2719     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2720     // ADD    $tmp,EDX  -- tmp now holds the sum of both cross products
 2721     emit_opcode( masm, 0x03 );
 2722     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2723     // MUL   EDX:EAX,$src.lo  (F7 /4, unsigned widening multiply of the low halves)
 2724     emit_opcode( masm, 0xF7 );
 2725     emit_rm( masm, 0x3, 0x4, $src$$reg );
 2726     // ADD    EDX,$tmp  -- fold the cross products into the high word
 2727     emit_opcode( masm, 0x03 );
 2728     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2729   %}
 2730 
 2731   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ // Multiply the long in dst by the constant src, which is emitted as a single byte below
 2732     // Basic idea: lo(result) = lo(src * y_lo)
 2733     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2734     // IMUL   $tmp,EDX,$src  (6B /r ib: three-operand form with an 8-bit immediate)
 2735     emit_opcode( masm, 0x6B );
 2736     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2737     emit_d8( masm, (int)$src$$constant );
 2738     // MOV    EDX,$src  (B8+r with a 32-bit immediate; EDX is hard-coded here)
 2739     emit_opcode(masm, 0xB8 + EDX_enc);
 2740     emit_d32( masm, (int)$src$$constant );
 2741     // MUL   EDX:EAX,EDX  (F7 /4, unsigned widening multiply of EAX by the constant)
 2742     emit_opcode( masm, 0xF7 );
 2743     emit_rm( masm, 0x3, 0x4, EDX_enc );
 2744     // ADD    EDX,$tmp  -- fold the high-word product into the result
 2745     emit_opcode( masm, 0x03 );
 2746     emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
 2747   %}
 2748 
 2749   enc_class long_div( eRegL src1, eRegL src2 ) %{ // Push both longs and call SharedRuntime::ldiv, then drop the 16 bytes of arguments
 2750     // PUSH src1.hi
 2751     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );  // NOTE(review): macro wraps the whole opcode; same as 0x50+HIGH_FROM_LOW_ENC(reg) only if it is a plain additive offset -- confirm
 2752     // PUSH src1.lo
 2753     emit_opcode(masm,               0x50+$src1$$reg  );
 2754     // PUSH src2.hi
 2755     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2756     // PUSH src2.lo
 2757     emit_opcode(masm,               0x50+$src2$$reg  );
 2758     // CALL directly to the runtime
 2759     __ set_inst_mark();
 2760     emit_opcode(masm,0xE8);       // Call into runtime
 2761     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); // rel32 is relative to the end of this 4-byte field, hence the -4
 2762     __ clear_inst_mark();
 2763     __ post_call_nop();
 2764     // Restore stack
 2765     emit_opcode(masm, 0x83); // add  SP, #framesize
 2766     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2767     emit_d8(masm, 4*4);           // four 4-byte pushes
 2768   %}
 2769 
 2770   enc_class long_mod( eRegL src1, eRegL src2 ) %{ // Push both longs and call SharedRuntime::lrem, then drop the 16 bytes of arguments
 2771     // PUSH src1.hi
 2772     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );  // NOTE(review): macro wraps the whole opcode; same as 0x50+HIGH_FROM_LOW_ENC(reg) only if it is a plain additive offset -- confirm
 2773     // PUSH src1.lo
 2774     emit_opcode(masm,               0x50+$src1$$reg  );
 2775     // PUSH src2.hi
 2776     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2777     // PUSH src2.lo
 2778     emit_opcode(masm,               0x50+$src2$$reg  );
 2779     // CALL directly to the runtime
 2780     __ set_inst_mark();
 2781     emit_opcode(masm,0xE8);       // Call into runtime
 2782     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); // rel32 is relative to the end of this 4-byte field, hence the -4
 2783     __ clear_inst_mark();
 2784     __ post_call_nop();
 2785     // Restore stack
 2786     emit_opcode(masm, 0x83); // add  SP, #framesize
 2787     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2788     emit_d8(masm, 4*4);           // four 4-byte pushes
 2789   %}
 2790 
 2791   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ // OR the two halves of src into tmp, so the zero flag reflects whether the whole long is zero
 2792     // MOV   $tmp,$src.lo
 2793     emit_opcode(masm, 0x8B);
 2794     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
 2795     // OR    $tmp,$src.hi  (0B = OR r32,r/m32)
 2796     emit_opcode(masm, 0x0B);
 2797     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2798   %}
 2799 
 2800   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ // Compare low halves; compare high halves only when the low halves are equal
 2801     // CMP    $src1.lo,$src2.lo
 2802     emit_opcode( masm, 0x3B );
 2803     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2804     // JNE,s  skip  (emitted through emit_cc with base 0x70 and condition 0x5)
 2805     emit_cc(masm, 0x70, 0x5);
 2806     emit_d8(masm,2);             // rel8 = 2: the size of the high-half CMP below
 2807     // CMP    $src1.hi,$src2.hi
 2808     emit_opcode( masm, 0x3B );
 2809     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2810   %}
 2811 
 2812   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ // Set flags for src1 - src2 as a 64-bit subtract; tmp is overwritten, the sources are not
 2813     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2814     emit_opcode( masm, 0x3B );
 2815     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2816     // MOV    $tmp,$src1.hi  (MOV leaves the borrow from the CMP intact)
 2817     emit_opcode( masm, 0x8B );
 2818     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2819     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2820     emit_opcode( masm, 0x1B );
 2821     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2822   %}
 2823 
 2824   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ // Set flags for 0 - src as a 64-bit subtract; tmp is overwritten
 2825     // XOR    $tmp,$tmp
 2826     emit_opcode(masm,0x33);  // XOR
 2827     emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
 2828     // CMP    $tmp,$src.lo  -- tmp is zero here, so this compares 0 with src.lo
 2829     emit_opcode( masm, 0x3B );
 2830     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
 2831     // SBB    $tmp,$src.hi  -- consumes the borrow from the CMP
 2832     emit_opcode( masm, 0x1B );
 2833     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2834   %}
 2835 
 2836  // Sniff, sniff... smells like Gnu Superoptimizer
 2837   enc_class neg_long( eRegL dst ) %{ // Negate the 64-bit value held in the dst.hi:dst.lo pair
 2838     emit_opcode(masm,0xF7);    // NEG hi
 2839     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2840     emit_opcode(masm,0xF7);    // NEG lo  (sets the carry when lo was non-zero)
 2841     emit_rm    (masm,0x3, 0x3,               $dst$$reg );
 2842     emit_opcode(masm,0x83);    // SBB hi,0  (83 /3 ib: subtracts that carry from the high word)
 2843     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2844     emit_d8    (masm,0 );
 2845   %}
 2846 
 2847   enc_class enc_pop_rdx() %{ // Emit the single-byte POP of EDX
 2848     emit_opcode(masm,0x5A);    // 58+r = POP r32; 0x5A selects register number 2
 2849   %}
 2850 
 2851   enc_class enc_rethrow() %{ // Emit a relocatable near jump to OptoRuntime::rethrow_stub()
 2852     __ set_inst_mark();
 2853     emit_opcode(masm, 0xE9);        // jmp    entry
 2854     emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,   // rel32 is relative to the end of this 4-byte field, hence the -4
 2855                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2856     __ clear_inst_mark();
 2857     __ post_call_nop();
 2858   %}
 2859 
 2860 
 2861   // Convert a double to an int.  Java semantics require we do complex
 2862   // manglelations in the corner cases.  So we set the rounding mode to
 2863   // 'zero', store the darned double down as an int, and reset the
 2864   // rounding mode to 'nearest'.  The hardware throws an exception which
 2865   // patches up the correct value directly to the stack.
 2866   enc_class DPR2I_encoding( regDPR src ) %{ // Truncating x87 double -> int in EAX, with a runtime slow path when the stored result is 0x80000000
 2867     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2868     // exceptions here, so that a NAN or other corner-case value will
 2869     // throw an exception (but normal values get converted at full speed).
 2870     // However, I2C adapters and other float-stack manglers leave pending
 2871     // invalid-op exceptions hanging.  We would have to clear them before
 2872     // enabling them and that is more expensive than just testing for the
 2873     // invalid value Intel stores down in the corner cases.
 2874     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2875     emit_opcode(masm,0x2D);
 2876     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2877     // Allocate a word
 2878     emit_opcode(masm,0x83);            // SUB ESP,4
 2879     emit_opcode(masm,0xEC);
 2880     emit_d8(masm,0x04);
 2881     // Encoding assumes a double has been pushed into FPR0.
 2882     // Store down the double as an int, popping the FPU stack
 2883     emit_opcode(masm,0xDB);            // FISTP [ESP]
 2884     emit_opcode(masm,0x1C);
 2885     emit_d8(masm,0x24);
 2886     // Restore the rounding mode; mask the exception
 2887     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2888     emit_opcode(masm,0x2D);
 2889     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2890         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2891         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2892 
 2893     // Load the converted int; adjust CPU stack
 2894     emit_opcode(masm,0x58);       // POP EAX
 2895     emit_opcode(masm,0x3D);       // CMP EAX,imm
 2896     emit_d32   (masm,0x80000000); //         0x80000000
 2897     emit_opcode(masm,0x75);       // JNE around_slow_call
 2898     emit_d8    (masm,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
 2899     // Push src onto stack slow-path
 2900     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2901     emit_d8    (masm,0xC0-1+$src$$reg );
 2902     // CALL directly to the runtime
 2903     __ set_inst_mark();
 2904     emit_opcode(masm,0xE8);       // Call into runtime
 2905     emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2906     __ clear_inst_mark();
 2907     __ post_call_nop();
 2908     // Carry on here...
 2909   %}
 2910 
 2911   enc_class DPR2L_encoding( regDPR src ) %{ // Truncating x87 double -> long in EDX:EAX, with a runtime slow path when the stored result is 0x80000000:00000000
 2912     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2913     emit_opcode(masm,0x2D);
 2914     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2915     // Allocate two words (8 bytes) for the 64-bit result
 2916     emit_opcode(masm,0x83);            // SUB ESP,8
 2917     emit_opcode(masm,0xEC);
 2918     emit_d8(masm,0x08);
 2919     // Encoding assumes a double has been pushed into FPR0.
 2920     // Store down the double as a long, popping the FPU stack
 2921     emit_opcode(masm,0xDF);            // FISTP [ESP]
 2922     emit_opcode(masm,0x3C);
 2923     emit_d8(masm,0x24);
 2924     // Restore the rounding mode; mask the exception
 2925     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2926     emit_opcode(masm,0x2D);
 2927     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2928         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2929         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2930 
 2931     // Load the converted long (low word into EAX, high word into EDX); adjust CPU stack
 2932     emit_opcode(masm,0x58);       // POP EAX
 2933     emit_opcode(masm,0x5A);       // POP EDX
 2934     emit_opcode(masm,0x81);       // CMP EDX,imm
 2935     emit_d8    (masm,0xFA);       // operand byte: extension /7 (CMP) on EDX
 2936     emit_d32   (masm,0x80000000); //         0x80000000
 2937     emit_opcode(masm,0x75);       // JNE around_slow_call
 2938     emit_d8    (masm,0x07+4);     // Size of slow_call plus the TEST (2 bytes) and second JNE (2 bytes) below
 2939     emit_opcode(masm,0x85);       // TEST EAX,EAX
 2940     emit_opcode(masm,0xC0);       // operand byte: EAX,EAX
 2941     emit_opcode(masm,0x75);       // JNE around_slow_call
 2942     emit_d8    (masm,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
 2943     // Push src onto stack slow-path
 2944     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2945     emit_d8    (masm,0xC0-1+$src$$reg );
 2946     // CALL directly to the runtime
 2947     __ set_inst_mark();
 2948     emit_opcode(masm,0xE8);       // Call into runtime
 2949     emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2950     __ clear_inst_mark();
 2951     __ post_call_nop();
 2952     // Carry on here...
 2953   %}
 2954 
 2955   enc_class FMul_ST_reg( eRegFPR src1 ) %{ // Multiply the x87 top-of-stack by register src1, result left in ST
 2956     // Operand was loaded from memory into fp ST (stack top)
 2957     // FMUL   ST,$src  /* D8 C8+i */
 2958     emit_opcode(masm, 0xD8);
 2959     emit_opcode(masm, 0xC8 + $src1$$reg);   // i = the src1 encoding, used without the -1 adjustment seen in the FLD encodings
 2960   %}
 2961 
 2962   enc_class FAdd_ST_reg( eRegFPR src2 ) %{ // Add register src2 to the x87 top-of-stack, result left in ST (no pop)
 2963     // FADD   ST,src2  /* D8 C0+i */
 2964     emit_opcode(masm, 0xD8);
 2965     emit_opcode(masm, 0xC0 + $src2$$reg);
 2966     //could use FADDP  src2,fpST  /* DE C0+i */
 2967   %}
 2968 
 2969   enc_class FAddP_reg_ST( eRegFPR src2 ) %{ // Add the x87 top-of-stack into register src2 and pop
 2970     // FADDP  src2,ST  /* DE C0+i */
 2971     emit_opcode(masm, 0xDE);
 2972     emit_opcode(masm, 0xC0 + $src2$$reg);
 2973   %}
 2974 
 2975   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ // ST = (ST - src1) / src2, both steps leaving the result in ST
 2976     // Operand has been loaded into fp ST (stack top)
 2977       // FSUB   ST,$src1  /* D8 E0+i */
 2978       emit_opcode(masm, 0xD8);
 2979       emit_opcode(masm, 0xE0 + $src1$$reg);
 2980 
 2981       // FDIV   ST,$src2  /* D8 F0+i */
 2982       emit_opcode(masm, 0xD8);
 2983       emit_opcode(masm, 0xF0 + $src2$$reg);
 2984   %}
 2985 
 2986   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ // As emitted: ST = (ST + src1) * src2, result left in ST
 2987     // Operand was loaded from memory into fp ST (stack top)
 2988     // FADD   ST,$src  /* D8 C0+i */
 2989     emit_opcode(masm, 0xD8);
 2990     emit_opcode(masm, 0xC0 + $src1$$reg);
 2991 
 2992     // FMUL  ST,src2  /* D8 C8+i */
 2993     emit_opcode(masm, 0xD8);
 2994     emit_opcode(masm, 0xC8 + $src2$$reg);
 2995   %}
 2996 
 2997 
 2998   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ // As emitted: ST += src1, then src2 *= ST with a pop, so the result ends up in src2
 2999     // Operand was loaded from memory into fp ST (stack top)
 3000     // FADD   ST,$src  /* D8 C0+i */
 3001     emit_opcode(masm, 0xD8);
 3002     emit_opcode(masm, 0xC0 + $src1$$reg);
 3003 
 3004     // FMULP  src2,ST  /* DE C8+i */
 3005     emit_opcode(masm, 0xDE);
 3006     emit_opcode(masm, 0xC8 + $src2$$reg);
 3007   %}
 3008 
 3009   // Atomically load the volatile long
 3010   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ // 64-bit load from mem through the x87 stack into stack slot dst
 3011     emit_opcode(masm,0xDF);            // DF /5 = FILD m64int
 3012     int rm_byte_opcode = 0x05;
 3013     int base     = $mem$$base;
 3014     int index    = $mem$$index;
 3015     int scale    = $mem$$scale;
 3016     int displace = $mem$$disp;
 3017     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3018     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); // NOTE(review): no set_inst_mark/clear_inst_mark pair here, unlike enc_storeL_volatile -- confirm intentional
 3019     store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp ); // DF /7 = FISTP m64int into the stack slot, popping the loaded value
 3020   %}
 3021 
 3022   // Volatile Store Long.  Must be atomic, so move it into
 3023   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3024   // target address before the store (for null-ptr checks)
 3025   // so the memory operand is used twice in the encoding.
 3026   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ // 64-bit store of stack slot src to mem through the x87 stack
 3027     store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp ); // DF /5 = FILD m64int from the stack slot
 3028     __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
 3029     emit_opcode(masm,0xDF);        // DF /7 = FISTP m64int: one 64-bit store that also pops the value
 3030     int rm_byte_opcode = 0x07;
 3031     int base     = $mem$$base;
 3032     int index    = $mem$$index;
 3033     int scale    = $mem$$scale;
 3034     int displace = $mem$$disp;
 3035     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3036     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3037     __ clear_inst_mark();
 3038   %}
 3039 
 3040 %}
 3041 
 3042 
 3043 //----------FRAME--------------------------------------------------------------
 3044 // Definition of frame structure and management information.
 3045 //
 3046 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3047 //                             |   (to get allocators register number
 3048 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3049 //  r   CALLER     |        |
 3050 //  o     |        +--------+      pad to even-align allocators stack-slot
 3051 //  w     V        |  pad0  |        numbers; owned by CALLER
 3052 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3053 //  h     ^        |   in   |  5
 3054 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3055 //  |     |        |        |  3
 3056 //  |     |        +--------+
 3057 //  V     |        | old out|      Empty on Intel, window on Sparc
 3058 //        |    old |preserve|      Must be even aligned.
 3059 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3060 //        |        |   in   |  3   area for Intel ret address
 3061 //     Owned by    |preserve|      Empty on Sparc.
 3062 //       SELF      +--------+
 3063 //        |        |  pad2  |  2   pad to align old SP
 3064 //        |        +--------+  1
 3065 //        |        | locks  |  0
 3066 //        |        +--------+----> OptoReg::stack0(), even aligned
 3067 //        |        |  pad1  | 11   pad to align new SP
 3068 //        |        +--------+
 3069 //        |        |        | 10
 3070 //        |        | spills |  9   spills
 3071 //        V        |        |  8   (pad0 slot for callee)
 3072 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3073 //        ^        |  out   |  7
 3074 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3075 //     Owned by    +--------+
 3076 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3077 //        |    new |preserve|      Must be even-aligned.
 3078 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3079 //        |        |        |
 3080 //
 3081 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3082 //         known from SELF's arguments and the Java calling convention.
 3083 //         Region 6-7 is determined per call site.
 3084 // Note 2: If the calling convention leaves holes in the incoming argument
 3085 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3086 //         are owned by the CALLEE.  Holes should not be necessary in the
 3087 //         incoming area, as the Java calling convention is completely under
 3088 //         the control of the AD file.  Doubles can be sorted and packed to
 3089 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3090 //         varargs C calling conventions.
 3091 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3092 //         even aligned with pad0 as needed.
 3093 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3094 //         region 6-11 is even aligned; it may be padded out more so that
 3095 //         the region from SP to FP meets the minimum stack alignment.
 3096 
 3097 frame %{
 3098   // This register defines part of the calling convention
 3099   // between compiled code and the interpreter.
 3100   inline_cache_reg(EAX);                // Inline Cache Register
 3101
 3102   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3103   cisc_spilling_operand_name(indOffset32);
 3104
 3105   // Number of stack slots consumed by locking an object
 3106   sync_stack_slots(1);
 3107
 3108   // Compiled code's Frame Pointer
 3109   frame_pointer(ESP);
 3110   // Interpreter stores its frame pointer in a register which is
 3111   // stored to the stack by I2CAdaptors.
 3112   // I2CAdaptors convert from interpreted java to compiled java.
 3113   interpreter_frame_pointer(EBP);
 3114
 3115   // Stack alignment requirement
 3116   // Alignment size in bytes (128-bit -> 16 bytes)
 3117   stack_alignment(StackAlignmentInBytes);
 3118
 3119   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3120   // for calls to C.  Supports the var-args backing area for register parms.
 3121   varargs_C_out_slots_killed(0);
 3122
 3123   // The after-PROLOG location of the return address.  Location of
 3124   // return address specifies a type (REG or STACK) and a number
 3125   // representing the register number (i.e. - use a register name) or
 3126   // stack slot.
 3127   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3128   // Otherwise, it is above the locks and verification slot and alignment word
 3129   return_addr(STACK - 1 +
 3130               align_up((Compile::current()->in_preserve_stack_slots() +
 3131                         Compile::current()->fixed_slots()),
 3132                        stack_alignment_in_slots()));
 3133
 3134   // Location of C & interpreter return values
 3135   c_return_value %{
 3136     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3137     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3138     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3139
 3140     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3141     // that C functions return float and double results in XMM0.
 3142     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3143       return OptoRegPair(XMM0b_num,XMM0_num);
 3144     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3145       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3146
 3147     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); // otherwise: table lookup indexed by ideal_reg
 3148   %}
 3149
 3150   // Location of return values
 3151   return_value %{
 3152     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3153     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3154     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3155     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3156       return OptoRegPair(XMM0b_num,XMM0_num);
 3157     if( ideal_reg == Op_RegF && UseSSE>=1 ) // NOTE: threshold is UseSSE>=1 here, but UseSSE>=2 in c_return_value
 3158       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3159     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); // otherwise: table lookup indexed by ideal_reg
 3160   %}
 3161
 3162 %}
 3163 
 3164 //----------ATTRIBUTES---------------------------------------------------------
 3165 //----------Operand Attributes-------------------------------------------------
 3166 op_attrib op_cost(0);        // Required cost attribute; individual operands set their own via op_cost(N)
 3167 
 3168 //----------Instruction Attributes---------------------------------------------
 3169 ins_attrib ins_cost(100);       // Required cost attribute
 3170 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3171 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3172                                 // non-matching short branch variant of some
 3173                                 // long branch?
 3174 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3175                                 // specifies the alignment that some part of the instruction (not
 3176                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3177                                 // function must be provided for the instruction
 3178 
 3179 //----------OPERANDS-----------------------------------------------------------
 3180 // Operand definitions must precede instruction definitions for correct parsing
 3181 // in the ADLC because operands constitute user defined types which are used in
 3182 // instruction definitions.
 3183 
 3184 //----------Simple Operands----------------------------------------------------
 3185 // Immediate Operands
 3186 // Integer Immediate: matches any ConI (no predicate)
 3187 operand immI() %{
 3188   match(ConI);
 3189
 3190   op_cost(10);
 3191   format %{ %}
 3192   interface(CONST_INTER);
 3193 %}
 3194 
 3195 // Constant for test vs zero: matches only the int constant 0
 3196 operand immI_0() %{
 3197   predicate(n->get_int() == 0);
 3198   match(ConI);
 3199
 3200   op_cost(0);
 3201   format %{ %}
 3202   interface(CONST_INTER);
 3203 %}
 3204 
 3205 // Constant for increment: matches only the int constant 1
 3206 operand immI_1() %{
 3207   predicate(n->get_int() == 1);
 3208   match(ConI);
 3209
 3210   op_cost(0);
 3211   format %{ %}
 3212   interface(CONST_INTER);
 3213 %}
 3214 
 3215 // Constant for decrement: matches only the int constant -1
 3216 operand immI_M1() %{
 3217   predicate(n->get_int() == -1);
 3218   match(ConI);
 3219
 3220   op_cost(0);
 3221   format %{ %}
 3222   interface(CONST_INTER);
 3223 %}
 3224 
 3225 // Valid scale values for addressing modes: a shift count in 0..3 (inclusive)
 3226 operand immI2() %{
 3227   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3228   match(ConI);
 3229
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 operand immI8() %{
 3235   predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); // value fits in a signed byte
 3236   match(ConI);
 3237
 3238   op_cost(5);
 3239   format %{ %}
 3240   interface(CONST_INTER);
 3241 %}
 3242 
 3243 operand immU8() %{
 3244   predicate((0 <= n->get_int()) && (n->get_int() <= 255)); // value fits in an unsigned byte
 3245   match(ConI);
 3246
 3247   op_cost(5);
 3248   format %{ %}
 3249   interface(CONST_INTER);
 3250 %}
 3251 
 3252 operand immI16() %{
 3253   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); // value fits in a signed 16-bit word
 3254   match(ConI);
 3255
 3256   op_cost(10);
 3257   format %{ %}
 3258   interface(CONST_INTER);
 3259 %}
 3260 
 3261 // Int Immediate non-negative (sign bit clear, so the value fits in 31 bits)
 3262 operand immU31()
 3263 %{
 3264   predicate(n->get_int() >= 0);
 3265   match(ConI);
 3266
 3267   op_cost(0);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Constant for long shifts: matches only the int constant 32
 3273 operand immI_32() %{
 3274   predicate( n->get_int() == 32 );
 3275   match(ConI);
 3276
 3277   op_cost(0);
 3278   format %{ %}
 3279   interface(CONST_INTER);
 3280 %}
 3281 
 3282 operand immI_1_31() %{
 3283   predicate( n->get_int() >= 1 && n->get_int() <= 31 ); // inclusive range 1..31
 3284   match(ConI);
 3285
 3286   op_cost(0);
 3287   format %{ %}
 3288   interface(CONST_INTER);
 3289 %}
 3290 
 3291 operand immI_32_63() %{
 3292   predicate( n->get_int() >= 32 && n->get_int() <= 63 ); // inclusive range 32..63
 3293   match(ConI);
 3294   op_cost(0);
 3295
 3296   format %{ %}
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_2() %{
 3301   predicate( n->get_int() == 2 ); // matches only the int constant 2
 3302   match(ConI);
 3303
 3304   op_cost(0);
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 operand immI_3() %{
 3310   predicate( n->get_int() == 3 ); // matches only the int constant 3
 3311   match(ConI);
 3312
 3313   op_cost(0);
 3314   format %{ %}
 3315   interface(CONST_INTER);
 3316 %}
 3317 
 3318 operand immI_4()
 3319 %{
 3320   predicate(n->get_int() == 4); // matches only the int constant 4
 3321   match(ConI);
 3322
 3323   op_cost(0);
 3324   format %{ %}
 3325   interface(CONST_INTER);
 3326 %}
 3327 
 3328 operand immI_8()
 3329 %{
 3330   predicate(n->get_int() == 8); // matches only the int constant 8
 3331   match(ConI);
 3332
 3333   op_cost(0);
 3334   format %{ %}
 3335   interface(CONST_INTER);
 3336 %}
 3337 
 3338 // Pointer Immediate: matches any ConP (no predicate)
 3339 operand immP() %{
 3340   match(ConP);
 3341
 3342   op_cost(10);
 3343   format %{ %}
 3344   interface(CONST_INTER);
 3345 %}
 3346 
 3347 // Null Pointer Immediate: a ConP whose pointer value is 0
 3348 operand immP0() %{
 3349   predicate( n->get_ptr() == 0 );
 3350   match(ConP);
 3351   op_cost(0);
 3352
 3353   format %{ %}
 3354   interface(CONST_INTER);
 3355 %}
 3356 
 3357 // Long Immediate: matches any ConL (no predicate)
 3358 operand immL() %{
 3359   match(ConL);
 3360
 3361   op_cost(20);
 3362   format %{ %}
 3363   interface(CONST_INTER);
 3364 %}
 3365 
 3366 // Long Immediate zero: matches only the long constant 0
 3367 operand immL0() %{
 3368   predicate( n->get_long() == 0L );
 3369   match(ConL);
 3370   op_cost(0);
 3371
 3372   format %{ %}
 3373   interface(CONST_INTER);
 3374 %}
 3375 
 3376 // Long Immediate minus one: matches only the long constant -1
 3377 operand immL_M1() %{
 3378   predicate( n->get_long() == -1L );
 3379   match(ConL);
 3380   op_cost(0);
 3381
 3382   format %{ %}
 3383   interface(CONST_INTER);
 3384 %}
 3385 
 3386 // Long immediate from 0 to 127 (both ends inclusive).
 3387 // Used for a shorter form of long mul by 10.
 3388 operand immL_127() %{
 3389   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3390   match(ConL);
 3391   op_cost(0);
 3392
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long Immediate: low 32-bit mask, i.e. exactly the constant 0x00000000FFFFFFFF
 3398 operand immL_32bits() %{
 3399   predicate(n->get_long() == 0xFFFFFFFFL);
 3400   match(ConL);
 3401   op_cost(0);
 3402
 3403   format %{ %}
 3404   interface(CONST_INTER);
 3405 %}
 3406 
 3407 // Long Immediate: value survives a round trip through (int), i.e. fits in a sign-extended 32-bit int
 3408 operand immL32() %{
 3409   predicate(n->get_long() == (int)(n->get_long()));
 3410   match(ConL);
 3411   op_cost(20);
 3412
 3413   format %{ %}
 3414   interface(CONST_INTER);
 3415 %}
 3416 
 3417 // Double Immediate zero (only considered when UseSSE<=1)
 3418 operand immDPR0() %{
 3419   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3420   // bug that generates code such that NaNs compare equal to 0.0
 3421   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); // NOTE(review): == 0.0 is also true for -0.0 -- confirm intended
 3422   match(ConD);
 3423
 3424   op_cost(5);
 3425   format %{ %}
 3426   interface(CONST_INTER);
 3427 %}
 3428 
 3429 // Double Immediate one (only considered when UseSSE<=1)
 3430 operand immDPR1() %{
 3431   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3432   match(ConD);
 3433
 3434   op_cost(5);
 3435   format %{ %}
 3436   interface(CONST_INTER);
 3437 %}
 3438 
 3439 // Double Immediate: any ConD, only considered when UseSSE<=1
 3440 operand immDPR() %{
 3441   predicate(UseSSE<=1);
 3442   match(ConD);
 3443
 3444   op_cost(5);
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 operand immD() %{
 3450   predicate(UseSSE>=2); // any ConD, only considered when UseSSE>=2 (complements immDPR's UseSSE<=1)
 3451   match(ConD);
 3452
 3453   op_cost(5);
 3454   format %{ %}
 3455   interface(CONST_INTER);
 3456 %}
 3457 
 3458 // Double Immediate zero (only considered when UseSSE>=2)
 3459 operand immD0() %{
 3460   // Compare the bit pattern (jlong_cast, presumably a bitwise reinterpretation)
 3461   // instead of the value: only +0.0 has all bits zero, so NaN and -0.0 are
 3462   // both rejected.  Historically a workaround for a VC++ NaN-compare bug.
 3463   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3464   match(ConD);
 3465
 3466   format %{ %}
 3467   interface(CONST_INTER);
 3468 %}
 3469 
 3470 // Float Immediate zero (only considered when UseSSE == 0)
 3471 operand immFPR0() %{
 3472   predicate(UseSSE == 0 && n->getf() == 0.0F); // NOTE(review): == 0.0F is also true for -0.0F -- confirm intended
 3473   match(ConF);
 3474
 3475   op_cost(5);
 3476   format %{ %}
 3477   interface(CONST_INTER);
 3478 %}
 3479 
 3480 // Float Immediate one (only considered when UseSSE == 0)
 3481 operand immFPR1() %{
 3482   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3483   match(ConF);
 3484
 3485   op_cost(5);
 3486   format %{ %}
 3487   interface(CONST_INTER);
 3488 %}
 3489 
 3490 // Float Immediate: any ConF, only considered when UseSSE == 0
 3491 operand immFPR() %{
 3492   predicate( UseSSE == 0 );
 3493   match(ConF);
 3494
 3495   op_cost(5);
 3496   format %{ %}
 3497   interface(CONST_INTER);
 3498 %}
 3499 
 3500 // Float Immediate: any ConF, only considered when UseSSE >= 1
 3501 operand immF() %{
 3502   predicate(UseSSE >= 1);
 3503   match(ConF);
 3504
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Float Immediate zero.  Zero and not -0.0 (tested via jint_cast of the value, only when UseSSE >= 1)
 3511 operand immF0() %{
 3512   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3513   match(ConF);
 3514
 3515   op_cost(5);
 3516   format %{ %}
 3517   interface(CONST_INTER);
 3518 %}
 3519 
 3520 // Immediates for special shifts (sign extend)
 3521 
 3522 // Constant 16 (one of the special-shift immediates)
 3523 operand immI_16() %{
 3524   predicate( n->get_int() == 16 );
 3525   match(ConI);
 3526
 3527   format %{ %}
 3528   interface(CONST_INTER);
 3529 %}
 3530 
 3531 operand immI_24() %{
 3532   predicate( n->get_int() == 24 ); // matches only the int constant 24
 3533   match(ConI);
 3534
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Constant for byte-wide masking (255 == 0xFF)
 3540 operand immI_255() %{
 3541   predicate( n->get_int() == 255 );
 3542   match(ConI);
 3543
 3544   format %{ %}
 3545   interface(CONST_INTER);
 3546 %}
 3547 
 3548 // Constant for short-wide masking (65535 == 0xFFFF)
 3549 operand immI_65535() %{
 3550   predicate(n->get_int() == 65535);
 3551   match(ConI);
 3552
 3553   format %{ %}
 3554   interface(CONST_INTER);
 3555 %}
 3556 
 3557 operand kReg()
 3558 %{
 3559   constraint(ALLOC_IN_RC(vectmask_reg)); // allocated from register class vectmask_reg
 3560   match(RegVectMask);                    // ideal type: vector mask register
 3561   format %{%}
 3562   interface(REG_INTER);
 3563 %}
 3564 
 3565 // Register Operands
 3566 // Integer Register: allocates in int_reg; also matches xRegI and the single-register operands listed
 3567 operand rRegI() %{
 3568   constraint(ALLOC_IN_RC(int_reg));
 3569   match(RegI);
 3570   match(xRegI);
 3571   match(eAXRegI);
 3572   match(eBXRegI);
 3573   match(eCXRegI);
 3574   match(eDXRegI);
 3575   match(eDIRegI);
 3576   match(eSIRegI);
 3577
 3578   format %{ %}
 3579   interface(REG_INTER);
 3580 %}
 3581 
 3582 // Subset of Integer Register: allocates in int_x_reg; matches the EAX/EBX/ECX/EDX operands
 3583 operand xRegI(rRegI reg) %{
 3584   constraint(ALLOC_IN_RC(int_x_reg));
 3585   match(reg);
 3586   match(eAXRegI);
 3587   match(eBXRegI);
 3588   match(eCXRegI);
 3589   match(eDXRegI);
 3590
 3591   format %{ %}
 3592   interface(REG_INTER);
 3593 %}
 3594 
 3595 // Special Registers: int operand restricted to register class eax_reg
 3596 operand eAXRegI(xRegI reg) %{
 3597   constraint(ALLOC_IN_RC(eax_reg));
 3598   match(reg);
 3599   match(rRegI);
 3600
 3601   format %{ "EAX" %}
 3602   interface(REG_INTER);
 3603 %}
 3604 
 3605 // Special Registers: int operand restricted to register class ebx_reg
 3606 operand eBXRegI(xRegI reg) %{
 3607   constraint(ALLOC_IN_RC(ebx_reg));
 3608   match(reg);
 3609   match(rRegI);
 3610
 3611   format %{ "EBX" %}
 3612   interface(REG_INTER);
 3613 %}
 3614 
 3615 operand eCXRegI(xRegI reg) %{
 3616   constraint(ALLOC_IN_RC(ecx_reg)); // int operand restricted to register class ecx_reg
 3617   match(reg);
 3618   match(rRegI);
 3619
 3620   format %{ "ECX" %}
 3621   interface(REG_INTER);
 3622 %}
 3623 
 3624 operand eDXRegI(xRegI reg) %{
 3625   constraint(ALLOC_IN_RC(edx_reg)); // int operand restricted to register class edx_reg
 3626   match(reg);
 3627   match(rRegI);
 3628
 3629   format %{ "EDX" %}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 operand eDIRegI(xRegI reg) %{
 3634   constraint(ALLOC_IN_RC(edi_reg)); // int operand restricted to register class edi_reg
 3635   match(reg);
 3636   match(rRegI);
 3637
 3638   format %{ "EDI" %}
 3639   interface(REG_INTER);
 3640 %}
 3641 
 3642 operand nadxRegI() %{
 3643   constraint(ALLOC_IN_RC(nadx_reg)); // presumably "not EAX/EDX": neither appears in the match list below
 3644   match(RegI);
 3645   match(eBXRegI);
 3646   match(eCXRegI);
 3647   match(eSIRegI);
 3648   match(eDIRegI);
 3649
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 operand ncxRegI() %{
 3655   constraint(ALLOC_IN_RC(ncx_reg)); // presumably "not ECX": eCXRegI is absent from the match list below
 3656   match(RegI);
 3657   match(eAXRegI);
 3658   match(eDXRegI);
 3659   match(eSIRegI);
 3660   match(eDIRegI);
 3661
 3662   format %{ %}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg.
 3667 // Int operand restricted to register class esi_reg.
 3668 operand eSIRegI(xRegI reg) %{
 3669    constraint(ALLOC_IN_RC(esi_reg));
 3670    match(reg);
 3671    match(rRegI);
 3672
 3673    format %{ "ESI" %}
 3674    interface(REG_INTER);
 3675 %}
 3676 
 3677 // Pointer Register: allocates in any_reg; also matches eRegP and the listed single-register operands
 3678 operand anyRegP() %{
 3679   constraint(ALLOC_IN_RC(any_reg));
 3680   match(RegP);
 3681   match(eAXRegP);
 3682   match(eBXRegP);
 3683   match(eCXRegP);
 3684   match(eDIRegP);
 3685   match(eRegP);
 3686
 3687   format %{ %}
 3688   interface(REG_INTER);
 3689 %}
 3690 
 3691 operand eRegP() %{
 3692   constraint(ALLOC_IN_RC(int_reg)); // pointer operand allocated from int_reg
 3693   match(RegP);
 3694   match(eAXRegP);
 3695   match(eBXRegP);
 3696   match(eCXRegP);
 3697   match(eDIRegP);
 3698
 3699   format %{ %}
 3700   interface(REG_INTER);
 3701 %}
 3702 
 3703 operand rRegP() %{
 3704   constraint(ALLOC_IN_RC(int_reg)); // same register class and match list as eRegP
 3705   match(RegP);
 3706   match(eAXRegP);
 3707   match(eBXRegP);
 3708   match(eCXRegP);
 3709   match(eDIRegP);
 3710
 3711   format %{ %}
 3712   interface(REG_INTER);
 3713 %}
 3714 
 3715 // On windows95, EBP is not safe to use for implicit null tests.
 3716 operand eRegP_no_EBP() %{
 3717   constraint(ALLOC_IN_RC(int_reg_no_ebp)); // pointer operand allocated from int_reg_no_ebp
 3718   match(RegP);
 3719   match(eAXRegP);
 3720   match(eBXRegP);
 3721   match(eCXRegP);
 3722   match(eDIRegP);
 3723
 3724   op_cost(100); // far above the op_cost(0) default declared for operands
 3725   format %{ %}
 3726   interface(REG_INTER);
 3727 %}
 3728 
 3729 operand pRegP() %{
 3730   constraint(ALLOC_IN_RC(p_reg)); // pointer operand allocated from p_reg; matches the EBX/EDX/ESI/EDI operands
 3731   match(RegP);
 3732   match(eBXRegP);
 3733   match(eDXRegP);
 3734   match(eSIRegP);
 3735   match(eDIRegP);
 3736
 3737   format %{ %}
 3738   interface(REG_INTER);
 3739 %}
 3740 
 3741 // Special Registers: pointer operands restricted to a single register class
 3742 // Return a pointer value (register class eax_reg)
 3743 operand eAXRegP(eRegP reg) %{
 3744   constraint(ALLOC_IN_RC(eax_reg));
 3745   match(reg);
 3746   format %{ "EAX" %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // Used in AtomicAdd (pointer operand restricted to register class ebx_reg)
 3751 operand eBXRegP(eRegP reg) %{
 3752   constraint(ALLOC_IN_RC(ebx_reg));
 3753   match(reg);
 3754   format %{ "EBX" %}
 3755   interface(REG_INTER);
 3756 %}
 3757 
 3758 // Tail-call (interprocedural jump) to interpreter (pointer operand restricted to register class ecx_reg)
 3759 operand eCXRegP(eRegP reg) %{
 3760   constraint(ALLOC_IN_RC(ecx_reg));
 3761   match(reg);
 3762   format %{ "ECX" %}
 3763   interface(REG_INTER);
 3764 %}
 3765 
 3766 operand eDXRegP(eRegP reg) %{
 3767   constraint(ALLOC_IN_RC(edx_reg)); // pointer operand restricted to register class edx_reg
 3768   match(reg);
 3769   format %{ "EDX" %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 operand eSIRegP(eRegP reg) %{
 3774   constraint(ALLOC_IN_RC(esi_reg)); // pointer operand restricted to register class esi_reg
 3775   match(reg);
 3776   format %{ "ESI" %}
 3777   interface(REG_INTER);
 3778 %}
 3779 
 3780 // Used in rep stosw (pointer operand restricted to register class edi_reg)
 3781 operand eDIRegP(eRegP reg) %{
 3782   constraint(ALLOC_IN_RC(edi_reg));
 3783   match(reg);
 3784   format %{ "EDI" %}
 3785   interface(REG_INTER);
 3786 %}
 3787 
 3788 operand eRegL() %{
 3789   constraint(ALLOC_IN_RC(long_reg)); // long operand allocated from long_reg
 3790   match(RegL);
 3791   match(eADXRegL);
 3792
 3793   format %{ %}
 3794   interface(REG_INTER);
 3795 %}
 3796 
 3797 operand eADXRegL( eRegL reg ) %{
 3798   constraint(ALLOC_IN_RC(eadx_reg)); // long operand restricted to register class eadx_reg
 3799   match(reg);
 3800
 3801   format %{ "EDX:EAX" %}
 3802   interface(REG_INTER);
 3803 %}
 3804 
 3805 operand eBCXRegL( eRegL reg ) %{
 3806   constraint(ALLOC_IN_RC(ebcx_reg)); // long operand restricted to register class ebcx_reg
 3807   match(reg);
 3808
 3809   format %{ "EBX:ECX" %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eBDPRegL( eRegL reg ) %{
 3814   constraint(ALLOC_IN_RC(ebpd_reg)); // long operand restricted to register class ebpd_reg
 3815   match(reg);
 3816
 3817   format %{ "EBP:EDI" %}
 3818   interface(REG_INTER);
 3819 %}
 3820 // Special case for integer high multiply: same eadx_reg class as eADXRegL, but formatted as "EAX" only
 3821 operand eADXRegL_low_only() %{
 3822   constraint(ALLOC_IN_RC(eadx_reg));
 3823   match(RegL);
 3824
 3825   format %{ "EAX" %}
 3826   interface(REG_INTER);
 3827 %}
 3828 
 3829 // Flags register, used as output of compare instructions (same class, ideal type and format as eFlagsReg)
 3830 operand rFlagsReg() %{
 3831   constraint(ALLOC_IN_RC(int_flags));
 3832   match(RegFlags);
 3833
 3834   format %{ "EFLAGS" %}
 3835   interface(REG_INTER);
 3836 %}
 3837 
 3838 // Flags register, used as output of compare instructions (register class int_flags)
 3839 operand eFlagsReg() %{
 3840   constraint(ALLOC_IN_RC(int_flags));
 3841   match(RegFlags);
 3842
 3843   format %{ "EFLAGS" %}
 3844   interface(REG_INTER);
 3845 %}
 3846 
 3847 // Flags register, used as output of FLOATING POINT compare instructions.
 3848 operand eFlagsRegU() %{
 3849   constraint(ALLOC_IN_RC(int_flags)); // same register class as eFlagsReg; a distinct operand type
 3850   match(RegFlags);
 3851
 3852   format %{ "EFLAGS_U" %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 operand eFlagsRegUCF() %{
 3857   constraint(ALLOC_IN_RC(int_flags));
 3858   match(RegFlags);
 3859   predicate(false); // constant-false predicate -- presumably keeps the matcher from choosing this operand on its own; confirm
 3860
 3861   format %{ "EFLAGS_U_CF" %}
 3862   interface(REG_INTER);
 3863 %}
 3864 
 3865 // Condition Code Register used by long compare (LTGE / EQNE / LEGT variants share class int_flags)
 3866 operand flagsReg_long_LTGE() %{
 3867   constraint(ALLOC_IN_RC(int_flags));
 3868   match(RegFlags);
 3869   format %{ "FLAGS_LTGE" %}
 3870   interface(REG_INTER);
 3871 %}
 3872 operand flagsReg_long_EQNE() %{
 3873   constraint(ALLOC_IN_RC(int_flags)); // same class as flagsReg_long_LTGE; only operand type and format differ
 3874   match(RegFlags);
 3875   format %{ "FLAGS_EQNE" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 operand flagsReg_long_LEGT() %{
 3879   constraint(ALLOC_IN_RC(int_flags)); // same class as flagsReg_long_LTGE; only operand type and format differ
 3880   match(RegFlags);
 3881   format %{ "FLAGS_LEGT" %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Condition Code Register used by unsigned long compare (LTGE / EQNE / LEGT variants share class int_flags)
 3886 operand flagsReg_ulong_LTGE() %{
 3887   constraint(ALLOC_IN_RC(int_flags));
 3888   match(RegFlags);
 3889   format %{ "FLAGS_U_LTGE" %}
 3890   interface(REG_INTER);
 3891 %}
 3892 operand flagsReg_ulong_EQNE() %{
 3893   constraint(ALLOC_IN_RC(int_flags)); // same class as flagsReg_ulong_LTGE; only operand type and format differ
 3894   match(RegFlags);
 3895   format %{ "FLAGS_U_EQNE" %}
 3896   interface(REG_INTER);
 3897 %}
 3898 operand flagsReg_ulong_LEGT() %{
 3899   constraint(ALLOC_IN_RC(int_flags)); // same class as flagsReg_ulong_LTGE; only operand type and format differ
 3900   match(RegFlags);
 3901   format %{ "FLAGS_U_LEGT" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Double register operands (RegD in fp_dbl_reg), only considered when UseSSE < 2
 3906 operand regDPR() %{
 3907   predicate( UseSSE < 2 );
 3908   constraint(ALLOC_IN_RC(fp_dbl_reg));
 3909   match(RegD);
 3910   match(regDPR1);
 3911   match(regDPR2);
 3912   format %{ %}
 3913   interface(REG_INTER);
 3914 %}
 3915 
 3916 operand regDPR1(regDPR reg) %{
 3917   predicate( UseSSE < 2 );
 3918   constraint(ALLOC_IN_RC(fp_dbl_reg0)); // double operand restricted to register class fp_dbl_reg0
 3919   match(reg);
 3920   format %{ "FPR1" %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 operand regDPR2(regDPR reg) %{
 3925   predicate( UseSSE < 2 );
 3926   constraint(ALLOC_IN_RC(fp_dbl_reg1)); // double operand restricted to register class fp_dbl_reg1
 3927   match(reg);
 3928   format %{ "FPR2" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand regnotDPR1(regDPR reg) %{
 3933   predicate( UseSSE < 2 );
 3934   constraint(ALLOC_IN_RC(fp_dbl_notreg0)); // presumably every fp_dbl_reg register except the one in fp_dbl_reg0 -- confirm in the register classes
 3935   match(reg);
 3936   format %{ %}
 3937   interface(REG_INTER);
 3938 %}
 3939 
 3940 // Float register operands (RegF in fp_flt_reg), only considered when UseSSE < 2
 3941 operand regFPR() %{
 3942   predicate( UseSSE < 2 );
 3943   constraint(ALLOC_IN_RC(fp_flt_reg));
 3944   match(RegF);
 3945   match(regFPR1);
 3946   format %{ %}
 3947   interface(REG_INTER);
 3948 %}
 3949 
 3950 // Float register operand restricted to register class fp_flt_reg0
 3951 operand regFPR1(regFPR reg) %{
 3952   predicate( UseSSE < 2 );
 3953   constraint(ALLOC_IN_RC(fp_flt_reg0));
 3954   match(reg);
 3955   format %{ "FPR1" %}
 3956   interface(REG_INTER);
 3957 %}
 3958 
 3959 // XMM Float register operands (RegF in float_reg_legacy), only considered when UseSSE>=1
 3960 operand regF() %{
 3961   predicate( UseSSE>=1 );
 3962   constraint(ALLOC_IN_RC(float_reg_legacy));
 3963   match(RegF);
 3964   format %{ %}
 3965   interface(REG_INTER);
 3966 %}
 3967 
 3968 operand legRegF() %{
 3969   predicate( UseSSE>=1 );
 3970   constraint(ALLOC_IN_RC(float_reg_legacy)); // same predicate, register class and ideal type as regF
 3971   match(RegF);
 3972   format %{ %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Float register operands allocated from float_reg_vl (no UseSSE predicate, unlike regF/legRegF)
 3977 operand vlRegF() %{
 3978    constraint(ALLOC_IN_RC(float_reg_vl));
 3979    match(RegF);
 3980
 3981    format %{ %}
 3982    interface(REG_INTER);
 3983 %}
 3984 
 3985 // XMM Double register operands (RegD in double_reg_legacy), only considered when UseSSE>=2
 3986 operand regD() %{
 3987   predicate( UseSSE>=2 );
 3988   constraint(ALLOC_IN_RC(double_reg_legacy));
 3989   match(RegD);
 3990   format %{ %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Double register operands: same predicate, register class and ideal type as regD
 3995 operand legRegD() %{
 3996   predicate( UseSSE>=2 );
 3997   constraint(ALLOC_IN_RC(double_reg_legacy));
 3998   match(RegD);
 3999   format %{ %}
 4000   interface(REG_INTER);
 4001 %}
 4002 
 4003 operand vlRegD() %{
 4004    constraint(ALLOC_IN_RC(double_reg_vl)); // double operand from double_reg_vl (no UseSSE predicate, unlike regD/legRegD)
 4005    match(RegD);
 4006
 4007    format %{ %}
 4008    interface(REG_INTER);
 4009 %}
 4010 
 4011 //----------Memory Operands----------------------------------------------------
 4012 // Direct Memory Operand: the address is the pointer constant itself (used as the displacement)
 4013 operand direct(immP addr) %{
 4014   match(addr);
 4015
 4016   format %{ "[$addr]" %}
 4017   interface(MEMORY_INTER) %{
 4018     base(0xFFFFFFFF);  // presumably "no base register" -- confirm against encode_RegMem
 4019     index(0x4);        // 0x4: no index register (x86 SIB convention)
 4020     scale(0x0);
 4021     disp($addr);
 4022   %}
 4023 %}
 4024 
 4025 // Indirect Memory Operand: address is the register value, no index and zero displacement
 4026 operand indirect(eRegP reg) %{
 4027   constraint(ALLOC_IN_RC(int_reg));
 4028   match(reg);
 4029
 4030   format %{ "[$reg]" %}
 4031   interface(MEMORY_INTER) %{
 4032     base($reg);
 4033     index(0x4);  // 0x4: no index register (x86 SIB convention)
 4034     scale(0x0);
 4035     disp(0x0);
 4036   %}
 4037 %}
 4038 
 4039 // Indirect Memory Plus Short Offset Operand (offset is an immI8, i.e. -128..127)
 4040 operand indOffset8(eRegP reg, immI8 off) %{
 4041   match(AddP reg off);
 4042
 4043   format %{ "[$reg + $off]" %}
 4044   interface(MEMORY_INTER) %{
 4045     base($reg);
 4046     index(0x4);  // 0x4: no index register (x86 SIB convention)
 4047     scale(0x0);
 4048     disp($off);
 4049   %}
 4050 %}
 4051 
 4052 // Indirect Memory Plus Long Offset Operand (offset is any immI)
 4053 operand indOffset32(eRegP reg, immI off) %{
 4054   match(AddP reg off);
 4055
 4056   format %{ "[$reg + $off]" %}
 4057   interface(MEMORY_INTER) %{
 4058     base($reg);
 4059     index(0x4);  // 0x4: no index register (x86 SIB convention)
 4060     scale(0x0);
 4061     disp($off);
 4062   %}
 4063 %}
 4064 
 4065 // Pointer-constant Offset Plus Integer Register Operand: matches (AddP off reg), register used as base
 4066 operand indOffset32X(rRegI reg, immP off) %{
 4067   match(AddP off reg);
 4068
 4069   format %{ "[$reg + $off]" %}
 4070   interface(MEMORY_INTER) %{
 4071     base($reg);
 4072     index(0x4);  // 0x4: no index register (x86 SIB convention)
 4073     scale(0x0);
 4074     disp($off);
 4075   %}
 4076 %}
 4077 
 4078 // Indirect Memory Plus Index Register Plus Offset Operand (unscaled index)
 4079 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4080   match(AddP (AddP reg ireg) off);
 4081
 4082   op_cost(10);
 4083   format %{"[$reg + $off + $ireg]" %}
 4084   interface(MEMORY_INTER) %{
 4085     base($reg);
 4086     index($ireg);
 4087     scale(0x0);
 4088     disp($off);
 4089   %}
 4090 %}
 4091 
 4092 // Indirect Memory Plus Index Register Operand (unscaled index, zero displacement)
 4093 operand indIndex(eRegP reg, rRegI ireg) %{
 4094   match(AddP reg ireg);
 4095
 4096   op_cost(10);
 4097   format %{"[$reg + $ireg]" %}
 4098   interface(MEMORY_INTER) %{
 4099     base($reg);
 4100     index($ireg);
 4101     scale(0x0);
 4102     disp(0x0);
 4103   %}
 4104 %}
 4105 
 4106 // // -------------------------------------------------------------------------
 4107 // // 486 architecture doesn't support "scale * index + offset" without a base
 4108 // // -------------------------------------------------------------------------
 4109 // // Scaled Memory Operands
 4110 // // Indirect Memory Times Scale Plus Offset Operand
 4111 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4112 //   match(AddP off (LShiftI ireg scale));
 4113 //
 4114 //   op_cost(10);
 4115 //   format %{"[$off + $ireg << $scale]" %}
 4116 //   interface(MEMORY_INTER) %{
 4117 //     base(0x4);
 4118 //     index($ireg);
 4119 //     scale($scale);
 4120 //     disp($off);
 4121 //   %}
 4122 // %}
 4123 
 4124 // Indirect Memory Plus Scaled Index Register Operand (index shifted left by an immI2 scale, 0..3)
 4125 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4126   match(AddP reg (LShiftI ireg scale));
 4127
 4128   op_cost(10);
 4129   format %{"[$reg + $ireg << $scale]" %}
 4130   interface(MEMORY_INTER) %{
 4131     base($reg);
 4132     index($ireg);
 4133     scale($scale);
 4134     disp(0x0);
 4135   %}
 4136 %}
 4137 
 4138 // Indirect Memory Plus Scaled Index Register Plus Offset Operand (index shifted left by an immI2 scale)
 4139 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4140   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4141
 4142   op_cost(10);
 4143   format %{"[$reg + $off + $ireg << $scale]" %}
 4144   interface(MEMORY_INTER) %{
 4145     base($reg);
 4146     index($ireg);
 4147     scale($scale);
 4148     disp($off);
 4149   %}
 4150 %}
 4151 
 4152 //----------Load Long Memory Operands------------------------------------------
 4153 // The load-long idiom will use its address expression again after loading
 4154 // the first word of the long.  If the load-long destination overlaps with
 4155 // registers used in the addressing expression, the 2nd half will be loaded
 4156 // from a clobbered address.  Fix this by requiring that load-long use
 4157 // address registers that do not overlap with the load-long target.
 4158 
 4159 // load-long support: pointer register operand that must be allocated in the esi_reg class
 4160 operand load_long_RegP() %{
 4161   constraint(ALLOC_IN_RC(esi_reg));
 4162   match(RegP);
 4163   match(eSIRegP);
 4164   op_cost(100);
 4165   format %{  %}
 4166   interface(REG_INTER);
 4167 %}
 4168 
 4169 // Indirect Memory Operand for load-long: address is the load_long_RegP base alone
 4170 operand load_long_indirect(load_long_RegP reg) %{
 4171   constraint(ALLOC_IN_RC(esi_reg));
 4172   match(reg);
 4173   // Base register only: no index, no scale, zero displacement.
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);  // No Index
 4178     scale(0x0);  // No Scale
 4179     disp(0x0);   // No Displacement
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Int Immediate Offset Operand for load-long
 4184 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4185   match(AddP reg off);
 4186   // Address = load_long_RegP base + immediate displacement; no index register.
 4187   format %{ "[$reg + $off]" %}
 4188   interface(MEMORY_INTER) %{
 4189     base($reg);
 4190     index(0x4);  // No Index
 4191     scale(0x0);  // No Scale
 4192     disp($off);
 4193   %}
 4194 %}
 4195 
 4196 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // both load-long address forms
 4197 
 4198 
 4199 //----------Special Memory Operands--------------------------------------------
 4200 // Stack Slot Operand - This operand is used for loading and storing temporary
 4201 //                      values on the stack where a match requires a value to
 4202 //                      flow through memory.
 4203 operand stackSlotP(sRegP reg) %{
 4204   constraint(ALLOC_IN_RC(stack_slots));
 4205   // Pointer stack slot. No match rule because this operand is only generated in matching
 4206   format %{ "[$reg]" %}
 4207   interface(MEMORY_INTER) %{
 4208     base(0x4);   // ESP
 4209     index(0x4);  // No Index
 4210     scale(0x0);  // No Scale
 4211     disp($reg);  // Stack Offset
 4212   %}
 4213 %}
 4214 
 4215 operand stackSlotI(sRegI reg) %{
 4216   constraint(ALLOC_IN_RC(stack_slots));
 4217   // Int stack slot. No match rule because this operand is only generated in matching
 4218   format %{ "[$reg]" %}
 4219   interface(MEMORY_INTER) %{
 4220     base(0x4);   // ESP
 4221     index(0x4);  // No Index
 4222     scale(0x0);  // No Scale
 4223     disp($reg);  // Stack Offset
 4224   %}
 4225 %}
 4226 
 4227 operand stackSlotF(sRegF reg) %{
 4228   constraint(ALLOC_IN_RC(stack_slots));
 4229   // Float stack slot. No match rule because this operand is only generated in matching
 4230   format %{ "[$reg]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base(0x4);   // ESP
 4233     index(0x4);  // No Index
 4234     scale(0x0);  // No Scale
 4235     disp($reg);  // Stack Offset
 4236   %}
 4237 %}
 4238 
 4239 operand stackSlotD(sRegD reg) %{
 4240   constraint(ALLOC_IN_RC(stack_slots));
 4241   // Double stack slot. No match rule because this operand is only generated in matching
 4242   format %{ "[$reg]" %}
 4243   interface(MEMORY_INTER) %{
 4244     base(0x4);   // ESP
 4245     index(0x4);  // No Index
 4246     scale(0x0);  // No Scale
 4247     disp($reg);  // Stack Offset
 4248   %}
 4249 %}
 4250 
 4251 operand stackSlotL(sRegL reg) %{
 4252   constraint(ALLOC_IN_RC(stack_slots));
 4253   // Long stack slot. No match rule because this operand is only generated in matching
 4254   format %{ "[$reg]" %}
 4255   interface(MEMORY_INTER) %{
 4256     base(0x4);   // ESP
 4257     index(0x4);  // No Index
 4258     scale(0x0);  // No Scale
 4259     disp($reg);  // Stack Offset
 4260   %}
 4261 %}
 4262 
 4263 //----------Conditional Branch Operands----------------------------------------
 4264 // Comparison Op  - This is the operation of the comparison, and is limited to
 4265 //                  the following set of codes:
 4266 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4267 //
 4268 // Other attributes of the comparison, such as unsignedness, are specified
 4269 // by the comparison instruction that sets a condition code flags register.
 4270 // That result is represented by a flags operand whose subtype is appropriate
 4271 // to the unsignedness (etc.) of the comparison.
 4272 //
 4273 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4274 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4275 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4276 
 4277 // Comparison Code, signed compare (l / ge / le / g mnemonics)
 4278 operand cmpOp() %{
 4279   match(Bool);
 4280   // Each COND_INTER entry pairs an encoding value with the mnemonic for that test.
 4281   format %{ "" %}
 4282   interface(COND_INTER) %{
 4283     equal(0x4, "e");
 4284     not_equal(0x5, "ne");
 4285     less(0xC, "l");
 4286     greater_equal(0xD, "ge");
 4287     less_equal(0xE, "le");
 4288     greater(0xF, "g");
 4289     overflow(0x0, "o");
 4290     no_overflow(0x1, "no");
 4291   %}
 4292 %}
 4293 
 4294 // Comparison Code, unsigned compare.  Used by FP also, with
 4295 // C2 (unordered) turned into GT or LT already.  The other bits
 4296 // C0 and C3 are turned into Carry & Zero flags.
 4297 operand cmpOpU() %{
 4298   match(Bool);
 4299   // Same layout as cmpOp, but the ordering tests use the b / nb / be / nbe mnemonics.
 4300   format %{ "" %}
 4301   interface(COND_INTER) %{
 4302     equal(0x4, "e");
 4303     not_equal(0x5, "ne");
 4304     less(0x2, "b");
 4305     greater_equal(0x3, "nb");
 4306     less_equal(0x6, "be");
 4307     greater(0x7, "nbe");
 4308     overflow(0x0, "o");
 4309     no_overflow(0x1, "no");
 4310   %}
 4311 %}
 4312 
 4313 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
 4314 operand cmpOpUCF() %{
 4315   match(Bool);
 4316   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4317             n->as_Bool()->_test._test == BoolTest::ge ||
 4318             n->as_Bool()->_test._test == BoolTest::le ||
 4319             n->as_Bool()->_test._test == BoolTest::gt);
 4320   format %{ "" %}
 4321   interface(COND_INTER) %{
 4322     equal(0x4, "e");
 4323     not_equal(0x5, "ne");
 4324     less(0x2, "b");
 4325     greater_equal(0x3, "nb");
 4326     less_equal(0x6, "be");
 4327     greater(0x7, "nbe");
 4328     overflow(0x0, "o");
 4329     no_overflow(0x1, "no");
 4330   %}
 4331 %}
 4332 
 4333 
 4334 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
 4335 operand cmpOpUCF2() %{
 4336   match(Bool);
 4337   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4338             n->as_Bool()->_test._test == BoolTest::eq);
 4339   format %{ "" %}
 4340   interface(COND_INTER) %{
 4341     equal(0x4, "e");
 4342     not_equal(0x5, "ne");
 4343     less(0x2, "b");
 4344     greater_equal(0x3, "nb");
 4345     less_equal(0x6, "be");
 4346     greater(0x7, "nbe");
 4347     overflow(0x0, "o");
 4348     no_overflow(0x1, "no");
 4349   %}
 4350 %}
 4351 
 4352 // Comparison Code for FP conditional move
 4353 operand cmpOp_fcmov() %{
 4354   match(Bool);
 4355   // The predicate rejects overflow / no_overflow tests; their entries below are placeholders.
 4356   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4357             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4358   format %{ "" %}
 4359   interface(COND_INTER) %{
 4360     equal        (0x0C8);  // NOTE(review): values look like an FCMOVcc opcode byte, with bit 8
 4361     not_equal    (0x1C8);  // selecting the negated form - confirm against the encoder
 4362     less         (0x0C0);
 4363     greater_equal(0x1C0);
 4364     less_equal   (0x0D0);
 4365     greater      (0x1D0);
 4366     overflow(0x0, "o"); // not really supported by the instruction
 4367     no_overflow(0x1, "no"); // not really supported by the instruction
 4368   %}
 4369 %}
 4370 
 4371 // Comparison Code used in long compares
 4372 operand cmpOp_commute() %{
 4373   match(Bool);
 4374   // Relative to cmpOp, less<->greater and less_equal<->greater_equal are exchanged.
 4375   format %{ "" %}
 4376   interface(COND_INTER) %{
 4377     equal(0x4, "e");
 4378     not_equal(0x5, "ne");
 4379     less(0xF, "g");
 4380     greater_equal(0xE, "le");
 4381     less_equal(0xD, "ge");
 4382     greater(0xC, "l");
 4383     overflow(0x0, "o");
 4384     no_overflow(0x1, "no");
 4385   %}
 4386 %}
 4387 
 4388 // Comparison Code used in unsigned long compares
 4389 operand cmpOpU_commute() %{
 4390   match(Bool);
 4391   // Relative to cmpOpU, less<->greater and less_equal<->greater_equal are exchanged.
 4392   format %{ "" %}
 4393   interface(COND_INTER) %{
 4394     equal(0x4, "e");
 4395     not_equal(0x5, "ne");
 4396     less(0x7, "nbe");
 4397     greater_equal(0x6, "be");
 4398     less_equal(0x3, "nb");
 4399     greater(0x2, "b");
 4400     overflow(0x0, "o");
 4401     no_overflow(0x1, "no");
 4402   %}
 4403 %}
 4404 
 4405 //----------OPERAND CLASSES----------------------------------------------------
 4406 // Operand Classes are groups of operands that are used to simplify
 4407 // instruction definitions by not requiring the AD writer to specify separate
 4408 // instructions for every form of operand when the instruction accepts
 4409 // multiple operand types with the same basic encoding and format.  The classic
 4410 // case of this is memory operands.
 4411 
 4412 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4413                indIndex, indIndexScale, indIndexScaleOffset);  // general memory addressing forms
 4414 
 4415 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4416 // This means some kind of offset is always required and you cannot use
 4417 // an oop as the offset (done when working on static globals).
 4418 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4419                     indIndex, indIndexScale, indIndexScaleOffset);  // 'memory' list minus indOffset32X
 4420 
 4421 
 4422 //----------PIPELINE-----------------------------------------------------------
 4423 // Rules which define the behavior of the target architectures pipeline.
 4424 pipeline %{
 4425 
 4426 //----------ATTRIBUTES---------------------------------------------------------
 4427 attributes %{
 4428   variable_size_instructions;        // Instructions are variable size
 4429   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4430   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 4431   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4432   instruction_fetch_units = 1;       // of 16 bytes
 4433
 4434   // List of nop instructions
 4435   nops( MachNop );
 4436 %}
 4437 
 4438 //----------RESOURCES----------------------------------------------------------
 4439 // Resources are the functional units available to the machine
 4440 
 4441 // Generic P2/P3 pipeline
 4442 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4443 // 3 instructions decoded per cycle.
 4444 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4445 // 2 ALU ops, only ALU0 handles mul/div instructions. DECODE, MEM and ALU name any unit of their group.
 4446 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4447            MS0, MS1, MEM = MS0 | MS1,
 4448            BR, FPU,
 4449            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4450 
 4451 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4452 // Pipeline Description specifies the stages in the machine's pipeline
 4453 
 4454 // Generic P2/P3 pipeline: six stages, S0 through S5
 4455 pipe_desc(S0, S1, S2, S3, S4, S5);
 4456 
 4457 //----------PIPELINE CLASSES---------------------------------------------------
 4458 // Pipeline Classes describe the stages in which input and output are
 4459 // referenced by the hardware pipeline.
 4460 
 4461 // Naming convention: ialu or fpu
 4462 // Then: _reg
 4463 // Then: _reg if there is a 2nd register
 4464 // Then: _long if it's a pair of instructions implementing a long
 4465 // Then: _fat if it requires the big decoder
 4466 //   Or: _mem if it requires the big decoder and a memory unit.
 4467 
 4468 // Integer ALU reg operation: dst is read in S3 and written in S4
 4469 pipe_class ialu_reg(rRegI dst) %{
 4470     single_instruction;
 4471     dst    : S4(write);
 4472     dst    : S3(read);
 4473     DECODE : S0;        // any decoder
 4474     ALU    : S3;        // any alu
 4475 %}
 4476 
 4477 // Long ALU reg operation (a pair of instructions implementing a long)
 4478 pipe_class ialu_reg_long(eRegL dst) %{
 4479     instruction_count(2);
 4480     dst    : S4(write);
 4481     dst    : S3(read);
 4482     DECODE : S0(2);     // any 2 decoders
 4483     ALU    : S3(2);     // both alus
 4484 %}
 4485 
 4486 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
 4487 pipe_class ialu_reg_fat(rRegI dst) %{
 4488     single_instruction;
 4489     dst    : S4(write);
 4490     dst    : S3(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S3;        // any alu
 4493 %}
 4494 
 4495 // Long ALU reg operation using big decoder (pair of instructions)
 4496 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4497     instruction_count(2);
 4498     dst    : S4(write);
 4499     dst    : S3(read);
 4500     D0     : S0(2);     // big decoder only; twice
 4501     ALU    : S3(2);     // any 2 alus
 4502 %}
 4503 
 4504 // Integer ALU reg-reg operation: src read in S3, dst written in S4
 4505 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4506     single_instruction;
 4507     dst    : S4(write);
 4508     src    : S3(read);
 4509     DECODE : S0;        // any decoder
 4510     ALU    : S3;        // any alu
 4511 %}
 4512 
 4513 // Long ALU reg-reg operation (pair of instructions)
 4514 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4515     instruction_count(2);
 4516     dst    : S4(write);
 4517     src    : S3(read);
 4518     DECODE : S0(2);     // any 2 decoders
 4519     ALU    : S3(2);     // both alus
 4520 %}
 4521 
 4522 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
 4523 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4524     single_instruction;
 4525     dst    : S4(write);
 4526     src    : S3(read);
 4527     D0     : S0;        // big decoder only
 4528     ALU    : S3;        // any alu
 4529 %}
 4530 
 4531 // Long ALU reg-reg operation using big decoder (pair of instructions)
 4532 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4533     instruction_count(2);
 4534     dst    : S4(write);
 4535     src    : S3(read);
 4536     D0     : S0(2);     // big decoder only; twice
 4537     ALU    : S3(2);     // both alus
 4538 %}
 4539 
 4540 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
 4541 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4542     single_instruction;
 4543     dst    : S5(write);
 4544     mem    : S3(read);
 4545     D0     : S0;        // big decoder only
 4546     ALU    : S4;        // any alu
 4547     MEM    : S3;        // any mem
 4548 %}
 4549 
 4550 // Long ALU reg-mem operation (pair of instructions; mem is a load_long_memory operand)
 4551 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4552     instruction_count(2);
 4553     dst    : S5(write);
 4554     mem    : S3(read);
 4555     D0     : S0(2);     // big decoder only; twice
 4556     ALU    : S4(2);     // any 2 alus
 4557     MEM    : S3(2);     // both mems
 4558 %}
 4559 
 4560 // Integer mem operation (prefetch): memory operand only, no ALU resource
 4561 pipe_class ialu_mem(memory mem)
 4562 %{
 4563     single_instruction;
 4564     mem    : S3(read);
 4565     D0     : S0;        // big decoder only
 4566     MEM    : S3;        // any mem
 4567 %}
 4568 
 4569 // Integer Store Register to Memory
 4570 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4571     single_instruction;
 4572     mem    : S3(read);
 4573     src    : S5(read);
 4574     D0     : S0;        // big decoder only
 4575     ALU    : S4;        // any alu
 4576     MEM    : S3;        // any mem
 4577 %}
 4578 
 4579 // Long Store Register to Memory (pair of instructions)
 4580 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4581     instruction_count(2);
 4582     mem    : S3(read);
 4583     src    : S5(read);
 4584     D0     : S0(2);     // big decoder only; twice
 4585     ALU    : S4(2);     // any 2 alus
 4586     MEM    : S3(2);     // Both mems
 4587 %}
 4588 
 4589 // Integer Store Immediate to Memory (no register source operand)
 4590 pipe_class ialu_mem_imm(memory mem) %{
 4591     single_instruction;
 4592     mem    : S3(read);
 4593     D0     : S0;        // big decoder only
 4594     ALU    : S4;        // any alu
 4595     MEM    : S3;        // any mem
 4596 %}
 4597 
 4598 // Integer ALU0 reg-reg operation (restricted to ALU0 and the big decoder)
 4599 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4600     single_instruction;
 4601     dst    : S4(write);
 4602     src    : S3(read);
 4603     D0     : S0;        // Big decoder only
 4604     ALU0   : S3;        // only alu0
 4605 %}
 4606 
 4607 // Integer ALU0 reg-mem operation (restricted to ALU0 and the big decoder)
 4608 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4609     single_instruction;
 4610     dst    : S5(write);
 4611     mem    : S3(read);
 4612     D0     : S0;        // big decoder only
 4613     ALU0   : S4;        // ALU0 only
 4614     MEM    : S3;        // any mem
 4615 %}
 4616 
 4617 // Integer ALU reg-reg operation whose result is the flags register cr
 4618 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4619     single_instruction;
 4620     cr     : S4(write);
 4621     src1   : S3(read);
 4622     src2   : S3(read);
 4623     DECODE : S0;        // any decoder
 4624     ALU    : S3;        // any alu
 4625 %}
 4626 
 4627 // Integer ALU reg-imm operation whose result is the flags register cr
 4628 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4629     single_instruction;
 4630     cr     : S4(write);
 4631     src1   : S3(read);
 4632     DECODE : S0;        // any decoder
 4633     ALU    : S3;        // any alu
 4634 %}
 4635 
 4636 // Integer ALU reg-mem operation whose result is the flags register cr
 4637 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4638     single_instruction;
 4639     cr     : S4(write);
 4640     src1   : S3(read);
 4641     src2   : S3(read);
 4642     D0     : S0;        // big decoder only
 4643     ALU    : S4;        // any alu
 4644     MEM    : S3;        // any mem
 4645 %}
 4646 
 4647 // Four-instruction idiom reading p, q and y; declares no written operand and no ALU resource
 4648 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4649     instruction_count(4);
 4650     y      : S4(read);
 4651     q      : S3(read);
 4652     p      : S3(read);
 4653     DECODE : S0(4);     // any decoder
 4654 %}
 4655 
 4656 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4
 4657 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4658     single_instruction;
 4659     dst    : S4(write);
 4660     src    : S3(read);
 4661     cr     : S3(read);
 4662     DECODE : S0;        // any decoder
 4663 %}
 4664 
 4665 // Conditional move reg-mem: as pipe_cmov_reg, plus a memory unit in S3
 4666 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4667     single_instruction;
 4668     dst    : S4(write);
 4669     src    : S3(read);
 4670     cr     : S3(read);
 4671     DECODE : S0;        // any decoder
 4672     MEM    : S3;        // any mem
 4673 %}
 4674 
 4675 // Conditional move reg-reg long
 4676 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4677     single_instruction; // NOTE(review): other _long classes use instruction_count(2) - confirm
 4678     dst    : S4(write);
 4679     src    : S3(read);
 4680     cr     : S3(read);
 4681     DECODE : S0(2);     // any 2 decoders
 4682 %}
 4683 
 4684 // Conditional move double reg-reg (dst is a regDPR1 operand, src a regDPR operand)
 4685 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4686     single_instruction;
 4687     dst    : S4(write);
 4688     src    : S3(read);
 4689     cr     : S3(read);
 4690     DECODE : S0;        // any decoder
 4691 %}
 4692 
 4693 // Float single-register operation (dst is only declared as read)
 4694 pipe_class fpu_reg(regDPR dst) %{
 4695     instruction_count(2);
 4696     dst    : S3(read);
 4697     DECODE : S0(2);     // any 2 decoders
 4698     FPU    : S3;
 4699 %}
 4700 
 4701 // Float reg-reg operation (2 instructions)
 4702 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4703     instruction_count(2);
 4704     dst    : S4(write);
 4705     src    : S3(read);
 4706     DECODE : S0(2);     // any 2 decoders
 4707     FPU    : S3;
 4708 %}
 4709 
 4710 // Float reg-reg-reg operation (3 instructions)
 4711 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4712     instruction_count(3);
 4713     dst    : S4(write);
 4714     src1   : S3(read);
 4715     src2   : S3(read);
 4716     DECODE : S0(3);     // any 3 decoders
 4717     FPU    : S3(2);
 4718 %}
 4719 
 4720 // Float reg-reg-reg-reg operation (4 instructions)
 4721 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4722     instruction_count(4);
 4723     dst    : S4(write);
 4724     src1   : S3(read);
 4725     src2   : S3(read);
 4726     src3   : S3(read);
 4727     DECODE : S0(4);     // 4 decodes, any decoder
 4728     FPU    : S3(2);
 4729 %}
 4730 
 4731 // Float reg-mem-reg-reg operation (4 instructions, src1 is the memory operand)
 4732 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4733     instruction_count(4);
 4734     dst    : S4(write);
 4735     src1   : S3(read);
 4736     src2   : S3(read);
 4737     src3   : S3(read);
 4738     DECODE : S1(3);     // any 3 decoders
 4739     D0     : S0;        // Big decoder only
 4740     FPU    : S3(2);
 4741     MEM    : S3;        // any mem
 4742 %}
 4743 
 4744 // Float reg-mem operation (2 instructions: the memory access, then an FPU POP)
 4745 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4746     instruction_count(2);
 4747     dst    : S5(write);
 4748     mem    : S3(read);
 4749     D0     : S0;        // big decoder only
 4750     DECODE : S1;        // any decoder for FPU POP
 4751     FPU    : S4;
 4752     MEM    : S3;        // any mem
 4753 %}
 4754 
 4755 // Float reg-reg-mem operation (3 instructions)
 4756 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4757     instruction_count(3);
 4758     dst    : S5(write);
 4759     src1   : S3(read);
 4760     mem    : S3(read);
 4761     D0     : S0;        // big decoder only
 4762     DECODE : S1(2);     // any decoder for FPU POP
 4763     FPU    : S4;
 4764     MEM    : S3;        // any mem
 4765 %}
 4766 
 4767 // Float mem-reg operation (2 instructions: an FPU PUSH, then the memory access)
 4768 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4769     instruction_count(2);
 4770     src    : S5(read);
 4771     mem    : S3(read);
 4772     DECODE : S0;        // any decoder for FPU PUSH
 4773     D0     : S1;        // big decoder only
 4774     FPU    : S4;
 4775     MEM    : S3;        // any mem
 4776 %}
 4777 
 4778 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4779     instruction_count(3); // Float mem-reg-reg operation
 4780     src1   : S3(read);
 4781     src2   : S3(read);
 4782     mem    : S3(read);
 4783     DECODE : S0(2);     // any decoder for FPU PUSH
 4784     D0     : S1;        // big decoder only
 4785     FPU    : S4;
 4786     MEM    : S3;        // any mem
 4787 %}
 4788 
 4789 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4790     instruction_count(3); // Float mem-reg-mem operation (two memory operands)
 4791     src1   : S3(read);
 4792     src2   : S3(read);
 4793     mem    : S4(read);
 4794     DECODE : S0;        // any decoder for FPU PUSH
 4795     D0     : S0(2);     // big decoder only
 4796     FPU    : S4;
 4797     MEM    : S3(2);     // any mem
 4798 %}
 4799 
 4800 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4801     instruction_count(2); // Float mem-mem operation; declares no FPU resource
 4802     src1   : S3(read);
 4803     dst    : S4(read);
 4804     D0     : S0(2);     // big decoder only
 4805     MEM    : S3(2);     // any mem
 4806 %}
 4807 
 4808 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4809     instruction_count(3); // Float mem-mem-mem operation
 4810     src1   : S3(read);
 4811     src2   : S3(read);
 4812     dst    : S4(read);
 4813     D0     : S0(3);     // big decoder only
 4814     FPU    : S4;
 4815     MEM    : S3(3);     // any mem
 4816 %}
 4817 
 4818 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4819     instruction_count(3); // Float mem-reg operation with a constant (presumably loaded from memory - confirm)
 4820     src1   : S4(read);
 4821     mem    : S4(read);
 4822     DECODE : S0;        // any decoder for FPU PUSH
 4823     D0     : S0(2);     // big decoder only
 4824     FPU    : S4;
 4825     MEM    : S3(2);     // any mem
 4826 %}
 4827 
 4828 // Float load constant into a register (2 instructions: the load, then an FPU POP)
 4829 pipe_class fpu_reg_con(regDPR dst) %{
 4830     instruction_count(2);
 4831     dst    : S5(write);
 4832     D0     : S0;        // big decoder only for the load
 4833     DECODE : S1;        // any decoder for FPU POP
 4834     FPU    : S4;
 4835     MEM    : S3;        // any mem
 4836 %}
 4837 
 4838 // Float reg-reg operation with a loaded constant (3 instructions)
 4839 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4840     instruction_count(3);
 4841     dst    : S5(write);
 4842     src    : S3(read);
 4843     D0     : S0;        // big decoder only for the load
 4844     DECODE : S1(2);     // any decoder for FPU POP
 4845     FPU    : S4;
 4846     MEM    : S3;        // any mem
 4847 %}
 4848 
 4849 // Unconditional branch: uses only the branch unit, in S3
 4850 pipe_class pipe_jmp( label labl ) %{
 4851     single_instruction;
 4852     BR   : S3;
 4853 %}
 4854 
 4855 // Conditional branch: reads the flags in S1, uses the branch unit in S3
 4856 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4857     single_instruction;
 4858     cr    : S1(read);
 4859     BR    : S3;
 4860 %}
 4861 
 4862 // Allocation idiom: serializing, fixed latency of 6
 4863 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4864     instruction_count(1); force_serialization;
 4865     fixed_latency(6);
 4866     heap_ptr : S3(read);
 4867     DECODE   : S0(3);
 4868     D0       : S2;
 4869     MEM      : S3;
 4870     ALU      : S3(2);
 4871     dst      : S5(write);
 4872     BR       : S5;
 4873 %}
 4874 
 4875 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serializing, latency 100
 4876 pipe_class pipe_slow(  ) %{
 4877     instruction_count(10); multiple_bundles; force_serialization;
 4878     fixed_latency(100);
 4879     D0  : S0(2);
 4880     MEM : S3(2);
 4881 %}
 4882 
 4883 // The real do-nothing guy: zero instructions, no operands, no resources
 4884 pipe_class empty( ) %{
 4885     instruction_count(0);
 4886 %}
 4887 
 4888 // Define the class for the Nop node: MachNop uses the 'empty' pipeline class
 4889 define %{
 4890    MachNop = empty;
 4891 %}
 4892 
 4893 %}
 4894 
 4895 //----------INSTRUCTIONS-------------------------------------------------------
 4896 //
 4897 // match      -- States which machine-independent subtree may be replaced
 4898 //               by this instruction.
 4899 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4900 //               selection to identify a minimum cost tree of machine
 4901 //               instructions that matches a tree of machine-independent
 4902 //               instructions.
 4903 // format     -- A string providing the disassembly for this instruction.
 4904 //               The value of an instruction's operand may be inserted
 4905 //               by referring to it with a '$' prefix.
 4906 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4907 //               to within an encode class as $primary, $secondary, and $tertiary
 4908 //               respectively.  The primary opcode is commonly used to
 4909 //               indicate the type of machine instruction, while secondary
 4910 //               and tertiary are often used for prefix options or addressing
 4911 //               modes.
 4912 // ins_encode -- A list of encode classes with parameters. The encode class
 4913 //               name must have been defined in an 'enc_class' specification
 4914 //               in the encode section of the architecture description.
 4915 
 4916 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4917 // Dummy float move regF -> legRegF; the encoding is ShouldNotReachHere, so it must never be emitted
 4918 instruct MoveF2LEG(legRegF dst, regF src) %{
 4919   match(Set dst src);
 4920   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4921   ins_encode %{
 4922     ShouldNotReachHere();
 4923   %}
 4924   ins_pipe( fpu_reg_reg );
 4925 %}
 4926 
 4927 // Dummy float move legRegF -> regF; the encoding is ShouldNotReachHere, so it must never be emitted
 4928 instruct MoveLEG2F(regF dst, legRegF src) %{
 4929   match(Set dst src);
 4930   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4931   ins_encode %{
 4932     ShouldNotReachHere();
 4933   %}
 4934   ins_pipe( fpu_reg_reg );
 4935 %}
 4936 
 4937 // Dummy float move regF -> vlRegF; the encoding is ShouldNotReachHere, so it must never be emitted
 4938 instruct MoveF2VL(vlRegF dst, regF src) %{
 4939   match(Set dst src);
 4940   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4941   ins_encode %{
 4942     ShouldNotReachHere();
 4943   %}
 4944   ins_pipe( fpu_reg_reg );
 4945 %}
 4946 
 4947 // Dummy float move vlRegF -> regF; the encoding is ShouldNotReachHere, so it must never be emitted
 4948 instruct MoveVL2F(regF dst, vlRegF src) %{
 4949   match(Set dst src);
 4950   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4951   ins_encode %{
 4952     ShouldNotReachHere();
 4953   %}
 4954   ins_pipe( fpu_reg_reg );
 4955 %}
 4956 
 4957 
 4958 
 4959 // Dummy double move regD -> legRegD; the encoding is ShouldNotReachHere, so it must never be emitted
 4960 instruct MoveD2LEG(legRegD dst, regD src) %{
 4961   match(Set dst src);
 4962   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4963   ins_encode %{
 4964     ShouldNotReachHere();
 4965   %}
 4966   ins_pipe( fpu_reg_reg );
 4967 %}
 4968 
 4969 // Dummy double move legRegD -> regD; the encoding is ShouldNotReachHere, so it must never be emitted
 4970 instruct MoveLEG2D(regD dst, legRegD src) %{
 4971   match(Set dst src);
 4972   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4973   ins_encode %{
 4974     ShouldNotReachHere();
 4975   %}
 4976   ins_pipe( fpu_reg_reg );
 4977 %}
 4978 
 4979 // Dummy double move regD -> vlRegD; the encoding is ShouldNotReachHere, so it must never be emitted
 4980 instruct MoveD2VL(vlRegD dst, regD src) %{
 4981   match(Set dst src);
 4982   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4983   ins_encode %{
 4984     ShouldNotReachHere();
 4985   %}
 4986   ins_pipe( fpu_reg_reg );
 4987 %}
 4988 
 4989 // Dummy double move vlRegD -> regD; the encoding is ShouldNotReachHere, so it must never be emitted
 4990 instruct MoveVL2D(regD dst, vlRegD src) %{
 4991   match(Set dst src);
 4992   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4993   ins_encode %{
 4994     ShouldNotReachHere();
 4995   %}
 4996   ins_pipe( fpu_reg_reg );
 4997 %}
 4998 
 4999 //----------BSWAP-Instruction--------------------------------------------------
 5000 instruct bytes_reverse_int(rRegI dst) %{
 5001   match(Set dst (ReverseBytesI dst));
 5002   // In-place byte reversal of an int: dst is both the input and the result of ReverseBytesI.
 5003   format %{ "BSWAP  $dst" %}
 5004   opcode(0x0F, 0xC8);
 5005   ins_encode( OpcP, OpcSReg(dst) );
 5006   ins_pipe( ialu_reg );
 5007 %}
 5008 
 5009 instruct bytes_reverse_long(eRegL dst) %{
 5010   match(Set dst (ReverseBytesL dst));
 5011   // In-place byte reversal of a long: per the format, each half is byte-swapped and the halves exchanged.
 5012   format %{ "BSWAP  $dst.lo\n\t"
 5013             "BSWAP  $dst.hi\n\t"
 5014             "XCHG   $dst.lo,$dst.hi" %}
 5015
 5016   ins_cost(125);
 5017   ins_encode( bswap_long_bytes(dst) );
 5018   ins_pipe( ialu_reg_reg);
 5019 %}
 5020 
 5021 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5022   match(Set dst (ReverseBytesUS dst));
 5023   effect(KILL cr);
 5024   // Swap all four bytes, then shift the swapped pair down with a logical shift (zero fill).
 5025   format %{ "BSWAP  $dst\n\t"
 5026             "SHR    $dst,16" %}
 5027   ins_encode %{
 5028     __ bswapl($dst$$Register);
 5029     __ shrl($dst$$Register, 16);
 5030   %}
 5031   ins_pipe( ialu_reg );
 5032 %}
 5033 
 5034 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5035   match(Set dst (ReverseBytesS dst));
 5036   effect(KILL cr);
 5037   // Swap all four bytes, then shift the swapped pair down with an arithmetic shift (sign fill).
 5038   format %{ "BSWAP  $dst\n\t"
 5039             "SAR    $dst,16" %}
 5040   ins_encode %{
 5041     __ bswapl($dst$$Register);
 5042     __ sarl($dst$$Register, 16);
 5043   %}
 5044   ins_pipe( ialu_reg );
 5045 %}
 5046 
 5047 
 5048 //---------- Zeros Count Instructions ------------------------------------------
 5049 
 5050 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5051   predicate(UseCountLeadingZerosInstruction);
 5052   match(Set dst (CountLeadingZerosI src));
 5053   effect(KILL cr);
 5054   // Single LZCNT; selected only when UseCountLeadingZerosInstruction is set. Flags are killed.
 5055   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5056   ins_encode %{
 5057     __ lzcntl($dst$$Register, $src$$Register);
 5058   %}
 5059   ins_pipe(ialu_reg);
 5060 %}
 5061 
 5062 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5063   predicate(!UseCountLeadingZerosInstruction);
 5064   match(Set dst (CountLeadingZerosI src));
 5065   effect(KILL cr);
 5066   // BSR-based fallback: result is 31 minus the bit index found by BSR, or 32 on the zero path.
 5067   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5068             "JNZ    skip\n\t"
 5069             "MOV    $dst, -1\n"
 5070       "skip:\n\t"
 5071             "NEG    $dst\n\t"
 5072             "ADD    $dst, 31" %}
 5073   ins_encode %{
 5074     Register Rdst = $dst$$Register;
 5075     Register Rsrc = $src$$Register;
 5076     Label skip;
 5077     __ bsrl(Rdst, Rsrc);
 5078     __ jccb(Assembler::notZero, skip);
 5079     __ movl(Rdst, -1);              // zero path: -(-1) + 31 below yields 32
 5080     __ bind(skip);
 5081     __ negl(Rdst);
 5082     __ addl(Rdst, BitsPerInt - 1);  // 31 - index
 5083   %}
 5084   ins_pipe(ialu_reg);
 5085 %}
 5086 
 5087 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountLeadingZerosL on a long held as a lo/hi pair of 32-bit registers,
        // using LZCNT on each half. The result is an int register.
 5088   predicate(UseCountLeadingZerosInstruction);
 5089   match(Set dst (CountLeadingZerosL src));
        // dst is TEMP as well as the result: src.lo is still read after dst has
        // been written (presumably TEMP keeps dst off src's registers -- confirm).
 5090   effect(TEMP dst, KILL cr);
 5091
 5092   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5093             "JNC    done\n\t"
 5094             "LZCNT  $dst, $src.lo\n\t"
 5095             "ADD    $dst, 32\n"
 5096       "done:" %}
 5097   ins_encode %{
 5098     Register Rdst = $dst$$Register;
 5099     Register Rsrc = $src$$Register;
 5100     Label done;
          // Count in the high word first.
 5101     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
          // Carry clear: the high-word count is the answer. (LZCNT is expected to
          // set carry only for an all-zero source -- confirm against the ISA manual.)
 5102     __ jccb(Assembler::carryClear, done);
          // Otherwise count in the low word and add the 32 zero bits of the high word.
 5103     __ lzcntl(Rdst, Rsrc);
 5104     __ addl(Rdst, BitsPerInt);
 5105     __ bind(done);
 5106   %}
 5107   ins_pipe(ialu_reg);
 5108 %}
 5109 
 5110 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountLeadingZerosL built from BSR on the two 32-bit halves of src, used
        // when UseCountLeadingZerosInstruction is off. The result is an int register.
 5111   predicate(!UseCountLeadingZerosInstruction);
 5112   match(Set dst (CountLeadingZerosL src));
        // dst is TEMP as well as the result: src.lo is read after dst is written.
 5113   effect(TEMP dst, KILL cr);
 5114
 5115   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5116             "JZ     msw_is_zero\n\t"
 5117             "ADD    $dst, 32\n\t"
 5118             "JMP    not_zero\n"
 5119       "msw_is_zero:\n\t"
 5120             "BSR    $dst, $src.lo\n\t"
 5121             "JNZ    not_zero\n\t"
 5122             "MOV    $dst, -1\n"
 5123       "not_zero:\n\t"
 5124             "NEG    $dst\n\t"
 5125             "ADD    $dst, 63" %}
 5126   ins_encode %{
 5127     Register Rdst = $dst$$Register;
 5128     Register Rsrc = $src$$Register;
 5129     Label msw_is_zero;
 5130     Label not_zero;
          // Try the high word first; a hit there is bit (index + 32) of the long.
 5131     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5132     __ jccb(Assembler::zero, msw_is_zero);
 5133     __ addl(Rdst, BitsPerInt);
 5134     __ jmpb(not_zero);
 5135     __ bind(msw_is_zero);
          // High word gave nothing: scan the low word instead.
 5136     __ bsrl(Rdst, Rsrc);
 5137     __ jccb(Assembler::notZero, not_zero);
          // Neither half produced an index: -1 makes the arithmetic below yield 64.
 5138     __ movl(Rdst, -1);
 5139     __ bind(not_zero);
          // dst = (BitsPerLong - 1) - dst, computed as negate-then-add.
 5140     __ negl(Rdst);
 5141     __ addl(Rdst, BitsPerLong - 1);
 5142   %}
 5143   ins_pipe(ialu_reg);
 5144 %}
 5145 
 5146 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountTrailingZerosI as a single TZCNT. Only selectable when
        // UseCountTrailingZerosInstruction is set; countTrailingZerosI_bsf has the
        // complementary predicate.
 5147   predicate(UseCountTrailingZerosInstruction);
 5148   match(Set dst (CountTrailingZerosI src));
 5149   effect(KILL cr);
 5150
 5151   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5152   ins_encode %{
 5153     __ tzcntl($dst$$Register, $src$$Register);
 5154   %}
 5155   ins_pipe(ialu_reg);
 5156 %}
 5157 
 5158 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountTrailingZerosI built from BSF, used when
        // UseCountTrailingZerosInstruction is off.
 5159   predicate(!UseCountTrailingZerosInstruction);
 5160   match(Set dst (CountTrailingZerosI src));
 5161   effect(KILL cr);
 5162
 5163   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5164             "JNZ    done\n\t"
 5165             "MOV    $dst, 32\n"
 5166       "done:" %}
 5167   ins_encode %{
 5168     Register Rdst = $dst$$Register;
 5169     Label done;
          // The BSF bit index is already the trailing-zero count when a bit was found.
 5170     __ bsfl(Rdst, $src$$Register);
 5171     __ jccb(Assembler::notZero, done);
          // Zero flag set after BSF (presumably src == 0 -- confirm): answer is 32.
 5172     __ movl(Rdst, BitsPerInt);
 5173     __ bind(done);
 5174   %}
 5175   ins_pipe(ialu_reg);
 5176 %}
 5177 
 5178 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountTrailingZerosL on a long held as a lo/hi pair of 32-bit registers,
        // using TZCNT on each half. The result is an int register.
 5179   predicate(UseCountTrailingZerosInstruction);
 5180   match(Set dst (CountTrailingZerosL src));
        // dst is TEMP as well as the result: src.hi is read after dst is written.
 5181   effect(TEMP dst, KILL cr);
 5182
 5183   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5184             "JNC    done\n\t"
 5185             "TZCNT  $dst, $src.hi\n\t"
 5186             "ADD    $dst, 32\n"
 5187             "done:" %}
 5188   ins_encode %{
 5189     Register Rdst = $dst$$Register;
 5190     Register Rsrc = $src$$Register;
 5191     Label done;
          // Count in the low word first.
 5192     __ tzcntl(Rdst, Rsrc);
          // Carry clear: the low-word count is the answer. (TZCNT is expected to
          // set carry only for an all-zero source -- confirm against the ISA manual.)
 5193     __ jccb(Assembler::carryClear, done);
          // Otherwise count in the high word and add the 32 zero bits of the low word.
 5194     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5195     __ addl(Rdst, BitsPerInt);
 5196     __ bind(done);
 5197   %}
 5198   ins_pipe(ialu_reg);
 5199 %}
 5200 
 5201 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountTrailingZerosL built from BSF on the two 32-bit halves of src, used
        // when UseCountTrailingZerosInstruction is off. The result is an int register.
 5202   predicate(!UseCountTrailingZerosInstruction);
 5203   match(Set dst (CountTrailingZerosL src));
        // dst is TEMP as well as the result: src.hi is read after dst is written.
 5204   effect(TEMP dst, KILL cr);
 5205
 5206   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5207             "JNZ    done\n\t"
 5208             "BSF    $dst, $src.hi\n\t"
 5209             "JNZ    msw_not_zero\n\t"
 5210             "MOV    $dst, 32\n"
 5211       "msw_not_zero:\n\t"
 5212             "ADD    $dst, 32\n"
 5213       "done:" %}
 5214   ins_encode %{
 5215     Register Rdst = $dst$$Register;
 5216     Register Rsrc = $src$$Register;
 5217     Label msw_not_zero;
 5218     Label done;
          // A hit in the low word is the final answer.
 5219     __ bsfl(Rdst, Rsrc);
 5220     __ jccb(Assembler::notZero, done);
          // Otherwise scan the high word; its index is offset by 32 below.
 5221     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5222     __ jccb(Assembler::notZero, msw_not_zero);
          // Neither half produced an index: 32 here plus 32 below gives 64.
 5223     __ movl(Rdst, BitsPerInt);
 5224     __ bind(msw_not_zero);
 5225     __ addl(Rdst, BitsPerInt);
 5226     __ bind(done);
 5227   %}
 5228   ins_pipe(ialu_reg);
 5229 %}
 5230 
 5231 
 5232 //---------- Population Count Instructions -------------------------------------
 5233 
 5234 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // PopCountI of a register operand as a single POPCNT; gated on
        // UsePopCountInstruction.
 5235   predicate(UsePopCountInstruction);
 5236   match(Set dst (PopCountI src));
 5237   effect(KILL cr);
 5238
 5239   format %{ "POPCNT $dst, $src" %}
 5240   ins_encode %{
 5241     __ popcntl($dst$$Register, $src$$Register);
 5242   %}
 5243   ins_pipe(ialu_reg);
 5244 %}
 5245 
 5246 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
        // PopCountI with the LoadI folded in: POPCNT reads its source straight
        // from memory. Gated on UsePopCountInstruction.
 5247   predicate(UsePopCountInstruction);
 5248   match(Set dst (PopCountI (LoadI mem)));
 5249   effect(KILL cr);
 5250
 5251   format %{ "POPCNT $dst, $mem" %}
 5252   ins_encode %{
 5253     __ popcntl($dst$$Register, $mem$$Address);
 5254   %}
 5255   ins_pipe(ialu_reg);
 5256 %}
 5257 
 5258 // Note: Long.bitCount(long) returns an int.
 5259 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // PopCountL of a long held as a lo/hi register pair: the bit counts of the
        // two halves are computed separately and summed into an int result.
 5260   predicate(UsePopCountInstruction);
 5261   match(Set dst (PopCountL src));
        // tmp receives the high-word count; dst is TEMP too because src.hi is
        // still read after dst has been written.
 5262   effect(KILL cr, TEMP tmp, TEMP dst);
 5263
 5264   format %{ "POPCNT $dst, $src.lo\n\t"
 5265             "POPCNT $tmp, $src.hi\n\t"
 5266             "ADD    $dst, $tmp" %}
 5267   ins_encode %{
 5268     __ popcntl($dst$$Register, $src$$Register);
 5269     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5270     __ addl($dst$$Register, $tmp$$Register);
 5271   %}
 5272   ins_pipe(ialu_reg);
 5273 %}
 5274 
 5275 // Note: Long.bitCount(long) returns an int.
 5276 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
        // PopCountL with the LoadL folded in: each 32-bit half of the long is
        // counted directly from memory and the two counts are summed.
 5277   predicate(UsePopCountInstruction);
 5278   match(Set dst (PopCountL (LoadL mem)));
 5279   effect(KILL cr, TEMP tmp, TEMP dst);
 5280
 5281   format %{ "POPCNT $dst, $mem\n\t"
 5282             "POPCNT $tmp, $mem+4\n\t"
 5283             "ADD    $dst, $tmp" %}
 5284   ins_encode %{
 5285     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5286     //__ popcntl($tmp$$Register, $mem$$Address$$second);
          // The two word addresses are assembled by hand from the operand's
          // base/index/scale/disp; the second word is at disp + 4.
 5287     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5288     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5289     __ addl($dst$$Register, $tmp$$Register);
 5290   %}
 5291   ins_pipe(ialu_reg);
 5292 %}
 5293 
 5294 
 5295 //----------Load/Store/Move Instructions---------------------------------------
 5296 //----------Load Instructions--------------------------------------------------
 5297 // Load Byte (8bit signed)
 5298 instruct loadB(xRegI dst, memory mem) %{
        // LoadB: sign-extending 8-bit load into an int register (movsbl).
 5299   match(Set dst (LoadB mem));
 5300
 5301   ins_cost(125);
 5302   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5303
 5304   ins_encode %{
 5305     __ movsbl($dst$$Register, $mem$$Address);
 5306   %}
 5307
 5308   ins_pipe(ialu_reg_mem);
 5309 %}
 5310 
 5311 // Load Byte (8bit signed) into Long Register
 5312 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadB) folded into one rule: load the byte sign-extended into
        // dst.lo, then derive dst.hi from it.
 5313   match(Set dst (ConvI2L (LoadB mem)));
 5314   effect(KILL cr);
 5315
 5316   ins_cost(375);
 5317   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5318             "MOV    $dst.hi,$dst.lo\n\t"
 5319             "SAR    $dst.hi,7" %}
 5320
 5321   ins_encode %{
 5322     __ movsbl($dst$$Register, $mem$$Address);
 5323     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
          // A shift of 7 is enough to fill the high word with the sign because
          // the upper bits of the low word already repeat it.
 5324     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
 5325   %}
 5326
 5327   ins_pipe(ialu_reg_mem);
 5328 %}
 5329 
 5330 // Load Unsigned Byte (8bit UNsigned)
 5331 instruct loadUB(xRegI dst, memory mem) %{
        // LoadUB: zero-extending 8-bit load into an int register (movzbl).
 5332   match(Set dst (LoadUB mem));
 5333
 5334   ins_cost(125);
 5335   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5336
 5337   ins_encode %{
 5338     __ movzbl($dst$$Register, $mem$$Address);
 5339   %}
 5340
 5341   ins_pipe(ialu_reg_mem);
 5342 %}
 5343 
 5344 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5345 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadUB) folded into one rule: zero-extended byte in dst.lo,
        // dst.hi cleared.
 5346   match(Set dst (ConvI2L (LoadUB mem)));
        // cr is declared killed; the XOR used to clear dst.hi is the only
        // arithmetic instruction emitted.
 5347   effect(KILL cr);
 5348
 5349   ins_cost(250);
 5350   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5351             "XOR    $dst.hi,$dst.hi" %}
 5352
 5353   ins_encode %{
 5354     Register Rdst = $dst$$Register;
 5355     __ movzbl(Rdst, $mem$$Address);
 5356     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5357   %}
 5358
 5359   ins_pipe(ialu_reg_mem);
 5360 %}
 5361 
 5362 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5363 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // ConvI2L(LoadUB & mask) folded into one rule for an arbitrary int mask.
 5364   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5365   effect(KILL cr);
 5366
 5367   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5368             "XOR    $dst.hi,$dst.hi\n\t"
 5369             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5370   ins_encode %{
 5371     Register Rdst = $dst$$Register;
 5372     __ movzbl(Rdst, $mem$$Address);
 5373     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
          // Only the low 8 bits of the mask are applied; the loaded value has
          // nothing above bit 7 after the zero-extending load.
 5374     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5375   %}
 5376   ins_pipe(ialu_reg_mem);
 5377 %}
 5378 
 5379 // Load Short (16bit signed)
 5380 instruct loadS(rRegI dst, memory mem) %{
        // LoadS: sign-extending 16-bit load into an int register (movswl).
 5381   match(Set dst (LoadS mem));
 5382
 5383   ins_cost(125);
 5384   format %{ "MOVSX  $dst,$mem\t# short" %}
 5385
 5386   ins_encode %{
 5387     __ movswl($dst$$Register, $mem$$Address);
 5388   %}
 5389
 5390   ins_pipe(ialu_reg_mem);
 5391 %}
 5392 
 5393 // Load Short (16 bit signed) to Byte (8 bit signed)
 5394 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadS << 24) >> 24 into a single sign-extending byte load from
        // the same address. The same immI_24 operand is used for both shifts.
 5395   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5396
 5397   ins_cost(125);
        // Printed as MOVSX8, as in loadB, because the encoding below is the
        // byte-wide movsbl rather than the 16-bit movswl.
 5398   format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
 5399   ins_encode %{
 5400     __ movsbl($dst$$Register, $mem$$Address);
 5401   %}
 5402   ins_pipe(ialu_reg_mem);
 5403 %}
 5404 
 5405 // Load Short (16bit signed) into Long Register
 5406 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadS) folded into one rule: load the short sign-extended into
        // dst.lo, then derive dst.hi from it.
 5407   match(Set dst (ConvI2L (LoadS mem)));
 5408   effect(KILL cr);
 5409
 5410   ins_cost(375);
 5411   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5412             "MOV    $dst.hi,$dst.lo\n\t"
 5413             "SAR    $dst.hi,15" %}
 5414
 5415   ins_encode %{
 5416     __ movswl($dst$$Register, $mem$$Address);
 5417     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
          // A shift of 15 is enough to fill the high word with the sign because
          // the upper bits of the low word already repeat it.
 5418     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
 5419   %}
 5420
 5421   ins_pipe(ialu_reg_mem);
 5422 %}
 5423 
 5424 // Load Unsigned Short/Char (16bit unsigned)
 5425 instruct loadUS(rRegI dst, memory mem) %{
        // LoadUS: zero-extending 16-bit load into an int register (movzwl).
 5426   match(Set dst (LoadUS mem));
 5427
 5428   ins_cost(125);
 5429   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5430
 5431   ins_encode %{
 5432     __ movzwl($dst$$Register, $mem$$Address);
 5433   %}
 5434
 5435   ins_pipe(ialu_reg_mem);
 5436 %}
 5437 
 5438 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5439 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadUS << 24) >> 24 into a single sign-extending byte load from
        // the same address. The same immI_24 operand is used for both shifts.
 5440   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5441
 5442   ins_cost(125);
        // Printed as MOVSX8, as in loadB, because the encoding below is the
        // byte-wide movsbl.
 5443   format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
 5444   ins_encode %{
 5445     __ movsbl($dst$$Register, $mem$$Address);
 5446   %}
 5447   ins_pipe(ialu_reg_mem);
 5448 %}
 5449 
 5450 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5451 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadUS) folded into one rule: zero-extended 16-bit value in
        // dst.lo, dst.hi cleared with XOR.
 5452   match(Set dst (ConvI2L (LoadUS mem)));
 5453   effect(KILL cr);
 5454
 5455   ins_cost(250);
 5456   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5457             "XOR    $dst.hi,$dst.hi" %}
 5458
 5459   ins_encode %{
 5460     __ movzwl($dst$$Register, $mem$$Address);
 5461     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5462   %}
 5463
 5464   ins_pipe(ialu_reg_mem);
 5465 %}
 5466 
 5467 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5468 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // ConvI2L(LoadUS & 0xFF): the mask is absorbed by loading just one byte,
        // zero-extended, so no AND is emitted.
 5469   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5470   effect(KILL cr);
 5471
 5472   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5473             "XOR    $dst.hi,$dst.hi" %}
 5474   ins_encode %{
 5475     Register Rdst = $dst$$Register;
 5476     __ movzbl(Rdst, $mem$$Address);
 5477     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5478   %}
 5479   ins_pipe(ialu_reg_mem);
 5480 %}
 5481 
 5482 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5483 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // ConvI2L(LoadUS & mask) folded into one rule for an arbitrary int mask.
 5484   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5485   effect(KILL cr);
 5486
 5487   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5488             "XOR    $dst.hi,$dst.hi\n\t"
 5489             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5490   ins_encode %{
 5491     Register Rdst = $dst$$Register;
 5492     __ movzwl(Rdst, $mem$$Address);
 5493     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
          // Only the low 16 bits of the mask are applied; the loaded value has
          // nothing above bit 15 after the zero-extending load.
 5494     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5495   %}
 5496   ins_pipe(ialu_reg_mem);
 5497 %}
 5498 
 5499 // Load Integer
 5500 instruct loadI(rRegI dst, memory mem) %{
        // LoadI: plain 32-bit load into an int register.
 5501   match(Set dst (LoadI mem));
 5502
 5503   ins_cost(125);
 5504   format %{ "MOV    $dst,$mem\t# int" %}
 5505
 5506   ins_encode %{
 5507     __ movl($dst$$Register, $mem$$Address);
 5508   %}
 5509
 5510   ins_pipe(ialu_reg_mem);
 5511 %}
 5512 
 5513 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5514 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadI << 24) >> 24 into a single sign-extending byte load from
        // the same address. The same immI_24 operand is used for both shifts.
 5515   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5516
 5517   ins_cost(125);
        // Printed as MOVSX8, as in loadB, because the encoding below is the
        // byte-wide movsbl.
 5518   format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
 5519   ins_encode %{
 5520     __ movsbl($dst$$Register, $mem$$Address);
 5521   %}
 5522   ins_pipe(ialu_reg_mem);
 5523 %}
 5524 
 5525 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5526 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
        // Folds (LoadI & 0xFF) into a single zero-extending byte load from the
        // same address, so no AND is emitted.
 5527   match(Set dst (AndI (LoadI mem) mask));
 5528
 5529   ins_cost(125);
        // Printed as MOVZX8, as in loadUB, because the encoding below is the
        // byte-wide movzbl rather than the 16-bit movzwl.
 5530   format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
 5531   ins_encode %{
 5532     __ movzbl($dst$$Register, $mem$$Address);
 5533   %}
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Integer (32 bit signed) to Short (16 bit signed)
 5538 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
        // Folds (LoadI << 16) >> 16 into a single sign-extending 16-bit load
        // (movswl) from the same address.
 5539   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5540
 5541   ins_cost(125);
 5542   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5543   ins_encode %{
 5544     __ movswl($dst$$Register, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5550 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
        // Folds (LoadI & 0xFFFF) into a single zero-extending 16-bit load
        // (movzwl) from the same address, so no AND is emitted.
 5551   match(Set dst (AndI (LoadI mem) mask));
 5552
 5553   ins_cost(125);
 5554   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5555   ins_encode %{
 5556     __ movzwl($dst$$Register, $mem$$Address);
 5557   %}
 5558   ins_pipe(ialu_reg_mem);
 5559 %}
 5560 
 5561 // Load Integer into Long Register
 5562 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadI) folded into one rule: load the int into dst.lo, then
        // build dst.hi as a copy shifted right arithmetically by 31, which
        // leaves only the sign bit replicated.
 5563   match(Set dst (ConvI2L (LoadI mem)));
 5564   effect(KILL cr);
 5565
 5566   ins_cost(375);
 5567   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5568             "MOV    $dst.hi,$dst.lo\n\t"
 5569             "SAR    $dst.hi,31" %}
 5570
 5571   ins_encode %{
 5572     __ movl($dst$$Register, $mem$$Address);
 5573     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5574     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5575   %}
 5576
 5577   ins_pipe(ialu_reg_mem);
 5578 %}
 5579 
 5580 // Load Integer with mask 0xFF into Long Register
 5581 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // ConvI2L(LoadI & 0xFF): the mask is absorbed by a zero-extending byte
        // load into dst.lo; dst.hi is cleared with XOR.
 5582   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5583   effect(KILL cr);
 5584
 5585   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5586             "XOR    $dst.hi,$dst.hi" %}
 5587   ins_encode %{
 5588     Register Rdst = $dst$$Register;
 5589     __ movzbl(Rdst, $mem$$Address);
 5590     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5591   %}
 5592   ins_pipe(ialu_reg_mem);
 5593 %}
 5594 
 5595 // Load Integer with mask 0xFFFF into Long Register
 5596 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
        // ConvI2L(LoadI & 0xFFFF): the mask is absorbed by a zero-extending
        // 16-bit load into dst.lo; dst.hi is cleared with XOR.
 5597   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5598   effect(KILL cr);
 5599
 5600   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5601             "XOR    $dst.hi,$dst.hi" %}
 5602   ins_encode %{
 5603     Register Rdst = $dst$$Register;
 5604     __ movzwl(Rdst, $mem$$Address);
 5605     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5606   %}
 5607   ins_pipe(ialu_reg_mem);
 5608 %}
 5609 
 5610 // Load Integer with 31-bit mask into Long Register
 5611 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
        // ConvI2L(LoadI & mask) where mask is an immU31 operand. dst.hi is simply
        // zeroed rather than sign-derived (presumably valid because an immU31
        // mask leaves the sign bit clear -- confirm against the operand definition).
 5612   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5613   effect(KILL cr);
 5614
 5615   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5616             "XOR    $dst.hi,$dst.hi\n\t"
 5617             "AND    $dst.lo,$mask" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622     __ andl(Rdst, $mask$$constant);
 5623   %}
 5624   ins_pipe(ialu_reg_mem);
 5625 %}
 5626 
 5627 // Load Unsigned Integer into Long Register
 5628 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
        // ConvI2L(LoadI) masked by an immL_32bits long constant, i.e. an unsigned
        // int widened to long: plain 32-bit load into dst.lo, dst.hi cleared.
        // The mask operand is matched but never emitted.
 5629   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5630   effect(KILL cr);
 5631
 5632   ins_cost(250);
 5633   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5634             "XOR    $dst.hi,$dst.hi" %}
 5635
 5636   ins_encode %{
 5637     __ movl($dst$$Register, $mem$$Address);
 5638     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5639   %}
 5640
 5641   ins_pipe(ialu_reg_mem);
 5642 %}
 5643 
 5644 // Load Long.  Cannot clobber address while loading, so restrict address
 5645 // register to ESI
 5646 instruct loadL(eRegL dst, load_long_memory mem) %{
        // Non-atomic LoadL as two independent 32-bit loads. Rules for loads that
        // require atomic access are the *_volatile variants.
 5647   predicate(!((LoadLNode*)n)->require_atomic_access());
 5648   match(Set dst (LoadL mem));
 5649
 5650   ins_cost(250);
 5651   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5652             "MOV    $dst.hi,$mem+4" %}
 5653
 5654   ins_encode %{
          // Low word at the operand's displacement, high word 4 bytes above it.
 5655     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5656     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
          // The address registers are still needed for the second load, so the
          // first load must not overwrite them; the load_long_memory operand is
          // what restricts the address for that purpose.
 5657     __ movl($dst$$Register, Amemlo);
 5658     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5659   %}
 5660
 5661   ins_pipe(ialu_reg_long_mem);
 5662 %}
 5663 
 5664 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5665 // then store it down to the stack and reload on the int
 5666 // side.
 5667 instruct loadL_volatile(stackSlotL dst, memory mem) %{
        // Atomic LoadL for UseSSE<=1. The destination is a long stack slot, not a
        // register pair.
 5668   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5669   match(Set dst (LoadL mem));
 5670
 5671   ins_cost(200);
 5672   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5673             "FISTp  $dst" %}
        // Encoding lives in the enc_loadL_volatile enc_class (defined elsewhere in
        // this file); per the format it is an x87 FILD followed by FISTP.
 5674   ins_encode(enc_loadL_volatile(mem,dst));
 5675   ins_pipe( fpu_reg_mem );
 5676 %}
 5677 
 5678 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
        // Atomic LoadL for UseSSE>=2 with a stack-slot destination: the 64 bits
        // travel through an XMM temporary in one load and one store.
 5679   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5680   match(Set dst (LoadL mem));
 5681   effect(TEMP tmp);
 5682   ins_cost(180);
 5683   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5684             "MOVSD  $dst,$tmp" %}
 5685   ins_encode %{
 5686     __ movdbl($tmp$$XMMRegister, $mem$$Address);
          // The stack slot is addressed relative to rsp by its displacement.
 5687     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5688   %}
 5689   ins_pipe( pipe_slow );
 5690 %}
 5691 
 5692 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
        // Atomic LoadL for UseSSE>=2 with a register-pair destination. It has a
        // lower ins_cost (160) than the stack-slot variant loadLX_volatile (180).
 5693   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5694   match(Set dst (LoadL mem));
 5695   effect(TEMP tmp);
 5696   ins_cost(160);
 5697   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5698             "MOVD   $dst.lo,$tmp\n\t"
 5699             "PSRLQ  $tmp,32\n\t"
 5700             "MOVD   $dst.hi,$tmp" %}
 5701   ins_encode %{
          // One 64-bit load into the XMM temporary ...
 5702     __ movdbl($tmp$$XMMRegister, $mem$$Address);
          // ... then peel off the low 32 bits, shift the upper 32 down, and
          // peel those off into the high register.
 5703     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5704     __ psrlq($tmp$$XMMRegister, 32);
 5705     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5706   %}
 5707   ins_pipe( pipe_slow );
 5708 %}
 5709 
 5710 // Load Range
 5711 instruct loadRange(rRegI dst, memory mem) %{
        // LoadRange into an int register.
 5712   match(Set dst (LoadRange mem));
 5713
 5714   ins_cost(125);
 5715   format %{ "MOV    $dst,$mem" %}
        // Encoded via shared enc_classes: primary opcode 0x8B (printed as MOV)
        // plus the RegMem operand bytes, bracketed by SetInstMark/ClearInstMark.
 5716   opcode(0x8B);
 5717   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5718   ins_pipe( ialu_reg_mem );
 5719 %}
 5720 
 5721 
 5722 // Load Pointer
 5723 instruct loadP(eRegP dst, memory mem) %{
        // LoadP into a pointer register.
 5724   match(Set dst (LoadP mem));
 5725
 5726   ins_cost(125);
 5727   format %{ "MOV    $dst,$mem" %}
        // Encoded via shared enc_classes: primary opcode 0x8B (printed as MOV)
        // plus the RegMem operand bytes, bracketed by SetInstMark/ClearInstMark.
 5728   opcode(0x8B);
 5729   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5730   ins_pipe( ialu_reg_mem );
 5731 %}
 5732 
 5733 // Load Klass Pointer
 5734 instruct loadKlass(eRegP dst, memory mem) %{
        // LoadKlass into a pointer register; the encoding is the same as loadP's.
 5735   match(Set dst (LoadKlass mem));
 5736
 5737   ins_cost(125);
 5738   format %{ "MOV    $dst,$mem" %}
        // Encoded via shared enc_classes: primary opcode 0x8B (printed as MOV)
        // plus the RegMem operand bytes, bracketed by SetInstMark/ClearInstMark.
 5739   opcode(0x8B);
 5740   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5741   ins_pipe( ialu_reg_mem );
 5742 %}
 5743 
 5744 // Load Double
 5745 instruct loadDPR(regDPR dst, memory mem) %{
        // LoadD into a regDPR operand, selected when UseSSE<=1.
 5746   predicate(UseSSE<=1);
 5747   match(Set dst (LoadD mem));
 5748
 5749   ins_cost(150);
 5750   format %{ "FLD_D  ST,$mem\n\t"
 5751             "FSTP   $dst" %}
 5752   opcode(0xDD);               /* DD /0 */
        // Opcode byte, then the memory operand with /0 in the reg field, then
        // the Pop_Reg_DPR enc_class (the FSTP in the format) to move the value
        // into dst.
 5753   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5754               Pop_Reg_DPR(dst), ClearInstMark );
 5755   ins_pipe( fpu_reg_mem );
 5756 %}
 5757 
 5758 // Load Double to XMM
 5759 instruct loadD(regD dst, memory mem) %{
        // LoadD into an XMM register when UseSSE>=2 and UseXmmLoadAndClearUpper
        // is set; loadD_partial covers the case where that flag is clear.
 5760   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5761   match(Set dst (LoadD mem));
 5762   ins_cost(145);
 5763   format %{ "MOVSD  $dst,$mem" %}
 5764   ins_encode %{
          // NOTE(review): both LoadD XMM rules call movdbl; presumably the macro
          // assembler picks the concrete instruction from the same flag -- confirm.
 5765     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5766   %}
 5767   ins_pipe( pipe_slow );
 5768 %}
 5769 
 5770 instruct loadD_partial(regD dst, memory mem) %{
        // LoadD into an XMM register when UseSSE>=2 and UseXmmLoadAndClearUpper
        // is clear; the format prints MOVLPD instead of MOVSD.
 5771   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5772   match(Set dst (LoadD mem));
 5773   ins_cost(145);
 5774   format %{ "MOVLPD $dst,$mem" %}
 5775   ins_encode %{
          // NOTE(review): same movdbl call as loadD; presumably the macro
          // assembler emits MOVLPD under this flag setting -- confirm.
 5776     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5777   %}
 5778   ins_pipe( pipe_slow );
 5779 %}
 5780 
 5781 // Load to XMM register (single-precision floating point)
 5782 // MOVSS instruction
 5783 instruct loadF(regF dst, memory mem) %{
        // LoadF into an XMM register, selected when UseSSE>=1.
 5784   predicate(UseSSE>=1);
 5785   match(Set dst (LoadF mem));
 5786   ins_cost(145);
 5787   format %{ "MOVSS  $dst,$mem" %}
 5788   ins_encode %{
 5789     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5790   %}
 5791   ins_pipe( pipe_slow );
 5792 %}
 5793 
 5794 // Load Float
 5795 instruct loadFPR(regFPR dst, memory mem) %{
        // LoadF into a regFPR operand, selected only when UseSSE==0.
 5796   predicate(UseSSE==0);
 5797   match(Set dst (LoadF mem));
 5798
 5799   ins_cost(150);
 5800   format %{ "FLD_S  ST,$mem\n\t"
 5801             "FSTP   $dst" %}
 5802   opcode(0xD9);               /* D9 /0 */
        // Opcode byte, then the memory operand with /0 in the reg field, then
        // the Pop_Reg_FPR enc_class (the FSTP in the format) to move the value
        // into dst.
 5803   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5804               Pop_Reg_FPR(dst), ClearInstMark );
 5805   ins_pipe( fpu_reg_mem );
 5806 %}
 5807 
 5808 // Load Effective Address
 5809 instruct leaP8(eRegP dst, indOffset8 mem) %{
        // Materializes the address described by an indOffset8 operand into a
        // pointer register. The match is on the operand itself, not on a load.
 5810   match(Set dst mem);
 5811
 5812   ins_cost(110);
 5813   format %{ "LEA    $dst,$mem" %}
        // Primary opcode 0x8D (printed as LEA) followed by the RegMem operand bytes.
 5814   opcode(0x8D);
 5815   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5816   ins_pipe( ialu_reg_reg_fat );
 5817 %}
 5818 
 5819 instruct leaP32(eRegP dst, indOffset32 mem) %{
        // Materializes the address described by an indOffset32 operand into a
        // pointer register. The match is on the operand itself, not on a load.
 5820   match(Set dst mem);
 5821
 5822   ins_cost(110);
 5823   format %{ "LEA    $dst,$mem" %}
        // Primary opcode 0x8D (printed as LEA) followed by the RegMem operand bytes.
 5824   opcode(0x8D);
 5825   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5826   ins_pipe( ialu_reg_reg_fat );
 5827 %}
 5828 
 5829 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
        // Materializes the address described by an indIndexOffset operand into a
        // pointer register. The match is on the operand itself, not on a load.
 5830   match(Set dst mem);
 5831
 5832   ins_cost(110);
 5833   format %{ "LEA    $dst,$mem" %}
        // Primary opcode 0x8D (printed as LEA) followed by the RegMem operand bytes.
 5834   opcode(0x8D);
 5835   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5836   ins_pipe( ialu_reg_reg_fat );
 5837 %}
 5838 
 5839 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
        // Materializes the address described by an indIndexScale operand into a
        // pointer register. The match is on the operand itself, not on a load.
 5840   match(Set dst mem);
 5841
 5842   ins_cost(110);
 5843   format %{ "LEA    $dst,$mem" %}
        // Primary opcode 0x8D (printed as LEA) followed by the RegMem operand bytes.
 5844   opcode(0x8D);
 5845   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5846   ins_pipe( ialu_reg_reg_fat );
 5847 %}
 5848 
 5849 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
        // Materializes the address described by an indIndexScaleOffset operand
        // into a pointer register. The match is on the operand itself, not on a load.
 5850   match(Set dst mem);
 5851
 5852   ins_cost(110);
 5853   format %{ "LEA    $dst,$mem" %}
        // Primary opcode 0x8D (printed as LEA) followed by the RegMem operand bytes.
 5854   opcode(0x8D);
 5855   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5856   ins_pipe( ialu_reg_reg_fat );
 5857 %}
 5858 
 5859 // Load Constant
 5860 instruct loadConI(rRegI dst, immI src) %{
        // Loads an arbitrary int constant into a register. No flags effect is
        // declared here, unlike the cheaper XOR-based loadConI0.
 5861   match(Set dst src);
 5862
 5863   format %{ "MOV    $dst,$src" %}
        // Encoding is delegated to the LdImmI enc_class (defined elsewhere).
 5864   ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
 5865   ins_pipe( ialu_reg_fat );
 5866 %}
 5867 
 5868 // Load Constant zero
 5869 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
        // Loads int zero by XOR-ing the register with itself. The ins_cost of 50
        // is declared so this rule can be preferred for the zero constant.
 5870   match(Set dst src);
        // The flags operand is declared killed (the emitted instruction is an XOR).
 5871   effect(KILL cr);
 5872
 5873   ins_cost(50);
 5874   format %{ "XOR    $dst,$dst" %}
 5875   opcode(0x33);  /* + rd */
        // Primary opcode 0x33 with dst as both register operands.
 5876   ins_encode( OpcP, RegReg( dst, dst ) );
 5877   ins_pipe( ialu_reg );
 5878 %}
 5879 
 5880 instruct loadConP(eRegP dst, immP src) %{
        // Loads a pointer constant into a pointer register.
 5881   match(Set dst src);
 5882
 5883   format %{ "MOV    $dst,$src" %}
 5884   opcode(0xB8);  /* + rd */
        // Encoding is delegated to the LdImmP enc_class (defined elsewhere); it is
        // bracketed by SetInstMark/ClearInstMark.
 5885   ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
 5886   ins_pipe( ialu_reg_fat );
 5887 %}
 5888 
 5889 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
        // Loads a long constant into a lo/hi register pair, one 32-bit half at a time.
 5890   match(Set dst src);
        // NOTE(review): cr is declared killed although the format shows only MOVs;
        // presumably LdImmL_Lo/LdImmL_Hi may emit a flag-setting form for some
        // values -- confirm in those enc_classes.
 5891   effect(KILL cr);
 5892   ins_cost(200);
 5893   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5894             "MOV    $dst.hi,$src.hi" %}
 5895   opcode(0xB8);
 5896   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5897   ins_pipe( ialu_reg_long_fat );
 5898 %}
 5899 
 5900 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
        // Loads long zero by XOR-ing each half of the register pair with itself.
        // It has a lower ins_cost (150) than the general loadConL (200).
 5901   match(Set dst src);
 5902   effect(KILL cr);
 5903   ins_cost(150);
 5904   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5905             "XOR    $dst.hi,$dst.hi" %}
        // Two opcodes are declared (0x33, 0x33); RegReg_Lo / RegReg_Hi encode the
        // low-half and high-half instruction respectively.
 5906   opcode(0x33,0x33);
 5907   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5908   ins_pipe( ialu_reg_long );
 5909 %}
 5910 
 5911 // The instruction usage is guarded by predicate in operand immFPR().
 5912 instruct loadConFPR(regFPR dst, immFPR con) %{
        // Loads a float constant from the constant table into a regFPR operand.
 5913   match(Set dst con);
 5914   ins_cost(125);
 5915   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5916             "FSTP   $dst" %}
 5917   ins_encode %{
          // Load the single-precision constant from its constant-table address.
 5918     __ fld_s($constantaddress($con));
          // NOTE(review): fstp_d is given a register number here, not an address;
          // presumably this is the pop-into-register form -- confirm in the assembler.
 5919     __ fstp_d($dst$$reg);
 5920   %}
 5921   ins_pipe(fpu_reg_con);
 5922 %}
 5923 
 5924 // The instruction usage is guarded by predicate in operand immFPR0().
 5925 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
        // Loads the float constant matched by immFPR0 using FLDZ instead of a
        // constant-table access, then pops it into dst.
 5926   match(Set dst con);
 5927   ins_cost(125);
 5928   format %{ "FLDZ   ST\n\t"
 5929             "FSTP   $dst" %}
 5930   ins_encode %{
 5931     __ fldz();
 5932     __ fstp_d($dst$$reg);
 5933   %}
 5934   ins_pipe(fpu_reg_con);
 5935 %}
 5936 
 5937 // The instruction usage is guarded by predicate in operand immFPR1().
 5938 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
        // Loads the float constant matched by immFPR1 using FLD1 instead of a
        // constant-table access, then pops it into dst.
 5939   match(Set dst con);
 5940   ins_cost(125);
 5941   format %{ "FLD1   ST\n\t"
 5942             "FSTP   $dst" %}
 5943   ins_encode %{
 5944     __ fld1();
 5945     __ fstp_d($dst$$reg);
 5946   %}
 5947   ins_pipe(fpu_reg_con);
 5948 %}
 5949 
 5950 // The instruction usage is guarded by predicate in operand immF().
 5951 instruct loadConF(regF dst, immF con) %{
        // Loads a float constant from the constant table into an XMM register.
 5952   match(Set dst con);
 5953   ins_cost(125);
 5954   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 5955   ins_encode %{
 5956     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5957   %}
 5958   ins_pipe(pipe_slow);
 5959 %}
 5960 
 5961 // The instruction usage is guarded by predicate in operand immF0().
 5962 instruct loadConF0(regF dst, immF0 src) %{
        // Loads float 0.0 by XOR-ing the XMM register with itself. It has a
        // lower ins_cost (100) than the constant-table loadConF (125).
 5963   match(Set dst src);
 5964   ins_cost(100);
 5965   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 5966   ins_encode %{
 5967     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5968   %}
 5969   ins_pipe(pipe_slow);
 5970 %}
 5971 
 5972 // The instruction usage is guarded by predicate in operand immDPR().
 5973 instruct loadConDPR(regDPR dst, immDPR con) %{
        // Loads a double constant from the constant table into a regDPR operand.
 5974   match(Set dst con);
 5975   ins_cost(125);
 5976
 5977   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 5978             "FSTP   $dst" %}
 5979   ins_encode %{
          // Load the double from its constant-table address, then pop it into
          // the destination register (fstp_d is given the register number).
 5980     __ fld_d($constantaddress($con));
 5981     __ fstp_d($dst$$reg);
 5982   %}
 5983   ins_pipe(fpu_reg_con);
 5984 %}
 5985 
 5986 // The instruction usage is guarded by predicate in operand immDPR0().
 5987 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
        // Loads the double constant matched by immDPR0 using FLDZ instead of a
        // constant-table access, then pops it into dst.
 5988   match(Set dst con);
 5989   ins_cost(125);
 5990
 5991   format %{ "FLDZ   ST\n\t"
 5992             "FSTP   $dst" %}
 5993   ins_encode %{
 5994     __ fldz();
 5995     __ fstp_d($dst$$reg);
 5996   %}
 5997   ins_pipe(fpu_reg_con);
 5998 %}
 5999 
 6000 // The instruction usage is guarded by predicate in operand immDPR1().
 6001 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
        // Loads the double constant matched by immDPR1 using FLD1 instead of a
        // constant-table access, then pops it into dst.
 6002   match(Set dst con);
 6003   ins_cost(125);
 6004
 6005   format %{ "FLD1   ST\n\t"
 6006             "FSTP   $dst" %}
 6007   ins_encode %{
 6008     __ fld1();
 6009     __ fstp_d($dst$$reg);
 6010   %}
 6011   ins_pipe(fpu_reg_con);
 6012 %}
 6013 
 6014 // The instruction usage is guarded by predicate in operand immD().
 6015 instruct loadConD(regD dst, immD con) %{
        // Loads a double constant from the constant table into an XMM register.
 6016   match(Set dst con);
 6017   ins_cost(125);
 6018   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6019   ins_encode %{
 6020     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6021   %}
 6022   ins_pipe(pipe_slow);
 6023 %}
 6024 
 6025 // The instruction usage is guarded by predicate in operand immD0().
      // Materializes the double constant 0.0 in an XMM register by XOR-ing the
      // destination with itself; cost 100 is lower than loadConD's 125.
 6026 instruct loadConD0(regD dst, immD0 src) %{
 6027   match(Set dst src);
 6028   ins_cost(100);
 6029   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6030   ins_encode %{
 6031     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6032   %}
 6033   ins_pipe( pipe_slow );
 6034 %}
 6035 
 6036 // Load Stack Slot
      // Moves an int from a stack slot into a general register: opcode 0x8B
      // followed by the register/memory operand encoding.
 6037 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6038   match(Set dst src);
 6039   ins_cost(125);
 6040 
 6041   format %{ "MOV    $dst,$src" %}
 6042   opcode(0x8B);
 6043   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6044   ins_pipe( ialu_reg_mem );
 6045 %}
 6046 
 6047 instruct loadSSL(eRegL dst, stackSlotL src) %{
        // Load a long from a stack slot as two 32-bit MOVs: the primary opcode
        // with RegMem handles the low word, the secondary opcode with RegMem_Hi
        // handles the high word (see the format text).
 6048   match(Set dst src);
 6049 
 6050   ins_cost(200);
 6051   format %{ "MOV    $dst,$src.lo\n\t"
 6052             "MOV    $dst+4,$src.hi" %}
 6053   opcode(0x8B, 0x8B);
 6054   ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
 6055   ins_pipe( ialu_mem_long_reg );
 6056 %}
 6057 
 6058 // Load Stack Slot
      // Pointer variant of loadSSI: moves a pointer from a stack slot into a
      // pointer register using opcode 0x8B.
 6059 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6060   match(Set dst src);
 6061   ins_cost(125);
 6062 
 6063   format %{ "MOV    $dst,$src" %}
 6064   opcode(0x8B);
 6065   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6066   ins_pipe( ialu_reg_mem );
 6067 %}
 6068 
 6069 // Load Stack Slot
      // x87 float variant: pushes the 32-bit value from the stack slot with FLD
      // and then pops it into $dst (Pop_Reg_FPR).
 6070 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6071   match(Set dst src);
 6072   ins_cost(125);
 6073 
 6074   format %{ "FLD_S  $src\n\t"
 6075             "FSTP   $dst" %}
 6076   opcode(0xD9);               /* D9 /0, FLD m32real */
 6077   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6078               Pop_Reg_FPR(dst), ClearInstMark );
 6079   ins_pipe( fpu_reg_mem );
 6080 %}
 6081 
 6082 // Load Stack Slot
      // x87 double variant: pushes the 64-bit value from the stack slot with FLD
      // and then pops it into $dst (Pop_Reg_DPR).
 6083 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6084   match(Set dst src);
 6085   ins_cost(125);
 6086 
 6087   format %{ "FLD_D  $src\n\t"
 6088             "FSTP   $dst" %}
 6089   opcode(0xDD);               /* DD /0, FLD m64real */
 6090   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6091               Pop_Reg_DPR(dst), ClearInstMark );
 6092   ins_pipe( fpu_reg_mem );
 6093 %}
 6094 
 6095 // Prefetch instructions for allocation.
 6096 // Must be safe to execute with invalid address (cannot fault).
 6097 
 6098 instruct prefetchAlloc0( memory mem ) %{
        // Selected when UseSSE==0 and AllocatePrefetchInstr!=3: the allocation
        // prefetch is matched but produces no code (size 0, empty encoding).
 6099   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6100   match(PrefetchAllocation mem);
 6101   ins_cost(0);
 6102   size(0);
 6103   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6104   ins_encode();
 6105   ins_pipe(empty);
 6106 %}
 6107 
 6108 instruct prefetchAlloc( memory mem ) %{
        // Selected when AllocatePrefetchInstr==3 (independent of UseSSE):
        // emits PREFETCHW on the allocation address.
 6109   predicate(AllocatePrefetchInstr==3);
 6110   match( PrefetchAllocation mem );
 6111   ins_cost(100);
 6112 
 6113   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6114   ins_encode %{
 6115     __ prefetchw($mem$$Address);
 6116   %}
 6117   ins_pipe(ialu_mem);
 6118 %}
 6119 
 6120 instruct prefetchAllocNTA( memory mem ) %{
        // Selected when UseSSE>=1 and AllocatePrefetchInstr==0:
        // emits PREFETCHNTA on the allocation address.
 6121   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6122   match(PrefetchAllocation mem);
 6123   ins_cost(100);
 6124 
 6125   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6126   ins_encode %{
 6127     __ prefetchnta($mem$$Address);
 6128   %}
 6129   ins_pipe(ialu_mem);
 6130 %}
 6131 
 6132 instruct prefetchAllocT0( memory mem ) %{
        // Selected when UseSSE>=1 and AllocatePrefetchInstr==1:
        // emits PREFETCHT0 on the allocation address.
 6133   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6134   match(PrefetchAllocation mem);
 6135   ins_cost(100);
 6136 
 6137   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6138   ins_encode %{
 6139     __ prefetcht0($mem$$Address);
 6140   %}
 6141   ins_pipe(ialu_mem);
 6142 %}
 6143 
 6144 instruct prefetchAllocT2( memory mem ) %{
        // Selected when UseSSE>=1 and AllocatePrefetchInstr==2:
        // emits PREFETCHT2 on the allocation address.
 6145   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6146   match(PrefetchAllocation mem);
 6147   ins_cost(100);
 6148 
 6149   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6150   ins_encode %{
 6151     __ prefetcht2($mem$$Address);
 6152   %}
 6153   ins_pipe(ialu_mem);
 6154 %}
 6155 
 6156 //----------Store Instructions-------------------------------------------------
 6157 
 6158 // Store Byte
      // Stores the low byte of $src to memory with opcode 0x88.
      // NOTE(review): src uses operand class xRegI rather than rRegI, presumably
      // to restrict it to byte-addressable registers; confirm against the
      // operand definition.
 6159 instruct storeB(memory mem, xRegI src) %{
 6160   match(Set mem (StoreB mem src));
 6161 
 6162   ins_cost(125);
 6163   format %{ "MOV8   $mem,$src" %}
 6164   opcode(0x88);
 6165   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6166   ins_pipe( ialu_mem_reg );
 6167 %}
 6168 
 6169 // Store Char/Short
      // 16-bit store: the secondary opcode byte 0x66 is emitted first (OpcS),
      // ahead of the primary opcode 0x89 (OpcP), so it acts as a prefix.
 6170 instruct storeC(memory mem, rRegI src) %{
 6171   match(Set mem (StoreC mem src));
 6172 
 6173   ins_cost(125);
 6174   format %{ "MOV16  $mem,$src" %}
 6175   opcode(0x89, 0x66);
 6176   ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
 6177   ins_pipe( ialu_mem_reg );
 6178 %}
 6179 
 6180 // Store Integer
      // Stores a 32-bit general register to memory with opcode 0x89.
 6181 instruct storeI(memory mem, rRegI src) %{
 6182   match(Set mem (StoreI mem src));
 6183 
 6184   ins_cost(125);
 6185   format %{ "MOV    $mem,$src" %}
 6186   opcode(0x89);
 6187   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6188   ins_pipe( ialu_mem_reg );
 6189 %}
 6190 
 6191 // Store Long
      // Non-atomic long store only (the predicate rejects nodes that require
      // atomic access): the value is written as two separate 32-bit MOVs,
      // low word first and then the high word at $mem+4.
 6192 instruct storeL(long_memory mem, eRegL src) %{
 6193   predicate(!((StoreLNode*)n)->require_atomic_access());
 6194   match(Set mem (StoreL mem src));
 6195 
 6196   ins_cost(200);
 6197   format %{ "MOV    $mem,$src.lo\n\t"
 6198             "MOV    $mem+4,$src.hi" %}
 6199   opcode(0x89, 0x89);
 6200   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
 6201   ins_pipe( ialu_mem_long_reg );
 6202 %}
 6203 
 6204 // Store Long to Integer
      // Folds the ConvL2I into the store: a single 32-bit MOV of $src$$Register
      // (shown as $src.lo in the format) writes the truncated value.
 6205 instruct storeL2I(memory mem, eRegL src) %{
 6206   match(Set mem (StoreI mem (ConvL2I src)));
 6207 
 6208   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6209   ins_encode %{
 6210     __ movl($mem$$Address, $src$$Register);
 6211   %}
 6212   ins_pipe(ialu_mem_reg);
 6213 %}
 6214 
 6215 // Volatile Store Long.  Must be atomic, so move it into
 6216 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6217 // target address before the store (for null-ptr checks)
 6218 // so the memory operand is used twice in the encoding.
      // Used only when UseSSE<=1 and the StoreL node requires atomic access.
      // The probe is the CMP (opcode 0x3B with EAX and $mem), which is why the
      // flags register is declared killed.
 6219 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6220   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6221   match(Set mem (StoreL mem src));
 6222   effect( KILL cr );
 6223   ins_cost(400);
 6224   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6225             "FILD   $src\n\t"
 6226             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6227   opcode(0x3B);
 6228   ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
 6229   ins_pipe( fpu_reg_mem );
 6230 %}
 6231 
 6232 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
        // Atomic volatile long store for UseSSE>=2 when the source lives in a
        // stack slot: probe the address with CMP, load the 64-bit value from
        // [rsp + disp] into the XMM temp, then write it with one 64-bit store.
 6233   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6234   match(Set mem (StoreL mem src));
 6235   effect( TEMP tmp, KILL cr );
 6236   ins_cost(380);
 6237   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6238             "MOVSD  $tmp,$src\n\t"
 6239             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6240   ins_encode %{
 6241     __ cmpl(rax, $mem$$Address);
 6242     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6243     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6244   %}
 6245   ins_pipe( pipe_slow );
 6246 %}
 6247 
 6248 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
        // Atomic volatile long store for UseSSE>=2 when the source is in a
        // register pair: each 32-bit half is moved into its own XMM temp, the
        // halves are interleaved with PUNPCKLDQ, and the combined 64-bit value
        // is written with a single store.  Cost 360 is below the stack-slot
        // variant's 380.
 6249   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6250   match(Set mem (StoreL mem src));
 6251   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6252   ins_cost(360);
 6253   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6254             "MOVD   $tmp,$src.lo\n\t"
 6255             "MOVD   $tmp2,$src.hi\n\t"
 6256             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6257             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6258   ins_encode %{
 6259     __ cmpl(rax, $mem$$Address);
 6260     __ movdl($tmp$$XMMRegister, $src$$Register);
 6261     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6262     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6263     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6264   %}
 6265   ins_pipe( pipe_slow );
 6266 %}
 6267 
 6268 // Store Pointer; for storing unknown oops and raw pointers
      // Same encoding as storeI (opcode 0x89), with a pointer register source.
 6269 instruct storeP(memory mem, anyRegP src) %{
 6270   match(Set mem (StoreP mem src));
 6271 
 6272   ins_cost(125);
 6273   format %{ "MOV    $mem,$src" %}
 6274   opcode(0x89);
 6275   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6276   ins_pipe( ialu_mem_reg );
 6277 %}
 6278 
 6279 // Store Integer Immediate
      // Writes a 32-bit immediate to memory: opcode C7 /0 followed by the
      // memory operand and the constant emitted by Con32.
 6280 instruct storeImmI(memory mem, immI src) %{
 6281   match(Set mem (StoreI mem src));
 6282 
 6283   ins_cost(150);
 6284   format %{ "MOV    $mem,$src" %}
 6285   opcode(0xC7);               /* C7 /0 */
 6286   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
 6287   ins_pipe( ialu_mem_imm );
 6288 %}
 6289 
 6290 // Store Short/Char Immediate
      // Only matched when UseStoreImmI16 is set.  Same C7 /0 form as storeImmI,
      // but preceded by SizePrefix and followed by the constant from Con16.
 6291 instruct storeImmI16(memory mem, immI16 src) %{
 6292   predicate(UseStoreImmI16);
 6293   match(Set mem (StoreC mem src));
 6294 
 6295   ins_cost(150);
 6296   format %{ "MOV16  $mem,$src" %}
 6297   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6298   ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
 6299   ins_pipe( ialu_mem_imm );
 6300 %}
 6301 
 6302 // Store Pointer Immediate; null pointers or constant oops that do not
 6303 // need card-mark barriers.
      // Encoded like storeImmI: C7 /0 with the pointer constant emitted by Con32.
 6304 instruct storeImmP(memory mem, immP src) %{
 6305   match(Set mem (StoreP mem src));
 6306 
 6307   ins_cost(150);
 6308   format %{ "MOV    $mem,$src" %}
 6309   opcode(0xC7);               /* C7 /0 */
 6310   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
 6311   ins_pipe( ialu_mem_imm );
 6312 %}
 6313 
 6314 // Store Byte Immediate
      // Writes an 8-bit immediate to memory: opcode C6 /0 followed by the
      // memory operand and the constant emitted by Con8or32.
 6315 instruct storeImmB(memory mem, immI8 src) %{
 6316   match(Set mem (StoreB mem src));
 6317 
 6318   ins_cost(150);
 6319   format %{ "MOV8   $mem,$src" %}
 6320   opcode(0xC6);               /* C6 /0 */
 6321   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
 6322   ins_pipe( ialu_mem_imm );
 6323 %}
 6324 
 6325 // Store CMS card-mark Immediate
      // Matches StoreCM (card-mark store) of an 8-bit immediate; the encoding
      // is identical to storeImmB (C6 /0 plus Con8or32).
 6326 instruct storeImmCM(memory mem, immI8 src) %{
 6327   match(Set mem (StoreCM mem src));
 6328 
 6329   ins_cost(150);
 6330   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6331   opcode(0xC6);               /* C6 /0 */
 6332   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
 6333   ins_pipe( ialu_mem_imm );
 6334 %}
 6335 
 6336 // Store Double
      // x87 path (UseSSE<=1): stores the double held in $src (operand class
      // regDPR1) to memory with FST_D via enc_FPR_store.
 6337 instruct storeDPR( memory mem, regDPR1 src) %{
 6338   predicate(UseSSE<=1);
 6339   match(Set mem (StoreD mem src));
 6340 
 6341   ins_cost(100);
 6342   format %{ "FST_D  $mem,$src" %}
 6343   opcode(0xDD);       /* DD /2 */
 6344   ins_encode( enc_FPR_store(mem,src) );
 6345   ins_pipe( fpu_mem_reg );
 6346 %}
 6347 
 6348 // Store double does rounding on x86
      // Matches a StoreD whose input is a RoundDouble and emits the same FST_D
      // as storeDPR, so no separate rounding instruction is generated.
 6349 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6350   predicate(UseSSE<=1);
 6351   match(Set mem (StoreD mem (RoundDouble src)));
 6352 
 6353   ins_cost(100);
 6354   format %{ "FST_D  $mem,$src\t# round" %}
 6355   opcode(0xDD);       /* DD /2 */
 6356   ins_encode( enc_FPR_store(mem,src) );
 6357   ins_pipe( fpu_mem_reg );
 6358 %}
 6359 
 6360 // Store XMM register to memory (double-precision floating points)
 6361 // MOVSD instruction
      // Used when UseSSE>=2; cost 95 is below the x87 storeDPR cost of 100.
 6362 instruct storeD(memory mem, regD src) %{
 6363   predicate(UseSSE>=2);
 6364   match(Set mem (StoreD mem src));
 6365   ins_cost(95);
 6366   format %{ "MOVSD  $mem,$src" %}
 6367   ins_encode %{
 6368     __ movdbl($mem$$Address, $src$$XMMRegister);
 6369   %}
 6370   ins_pipe( pipe_slow );
 6371 %}
 6372 
 6373 // Store XMM register to memory (single-precision floating point)
 6374 // MOVSS instruction
      // Used when UseSSE>=1; cost 95 is below the x87 storeFPR cost of 100.
 6375 instruct storeF(memory mem, regF src) %{
 6376   predicate(UseSSE>=1);
 6377   match(Set mem (StoreF mem src));
 6378   ins_cost(95);
 6379   format %{ "MOVSS  $mem,$src" %}
 6380   ins_encode %{
 6381     __ movflt($mem$$Address, $src$$XMMRegister);
 6382   %}
 6383   ins_pipe( pipe_slow );
 6384 %}
 6385 
 6386 
 6387 // Store Float
      // x87 path (UseSSE==0): stores the float held in $src (operand class
      // regFPR1) to memory with FST_S via enc_FPR_store.
 6388 instruct storeFPR( memory mem, regFPR1 src) %{
 6389   predicate(UseSSE==0);
 6390   match(Set mem (StoreF mem src));
 6391 
 6392   ins_cost(100);
 6393   format %{ "FST_S  $mem,$src" %}
 6394   opcode(0xD9);       /* D9 /2 */
 6395   ins_encode( enc_FPR_store(mem,src) );
 6396   ins_pipe( fpu_mem_reg );
 6397 %}
 6398 
 6399 // Store Float does rounding on x86
      // Matches a StoreF whose input is a RoundFloat and emits the same FST_S
      // as storeFPR, so no separate rounding instruction is generated.
 6400 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6401   predicate(UseSSE==0);
 6402   match(Set mem (StoreF mem (RoundFloat src)));
 6403 
 6404   ins_cost(100);
 6405   format %{ "FST_S  $mem,$src\t# round" %}
 6406   opcode(0xD9);       /* D9 /2 */
 6407   ins_encode( enc_FPR_store(mem,src) );
 6408   ins_pipe( fpu_mem_reg );
 6409 %}
 6410 
 6411 // Store Float does rounding on x86
      // Matches a StoreF whose input is a ConvD2F of an x87 double (regDPR1):
      // the double-to-float conversion is folded into the single-precision
      // FST_S store.  Applies when UseSSE<=1.
 6412 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6413   predicate(UseSSE<=1);
 6414   match(Set mem (StoreF mem (ConvD2F src)));
 6415 
 6416   ins_cost(100);
 6417   format %{ "FST_S  $mem,$src\t# D-round" %}
 6418   opcode(0xD9);       /* D9 /2 */
 6419   ins_encode( enc_FPR_store(mem,src) );
 6420   ins_pipe( fpu_mem_reg );
 6421 %}
 6422 
 6423 // Store immediate Float value (it is faster than store from FPU register)
 6424 // The instruction usage is guarded by predicate in operand immFPR().
      // Emitted as an integer MOV (C7 /0) whose 32-bit immediate is the float's
      // bit pattern (Con32FPR_as_bits); cost 50 undercuts the register stores.
 6425 instruct storeFPR_imm( memory mem, immFPR src) %{
 6426   match(Set mem (StoreF mem src));
 6427 
 6428   ins_cost(50);
 6429   format %{ "MOV    $mem,$src\t# store float" %}
 6430   opcode(0xC7);               /* C7 /0 */
 6431   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
 6432   ins_pipe( ialu_mem_imm );
 6433 %}
 6434 
 6435 // Store immediate Float value (it is faster than store from XMM register)
 6436 // The instruction usage is guarded by predicate in operand immF().
      // Emitted as an integer MOV (C7 /0) whose 32-bit immediate is the float's
      // bit pattern (Con32F_as_bits); cost 50 undercuts the register stores.
 6437 instruct storeF_imm( memory mem, immF src) %{
 6438   match(Set mem (StoreF mem src));
 6439 
 6440   ins_cost(50);
 6441   format %{ "MOV    $mem,$src\t# store float" %}
 6442   opcode(0xC7);               /* C7 /0 */
 6443   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
 6444   ins_pipe( ialu_mem_imm );
 6445 %}
 6446 
 6447 // Store Integer to stack slot
      // Spill-style move of an int register into a stack slot; opcode 0x89 is
      // emitted through the OpcPRegSS encoding class.
 6448 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6449   match(Set dst src);
 6450 
 6451   ins_cost(100);
 6452   format %{ "MOV    $dst,$src" %}
 6453   opcode(0x89);
 6454   ins_encode( OpcPRegSS( dst, src ) );
 6455   ins_pipe( ialu_mem_reg );
 6456 %}
 6457 
 6458 // Store Pointer to stack slot
      // Pointer counterpart of storeSSI: moves a pointer register into a stack
      // slot using opcode 0x89 through the OpcPRegSS encoding class.
 6459 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6460   match(Set dst src);
 6461 
 6462   ins_cost(100);
 6463   format %{ "MOV    $dst,$src" %}
 6464   opcode(0x89);
 6465   ins_encode( OpcPRegSS( dst, src ) );
 6466   ins_pipe( ialu_mem_reg );
 6467 %}
 6468 
 6469 // Store Long to stack slot
      // Written as two 32-bit MOVs: primary opcode with RegMem for the low
      // word, secondary opcode with RegMem_Hi for the high word at $dst+4.
 6470 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6471   match(Set dst src);
 6472 
 6473   ins_cost(200);
 6474   format %{ "MOV    $dst,$src.lo\n\t"
 6475             "MOV    $dst+4,$src.hi" %}
 6476   opcode(0x89, 0x89);
 6477   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
 6478   ins_pipe( ialu_mem_long_reg );
 6479 %}
 6480 
 6481 //----------MemBar Instructions-----------------------------------------------
 6482 // Memory barrier flavors
 6483 
 6484 instruct membar_acquire() %{
        // Matches both MemBarAcquire and LoadFence; no machine code is emitted
        // (size 0, empty encoding).
 6485   match(MemBarAcquire);
 6486   match(LoadFence);
 6487   ins_cost(400);
 6488 
 6489   size(0);
 6490   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6491   ins_encode();
 6492   ins_pipe(empty);
 6493 %}
 6494 
 6495 instruct membar_acquire_lock() %{
        // Acquire barrier attached to a lock operation; emits nothing because,
        // per the format text, the preceding CMPXCHG in FastLock already
        // provides the ordering.
 6496   match(MemBarAcquireLock);
 6497   ins_cost(0);
 6498 
 6499   size(0);
 6500   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6501   ins_encode( );
 6502   ins_pipe(empty);
 6503 %}
 6504 
 6505 instruct membar_release() %{
        // Matches both MemBarRelease and StoreFence; no machine code is emitted
        // (size 0, empty encoding).
 6506   match(MemBarRelease);
 6507   match(StoreFence);
 6508   ins_cost(400);
 6509 
 6510   size(0);
 6511   format %{ "MEMBAR-release ! (empty encoding)" %}
 6512   ins_encode( );
 6513   ins_pipe(empty);
 6514 %}
 6515 
 6516 instruct membar_release_lock() %{
        // Release barrier attached to an unlock operation; emits nothing
        // because, per the format text, a FastUnlock follows.
 6517   match(MemBarReleaseLock);
 6518   ins_cost(0);
 6519 
 6520   size(0);
 6521   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6522   ins_encode( );
 6523   ins_pipe(empty);
 6524 %}
 6525 
 6526 instruct membar_volatile(eFlagsReg cr) %{
        // The only barrier in this group that emits code: it requests a
        // StoreLoad barrier from the macro assembler.  The format text shows it
        // as a LOCK ADDL on the top of stack, and the flags register is
        // declared killed.
 6527   match(MemBarVolatile);
 6528   effect(KILL cr);
 6529   ins_cost(400);
 6530 
 6531   format %{
 6532     $$template
 6533     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6534   %}
 6535   ins_encode %{
 6536     __ membar(Assembler::StoreLoad);
 6537   %}
 6538   ins_pipe(pipe_slow);
 6539 %}
 6540 
 6541 instruct unnecessary_membar_volatile() %{
        // Zero-cost, zero-size alternative to membar_volatile (cost 400),
        // eligible only when Matcher::post_store_load_barrier(n) holds for the
        // node being matched.
 6542   match(MemBarVolatile);
 6543   predicate(Matcher::post_store_load_barrier(n));
 6544   ins_cost(0);
 6545 
 6546   size(0);
 6547   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6548   ins_encode( );
 6549   ins_pipe(empty);
 6550 %}
 6551 
 6552 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence; no machine code is
        // emitted (size 0, empty encoding).
 6553   match(MemBarStoreStore);
 6554   match(StoreStoreFence);
 6555   ins_cost(0);
 6556 
 6557   size(0);
 6558   format %{ "MEMBAR-storestore (empty encoding)" %}
 6559   ins_encode( );
 6560   ins_pipe(empty);
 6561 %}
 6562 
 6563 //----------Move Instructions--------------------------------------------------
 6564 instruct castX2P(eAXRegP dst, eAXRegI src) %{
        // Integer-to-pointer cast with an empty encoding and zero cost.
        // NOTE(review): both operands use EAX-specific operand classes,
        // presumably so source and destination share a register and no move is
        // needed; confirm against the operand definitions.
 6565   match(Set dst (CastX2P src));
 6566   format %{ "# X2P  $dst, $src" %}
 6567   ins_encode( /*empty encoding*/ );
 6568   ins_cost(0);
 6569   ins_pipe(empty);
 6570 %}
 6571 
 6572 instruct castP2X(rRegI dst, eRegP src ) %{
        // Pointer-to-integer cast implemented as a register copy (enc_Copy).
 6573   match(Set dst (CastP2X src));
 6574   ins_cost(50);
 6575   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6576   ins_encode( enc_Copy( dst, src) );
 6577   ins_pipe( ialu_reg_reg );
 6578 %}
 6579 
 6580 //----------Conditional Move---------------------------------------------------
 6581 // Conditional move
      // Fallback for CPUs without CMOV support (signed flags): a short branch on
      // the inverted condition jumps over a plain MOV.
 6582 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6583   predicate(!VM_Version::supports_cmov() );
 6584   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6585   ins_cost(200);
 6586   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6587             "MOV    $dst,$src\n"
 6588       "skip:" %}
 6589   ins_encode %{
 6590     Label Lskip;
 6591     // Invert sense of branch from sense of CMOV
 6592     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6593     __ movl($dst$$Register, $src$$Register);
 6594     __ bind(Lskip);
 6595   %}
 6596   ins_pipe( pipe_cmov_reg );
 6597 %}
 6598 
 6599 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
        // Unsigned-flags counterpart of jmovI_reg for CPUs without CMOV
        // support: a short branch on the inverted condition skips the MOV.
 6600   predicate(!VM_Version::supports_cmov() );
 6601   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6602   ins_cost(200);
 6603   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6604             "MOV    $dst,$src\n"
 6605       "skip:" %}
 6606   ins_encode %{
 6607     Label Lskip;
 6608     // Invert sense of branch from sense of CMOV
 6609     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6610     __ movl($dst$$Register, $src$$Register);
 6611     __ bind(Lskip);
 6612   %}
 6613   ins_pipe( pipe_cmov_reg );
 6614 %}
 6615 
 6616 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
        // Register-to-register integer CMOV for signed flags; requires CMOV
        // support.
        // NOTE(review): enc_cmov(cop) presumably combines the condition code
        // with the 0x0F,0x40 base opcode; confirm against the enc_class.
 6617   predicate(VM_Version::supports_cmov() );
 6618   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6619   ins_cost(200);
 6620   format %{ "CMOV$cop $dst,$src" %}
 6621   opcode(0x0F,0x40);
 6622   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6623   ins_pipe( pipe_cmov_reg );
 6624 %}
 6625 
 6626 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
        // Register-to-register integer CMOV for unsigned flags; same encoding
        // as cmovI_reg.  Also the expansion target of cmovI_regUCF.
 6627   predicate(VM_Version::supports_cmov() );
 6628   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6629   ins_cost(200);
 6630   format %{ "CMOV$cop $dst,$src" %}
 6631   opcode(0x0F,0x40);
 6632   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6633   ins_pipe( pipe_cmov_reg );
 6634 %}
 6635 
 6636 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into cmovI_regU with the same operands.
 6637   predicate(VM_Version::supports_cmov() );
 6638   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6639   ins_cost(200);
 6640   expand %{
 6641     cmovI_regU(cop, cr, dst, src);
 6642   %}
 6643 %}
 6644 
 6645 // Conditional move
      // Integer CMOV with a memory source (signed flags): the LoadI is folded
      // into the CMOV's memory operand.  Cost 250 vs. 200 for the register form.
 6646 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6647   predicate(VM_Version::supports_cmov() );
 6648   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6649   ins_cost(250);
 6650   format %{ "CMOV$cop $dst,$src" %}
 6651   opcode(0x0F,0x40);
 6652   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6653   ins_pipe( pipe_cmov_mem );
 6654 %}
 6655 
 6656 // Conditional move
      // Integer CMOV with a memory source (unsigned flags); same encoding as
      // cmovI_mem.  Also the expansion target of cmovI_memUCF.
 6657 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6658   predicate(VM_Version::supports_cmov() );
 6659   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6660   ins_cost(250);
 6661   format %{ "CMOV$cop $dst,$src" %}
 6662   opcode(0x0F,0x40);
 6663   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6664   ins_pipe( pipe_cmov_mem );
 6665 %}
 6666 
 6667 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into cmovI_memU with the same operands.
 6668   predicate(VM_Version::supports_cmov() );
 6669   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6670   ins_cost(250);
 6671   expand %{
 6672     cmovI_memU(cop, cr, dst, src);
 6673   %}
 6674 %}
 6675 
 6676 // Conditional move
      // Pointer CMOV, register to register, signed flags; requires CMOV
      // support.  Cost 200 is below the branch-based cmovP_reg_nonP6 (300).
 6677 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6678   predicate(VM_Version::supports_cmov() );
 6679   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6680   ins_cost(200);
 6681   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6682   opcode(0x0F,0x40);
 6683   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6684   ins_pipe( pipe_cmov_reg );
 6685 %}
 6686 
 6687 // Conditional move (non-P6 version)
 6688 // Note:  a CMoveP is generated for  stubs and native wrappers
 6689 //        regardless of whether we are on a P6, so we
 6690 //        emulate a cmov here
      // There is deliberately no predicate, so this rule is always available;
      // its cost of 300 is above the real CMOV forms (200).
      // NOTE(review): the 0x2 passed to enc_cmov_branch is presumably the
      // length in bytes of the MOV being skipped (opcode 0x8b + ModRM); confirm
      // against the enc_class.
 6691 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6692   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6693   ins_cost(300);
 6694   format %{ "Jn$cop   skip\n\t"
 6695           "MOV    $dst,$src\t# pointer\n"
 6696       "skip:" %}
 6697   opcode(0x8b);
 6698   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6699   ins_pipe( pipe_cmov_reg );
 6700 %}
 6701 
 6702 // Conditional move
      // Pointer CMOV, register to register, unsigned flags; same encoding as
      // cmovP_reg.  Also the expansion target of cmovP_regUCF.
 6703 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6704   predicate(VM_Version::supports_cmov() );
 6705   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6706   ins_cost(200);
 6707   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6708   opcode(0x0F,0x40);
 6709   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6710   ins_pipe( pipe_cmov_reg );
 6711 %}
 6712 
 6713 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into cmovP_regU with the same operands.
 6714   predicate(VM_Version::supports_cmov() );
 6715   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6716   ins_cost(200);
 6717   expand %{
 6718     cmovP_regU(cop, cr, dst, src);
 6719   %}
 6720 %}
 6721 
 6722 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6723 // correctly meets the two pointer arguments; one is an incoming
 6724 // register but the other is a memory operand.  ALSO appears to
 6725 // be buggy with implicit null checks.
 6726 //
 6727 //// Conditional move
 6728 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6729 //  predicate(VM_Version::supports_cmov() );
 6730 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6731 //  ins_cost(250);
 6732 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6733 //  opcode(0x0F,0x40);
 6734 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6735 //  ins_pipe( pipe_cmov_mem );
 6736 //%}
 6737 //
 6738 //// Conditional move
 6739 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6740 //  predicate(VM_Version::supports_cmov() );
 6741 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6742 //  ins_cost(250);
 6743 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6744 //  opcode(0x0F,0x40);
 6745 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6746 //  ins_pipe( pipe_cmov_mem );
 6747 //%}
 6748 
 6749 // Conditional move
      // x87 double FCMOV driven by unsigned flags (UseSSE<=1).  The destination
      // uses operand class regDPR1 and the source any regDPR.
 6750 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6751   predicate(UseSSE<=1);
 6752   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6753   ins_cost(200);
 6754   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6755   opcode(0xDA);
 6756   ins_encode( enc_cmov_dpr(cop,src) );
 6757   ins_pipe( pipe_cmovDPR_reg );
 6758 %}
 6759 
 6760 // Conditional move
      // x87 float FCMOV driven by unsigned flags (UseSSE==0).
      // NOTE(review): this reuses the double variant's encoding class
      // (enc_cmov_dpr) and pipeline class (pipe_cmovDPR_reg), presumably
      // because the x87 register-to-register FCMOV is precision-independent;
      // confirm.
 6761 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6762   predicate(UseSSE==0);
 6763   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6764   ins_cost(200);
 6765   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6766   opcode(0xDA);
 6767   ins_encode( enc_cmov_dpr(cop,src) );
 6768   ins_pipe( pipe_cmovDPR_reg );
 6769 %}
 6770 
 6771 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So for signed flags the x87 double move (UseSSE<=1) is emulated with a
      // conditional branch around a push of $src followed by a store-and-pop
      // into $dst.
      // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte
      // length of the skipped sequence; confirm against the enc_class.
 6772 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6773   predicate(UseSSE<=1);
 6774   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6775   ins_cost(200);
 6776   format %{ "Jn$cop   skip\n\t"
 6777             "MOV    $dst,$src\t# double\n"
 6778       "skip:" %}
 6779   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6780   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6781   ins_pipe( pipe_cmovDPR_reg );
 6782 %}
 6783 
 6784 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So for signed flags the x87 float move (UseSSE==0) is emulated with a
      // conditional branch around a push of $src followed by a store-and-pop
      // into $dst.
      // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte
      // length of the skipped sequence; confirm against the enc_class.
 6785 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6786   predicate(UseSSE==0);
 6787   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6788   ins_cost(200);
 6789   format %{ "Jn$cop    skip\n\t"
 6790             "MOV    $dst,$src\t# float\n"
 6791       "skip:" %}
 6792   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6793   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6794   ins_pipe( pipe_cmovDPR_reg );
 6795 %}
 6796 
 6797 // No CMOVE with SSE/SSE2
      // CMoveF on XMM registers (UseSSE>=1) with signed flags is therefore
      // emulated: a short branch on the inverted condition skips the MOVSS.
 6798 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6799   predicate (UseSSE>=1);
 6800   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6801   ins_cost(200);
 6802   format %{ "Jn$cop   skip\n\t"
 6803             "MOVSS  $dst,$src\t# float\n"
 6804       "skip:" %}
 6805   ins_encode %{
 6806     Label skip;
 6807     // Invert sense of branch from sense of CMOV
 6808     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6809     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6810     __ bind(skip);
 6811   %}
 6812   ins_pipe( pipe_slow );
 6813 %}
 6814 
 6815 // No CMOVE with SSE/SSE2
      // CMoveD on XMM registers (UseSSE>=2) with signed flags is therefore
      // emulated: a short branch on the inverted condition skips the MOVSD.
      // The format text labels the move as "double" to match the MOVSD operand.
 6816 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6817   predicate (UseSSE>=2);
 6818   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6819   ins_cost(200);
 6820   format %{ "Jn$cop   skip\n\t"
 6821             "MOVSD  $dst,$src\t# double\n"
 6822       "skip:" %}
 6823   ins_encode %{
 6824     Label skip;
 6825     // Invert sense of branch from sense of CMOV
 6826     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6827     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6828     __ bind(skip);
 6829   %}
 6830   ins_pipe( pipe_slow );
 6831 %}
 6832 
 6833 // unsigned version
      // CMoveF on XMM registers (UseSSE>=1) with unsigned flags: a short branch
      // on the inverted condition skips the MOVSS.  Also the expansion target
      // of fcmovF_regUCF.
 6834 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6835   predicate (UseSSE>=1);
 6836   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6837   ins_cost(200);
 6838   format %{ "Jn$cop   skip\n\t"
 6839             "MOVSS  $dst,$src\t# float\n"
 6840       "skip:" %}
 6841   ins_encode %{
 6842     Label skip;
 6843     // Invert sense of branch from sense of CMOV
 6844     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6845     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6846     __ bind(skip);
 6847   %}
 6848   ins_pipe( pipe_slow );
 6849 %}
 6850 
 6851 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into fcmovF_regU with the same operands.
 6852   predicate (UseSSE>=1);
 6853   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6854   ins_cost(200);
 6855   expand %{
 6856     fcmovF_regU(cop, cr, dst, src);
 6857   %}
 6858 %}
 6859 
 6860 // unsigned version
      // CMoveD on XMM registers (UseSSE>=2) with unsigned flags: a short branch
      // on the inverted condition skips the MOVSD.  Also the expansion target
      // of fcmovD_regUCF.  The format text labels the move as "double" to match
      // the MOVSD operand.
 6861 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6862   predicate (UseSSE>=2);
 6863   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6864   ins_cost(200);
 6865   format %{ "Jn$cop   skip\n\t"
 6866             "MOVSD  $dst,$src\t# double\n"
 6867       "skip:" %}
 6868   ins_encode %{
 6869     Label skip;
 6870     // Invert sense of branch from sense of CMOV
 6871     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6872     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6873     __ bind(skip);
 6874   %}
 6875   ins_pipe( pipe_slow );
 6876 %}
 6877 
 6878 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into fcmovD_regU with the same operands.
 6879   predicate (UseSSE>=2);
 6880   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6881   ins_cost(200);
 6882   expand %{
 6883     fcmovD_regU(cop, cr, dst, src);
 6884   %}
 6885 %}
 6886 
 6887 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
        // Long CMOV for signed flags: two CMOVs under the same condition, one
        // for the low halves (RegReg_Lo2) and one for the high halves
        // (RegReg_Hi2).  Requires CMOV support.
 6888   predicate(VM_Version::supports_cmov() );
 6889   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6890   ins_cost(200);
 6891   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6892             "CMOV$cop $dst.hi,$src.hi" %}
 6893   opcode(0x0F,0x40);
 6894   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6895   ins_pipe( pipe_cmov_reg_long );
 6896 %}
 6897 
 6898 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
        // Long CMOV for unsigned flags; same two-CMOV encoding as cmovL_reg.
        // Also the expansion target of cmovL_regUCF.
 6899   predicate(VM_Version::supports_cmov() );
 6900   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6901   ins_cost(200);
 6902   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6903             "CMOV$cop $dst.hi,$src.hi" %}
 6904   opcode(0x0F,0x40);
 6905   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6906   ins_pipe( pipe_cmov_reg_long );
 6907 %}
 6908 
 6909 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
        // Variant for the cmpOpUCF/eFlagsRegUCF operand classes; it has no
        // encoding of its own and expands into cmovL_regU with the same operands.
 6910   predicate(VM_Version::supports_cmov() );
 6911   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6912   ins_cost(200);
 6913   expand %{
 6914     cmovL_regU(cop, cr, dst, src);
 6915   %}
 6916 %}
 6917 
 6918 //----------Arithmetic Instructions--------------------------------------------
 6919 //----------Addition Instructions----------------------------------------------
 6920 
 6921 // Integer Addition Instructions
 6922 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand register add: dst = dst + src (AddI). The flags register
        // is declared clobbered. Declared size is 2 bytes.
 6923   match(Set dst (AddI dst src));
 6924   effect(KILL cr);
 6925 
 6926   size(2);
 6927   format %{ "ADD    $dst,$src" %}
 6928   opcode(0x03);
 6929   ins_encode( OpcP, RegReg( dst, src) );
 6930   ins_pipe( ialu_reg_reg );
 6931 %}
 6932 
 6933 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Register plus immediate: dst = dst + src (AddI); flags are clobbered.
        // No size() is declared. NOTE(review): OpcSErm/Con8or32 are defined
        // elsewhere; presumably they choose between an 8-bit and a 32-bit
        // immediate form - confirm against the encode block.
 6934   match(Set dst (AddI dst src));
 6935   effect(KILL cr);
 6936 
 6937   format %{ "ADD    $dst,$src" %}
 6938   opcode(0x81, 0x00); /* /0 id */
 6939   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6940   ins_pipe( ialu_reg );
 6941 %}
 6942 
 6943 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // AddI of the constant operand immI_1, printed as INC; only eligible
        // when UseIncDec is set. Declared as a one-byte instruction built from
        // opcode 0x40 and dst via Opc_plus. Flags are clobbered.
 6944   predicate(UseIncDec);
 6945   match(Set dst (AddI dst src));
 6946   effect(KILL cr);
 6947 
 6948   size(1);
 6949   format %{ "INC    $dst" %}
 6950   opcode(0x40); /*  */
 6951   ins_encode( Opc_plus( primary, dst ) );
 6952   ins_pipe( ialu_reg );
 6953 %}
 6954 
 6955 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-operand integer add via LEA: dst = src0 + src1, where dst is a
        // separate operand from src0. Unlike the ADD rules, no flags operand
        // or KILL effect is declared. Explicit ins_cost of 110.
 6956   match(Set dst (AddI src0 src1));
 6957   ins_cost(110);
 6958 
 6959   format %{ "LEA    $dst,[$src0 + $src1]" %}
 6960   opcode(0x8D); /* 0x8D /r */
 6961   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6962   ins_pipe( ialu_reg_reg );
 6963 %}
 6964 
 6965 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer counterpart of leaI_eReg_immI: dst = src0 + src1 (AddP) via
        // LEA, with the same opcode, encoding list and cost. No flags effect
        // is declared.
 6966   match(Set dst (AddP src0 src1));
 6967   ins_cost(110);
 6968 
 6969   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 6970   opcode(0x8D); /* 0x8D /r */
 6971   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6972   ins_pipe( ialu_reg_reg );
 6973 %}
 6974 
 6975 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // AddI of the constant operand immI_M1, printed as DEC; only eligible
        // when UseIncDec is set. Mirrors incI_eReg with opcode 0x48: one byte,
        // built via Opc_plus from the opcode and dst. Flags are clobbered.
 6976   predicate(UseIncDec);
 6977   match(Set dst (AddI dst src));
 6978   effect(KILL cr);
 6979 
 6980   size(1);
 6981   format %{ "DEC    $dst" %}
 6982   opcode(0x48); /*  */
 6983   ins_encode( Opc_plus( primary, dst ) );
 6984   ins_pipe( ialu_reg );
 6985 %}
 6986 
 6987 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Pointer plus integer register: dst = dst + src (AddP). Uses the same
        // opcode, size and encoding list as addI_eReg; flags are clobbered.
 6988   match(Set dst (AddP dst src));
 6989   effect(KILL cr);
 6990 
 6991   size(2);
 6992   format %{ "ADD    $dst,$src" %}
 6993   opcode(0x03);
 6994   ins_encode( OpcP, RegReg( dst, src) );
 6995   ins_pipe( ialu_reg_reg );
 6996 %}
 6997 
 6998 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Pointer plus integer immediate: dst = dst + src (AddP). Same opcode
        // and encoding list as addI_eReg_imm; flags are clobbered.
 6999   match(Set dst (AddP dst src));
 7000   effect(KILL cr);
 7001 
 7002   format %{ "ADD    $dst,$src" %}
 7003   opcode(0x81,0x00); /* Opcode 81 /0 id */
        // The commented-out RegImm form below is kept from the original; the
        // live encoding is the OpcSErm/Con8or32 pair.
 7004   // ins_encode( RegImm( dst, src) );
 7005   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7006   ins_pipe( ialu_reg );
 7007 %}
 7008 
 7009 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Add with a memory source: dst = dst + LoadI(src). The load is folded
        // into the ADD, so no separate load is matched. Flags are clobbered.
 7010   match(Set dst (AddI dst (LoadI src)));
 7011   effect(KILL cr);
 7012 
 7013   ins_cost(150);
 7014   format %{ "ADD    $dst,$src" %}
 7015   opcode(0x03);
 7016   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7017   ins_pipe( ialu_reg_mem );
 7018 %}
 7019 
 7020 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write add on memory: matches a StoreI to dst whose value
        // is LoadI(dst) + src, i.e. the same memory operand is loaded and
        // stored. RegMem receives (src, dst): the register is the reg field
        // and the memory operand is the destination. Flags are clobbered.
 7021   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7022   effect(KILL cr);
 7023 
 7024   ins_cost(150);
 7025   format %{ "ADD    $dst,$src" %}
 7026   opcode(0x01);  /* Opcode 01 /r */
 7027   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7028   ins_pipe( ialu_mem_reg );
 7029 %}
 7030 
 7031 // Add Memory with Immediate
 7032 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Read-modify-write add of an immediate to memory: matches a StoreI to
        // dst whose value is LoadI(dst) + src. RMopc_Mem is given 0x00 as the
        // opcode-extension value (the /0 in the opcode comment). Flags are
        // clobbered.
 7033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7034   effect(KILL cr);
 7035 
 7036   ins_cost(125);
 7037   format %{ "ADD    $dst,$src" %}
 7038   opcode(0x81);               /* Opcode 81 /0 id */
 7039   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
 7040   ins_pipe( ialu_mem_imm );
 7041 %}
 7042 
 7043 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Memory increment: matches a StoreI to dst whose value is
        // LoadI(dst) + immI_1, printed as INC. Same cost (125) and match shape
        // as addI_mem_imm but with the narrower immI_1 operand.
        // NOTE(review): unlike incI_eReg, there is no UseIncDec predicate here.
 7044   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7045   effect(KILL cr);
 7046 
 7047   ins_cost(125);
 7048   format %{ "INC    $dst" %}
 7049   opcode(0xFF);               /* Opcode FF /0 */
 7050   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
 7051   ins_pipe( ialu_mem_imm );
 7052 %}
 7053 
 7054 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Memory decrement: matches a StoreI to dst whose value is
        // LoadI(dst) + immI_M1, printed as DEC. Differs from incI_mem only in
        // the immediate operand class and the 0x01 opcode extension.
        // NOTE(review): unlike decI_eReg, there is no UseIncDec predicate here.
 7055   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7056   effect(KILL cr);
 7057 
 7058   ins_cost(125);
 7059   format %{ "DEC    $dst" %}
 7060   opcode(0xFF);               /* Opcode FF /1 */
 7061   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
 7062   ins_pipe( ialu_mem_imm );
 7063 %}
 7064 
 7065 
 7066 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register, in place (dst is both input and
        // result). Emits nothing: the encoding is empty and size is declared
        // as 0.
 7067   match(Set dst (CheckCastPP dst));
 7068 
 7069   size(0);
 7070   format %{ "#checkcastPP of $dst" %}
 7071   ins_encode( /*empty encoding*/ );
 7072   ins_pipe( empty );
 7073 %}
 7074 
 7075 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register, in place; empty encoding.
        // NOTE(review): the other cast* rules in this group declare
        // ins_cost(0) (and checkCastPP declares size(0)); this one declares
        // neither.
 7076   match(Set dst (CastPP dst));
 7077   format %{ "#castPP of $dst" %}
 7078   ins_encode( /*empty encoding*/ );
 7079   ins_pipe( empty );
 7080 %}
 7081 
 7082 instruct castII( rRegI dst ) %{
        // CastII on an int register, in place; empty encoding and zero cost.
 7083   match(Set dst (CastII dst));
 7084   format %{ "#castII of $dst" %}
 7085   ins_encode( /*empty encoding*/ );
 7086   ins_cost(0);
 7087   ins_pipe( empty );
 7088 %}
 7089 
 7090 instruct castLL( eRegL dst ) %{
        // CastLL on a long register operand, in place; empty encoding and zero
        // cost.
 7091   match(Set dst (CastLL dst));
 7092   format %{ "#castLL of $dst" %}
 7093   ins_encode( /*empty encoding*/ );
 7094   ins_cost(0);
 7095   ins_pipe( empty );
 7096 %}
 7097 
 7098 instruct castFF( regF dst ) %{
        // CastFF on a regF operand, in place; selected when UseSSE >= 1
        // (castFF_PR covers UseSSE < 1). Empty encoding and zero cost.
 7099   predicate(UseSSE >= 1);
 7100   match(Set dst (CastFF dst));
 7101   format %{ "#castFF of $dst" %}
 7102   ins_encode( /*empty encoding*/ );
 7103   ins_cost(0);
 7104   ins_pipe( empty );
 7105 %}
 7106 
 7107 instruct castDD( regD dst ) %{
        // CastDD on a regD operand, in place; selected when UseSSE >= 2
        // (castDD_PR covers UseSSE < 2). Empty encoding and zero cost.
 7108   predicate(UseSSE >= 2);
 7109   match(Set dst (CastDD dst));
 7110   format %{ "#castDD of $dst" %}
 7111   ins_encode( /*empty encoding*/ );
 7112   ins_cost(0);
 7113   ins_pipe( empty );
 7114 %}
 7115 
 7116 instruct castFF_PR( regFPR dst ) %{
        // CastFF on a regFPR operand, in place; the complement of castFF,
        // selected when UseSSE < 1. Empty encoding and zero cost.
 7117   predicate(UseSSE < 1);
 7118   match(Set dst (CastFF dst));
 7119   format %{ "#castFF of $dst" %}
 7120   ins_encode( /*empty encoding*/ );
 7121   ins_cost(0);
 7122   ins_pipe( empty );
 7123 %}
 7124 
 7125 instruct castDD_PR( regDPR dst ) %{
        // CastDD on a regDPR operand, in place; the complement of castDD,
        // selected when UseSSE < 2. Empty encoding and zero cost.
 7126   predicate(UseSSE < 2);
 7127   match(Set dst (CastDD dst));
 7128   format %{ "#castDD of $dst" %}
 7129   ins_encode( /*empty encoding*/ );
 7130   ins_cost(0);
 7131   ins_pipe( empty );
 7132 %}
 7133 
 7134 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7135 
 7136 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-swap producing a boolean in res. One rule serves
        // both the strong (CompareAndSwapL) and weak (WeakCompareAndSwapL)
        // ideal nodes. Per the format: CMPXCHG8 against EDX:EAX, then res is
        // set to 0 and overwritten with 1 unless the JNE is taken.
        // oldval and the flags are declared clobbered. Operands are pinned to
        // fixed registers through their operand classes.
 7137   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7138   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7139   effect(KILL cr, KILL oldval);
 7140   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7141             "MOV    $res,0\n\t"
 7142             "JNE,s  fail\n\t"
 7143             "MOV    $res,1\n"
 7144           "fail:" %}
 7145   ins_encode( enc_cmpxchg8(mem_ptr),
 7146               enc_flags_ne_to_boolean(res) );
 7147   ins_pipe( pipe_cmpxchg );
 7148 %}
 7149 
 7150 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing a boolean in res; covers both
        // CompareAndSwapP and WeakCompareAndSwapP. Per the format: CMPXCHG
        // comparing EAX with [mem_ptr], then the flags are turned into 0/1 in
        // res. oldval and the flags are declared clobbered.
 7151   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7152   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7153   effect(KILL cr, KILL oldval);
 7154   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7155             "MOV    $res,0\n\t"
 7156             "JNE,s  fail\n\t"
 7157             "MOV    $res,1\n"
 7158           "fail:" %}
 7159   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7160   ins_pipe( pipe_cmpxchg );
 7161 %}
 7162 
 7163 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing a boolean in res; covers both
        // CompareAndSwapB and WeakCompareAndSwapB. Uses the byte-sized
        // enc_cmpxchgb encoding followed by the flags-to-boolean conversion.
        // oldval and the flags are declared clobbered.
 7164   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7165   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7166   effect(KILL cr, KILL oldval);
 7167   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7168             "MOV    $res,0\n\t"
 7169             "JNE,s  fail\n\t"
 7170             "MOV    $res,1\n"
 7171           "fail:" %}
 7172   ins_encode( enc_cmpxchgb(mem_ptr),
 7173               enc_flags_ne_to_boolean(res) );
 7174   ins_pipe( pipe_cmpxchg );
 7175 %}
 7176 
 7177 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short compare-and-swap producing a boolean in res; covers both
        // CompareAndSwapS and WeakCompareAndSwapS. Uses the word-sized
        // enc_cmpxchgw encoding followed by the flags-to-boolean conversion.
        // oldval and the flags are declared clobbered.
 7178   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7179   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7180   effect(KILL cr, KILL oldval);
 7181   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7182             "MOV    $res,0\n\t"
 7183             "JNE,s  fail\n\t"
 7184             "MOV    $res,1\n"
 7185           "fail:" %}
 7186   ins_encode( enc_cmpxchgw(mem_ptr),
 7187               enc_flags_ne_to_boolean(res) );
 7188   ins_pipe( pipe_cmpxchg );
 7189 %}
 7190 
 7191 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing a boolean in res; covers both
        // CompareAndSwapI and WeakCompareAndSwapI. Shares enc_cmpxchg with
        // compareAndSwapP and then converts the flags to 0/1 in res.
        // oldval and the flags are declared clobbered.
 7192   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7193   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7194   effect(KILL cr, KILL oldval);
 7195   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7196             "MOV    $res,0\n\t"
 7197             "JNE,s  fail\n\t"
 7198             "MOV    $res,1\n"
 7199           "fail:" %}
 7200   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7201   ins_pipe( pipe_cmpxchg );
 7202 %}
 7203 
 7204 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-exchange. Unlike compareAndSwapL there is no
        // boolean result: the rule's result is the oldval operand itself
        // (Set oldval ...), and only the CMPXCHG8 encoding is emitted.
        // Flags are clobbered.
 7205   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7206   effect(KILL cr);
 7207   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7208   ins_encode( enc_cmpxchg8(mem_ptr) );
 7209   ins_pipe( pipe_cmpxchg );
 7210 %}
 7211 
 7212 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...); no boolean is materialized. Emits only
        // enc_cmpxchg. Flags are clobbered.
 7213   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7214   effect(KILL cr);
 7215   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7216   ins_encode( enc_cmpxchg(mem_ptr) );
 7217   ins_pipe( pipe_cmpxchg );
 7218 %}
 7219 
 7220 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...). Emits only the byte-sized enc_cmpxchgb.
        // Flags are clobbered.
 7221   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7222   effect(KILL cr);
 7223   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7224   ins_encode( enc_cmpxchgb(mem_ptr) );
 7225   ins_pipe( pipe_cmpxchg );
 7226 %}
 7227 
 7228 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...). Emits only the word-sized enc_cmpxchgw.
        // Flags are clobbered.
 7229   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7230   effect(KILL cr);
 7231   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7232   ins_encode( enc_cmpxchgw(mem_ptr) );
 7233   ins_pipe( pipe_cmpxchg );
 7234 %}
 7235 
 7236 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange: the result is delivered in the oldval
        // operand (Set oldval ...). Emits only enc_cmpxchg. Flags are
        // clobbered.
 7237   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7238   effect(KILL cr);
 7239   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7240   ins_encode( enc_cmpxchg(mem_ptr) );
 7241   ins_pipe( pipe_cmpxchg );
 7242 %}
 7243 
 7244 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddB whose result is unused (predicate result_not_used()):
        // since the old value is not needed, a LOCK-prefixed byte ADD of the
        // immediate to memory is emitted instead of XADD. The rule's result is
        // bound to the placeholder operand dummy. Flags are clobbered.
 7245   predicate(n->as_LoadStore()->result_not_used());
 7246   match(Set dummy (GetAndAddB mem add));
 7247   effect(KILL cr);
 7248   format %{ "ADDB  [$mem],$add" %}
 7249   ins_encode %{
 7250     __ lock();
 7251     __ addb($mem$$Address, $add$$constant);
 7252   %}
 7253   ins_pipe( pipe_cmpxchg );
 7254 %}
 7255 
 7256 // Important to match to xRegI: only 8-bit regs.
 7257 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
        // GetAndAddB with a used result: LOCK-prefixed byte XADD. newval is
        // both the addend input and the rule's result (Set newval ...).
        // Flags are clobbered.
 7258   match(Set newval (GetAndAddB mem newval));
 7259   effect(KILL cr);
 7260   format %{ "XADDB  [$mem],$newval" %}
 7261   ins_encode %{
 7262     __ lock();
 7263     __ xaddb($mem$$Address, $newval$$Register);
 7264   %}
 7265   ins_pipe( pipe_cmpxchg );
 7266 %}
 7267 
 7268 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddS whose result is unused (predicate result_not_used()):
        // a LOCK-prefixed 16-bit ADD of the immediate to memory is emitted
        // instead of XADD. The rule's result is bound to the placeholder
        // operand dummy. Flags are clobbered.
        // The format prints ADDW so the listing matches the addw that is
        // actually assembled (cf. XCHGW in xchgS).
 7269   predicate(n->as_LoadStore()->result_not_used());
 7270   match(Set dummy (GetAndAddS mem add));
 7271   effect(KILL cr);
 7272   format %{ "ADDW  [$mem],$add" %}
 7273   ins_encode %{
 7274     __ lock();
 7275     __ addw($mem$$Address, $add$$constant);
 7276   %}
 7277   ins_pipe( pipe_cmpxchg );
 7278 %}
 7279 
 7280 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddS with a used result: LOCK-prefixed 16-bit XADD. newval is
        // both the addend input and the rule's result (Set newval ...).
        // Flags are clobbered.
        // The format prints XADDW so the listing matches the xaddw that is
        // actually assembled (cf. XCHGW in xchgS).
 7281   match(Set newval (GetAndAddS mem newval));
 7282   effect(KILL cr);
 7283   format %{ "XADDW  [$mem],$newval" %}
 7284   ins_encode %{
 7285     __ lock();
 7286     __ xaddw($mem$$Address, $newval$$Register);
 7287   %}
 7288   ins_pipe( pipe_cmpxchg );
 7289 %}
 7290 
 7291 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddI whose result is unused (predicate result_not_used()):
        // a LOCK-prefixed 32-bit ADD of the immediate to memory is emitted
        // instead of XADD. The rule's result is bound to the placeholder
        // operand dummy. Flags are clobbered.
 7292   predicate(n->as_LoadStore()->result_not_used());
 7293   match(Set dummy (GetAndAddI mem add));
 7294   effect(KILL cr);
 7295   format %{ "ADDL  [$mem],$add" %}
 7296   ins_encode %{
 7297     __ lock();
 7298     __ addl($mem$$Address, $add$$constant);
 7299   %}
 7300   ins_pipe( pipe_cmpxchg );
 7301 %}
 7302 
 7303 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddI with a used result: LOCK-prefixed 32-bit XADD. newval is
        // both the addend input and the rule's result (Set newval ...).
        // Flags are clobbered.
 7304   match(Set newval (GetAndAddI mem newval));
 7305   effect(KILL cr);
 7306   format %{ "XADDL  [$mem],$newval" %}
 7307   ins_encode %{
 7308     __ lock();
 7309     __ xaddl($mem$$Address, $newval$$Register);
 7310   %}
 7311   ins_pipe( pipe_cmpxchg );
 7312 %}
 7313 
 7314 // Important to match to xRegI: only 8-bit regs.
 7315 instruct xchgB( memory mem, xRegI newval) %{
        // GetAndSetB: byte exchange between newval and memory; newval is both
        // the value to store and the rule's result. No flags operand is
        // declared and, unlike the xadd rules, no explicit lock() is emitted.
        // NOTE(review): presumably this relies on XCHG with a memory operand
        // being implicitly locked - confirm against the Intel SDM.
 7316   match(Set newval (GetAndSetB mem newval));
 7317   format %{ "XCHGB  $newval,[$mem]" %}
 7318   ins_encode %{
 7319     __ xchgb($newval$$Register, $mem$$Address);
 7320   %}
 7321   ins_pipe( pipe_cmpxchg );
 7322 %}
 7323 
 7324 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: 16-bit exchange between newval and memory; newval is
        // both the value to store and the rule's result. No flags operand is
        // declared and no explicit lock() is emitted.
 7325   match(Set newval (GetAndSetS mem newval));
 7326   format %{ "XCHGW  $newval,[$mem]" %}
 7327   ins_encode %{
 7328     __ xchgw($newval$$Register, $mem$$Address);
 7329   %}
 7330   ins_pipe( pipe_cmpxchg );
 7331 %}
 7332 
 7333 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: 32-bit exchange between newval and memory; newval is
        // both the value to store and the rule's result. No flags operand is
        // declared and no explicit lock() is emitted.
 7334   match(Set newval (GetAndSetI mem newval));
 7335   format %{ "XCHGL  $newval,[$mem]" %}
 7336   ins_encode %{
 7337     __ xchgl($newval$$Register, $mem$$Address);
 7338   %}
 7339   ins_pipe( pipe_cmpxchg );
 7340 %}
 7341 
 7342 instruct xchgP( memory mem, pRegP newval) %{
        // GetAndSetP: pointer exchange between newval and memory, emitted with
        // the same xchgl as xchgI. newval is both the value to store and the
        // rule's result. No flags operand is declared and no explicit lock()
        // is emitted.
 7343   match(Set newval (GetAndSetP mem newval));
 7344   format %{ "XCHGL  $newval,[$mem]" %}
 7345   ins_encode %{
 7346     __ xchgl($newval$$Register, $mem$$Address);
 7347   %}
 7348   ins_pipe( pipe_cmpxchg );
 7349 %}
 7350 
 7351 //----------Subtraction Instructions-------------------------------------------
 7352 
 7353 // Integer Subtraction Instructions
 7354 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand register subtract: dst = dst - src (SubI). Flags are
        // clobbered. Declared size is 2 bytes.
 7355   match(Set dst (SubI dst src));
 7356   effect(KILL cr);
 7357 
 7358   size(2);
 7359   format %{ "SUB    $dst,$src" %}
 7360   opcode(0x2B);
 7361   ins_encode( OpcP, RegReg( dst, src) );
 7362   ins_pipe( ialu_reg_reg );
 7363 %}
 7364 
 7365 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Register minus immediate: dst = dst - src (SubI). Same encoding list
        // as addI_eReg_imm with opcode extension 0x05. Flags are clobbered.
 7366   match(Set dst (SubI dst src));
 7367   effect(KILL cr);
 7368 
 7369   format %{ "SUB    $dst,$src" %}
 7370   opcode(0x81,0x05);  /* Opcode 81 /5 */
        // The commented-out RegImm form below is kept from the original; the
        // live encoding is the OpcSErm/Con8or32 pair.
 7371   // ins_encode( RegImm( dst, src) );
 7372   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7373   ins_pipe( ialu_reg );
 7374 %}
 7375 
 7376 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Subtract with a memory source: dst = dst - LoadI(src), with the load
        // folded into the SUB. Flags are clobbered.
 7377   match(Set dst (SubI dst (LoadI src)));
 7378   effect(KILL cr);
 7379 
 7380   ins_cost(150);
 7381   format %{ "SUB    $dst,$src" %}
 7382   opcode(0x2B);
 7383   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7384   ins_pipe( ialu_reg_mem );
 7385 %}
 7386 
 7387 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write subtract on memory: matches a StoreI to dst whose
        // value is LoadI(dst) - src. RegMem receives (src, dst), as in
        // addI_mem_eReg. Flags are clobbered.
 7388   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7389   effect(KILL cr);
 7390 
 7391   ins_cost(150);
 7392   format %{ "SUB    $dst,$src" %}
 7393   opcode(0x29);  /* Opcode 29 /r */
 7394   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7395   ins_pipe( ialu_mem_reg );
 7396 %}
 7397 
 7398 // Subtract from a pointer
 7399 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
        // Pointer minus int: the ideal graph has no pointer subtract here, so
        // the rule matches AddP of dst and (0 - src) and emits a single SUB.
        // The zero operand exists only to anchor the immI_0 in the match tree.
        // Flags are clobbered.
 7400   match(Set dst (AddP dst (SubI zero src)));
 7401   effect(KILL cr);
 7402 
 7403   size(2);
 7404   format %{ "SUB    $dst,$src" %}
 7405   opcode(0x2B);
 7406   ins_encode( OpcP, RegReg( dst, src) );
 7407   ins_pipe( ialu_reg_reg );
 7408 %}
 7409 
 7410 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Integer negate in place: matches dst = 0 - dst (SubI zero dst) and
        // prints NEG. Declared size is 2 bytes. Flags are clobbered.
 7411   match(Set dst (SubI zero dst));
 7412   effect(KILL cr);
 7413 
 7414   size(2);
 7415   format %{ "NEG    $dst" %}
 7416   opcode(0xF7,0x03);  // Opcode F7 /3
 7417   ins_encode( OpcP, RegOpc( dst ) );
 7418   ins_pipe( ialu_reg );
 7419 %}
 7420 
 7421 //----------Multiplication/Division Instructions-------------------------------
 7422 // Integer Multiplication Instructions
 7423 // Multiply Register
 7424 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand register multiply: dst = dst * src (MulI). Declared size
        // is 3 bytes. The encoding lists OpcS before OpcP, so the secondary
        // opcode byte (0x0F) is emitted ahead of the primary (0xAF).
        // Flags are clobbered. Also used by the mulAddS2I_rReg expansion.
 7425   match(Set dst (MulI dst src));
 7426   effect(KILL cr);
 7427 
 7428   size(3);
 7429   ins_cost(300);
 7430   format %{ "IMUL   $dst,$src" %}
 7431   opcode(0xAF, 0x0F);
 7432   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7433   ins_pipe( ialu_reg_reg_alu0 );
 7434 %}
 7435 
 7436 // Multiply 32-bit Immediate
 7437 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
        // Three-operand multiply by immediate: dst = src * imm, where dst is
        // a separate operand from src. Flags are clobbered.
 7438   match(Set dst (MulI src imm));
 7439   effect(KILL cr);
 7440 
 7441   ins_cost(300);
 7442   format %{ "IMUL   $dst,$src,$imm" %}
 7443   opcode(0x69);  /* 69 /r id */
 7444   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7445   ins_pipe( ialu_reg_reg_alu0 );
 7446 %}
 7447 
 7448 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Materializes only the low word of an immL32 constant (format: MOV
        // EAX). Produces the eADXRegL_low_only operand consumed by
        // mulI_imm_high and mulI_imm_RShift_high, whose costs are reduced by
        // 150 to offset the inflated cost declared here.
 7449   match(Set dst src);
 7450   effect(KILL cr);
 7451 
 7452   // Note that this is artificially increased to make it more expensive than loadConL
 7453   ins_cost(250);
 7454   format %{ "MOV    EAX,$src\t// low word only" %}
 7455   opcode(0xB8);
 7456   ins_encode( LdImmL_Lo(dst, src) );
 7457   ins_pipe( ialu_reg_fat );
 7458 %}
 7459 
 7460 // Multiply by 32-bit Immediate, taking the shifted high order results
 7461 //  (special case for shift by 32)
 7462 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
        // High 32 bits of (long)src1 * constant: matches
        // ConvL2I(RShiftL(MulL(ConvI2L src1, src2), 32)) and leaves the result
        // in the eDXRegI operand after a single widening IMUL.
        // The predicate walks the match tree (ConvL2I -> RShiftL -> MulL ->
        // second input) and requires that input to be a ConL whose value lies
        // within [min_jint, max_jint].
 7463   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7464   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7465              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7466              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7467   effect(USE src1, KILL cr);
 7468 
 7469   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7470   ins_cost(0*100 + 1*400 - 150);
 7471   format %{ "IMUL   EDX:EAX,$src1" %}
 7472   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7473   ins_pipe( pipe_slow );
 7474 %}
 7475 
 7476 // Multiply by 32-bit Immediate, taking the shifted high order results
 7477 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
        // General form of mulI_imm_high for shift counts in the immI_32_63
        // operand class: after the widening IMUL, the format shows an extra
        // SAR of EDX by (cnt - 32). Same match tree, predicate and encoding
        // class as mulI_imm_high; the cost is 100 higher for the extra shift.
 7478   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7479   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7480              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7481              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7482   effect(USE src1, KILL cr);
 7483 
 7484   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7485   ins_cost(1*100 + 1*400 - 150);
 7486   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7487             "SAR    EDX,$cnt-32" %}
 7488   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7489   ins_pipe( pipe_slow );
 7490 %}
 7491 
 7492 // Multiply Memory 32-bit Immediate
 7493 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
        // Three-operand multiply of a memory operand by an immediate:
        // dst = LoadI(src) * imm, with the load folded into the IMUL.
        // Flags are clobbered.
 7494   match(Set dst (MulI (LoadI src) imm));
 7495   effect(KILL cr);
 7496 
 7497   ins_cost(300);
 7498   format %{ "IMUL   $dst,$src,$imm" %}
 7499   opcode(0x69);  /* 69 /r id */
 7500   ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
 7501   ins_pipe( ialu_reg_mem_alu0 );
 7502 %}
 7503 
 7504 // Multiply Memory
 7505 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
        // Two-operand multiply with a memory source: dst = dst * LoadI(src).
        // Same opcode pair and OpcS/OpcP ordering as mulI_eReg, with RegMem in
        // place of RegReg. Flags are clobbered.
 7506   match(Set dst (MulI dst (LoadI src)));
 7507   effect(KILL cr);
 7508 
 7509   ins_cost(350);
 7510   format %{ "IMUL   $dst,$src" %}
 7511   opcode(0xAF, 0x0F);
 7512   ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
 7513   ins_pipe( ialu_reg_mem_alu0 );
 7514 %}
 7515 
 7516 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7517 %{
        // MulAddS2I: dst = dst * src1 + src2 * src3. Has no encoding of its
        // own; it expands into three existing rules:
        //   dst  *= src1   (mulI_eReg)
        //   src2 *= src3   (mulI_eReg) - src2 is overwritten, hence KILL src2
        //   dst  += src2   (addI_eReg)
 7518   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7519   effect(KILL cr, KILL src2);
 7520 
 7521   expand %{ mulI_eReg(dst, src1, cr);
 7522            mulI_eReg(src2, src3, cr);
 7523            addI_eReg(dst, src2, cr); %}
 7524 %}
 7525 
 7526 // Multiply Register Int to Long
 7527 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
        // Widening multiply of two ints to a long. src is pinned through the
        // eAXRegI operand class and the result through eADXRegL; src1 uses the
        // nadxRegI class. The encoding receives only dst and src1.
        // Flags are clobbered.
 7528   // Basic Idea: long = (long)int * (long)int
 7529   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7530   effect(DEF dst, USE src, USE src1, KILL flags);
 7531 
 7532   ins_cost(300);
 7533   format %{ "IMUL   $dst,$src1" %}
 7534 
 7535   ins_encode( long_int_multiply( dst, src1 ) );
 7536   ins_pipe( ialu_reg_reg_alu0 );
 7537 %}
 7538 
 7539 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
        // Unsigned counterpart of mulI2L: both ints are converted to long and
        // masked with the immL_32bits operand before the MulL, and the rule is
        // printed as MUL rather than IMUL. The same mask operand appears on
        // both sides of the match tree. Flags are clobbered.
 7540   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7541   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7542   effect(KILL flags);
 7543 
 7544   ins_cost(300);
 7545   format %{ "MUL    $dst,$src1" %}
 7546 
 7547   ins_encode( long_uint_multiply(dst, src1) );
 7548   ins_pipe( ialu_reg_reg_alu0 );
 7549 %}
 7550 
 7551 // Multiply Register Long
 7552 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // General 64x64 -> 64 multiply, dst = dst * src, with dst in the
        // eADXRegL operand (EDX:EAX in the format). Per the format: the two
        // cross products are formed with IMUL and summed in tmp, MUL produces
        // the full low-by-low product in EDX:EAX, and tmp is added to EDX.
        // This is the unpredicated fallback; the lhi0/rhi0/hi0 variants carry
        // predicates and lower costs. tmp is a scratch register; flags are
        // clobbered.
 7553   match(Set dst (MulL dst src));
 7554   effect(KILL cr, TEMP tmp);
 7555   ins_cost(4*100+3*400);
 7556 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7557 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7558   format %{ "MOV    $tmp,$src.lo\n\t"
 7559             "IMUL   $tmp,EDX\n\t"
 7560             "MOV    EDX,$src.hi\n\t"
 7561             "IMUL   EDX,EAX\n\t"
 7562             "ADD    $tmp,EDX\n\t"
 7563             "MUL    EDX:EAX,$src.lo\n\t"
 7564             "ADD    EDX,$tmp" %}
 7565   ins_encode( long_multiply( dst, src, tmp ) );
 7566   ins_pipe( pipe_slow );
 7567 %}
 7568 
 7569 // Multiply Register Long where the left operand's high 32 bits are zero
 7570 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // 64-bit multiply specialised for a left operand (n->in(1)) whose high
        // 32 bits are known to be zero, which removes the x_hi * y_lo cross
        // product. tmp is a scratch register; flags are clobbered.
 7571   predicate(is_operand_hi32_zero(n->in(1)));
 7572   match(Set dst (MulL dst src));
 7573   effect(KILL cr, TEMP tmp);
 7574   ins_cost(2*100+2*400);
 7575 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7576 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7577   format %{ "MOV    $tmp,$src.hi\n\t"
 7578             "IMUL   $tmp,EAX\n\t"
 7579             "MUL    EDX:EAX,$src.lo\n\t"
 7580             "ADD    EDX,$tmp" %}
 7581   ins_encode %{
          // tmp = y_hi * x_lo (low 32 bits), computed before MUL overwrites
          // EDX:EAX.
 7582     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7583     __ imull($tmp$$Register, rax);
          // EDX:EAX = x_lo * y_lo, then fold the cross product into the high
          // word.
 7584     __ mull($src$$Register);
 7585     __ addl(rdx, $tmp$$Register);
 7586   %}
 7587   ins_pipe( pipe_slow );
 7588 %}
 7589 
 7590 // Multiply Register Long where the right operand's high 32 bits are zero
 7591 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // 64-bit multiply specialised for a right operand (n->in(2)) whose
        // high 32 bits are known to be zero, which removes the x_lo * y_hi
        // cross product. tmp is a scratch register; flags are clobbered.
 7592   predicate(is_operand_hi32_zero(n->in(2)));
 7593   match(Set dst (MulL dst src));
 7594   effect(KILL cr, TEMP tmp);
 7595   ins_cost(2*100+2*400);
 7596 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7597 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7598   format %{ "MOV    $tmp,$src.lo\n\t"
 7599             "IMUL   $tmp,EDX\n\t"
 7600             "MUL    EDX:EAX,$src.lo\n\t"
 7601             "ADD    EDX,$tmp" %}
 7602   ins_encode %{
          // tmp = y_lo * x_hi (low 32 bits), computed before MUL overwrites
          // EDX.
 7603     __ movl($tmp$$Register, $src$$Register);
 7604     __ imull($tmp$$Register, rdx);
          // EDX:EAX = x_lo * y_lo, then fold the cross product into the high
          // word.
 7605     __ mull($src$$Register);
 7606     __ addl(rdx, $tmp$$Register);
 7607   %}
 7608   ins_pipe( pipe_slow );
 7609 %}
 7610 
 7611 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7612 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit multiply specialised for the case where both operands have
        // zero high halves: a single MUL of the low words yields the full
        // result, so no temp register is needed. Its cost (400) is the lowest
        // of the mulL_eReg* rules in this group. Flags are clobbered.
 7613   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7614   match(Set dst (MulL dst src));
 7615   effect(KILL cr);
 7616   ins_cost(1*400);
 7617 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7618 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7619   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7620   ins_encode %{
 7621     __ mull($src$$Register);
 7622   %}
 7623   ins_pipe( pipe_slow );
 7624 %}
 7625 
 7626 // Multiply Register Long by small constant
 7627 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
        // 64-bit multiply by a constant of the immL_127 operand class.
        // Per the format: tmp = EDX * src (high-word contribution), the
        // constant is loaded into EDX, MUL forms EDX:EAX = src * EAX, and tmp
        // is added into EDX. Declared size is 12 bytes. tmp is a scratch
        // register; flags are clobbered.
 7628   match(Set dst (MulL dst src));
 7629   effect(KILL cr, TEMP tmp);
 7630   ins_cost(2*100+2*400);
 7631   size(12);
 7632 // Basic idea: lo(result) = lo(src * EAX)
 7633 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7634   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7635             "MOV    EDX,$src\n\t"
 7636             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7637             "ADD    EDX,$tmp" %}
 7638   ins_encode( long_multiply_con( dst, src, tmp ) );
 7639   ins_pipe( pipe_slow );
 7640 %}
 7641 
 7642 // Integer DIV with Register
 7643 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Signed 32-bit divide: the quotient is the rule's result in the rax
        // operand; rdx and the flags are declared clobbered. The format shows
        // a guard ahead of CDQ/IDIV: when EAX is 0x80000000 and the divisor is
        // -1, EDX is zeroed and the IDIV is skipped. Declared size is 26
        // bytes. NOTE(review): the guard is presumably emitted by cdq_enc,
        // which is defined elsewhere - confirm.
 7644   match(Set rax (DivI rax div));
 7645   effect(KILL rdx, KILL cr);
 7646   size(26);
 7647   ins_cost(30*100+10*100);
 7648   format %{ "CMP    EAX,0x80000000\n\t"
 7649             "JNE,s  normal\n\t"
 7650             "XOR    EDX,EDX\n\t"
 7651             "CMP    ECX,-1\n\t"
 7652             "JE,s   done\n"
 7653     "normal: CDQ\n\t"
 7654             "IDIV   $div\n\t"
 7655     "done:"        %}
 7656   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7657   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7658   ins_pipe( ialu_reg_reg_alu0 );
 7659 %}
 7660 
 7661 // Divide Register Long
 7662 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General 64-bit divide, implemented as a runtime call (effect CALL):
        // per the format, both operands are pushed as hi/lo pairs,
        // SharedRuntime::ldiv is called, and the 16 bytes of arguments are
        // popped afterwards. The very high cost (10000) steers the matcher
        // towards cheaper rules when they apply.
 7663   match(Set dst (DivL src1 src2));
 7664   effect(CALL);
 7665   ins_cost(10000);
 7666   format %{ "PUSH   $src1.hi\n\t"
 7667             "PUSH   $src1.lo\n\t"
 7668             "PUSH   $src2.hi\n\t"
 7669             "PUSH   $src2.lo\n\t"
 7670             "CALL   SharedRuntime::ldiv\n\t"
 7671             "ADD    ESP,16" %}
 7672   ins_encode( long_div(src1,src2) );
 7673   ins_pipe( pipe_slow );
 7674 %}
 7675 
 7676 // Integer DIVMOD with Register, both quotient and mod results
 7677 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Combined divide/remainder (DivModI). The match has no Set: rdx is
        // listed as an operand but, unlike divI_eReg, is not declared killed,
        // so both rax and rdx survive as results. Same size, format and
        // encoding list as divI_eReg. Flags are clobbered.
 7678   match(DivModI rax div);
 7679   effect(KILL cr);
 7680   size(26);
 7681   ins_cost(30*100+10*100);
 7682   format %{ "CMP    EAX,0x80000000\n\t"
 7683             "JNE,s  normal\n\t"
 7684             "XOR    EDX,EDX\n\t"
 7685             "CMP    ECX,-1\n\t"
 7686             "JE,s   done\n"
 7687     "normal: CDQ\n\t"
 7688             "IDIV   $div\n\t"
 7689     "done:"        %}
 7690   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7691   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7692   ins_pipe( pipe_slow );
 7693 %}
 7694 
 7695 // Integer MOD with Register
 7696 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
        // Signed 32-bit remainder: the result is the rdx operand; rax and the
        // flags are declared clobbered. Uses the same 26-byte encoding list
        // (cdq_enc, OpcP, RegOpc) as divI_eReg, so the format prints the same
        // guarded sequence instead of a bare CDQ/IDIV pair: when EAX is
        // 0x80000000 and the divisor is -1, EDX is zeroed and the IDIV is
        // skipped.
 7697   match(Set rdx (ModI rax div));
 7698   effect(KILL rax, KILL cr);
 7699 
 7700   size(26);
 7701   ins_cost(300);
 7702   format %{ "CMP    EAX,0x80000000\n\t"
                  "JNE,s  normal\n\t"
                  "XOR    EDX,EDX\n\t"
                  "CMP    ECX,-1\n\t"
                  "JE,s   done\n"
          "normal: CDQ\n\t"
 7703             "IDIV   $div\n\t"
          "done:"        %}
 7704   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7705   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7706   ins_pipe( ialu_reg_reg_alu0 );
 7707 %}
 7708 
 7709 // Remainder Register Long
 7710 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General 64-bit remainder, implemented as a runtime call (effect
        // CALL): per the format, both operands are pushed as hi/lo pairs,
        // SharedRuntime::lrem is called, and the 16 bytes of arguments are
        // popped afterwards. Mirrors divL_eReg, including the 10000 cost.
 7711   match(Set dst (ModL src1 src2));
 7712   effect(CALL);
 7713   ins_cost(10000);
 7714   format %{ "PUSH   $src1.hi\n\t"
 7715             "PUSH   $src1.lo\n\t"
 7716             "PUSH   $src2.hi\n\t"
 7717             "PUSH   $src2.lo\n\t"
 7718             "CALL   SharedRuntime::lrem\n\t"
 7719             "ADD    ESP,16" %}
 7720   ins_encode( long_mod(src1,src2) );
 7721   ins_pipe( pipe_slow );
 7722 %}
 7723 
 7724 // Divide Register Long (no special case since divisor != -1)
 7725 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
        // Inline 64-bit divide of dst (EDX:EAX) by a 32-bit constant, avoiding
        // the runtime call used by divL_eReg (cost 1000 versus 10000).
        // The constant's absolute value is loaded into tmp and the division is
        // done with unsigned 32-bit DIVs: one DIV when the quotient fits in 32
        // bits (fast path), otherwise two chained DIVs (high word, then low
        // word), with tmp2 holding the high word of the quotient. A negative
        // dividend is negated before and after the division; a negative
        // constant negates the final result. The assert excludes the divisors
        // 0, -1 and min_jint. tmp and tmp2 are scratch; flags are clobbered.
 7726   match(Set dst (DivL dst imm));
 7727   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7728   ins_cost(1000);
 7729   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7730             "XOR    $tmp2,$tmp2\n\t"
 7731             "CMP    $tmp,EDX\n\t"
 7732             "JA,s   fast\n\t"
 7733             "MOV    $tmp2,EAX\n\t"
 7734             "MOV    EAX,EDX\n\t"
 7735             "MOV    EDX,0\n\t"
 7736             "JLE,s  pos\n\t"
 7737             "LNEG   EAX : $tmp2\n\t"
 7738             "DIV    $tmp # unsigned division\n\t"
 7739             "XCHG   EAX,$tmp2\n\t"
 7740             "DIV    $tmp\n\t"
 7741             "LNEG   $tmp2 : EAX\n\t"
 7742             "JMP,s  done\n"
 7743     "pos:\n\t"
 7744             "DIV    $tmp\n\t"
 7745             "XCHG   EAX,$tmp2\n"
 7746     "fast:\n\t"
 7747             "DIV    $tmp\n"
 7748     "done:\n\t"
 7749             "MOV    EDX,$tmp2\n\t"
 7750             "NEG    EDX:EAX # if $imm < 0" %}
 7751   ins_encode %{
 7752     int con = (int)$imm$$constant;
 7753     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // Divide by the magnitude; the sign of the constant is applied at
          // the very end.
 7754     int pcon = (con > 0) ? con : -con;
 7755     Label Lfast, Lpos, Ldone;
 7756 
 7757     __ movl($tmp$$Register, pcon);
          // tmp2 = 0 is the high word of the quotient on the fast path.
 7758     __ xorl($tmp2$$Register,$tmp2$$Register);
 7759     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7760     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7761 
          // Slow path: divide the high word first. The moves below leave the
          // flags of the CMP above intact for the JLE.
 7762     __ movl($tmp2$$Register, $dst$$Register); // save
 7763     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7764     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7765     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7766 
 7767     // Negative dividend.
 7768     // convert value to positive to use unsigned division
 7769     __ lneg($dst$$Register, $tmp2$$Register);
 7770     __ divl($tmp$$Register);
 7771     __ xchgl($dst$$Register, $tmp2$$Register);
 7772     __ divl($tmp$$Register);
 7773     // revert result back to negative
 7774     __ lneg($tmp2$$Register, $dst$$Register);
 7775     __ jmpb(Ldone);
 7776 
 7777     __ bind(Lpos);
 7778     __ divl($tmp$$Register); // Use unsigned division
 7779     __ xchgl($dst$$Register, $tmp2$$Register);
 7780     // Fall through for final divide, tmp2 has 32 bit hi result
 7781 
 7782     __ bind(Lfast);
 7783     // fast path: src is positive
 7784     __ divl($tmp$$Register); // Use unsigned division
 7785 
 7786     __ bind(Ldone);
          // Reassemble the 64-bit quotient: high word from tmp2, low word
          // already in the low half of dst.
 7787     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7788     if (con < 0) {
 7789       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7790     }
 7791   %}
 7792   ins_pipe( pipe_slow );
 7793 %}
 7794 
 7795 // Remainder Register Long by a 32-bit constant (remainder fits into 32 bits)
      // The dividend is $dst (printed as EDX:EAX) and is overwritten with the
      // remainder.  Unsigned 32-bit DIVs by abs($imm) are used: a single DIV
      // when abs($imm) is (unsigned) above the high word, otherwise the high
      // word is divided first and its remainder feeds the DIV of the low word.
      // For a negative dividend the value is negated first and the remainder
      // negated afterwards.  The 32-bit remainder is finally moved to the low
      // word and sign-extended into the high word.
 7796 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7797   match(Set dst (ModL dst imm));
 7798   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7799   ins_cost(1000);
 7800   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7801             "CMP    $tmp,EDX\n\t"
 7802             "JA,s   fast\n\t"
 7803             "MOV    $tmp2,EAX\n\t"
 7804             "MOV    EAX,EDX\n\t"
 7805             "MOV    EDX,0\n\t"
 7806             "JLE,s  pos\n\t"
 7807             "LNEG   EAX : $tmp2\n\t"
 7808             "DIV    $tmp # unsigned division\n\t"
 7809             "MOV    EAX,$tmp2\n\t"
 7810             "DIV    $tmp\n\t"
 7811             "NEG    EDX\n\t"
 7812             "JMP,s  done\n"
 7813     "pos:\n\t"
 7814             "DIV    $tmp\n\t"
 7815             "MOV    EAX,$tmp2\n"
 7816     "fast:\n\t"
 7817             "DIV    $tmp\n"
 7818     "done:\n\t"
 7819             "MOV    EAX,EDX\n\t"
 7820             "SAR    EDX,31" %}
 7821   ins_encode %{
 7822     int con = (int)$imm$$constant;
          // 0, -1 and min_jint are excluded here; with min_jint ruled out the
          // negation below cannot overflow.
 7823     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7824     int pcon = (con > 0) ? con : -con;
 7825     Label  Lfast, Lpos, Ldone;
 7826
 7827     __ movl($tmp$$Register, pcon);
 7828     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7829     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7830
          // Slow path: divide the high word first, then the low word.
 7831     __ movl($tmp2$$Register, $dst$$Register); // save low word
 7832     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7833     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags from the CMP above
 7834     __ jccb(Assembler::lessEqual, Lpos); // signed tmp <= high word: dividend is positive
 7835
 7836     // Negative dividend.
 7837     // convert value to positive to use unsigned division
 7838     __ lneg($dst$$Register, $tmp2$$Register);
 7839     __ divl($tmp$$Register);
 7840     __ movl($dst$$Register, $tmp2$$Register);
 7841     __ divl($tmp$$Register);
 7842     // revert remainder back to negative
 7843     __ negl(HIGH_FROM_LOW($dst$$Register));
 7844     __ jmpb(Ldone);
 7845
 7846     __ bind(Lpos);
 7847     __ divl($tmp$$Register);
 7848     __ movl($dst$$Register, $tmp2$$Register);
 7849
 7850     __ bind(Lfast);
 7851     // fast path (also the second DIV of the positive slow path)
 7852     __ divl($tmp$$Register);
 7853
 7854     __ bind(Ldone);
          // The remainder is left in the high register by DIV: move it to the
          // low word and sign-extend it into the high word.
 7855     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7856     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7857
 7858   %}
 7859   ins_pipe( pipe_slow );
 7860 %}
 7861 
 7862 // Integer Shift Instructions
 7863 // Shift Left by one
      // In-place LShiftI of an int register by the immI_1 operand, using the
      // D1 /4 form (declared size: 2 bytes).  Kills the flags.
 7864 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7865   match(Set dst (LShiftI dst shift));
 7866   effect(KILL cr);
 7867
 7868   size(2);
 7869   format %{ "SHL    $dst,$shift" %}
 7870   opcode(0xD1, 0x4);  /* D1 /4 */
 7871   ins_encode( OpcP, RegOpc( dst ) );
 7872   ins_pipe( ialu_reg );
 7873 %}
 7874 
 7875 // Shift Left by 8-bit immediate
      // In-place LShiftI of an int register by an immI8 count, using the
      // C1 /4 ib form (declared size: 3 bytes).  Kills the flags.
 7876 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7877   match(Set dst (LShiftI dst shift));
 7878   effect(KILL cr);
 7879
 7880   size(3);
 7881   format %{ "SHL    $dst,$shift" %}
 7882   opcode(0xC1, 0x4);  /* C1 /4 ib */
 7883   ins_encode( RegOpcImm( dst, shift) );
 7884   ins_pipe( ialu_reg );
 7885 %}
 7886 
 7887 // Shift Left by variable
      // In-place LShiftI of an int register by a count held in an eCXRegI
      // operand, using the D3 /4 form (declared size: 2 bytes).  Kills the flags.
 7888 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7889   match(Set dst (LShiftI dst shift));
 7890   effect(KILL cr);
 7891
 7892   size(2);
 7893   format %{ "SHL    $dst,$shift" %}
 7894   opcode(0xD3, 0x4);  /* D3 /4 */
 7895   ins_encode( OpcP, RegOpc( dst ) );
 7896   ins_pipe( ialu_reg_reg );
 7897 %}
 7898 
 7899 // Arithmetic shift right by one
      // In-place RShiftI of an int register by the immI_1 operand, using the
      // D1 /7 form (declared size: 2 bytes).  Kills the flags.
 7900 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7901   match(Set dst (RShiftI dst shift));
 7902   effect(KILL cr);
 7903
 7904   size(2);
 7905   format %{ "SAR    $dst,$shift" %}
 7906   opcode(0xD1, 0x7);  /* D1 /7 */
 7907   ins_encode( OpcP, RegOpc( dst ) );
 7908   ins_pipe( ialu_reg );
 7909 %}
 7910 
 7911 // Arithmetic shift right of a memory operand by one
      // Matches the load / RShiftI / store pattern on the same memory operand
      // (StoreI dst (RShiftI (LoadI dst) shift)) so the shift is applied
      // directly to memory with the D1 /7 form.  Kills the flags.
 7912 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 7913   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7914   effect(KILL cr);
 7915   format %{ "SAR    $dst,$shift" %}
 7916   opcode(0xD1, 0x7);  /* D1 /7 */
 7917   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
 7918   ins_pipe( ialu_mem_imm );
 7919 %}
 7920 
 7921 // Arithmetic Shift Right by 8-bit immediate
      // In-place RShiftI of an int register by an immI8 count, using the
      // C1 /7 ib form (declared size: 3 bytes).  Kills the flags.
      // This is a register-only instruction, so it is scheduled with the
      // register pipeline class, matching the sibling register/immediate
      // shifts salI_eReg_imm and shrI_eReg_imm.
 7922 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7923   match(Set dst (RShiftI dst shift));
 7924   effect(KILL cr);
 7925
 7926   size(3);
 7927   format %{ "SAR    $dst,$shift" %}
 7928   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7929   ins_encode( RegOpcImm( dst, shift ) );
 7930   ins_pipe( ialu_reg );
 7931 %}
 7932 
 7933 // Arithmetic Shift Right of a memory operand by 8-bit immediate
      // Matches the load / RShiftI / store pattern on the same memory operand
      // so the shift is applied directly to memory with the C1 /7 ib form.
      // Kills the flags.
 7934 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 7935   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7936   effect(KILL cr);
 7937
 7938   format %{ "SAR    $dst,$shift" %}
 7939   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7940   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
 7941   ins_pipe( ialu_mem_imm );
 7942 %}
 7943 
 7944 // Arithmetic Shift Right by variable
      // In-place RShiftI of an int register by a count held in an eCXRegI
      // operand, using the D3 /7 form (declared size: 2 bytes).  Kills the flags.
 7945 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7946   match(Set dst (RShiftI dst shift));
 7947   effect(KILL cr);
 7948
 7949   size(2);
 7950   format %{ "SAR    $dst,$shift" %}
 7951   opcode(0xD3, 0x7);  /* D3 /7 */
 7952   ins_encode( OpcP, RegOpc( dst ) );
 7953   ins_pipe( ialu_reg_reg );
 7954 %}
 7955 
 7956 // Logical shift right by one
      // In-place URShiftI of an int register by the immI_1 operand, using the
      // D1 /5 form (declared size: 2 bytes).  Kills the flags.
 7957 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7958   match(Set dst (URShiftI dst shift));
 7959   effect(KILL cr);
 7960
 7961   size(2);
 7962   format %{ "SHR    $dst,$shift" %}
 7963   opcode(0xD1, 0x5);  /* D1 /5 */
 7964   ins_encode( OpcP, RegOpc( dst ) );
 7965   ins_pipe( ialu_reg );
 7966 %}
 7967 
 7968 // Logical Shift Right by 8-bit immediate
      // In-place URShiftI of an int register by an immI8 count, using the
      // C1 /5 ib form (declared size: 3 bytes).  Kills the flags.
 7969 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7970   match(Set dst (URShiftI dst shift));
 7971   effect(KILL cr);
 7972
 7973   size(3);
 7974   format %{ "SHR    $dst,$shift" %}
 7975   opcode(0xC1, 0x5);  /* C1 /5 ib */
 7976   ins_encode( RegOpcImm( dst, shift) );
 7977   ins_pipe( ialu_reg );
 7978 %}
 7979 
 7980 
 7981 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 7982 // This idiom is used by the compiler for the i2b bytecode.
      // The shift pair is replaced by a single sign-extending byte move
      // (MOVSX, declared size: 3 bytes).  No flags operand is declared.
 7983 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 7984   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 7985
 7986   size(3);
 7987   format %{ "MOVSX  $dst,$src :8" %}
 7988   ins_encode %{
 7989     __ movsbl($dst$$Register, $src$$Register);
 7990   %}
 7991   ins_pipe(ialu_reg_reg);
 7992 %}
 7993 
 7994 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 7995 // This idiom is used by the compiler for the i2s bytecode.
      // The shift pair is replaced by a single sign-extending 16-bit move
      // (MOVSX, declared size: 3 bytes).  No flags operand is declared.
 7996 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 7997   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 7998
 7999   size(3);
 8000   format %{ "MOVSX  $dst,$src :16" %}
 8001   ins_encode %{
 8002     __ movswl($dst$$Register, $src$$Register);
 8003   %}
 8004   ins_pipe(ialu_reg_reg);
 8005 %}
 8006 
 8007 
 8008 // Logical Shift Right by variable
      // In-place URShiftI of an int register by a count held in an eCXRegI
      // operand, using the D3 /5 form (declared size: 2 bytes).  Kills the flags.
 8009 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8010   match(Set dst (URShiftI dst shift));
 8011   effect(KILL cr);
 8012
 8013   size(2);
 8014   format %{ "SHR    $dst,$shift" %}
 8015   opcode(0xD3, 0x5);  /* D3 /5 */
 8016   ins_encode( OpcP, RegOpc( dst ) );
 8017   ins_pipe( ialu_reg_reg );
 8018 %}
 8019 
 8020 
 8021 //----------Logical Instructions-----------------------------------------------
 8022 //----------Integer Logical Instructions---------------------------------------
 8023 // And Instructions
 8024 // And Register with Register
      // In-place AndI of two int registers (opcode 0x23, declared size: 2 bytes).
      // Kills the flags.
 8025 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8026   match(Set dst (AndI dst src));
 8027   effect(KILL cr);
 8028
 8029   size(2);
 8030   format %{ "AND    $dst,$src" %}
 8031   opcode(0x23);
 8032   ins_encode( OpcP, RegReg( dst, src) );
 8033   ins_pipe( ialu_reg_reg );
 8034 %}
 8035 
 8036 // And Register with Immediate
      // In-place AndI of an int register with an immI constant (opcode 81 /4).
      // No size() is declared; the immediate is emitted through Con8or32.
      // Kills the flags.
 8037 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8038   match(Set dst (AndI dst src));
 8039   effect(KILL cr);
 8040
 8041   format %{ "AND    $dst,$src" %}
 8042   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8043   // ins_encode( RegImm( dst, src) );
 8044   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8045   ins_pipe( ialu_reg );
 8046 %}
 8047 
 8048 // And Register with Memory
      // In-place AndI of an int register with an int loaded from memory; the
      // LoadI is folded into the instruction (opcode 0x23).  Kills the flags.
 8049 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8050   match(Set dst (AndI dst (LoadI src)));
 8051   effect(KILL cr);
 8052
 8053   ins_cost(150);
 8054   format %{ "AND    $dst,$src" %}
 8055   opcode(0x23);
 8056   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8057   ins_pipe( ialu_reg_mem );
 8058 %}
 8059 
 8060 // And Memory with Register
      // Matches the load / AndI / store pattern on the same memory operand so
      // the AND is applied directly to memory (opcode 21 /r).  Kills the flags.
 8061 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8062   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8063   effect(KILL cr);
 8064
 8065   ins_cost(150);
 8066   format %{ "AND    $dst,$src" %}
 8067   opcode(0x21);  /* Opcode 21 /r */
 8068   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8069   ins_pipe( ialu_mem_reg );
 8070 %}
 8071 
 8072 // And Memory with Immediate
      // Matches the load / AndI / store pattern on the same memory operand so
      // the AND with an immI constant is applied directly to memory
      // (opcode 81 /4).  Kills the flags.
 8073 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8074   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8075   effect(KILL cr);
 8076
 8077   ins_cost(125);
 8078   format %{ "AND    $dst,$src" %}
 8079   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8080   // ins_encode( MemImm( dst, src) );
 8081   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8082   ins_pipe( ialu_mem_imm );
 8083 %}
 8084 
 8085 // BMI1 instructions
 8086 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
        // And-not, register form: matches (src1 XOR minus_1) AND src2 and
        // emits a single ANDNL.  Only selected when UseBMI1Instructions is
        // set.  Kills the flags.
 8087   match(Set dst (AndI (XorI src1 minus_1) src2));
 8088   predicate(UseBMI1Instructions);
 8089   effect(KILL cr);
 8090
 8091   format %{ "ANDNL  $dst, $src1, $src2" %}
 8092
 8093   ins_encode %{
          Register Rdst  = $dst$$Register;
          Register Rsrc1 = $src1$$Register;
          Register Rsrc2 = $src2$$Register;
 8094     __ andnl(Rdst, Rsrc1, Rsrc2);
 8095   %}
 8096   ins_pipe(ialu_reg);
 8097 %}
 8098 
 8099 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
        // And-not, memory form: matches (src1 XOR minus_1) AND (LoadI src2)
        // and emits a single ANDNL with the load folded in.  Only selected
        // when UseBMI1Instructions is set.  Kills the flags.
 8100   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8101   predicate(UseBMI1Instructions);
 8102   effect(KILL cr);
 8103
 8104   ins_cost(125);
 8105   format %{ "ANDNL  $dst, $src1, $src2" %}
 8106
 8107   ins_encode %{
 8108     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8109   %}
 8110   ins_pipe(ialu_reg_mem);
 8111 %}
 8112 
 8113 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
        // Register form: matches (imm_zero - src) AND src and emits a single
        // BLSIL.  Only selected when UseBMI1Instructions is set.  Kills the
        // flags.
 8114   match(Set dst (AndI (SubI imm_zero src) src));
 8115   predicate(UseBMI1Instructions);
 8116   effect(KILL cr);
 8117
 8118   format %{ "BLSIL  $dst, $src" %}
 8119
 8120   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8121     __ blsil(Rdst, Rsrc);
 8122   %}
 8123   ins_pipe(ialu_reg);
 8124 %}
 8125 
 8126 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
        // Memory form: matches (imm_zero - LoadI src) AND (LoadI src), with
        // both loads naming the same memory operand, and emits a single BLSIL
        // on that address.  Only selected when UseBMI1Instructions is set.
        // Kills the flags.
 8127   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8128   predicate(UseBMI1Instructions);
 8129   effect(KILL cr);
 8130
 8131   ins_cost(125);
 8132   format %{ "BLSIL  $dst, $src" %}
 8133
 8134   ins_encode %{
 8135     __ blsil($dst$$Register, $src$$Address);
 8136   %}
 8137   ins_pipe(ialu_reg_mem);
 8138 %}
 8139 
 8140 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8141 %{
        // Register form: matches (src + minus_1) XOR src and emits a single
        // BLSMSKL.  Only selected when UseBMI1Instructions is set.  Kills the
        // flags.
 8142   match(Set dst (XorI (AddI src minus_1) src));
 8143   predicate(UseBMI1Instructions);
 8144   effect(KILL cr);
 8145
 8146   format %{ "BLSMSKL $dst, $src" %}
 8147
 8148   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8149     __ blsmskl(Rdst, Rsrc);
 8150   %}
 8151
 8152   ins_pipe(ialu_reg);
 8153 %}
 8154 
 8155 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8156 %{
        // Memory form: matches (LoadI src + minus_1) XOR (LoadI src), with
        // both loads naming the same memory operand, and emits a single
        // BLSMSKL on that address.  Only selected when UseBMI1Instructions is
        // set.  Kills the flags.
 8157   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8158   predicate(UseBMI1Instructions);
 8159   effect(KILL cr);
 8160
 8161   ins_cost(125);
 8162   format %{ "BLSMSKL $dst, $src" %}
 8163
 8164   ins_encode %{
 8165     __ blsmskl($dst$$Register, $src$$Address);
 8166   %}
 8167
 8168   ins_pipe(ialu_reg_mem);
 8169 %}
 8170 
 8171 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8172 %{
        // Register form: matches (src + minus_1) AND src and emits a single
        // BLSRL.  Only selected when UseBMI1Instructions is set.  Kills the
        // flags.
 8173   match(Set dst (AndI (AddI src minus_1) src) );
 8174   predicate(UseBMI1Instructions);
 8175   effect(KILL cr);
 8176
 8177   format %{ "BLSRL  $dst, $src" %}
 8178
 8179   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8180     __ blsrl(Rdst, Rsrc);
 8181   %}
 8182
 8183   ins_pipe(ialu_reg);
 8184 %}
 8185 
 8186 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8187 %{
        // Memory form: matches (LoadI src + minus_1) AND (LoadI src), with
        // both loads naming the same memory operand, and emits a single BLSRL
        // on that address.  Only selected when UseBMI1Instructions is set.
        // Kills the flags.
 8188   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8189   predicate(UseBMI1Instructions);
 8190   effect(KILL cr);
 8191
 8192   ins_cost(125);
 8193   format %{ "BLSRL  $dst, $src" %}
 8194
 8195   ins_encode %{
 8196     __ blsrl($dst$$Register, $src$$Address);
 8197   %}
 8198
 8199   ins_pipe(ialu_reg_mem);
 8200 %}
 8201 
 8202 // Or Instructions
 8203 // Or Register with Register
      // In-place OrI of two int registers (opcode 0x0B, declared size: 2 bytes).
      // Kills the flags.
 8204 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8205   match(Set dst (OrI dst src));
 8206   effect(KILL cr);
 8207
 8208   size(2);
 8209   format %{ "OR     $dst,$src" %}
 8210   opcode(0x0B);
 8211   ins_encode( OpcP, RegReg( dst, src) );
 8212   ins_pipe( ialu_reg_reg );
 8213 %}
 8214 
 8215 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
        // Or Register with a pointer register: folds the CastP2X of $src into
        // the OR, using the same encoding as orI_eReg (opcode 0x0B, declared
        // size: 2 bytes).  Kills the flags.
 8216   match(Set dst (OrI dst (CastP2X src)));
 8217   effect(KILL cr);
 8218
 8219   size(2);
 8220   format %{ "OR     $dst,$src" %}
 8221   opcode(0x0B);
 8222   ins_encode( OpcP, RegReg( dst, src) );
 8223   ins_pipe( ialu_reg_reg );
 8224 %}
 8225 
 8226 
 8227 // Or Register with Immediate
      // In-place OrI of an int register with an immI constant (opcode 81 /1).
      // No size() is declared; the immediate is emitted through Con8or32.
      // Kills the flags.
 8228 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8229   match(Set dst (OrI dst src));
 8230   effect(KILL cr);
 8231
 8232   format %{ "OR     $dst,$src" %}
 8233   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8234   // ins_encode( RegImm( dst, src) );
 8235   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8236   ins_pipe( ialu_reg );
 8237 %}
 8238 
 8239 // Or Register with Memory
      // In-place OrI of an int register with an int loaded from memory; the
      // LoadI is folded into the instruction (opcode 0x0B).  Kills the flags.
 8240 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8241   match(Set dst (OrI dst (LoadI src)));
 8242   effect(KILL cr);
 8243
 8244   ins_cost(150);
 8245   format %{ "OR     $dst,$src" %}
 8246   opcode(0x0B);
 8247   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8248   ins_pipe( ialu_reg_mem );
 8249 %}
 8250 
 8251 // Or Memory with Register
      // Matches the load / OrI / store pattern on the same memory operand so
      // the OR is applied directly to memory (opcode 09 /r).  Kills the flags.
 8252 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8253   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8254   effect(KILL cr);
 8255
 8256   ins_cost(150);
 8257   format %{ "OR     $dst,$src" %}
 8258   opcode(0x09);  /* Opcode 09 /r */
 8259   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8260   ins_pipe( ialu_mem_reg );
 8261 %}
 8262 
 8263 // Or Memory with Immediate
      // Matches the load / OrI / store pattern on the same memory operand so
      // the OR with an immI constant is applied directly to memory
      // (opcode 81 /1).  Kills the flags.
 8264 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8265   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8266   effect(KILL cr);
 8267
 8268   ins_cost(125);
 8269   format %{ "OR     $dst,$src" %}
 8270   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8271   // ins_encode( MemImm( dst, src) );
 8272   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8273   ins_pipe( ialu_mem_imm );
 8274 %}
 8275 
 8276 // ROL/ROR
 8277 // ROL expand
 8278 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Rotate-left-by-one helper (D1 /0).  It has no match rule: it is
        // instantiated from the expand rule of rolI_eReg_i1.  $dst is both
        // read and written; the flags are killed.
 8279   effect(USE_DEF dst, USE shift, KILL cr);
 8280
 8281   format %{ "ROL    $dst, $shift" %}
 8282   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8283   ins_encode( OpcP, RegOpc( dst ));
 8284   ins_pipe( ialu_reg );
 8285 %}
 8286 
 8287 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Rotate-left-by-immediate helper (C1 /0).  It has no match rule: it
        // is instantiated from the expand rule of rolI_eReg_i8.  $dst is both
        // read and written; the flags are killed.
 8288   effect(USE_DEF dst, USE shift, KILL cr);
 8289
 8290   format %{ "ROL    $dst, $shift" %}
 8291   opcode(0xC1, 0x0); /* Opcode C1 /0 */
 8292   ins_encode( RegOpcImm(dst, shift) );
 8293   ins_pipe(ialu_reg);
 8294 %}
 8295 
 8296 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // Rotate-left-by-variable helper (D3 /0): the count is an eCXRegI
        // operand and $dst is restricted to the ncxRegI class.  It has no
        // match rule: it is instantiated from the expand rules of
        // rolI_eReg_Var_C0 and rolI_eReg_Var_C32.  The flags are killed.
 8297   effect(USE_DEF dst, USE shift, KILL cr);
 8298
 8299   format %{ "ROL    $dst, $shift" %}
 8300   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8301   ins_encode(OpcP, RegOpc(dst));
 8302   ins_pipe( ialu_reg_reg );
 8303 %}
 8304 // end of ROL expand
 8305 
 8306 // ROL 32bit by one once
      // Recognizes (dst << lshift) | (dst >>> rshift) with lshift of type
      // immI_1 and rshift of type immI_M1, and expands it to the ROL-by-one
      // helper rolI_eReg_imm1.
 8307 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8308   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8309
 8310   expand %{
 8311     rolI_eReg_imm1(dst, lshift, cr);
 8312   %}
 8313 %}
 8314 
 8315 // ROL 32bit var by imm8 once
      // Recognizes (dst << lshift) | (dst >>> rshift) for two immI8 counts and
      // expands it to rolI_eReg_imm8 with the left-shift count.  The predicate
      // only admits the pattern when the two counts sum to a multiple of 32
      // (low five bits of the sum are zero).
 8316 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8317   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8318   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8319
 8320   expand %{
 8321     rolI_eReg_imm8(dst, lshift, cr);
 8322   %}
 8323 %}
 8324 
 8325 // ROL 32bit var by var once
      // Recognizes (dst << shift) | (dst >>> (0 - shift)) and expands it to
      // the variable-count ROL helper rolI_eReg_CL.
 8326 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8327   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8328
 8329   expand %{
 8330     rolI_eReg_CL(dst, shift, cr);
 8331   %}
 8332 %}
 8333 
 8334 // ROL 32bit var by var once
      // Recognizes (dst << shift) | (dst >>> (c32 - shift)), with c32 of type
      // immI_32, and expands it to the variable-count ROL helper rolI_eReg_CL.
 8335 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8336   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8337
 8338   expand %{
 8339     rolI_eReg_CL(dst, shift, cr);
 8340   %}
 8341 %}
 8342 
 8343 // ROR expand
 8344 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Rotate-right-by-one helper (D1 /1).  It has no match rule: it is
        // instantiated from the expand rule of rorI_eReg_i1.  $dst is both
        // read and written; the flags are killed.
 8345   effect(USE_DEF dst, USE shift, KILL cr);
 8346
 8347   format %{ "ROR    $dst, $shift" %}
 8348   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8349   ins_encode( OpcP, RegOpc( dst ) );
 8350   ins_pipe( ialu_reg );
 8351 %}
 8352 
 8353 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Rotate-right-by-immediate helper (C1 /1 ib).  It has no match rule:
        // it is instantiated from the expand rule of rorI_eReg_i8.  $dst is
        // both read and written; the flags are killed.
 8354   effect (USE_DEF dst, USE shift, KILL cr);
 8355
 8356   format %{ "ROR    $dst, $shift" %}
 8357   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8358   ins_encode( RegOpcImm(dst, shift) );
 8359   ins_pipe( ialu_reg );
 8360 %}
 8361 
 8362 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
        // Rotate-right-by-variable helper (D3 /1): the count is an eCXRegI
        // operand and $dst is restricted to the ncxRegI class.  It has no
        // match rule: it is instantiated from the expand rules of
        // rorI_eReg_Var_C0 and rorI_eReg_Var_C32.  The flags are killed.
 8363   effect(USE_DEF dst, USE shift, KILL cr);
 8364
 8365   format %{ "ROR    $dst, $shift" %}
 8366   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8367   ins_encode(OpcP, RegOpc(dst));
 8368   ins_pipe( ialu_reg_reg );
 8369 %}
 8370 // end of ROR expand
 8371 
 8372 // ROR 32bit by one once
      // Recognizes (dst >>> rshift) | (dst << lshift) with rshift of type
      // immI_1 and lshift of type immI_M1, and expands it to the ROR-by-one
      // helper rorI_eReg_imm1.
 8373 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8374   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8375
 8376   expand %{
 8377     rorI_eReg_imm1(dst, rshift, cr);
 8378   %}
 8379 %}
 8380 
 8381 // ROR 32bit by immI8 once
      // Recognizes (dst >>> rshift) | (dst << lshift) for two immI8 counts and
      // expands it to rorI_eReg_imm8 with the right-shift count.  The
      // predicate only admits the pattern when the two counts sum to a
      // multiple of 32 (low five bits of the sum are zero).
 8382 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8383   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8384   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8385
 8386   expand %{
 8387     rorI_eReg_imm8(dst, rshift, cr);
 8388   %}
 8389 %}
 8390 
 8391 // ROR 32bit var by var once
      // Recognizes (dst >>> shift) | (dst << (0 - shift)) and expands it to
      // the variable-count ROR helper rorI_eReg_CL.
 8392 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8393   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8394
 8395   expand %{
 8396     rorI_eReg_CL(dst, shift, cr);
 8397   %}
 8398 %}
 8399 
 8400 // ROR 32bit var by var once
      // Recognizes (dst >>> shift) | (dst << (c32 - shift)), with c32 of type
      // immI_32, and expands it to the variable-count ROR helper rorI_eReg_CL.
 8401 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8402   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8403
 8404   expand %{
 8405     rorI_eReg_CL(dst, shift, cr);
 8406   %}
 8407 %}
 8408 
 8409 // Xor Instructions
 8410 // Xor Register with Register
      // In-place XorI of two int registers (opcode 0x33, declared size: 2 bytes).
      // Kills the flags.
 8411 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8412   match(Set dst (XorI dst src));
 8413   effect(KILL cr);
 8414
 8415   size(2);
 8416   format %{ "XOR    $dst,$src" %}
 8417   opcode(0x33);
 8418   ins_encode( OpcP, RegReg( dst, src) );
 8419   ins_pipe( ialu_reg_reg );
 8420 %}
 8421 
 8422 // Xor Register with Immediate -1
      // XorI with the immI_M1 operand is emitted as a NOT of the register
      // (declared size: 2 bytes).  Unlike the other XOR forms, no flags
      // operand is declared or killed.
 8423 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8424   match(Set dst (XorI dst imm));
 8425
 8426   size(2);
 8427   format %{ "NOT    $dst" %}
 8428   ins_encode %{
 8429      __ notl($dst$$Register);
 8430   %}
 8431   ins_pipe( ialu_reg );
 8432 %}
 8433 
 8434 // Xor Register with Immediate
      // In-place XorI of an int register with an immI constant (opcode 81 /6).
      // No size() is declared; the immediate is emitted through Con8or32.
      // Kills the flags.
 8435 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8436   match(Set dst (XorI dst src));
 8437   effect(KILL cr);
 8438
 8439   format %{ "XOR    $dst,$src" %}
 8440   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8441   // ins_encode( RegImm( dst, src) );
 8442   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8443   ins_pipe( ialu_reg );
 8444 %}
 8445 
 8446 // Xor Register with Memory
      // In-place XorI of an int register with an int loaded from memory; the
      // LoadI is folded into the instruction (opcode 0x33).  Kills the flags.
 8447 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8448   match(Set dst (XorI dst (LoadI src)));
 8449   effect(KILL cr);
 8450
 8451   ins_cost(150);
 8452   format %{ "XOR    $dst,$src" %}
 8453   opcode(0x33);
 8454   ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
 8455   ins_pipe( ialu_reg_mem );
 8456 %}
 8457 
 8458 // Xor Memory with Register
      // Matches the load / XorI / store pattern on the same memory operand so
      // the XOR is applied directly to memory (opcode 31 /r).  Kills the flags.
 8459 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8460   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8461   effect(KILL cr);
 8462
 8463   ins_cost(150);
 8464   format %{ "XOR    $dst,$src" %}
 8465   opcode(0x31);  /* Opcode 31 /r */
 8466   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8467   ins_pipe( ialu_mem_reg );
 8468 %}
 8469 
 8470 // Xor Memory with Immediate
      // Matches the load / XorI / store pattern on the same memory operand so
      // the XOR with an immI constant is applied directly to memory
      // (opcode 81 /6).  Kills the flags.
 8471 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8472   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8473   effect(KILL cr);
 8474
 8475   ins_cost(125);
 8476   format %{ "XOR    $dst,$src" %}
 8477   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8478   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8479   ins_pipe( ialu_mem_imm );
 8480 %}
 8481 
 8482 //----------Convert Int to Boolean---------------------------------------------
 8483 
 8484 instruct movI_nocopy(rRegI dst, rRegI src) %{
        // Plain int register-to-register move with no match rule; it is the
        // first step of the convI2B expansion.
 8485   effect( DEF dst, USE src );
 8486   format %{ "MOV    $dst,$src" %}
 8487   ins_encode( enc_Copy( dst, src) );
 8488   ins_pipe( ialu_reg_reg );
 8489 %}
 8490 
 8491 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Second step of the convI2B expansion (no match rule): NEG $dst
        // followed by ADC $dst,$src (opcode 0x13), 4 bytes in total.  When
        // $dst holds a copy of $src on entry, as arranged by convI2B, NEG
        // leaves the carry set iff the value is non-zero, so -src + src +
        // carry yields 1 for non-zero input and 0 for zero.  Kills the flags.
 8492   effect( USE_DEF dst, USE src, KILL cr );
 8493
 8494   size(4);
 8495   format %{ "NEG    $dst\n\t"
 8496             "ADC    $dst,$src" %}
 8497   ins_encode( neg_reg(dst),
 8498               OpcRegReg(0x13,dst,src) );
 8499   ins_pipe( ialu_reg_reg_long );
 8500 %}
 8501 
 8502 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Conv2B of an int register: expands to a copy of $src into $dst
        // (movI_nocopy) followed by the NEG/ADC pair of ci2b.
 8503   match(Set dst (Conv2B src));
 8504
 8505   expand %{
 8506     movI_nocopy(dst,src);
 8507     ci2b(dst,src,cr);
 8508   %}
 8509 %}
 8510 
 8511 instruct movP_nocopy(rRegI dst, eRegP src) %{
        // Pointer-register to int-register move with no match rule; it is the
        // first step of the convP2B expansion.
 8512   effect( DEF dst, USE src );
 8513   format %{ "MOV    $dst,$src" %}
 8514   ins_encode( enc_Copy( dst, src) );
 8515   ins_pipe( ialu_reg_reg );
 8516 %}
 8517 
 8518 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Second step of the convP2B expansion (no match rule): the same
        // NEG $dst / ADC $dst,$src pair as ci2b, with a pointer register as
        // $src.  Unlike ci2b, no size() is declared here.  Kills the flags.
 8519   effect( USE_DEF dst, USE src, KILL cr );
 8520   format %{ "NEG    $dst\n\t"
 8521             "ADC    $dst,$src" %}
 8522   ins_encode( neg_reg(dst),
 8523               OpcRegReg(0x13,dst,src) );
 8524   ins_pipe( ialu_reg_reg_long );
 8525 %}
 8526 
 8527 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Conv2B of a pointer register: expands to a copy of $src into $dst
        // (movP_nocopy) followed by the NEG/ADC pair of cp2b.
 8528   match(Set dst (Conv2B src));
 8529
 8530   expand %{
 8531     movP_nocopy(dst,src);
 8532     cp2b(dst,src,cr);
 8533   %}
 8534 %}
 8535 
 8536 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
        // CmpLTMask of two int registers: $dst is cleared, set to 1 by SETlt
        // when p < q (signed), then negated, giving -1 (all ones) or 0.
        // $dst is an eCXRegI operand while p and q are ncxRegI operands, so
        // clearing $dst before the compare cannot clobber an input.  Kills
        // the flags.
 8537   match(Set dst (CmpLTMask p q));
 8538   effect(KILL cr);
 8539   ins_cost(400);
 8540
 8541   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8542   format %{ "XOR    $dst,$dst\n\t"
 8543             "CMP    $p,$q\n\t"
 8544             "SETlt  $dst\n\t"
 8545             "NEG    $dst" %}
 8546   ins_encode %{
 8547     Register Rp = $p$$Register;
 8548     Register Rq = $q$$Register;
 8549     Register Rd = $dst$$Register;
 8551     __ xorl(Rd, Rd);
 8552     __ cmpl(Rp, Rq);
 8553     __ setb(Assembler::less, Rd);
 8554     __ negl(Rd);
 8555   %}
 8556
 8557   ins_pipe(pipe_slow);
 8558 %}
 8559 
 8560 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // CmpLTMask against the immI_0 operand: an arithmetic shift right by
        // 31 replicates the sign bit of $dst, giving all ones for a negative
        // value and 0 otherwise.  Operates in place and kills the flags.
 8561   match(Set dst (CmpLTMask dst zero));
 8562   effect(DEF dst, KILL cr);
 8563   ins_cost(100);
 8564
 8565   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8566   ins_encode %{
 8567   __ sarl($dst$$Register, 31);
 8568   %}
 8569   ins_pipe(ialu_reg);
 8570 %}
 8571 
 8572 /* better to save a register than avoid a branch */
      // Conditional add: matches ((p < q ? -1 : 0) & y) + (p - q) and
      // implements it as p -= q, followed by p += y only when the subtraction
      // left "less" (the JGE skips the add).  Operates in place on $p and
      // kills the flags.
 8573 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8574   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8575   effect(KILL cr);
 8576   ins_cost(400);
 8577   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8578             "JGE    done\n\t"
 8579             "ADD    $p,$y\n"
 8580             "done:  " %}
 8581   ins_encode %{
 8585     Label Lskip_add;
 8586     __ subl($p$$Register, $q$$Register);
 8587     __ jccb(Assembler::greaterEqual, Lskip_add);
 8588     __ addl($p$$Register, $y$$Register);
 8589     __ bind(Lskip_add);
 8590   %}
 8591
 8592   ins_pipe(pipe_cmplt);
 8593 %}
 8594 
 8595 /* better to save a register than avoid a branch */
      // Masked select: matches (p < q ? -1 : 0) & y and implements it with a
      // compare and branch: $y is left untouched when p < q (signed) and
      // cleared otherwise.  Operates in place on $y and kills the flags.
 8596 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8597   match(Set y (AndI (CmpLTMask p q) y));
 8598   effect(KILL cr);
 8599
 8600   ins_cost(300);
 8601
 8602   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8603             "JLT      done\n\t"
 8604             "XORL     $y, $y\n"
 8605             "done:  " %}
 8606   ins_encode %{
 8610     Label Lkeep_y;
 8611     __ cmpl($p$$Register, $q$$Register);
 8612     __ jccb(Assembler::less, Lkeep_y);
 8613     __ xorl($y$$Register, $y$$Register);
 8614     __ bind(Lkeep_y);
 8615   %}
 8616
 8617   ins_pipe(pipe_cmplt);
 8618 %}
 8619 
 8620 /* If I enable this, I encourage spilling in the inner loop of compress.
 8621 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8622   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8623 */
 8624 //----------Overflow Math Instructions-----------------------------------------
 8625 
 8626 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8627 %{
        // Overflow check for int addition, register form: the flags are the
        // result (DEF cr).  The ADD is performed for real, so $op1 is
        // declared USE_KILL.
 8628   match(Set cr (OverflowAddI op1 op2));
 8629   effect(DEF cr, USE_KILL op1, USE op2);
 8630
 8631   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8632
 8633   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8634     __ addl(Rop1, Rop2);
 8635   %}
 8636   ins_pipe(ialu_reg_reg);
 8637 %}
 8638 
 8639 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8640 %{
        // Overflow check for int addition, immediate form: the flags are the
        // result (DEF cr).  The ADD is performed for real, so $op1 is
        // declared USE_KILL.
 8641   match(Set cr (OverflowAddI op1 op2));
 8642   effect(DEF cr, USE_KILL op1, USE op2);
 8643
 8644   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8645
 8646   ins_encode %{
 8647     __ addl($op1$$Register, $op2$$constant);
 8648   %}
 8649   ins_pipe(ialu_reg_reg);
 8650 %}
 8651 
 8652 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8653 %{
        // Overflow check for int subtraction, register form: a CMP produces
        // the flags, so neither operand is modified and no effect() is
        // declared.
 8654   match(Set cr (OverflowSubI op1 op2));
 8655
 8656   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8657   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8658     __ cmpl(Rop1, Rop2);
 8659   %}
 8660   ins_pipe(ialu_reg_reg);
 8661 %}
 8662 
 8663 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8664 %{
        // Overflow check for int subtraction, immediate form: a CMP produces
        // the flags, so $op1 is not modified and no effect() is declared.
 8665   match(Set cr (OverflowSubI op1 op2));
 8666
 8667   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8668   ins_encode %{
 8669     __ cmpl($op1$$Register, $op2$$constant);
 8670   %}
 8671   ins_pipe(ialu_reg_reg);
 8672 %}
 8673 
 8674 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8675 %{
        // Overflow check for int negation: matches OverflowSubI with the
        // immI_0 operand on the left and emits NEG.  The NEG is performed for
        // real, so $op2 is declared USE_KILL.
 8676   match(Set cr (OverflowSubI zero op2));
 8677   effect(DEF cr, USE_KILL op2);
 8678
 8679   format %{ "NEG    $op2\t# overflow check int" %}
 8680   ins_encode %{
 8681     __ negl($op2$$Register);
 8682   %}
 8683   ins_pipe(ialu_reg_reg);
 8684 %}
 8685 
 8686 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8687 %{
        // Overflow check for int multiplication, register form: the flags
        // are the result (DEF cr).  The IMUL is performed for real, so $op1
        // is declared USE_KILL.
 8688   match(Set cr (OverflowMulI op1 op2));
 8689   effect(DEF cr, USE_KILL op1, USE op2);
 8690
 8691   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8692   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8693     __ imull(Rop1, Rop2);
 8694   %}
 8695   ins_pipe(ialu_reg_reg_alu0);
 8696 %}
 8697 
 8698 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8699 %{
        // Overflow check for int multiplication, immediate form: the
        // three-operand IMUL writes the product into the TEMP register, so
        // $op1 is only read (USE) and is not clobbered.
 8700   match(Set cr (OverflowMulI op1 op2));
 8701   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8702
 8703   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8704   ins_encode %{
 8705     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8706   %}
 8707   ins_pipe(ialu_reg_reg_alu0);
 8708 %}
 8709 
 8710 // Integer Absolute Instructions
 8711 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8712 %{
        // Branch-free int absolute value: tmp = src >> 31 (arithmetic), i.e.
        // the sign replicated across the word, then dst = (src ^ tmp) - tmp.
        // $dst is declared TEMP as well, since it is written while $src is
        // still needed.  Kills the flags.
 8713   match(Set dst (AbsI src));
 8714   effect(TEMP dst, TEMP tmp, KILL cr);
 8715   format %{ "movl $tmp, $src\n\t"
 8716             "sarl $tmp, 31\n\t"
 8717             "movl $dst, $src\n\t"
 8718             "xorl $dst, $tmp\n\t"
 8719             "subl $dst, $tmp\n"
 8720           %}
 8721   ins_encode %{
          Register Rsrc  = $src$$Register;
          Register Rdst  = $dst$$Register;
          Register Rsign = $tmp$$Register;
 8722     __ movl(Rsign, Rsrc);
 8723     __ sarl(Rsign, 31);
 8724     __ movl(Rdst, Rsrc);
 8725     __ xorl(Rdst, Rsign);
 8726     __ subl(Rdst, Rsign);
 8727   %}
 8728
 8729   ins_pipe(ialu_reg_reg);
 8730 %}
 8731 
 8732 //----------Long Instructions------------------------------------------------
 8733 // Add Long Register with Register
      // In-place AddL of two long register pairs: ADD on the low words
      // (opcode 0x03) followed by ADC on the high words (opcode 0x13), so the
      // carry propagates.  Kills the flags.
 8734 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8735   match(Set dst (AddL dst src));
 8736   effect(KILL cr);
 8737   ins_cost(200);
 8738   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8739             "ADC    $dst.hi,$src.hi" %}
 8740   opcode(0x03, 0x13);
 8741   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8742   ins_pipe( ialu_reg_reg_long );
 8743 %}
 8744 
 8745 // Add Long Register with Immediate
      // 64-bit add of a long constant: the 0x81 group opcode is applied to
      // each half, /0 (ADD) on the low word and /2 (ADC) on the high word.
      // The flags are clobbered (KILL cr).
 8746 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8747   match(Set dst (AddL dst src));
 8748   effect(KILL cr);
 8749   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8750             "ADC    $dst.hi,$src.hi" %}
 8751   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8752   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8753   ins_pipe( ialu_reg_long );
 8754 %}
 8755 
 8756 // Add Long Register with Memory
      // Folds the LoadL into the add: the low word is added from $mem and
      // the high word (with carry) from $mem+4, per the format. OpcP/OpcS
      // emit the primary/secondary opcode ahead of each memory operand.
 8757 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8758   match(Set dst (AddL dst (LoadL mem)));
 8759   effect(KILL cr);
 8760   ins_cost(125);
 8761   format %{ "ADD    $dst.lo,$mem\n\t"
 8762             "ADC    $dst.hi,$mem+4" %}
 8763   opcode(0x03, 0x13);
 8764   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8765   ins_pipe( ialu_reg_long_mem );
 8766 %}
 8767 
 8768 // Subtract Long Register with Register.
      // 64-bit subtract on a register pair: SUB on the low words, then SBB
      // on the high words so the borrow propagates (see format).
 8769 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8770   match(Set dst (SubL dst src));
 8771   effect(KILL cr);
 8772   ins_cost(200);
 8773   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8774             "SBB    $dst.hi,$src.hi" %}
 8775   opcode(0x2B, 0x1B);
 8776   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8777   ins_pipe( ialu_reg_reg_long );
 8778 %}
 8779 
 8780 // Subtract Long Register with Immediate
      // 64-bit subtract of a long constant: 0x81 /5 (SUB) on the low word,
      // 0x81 /3 (SBB) on the high word. The flags are clobbered (KILL cr).
 8781 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8782   match(Set dst (SubL dst src));
 8783   effect(KILL cr);
 8784   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8785             "SBB    $dst.hi,$src.hi" %}
 8786   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8787   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8788   ins_pipe( ialu_reg_long );
 8789 %}
 8790 
 8791 // Subtract Long Register with Memory
      // Folds the LoadL into the subtract: SUB against the low word at $mem,
      // SBB against the high word at $mem+4 (see format).
 8792 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8793   match(Set dst (SubL dst (LoadL mem)));
 8794   effect(KILL cr);
 8795   ins_cost(125);
 8796   format %{ "SUB    $dst.lo,$mem\n\t"
 8797             "SBB    $dst.hi,$mem+4" %}
 8798   opcode(0x2B, 0x1B);
 8799   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8800   ins_pipe( ialu_reg_long_mem );
 8801 %}
 8802 
 8803 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // 64-bit negate, matched as (0 - dst) and done in place. Per the
        // format: NEG both halves, then SBB hi,0 to subtract the borrow
        // produced by negating the low word. The flags are clobbered.
 8804   match(Set dst (SubL zero dst));
 8805   effect(KILL cr);
 8806   ins_cost(300);
 8807   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8808   ins_encode( neg_long(dst) );
 8809   ins_pipe( ialu_reg_reg_long );
 8810 %}
 8811 
 8812 // And Long Register with Register
      // 64-bit AND on a register pair: the two halves are independent, so
      // the same opcode (0x23) is used for both the low and the high word.
 8813 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8814   match(Set dst (AndL dst src));
 8815   effect(KILL cr);
 8816   format %{ "AND    $dst.lo,$src.lo\n\t"
 8817             "AND    $dst.hi,$src.hi" %}
 8818   opcode(0x23,0x23);
 8819   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8820   ins_pipe( ialu_reg_reg_long );
 8821 %}
 8822 
 8823 // And Long Register with Immediate
      // 64-bit AND with a long constant: 0x81 /4 is applied to the low and
      // to the high word with the matching half of the immediate.
 8824 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8825   match(Set dst (AndL dst src));
 8826   effect(KILL cr);
 8827   format %{ "AND    $dst.lo,$src.lo\n\t"
 8828             "AND    $dst.hi,$src.hi" %}
 8829   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8830   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8831   ins_pipe( ialu_reg_long );
 8832 %}
 8833 
 8834 // And Long Register with Memory
      // Folds the LoadL into the AND: low word against $mem, high word
      // against $mem+4 (see format).
 8835 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8836   match(Set dst (AndL dst (LoadL mem)));
 8837   effect(KILL cr);
 8838   ins_cost(125);
 8839   format %{ "AND    $dst.lo,$mem\n\t"
 8840             "AND    $dst.hi,$mem+4" %}
 8841   opcode(0x23, 0x23);
 8842   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8843   ins_pipe( ialu_reg_long_mem );
 8844 %}
 8845 
 8846 // BMI1 instructions
 8847 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not, dst = (src1 ^ -1) & src2, emitted as one ANDN per
        // 32-bit half. Only selected when UseBMI1Instructions is set.
        // dst is a TEMP and the flags are clobbered.
 8848   match(Set dst (AndL (XorL src1 minus_1) src2));
 8849   predicate(UseBMI1Instructions);
 8850   effect(KILL cr, TEMP dst);
 8851 
 8852   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8853             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8854          %}
 8855 
 8856   ins_encode %{
 8857     Register Rdst = $dst$$Register;
 8858     Register Rsrc1 = $src1$$Register;
 8859     Register Rsrc2 = $src2$$Register;
          // $x$$Register names the low half; HIGH_FROM_LOW gives the register
          // that the format prints as the .hi half.
 8860     __ andnl(Rdst, Rsrc1, Rsrc2);
 8861     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8862   %}
 8863   ins_pipe(ialu_reg_reg_long);
 8864 %}
 8865 
 8866 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not with the second operand loaded from memory:
        // dst = (src1 ^ -1) & [src2], one ANDN per 32-bit half.
        // Only selected when UseBMI1Instructions is set.
 8867   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8868   predicate(UseBMI1Instructions);
 8869   effect(KILL cr, TEMP dst);
 8870 
 8871   ins_cost(125);
 8872   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8873             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 8874          %}
 8875 
 8876   ins_encode %{
 8877     Register Rdst = $dst$$Register;
 8878     Register Rsrc1 = $src1$$Register;
          // Same base/index/scale as src2 with the displacement advanced by 4:
          // the address of the high word of the long.
 8879     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 8880 
 8881     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8882     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 8883   %}
 8884   ins_pipe(ialu_reg_mem);
 8885 %}
 8886 
 8887 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
        // Long "isolate lowest set bit", matched as (0 - src) & src, built
        // from two 32-bit BLSI instructions. Only selected when
        // UseBMI1Instructions is set.
 8888   match(Set dst (AndL (SubL imm_zero src) src));
 8889   predicate(UseBMI1Instructions);
 8890   effect(KILL cr, TEMP dst);
 8891 
 8892   format %{ "MOVL   $dst.hi, 0\n\t"
 8893             "BLSIL  $dst.lo, $src.lo\n\t"
 8894             "JNZ    done\n\t"
 8895             "BLSIL  $dst.hi, $src.hi\n"
 8896             "done:"
 8897          %}
 8898 
 8899   ins_encode %{
 8900     Label done;
 8901     Register Rdst = $dst$$Register;
 8902     Register Rsrc = $src$$Register;
          // The high half is zeroed up front; that value is the final one
          // whenever the low-word BLSI result is non-zero.
 8903     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8904     __ blsil(Rdst, Rsrc);
 8905     __ jccb(Assembler::notZero, done);
          // Low word produced zero: take the bit from the high word instead.
 8906     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8907     __ bind(done);
 8908   %}
 8909   ins_pipe(ialu_reg);
 8910 %}
 8911 
 8912 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
        // Memory-operand form of the long BLSI rule: both LoadL nodes in the
        // match use the same address operand src. Only selected when
        // UseBMI1Instructions is set.
 8913   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 8914   predicate(UseBMI1Instructions);
 8915   effect(KILL cr, TEMP dst);
 8916 
 8917   ins_cost(125);
 8918   format %{ "MOVL   $dst.hi, 0\n\t"
 8919             "BLSIL  $dst.lo, $src\n\t"
 8920             "JNZ    done\n\t"
 8921             "BLSIL  $dst.hi, $src+4\n"
 8922             "done:"
 8923          %}
 8924 
 8925   ins_encode %{
 8926     Label done;
 8927     Register Rdst = $dst$$Register;
          // Address of the high word: same operand with disp + 4.
 8928     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8929 
          // High half defaults to 0; it is recomputed only when the low-word
          // result is zero.
 8930     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8931     __ blsil(Rdst, $src$$Address);
 8932     __ jccb(Assembler::notZero, done);
 8933     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 8934     __ bind(done);
 8935   %}
 8936   ins_pipe(ialu_reg_mem);
 8937 %}
 8938 
 8939 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8940 %{
        // Long "mask up to lowest set bit", matched as (src + -1) ^ src,
        // built from two 32-bit BLSMSK instructions. Only selected when
        // UseBMI1Instructions is set.
 8941   match(Set dst (XorL (AddL src minus_1) src));
 8942   predicate(UseBMI1Instructions);
 8943   effect(KILL cr, TEMP dst);
 8944 
 8945   format %{ "MOVL    $dst.hi, 0\n\t"
 8946             "BLSMSKL $dst.lo, $src.lo\n\t"
 8947             "JNC     done\n\t"
 8948             "BLSMSKL $dst.hi, $src.hi\n"
 8949             "done:"
 8950          %}
 8951 
 8952   ins_encode %{
 8953     Label done;
 8954     Register Rdst = $dst$$Register;
 8955     Register Rsrc = $src$$Register;
          // High half defaults to 0 and is recomputed only if the low-word
          // BLSMSK leaves CF set. NOTE(review): CF is expected to be set only
          // when the low source word is zero -- confirm against the Intel SDM.
 8956     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8957     __ blsmskl(Rdst, Rsrc);
 8958     __ jccb(Assembler::carryClear, done);
 8959     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8960     __ bind(done);
 8961   %}
 8962 
 8963   ins_pipe(ialu_reg);
 8964 %}
 8965 
 8966 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 8967 %{
        // Memory-operand form of the long BLSMSK rule: both LoadL nodes in
        // the match use the same address operand src. Only selected when
        // UseBMI1Instructions is set.
 8968   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 8969   predicate(UseBMI1Instructions);
 8970   effect(KILL cr, TEMP dst);
 8971 
 8972   ins_cost(125);
 8973   format %{ "MOVL    $dst.hi, 0\n\t"
 8974             "BLSMSKL $dst.lo, $src\n\t"
 8975             "JNC     done\n\t"
 8976             "BLSMSKL $dst.hi, $src+4\n"
 8977             "done:"
 8978          %}
 8979 
 8980   ins_encode %{
 8981     Label done;
 8982     Register Rdst = $dst$$Register;
          // Address of the high word: same operand with disp + 4.
 8983     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8984 
          // High half defaults to 0; the high word is processed only when the
          // low-word BLSMSK leaves CF set.
 8985     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8986     __ blsmskl(Rdst, $src$$Address);
 8987     __ jccb(Assembler::carryClear, done);
 8988     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 8989     __ bind(done);
 8990   %}
 8991 
 8992   ins_pipe(ialu_reg_mem);
 8993 %}
 8994 
 8995 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8996 %{
        // Long "reset lowest set bit", matched as (src + -1) & src, built
        // from two 32-bit BLSR instructions. Only selected when
        // UseBMI1Instructions is set.
 8997   match(Set dst (AndL (AddL src minus_1) src) );
 8998   predicate(UseBMI1Instructions);
 8999   effect(KILL cr, TEMP dst);
 9000 
 9001   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9002             "BLSRL  $dst.lo, $src.lo\n\t"
 9003             "JNC    done\n\t"
 9004             "BLSRL  $dst.hi, $src.hi\n"
 9005             "done:"
 9006   %}
 9007 
 9008   ins_encode %{
 9009     Label done;
 9010     Register Rdst = $dst$$Register;
 9011     Register Rsrc = $src$$Register;
          // Unlike BLSI/BLSMSK, the high half defaults to a copy of src.hi;
          // it is only modified when the low-word BLSR leaves CF set.
 9012     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9013     __ blsrl(Rdst, Rsrc);
 9014     __ jccb(Assembler::carryClear, done);
 9015     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9016     __ bind(done);
 9017   %}
 9018 
 9019   ins_pipe(ialu_reg);
 9020 %}
 9021 
 9022 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9023 %{
        // Memory-operand form of the long BLSR rule: both LoadL nodes in the
        // match use the same address operand src. Only selected when
        // UseBMI1Instructions is set.
 9024   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9025   predicate(UseBMI1Instructions);
 9026   effect(KILL cr, TEMP dst);
 9027 
 9028   ins_cost(125);
 9029   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9030             "BLSRL  $dst.lo, $src\n\t"
 9031             "JNC    done\n\t"
 9032             "BLSRL  $dst.hi, $src+4\n"
 9033             "done:"
 9034   %}
 9035 
 9036   ins_encode %{
 9037     Label done;
 9038     Register Rdst = $dst$$Register;
          // Address of the high word: same operand with disp + 4.
 9039     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
          // The high half starts as the unmodified high word from memory and
          // is only reprocessed when the low-word BLSR leaves CF set.
 9040     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9041     __ blsrl(Rdst, $src$$Address);
 9042     __ jccb(Assembler::carryClear, done);
 9043     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9044     __ bind(done);
 9045   %}
 9046 
 9047   ins_pipe(ialu_reg_mem);
 9048 %}
 9049 
 9050 // Or Long Register with Register
      // 64-bit OR on a register pair: the halves are independent, so opcode
      // 0x0B is used for both the low and the high word.
 9051 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9052   match(Set dst (OrL dst src));
 9053   effect(KILL cr);
 9054   format %{ "OR     $dst.lo,$src.lo\n\t"
 9055             "OR     $dst.hi,$src.hi" %}
 9056   opcode(0x0B,0x0B);
 9057   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9058   ins_pipe( ialu_reg_reg_long );
 9059 %}
 9060 
 9061 // Or Long Register with Immediate
      // 64-bit OR with a long constant: 0x81 /1 is applied to the low and to
      // the high word with the matching half of the immediate.
 9062 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9063   match(Set dst (OrL dst src));
 9064   effect(KILL cr);
 9065   format %{ "OR     $dst.lo,$src.lo\n\t"
 9066             "OR     $dst.hi,$src.hi" %}
 9067   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9068   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9069   ins_pipe( ialu_reg_long );
 9070 %}
 9071 
 9072 // Or Long Register with Memory
      // Folds the LoadL into the OR: low word against $mem, high word
      // against $mem+4 (see format).
 9073 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9074   match(Set dst (OrL dst (LoadL mem)));
 9075   effect(KILL cr);
 9076   ins_cost(125);
 9077   format %{ "OR     $dst.lo,$mem\n\t"
 9078             "OR     $dst.hi,$mem+4" %}
 9079   opcode(0x0B,0x0B);
 9080   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9081   ins_pipe( ialu_reg_long_mem );
 9082 %}
 9083 
 9084 // Xor Long Register with Register
      // 64-bit XOR on a register pair: the halves are independent, so opcode
      // 0x33 is used for both the low and the high word.
 9085 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9086   match(Set dst (XorL dst src));
 9087   effect(KILL cr);
 9088   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9089             "XOR    $dst.hi,$src.hi" %}
 9090   opcode(0x33,0x33);
 9091   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9092   ins_pipe( ialu_reg_reg_long );
 9093 %}
 9094 
 9095 // Xor Long Register with Immediate -1
      // XOR with all-ones is a bitwise complement, so each half is simply
      // NOT-ed. Unlike the other XorL rules this one declares no eFlagsReg
      // operand / KILL cr -- presumably because NOT leaves EFLAGS untouched
      // (confirm against the Intel SDM).
 9096 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9097   match(Set dst (XorL dst imm));
 9098   format %{ "NOT    $dst.lo\n\t"
 9099             "NOT    $dst.hi" %}
 9100   ins_encode %{
 9101      __ notl($dst$$Register);
 9102      __ notl(HIGH_FROM_LOW($dst$$Register));
 9103   %}
 9104   ins_pipe( ialu_reg_long );
 9105 %}
 9106 
 9107 // Xor Long Register with Immediate
      // 64-bit XOR with a long constant: 0x81 /6 is applied to the low and
      // to the high word with the matching half of the immediate.
 9108 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9109   match(Set dst (XorL dst src));
 9110   effect(KILL cr);
 9111   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9112             "XOR    $dst.hi,$src.hi" %}
 9113   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9114   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9115   ins_pipe( ialu_reg_long );
 9116 %}
 9117 
 9118 // Xor Long Register with Memory
      // Folds the LoadL into the XOR: low word against $mem, high word
      // against $mem+4 (see format).
 9119 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9120   match(Set dst (XorL dst (LoadL mem)));
 9121   effect(KILL cr);
 9122   ins_cost(125);
 9123   format %{ "XOR    $dst.lo,$mem\n\t"
 9124             "XOR    $dst.hi,$mem+4" %}
 9125   opcode(0x33,0x33);
 9126   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9127   ins_pipe( ialu_reg_long_mem );
 9128 %}
 9129 
 9130 // Shift Left Long by 1
      // Implemented as a 64-bit doubling: ADD lo,lo shifts the low word and
      // leaves the shifted-out bit in the carry; ADC hi,hi shifts the high
      // word and brings that carry in. Only used when UseNewLongLShift is
      // set; ins_cost(100) is lower than the generic 1-31 rule (200).
 9131 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9132   predicate(UseNewLongLShift);
 9133   match(Set dst (LShiftL dst cnt));
 9134   effect(KILL cr);
 9135   ins_cost(100);
 9136   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9137             "ADC    $dst.hi,$dst.hi" %}
 9138   ins_encode %{
 9139     __ addl($dst$$Register,$dst$$Register);
 9140     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9141   %}
 9142   ins_pipe( ialu_reg_long );
 9143 %}
 9144 
 9145 // Shift Left Long by 2
      // Two rounds of the ADD lo,lo / ADC hi,hi doubling sequence (one per
      // bit position). Only used when UseNewLongLShift is set.
 9146 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9147   predicate(UseNewLongLShift);
 9148   match(Set dst (LShiftL dst cnt));
 9149   effect(KILL cr);
 9150   ins_cost(100);
 9151   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9152             "ADC    $dst.hi,$dst.hi\n\t"
 9153             "ADD    $dst.lo,$dst.lo\n\t"
 9154             "ADC    $dst.hi,$dst.hi" %}
 9155   ins_encode %{
          // First doubling.
 9156     __ addl($dst$$Register,$dst$$Register);
 9157     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Second doubling.
 9158     __ addl($dst$$Register,$dst$$Register);
 9159     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9160   %}
 9161   ins_pipe( ialu_reg_long );
 9162 %}
 9163 
 9164 // Shift Left Long by 3
      // Three rounds of the ADD lo,lo / ADC hi,hi doubling sequence (one per
      // bit position). Only used when UseNewLongLShift is set.
 9165 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9166   predicate(UseNewLongLShift);
 9167   match(Set dst (LShiftL dst cnt));
 9168   effect(KILL cr);
 9169   ins_cost(100);
 9170   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9171             "ADC    $dst.hi,$dst.hi\n\t"
 9172             "ADD    $dst.lo,$dst.lo\n\t"
 9173             "ADC    $dst.hi,$dst.hi\n\t"
 9174             "ADD    $dst.lo,$dst.lo\n\t"
 9175             "ADC    $dst.hi,$dst.hi" %}
 9176   ins_encode %{
          // First doubling.
 9177     __ addl($dst$$Register,$dst$$Register);
 9178     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Second doubling.
 9179     __ addl($dst$$Register,$dst$$Register);
 9180     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
          // Third doubling.
 9181     __ addl($dst$$Register,$dst$$Register);
 9182     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9183   %}
 9184   ins_pipe( ialu_reg_long );
 9185 %}
 9186 
 9187 // Shift Left Long by 1-31
      // Constant shift smaller than one word: SHLD moves the top cnt bits of
      // the low word into the high word, then SHL shifts the low word (see
      // format). The opcode bytes are consumed by move_long_small_shift.
 9188 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9189   match(Set dst (LShiftL dst cnt));
 9190   effect(KILL cr);
 9191   ins_cost(200);
 9192   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9193             "SHL    $dst.lo,$cnt" %}
 9194   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9195   ins_encode( move_long_small_shift(dst,cnt) );
 9196   ins_pipe( ialu_reg_long );
 9197 %}
 9198 
 9199 // Shift Left Long by 32-63
      // Constant shift of at least one word: per the format the low word is
      // moved into the high word, shifted left by cnt-32, and the low word
      // is cleared.
 9200 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9201   match(Set dst (LShiftL dst cnt));
 9202   effect(KILL cr);
 9203   ins_cost(300);
 9204   format %{ "MOV    $dst.hi,$dst.lo\n"
 9205           "\tSHL    $dst.hi,$cnt-32\n"
 9206           "\tXOR    $dst.lo,$dst.lo" %}
 9207   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9208   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9209   ins_pipe( ialu_reg_long );
 9210 %}
 9211 
 9212 // Shift Left Long by variable
      // The shift count lives in an eCXRegI operand. Per the format, bit 5
      // of the count (TEST shift,32) selects a whole-word move first
      // (hi = lo, lo = 0); SHLD/SHL then apply the count.
      // size(17) declares the encoded length and must stay in step with the
      // shift_left_long encoding.
 9213 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9214   match(Set dst (LShiftL dst shift));
 9215   effect(KILL cr);
 9216   ins_cost(500+200);
 9217   size(17);
 9218   format %{ "TEST   $shift,32\n\t"
 9219             "JEQ,s  small\n\t"
 9220             "MOV    $dst.hi,$dst.lo\n\t"
 9221             "XOR    $dst.lo,$dst.lo\n"
 9222     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9223             "SHL    $dst.lo,$shift" %}
 9224   ins_encode( shift_left_long( dst, shift ) );
 9225   ins_pipe( pipe_slow );
 9226 %}
 9227 
 9228 // Shift Right (logical) Long by 1-31
      // Matches URShiftL with a constant count below one word: SHRD moves
      // the low cnt bits of the high word into the low word, then SHR
      // zero-fills the high word (see format).
 9229 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9230   match(Set dst (URShiftL dst cnt));
 9231   effect(KILL cr);
 9232   ins_cost(200);
 9233   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9234             "SHR    $dst.hi,$cnt" %}
 9235   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9236   ins_encode( move_long_small_shift(dst,cnt) );
 9237   ins_pipe( ialu_reg_long );
 9238 %}
 9239 
 9240 // Shift Right (logical) Long by 32-63
      // Matches URShiftL with a constant count of at least one word: per the
      // format the high word is moved into the low word, shifted right by
      // cnt-32, and the high word is cleared.
 9241 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9242   match(Set dst (URShiftL dst cnt));
 9243   effect(KILL cr);
 9244   ins_cost(300);
 9245   format %{ "MOV    $dst.lo,$dst.hi\n"
 9246           "\tSHR    $dst.lo,$cnt-32\n"
 9247           "\tXOR    $dst.hi,$dst.hi" %}
 9248   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9249   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9250   ins_pipe( ialu_reg_long );
 9251 %}
 9252 
 9253 // Shift Right (logical) Long by variable
      // The shift count lives in an eCXRegI operand. Per the format, bit 5
      // of the count selects a whole-word move first (lo = hi, hi = 0);
      // SHRD/SHR then apply the count.
      // size(17) declares the encoded length and must stay in step with the
      // shift_right_long encoding.
 9254 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9255   match(Set dst (URShiftL dst shift));
 9256   effect(KILL cr);
 9257   ins_cost(600);
 9258   size(17);
 9259   format %{ "TEST   $shift,32\n\t"
 9260             "JEQ,s  small\n\t"
 9261             "MOV    $dst.lo,$dst.hi\n\t"
 9262             "XOR    $dst.hi,$dst.hi\n"
 9263     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9264             "SHR    $dst.hi,$shift" %}
 9265   ins_encode( shift_right_long( dst, shift ) );
 9266   ins_pipe( pipe_slow );
 9267 %}
 9268 
 9269 // Shift Right arithmetic Long by 1-31
      // Matches RShiftL (signed shift) with a constant count below one word:
      // SHRD feeds the low word from the high word, then SAR shifts the high
      // word while keeping its sign (see format).
 9270 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9271   match(Set dst (RShiftL dst cnt));
 9272   effect(KILL cr);
 9273   ins_cost(200);
 9274   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9275             "SAR    $dst.hi,$cnt" %}
 9276   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9277   ins_encode( move_long_small_shift(dst,cnt) );
 9278   ins_pipe( ialu_reg_long );
 9279 %}
 9280 
 9281 // Shift Right arithmetic Long by 32-63
      // Matches RShiftL with a constant count of at least one word: per the
      // format the high word is moved into the low word and shifted by
      // cnt-32, and the high word is replaced by its sign (SAR hi,31).
 9282 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9283   match(Set dst (RShiftL dst cnt));
 9284   effect(KILL cr);
 9285   ins_cost(300);
 9286   format %{ "MOV    $dst.lo,$dst.hi\n"
 9287           "\tSAR    $dst.lo,$cnt-32\n"
 9288           "\tSAR    $dst.hi,31" %}
 9289   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9290   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9291   ins_pipe( ialu_reg_long );
 9292 %}
 9293 
 9294 // Shift Right arithmetic Long by variable
      // The shift count lives in an eCXRegI operand. Per the format, bit 5
      // of the count selects a whole-word move first (lo = hi, hi = sign of
      // hi); SHRD/SAR then apply the count.
      // size(18) is one byte more than the logical variant, which uses XOR
      // where this rule uses SAR hi,31; keep it in step with
      // shift_right_arith_long.
 9295 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9296   match(Set dst (RShiftL dst shift));
 9297   effect(KILL cr);
 9298   ins_cost(600);
 9299   size(18);
 9300   format %{ "TEST   $shift,32\n\t"
 9301             "JEQ,s  small\n\t"
 9302             "MOV    $dst.lo,$dst.hi\n\t"
 9303             "SAR    $dst.hi,31\n"
 9304     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9305             "SAR    $dst.hi,$shift" %}
 9306   ins_encode( shift_right_arith_long( dst, shift ) );
 9307   ins_pipe( pipe_slow );
 9308 %}
 9309 
 9310 
 9311 //----------Double Instructions------------------------------------------------
 9312 // Double Math
 9313 
 9314 // Compare & branch
 9315 
 9316 // P6 version of double compare (x87), sets condition codes in EFLAGS
      // Selected when CMOV is supported and UseSSE <= 1. src1 is pushed and
      // compared with FUCOMIP; cmpF_P6_fixup then handles the unordered case
      // (per the format: on parity, load AH=1 and SAHF to set CF), which is
      // why EAX is declared killed.
 9317 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9318   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9319   match(Set cr (CmpD src1 src2));
 9320   effect(KILL rax);
 9321   ins_cost(150);
 9322   format %{ "FLD    $src1\n\t"
 9323             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9324             "JNP    exit\n\t"
 9325             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9326             "SAHF\n"
 9327      "exit:\tNOP               // avoid branch to branch" %}
 9328   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9329   ins_encode( Push_Reg_DPR(src1),
 9330               OpcP, RegOpc(src2),
 9331               cmpF_P6_fixup );
 9332   ins_pipe( pipe_slow );
 9333 %}
 9334 
 9335 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // Same FLD + FUCOMIP compare as cmpDPR_cc_P6, but producing an
        // eFlagsRegUCF result: no NaN fixup is emitted and EAX is not killed.
 9336   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9337   match(Set cr (CmpD src1 src2));
 9338   ins_cost(150);
 9339   format %{ "FLD    $src1\n\t"
 9340             "FUCOMIP ST,$src2  // P6 instruction" %}
 9341   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9342   ins_encode( Push_Reg_DPR(src1),
 9343               OpcP, RegOpc(src2));
 9344   ins_pipe( pipe_slow );
 9345 %}
 9346 
 9347 // Compare & branch
      // x87 double compare for UseSSE <= 1 without the P6 FUCOMIP path
      // (higher ins_cost than the P6 rules). Per the format: FCOMP, then the
      // FPU status word goes through AX into EFLAGS (FNSTSW / SAHF), with an
      // unordered result rewritten to look like "less than". EAX is killed.
 9348 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9349   predicate(UseSSE<=1);
 9350   match(Set cr (CmpD src1 src2));
 9351   effect(KILL rax);
 9352   ins_cost(200);
 9353   format %{ "FLD    $src1\n\t"
 9354             "FCOMp  $src2\n\t"
 9355             "FNSTSW AX\n\t"
 9356             "TEST   AX,0x400\n\t"
 9357             "JZ,s   flags\n\t"
 9358             "MOV    AH,1\t# unordered treat as LT\n"
 9359     "flags:\tSAHF" %}
 9360   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9361   ins_encode( Push_Reg_DPR(src1),
 9362               OpcP, RegOpc(src2),
 9363               fpu_flags);
 9364   ins_pipe( pipe_slow );
 9365 %}
 9366 
 9367 // Compare vs zero into -1,0,1
      // Three-way x87 compare of src1 against the constant 0.0 (CmpD3).
      // src1 is pushed, the two opcode bytes are emitted secondary-first
      // (OpcS, OpcP => D9 E4), the FPU stack is popped, and CmpF_Result
      // turns the outcome into the integer in dst. EAX and flags are killed.
 9368 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9369   predicate(UseSSE<=1);
 9370   match(Set dst (CmpD3 src1 zero));
 9371   effect(KILL cr, KILL rax);
 9372   ins_cost(280);
 9373   format %{ "FTSTD  $dst,$src1" %}
 9374   opcode(0xE4, 0xD9);
 9375   ins_encode( Push_Reg_DPR(src1),
 9376               OpcS, OpcP, PopFPU,
 9377               CmpF_Result(dst));
 9378   ins_pipe( pipe_slow );
 9379 %}
 9380 
 9381 // Compare into -1,0,1
      // Three-way x87 compare of two double registers (CmpD3): src1 is
      // pushed, compared against src2 with the D8 /3 opcode, and CmpF_Result
      // turns the outcome into the integer in dst. EAX and flags are killed.
 9382 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9383   predicate(UseSSE<=1);
 9384   match(Set dst (CmpD3 src1 src2));
 9385   effect(KILL cr, KILL rax);
 9386   ins_cost(300);
 9387   format %{ "FCMPD  $dst,$src1,$src2" %}
 9388   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9389   ins_encode( Push_Reg_DPR(src1),
 9390               OpcP, RegOpc(src2),
 9391               CmpF_Result(dst));
 9392   ins_pipe( pipe_slow );
 9393 %}
 9394 
 9395 // double compare and set condition codes in EFLAGS by XMM regs
      // SSE2 path (UseSSE >= 2): UCOMISD followed by emit_cmpfp_fixup, which
      // per the format rewrites the flags through PUSHF / AND / POPF when
      // the parity flag reports an unordered (NaN) compare.
 9396 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9397   predicate(UseSSE>=2);
 9398   match(Set cr (CmpD src1 src2));
 9399   ins_cost(145);
 9400   format %{ "UCOMISD $src1,$src2\n\t"
 9401             "JNP,s   exit\n\t"
 9402             "PUSHF\t# saw NaN, set CF\n\t"
 9403             "AND     [rsp], #0xffffff2b\n\t"
 9404             "POPF\n"
 9405     "exit:" %}
 9406   ins_encode %{
 9407     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9408     emit_cmpfp_fixup(masm);
 9409   %}
 9410   ins_pipe( pipe_slow );
 9411 %}
 9412 
 9413 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // SSE2 double compare producing an eFlagsRegUCF result: a bare
        // UCOMISD with no NaN fixup, hence the lower cost than cmpD_cc.
 9414   predicate(UseSSE>=2);
 9415   match(Set cr (CmpD src1 src2));
 9416   ins_cost(100);
 9417   format %{ "UCOMISD $src1,$src2" %}
 9418   ins_encode %{
 9419     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9420   %}
 9421   ins_pipe( pipe_slow );
 9422 %}
 9423 
 9424 // double compare and set condition codes in EFLAGS by XMM regs
      // Memory-operand form of cmpD_cc: the LoadD of src2 is folded into the
      // UCOMISD, followed by the same NaN flag fixup.
 9425 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9426   predicate(UseSSE>=2);
 9427   match(Set cr (CmpD src1 (LoadD src2)));
 9428   ins_cost(145);
 9429   format %{ "UCOMISD $src1,$src2\n\t"
 9430             "JNP,s   exit\n\t"
 9431             "PUSHF\t# saw NaN, set CF\n\t"
 9432             "AND     [rsp], #0xffffff2b\n\t"
 9433             "POPF\n"
 9434     "exit:" %}
 9435   ins_encode %{
 9436     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9437     emit_cmpfp_fixup(masm);
 9438   %}
 9439   ins_pipe( pipe_slow );
 9440 %}
 9441 
 9442 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // Memory-operand form of cmpD_ccCF: UCOMISD against the folded LoadD
        // of src2, with no NaN fixup.
 9443   predicate(UseSSE>=2);
 9444   match(Set cr (CmpD src1 (LoadD src2)));
 9445   ins_cost(100);
 9446   format %{ "UCOMISD $src1,$src2" %}
 9447   ins_encode %{
 9448     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9449   %}
 9450   ins_pipe( pipe_slow );
 9451 %}
 9452 
 9453 // Compare into -1,0,1 in XMM
      // Three-way SSE2 double compare (CmpD3). After UCOMISD, emit_cmpfp3
      // builds the integer result; per the format dst is preloaded with -1
      // and kept for unordered (JP) or below (JB), otherwise SETNE/MOVZB
      // yields 0 or 1. The flags are clobbered.
 9454 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9455   predicate(UseSSE>=2);
 9456   match(Set dst (CmpD3 src1 src2));
 9457   effect(KILL cr);
 9458   ins_cost(255);
 9459   format %{ "UCOMISD $src1, $src2\n\t"
 9460             "MOV     $dst, #-1\n\t"
 9461             "JP,s    done\n\t"
 9462             "JB,s    done\n\t"
 9463             "SETNE   $dst\n\t"
 9464             "MOVZB   $dst, $dst\n"
 9465     "done:" %}
 9466   ins_encode %{
 9467     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9468     emit_cmpfp3(masm, $dst$$Register);
 9469   %}
 9470   ins_pipe( pipe_slow );
 9471 %}
 9472 
 9473 // Compare into -1,0,1 in XMM and memory
      // Memory-operand form of cmpD_reg: the LoadD of src2 is folded into
      // the UCOMISD; emit_cmpfp3 then builds the -1/0/1 result in dst.
 9474 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9475   predicate(UseSSE>=2);
 9476   match(Set dst (CmpD3 src1 (LoadD src2)));
 9477   effect(KILL cr);
 9478   ins_cost(275);
 9479   format %{ "UCOMISD $src1, $src2\n\t"
 9480             "MOV     $dst, #-1\n\t"
 9481             "JP,s    done\n\t"
 9482             "JB,s    done\n\t"
 9483             "SETNE   $dst\n\t"
 9484             "MOVZB   $dst, $dst\n"
 9485     "done:" %}
 9486   ins_encode %{
 9487     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9488     emit_cmpfp3(masm, $dst$$Register);
 9489   %}
 9490   ins_pipe( pipe_slow );
 9491 %}
 9492 
 9493 
 9494 instruct subDPR_reg(regDPR dst, regDPR src) %{
        // x87 double subtract, dst = dst - src (UseSSE <= 1): src is pushed
        // onto the FPU stack and the popping subtract (DE E8+i) folds it
        // into dst.
 9495   predicate (UseSSE <=1);
 9496   match(Set dst (SubD dst src));
 9497 
 9498   format %{ "FLD    $src\n\t"
 9499             "DSUBp  $dst,ST" %}
 9500   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9501   ins_cost(150);
 9502   ins_encode( Push_Reg_DPR(src),
 9503               OpcP, RegOpc(dst) );
 9504   ins_pipe( fpu_reg_reg );
 9505 %}
 9506 
 9507 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double subtract whose result must be rounded to double
        // (RoundDouble): src2 is pushed, combined with src1 on the stack top
        // (D8 /5), and the result is stored-and-popped into the stack slot
        // dst, which performs the rounding ("D-round" in the format).
 9508   predicate (UseSSE <=1);
 9509   match(Set dst (RoundDouble (SubD src1 src2)));
 9510   ins_cost(250);
 9511 
 9512   format %{ "FLD    $src2\n\t"
 9513             "DSUB   ST,$src1\n\t"
 9514             "FSTP_D $dst\t# D-round" %}
 9515   opcode(0xD8, 0x5);
 9516   ins_encode( Push_Reg_DPR(src2),
 9517               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9518   ins_pipe( fpu_mem_reg_reg );
 9519 %}
 9520 
 9521 
 9522 instruct subDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double subtract with a memory operand, dst = dst - [src]: the
        // tertiary opcode (DD /0) loads the double onto the FPU stack, then
        // the popping subtract folds it into dst.
 9523   predicate (UseSSE <=1);
 9524   match(Set dst (SubD dst (LoadD src)));
 9525   ins_cost(150);
 9526 
 9527   format %{ "FLD    $src\n\t"
 9528             "DSUBp  $dst,ST" %}
 9529   opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
 9530   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9531               OpcP, RegOpc(dst), ClearInstMark );
 9532   ins_pipe( fpu_reg_mem );
 9533 %}
 9534 
 9535 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double absolute value (UseSSE <= 1). Both operands use the
        // regDPR1 class -- presumably the FPU top-of-stack register, since
        // the encoding takes no register operand. The bytes are emitted
        // secondary-first (OpcS, OpcP => D9 E1, printed as FABS).
 9536   predicate (UseSSE<=1);
 9537   match(Set dst (AbsD src));
 9538   ins_cost(100);
 9539   format %{ "FABS" %}
 9540   opcode(0xE1, 0xD9);
 9541   ins_encode( OpcS, OpcP );
 9542   ins_pipe( fpu_reg_reg );
 9543 %}
 9544 
 9545 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double negate (UseSSE <= 1). Both operands use the regDPR1
        // class -- presumably the FPU top-of-stack register, since the
        // encoding takes no register operand. The bytes are emitted
        // secondary-first (OpcS, OpcP => D9 E0, printed as FCHS).
 9546   predicate(UseSSE<=1);
 9547   match(Set dst (NegD src));
 9548   ins_cost(100);
 9549   format %{ "FCHS" %}
 9550   opcode(0xE0, 0xD9);
 9551   ins_encode( OpcS, OpcP );
 9552   ins_pipe( fpu_reg_reg );
 9553 %}
 9554 
 9555 instruct addDPR_reg(regDPR dst, regDPR src) %{
        // x87 double add, dst = dst + src (UseSSE <= 1): src is pushed onto
        // the FPU stack and the popping add (DE C0+i) folds it into dst.
        // The format prints the popping mnemonic (DADDp) to match the emitted
        // opcode, as the other rules using opcode 0xDE do.
        // size(4): two 2-byte instructions (push + popping add).
 9556   predicate(UseSSE<=1);
 9557   match(Set dst (AddD dst src));
 9558   format %{ "FLD    $src\n\t"
 9559             "DADDp  $dst,ST" %}
 9560   size(4);
 9561   ins_cost(150);
 9562   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9563   ins_encode( Push_Reg_DPR(src),
 9564               OpcP, RegOpc(dst) );
 9565   ins_pipe( fpu_reg_reg );
 9566 %}
 9567 
 9568 
 9569 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double add whose result must be rounded to double
        // (RoundDouble): src2 is pushed, src1 is added to the stack top
        // (D8 /0), and the sum is stored-and-popped into the stack slot dst,
        // which performs the rounding ("D-round" in the format).
 9570   predicate(UseSSE<=1);
 9571   match(Set dst (RoundDouble (AddD src1 src2)));
 9572   ins_cost(250);
 9573 
 9574   format %{ "FLD    $src2\n\t"
 9575             "DADD   ST,$src1\n\t"
 9576             "FSTP_D $dst\t# D-round" %}
 9577   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9578   ins_encode( Push_Reg_DPR(src2),
 9579               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9580   ins_pipe( fpu_mem_reg_reg );
 9581 %}
 9582 
 9583 
 9584 instruct addDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double add with a memory operand, dst = dst + [src]: the
        // tertiary opcode (DD /0) loads the double onto the FPU stack, then
        // the popping add (DE C0+i) folds it into dst.
 9585   predicate(UseSSE<=1);
 9586   match(Set dst (AddD dst (LoadD src)));
 9587   ins_cost(150);
 9588 
 9589   format %{ "FLD    $src\n\t"
 9590             "DADDp  $dst,ST" %}
 9591   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9592   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9593               OpcP, RegOpc(dst), ClearInstMark );
 9594   ins_pipe( fpu_reg_mem );
 9595 %}
 9596 
 9597 // add-to-memory
      // Read-modify-write double add, [dst] = round([dst] + src), for
      // UseSSE <= 1: load [dst] onto the FPU stack (DD /0), add src to the
      // stack top (D8 C0+i), then store back with DD /3. DD /3 is the
      // store-and-pop form, so the format prints FSTP_D; the pop also
      // balances the initial FLD.
      // The encoding spells its opcode bytes out via Opcode(...) rather than
      // using the opcode() declaration.
 9598 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9599   predicate(UseSSE<=1);
 9600   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9601   ins_cost(150);
 9602 
 9603   format %{ "FLD_D  $dst\n\t"
 9604             "DADD   ST,$src\n\t"
 9605             "FSTP_D $dst" %}
 9606   opcode(0xDD, 0x0);
 9607   ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
 9608               Opcode(0xD8), RegOpc(src), ClearInstMark,
 9609               SetInstMark,
 9610               Opcode(0xDD), RMopc_Mem(0x03,dst),
 9611               ClearInstMark);
 9612   ins_pipe( fpu_reg_mem );
 9613 %}
 9614 
 9615 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
        // x87 double add of the constant 1.0 (immDPR1): FLD1 pushes the
        // constant without a memory load, and the popping add folds it into
        // dst. Cheaper (125) than the constant-table rule (200).
 9616   predicate(UseSSE<=1);
 9617   match(Set dst (AddD dst con));
 9618   ins_cost(125);
 9619   format %{ "FLD1\n\t"
 9620             "DADDp  $dst,ST" %}
 9621   ins_encode %{
 9622     __ fld1();
 9623     __ faddp($dst$$reg);
 9624   %}
 9625   ins_pipe(fpu_reg);
 9626 %}
 9627 
 9628 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
        // Double add of a general constant: matches AddD(dst, con) when UseSSE<=1.
        // The predicate inspects the constant operand (_kids[1]) and rejects the
        // values 0.0 and 1.0.
 9629   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9630   match(Set dst (AddD dst con));
 9631   ins_cost(200);
 9632   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9633             "DADDp  $dst,ST" %}
 9634   ins_encode %{
          // Load the constant from the constant table, then add-and-pop into $dst.
 9635     __ fld_d($constantaddress($con));
 9636     __ faddp($dst$$reg);
 9637   %}
 9638   ins_pipe(fpu_reg_mem);
 9639 %}
 9640 
 9641 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
        // Rounded double add of a constant: matches RoundDouble(AddD src con) when
        // UseSSE<=1 and stores the result to the stack slot $dst. The constant sits
        // one level deeper in the match tree (_kids[0] is the AddD), and the
        // predicate rejects the values 0.0 and 1.0.
 9642   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9643   match(Set dst (RoundDouble (AddD src con)));
 9644   ins_cost(200);
 9645   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9646             "DADD   ST,$src\n\t"
 9647             "FSTP_D $dst\t# D-round" %}
 9648   ins_encode %{
          // Constant -> ST, add $src, then store-and-pop to the slot at [rsp + disp].
 9649     __ fld_d($constantaddress($con));
 9650     __ fadd($src$$reg);
 9651     __ fstp_d(Address(rsp, $dst$$disp));
 9652   %}
 9653   ins_pipe(fpu_mem_reg_con);
 9654 %}
 9655 
 9656 instruct mulDPR_reg(regDPR dst, regDPR src) %{
        // Two-address double multiply: matches MulD(dst, src) when UseSSE<=1.
        // Per the format string it pushes $src and multiplies-with-pop into $dst.
 9657   predicate(UseSSE<=1);
 9658   match(Set dst (MulD dst src));
 9659   format %{ "FLD    $src\n\t"
 9660             "DMULp  $dst,ST" %}
 9661   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9662   ins_cost(150);
 9663   ins_encode( Push_Reg_DPR(src),
 9664               OpcP, RegOpc(dst) );
 9665   ins_pipe( fpu_reg_reg );
 9666 %}
 9667 
 9668 // Strict FP instruction biases argument before multiply then
 9669 // biases result to avoid double rounding of subnormals.
 9670 //
 9671 // scale arg1 by multiplying arg1 by 2^(-15360)
 9672 // load arg2
 9673 // multiply scaled arg1 by arg2
 9674 // rescale product by 2^(15360)
 9675 //
 9676 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
        // Biased double multiply: same match rule as mulDPR_reg, but only
        // selectable when the current compilation has a method. The operand
        // classes differ from mulDPR_reg: $dst uses regDPR1 and $src regnotDPR1.
 9677   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9678   match(Set dst (MulD dst src));
 9679   ins_cost(1);   // Select this instruction for all FP double multiplies
 9680 
        // Three multiplies per the format string: by bias1, by $src, by bias2.
 9681   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9682             "DMULp  $dst,ST\n\t"
 9683             "FLD    $src\n\t"
 9684             "DMULp  $dst,ST\n\t"
 9685             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9686             "DMULp  $dst,ST\n\t" %}
 9687   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9688   ins_encode( strictfp_bias1(dst),
 9689               Push_Reg_DPR(src),
 9690               OpcP, RegOpc(dst),
 9691               strictfp_bias2(dst) );
 9692   ins_pipe( fpu_reg_reg );
 9693 %}
 9694 
 9695 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
        // Double multiply by a constant: matches MulD(dst, con) when UseSSE<=1.
        // The predicate rejects the constant values 0.0 and 1.0.
 9696   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9697   match(Set dst (MulD dst con));
 9698   ins_cost(200);
 9699   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9700             "DMULp  $dst,ST" %}
 9701   ins_encode %{
          // Load the constant from the constant table, then multiply-and-pop into $dst.
 9702     __ fld_d($constantaddress($con));
 9703     __ fmulp($dst$$reg);
 9704   %}
 9705   ins_pipe(fpu_reg_mem);
 9706 %}
 9707 
 9708 
 9709 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
        // Double multiply by a memory operand: matches MulD(dst, LoadD src) when
        // UseSSE<=1, folding the load into the instruction.
 9710   predicate( UseSSE<=1 );
 9711   match(Set dst (MulD dst (LoadD src)));
 9712   ins_cost(200);
 9713   format %{ "FLD_D  $src\n\t"
 9714             "DMULp  $dst,ST" %}
        // Tertiary opcode 0xDD with /0 loads the double; primary/secondary encode
        // the multiply into $dst.
 9715   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9716   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9717               OpcP, RegOpc(dst), ClearInstMark );
 9718   ins_pipe( fpu_reg_mem );
 9719 %}
 9720 
 9721 //
 9722 // Cisc-alternate to reg-reg multiply
 9723 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
        // Three-operand double multiply with a memory operand: matches
        // MulD(src, LoadD mem) when UseSSE<=1 and pops the product into $dst.
 9724   predicate( UseSSE<=1 );
 9725   match(Set dst (MulD src (LoadD mem)));
 9726   ins_cost(250);
 9727   format %{ "FLD_D  $mem\n\t"
 9728             "DMUL   ST,$src\n\t"
 9729             "FSTP_D $dst" %}
        // The memory operand is a 64-bit double (LoadD / FLD_D), so the load must
        // use DD /0 like the other double-from-memory rules in this file; D9 /0
        // is the 32-bit float load and would read only half of the operand.
 9730   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
 9731   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
 9732               OpcReg_FPR(src),
 9733               Pop_Reg_DPR(dst), ClearInstMark );
 9734   ins_pipe( fpu_reg_reg_mem );
 9735 %}
 9736 
 9737 
 9738 // MACRO3 -- addDPR a mulDPR
 9739 // This instruction is a '2-address' instruction in that the result goes
 9740 // back to src2.  This eliminates a move from the macro; possibly the
 9741 // register allocator will have to add it back (and maybe not).
 9742 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
        // Fused pattern: matches AddD(MulD(src0, src1), src2) when UseSSE<=1 and
        // writes the result back into $src2.
 9743   predicate( UseSSE<=1 );
 9744   match(Set src2 (AddD (MulD src0 src1) src2));
 9745   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9746             "DMUL   ST,$src1\n\t"
 9747             "DADDp  $src2,ST" %}
 9748   ins_cost(250);
        // NOTE(review): none of the encodings listed below is passed an opcode
        // selector here; confirm whether this opcode() declaration is still used.
 9749   opcode(0xDD); /* LoadD DD /0 */
 9750   ins_encode( Push_Reg_FPR(src0),
 9751               FMul_ST_reg(src1),
 9752               FAddP_reg_ST(src2) );
 9753   ins_pipe( fpu_reg_reg_reg );
 9754 %}
 9755 
 9756 
 9757 // MACRO3 -- subDPR a mulDPR
 9758 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
        // Fused pattern: matches SubD(MulD(src0, src1), src2) when UseSSE<=1, i.e.
        // the product minus $src2, with the result written back into $src2.
 9759   predicate( UseSSE<=1 );
 9760   match(Set src2 (SubD (MulD src0 src1) src2));
 9761   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9762             "DMUL   ST,$src1\n\t"
 9763             "DSUBRp $src2,ST" %}
 9764   ins_cost(250);
        // The final subtract-and-pop is spelled out as raw bytes: DE followed by
        // E0 plus the register number of $src2.
 9765   ins_encode( Push_Reg_FPR(src0),
 9766               FMul_ST_reg(src1),
 9767               Opcode(0xDE), Opc_plus(0xE0,src2));
 9768   ins_pipe( fpu_reg_reg_reg );
 9769 %}
 9770 
 9771 
 9772 instruct divDPR_reg(regDPR dst, regDPR src) %{
        // Two-address double divide: matches DivD(dst, src) when UseSSE<=1.
        // Per the format string it pushes $src and divides-with-pop into $dst.
 9773   predicate( UseSSE<=1 );
 9774   match(Set dst (DivD dst src));
 9775 
 9776   format %{ "FLD    $src\n\t"
 9777             "FDIVp  $dst,ST" %}
 9778   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9779   ins_cost(150);
 9780   ins_encode( Push_Reg_DPR(src),
 9781               OpcP, RegOpc(dst) );
 9782   ins_pipe( fpu_reg_reg );
 9783 %}
 9784 
 9785 // Strict FP instruction biases argument before division then
 9786 // biases result, to avoid double rounding of subnormals.
 9787 //
 9788 // scale dividend by multiplying dividend by 2^(-15360)
 9789 // load divisor
 9790 // divide scaled dividend by divisor
 9791 // rescale quotient by 2^(15360)
 9792 //
 9793 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
        // Biased double divide: same match rule as divDPR_reg, but only selectable
        // when the current compilation has a method. A single predicate is kept
        // (the earlier bare "UseSSE<=1" clause was redundant with this one), in the
        // same shape as strictfp_mulDPR_reg.
 9794   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9795   match(Set dst (DivD dst src));
 9797   ins_cost(1);   // Prefer this rule over divDPR_reg (cost 150) whenever it applies
 9798 
        // Per the format string: scale $dst by bias1, divide by $src, rescale by bias2.
 9799   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9800             "DMULp  $dst,ST\n\t"
 9801             "FLD    $src\n\t"
 9802             "FDIVp  $dst,ST\n\t"
 9803             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9804             "DMULp  $dst,ST\n\t" %}
 9805   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9806   ins_encode( strictfp_bias1(dst),
 9807               Push_Reg_DPR(src),
 9808               OpcP, RegOpc(dst),
 9809               strictfp_bias2(dst) );
 9810   ins_pipe( fpu_reg_reg );
 9811 %}
 9812 
 9813 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
        // Double remainder on the x87 stack: matches ModD(dst, src) when UseSSE<=1.
        // EAX and EFLAGS are declared killed because the remainder encoding uses them.
 9814   predicate(UseSSE<=1);
 9815   match(Set dst (ModD dst src));
 9816   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9817 
 9818   format %{ "DMOD   $dst,$src" %}
 9819   ins_cost(250);
        // Four-stage encoding: set up operands, run the remainder sequence,
        // restore the stack, then pop the result into $dst.
 9820   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9821               emitModDPR(),
 9822               Push_Result_Mod_DPR(src),
 9823               Pop_Reg_DPR(dst));
 9824   ins_pipe( pipe_slow );
 9825 %}
 9826 
 9827 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
        // Double remainder for XMM operands: matches ModD(src0, src1) when
        // UseSSE>=2. Per the format string the operands are bounced through the
        // stack onto the x87 unit, FPREM is looped until done, and the result is
        // moved back to $dst. EAX and EFLAGS are killed (FNSTSW AX / SAHF).
 9828   predicate(UseSSE>=2);
 9829   match(Set dst (ModD src0 src1));
 9830   effect(KILL rax, KILL cr);
 9831 
 9832   format %{ "SUB    ESP,8\t # DMOD\n"
 9833           "\tMOVSD  [ESP+0],$src1\n"
 9834           "\tFLD_D  [ESP+0]\n"
 9835           "\tMOVSD  [ESP+0],$src0\n"
 9836           "\tFLD_D  [ESP+0]\n"
 9837      "loop:\tFPREM\n"
 9838           "\tFWAIT\n"
 9839           "\tFNSTSW AX\n"
 9840           "\tSAHF\n"
 9841           "\tJP     loop\n"
 9842           "\tFSTP_D [ESP+0]\n"
 9843           "\tMOVSD  $dst,[ESP+0]\n"
 9844           "\tADD    ESP,8\n"
 9845           "\tFSTP   ST0\t # Restore FPU Stack"
 9846     %}
 9847   ins_cost(250);
 9848   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9849   ins_pipe( pipe_slow );
 9850 %}
 9851 
 9852 instruct atanDPR_reg(regDPR dst, regDPR src) %{
        // Two-operand arctangent on x87 registers: matches AtanD(dst, src) when
        // UseSSE<=1.
 9853   predicate (UseSSE<=1);
 9854   match(Set dst(AtanD dst src));
        // NOTE(review): the mnemonic printed here is "DATA"; possibly meant "DATAN".
 9855   format %{ "DATA   $dst,$src" %}
        // Two-byte opcode D9 F3, emitted primary then secondary (OpcP, OpcS).
 9856   opcode(0xD9, 0xF3);
 9857   ins_encode( Push_Reg_DPR(src),
 9858               OpcP, OpcS, RegOpc(dst) );
 9859   ins_pipe( pipe_slow );
 9860 %}
 9861 
 9862 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
        // Two-operand arctangent for XMM operands: matches AtanD(dst, src) when
        // UseSSE>=2. Uses the same D9 F3 opcode bytes as atanDPR_reg, with the
        // operand and result moved by Push_SrcD / Push_ResultD.
 9863   predicate (UseSSE>=2);
 9864   match(Set dst(AtanD dst src));
 9865   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9866   format %{ "DATA   $dst,$src" %}
 9867   opcode(0xD9, 0xF3);
 9868   ins_encode( Push_SrcD(src),
 9869               OpcP, OpcS, Push_ResultD(dst) );
 9870   ins_pipe( pipe_slow );
 9871 %}
 9872 
 9873 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
        // Double square root on x87 registers: matches SqrtD(src) when UseSSE<=1
        // and pops the result into $dst.
 9874   predicate (UseSSE<=1);
 9875   match(Set dst (SqrtD src));
 9876   format %{ "DSQRT  $dst,$src" %}
        // The opcode bytes are declared in reverse (primary 0xFA, secondary 0xD9)
        // and the encoding lists OpcS before OpcP, so 0xD9 comes first.
 9877   opcode(0xFA, 0xD9);
 9878   ins_encode( Push_Reg_DPR(src),
 9879               OpcS, OpcP, Pop_Reg_DPR(dst) );
 9880   ins_pipe( pipe_slow );
 9881 %}
 9882 
 9883 //-------------Float Instructions-------------------------------
 9884 // Float Math
 9885 
 9886 // Code for float compare:
 9887 //     fcompp();
 9888 //     fwait(); fnstsw_ax();
 9889 //     sahf();
 9890 //     movl(dst, unordered_result);
 9891 //     jcc(Assembler::parity, exit);
 9892 //     movl(dst, less_result);
 9893 //     jcc(Assembler::below, exit);
 9894 //     movl(dst, equal_result);
 9895 //     jcc(Assembler::equal, exit);
 9896 //     movl(dst, greater_result);
 9897 //   exit:
 9898 
 9899 // P6 version of float compare, sets condition codes in EFLAGS
 9900 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
        // Float compare into EFLAGS using FUCOMIP: matches CmpF(src1, src2) when
        // cmov is supported and UseSSE == 0. EAX is killed because the NaN fixup
        // shown in the format string writes AH before SAHF.
 9901   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9902   match(Set cr (CmpF src1 src2));
 9903   effect(KILL rax);
 9904   ins_cost(150);
 9905   format %{ "FLD    $src1\n\t"
 9906             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9907             "JNP    exit\n\t"
 9908             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
 9909             "SAHF\n"
 9910      "exit:\tNOP               // avoid branch to branch" %}
 9911   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9912   ins_encode( Push_Reg_DPR(src1),
 9913               OpcP, RegOpc(src2),
 9914               cmpF_P6_fixup );
 9915   ins_pipe( pipe_slow );
 9916 %}
 9917 
 9918 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
        // Variant of cmpFPR_cc_P6 producing an eFlagsRegUCF result: same compare,
        // but without the NaN fixup, so EAX is not killed and the cost is lower.
 9919   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9920   match(Set cr (CmpF src1 src2));
 9921   ins_cost(100);
 9922   format %{ "FLD    $src1\n\t"
 9923             "FUCOMIP ST,$src2  // P6 instruction" %}
 9924   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9925   ins_encode( Push_Reg_DPR(src1),
 9926               OpcP, RegOpc(src2));
 9927   ins_pipe( pipe_slow );
 9928 %}
 9929 
 9930 
 9931 // Compare & branch
 9932 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
        // Float compare into EFLAGS without FUCOMIP: matches CmpF(src1, src2) when
        // UseSSE == 0. Per the format string the x87 status word is read into AX
        // and transferred with SAHF, so EAX is killed; unordered is treated as LT.
 9933   predicate(UseSSE == 0);
 9934   match(Set cr (CmpF src1 src2));
 9935   effect(KILL rax);
 9936   ins_cost(200);
 9937   format %{ "FLD    $src1\n\t"
 9938             "FCOMp  $src2\n\t"
 9939             "FNSTSW AX\n\t"
 9940             "TEST   AX,0x400\n\t"
 9941             "JZ,s   flags\n\t"
 9942             "MOV    AH,1\t# unordered treat as LT\n"
 9943     "flags:\tSAHF" %}
 9944   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9945   ins_encode( Push_Reg_DPR(src1),
 9946               OpcP, RegOpc(src2),
 9947               fpu_flags);
 9948   ins_pipe( pipe_slow );
 9949 %}
 9950 
 9951 // Compare vs zero into -1,0,1
 9952 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
        // Three-way float compare against the immFPR0 constant: matches
        // CmpF3(src1, zero) when UseSSE == 0 and produces an integer in $dst.
        // EAX and EFLAGS are killed.
 9953   predicate(UseSSE == 0);
 9954   match(Set dst (CmpF3 src1 zero));
 9955   effect(KILL cr, KILL rax);
 9956   ins_cost(280);
 9957   format %{ "FTSTF  $dst,$src1" %}
        // Opcode bytes are declared reversed and emitted secondary-first
        // (OpcS, OpcP), giving D9 E4; the pushed copy of $src1 is then popped.
 9958   opcode(0xE4, 0xD9);
 9959   ins_encode( Push_Reg_DPR(src1),
 9960               OpcS, OpcP, PopFPU,
 9961               CmpF_Result(dst));
 9962   ins_pipe( pipe_slow );
 9963 %}
 9964 
 9965 // Compare into -1,0,1
 9966 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
        // Three-way float compare of two x87 registers: matches CmpF3(src1, src2)
        // when UseSSE == 0 and produces an integer in $dst. EAX and EFLAGS are killed.
 9967   predicate(UseSSE == 0);
 9968   match(Set dst (CmpF3 src1 src2));
 9969   effect(KILL cr, KILL rax);
 9970   ins_cost(300);
 9971   format %{ "FCMPF  $dst,$src1,$src2" %}
 9972   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9973   ins_encode( Push_Reg_DPR(src1),
 9974               OpcP, RegOpc(src2),
 9975               CmpF_Result(dst));
 9976   ins_pipe( pipe_slow );
 9977 %}
 9978 
 9979 // float compare and set condition codes in EFLAGS by XMM regs
 9980 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
        // Float compare of two XMM registers into EFLAGS: matches CmpF(src1, src2)
        // when UseSSE>=1. UCOMISS is followed by a fixup that, per the format
        // string, rewrites the flags when the compare saw a NaN.
 9981   predicate(UseSSE>=1);
 9982   match(Set cr (CmpF src1 src2));
 9983   ins_cost(145);
 9984   format %{ "UCOMISS $src1,$src2\n\t"
 9985             "JNP,s   exit\n\t"
 9986             "PUSHF\t# saw NaN, set CF\n\t"
 9987             "AND     [rsp], #0xffffff2b\n\t"
 9988             "POPF\n"
 9989     "exit:" %}
 9990   ins_encode %{
 9991     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
 9992     emit_cmpfp_fixup(masm);
 9993   %}
 9994   ins_pipe( pipe_slow );
 9995 %}
 9996 
 9997 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
        // Variant of cmpF_cc producing an eFlagsRegUCF result: a bare UCOMISS with
        // no NaN fixup, hence the lower cost.
 9998   predicate(UseSSE>=1);
 9999   match(Set cr (CmpF src1 src2));
10000   ins_cost(100);
10001   format %{ "UCOMISS $src1,$src2" %}
10002   ins_encode %{
10003     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10004   %}
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 // float compare and set condition codes in EFLAGS by XMM regs
10009 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
        // Float compare of an XMM register with a memory operand into EFLAGS:
        // matches CmpF(src1, LoadF src2) when UseSSE>=1, followed by the same
        // NaN flag fixup as cmpF_cc.
10010   predicate(UseSSE>=1);
10011   match(Set cr (CmpF src1 (LoadF src2)));
10012   ins_cost(165);
10013   format %{ "UCOMISS $src1,$src2\n\t"
10014             "JNP,s   exit\n\t"
10015             "PUSHF\t# saw NaN, set CF\n\t"
10016             "AND     [rsp], #0xffffff2b\n\t"
10017             "POPF\n"
10018     "exit:" %}
10019   ins_encode %{
10020     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10021     emit_cmpfp_fixup(masm);
10022   %}
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
        // Variant of cmpF_ccmem producing an eFlagsRegUCF result: a bare UCOMISS
        // against memory with no NaN fixup.
10027   predicate(UseSSE>=1);
10028   match(Set cr (CmpF src1 (LoadF src2)));
10029   ins_cost(100);
10030   format %{ "UCOMISS $src1,$src2" %}
10031   ins_encode %{
10032     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10033   %}
10034   ins_pipe( pipe_slow );
10035 %}
10036 
10037 // Compare into -1,0,1 in XMM
10038 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
        // Three-way float compare of two XMM registers: matches CmpF3(src1, src2)
        // when UseSSE>=1. Per the format string $dst becomes -1 for unordered or
        // below, otherwise the SETNE result (0 or 1). EFLAGS is killed.
10039   predicate(UseSSE>=1);
10040   match(Set dst (CmpF3 src1 src2));
10041   effect(KILL cr);
10042   ins_cost(255);
10043   format %{ "UCOMISS $src1, $src2\n\t"
10044             "MOV     $dst, #-1\n\t"
10045             "JP,s    done\n\t"
10046             "JB,s    done\n\t"
10047             "SETNE   $dst\n\t"
10048             "MOVZB   $dst, $dst\n"
10049     "done:" %}
10050   ins_encode %{
10051     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10052     emit_cmpfp3(masm, $dst$$Register);
10053   %}
10054   ins_pipe( pipe_slow );
10055 %}
10056 
10057 // Compare into -1,0,1 in XMM and memory
10058 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
        // Three-way float compare of an XMM register with a memory operand:
        // matches CmpF3(src1, LoadF src2) when UseSSE>=1. Result convention is the
        // same as cmpF_reg (see format string). EFLAGS is killed.
10059   predicate(UseSSE>=1);
10060   match(Set dst (CmpF3 src1 (LoadF src2)));
10061   effect(KILL cr);
10062   ins_cost(275);
10063   format %{ "UCOMISS $src1, $src2\n\t"
10064             "MOV     $dst, #-1\n\t"
10065             "JP,s    done\n\t"
10066             "JB,s    done\n\t"
10067             "SETNE   $dst\n\t"
10068             "MOVZB   $dst, $dst\n"
10069     "done:" %}
10070   ins_encode %{
10071     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10072     emit_cmpfp3(masm, $dst$$Register);
10073   %}
10074   ins_pipe( pipe_slow );
10075 %}
10076 
10077 // Spill to obtain 24-bit precision
10078 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float subtract in 24-bit mode: matches SubF(src1, src2) when UseSSE==0
        // and select_24_bit_instr() is true. The result goes to a float stack slot.
10079   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10080   match(Set dst (SubF src1 src2));
10081 
10082   format %{ "FSUB   $dst,$src1 - $src2" %}
10083   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
        // Push src1, subtract src2 from the top of stack, pop the result to $dst.
10084   ins_encode( Push_Reg_FPR(src1),
10085               OpcReg_FPR(src2),
10086               Pop_Mem_FPR(dst) );
10087   ins_pipe( fpu_mem_reg_reg );
10088 %}
10089 //
10090 // This instruction does not round to 24-bits
10091 instruct subFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float subtract: matches SubF(dst, src) when UseSSE==0 and
        // select_24_bit_instr() is false; the result stays in an x87 register.
10092   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10093   match(Set dst (SubF dst src));
10094 
10095   format %{ "FSUB   $dst,$src" %}
10096   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10097   ins_encode( Push_Reg_FPR(src),
10098               OpcP, RegOpc(dst) );
10099   ins_pipe( fpu_reg_reg );
10100 %}
10101 
10102 // Spill to obtain 24-bit precision
10103 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float add in 24-bit mode: matches AddF(src1, src2) when UseSSE==0 and
        // select_24_bit_instr() is true. The result goes to a float stack slot.
10104   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10105   match(Set dst (AddF src1 src2));
10106 
10107   format %{ "FADD   $dst,$src1,$src2" %}
10108   opcode(0xD8, 0x0); /* D8 C0+i */
        // Note the operand order: src2 is pushed and src1 is added to it.
10109   ins_encode( Push_Reg_FPR(src2),
10110               OpcReg_FPR(src1),
10111               Pop_Mem_FPR(dst) );
10112   ins_pipe( fpu_mem_reg_reg );
10113 %}
10114 //
10115 // This instruction does not round to 24-bits
10116 instruct addFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float add: matches AddF(dst, src) when UseSSE==0 and
        // select_24_bit_instr() is false; the result stays in an x87 register.
10117   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10118   match(Set dst (AddF dst src));
10119 
10120   format %{ "FLD    $src\n\t"
10121             "FADDp  $dst,ST" %}
10122   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10123   ins_encode( Push_Reg_FPR(src),
10124               OpcP, RegOpc(dst) );
10125   ins_pipe( fpu_reg_reg );
10126 %}
10127 
10128 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
        // Float absolute value: matches AbsF(src) when UseSSE==0. Both operands use
        // the regFPR1 class and the encoding is just the two opcode bytes.
10129   predicate(UseSSE==0);
10130   match(Set dst (AbsF src));
10131   ins_cost(100);
10132   format %{ "FABS" %}
        // Bytes are declared reversed and emitted secondary-first: D9 E1.
10133   opcode(0xE1, 0xD9);
10134   ins_encode( OpcS, OpcP );
10135   ins_pipe( fpu_reg_reg );
10136 %}
10137 
10138 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
        // Float negate: matches NegF(src) when UseSSE==0. Both operands use the
        // regFPR1 class and the encoding is just the two opcode bytes.
10139   predicate(UseSSE==0);
10140   match(Set dst (NegF src));
10141   ins_cost(100);
10142   format %{ "FCHS" %}
        // Bytes are declared reversed and emitted secondary-first: D9 E0.
10143   opcode(0xE0, 0xD9);
10144   ins_encode( OpcS, OpcP );
10145   ins_pipe( fpu_reg_reg );
10146 %}
10147 
10148 // Cisc-alternate to addFPR_reg
10149 // Spill to obtain 24-bit precision
10150 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
        // Float add of a register and a memory operand in 24-bit mode: matches
        // AddF(src1, LoadF src2); the result goes to a float stack slot.
10151   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10152   match(Set dst (AddF src1 (LoadF src2)));
10153 
10154   format %{ "FLD    $src2\n\t"
10155             "FADD   ST,$src1\n\t"
10156             "FSTP_S $dst" %}
        // Tertiary opcode D9 /0 loads the float from $src2 before the add.
10157   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10158   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10159               OpcReg_FPR(src1),
10160               Pop_Mem_FPR(dst), ClearInstMark );
10161   ins_pipe( fpu_mem_reg_mem );
10162 %}
10163 //
10164 // Cisc-alternate to addFPR_reg
10165 // This instruction does not round to 24-bits
10166 instruct addFPR_reg_mem(regFPR dst, memory src) %{
        // Two-address float add of a memory operand: matches AddF(dst, LoadF src)
        // when UseSSE==0 and select_24_bit_instr() is false.
10167   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10168   match(Set dst (AddF dst (LoadF src)));
10169 
10170   format %{ "FADD   $dst,$src" %}
        // Tertiary opcode D9 /0 loads the float; primary/secondary encode the add.
10171   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10172   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
10173               OpcP, RegOpc(dst), ClearInstMark );
10174   ins_pipe( fpu_reg_mem );
10175 %}
10176 
10177 // // Following two instructions for _222_mpegaudio
10178 // Spill to obtain 24-bit precision
10179 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
        // Float add in 24-bit mode with the first input taken from memory; the
        // result goes to a float stack slot.
        // NOTE(review): $src1 is a memory operand but the match rule has no LoadF
        // around it, unlike addFPR24_reg_mem - confirm this is intentional.
10180   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181   match(Set dst (AddF src1 src2));
10182 
10183   format %{ "FADD   $dst,$src1,$src2" %}
10184   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10185   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
10186               OpcReg_FPR(src2),
10187               Pop_Mem_FPR(dst), ClearInstMark );
10188   ins_pipe( fpu_mem_reg_mem );
10189 %}
10190 
10191 // Cisc-spill variant
10192 // Spill to obtain 24-bit precision
10193 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
        // Float add in 24-bit mode with both inputs in memory: matches
        // AddF(src1, LoadF src2); the result goes to a float stack slot.
10194   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10195   match(Set dst (AddF src1 (LoadF src2)));
10196 
10197   format %{ "FADD   $dst,$src1,$src2 cisc" %}
        // Load $src2 with the tertiary opcode, then add $src1 straight from memory
        // using the primary opcode with the secondary value in the ModRM reg field.
10198   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10199   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10200               OpcP, RMopc_Mem(secondary,src1),
10201               Pop_Mem_FPR(dst),
10202               ClearInstMark);
10203   ins_pipe( fpu_mem_mem_mem );
10204 %}
10205 
10206 // Spill to obtain 24-bit precision
10207 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
        // Float add in 24-bit mode with both inputs declared as memory operands;
        // the encoding is identical to addFPR24_mem_cisc.
        // NOTE(review): the match rule has no LoadF around either memory operand -
        // confirm how this differs from addFPR24_mem_cisc in practice.
10208   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10209   match(Set dst (AddF src1 src2));
10210 
10211   format %{ "FADD   $dst,$src1,$src2" %}
10212   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10213   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10214               OpcP, RMopc_Mem(secondary,src1),
10215               Pop_Mem_FPR(dst),
10216               ClearInstMark);
10217   ins_pipe( fpu_mem_mem_mem );
10218 %}
10219 
10220 
10221 // Spill to obtain 24-bit precision
10222 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
        // Float add of a constant in 24-bit mode: matches AddF(src, con); the
        // result is stored as a single-precision value in the stack slot $dst.
10223   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224   match(Set dst (AddF src con));
10225   format %{ "FLD    $src\n\t"
10226             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10227             "FSTP_S $dst"  %}
10228   ins_encode %{
          // Push a copy of $src, add the constant-table float, pop to [rsp + disp].
10229     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10230     __ fadd_s($constantaddress($con));
10231     __ fstp_s(Address(rsp, $dst$$disp));
10232   %}
10233   ins_pipe(fpu_mem_reg_con);
10234 %}
10235 //
10236 // This instruction does not round to 24-bits
10237 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
        // Float add of a constant without 24-bit rounding: matches AddF(src, con)
        // and pops the result into the x87 register $dst.
10238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239   match(Set dst (AddF src con));
10240   format %{ "FLD    $src\n\t"
10241             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10242             "FSTP   $dst"  %}
10243   ins_encode %{
          // Push a copy of $src, add the constant-table float, pop into $dst.
10244     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10245     __ fadd_s($constantaddress($con));
10246     __ fstp_d($dst$$reg);
10247   %}
10248   ins_pipe(fpu_reg_reg_con);
10249 %}
10250 
10251 // Spill to obtain 24-bit precision
10252 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float multiply in 24-bit mode: matches MulF(src1, src2) when UseSSE==0
        // and select_24_bit_instr() is true. The result goes to a float stack slot.
10253   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254   match(Set dst (MulF src1 src2));
10255 
10256   format %{ "FLD    $src1\n\t"
10257             "FMUL   $src2\n\t"
10258             "FSTP_S $dst"  %}
10259   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10260   ins_encode( Push_Reg_FPR(src1),
10261               OpcReg_FPR(src2),
10262               Pop_Mem_FPR(dst) );
10263   ins_pipe( fpu_mem_reg_reg );
10264 %}
10265 //
10266 // This instruction does not round to 24-bits
10267 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
        // Three-operand float multiply without 24-bit rounding: matches
        // MulF(src1, src2) and pops the product into the x87 register $dst.
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (MulF src1 src2));
10270 
        // NOTE(review): the format text differs from the encoding below - the
        // encoding pushes $src2 (not $src1) and pops to a register (Pop_Reg_FPR),
        // whereas the text prints a memory-style FSTP_S.
10271   format %{ "FLD    $src1\n\t"
10272             "FMUL   $src2\n\t"
10273             "FSTP_S $dst"  %}
10274   opcode(0xD8, 0x1); /* D8 C8+i */
10275   ins_encode( Push_Reg_FPR(src2),
10276               OpcReg_FPR(src1),
10277               Pop_Reg_FPR(dst) );
10278   ins_pipe( fpu_reg_reg_reg );
10279 %}
10280 
10281 
10282 // Spill to obtain 24-bit precision
10283 // Cisc-alternate to reg-reg multiply
10284 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
        // Float multiply of a register and a memory operand in 24-bit mode:
        // matches MulF(src1, LoadF src2); the result goes to a float stack slot.
10285   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286   match(Set dst (MulF src1 (LoadF src2)));
10287 
10288   format %{ "FLD_S  $src2\n\t"
10289             "FMUL   $src1\n\t"
10290             "FSTP_S $dst"  %}
10291   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10292   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10293               OpcReg_FPR(src1),
10294               Pop_Mem_FPR(dst), ClearInstMark );
10295   ins_pipe( fpu_mem_reg_mem );
10296 %}
10297 //
10298 // This instruction does not round to 24-bits
10299 // Cisc-alternate to reg-reg multiply
10300 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
        // Float multiply of a register and a memory operand without 24-bit
        // rounding: matches MulF(src1, LoadF src2) and pops the product into $dst.
10301   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10302   match(Set dst (MulF src1 (LoadF src2)));
10303 
10304   format %{ "FMUL   $dst,$src1,$src2" %}
        // Tertiary opcode D9 /0 loads the float from $src2 before the multiply.
10305   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10306   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10307               OpcReg_FPR(src1),
10308               Pop_Reg_FPR(dst), ClearInstMark );
10309   ins_pipe( fpu_reg_reg_mem );
10310 %}
10311 
10312 // Spill to obtain 24-bit precision
10313 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
        // Float multiply in 24-bit mode with both inputs declared as memory
        // operands; the result goes to a float stack slot.
        // NOTE(review): the match rule has no LoadF around either memory operand -
        // confirm this is intentional.
10314   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315   match(Set dst (MulF src1 src2));
10316 
10317   format %{ "FMUL   $dst,$src1,$src2" %}
        // Load $src2 with the tertiary opcode, then multiply by $src1 straight
        // from memory (primary opcode, secondary value in the ModRM reg field).
10318   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10319   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10320               OpcP, RMopc_Mem(secondary,src1),
10321               Pop_Mem_FPR(dst),
10322               ClearInstMark );
10323   ins_pipe( fpu_mem_mem_mem );
10324 %}
10325 
10326 // Spill to obtain 24-bit precision
10327 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
        // Float multiply by a constant in 24-bit mode: matches MulF(src, con); the
        // result is stored as a single-precision value in the stack slot $dst.
10328   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329   match(Set dst (MulF src con));
10330 
10331   format %{ "FLD    $src\n\t"
10332             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10333             "FSTP_S $dst"  %}
10334   ins_encode %{
          // Push a copy of $src, multiply by the constant-table float, pop to [rsp + disp].
10335     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10336     __ fmul_s($constantaddress($con));
10337     __ fstp_s(Address(rsp, $dst$$disp));
10338   %}
10339   ins_pipe(fpu_mem_reg_con);
10340 %}
10341 //
10342 // This instruction does not round to 24-bits
10343 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
        // Float multiply by a constant without 24-bit rounding: matches
        // MulF(src, con) and pops the product into the x87 register $dst.
10344   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10345   match(Set dst (MulF src con));
10346 
10347   format %{ "FLD    $src\n\t"
10348             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10349             "FSTP   $dst"  %}
10350   ins_encode %{
          // Push a copy of $src, multiply by the constant-table float, pop into $dst.
10351     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10352     __ fmul_s($constantaddress($con));
10353     __ fstp_d($dst$$reg);
10354   %}
10355   ins_pipe(fpu_reg_reg_con);
10356 %}
10357 
10358 
10359 //
10360 // MACRO1 -- subsume unshared load into mulFPR
10361 // This instruction does not round to 24-bits
10362 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
        // Float multiply with the load as the FIRST input: matches
        // MulF(LoadF mem1, src) without 24-bit rounding and pops the product
        // into $dst. mulFPR_reg_mem covers the load-as-second-input shape.
10363   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10364   match(Set dst (MulF (LoadF mem1) src));
10365 
10366   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10367             "FMUL   ST,$src\n\t"
10368             "FSTP   $dst" %}
10369   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10370   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
10371               OpcReg_FPR(src),
10372               Pop_Reg_FPR(dst), ClearInstMark );
10373   ins_pipe( fpu_reg_reg_mem );
10374 %}
10375 //
10376 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10377 // This instruction does not round to 24-bits
10378 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
        // Fused load-multiply-add: matches AddF(MulF(LoadF mem1, src1), src2)
        // without 24-bit rounding and pops the result into $dst. The explicit
        // cost of 95 is what makes the matcher weigh this against separate rules.
10379   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10380   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10381   ins_cost(95);
10382 
10383   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10384             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10385             "FADD   ST,$src2\n\t"
10386             "FSTP   $dst" %}
        // Here the primary opcode itself is the float load (emitted via OpcP).
10387   opcode(0xD9); /* LoadF D9 /0 */
10388   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
10389               FMul_ST_reg(src1),
10390               FAdd_ST_reg(src2),
10391               Pop_Reg_FPR(dst), ClearInstMark );
10392   ins_pipe( fpu_reg_mem_reg_reg );
10393 %}
10394 
10395 // MACRO3 -- addFPR a mulFPR
10396 // This instruction does not round to 24-bits.  It is a '2-address'
10397 // instruction in that the result goes back to src2.  This eliminates
10398 // a move from the macro; possibly the register allocator will have
10399 // to add it back (and maybe not).
10400 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
        // Fused multiply-then-add on registers: matches
        // AddF(MulF(src0, src1), src2) without 24-bit rounding and writes the
        // result back into $src2.
10401   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10402   match(Set src2 (AddF (MulF src0 src1) src2));
10403 
10404   format %{ "FLD    $src0     ===MACRO3===\n\t"
10405             "FMUL   ST,$src1\n\t"
10406             "FADDP  $src2,ST" %}
        // NOTE(review): none of the encodings listed below is passed an opcode
        // selector here; confirm whether this opcode() declaration is still used.
10407   opcode(0xD9); /* LoadF D9 /0 */
10408   ins_encode( Push_Reg_FPR(src0),
10409               FMul_ST_reg(src1),
10410               FAddP_reg_ST(src2) );
10411   ins_pipe( fpu_reg_reg_reg );
10412 %}
10413 
10414 // MACRO4 -- divFPR subFPR
10415 // This instruction does not round to 24-bits
10416 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
        // Fused subtract-then-divide: matches DivF(SubF(src2, src1), src3), i.e.
        // (src2 - src1) / src3, without 24-bit rounding; result is popped into $dst.
10417   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10418   match(Set dst (DivF (SubF src2 src1) src3));
10419 
10420   format %{ "FLD    $src2   ===MACRO4===\n\t"
10421             "FSUB   ST,$src1\n\t"
10422             "FDIV   ST,$src3\n\t"
10423             "FSTP  $dst" %}
10424   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
        // The subtract and divide are both emitted by subFPR_divFPR_encode.
10425   ins_encode( Push_Reg_FPR(src2),
10426               subFPR_divFPR_encode(src1,src3),
10427               Pop_Reg_FPR(dst) );
10428   ins_pipe( fpu_reg_reg_reg_reg );
10429 %}
10430 
10431 // Spill to obtain 24-bit precision
10432 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
        // Float divide in 24-bit mode: matches DivF(src1, src2) when UseSSE==0 and
        // select_24_bit_instr() is true. The result goes to a float stack slot.
10433   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434   match(Set dst (DivF src1 src2));
10435 
10436   format %{ "FDIV   $dst,$src1,$src2" %}
10437   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
        // Push src1, divide the top of stack by src2, pop the quotient to $dst.
10438   ins_encode( Push_Reg_FPR(src1),
10439               OpcReg_FPR(src2),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_reg_reg );
10442 %}
10443 //
10444 // This instruction does not round to 24-bits
10445 instruct divFPR_reg(regFPR dst, regFPR src) %{
        // Two-address float divide: matches DivF(dst, src) when UseSSE==0 and
        // select_24_bit_instr() is false; the result stays in an x87 register.
10446   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447   match(Set dst (DivF dst src));
10448 
10449   format %{ "FDIV   $dst,$src" %}
10450   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10451   ins_encode( Push_Reg_FPR(src),
10452               OpcP, RegOpc(dst) );
10453   ins_pipe( fpu_reg_reg );
10454 %}
10455 
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
        // x87 float remainder for 24-bit mode: dst = src1 % src2.
        // The result is popped into a float stack slot.  The float variant reuses
        // the double (DPR) encoding classes for the push / remainder / result steps.
10459   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (ModF src1 src2));
10461   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10462 
10463   format %{ "FMOD   $dst,$src1,$src2" %}
10464   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10465               emitModDPR(),
10466               Push_Result_Mod_DPR(src2),
10467               Pop_Mem_FPR(dst));
10468   ins_pipe( pipe_slow );
10469 %}
10470 //
10471 // This instruction does not round to 24-bits
10472 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
        // x87 float remainder, two-address form: dst = dst % src.
        // Only selected without SSE and when 24-bit instruction selection is off.
        // The result is popped back into the x87 register dst.
10473   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10474   match(Set dst (ModF dst src));
10475   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10476 
10477   format %{ "FMOD   $dst,$src" %}
10478   ins_encode(Push_Reg_Mod_DPR(dst, src),
10479               emitModDPR(),
10480               Push_Result_Mod_DPR(src),
10481               Pop_Reg_FPR(dst));
10482   ins_pipe( pipe_slow );
10483 %}
10484 
10485 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
        // Float remainder for XMM operands (UseSSE>=1): dst = src0 % src1.
        // Per the format below, both operands are bounced through a 4-byte
        // scratch slot at [ESP] onto the x87 stack, FPREM is repeated while the
        // parity flag is set (JP loop), and the result is moved back into the
        // XMM register before the x87 stack is popped.
        // EAX and EFLAGS are killed: the loop uses FNSTSW AX / SAHF.
10486   predicate(UseSSE>=1);
10487   match(Set dst (ModF src0 src1));
10488   effect(KILL rax, KILL cr);
10489   format %{ "SUB    ESP,4\t # FMOD\n"
10490           "\tMOVSS  [ESP+0],$src1\n"
10491           "\tFLD_S  [ESP+0]\n"
10492           "\tMOVSS  [ESP+0],$src0\n"
10493           "\tFLD_S  [ESP+0]\n"
10494      "loop:\tFPREM\n"
10495           "\tFWAIT\n"
10496           "\tFNSTSW AX\n"
10497           "\tSAHF\n"
10498           "\tJP     loop\n"
10499           "\tFSTP_S [ESP+0]\n"
10500           "\tMOVSS  $dst,[ESP+0]\n"
10501           "\tADD    ESP,4\n"
10502           "\tFSTP   ST0\t # Restore FPU Stack"
10503     %}
10504   ins_cost(250);
10505   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10506   ins_pipe( pipe_slow );
10507 %}
10508 
10509 
10510 //----------Arithmetic Conversion Instructions---------------------------------
10511 // The conversion operations are all sorted alphabetically.  Please keep it that way!
10512 
10513 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // RoundFloat on x87 (UseSSE==0): stores the x87 register to a float
        // stack slot with a single-precision store (FST_S).
        // Also used as the expansion target of convDPR2FPR_reg.
10514   predicate(UseSSE==0);
10515   match(Set dst (RoundFloat src));
10516   ins_cost(125);
10517   format %{ "FST_S  $dst,$src\t# F-round" %}
10518   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10519   ins_pipe( fpu_mem_reg );
10520 %}
10521 
10522 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // RoundDouble while doubles live on the x87 stack (UseSSE<=1): stores the
        // x87 register to a double stack slot with a double-precision store (FST_D).
        // Also used as the expansion target of convFPR2D_reg.
10523   predicate(UseSSE<=1);
10524   match(Set dst (RoundDouble src));
10525   ins_cost(125);
10526   format %{ "FST_D  $dst,$src\t# D-round" %}
10527   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10528   ins_pipe( fpu_mem_reg );
10529 %}
10530 
10531 // Force rounding to 24-bit precision and 8-bit exponent
10532 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
        // ConvD2F on pure x87 (UseSSE==0).  No encoding of its own: it expands
        // into roundFloat_mem_reg, i.e. a single-precision store to a stack slot.
10533   predicate(UseSSE==0);
10534   match(Set dst (ConvD2F src));
10535   format %{ "FST_S  $dst,$src\t# F-round" %}
10536   expand %{
10537     roundFloat_mem_reg(dst,src);
10538   %}
10539 %}
10540 
10541 // Force rounding to 24-bit precision and 8-bit exponent
10542 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
        // ConvD2F for UseSSE==1: the double source is an x87 register and the
        // float result is an XMM register.  The value is stored as a single into
        // a 4-byte scratch slot on the CPU stack and reloaded with MOVSS.
        // EFLAGS is killed because ESP is adjusted with SUB/ADD.
10543   predicate(UseSSE==1);
10544   match(Set dst (ConvD2F src));
10545   effect( KILL cr );
10546   format %{ "SUB    ESP,4\n\t"
10547             "FST_S  [ESP],$src\t# F-round\n\t"
10548             "MOVSS  $dst,[ESP]\n\t"
10549             "ADD ESP,4" %}
10550   ins_encode %{
10551     __ subptr(rsp, 4);
          // If src is not FPR1L, push a copy of it and store-and-pop; otherwise
          // store without popping.
          // NOTE(review): presumably FPR1L is the x87 top-of-stack and
          // "$src$$reg-1" converts the register encoding to an ST(i) index - confirm.
10552     if ($src$$reg != FPR1L_enc) {
10553       __ fld_s($src$$reg-1);
10554       __ fstp_s(Address(rsp, 0));
10555     } else {
10556       __ fst_s(Address(rsp, 0));
10557     }
10558     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10559     __ addptr(rsp, 4);
10560   %}
10561   ins_pipe( pipe_slow );
10562 %}
10563 
10564 // Force rounding double precision to single precision
10565 instruct convD2F_reg(regF dst, regD src) %{
        // ConvD2F when both float and double live in XMM registers (UseSSE>=2):
        // a single CVTSD2SS.
10566   predicate(UseSSE>=2);
10567   match(Set dst (ConvD2F src));
10568   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10569   ins_encode %{
10570     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10571   %}
10572   ins_pipe( pipe_slow );
10573 %}
10574 
10575 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
        // ConvF2D on pure x87 (UseSSE==0): x87 register to x87 register.
        // NOTE(review): the format prints "FST_S" although both operands are x87
        // registers and the encoding class is Pop_Reg_Reg_DPR; the listing
        // mnemonic may not reflect the emitted instructions - confirm against
        // the encoding class.
10576   predicate(UseSSE==0);
10577   match(Set dst (ConvF2D src));
10578   format %{ "FST_S  $dst,$src\t# D-round" %}
10579   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10580   ins_pipe( fpu_reg_reg );
10581 %}
10582 
10583 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
        // ConvF2D for UseSSE==1 with an x87 float source.  No encoding of its
        // own: it expands into roundDouble_mem_reg, i.e. a double-precision store
        // to a stack slot.
10584   predicate(UseSSE==1);
10585   match(Set dst (ConvF2D src));
10586   format %{ "FST_D  $dst,$src\t# D-round" %}
10587   expand %{
10588     roundDouble_mem_reg(dst,src);
10589   %}
10590 %}
10591 
10592 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
        // ConvF2D for UseSSE==1: the float source is an XMM register and the
        // double result is an x87 register.  The float is spilled to a 4-byte
        // scratch slot on the CPU stack, loaded onto the x87 stack with FLD_S,
        // and popped into dst.
        // EFLAGS is killed because ESP is adjusted with SUB/ADD.
10593   predicate(UseSSE==1);
10594   match(Set dst (ConvF2D src));
10595   effect( KILL cr );
10596   format %{ "SUB    ESP,4\n\t"
10597             "MOVSS  [ESP],$src\n\t"
10598             "FLD_S  [ESP]\n\t"
10599             "ADD    ESP,4\n\t"
10600             "FSTP   $dst\t# D-round" %}
10601   ins_encode %{
10602     __ subptr(rsp, 4);
10603     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10604     __ fld_s(Address(rsp, 0));
10605     __ addptr(rsp, 4);
10606     __ fstp_d($dst$$reg);
10607   %}
10608   ins_pipe( pipe_slow );
10609 %}
10610 
10611 instruct convF2D_reg(regD dst, regF src) %{
        // ConvF2D when both float and double live in XMM registers (UseSSE>=2):
        // a single CVTSS2SD.
10612   predicate(UseSSE>=2);
10613   match(Set dst (ConvF2D src));
10614   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10615   ins_encode %{
10616     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10617   %}
10618   ins_pipe( pipe_slow );
10619 %}
10620 
10621 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10622 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
        // ConvD2I with an x87 double source (UseSSE<=1).  The result is pinned to
        // EAX; EDX and EFLAGS are killed.
        // Per the format: the value is stored with FISTP under the truncating
        // control word, the control word is restored, and the int is popped into
        // EAX.  If it equals 0x80000000 the source is reloaded and the
        // d2i_wrapper stub is called; otherwise the fast-path value is the result.
10623   predicate(UseSSE<=1);
10624   match(Set dst (ConvD2I src));
10625   effect( KILL tmp, KILL cr );
10626   format %{ "FLD    $src\t# Convert double to int \n\t"
10627             "FLDCW  trunc mode\n\t"
10628             "SUB    ESP,4\n\t"
10629             "FISTp  [ESP + #0]\n\t"
10630             "FLDCW  std/24-bit mode\n\t"
10631             "POP    EAX\n\t"
10632             "CMP    EAX,0x80000000\n\t"
10633             "JNE,s  fast\n\t"
10634             "FLD_D  $src\n\t"
10635             "CALL   d2i_wrapper\n"
10636       "fast:" %}
10637   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10638   ins_pipe( pipe_slow );
10639 %}
10640 
10641 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10642 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
        // ConvD2I with an XMM double source (UseSSE>=2).  The result is pinned to
        // EAX; EDX and EFLAGS are killed.
        // Fast path: a truncating CVTTSD2SI.  A result of 0x80000000 sends the
        // code to the slow path, which passes the original double on the x87
        // stack to the d2i_wrapper stub.
10643   predicate(UseSSE>=2);
10644   match(Set dst (ConvD2I src));
10645   effect( KILL tmp, KILL cr );
10646   format %{ "CVTTSD2SI $dst, $src\n\t"
10647             "CMP    $dst,0x80000000\n\t"
10648             "JNE,s  fast\n\t"
10649             "SUB    ESP, 8\n\t"
10650             "MOVSD  [ESP], $src\n\t"
10651             "FLD_D  [ESP]\n\t"
10652             "ADD    ESP, 8\n\t"
10653             "CALL   d2i_wrapper\n"
10654       "fast:" %}
10655   ins_encode %{
10656     Label fast;
10657     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
          // Any other value is already the final result.
10658     __ cmpl($dst$$Register, 0x80000000);
10659     __ jccb(Assembler::notEqual, fast);
          // Slow path: bounce the double through an 8-byte scratch slot onto the
          // x87 stack, release the slot, then call the stub.
10660     __ subptr(rsp, 8);
10661     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10662     __ fld_d(Address(rsp, 0));
10663     __ addptr(rsp, 8);
10664     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10665     __ post_call_nop();
10666     __ bind(fast);
10667   %}
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
        // ConvD2L with an x87 double source (UseSSE<=1).  The 64-bit result is
        // pinned to the EDX:EAX pair; EFLAGS is killed.
        // Per the format: the value is stored as a 64-bit integer with FISTP
        // under the truncating control word, then popped into EAX (low) and EDX
        // (high).  Only the pattern EDX==0x80000000 with EAX==0 takes the slow
        // path, which reloads the source and calls the d2l_wrapper stub.
10672   predicate(UseSSE<=1);
10673   match(Set dst (ConvD2L src));
10674   effect( KILL cr );
10675   format %{ "FLD    $src\t# Convert double to long\n\t"
10676             "FLDCW  trunc mode\n\t"
10677             "SUB    ESP,8\n\t"
10678             "FISTp  [ESP + #0]\n\t"
10679             "FLDCW  std/24-bit mode\n\t"
10680             "POP    EAX\n\t"
10681             "POP    EDX\n\t"
10682             "CMP    EDX,0x80000000\n\t"
10683             "JNE,s  fast\n\t"
10684             "TEST   EAX,EAX\n\t"
10685             "JNE,s  fast\n\t"
10686             "FLD    $src\n\t"
10687             "CALL   d2l_wrapper\n"
10688       "fast:" %}
10689   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10690   ins_pipe( pipe_slow );
10691 %}
10692 
10693 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10694 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
        // ConvD2L with an XMM double source (UseSSE>=2).  The 64-bit result is
        // pinned to the EDX:EAX pair; EFLAGS is killed.
        // The conversion itself is done on the x87 stack: the double is spilled
        // to an 8-byte scratch slot, loaded with FLD_D and stored back as a
        // 64-bit integer with FISTP under the truncating control word.
10695   predicate (UseSSE>=2);
10696   match(Set dst (ConvD2L src));
10697   effect( KILL cr );
10698   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10699             "MOVSD  [ESP],$src\n\t"
10700             "FLD_D  [ESP]\n\t"
10701             "FLDCW  trunc mode\n\t"
10702             "FISTp  [ESP + #0]\n\t"
10703             "FLDCW  std/24-bit mode\n\t"
10704             "POP    EAX\n\t"
10705             "POP    EDX\n\t"
10706             "CMP    EDX,0x80000000\n\t"
10707             "JNE,s  fast\n\t"
10708             "TEST   EAX,EAX\n\t"
10709             "JNE,s  fast\n\t"
10710             "SUB    ESP,8\n\t"
10711             "MOVSD  [ESP],$src\n\t"
10712             "FLD_D  [ESP]\n\t"
10713             "ADD    ESP,8\n\t"
10714             "CALL   d2l_wrapper\n"
10715       "fast:" %}
10716   ins_encode %{
10717     Label fast;
          // Spill the double and load it onto the x87 stack.
10718     __ subptr(rsp, 8);
10719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10720     __ fld_d(Address(rsp, 0));
          // Switch to the truncating control word and overwrite the scratch slot
          // with the 64-bit integer result.
10721     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10722     __ fistp_d(Address(rsp, 0));
10723     // Restore the rounding mode, mask the exception
10724     if (Compile::current()->in_24_bit_fp_mode()) {
10725       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10726     } else {
10727       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10728     }
10729     // Load the converted long, adjust CPU stack
10730     __ pop(rax);
10731     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (high word in EDX, low word
          // in EAX) takes the slow path.
10732     __ cmpl(rdx, 0x80000000);
10733     __ jccb(Assembler::notEqual, fast);
10734     __ testl(rax, rax);
10735     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original double onto the x87 stack and call the stub.
10736     __ subptr(rsp, 8);
10737     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10738     __ fld_d(Address(rsp, 0));
10739     __ addptr(rsp, 8);
10740     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10741     __ post_call_nop();
10742     __ bind(fast);
10743   %}
10744   ins_pipe( pipe_slow );
10745 %}
10746 
10747 // Convert a double to an int.  Java semantics require we do complex
10748 // manglations in the corner cases.  So we set the rounding mode to
10749 // 'zero', store the darned double down as an int, and reset the
10750 // rounding mode to 'nearest'.  The hardware stores a flag value down
10751 // if we would overflow or converted a NAN; we check for this
10752 // and go the slow path if needed.
10753 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
        // ConvF2I with an x87 float source (UseSSE==0).  The result is pinned to
        // EAX; EDX and EFLAGS are killed.
        // Same sequence as the double variant: FISTP under the truncating control
        // word, pop into EAX, and call d2i_wrapper only when the value equals
        // 0x80000000.
10754   predicate(UseSSE==0);
10755   match(Set dst (ConvF2I src));
10756   effect( KILL tmp, KILL cr );
10757   format %{ "FLD    $src\t# Convert float to int \n\t"
10758             "FLDCW  trunc mode\n\t"
10759             "SUB    ESP,4\n\t"
10760             "FISTp  [ESP + #0]\n\t"
10761             "FLDCW  std/24-bit mode\n\t"
10762             "POP    EAX\n\t"
10763             "CMP    EAX,0x80000000\n\t"
10764             "JNE,s  fast\n\t"
10765             "FLD    $src\n\t"
10766             "CALL   d2i_wrapper\n"
10767       "fast:" %}
10768   // DPR2I_encoding works for FPR2I
10769   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10770   ins_pipe( pipe_slow );
10771 %}
10772 
10773 // Convert a float in xmm to an int reg.
10774 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
        // ConvF2I with an XMM float source (UseSSE>=1).  The result is pinned to
        // EAX; EDX and EFLAGS are killed.
        // Fast path: a truncating CVTTSS2SI.  A result of 0x80000000 sends the
        // code to the slow path, which passes the original float on the x87
        // stack to the d2i_wrapper stub.
10775   predicate(UseSSE>=1);
10776   match(Set dst (ConvF2I src));
10777   effect( KILL tmp, KILL cr );
10778   format %{ "CVTTSS2SI $dst, $src\n\t"
10779             "CMP    $dst,0x80000000\n\t"
10780             "JNE,s  fast\n\t"
10781             "SUB    ESP, 4\n\t"
10782             "MOVSS  [ESP], $src\n\t"
10783             "FLD    [ESP]\n\t"
10784             "ADD    ESP, 4\n\t"
10785             "CALL   d2i_wrapper\n"
10786       "fast:" %}
10787   ins_encode %{
10788     Label fast;
10789     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
          // Any other value is already the final result.
10790     __ cmpl($dst$$Register, 0x80000000);
10791     __ jccb(Assembler::notEqual, fast);
          // Slow path: bounce the float through a 4-byte scratch slot onto the
          // x87 stack, release the slot, then call the stub.
10792     __ subptr(rsp, 4);
10793     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10794     __ fld_s(Address(rsp, 0));
10795     __ addptr(rsp, 4);
10796     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10797     __ post_call_nop();
10798     __ bind(fast);
10799   %}
10800   ins_pipe( pipe_slow );
10801 %}
10802 
10803 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
        // ConvF2L with an x87 float source (UseSSE==0).  The 64-bit result is
        // pinned to the EDX:EAX pair; EFLAGS is killed.
        // Same sequence as the double variant: 64-bit FISTP under the truncating
        // control word, pop into EAX/EDX, and call d2l_wrapper only for the
        // pattern EDX==0x80000000 with EAX==0.
10804   predicate(UseSSE==0);
10805   match(Set dst (ConvF2L src));
10806   effect( KILL cr );
10807   format %{ "FLD    $src\t# Convert float to long\n\t"
10808             "FLDCW  trunc mode\n\t"
10809             "SUB    ESP,8\n\t"
10810             "FISTp  [ESP + #0]\n\t"
10811             "FLDCW  std/24-bit mode\n\t"
10812             "POP    EAX\n\t"
10813             "POP    EDX\n\t"
10814             "CMP    EDX,0x80000000\n\t"
10815             "JNE,s  fast\n\t"
10816             "TEST   EAX,EAX\n\t"
10817             "JNE,s  fast\n\t"
10818             "FLD    $src\n\t"
10819             "CALL   d2l_wrapper\n"
10820       "fast:" %}
10821   // DPR2L_encoding works for FPR2L
10822   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10823   ins_pipe( pipe_slow );
10824 %}
10825 
10826 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10827 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
        // ConvF2L with an XMM float source (UseSSE>=1).  The 64-bit result is
        // pinned to the EDX:EAX pair; EFLAGS is killed.
        // An 8-byte scratch slot is reserved: the float is written into its low
        // 4 bytes, loaded with FLD_S, and the whole slot is then overwritten with
        // the 64-bit FISTP result.
        // NOTE(review): the "# Convert float to long" annotation on the slow-path
        // "SUB ESP,4" line of the format repeats the header annotation; it looks
        // like a copy-paste leftover and affects only the debug listing.
10828   predicate (UseSSE>=1);
10829   match(Set dst (ConvF2L src));
10830   effect( KILL cr );
10831   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10832             "MOVSS  [ESP],$src\n\t"
10833             "FLD_S  [ESP]\n\t"
10834             "FLDCW  trunc mode\n\t"
10835             "FISTp  [ESP + #0]\n\t"
10836             "FLDCW  std/24-bit mode\n\t"
10837             "POP    EAX\n\t"
10838             "POP    EDX\n\t"
10839             "CMP    EDX,0x80000000\n\t"
10840             "JNE,s  fast\n\t"
10841             "TEST   EAX,EAX\n\t"
10842             "JNE,s  fast\n\t"
10843             "SUB    ESP,4\t# Convert float to long\n\t"
10844             "MOVSS  [ESP],$src\n\t"
10845             "FLD_S  [ESP]\n\t"
10846             "ADD    ESP,4\n\t"
10847             "CALL   d2l_wrapper\n"
10848       "fast:" %}
10849   ins_encode %{
10850     Label fast;
          // Spill the float and load it onto the x87 stack.
10851     __ subptr(rsp, 8);
10852     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10853     __ fld_s(Address(rsp, 0));
          // Switch to the truncating control word and store the 64-bit integer result.
10854     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10855     __ fistp_d(Address(rsp, 0));
10856     // Restore the rounding mode, mask the exception
10857     if (Compile::current()->in_24_bit_fp_mode()) {
10858       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10859     } else {
10860       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10861     }
10862     // Load the converted long, adjust CPU stack
10863     __ pop(rax);
10864     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (high word in EDX, low word
          // in EAX) takes the slow path.
10865     __ cmpl(rdx, 0x80000000);
10866     __ jccb(Assembler::notEqual, fast);
10867     __ testl(rax, rax);
10868     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original float onto the x87 stack and call the stub.
10869     __ subptr(rsp, 4);
10870     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10871     __ fld_s(Address(rsp, 0));
10872     __ addptr(rsp, 4);
10873     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10874     __ post_call_nop();
10875     __ bind(fast);
10876   %}
10877   ins_pipe( pipe_slow );
10878 %}
10879 
10880 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // ConvI2D with an x87 double result (UseSSE<=1): FILD from the int stack
        // slot, then FSTP into the x87 register dst.
10881   predicate( UseSSE<=1 );
10882   match(Set dst (ConvI2D src));
10883   format %{ "FILD   $src\n\t"
10884             "FSTP   $dst" %}
10885   opcode(0xDB, 0x0);  /* DB /0 */
10886   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10887   ins_pipe( fpu_reg_mem );
10888 %}
10889 
10890 instruct convI2D_reg(regD dst, rRegI src) %{
        // ConvI2D from a general register to an XMM register using CVTSI2SD.
        // Selected with UseSSE>=2 when UseXmmI2D is off; convXI2D_reg covers the
        // UseXmmI2D case.
10891   predicate( UseSSE>=2 && !UseXmmI2D );
10892   match(Set dst (ConvI2D src));
10893   format %{ "CVTSI2SD $dst,$src" %}
10894   ins_encode %{
10895     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10896   %}
10897   ins_pipe( pipe_slow );
10898 %}
10899 
10900 instruct convI2D_mem(regD dst, memory mem) %{
        // ConvI2D with the int load folded in: matches ConvI2D (LoadI mem) and
        // emits CVTSI2SD with a memory operand (UseSSE>=2).
10901   predicate( UseSSE>=2 );
10902   match(Set dst (ConvI2D (LoadI mem)));
10903   format %{ "CVTSI2SD $dst,$mem" %}
10904   ins_encode %{
10905     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10906   %}
10907   ins_pipe( pipe_slow );
10908 %}
10909 
10910 instruct convXI2D_reg(regD dst, rRegI src)
10911 %{
        // ConvI2D variant selected with UseSSE>=2 when UseXmmI2D is on: the int
        // is first moved into the XMM register with MOVD and then converted in
        // place with CVTDQ2PD.
10912   predicate( UseSSE>=2 && UseXmmI2D );
10913   match(Set dst (ConvI2D src));
10914 
10915   format %{ "MOVD  $dst,$src\n\t"
10916             "CVTDQ2PD $dst,$dst\t# i2d" %}
10917   ins_encode %{
10918     __ movdl($dst$$XMMRegister, $src$$Register);
10919     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10920   %}
10921   ins_pipe(pipe_slow); // XXX
10922 %}
10923 
10924 instruct convI2DPR_mem(regDPR dst, memory mem) %{
        // ConvI2D with the int load folded in, x87 result: FILD directly from
        // memory, then FSTP into dst.  Selected with UseSSE<=1 when 24-bit
        // instruction selection is off.
10925   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10926   match(Set dst (ConvI2D (LoadI mem)));
10927   format %{ "FILD   $mem\n\t"
10928             "FSTP   $dst" %}
10929   opcode(0xDB);      /* DB /0 */
10930   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10931               Pop_Reg_DPR(dst), ClearInstMark);
10932   ins_pipe( fpu_reg_mem );
10933 %}
10934 
10935 // Convert a byte to a float; no rounding step needed.
10936 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
        // ConvI2F on x87 for an input known to be masked to a byte: the predicate
        // requires the input node to be an AndI whose second operand is the
        // constant 255.  Emits FILD from the stack slot and FSTP into dst.
10937   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10938   match(Set dst (ConvI2F src));
10939   format %{ "FILD   $src\n\t"
10940             "FSTP   $dst" %}
10941 
10942   opcode(0xDB, 0x0);  /* DB /0 */
10943   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10944   ins_pipe( fpu_reg_mem );
10945 %}
10946 
10947 // In 24-bit mode, force exponent rounding by storing back out
10948 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
        // ConvI2F on x87 in 24-bit mode, stack slot to stack slot: FILD from the
        // int slot, then a single-precision store-and-pop (FSTP_S) into the
        // float slot.
10949   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10950   match(Set dst (ConvI2F src));
10951   ins_cost(200);
10952   format %{ "FILD   $src\n\t"
10953             "FSTP_S $dst" %}
10954   opcode(0xDB, 0x0);  /* DB /0 */
10955   ins_encode( Push_Mem_I(src),
10956               Pop_Mem_FPR(dst));
10957   ins_pipe( fpu_mem_mem );
10958 %}
10959 
10960 // In 24-bit mode, force exponent rounding by storing back out
10961 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
        // ConvI2F on x87 in 24-bit mode with the int load folded in: FILD
        // directly from memory, then a single-precision store-and-pop (FSTP_S)
        // into the float stack slot.
10962   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10963   match(Set dst (ConvI2F (LoadI mem)));
10964   ins_cost(200);
10965   format %{ "FILD   $mem\n\t"
10966             "FSTP_S $dst" %}
10967   opcode(0xDB);  /* DB /0 */
10968   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10969               Pop_Mem_FPR(dst), ClearInstMark);
10970   ins_pipe( fpu_mem_mem );
10971 %}
10972 
10973 // This instruction does not round to 24-bits
10974 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
        // ConvI2F on x87 when 24-bit instruction selection is off: FILD from the
        // int stack slot, then FSTP into the x87 register dst.
10975   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10976   match(Set dst (ConvI2F src));
10977   format %{ "FILD   $src\n\t"
10978             "FSTP   $dst" %}
10979   opcode(0xDB, 0x0);  /* DB /0 */
10980   ins_encode( Push_Mem_I(src),
10981               Pop_Reg_FPR(dst));
10982   ins_pipe( fpu_reg_mem );
10983 %}
10984 
10985 // This instruction does not round to 24-bits
10986 instruct convI2FPR_mem(regFPR dst, memory mem) %{
        // ConvI2F on x87 with the int load folded in, when 24-bit instruction
        // selection is off: FILD directly from memory, then FSTP into dst.
10987   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10988   match(Set dst (ConvI2F (LoadI mem)));
10989   format %{ "FILD   $mem\n\t"
10990             "FSTP   $dst" %}
10991   opcode(0xDB);      /* DB /0 */
10992   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10993               Pop_Reg_FPR(dst), ClearInstMark);
10994   ins_pipe( fpu_reg_mem );
10995 %}
10996 
10997 // Convert an int to a float in xmm; no rounding step needed.
10998 instruct convI2F_reg(regF dst, rRegI src) %{
        // ConvI2F from a general register to an XMM register using CVTSI2SS.
        // Selected for UseSSE==1 always, and for UseSSE>=2 only when UseXmmI2F is
        // off; convXI2F_reg covers the UseXmmI2F case.
10999   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
11000   match(Set dst (ConvI2F src));
11001   format %{ "CVTSI2SS $dst, $src" %}
11002   ins_encode %{
11003     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11004   %}
11005   ins_pipe( pipe_slow );
11006 %}
11007 
11008  instruct convXI2F_reg(regF dst, rRegI src)
11009 %{
        // ConvI2F variant selected with UseSSE>=2 when UseXmmI2F is on: the int
        // is first moved into the XMM register with MOVD and then converted in
        // place with CVTDQ2PS.
11010   predicate( UseSSE>=2 && UseXmmI2F );
11011   match(Set dst (ConvI2F src));
11012 
11013   format %{ "MOVD  $dst,$src\n\t"
11014             "CVTDQ2PS $dst,$dst\t# i2f" %}
11015   ins_encode %{
11016     __ movdl($dst$$XMMRegister, $src$$Register);
11017     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11018   %}
11019   ins_pipe(pipe_slow); // XXX
11020 %}
11021 
11022 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
        // Sign-extending ConvI2L into a 32-bit register pair.  Per the format:
        // src is copied into both halves and the high half is arithmetically
        // shifted right by 31, leaving it all sign bits.
        // EFLAGS is killed (SAR modifies flags).
11023   match(Set dst (ConvI2L src));
11024   effect(KILL cr);
11025   ins_cost(375);
11026   format %{ "MOV    $dst.lo,$src\n\t"
11027             "MOV    $dst.hi,$src\n\t"
11028             "SAR    $dst.hi,31" %}
11029   ins_encode(convert_int_long(dst,src));
11030   ins_pipe( ialu_reg_reg_long );
11031 %}
11032 
11033 // Zero-extend convert int to long
11034 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
        // Zero-extending int-to-long: matches (ConvI2L src) AND-ed with an
        // immL_32bits mask, so the sign extension can be skipped.
        // src is copied into the low half and the high half is cleared with XOR.
        // EFLAGS is killed (XOR modifies flags).
11035   match(Set dst (AndL (ConvI2L src) mask) );
11036   effect( KILL flags );
11037   ins_cost(250);
11038   format %{ "MOV    $dst.lo,$src\n\t"
11039             "XOR    $dst.hi,$dst.hi" %}
11040   opcode(0x33); // XOR
11041   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11042   ins_pipe( ialu_reg_reg_long );
11043 %}
11044 
11045 // Zero-extend long
11046 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
        // Zero-extend the low 32 bits of a long: matches src AND-ed with an
        // immL_32bits mask.  The low half of src is copied into the low half of
        // dst and the high half of dst is cleared with XOR.
        // EFLAGS is killed (XOR modifies flags).
11047   match(Set dst (AndL src mask) );
11048   effect( KILL flags );
11049   ins_cost(250);
11050   format %{ "MOV    $dst.lo,$src.lo\n\t"
11051             "XOR    $dst.hi,$dst.hi" %}
11052   opcode(0x33); // XOR
11053   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11054   ins_pipe( ialu_reg_reg_long );
11055 %}
11056 
11057 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D with the result rounded through memory (UseSSE<=1).  Per the
        // format: both halves of the long are pushed on the CPU stack, loaded as
        // a 64-bit integer with FILD, the stack is released, and the value is
        // popped into the double stack slot with FSTP_D.
        // EFLAGS is killed because ESP is adjusted with ADD.
11058   predicate (UseSSE<=1);
11059   match(Set dst (ConvL2D src));
11060   effect( KILL cr );
11061   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11062             "PUSH   $src.lo\n\t"
11063             "FILD   ST,[ESP + #0]\n\t"
11064             "ADD    ESP,8\n\t"
11065             "FSTP_D $dst\t# D-round" %}
11066   opcode(0xDF, 0x5);  /* DF /5 */
11067   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11068   ins_pipe( pipe_slow );
11069 %}
11070 
11071 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D with an XMM double result (UseSSE>=2).  Per the format: the
        // long is pushed on the CPU stack, converted on the x87 stack
        // (FILD_D / FSTP_D through the same 8 bytes), loaded into dst with MOVSD,
        // and the 8 bytes are released.
        // EFLAGS is killed because ESP is adjusted with ADD.
11072   predicate (UseSSE>=2);
11073   match(Set dst (ConvL2D src));
11074   effect( KILL cr );
11075   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11076             "PUSH   $src.lo\n\t"
11077             "FILD_D [ESP]\n\t"
11078             "FSTP_D [ESP]\n\t"
11079             "MOVSD  $dst,[ESP]\n\t"
11080             "ADD    ESP,8" %}
11081   opcode(0xDF, 0x5);  /* DF /5 */
11082   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11083   ins_pipe( pipe_slow );
11084 %}
11085 
11086 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F with an XMM float result (UseSSE>=1).  Per the format: the long
        // is pushed on the CPU stack, loaded with FILD_D, stored back as a single
        // with FSTP_S, loaded into dst with MOVSS, and the 8 pushed bytes are
        // released (the 0x8 argument of Push_ResultF matches the "ADD ESP,8").
        // EFLAGS is killed because ESP is adjusted with ADD.
11087   predicate (UseSSE>=1);
11088   match(Set dst (ConvL2F src));
11089   effect( KILL cr );
11090   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11091             "PUSH   $src.lo\n\t"
11092             "FILD_D [ESP]\n\t"
11093             "FSTP_S [ESP]\n\t"
11094             "MOVSS  $dst,[ESP]\n\t"
11095             "ADD    ESP,8" %}
11096   opcode(0xDF, 0x5);  /* DF /5 */
11097   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11098   ins_pipe( pipe_slow );
11099 %}
11100 
11101 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F with the result rounded through memory.  Per the format: both
        // halves of the long are pushed on the CPU stack, loaded with FILD, the
        // stack is released, and the value is popped into the float stack slot
        // with a single-precision FSTP_S.
        // EFLAGS is killed because ESP is adjusted with ADD.
        // NOTE(review): unlike convL2F_reg (UseSSE>=1) and convL2DPR_reg
        // (UseSSE<=1) this rule has no predicate, so it is also a candidate when
        // SSE is enabled - confirm this is intentional.
11102   match(Set dst (ConvL2F src));
11103   effect( KILL cr );
11104   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11105             "PUSH   $src.lo\n\t"
11106             "FILD   ST,[ESP + #0]\n\t"
11107             "ADD    ESP,8\n\t"
11108             "FSTP_S $dst\t# F-round" %}
11109   opcode(0xDF, 0x5);  /* DF /5 */
11110   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11111   ins_pipe( pipe_slow );
11112 %}
11113 
11114 instruct convL2I_reg( rRegI dst, eRegL src ) %{
        // ConvL2I: truncation to int is a copy of the low half of the long
        // register pair into dst.
11115   match(Set dst (ConvL2I src));
11116   effect( DEF dst, USE src );
11117   format %{ "MOV    $dst,$src.lo" %}
11118   ins_encode(enc_CopyL_Lo(dst,src));
11119   ins_pipe( ialu_reg_reg );
11120 %}
11121 
11122 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I, stack slot to general register: a plain 32-bit MOV from the
        // float stack slot (ESP + displacement), so the bits are copied unchanged.
11123   match(Set dst (MoveF2I src));
11124   effect( DEF dst, USE src );
11125   ins_cost(100);
11126   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11127   ins_encode %{
11128     __ movl($dst$$Register, Address(rsp, $src$$disp));
11129   %}
11130   ins_pipe( ialu_reg_mem );
11131 %}
11132 
11133 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
        // MoveF2I on x87 (UseSSE==0): the x87 register is stored as a single
        // (FST_S) into an int stack slot.
11134   predicate(UseSSE==0);
11135   match(Set dst (MoveF2I src));
11136   effect( DEF dst, USE src );
11137 
11138   ins_cost(125);
11139   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11140   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11141   ins_pipe( fpu_mem_reg );
11142 %}
11143 
11144 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
        // MoveF2I, XMM register to int stack slot (UseSSE>=1): a MOVSS store to
        // ESP + displacement.
11145   predicate(UseSSE>=1);
11146   match(Set dst (MoveF2I src));
11147   effect( DEF dst, USE src );
11148 
11149   ins_cost(95);
11150   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11151   ins_encode %{
11152     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11153   %}
11154   ins_pipe( pipe_slow );
11155 %}
11156 
11157 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
        // MoveF2I, XMM register directly to general register with MOVD
        // (UseSSE>=2).  Lowest cost (85) of the MoveF2I variants in this file.
11158   predicate(UseSSE>=2);
11159   match(Set dst (MoveF2I src));
11160   effect( DEF dst, USE src );
11161   ins_cost(85);
11162   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11163   ins_encode %{
11164     __ movdl($dst$$Register, $src$$XMMRegister);
11165   %}
11166   ins_pipe( pipe_slow );
11167 %}
11168 
11169 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F, general register to float stack slot: a plain 32-bit MOV store
        // to ESP + displacement, so the bits are copied unchanged.
11170   match(Set dst (MoveI2F src));
11171   effect( DEF dst, USE src );
11172 
11173   ins_cost(100);
11174   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11175   ins_encode %{
11176     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11177   %}
11178   ins_pipe( ialu_mem_reg );
11179 %}
11180 
11181 
11182 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
        // MoveI2F on x87 (UseSSE==0): the int stack slot is loaded as a single
        // with FLD_S (D9 /0) and popped into the x87 register dst.
11183   predicate(UseSSE==0);
11184   match(Set dst (MoveI2F src));
11185   effect(DEF dst, USE src);
11186 
11187   ins_cost(125);
11188   format %{ "FLD_S  $src\n\t"
11189             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11190   opcode(0xD9);               /* D9 /0, FLD m32real */
11191   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11192               Pop_Reg_FPR(dst), ClearInstMark );
11193   ins_pipe( fpu_reg_mem );
11194 %}
11195 
11196 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
        // MoveI2F, int stack slot to XMM register (UseSSE>=1): a MOVSS load from
        // ESP + displacement.
11197   predicate(UseSSE>=1);
11198   match(Set dst (MoveI2F src));
11199   effect( DEF dst, USE src );
11200 
11201   ins_cost(95);
11202   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11203   ins_encode %{
11204     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11205   %}
11206   ins_pipe( pipe_slow );
11207 %}
11208 
11209 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
        // MoveI2F, general register directly to XMM register with MOVD
        // (UseSSE>=2).  Lowest cost (85) of the MoveI2F variants in this file.
11210   predicate(UseSSE>=2);
11211   match(Set dst (MoveI2F src));
11212   effect( DEF dst, USE src );
11213 
11214   ins_cost(85);
11215   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11216   ins_encode %{
11217     __ movdl($dst$$XMMRegister, $src$$Register);
11218   %}
11219   ins_pipe( pipe_slow );
11220 %}
11221 
11222 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
        // MoveD2L, double stack slot to long register pair.  Per the format: two
        // 32-bit MOV loads (opcode 0x8B for both), the low half from $src and
        // the high half from $src+4.
11223   match(Set dst (MoveD2L src));
11224   effect(DEF dst, USE src);
11225 
11226   ins_cost(250);
11227   format %{ "MOV    $dst.lo,$src\n\t"
11228             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11229   opcode(0x8B, 0x8B);
11230   ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
11231   ins_pipe( ialu_mem_long_reg );
11232 %}
11233 
11234 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
        // MoveD2L with an x87 double source (UseSSE<=1): the x87 register is
        // stored as a double (FST_D) into a long stack slot.
11235   predicate(UseSSE<=1);
11236   match(Set dst (MoveD2L src));
11237   effect(DEF dst, USE src);
11238 
11239   ins_cost(125);
11240   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11241   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11242   ins_pipe( fpu_mem_reg );
11243 %}
11244 
11245 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
        // MoveD2L, XMM register to long stack slot (UseSSE>=2): a MOVSD store to
        // ESP + displacement.
11246   predicate(UseSSE>=2);
11247   match(Set dst (MoveD2L src));
11248   effect(DEF dst, USE src);
11249   ins_cost(95);
11250   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11251   ins_encode %{
11252     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11253   %}
11254   ins_pipe( pipe_slow );
11255 %}
11256 
11257 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
        // MoveD2L, XMM register directly to long register pair (UseSSE>=2),
        // using a scratch XMM register tmp.
11258   predicate(UseSSE>=2);
11259   match(Set dst (MoveD2L src));
11260   effect(DEF dst, USE src, TEMP tmp);
11261   ins_cost(85);
11262   format %{ "MOVD   $dst.lo,$src\n\t"
11263             "PSHUFLW $tmp,$src,0x4E\n\t"
11264             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11265   ins_encode %{
          // Low 32 bits go straight to the low register of the pair.
11266     __ movdl($dst$$Register, $src$$XMMRegister);
          // Shuffle control 0x4E selects words 2,3,0,1, i.e. swaps the two low
          // dwords, so the high 32 bits of the double land in the low dword of tmp.
11267     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11268     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11269   %}
11270   ins_pipe( pipe_slow );
11271 %}
11272 
11273 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
        // MoveL2D, long register pair to double stack slot.  Per the format: two
        // 32-bit MOV stores (opcode 0x89 for both), the low half to $dst and the
        // high half to $dst+4.
11274   match(Set dst (MoveL2D src));
11275   effect(DEF dst, USE src);
11276 
11277   ins_cost(200);
11278   format %{ "MOV    $dst,$src.lo\n\t"
11279             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11280   opcode(0x89, 0x89);
11281   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
11282   ins_pipe( ialu_mem_long_reg );
11283 %}
11284 
11285 
11286 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
        // MoveL2D with an x87 double result (UseSSE<=1): the long stack slot is
        // loaded as a double with FLD_D (DD /0) and popped into the x87
        // register dst.
11287   predicate(UseSSE<=1);
11288   match(Set dst (MoveL2D src));
11289   effect(DEF dst, USE src);
11290   ins_cost(125);
11291 
11292   format %{ "FLD_D  $src\n\t"
11293             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11294   opcode(0xDD);               /* DD /0, FLD m64real */
11295   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11296               Pop_Reg_DPR(dst), ClearInstMark );
11297   ins_pipe( fpu_reg_mem );
11298 %}
11299 
11300 
11301 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
        // MoveL2D, long stack slot to XMM register, selected with UseSSE>=2 when
        // UseXmmLoadAndClearUpper is on; listed as MOVSD.  The
        // !UseXmmLoadAndClearUpper case is covered by
        // MoveL2D_stack_reg_sse_partial, which emits the same movdbl() call.
11302   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11303   match(Set dst (MoveL2D src));
11304   effect(DEF dst, USE src);
11305 
11306   ins_cost(95);
11307   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11308   ins_encode %{
11309     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
// SSE2 flavor of MoveL2D from a long stack slot into an XMM register, selected
// when UseSSE >= 2 and UseXmmLoadAndClearUpper is NOT set (the complement of
// MoveL2D_stack_reg_sse). The encoding calls the same movdbl helper on
// [rsp + $src displacement]; the format prints it as MOVLPD.
// The trailing tag in the format string names this rule so that printed
// assembly can be told apart from the non-partial variant.
11314 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11315   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11316   match(Set dst (MoveL2D src));
11317   effect(DEF dst, USE src);
11318 
11319   ins_cost(95);
11320   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11321   ins_encode %{
11322     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11323   %}
11324   ins_pipe( pipe_slow );
11325 %}
11326 
// Bit-for-bit move of a long register pair into an XMM register, done entirely
// in registers. Selected only when UseSSE >= 2.
//   dst - XMM register receiving the raw 64 bits (declared TEMP in the effect clause)
//   src - long register pair; only read
//   tmp - XMM scratch (TEMP) that carries the upper 32 bits before the merge
11327 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11328   predicate(UseSSE>=2);
11329   match(Set dst (MoveL2D src));
11330   effect(TEMP dst, USE src, TEMP tmp);
11331   ins_cost(85);
11332   format %{ "MOVD   $dst,$src.lo\n\t"
11333             "MOVD   $tmp,$src.hi\n\t"
11334             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11335   ins_encode %{
11336     __ movdl($dst$$XMMRegister, $src$$Register);  // src.lo -> low dword of dst
11337     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));  // src.hi -> low dword of tmp
11338     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);  // interleave low dwords: dst = tmp.lo:dst.lo
11339   %}
11340   ins_pipe( pipe_slow );
11341 %}
11342 
11343 //----------------------------- CompressBits/ExpandBits ------------------------
11344 
// 64-bit CompressBits on register pairs. Each 32-bit half is compressed
// separately with PEXT; the upper result is then appended to the lower result
// at bit position popcount(low mask half).
//   dst       - long register pair receiving the result
//   src, mask - long register pairs; only read by this sequence
//   rtmp      - GPR scratch holding the popcount and the derived shift amounts
//   xtmp      - XMM register used only to park rtmp across the merge step
//   cr        - flags are clobbered
// The predicate restricts the rule to nodes whose type is long.
11345 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11346   predicate(n->bottom_type()->isa_long());
11347   match(Set dst (CompressBits src mask));
11348   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11349   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11350   ins_encode %{
11351     Label exit, partial_result;
11352     // Independently extract both upper and lower 32 bits of source into destination register pair.
11353     // Merge the results of upper and lower destination registers such that upper destination
11354     // results are contiguously laid out after the lower destination result.
11355     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11356     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11357     __ popcntl($rtmp$$Register, $mask$$Register);
11358     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11359     __ cmpl($rtmp$$Register, 32);
11360     __ jccb(Assembler::equal, exit);
11361     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11362     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11363     // Shift left the contents of upper destination register by true bit count of lower mask register
11364     // and merge with lower destination register.
11365     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11366     __ orl($dst$$Register, $rtmp$$Register);
11367     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11368     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11369     // since contents of upper destination have already been copied to lower destination
11370     // register.
11371     __ cmpl($rtmp$$Register, 0);
11372     __ jccb(Assembler::greater, partial_result);
11373     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11374     __ jmp(exit);
11375     __ bind(partial_result);
11376     // Perform right shift over upper destination register to move out bits already copied
11377     // to lower destination register (shift amount is 32 - popcount of the lower mask).
11378     __ subl($rtmp$$Register, 32);
11379     __ negl($rtmp$$Register);
11380     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11381     __ bind(exit);
11382   %}
11383   ins_pipe( pipe_slow );
11384 %}
11385 
// 64-bit ExpandBits on register pairs, built from 32-bit PDEP operations.
//   dst  - long register pair receiving the result
//   src  - long register pair; only read
//   mask - long register pair; its low register is temporarily overwritten with a
//          working copy of the high mask half and restored from xtmp before the
//          final label (the early-exit path never modifies it)
//   rtmp - GPR scratch (popcount, shifted source, loop counter, partial result)
//   xtmp - XMM register used as a spill slot, first for rtmp and later for the low mask half
//   cr   - flags are clobbered
// The predicate restricts the rule to nodes whose type is long.
11386 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11387   predicate(n->bottom_type()->isa_long());
11388   match(Set dst (ExpandBits src mask));
11389   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11390   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11391   ins_encode %{
11392     // The deposit operation sequentially reads the bits from source register starting from LSB
11393     // and lays them out into destination register at bit locations corresponding to true bits
11394     // in mask register. Thus number of source bits read are equal to combined true bit count
11395     // of mask register pair.
11396     Label exit, mask_clipping;
11397     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11398     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11399     __ popcntl($rtmp$$Register, $mask$$Register);
11400     // If true bit count of lower mask register is 32 then no bit of the lower source register
11401     // will feed the upper destination register, so the two deposits above are already final.
11402     __ cmpl($rtmp$$Register, 32);
11403     __ jccb(Assembler::equal, exit);
11404     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11405     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11406     // Shift right the contents of lower source register to remove already consumed bits.
11407     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11408     // Deposit the remaining bits of the lower source register, starting from LSB, under the
11409     // influence of the upper mask register.
11410     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11411     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11412     __ subl($rtmp$$Register, 32);
11413     __ negl($rtmp$$Register);  // rtmp = 32 - popcount(low mask): count of low-source bits deposited above
11414     __ movdl($xtmp$$XMMRegister, $mask$$Register);  // save the low mask half; restored before exit
11415     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));  // working copy of the high mask half
11416     // Clear the set bits in the upper mask copy which have been used to deposit the contents
11417     // of the lower source register (one lowest set bit per iteration).
11418     __ bind(mask_clipping);
11419     __ blsrl($mask$$Register, $mask$$Register);
11420     __ decrementl($rtmp$$Register, 1);
11421     __ jccb(Assembler::greater, mask_clipping);
11422     // Starting from LSB deposit the bits of the upper source register under the influence of
11423     // remaining set bits in upper mask register.
11424     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11425     // Merge the partial results deposited from lower and upper source register bits.
11426     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11427     __ movdl($mask$$Register, $xtmp$$XMMRegister);  // restore the low mask half
11428     __ bind(exit);
11429   %}
11430   ins_pipe( pipe_slow );
11431 %}
11432 
11433 // =======================================================================
11434 // Fast clearing of an array
11435 // Small non-constant length ClearArray for non-AVX512 targets.
// Matches ClearArray nodes that are not flagged is_large(), when UseAVX <= 2.
//   cnt, base - the two ClearArray inputs; both are consumed and clobbered (USE_KILL)
//   tmp       - XMM scratch (TEMP)
//   zero, cr  - killed
//   dummy     - Universe operand; the rule produces no register result
// The $$template format below only describes the generated code for printing; the
// instructions themselves come from the clear_mem call in ins_encode, invoked here
// with `false` as its fifth argument and knoreg as the opmask register.
// NOTE(review): the format text mixes in 64-bit register names (RAX/RCX/RDI, JMPQ);
// presumably carried over from the 64-bit description -- confirm.
11436 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11437   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11438   match(Set dummy (ClearArray cnt base));
11439   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11440 
11441   format %{ $$template
11442     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11443     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11444     $$emit$$"JG     LARGE\n\t"
11445     $$emit$$"SHL    ECX, 1\n\t"
11446     $$emit$$"DEC    ECX\n\t"
11447     $$emit$$"JS     DONE\t# Zero length\n\t"
11448     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11449     $$emit$$"DEC    ECX\n\t"
11450     $$emit$$"JGE    LOOP\n\t"
11451     $$emit$$"JMP    DONE\n\t"
11452     $$emit$$"# LARGE:\n\t"
11453     if (UseFastStosb) {
11454        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11455        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11456     } else if (UseXMMForObjInit) {
11457        $$emit$$"MOV     RDI,RAX\n\t"
11458        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11459        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11460        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11461        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11462        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11463        $$emit$$"ADD     0x40,RAX\n\t"
11464        $$emit$$"# L_zero_64_bytes:\n\t"
11465        $$emit$$"SUB     0x8,RCX\n\t"
11466        $$emit$$"JGE     L_loop\n\t"
11467        $$emit$$"ADD     0x4,RCX\n\t"
11468        $$emit$$"JL      L_tail\n\t"
11469        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11470        $$emit$$"ADD     0x20,RAX\n\t"
11471        $$emit$$"SUB     0x4,RCX\n\t"
11472        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11473        $$emit$$"ADD     0x4,RCX\n\t"
11474        $$emit$$"JLE     L_end\n\t"
11475        $$emit$$"DEC     RCX\n\t"
11476        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11477        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11478        $$emit$$"ADD     0x8,RAX\n\t"
11479        $$emit$$"DEC     RCX\n\t"
11480        $$emit$$"JGE     L_sloop\n\t"
11481        $$emit$$"# L_end:\n\t"
11482     } else {
11483        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11484        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11485     }
11486     $$emit$$"# DONE"
11487   %}
11488   ins_encode %{
11489     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11490                  $tmp$$XMMRegister, false, knoreg);
11491   %}
11492   ins_pipe( pipe_slow );
11493 %}
11494 
11495 // Small non-constant length ClearArray for AVX512 targets.
// Matches ClearArray nodes that are not flagged is_large(), when UseAVX > 2.
// Same shape as the UseAVX <= 2 rule, with two differences visible here: tmp is a
// legRegD, and an opmask register ktmp (TEMP) is passed to clear_mem in place of knoreg.
//   cnt, base - the two ClearArray inputs; both are consumed and clobbered (USE_KILL)
//   zero, cr  - killed
//   dummy     - Universe operand; the rule produces no register result
// The $$template format only describes the generated code for printing; the
// instructions come from the clear_mem call (fifth argument `false`).
11496 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11497   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11498   match(Set dummy (ClearArray cnt base));
11499   ins_cost(125);
11500   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11501 
11502   format %{ $$template
11503     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11504     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11505     $$emit$$"JG     LARGE\n\t"
11506     $$emit$$"SHL    ECX, 1\n\t"
11507     $$emit$$"DEC    ECX\n\t"
11508     $$emit$$"JS     DONE\t# Zero length\n\t"
11509     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11510     $$emit$$"DEC    ECX\n\t"
11511     $$emit$$"JGE    LOOP\n\t"
11512     $$emit$$"JMP    DONE\n\t"
11513     $$emit$$"# LARGE:\n\t"
11514     if (UseFastStosb) {
11515        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11516        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11517     } else if (UseXMMForObjInit) {
11518        $$emit$$"MOV     RDI,RAX\n\t"
11519        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11520        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11521        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11522        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11523        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11524        $$emit$$"ADD     0x40,RAX\n\t"
11525        $$emit$$"# L_zero_64_bytes:\n\t"
11526        $$emit$$"SUB     0x8,RCX\n\t"
11527        $$emit$$"JGE     L_loop\n\t"
11528        $$emit$$"ADD     0x4,RCX\n\t"
11529        $$emit$$"JL      L_tail\n\t"
11530        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11531        $$emit$$"ADD     0x20,RAX\n\t"
11532        $$emit$$"SUB     0x4,RCX\n\t"
11533        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11534        $$emit$$"ADD     0x4,RCX\n\t"
11535        $$emit$$"JLE     L_end\n\t"
11536        $$emit$$"DEC     RCX\n\t"
11537        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11538        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11539        $$emit$$"ADD     0x8,RAX\n\t"
11540        $$emit$$"DEC     RCX\n\t"
11541        $$emit$$"JGE     L_sloop\n\t"
11542        $$emit$$"# L_end:\n\t"
11543     } else {
11544        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11545        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11546     }
11547     $$emit$$"# DONE"
11548   %}
11549   ins_encode %{
11550     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11551                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11552   %}
11553   ins_pipe( pipe_slow );
11554 %}
11555 
11556 // Large non-constant length ClearArray for non-AVX512 targets.
// Matches ClearArray nodes flagged is_large(), when UseAVX <= 2.
//   cnt, base - the two ClearArray inputs; both are consumed and clobbered (USE_KILL)
//   tmp       - XMM scratch (TEMP)
//   zero, cr  - killed
//   dummy     - Universe operand; the rule produces no register result
// Unlike the non-large rule, the printed template has no InitArrayShortSize
// short-loop prologue. Code generation is delegated to clear_mem with `true` as
// the fifth argument and knoreg as the opmask register.
11557 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11558   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11559   match(Set dummy (ClearArray cnt base));
11560   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11561   format %{ $$template
11562     if (UseFastStosb) {
11563        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11564        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11565        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11566     } else if (UseXMMForObjInit) {
11567        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11568        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11569        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11570        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11571        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11572        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11573        $$emit$$"ADD     0x40,RAX\n\t"
11574        $$emit$$"# L_zero_64_bytes:\n\t"
11575        $$emit$$"SUB     0x8,RCX\n\t"
11576        $$emit$$"JGE     L_loop\n\t"
11577        $$emit$$"ADD     0x4,RCX\n\t"
11578        $$emit$$"JL      L_tail\n\t"
11579        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11580        $$emit$$"ADD     0x20,RAX\n\t"
11581        $$emit$$"SUB     0x4,RCX\n\t"
11582        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11583        $$emit$$"ADD     0x4,RCX\n\t"
11584        $$emit$$"JLE     L_end\n\t"
11585        $$emit$$"DEC     RCX\n\t"
11586        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11587        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11588        $$emit$$"ADD     0x8,RAX\n\t"
11589        $$emit$$"DEC     RCX\n\t"
11590        $$emit$$"JGE     L_sloop\n\t"
11591        $$emit$$"# L_end:\n\t"
11592     } else {
11593        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11594        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11595        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11596     }
11597     $$emit$$"# DONE"
11598   %}
11599   ins_encode %{
11600     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11601                  $tmp$$XMMRegister, true, knoreg);
11602   %}
11603   ins_pipe( pipe_slow );
11604 %}
11605 
11606 // Large non-constant length ClearArray for AVX512 targets.
// Matches ClearArray nodes flagged is_large(), when UseAVX > 2.
// Same shape as the UseAVX <= 2 large rule, except that tmp is a legRegD and an
// opmask register ktmp (TEMP) is passed to clear_mem in place of knoreg.
//   cnt, base - the two ClearArray inputs; both are consumed and clobbered (USE_KILL)
//   zero, cr  - killed
//   dummy     - Universe operand; the rule produces no register result
// The $$template format only describes the generated code for printing; the
// instructions come from the clear_mem call (fifth argument `true`).
11607 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11608   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11609   match(Set dummy (ClearArray cnt base));
11610   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11611   format %{ $$template
11612     if (UseFastStosb) {
11613        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11614        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11615        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11616     } else if (UseXMMForObjInit) {
11617        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11618        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11619        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11620        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11621        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11622        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11623        $$emit$$"ADD     0x40,RAX\n\t"
11624        $$emit$$"# L_zero_64_bytes:\n\t"
11625        $$emit$$"SUB     0x8,RCX\n\t"
11626        $$emit$$"JGE     L_loop\n\t"
11627        $$emit$$"ADD     0x4,RCX\n\t"
11628        $$emit$$"JL      L_tail\n\t"
11629        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11630        $$emit$$"ADD     0x20,RAX\n\t"
11631        $$emit$$"SUB     0x4,RCX\n\t"
11632        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11633        $$emit$$"ADD     0x4,RCX\n\t"
11634        $$emit$$"JLE     L_end\n\t"
11635        $$emit$$"DEC     RCX\n\t"
11636        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11637        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11638        $$emit$$"ADD     0x8,RAX\n\t"
11639        $$emit$$"DEC     RCX\n\t"
11640        $$emit$$"JGE     L_sloop\n\t"
11641        $$emit$$"# L_end:\n\t"
11642     } else {
11643        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11644        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11645        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11646     }
11647     $$emit$$"# DONE"
11648   %}
11649   ins_encode %{
11650     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11651                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11652   %}
11653   ins_pipe( pipe_slow );
11654 %}
11655 
11656 // Small constant length ClearArray for AVX512 targets.
// ClearArray with an immediate (compile-time constant) count. Selected for nodes
// that are not is_large() when MaxVectorSize >= 32 and AVX512VL is supported.
//   cnt   - immediate count, passed to clear_mem as a constant
//   base  - address register; it is not listed in the effect clause
//   tmp, zero, ktmp - XMM, GPR and opmask scratch registers (TEMP)
//   cr    - flags are killed
// Uses the clear_mem form that takes a constant count (five arguments here,
// versus six in the register-count rules).
11657 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11658 %{
11659   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
11660   match(Set dummy (ClearArray cnt base));
11661   ins_cost(100);
11662   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11663   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11664   ins_encode %{
11665    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11666   %}
11667   ins_pipe(pipe_slow);
11668 %}
11669 
// StrComp for the LL encoding when AVX512VL+BW is not supported.
// All four inputs (str1, cnt1, str2, cnt2) live in fixed registers and are
// consumed and clobbered (USE_KILL); tmp1 is an XMM scratch and the flags are
// killed. The comparison is delegated to string_compare, with knoreg passed
// as the opmask register.
11670 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11671                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11672   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11673   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11674   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11675 
11676   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11677   ins_encode %{
11678     __ string_compare($str1$$Register, $str2$$Register,
11679                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11680                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11681   %}
11682   ins_pipe( pipe_slow );
11683 %}
11684 
// StrComp for the LL encoding when AVX512VL+BW is supported.
// Same operands and effects as the non-EVEX LL rule, plus an opmask scratch
// register ktmp (TEMP) that is handed to string_compare instead of knoreg.
11685 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11686                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11687   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11688   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11689   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11690 
11691   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11692   ins_encode %{
11693     __ string_compare($str1$$Register, $str2$$Register,
11694                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11695                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11696   %}
11697   ins_pipe( pipe_slow );
11698 %}
11699 
// StrComp for the UU encoding when AVX512VL+BW is not supported.
// All four inputs live in fixed registers and are consumed and clobbered
// (USE_KILL); tmp1 is an XMM scratch and the flags are killed. The comparison is
// delegated to string_compare, with knoreg passed as the opmask register.
11700 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11701                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11702   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11703   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11704   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11705 
11706   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11707   ins_encode %{
11708     __ string_compare($str1$$Register, $str2$$Register,
11709                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11710                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11711   %}
11712   ins_pipe( pipe_slow );
11713 %}
11714 
// StrComp for the UU encoding when AVX512VL+BW is supported.
// Same operands and effects as the non-EVEX UU rule, plus an opmask scratch
// register ktmp (TEMP) that is handed to string_compare instead of knoreg.
11715 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11716                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11717   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11718   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11719   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11720 
11721   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11722   ins_encode %{
11723     __ string_compare($str1$$Register, $str2$$Register,
11724                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11725                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11726   %}
11727   ins_pipe( pipe_slow );
11728 %}
11729 
// StrComp for the mixed LU encoding when AVX512VL+BW is not supported.
// Operands are passed to string_compare in declaration order (str1, str2,
// cnt1, cnt2). All four inputs are consumed and clobbered (USE_KILL); tmp1 is
// an XMM scratch, the flags are killed, and knoreg is passed as the opmask register.
11730 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11731                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11732   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11733   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11734   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11735 
11736   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11737   ins_encode %{
11738     __ string_compare($str1$$Register, $str2$$Register,
11739                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11740                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11741   %}
11742   ins_pipe( pipe_slow );
11743 %}
11744 
// StrComp for the mixed LU encoding when AVX512VL+BW is supported.
// Same operands and effects as the non-EVEX LU rule, plus an opmask scratch
// register ktmp (TEMP) that is handed to string_compare instead of knoreg.
11745 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11746                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11747   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11748   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11749   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11750 
11751   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11752   ins_encode %{
11753     __ string_compare($str1$$Register, $str2$$Register,
11754                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11755                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11756   %}
11757   ins_pipe( pipe_slow );
11758 %}
11759 
// StrComp for the mixed UL encoding when AVX512VL+BW is not supported.
// Note the swap relative to the other StrComp rules: the operand classes of
// str1/cnt1 and str2/cnt2 are exchanged in the signature, and string_compare is
// called with str2/cnt2 in the first-string positions and str1/cnt1 in the second.
// All four inputs are consumed and clobbered (USE_KILL); tmp1 is an XMM scratch,
// the flags are killed, and knoreg is passed as the opmask register.
11760 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11761                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11762   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11763   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11764   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11765 
11766   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11767   ins_encode %{
11768     __ string_compare($str2$$Register, $str1$$Register,
11769                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11770                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11771   %}
11772   ins_pipe( pipe_slow );
11773 %}
11774 
// StrComp for the mixed UL encoding when AVX512VL+BW is supported.
// As in the non-EVEX UL rule, str2/cnt2 are passed to string_compare in the
// first-string positions and str1/cnt1 in the second. An opmask scratch
// register ktmp (TEMP) is handed to string_compare instead of knoreg.
11775 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11776                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11777   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11778   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11779   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11780 
11781   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11782   ins_encode %{
11783     __ string_compare($str2$$Register, $str1$$Register,
11784                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11785                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11786   %}
11787   ins_pipe( pipe_slow );
11788 %}
11789 
11790 // fast string equals
// StrEquals when AVX512VL+BW is not supported.
//   str1, str2, cnt - inputs in fixed registers; consumed and clobbered (USE_KILL)
//   result          - receives the outcome
//   tmp1, tmp2      - XMM scratch registers (TEMP)
//   tmp3, cr        - killed
// The work is delegated to arrays_equals with knoreg as the opmask register.
// NOTE(review): the leading `false` argument presumably selects the string (non-array)
// flavor of arrays_equals -- confirm against the macro assembler declaration.
11791 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11792                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11793   predicate(!VM_Version::supports_avx512vlbw());
11794   match(Set result (StrEquals (Binary str1 str2) cnt));
11795   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11796 
11797   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11798   ins_encode %{
11799     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11800                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11801                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11802   %}
11803 
11804   ins_pipe( pipe_slow );
11805 %}
11806 
// StrEquals when AVX512VL+BW is supported.
// Same operands and effects as the non-EVEX rule, plus an opmask scratch register
// ktmp (TEMP) that is handed to arrays_equals instead of knoreg.
// NOTE(review): the leading `false` argument presumably selects the string (non-array)
// flavor of arrays_equals -- confirm against the macro assembler declaration.
11807 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11808                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11809   predicate(VM_Version::supports_avx512vlbw());
11810   match(Set result (StrEquals (Binary str1 str2) cnt));
11811   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11812 
11813   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11814   ins_encode %{
11815     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11816                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11817                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11818   %}
11819 
11820   ins_pipe( pipe_slow );
11821 %}
11822 
11823 
11824 // fast search of substring with known size.
// StrIndexOf for the LL encoding where the substring length (int_cnt2) is an
// immediate. Requires UseSSE42Intrinsics.
//   str1, cnt1, str2 - inputs in fixed registers; consumed and clobbered (USE_KILL)
//   int_cnt2         - constant substring length, read at code-generation time
//   cnt2, tmp        - fixed GPRs that are only killed (not matched inputs)
//   vec1             - XMM scratch (TEMP); cr - flags killed
// A constant length of 16 or more selects string_indexofC8; shorter lengths use
// string_indexof with the constant passed through.
11825 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11826                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11827   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11828   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11829   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11830 
11831   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11832   ins_encode %{
11833     int icnt2 = (int)$int_cnt2$$constant;
11834     if (icnt2 >= 16) {
11835       // IndexOf for constant substrings with size >= 16 elements
11836       // which don't need to be loaded through stack.
11837       __ string_indexofC8($str1$$Register, $str2$$Register,
11838                           $cnt1$$Register, $cnt2$$Register,
11839                           icnt2, $result$$Register,
11840                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11841     } else {
11842       // Small strings are loaded through stack if they cross page boundary.
11843       __ string_indexof($str1$$Register, $str2$$Register,
11844                         $cnt1$$Register, $cnt2$$Register,
11845                         icnt2, $result$$Register,
11846                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11847     }
11848   %}
11849   ins_pipe( pipe_slow );
11850 %}
11851 
11852 // fast search of substring with known size.
// StrIndexOf for the UU encoding where the substring length (int_cnt2) is an
// immediate. Requires UseSSE42Intrinsics.
//   str1, cnt1, str2 - inputs in fixed registers; consumed and clobbered (USE_KILL)
//   int_cnt2         - constant substring length, read at code-generation time
//   cnt2, tmp        - fixed GPRs that are only killed (not matched inputs)
//   vec1             - XMM scratch (TEMP); cr - flags killed
// A constant length of 8 or more selects string_indexofC8 (the LL rule uses 16);
// shorter lengths use string_indexof with the constant passed through.
11853 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11854                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11855   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11856   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11857   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11858 
11859   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11860   ins_encode %{
11861     int icnt2 = (int)$int_cnt2$$constant;
11862     if (icnt2 >= 8) {
11863       // IndexOf for constant substrings with size >= 8 elements
11864       // which don't need to be loaded through stack.
11865       __ string_indexofC8($str1$$Register, $str2$$Register,
11866                           $cnt1$$Register, $cnt2$$Register,
11867                           icnt2, $result$$Register,
11868                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11869     } else {
11870       // Small strings are loaded through stack if they cross page boundary.
11871       __ string_indexof($str1$$Register, $str2$$Register,
11872                         $cnt1$$Register, $cnt2$$Register,
11873                         icnt2, $result$$Register,
11874                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11875     }
11876   %}
11877   ins_pipe( pipe_slow );
11878 %}
11879 
11880 // fast search of substring with known size.
// StrIndexOf for the mixed UL encoding where the substring length (int_cnt2) is
// an immediate. Requires UseSSE42Intrinsics.
//   str1, cnt1, str2 - inputs in fixed registers; consumed and clobbered (USE_KILL)
//   int_cnt2         - constant substring length, read at code-generation time
//   cnt2, tmp        - fixed GPRs that are only killed (not matched inputs)
//   vec1             - XMM scratch (TEMP); cr - flags killed
// A constant length of 8 or more selects string_indexofC8; shorter lengths use
// string_indexof with the constant passed through.
11881 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11882                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11883   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11884   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11885   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11886 
11887   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11888   ins_encode %{
11889     int icnt2 = (int)$int_cnt2$$constant;
11890     if (icnt2 >= 8) {
11891       // IndexOf for constant substrings with size >= 8 elements
11892       // which don't need to be loaded through stack.
11893       __ string_indexofC8($str1$$Register, $str2$$Register,
11894                           $cnt1$$Register, $cnt2$$Register,
11895                           icnt2, $result$$Register,
11896                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11897     } else {
11898       // Small strings are loaded through stack if they cross page boundary.
11899       __ string_indexof($str1$$Register, $str2$$Register,
11900                         $cnt1$$Register, $cnt2$$Register,
11901                         icnt2, $result$$Register,
11902                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11903     }
11904   %}
11905   ins_pipe( pipe_slow );
11906 %}
11907 
// StrIndexOf for the LL encoding where the substring length is a register
// operand (cnt2). Requires UseSSE42Intrinsics.
// All four inputs are consumed and clobbered (USE_KILL); vec1 is an XMM scratch,
// tmp and the flags are killed.
// NOTE(review): the literal (-1) sits in the argument slot where the constant-length
// rules pass icnt2; presumably it tells string_indexof the length is not a
// compile-time constant -- confirm against the macro assembler.
11908 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11909                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11910   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11911   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11912   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11913 
11914   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11915   ins_encode %{
11916     __ string_indexof($str1$$Register, $str2$$Register,
11917                       $cnt1$$Register, $cnt2$$Register,
11918                       (-1), $result$$Register,
11919                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11920   %}
11921   ins_pipe( pipe_slow );
11922 %}
11923 
// StrIndexOf for the UU encoding where the substring length is a register
// operand (cnt2). Requires UseSSE42Intrinsics.
// All four inputs are consumed and clobbered (USE_KILL); vec1 is an XMM scratch,
// tmp and the flags are killed.
// NOTE(review): the literal (-1) sits in the argument slot where the constant-length
// rules pass icnt2; presumably it tells string_indexof the length is not a
// compile-time constant -- confirm against the macro assembler.
11924 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11925                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11926   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11927   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11928   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11929 
11930   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11931   ins_encode %{
11932     __ string_indexof($str1$$Register, $str2$$Register,
11933                       $cnt1$$Register, $cnt2$$Register,
11934                       (-1), $result$$Register,
11935                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11936   %}
11937   ins_pipe( pipe_slow );
11938 %}
11939 
11940 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11941                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOf rule for the mixed UL encoding in which the substring count
        // cnt2 is a register operand. Only selected when UseSSE42Intrinsics is set.
        // NOTE(review): the format text below is identical to the UU rule
        // ("char[]"), so the two are indistinguishable in printed output.
11942   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11943   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
        // All four inputs are USE_KILL, tmp and the flags are KILL, vec1 is a TEMP.
11944   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11945 
11946   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11947   ins_encode %{
          // (-1) occupies the icnt2 position of the constant-substring rules.
11948     __ string_indexof($str1$$Register, $str2$$Register,
11949                       $cnt1$$Register, $cnt2$$Register,
11950                       (-1), $result$$Register,
11951                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11952   %}
11953   ins_pipe( pipe_slow );
11954 %}
11955 
11956 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11957                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar rule for the U encoding (format text: "StringUTF16").
        // Only selected when UseSSE42Intrinsics is set.
11958   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11959   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
        // Unlike the substring rules, tmp is declared TEMP here rather than KILL.
11960   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11961   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11962   ins_encode %{
          // Delegates to the assembler routine with three XMM temporaries and one GPR temporary.
11963     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11964                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11965   %}
11966   ins_pipe( pipe_slow );
11967 %}
11968 
11969 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11970                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar rule for the L encoding (format text: "StringLatin1").
        // Only selected when UseSSE42Intrinsics is set.
11971   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11972   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
        // Same operand and effect list as string_indexof_char.
11973   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11974   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11975   ins_encode %{
          // Same argument list as the U rule, but calls the stringL_ variant of the routine.
11976     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11977                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11978   %}
11979   ins_pipe( pipe_slow );
11980 %}
11981 
11982 
11983 // fast array equals
11984 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11985                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11986 %{
        // AryEq rule for the LL encoding (format text: "byte[]") used when
        // VM_Version::supports_avx512vlbw() is false; array_equalsB_evex covers
        // the complementary case.
11987   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11988   match(Set result (AryEq ary1 ary2));
11989   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11990   //ins_cost(300);
11991 
11992   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11993   ins_encode %{
          // knoreg fills the mask-register argument because this rule has no kReg operand.
          // The leading `true` is not explained here -- TODO confirm its meaning
          // against the declaration of arrays_equals.
11994     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11995                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11996                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11997   %}
11998   ins_pipe( pipe_slow );
11999 %}
12000 
12001 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12002                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12003 %{
        // AryEq rule for the LL encoding used when
        // VM_Version::supports_avx512vlbw() is true. Differs from array_equalsB
        // only by the extra kReg temporary that is handed to arrays_equals.
12004   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12005   match(Set result (AryEq ary1 ary2));
12006   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12007   //ins_cost(300);
12008 
        // NOTE(review): the format text does not mention $ktmp although it is a TEMP.
12009   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12010   ins_encode %{
12011     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12012                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12013                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12014   %}
12015   ins_pipe( pipe_slow );
12016 %}
12017 
12018 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12019                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12020 %{
        // AryEq rule for the UU encoding (format text: "char[]") used when
        // VM_Version::supports_avx512vlbw() is false. Same shape as array_equalsB
        // except that the "char" flag passed to arrays_equals is true.
12021   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12022   match(Set result (AryEq ary1 ary2));
12023   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12024   //ins_cost(300);
12025 
12026   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12027   ins_encode %{
          // knoreg fills the mask-register argument because this rule has no kReg operand.
12028     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12029                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12030                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12031   %}
12032   ins_pipe( pipe_slow );
12033 %}
12034 
12035 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12036                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12037 %{
        // AryEq rule for the UU encoding used when
        // VM_Version::supports_avx512vlbw() is true. Differs from array_equalsC
        // only by the extra kReg temporary that is handed to arrays_equals.
12038   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12039   match(Set result (AryEq ary1 ary2));
12040   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12041   //ins_cost(300);
12042 
        // NOTE(review): the format text does not mention $ktmp although it is a TEMP.
12043   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12044   ins_encode %{
12045     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12046                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12047                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
12052 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12053                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12054 %{
        // CountPositives rule used when AVX512VL/BW or BMI2 is missing;
        // count_positives_evex has the exact complementary predicate.
12055   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12056   match(Set result (CountPositives ary1 len));
12057   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12058 
12059   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12060   ins_encode %{
          // Both mask-register arguments are knoreg: this rule has no kReg operands.
12061     __ count_positives($ary1$$Register, $len$$Register,
12062                        $result$$Register, $tmp3$$Register,
12063                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12064   %}
12065   ins_pipe( pipe_slow );
12066 %}
12067 
12068 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12069                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12070 %{
        // CountPositives rule used when both AVX512VL/BW and BMI2 are available.
        // Adds two kReg temporaries that are passed through to count_positives.
12071   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12072   match(Set result (CountPositives ary1 len));
12073   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12074 
12075   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12076   ins_encode %{
12077     __ count_positives($ary1$$Register, $len$$Register,
12078                        $result$$Register, $tmp3$$Register,
12079                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12080   %}
12081   ins_pipe( pipe_slow );
12082 %}
12083 
12084 
12085 // fast char[] to byte[] compression
12086 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12087                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy rule used when AVX512VL/BW or BMI2 is missing;
        // string_compress_evex has the exact complementary predicate.
12088   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12089   match(Set result (StrCompressedCopy src (Binary dst len)));
12090   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12091 
        // NOTE(review): the text names RAX/RCX/RDX while the operands use the
        // e*-register classes, and it omits the USE_KILLed src/dst -- confirm
        // whether the printed text should be aligned with the effect list.
12092   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12093   ins_encode %{
          // Both mask-register arguments are knoreg: this rule has no kReg operands.
12094     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12095                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12096                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12097                            knoreg, knoreg);
12098   %}
12099   ins_pipe( pipe_slow );
12100 %}
12101 
12102 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12103                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy rule used when both AVX512VL/BW and BMI2 are available.
        // Adds two kReg temporaries that are passed through to char_array_compress.
12104   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12105   match(Set result (StrCompressedCopy src (Binary dst len)));
12106   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12107 
        // NOTE(review): same register-name mismatch in the text as in string_compress.
12108   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12109   ins_encode %{
12110     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12111                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12112                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12113                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12114   %}
12115   ins_pipe( pipe_slow );
12116 %}
12117 
12118 // fast byte[] to char[] inflation
12119 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12120                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy rule used when AVX512VL/BW or BMI2 is missing.
        // The node's value is bound to a `Universe dummy` operand, i.e. the rule
        // defines no register result.
12121   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12122   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12123   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12124 
12125   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12126   ins_encode %{
          // knoreg fills the mask-register argument because this rule has no kReg operand.
12127     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12128                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12129   %}
12130   ins_pipe( pipe_slow );
12131 %}
12132 
12133 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12134                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy rule used when both AVX512VL/BW and BMI2 are available.
        // Differs from string_inflate only by the kReg temporary passed to
        // byte_array_inflate.
12135   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12136   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12137   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12138 
12139   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12140   ins_encode %{
12141     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12142                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12143   %}
12144   ins_pipe( pipe_slow );
12145 %}
12146 
12147 // encode char[] to byte[] in ISO_8859_1
12148 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12149                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12150                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray rule for nodes whose is_ascii() is false.
        // encode_ascii_array handles the is_ascii() == true case with the same
        // operands and the same assembler routine.
12151   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12152   match(Set result (EncodeISOArray src (Binary dst len)));
12153   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12154 
12155   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12156   ins_encode %{
          // The trailing `false` mirrors the is_ascii() value tested by the predicate.
12157     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12158                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12159                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12160   %}
12161   ins_pipe( pipe_slow );
12162 %}
12163 
12164 // encode char[] to byte[] in ASCII
12165 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12166                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12167                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray rule for nodes whose is_ascii() is true.
        // Identical to encode_iso_array apart from the predicate and the final
        // boolean passed to the assembler routine.
12168   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12169   match(Set result (EncodeISOArray src (Binary dst len)));
12170   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12171 
12172   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12173   ins_encode %{
          // The trailing `true` mirrors the is_ascii() value tested by the predicate.
12174     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12175                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12176                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12177   %}
12178   ins_pipe( pipe_slow );
12179 %}
12180 
12181 //----------Control Flow Instructions------------------------------------------
12182 // Signed compare Instructions
12183 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
        // Signed int compare of two registers: CmpI -> "CMP op1,op2".
        // Defines the flags operand cr; both inputs are only read.
12184   match(Set cr (CmpI op1 op2));
12185   effect( DEF cr, USE op1, USE op2 );
12186   format %{ "CMP    $op1,$op2" %}
12187   opcode(0x3B);  /* Opcode 3B /r */
12188   ins_encode( OpcP, RegReg( op1, op2) );
12189   ins_pipe( ialu_cr_reg_reg );
12190 %}
12191 
12192 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
        // Signed int compare of a register with an immediate: CmpI -> "CMP op1,imm".
12193   match(Set cr (CmpI op1 op2));
12194   effect( DEF cr, USE op1 );
12195   format %{ "CMP    $op1,$op2" %}
12196   opcode(0x81,0x07);  /* Opcode 81 /7 */
12197   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
        // OpcSErm / Con8or32 are encoding classes defined elsewhere in this file;
        // presumably they choose between the sign-extended imm8 and the imm32
        // form -- TODO confirm.
12198   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12199   ins_pipe( ialu_cr_reg_imm );
12200 %}
12201 
12202 // Cisc-spilled version of compI_eReg
12203 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
        // Signed int compare of a register with a loaded int: the LoadI is folded
        // into the CMP as a memory operand.
12204   match(Set cr (CmpI op1 (LoadI op2)));
12205 
12206   format %{ "CMP    $op1,$op2" %}
        // Costed at 500, higher than the 300 used by several register-only rules in this section.
12207   ins_cost(500);
12208   opcode(0x3B);  /* Opcode 3B /r */
        // The encoding is bracketed by SetInstMark / ClearInstMark, as for the
        // other memory-operand rules in this section.
12209   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12210   ins_pipe( ialu_cr_reg_mem );
12211 %}
12212 
12213 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
        // Compare of an int register against the constant zero, emitted as
        // "TEST src,src" instead of a CMP with an immediate.
12214   match(Set cr (CmpI src zero));
12215   effect( DEF cr, USE src );
12216 
12217   format %{ "TEST   $src,$src" %}
12218   opcode(0x85);
12219   ins_encode( OpcP, RegReg( src, src ) );
12220   ins_pipe( ialu_cr_reg_imm );
12221 %}
12222 
12223 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
        // Folds "(src & con) compared with 0" into a single "TEST src,con";
        // the AndI result itself is not materialised in a register.
12224   match(Set cr (CmpI (AndI src con) zero));
12225 
12226   format %{ "TEST   $src,$con" %}
12227   opcode(0xF7,0x00);
        // Con32: the immediate is always emitted through the 32-bit constant encoding class.
12228   ins_encode( OpcP, RegOpc(src), Con32(con) );
12229   ins_pipe( ialu_cr_reg_imm );
12230 %}
12231 
12232 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
        // Folds "(src & *mem) compared with 0" into a single "TEST src,mem".
        // The memory operand must sit under a LoadI, exactly as in
        // compI_eReg_mem: AndI takes int inputs, so a bare `memory` operand
        // (an address expression) in that position can never be matched.
12233   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12234 
12235   format %{ "TEST   $src,$mem" %}
12236   opcode(0x85);
        // The encoding is bracketed by SetInstMark / ClearInstMark, as for the
        // other memory-operand rules in this section.
12237   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
12238   ins_pipe( ialu_cr_reg_mem );
12239 %}
12240 
12241 // Unsigned compare Instructions; really, same as signed except they
12242 // produce an eFlagsRegU instead of eFlagsReg.
12243 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
        // Unsigned int compare of two registers. Same opcode and encoding as
        // compI_eReg; only the flags operand class (eFlagsRegU) differs.
12244   match(Set cr (CmpU op1 op2));
12245 
12246   format %{ "CMPu   $op1,$op2" %}
12247   opcode(0x3B);  /* Opcode 3B /r */
12248   ins_encode( OpcP, RegReg( op1, op2) );
12249   ins_pipe( ialu_cr_reg_reg );
12250 %}
12251 
12252 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
        // Unsigned int compare of a register with an immediate. Same opcode and
        // encoding classes as compI_eReg_imm; produces eFlagsRegU.
12253   match(Set cr (CmpU op1 op2));
12254 
12255   format %{ "CMPu   $op1,$op2" %}
12256   opcode(0x81,0x07);  /* Opcode 81 /7 */
12257   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12258   ins_pipe( ialu_cr_reg_imm );
12259 %}
12260 
12261 // Cisc-spilled version of compU_eReg
12262 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
        // Unsigned int compare of a register with a loaded int: the LoadI is
        // folded into the CMP as a memory operand.
12263   match(Set cr (CmpU op1 (LoadI op2)));
12264 
12265   format %{ "CMPu   $op1,$op2" %}
12266   ins_cost(500);
12267   opcode(0x3B);  /* Opcode 3B /r */
12268   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12269   ins_pipe( ialu_cr_reg_mem );
12270 %}
12271 
12272 // // Cisc-spilled version of cmpU_eReg
12273 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12274 //  match(Set cr (CmpU (LoadI op1) op2));
12275 //
12276 //  format %{ "CMPu   $op1,$op2" %}
12277 //  ins_cost(500);
12278 //  opcode(0x39);  /* Opcode 39 /r */
12279 //  ins_encode( OpcP, RegMem( op1, op2) );
12280 //%}
12281 
12282 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
        // Unsigned compare of an int register against zero, emitted as
        // "TEST src,src". Same opcode and encoding as testI_reg; produces eFlagsRegU.
12283   match(Set cr (CmpU src zero));
12284 
12285   format %{ "TESTu  $src,$src" %}
12286   opcode(0x85);
12287   ins_encode( OpcP, RegReg( src, src ) );
12288   ins_pipe( ialu_cr_reg_imm );
12289 %}
12290 
12291 // Unsigned pointer compare Instructions
12292 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
        // Pointer compare of two registers. Uses the same opcode as the int
        // compares and produces unsigned flags (eFlagsRegU).
12293   match(Set cr (CmpP op1 op2));
12294 
12295   format %{ "CMPu   $op1,$op2" %}
12296   opcode(0x3B);  /* Opcode 3B /r */
12297   ins_encode( OpcP, RegReg( op1, op2) );
12298   ins_pipe( ialu_cr_reg_reg );
12299 %}
12300 
12301 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register with a pointer constant.
12302   match(Set cr (CmpP op1 op2));
12303 
12304   format %{ "CMPu   $op1,$op2" %}
12305   opcode(0x81,0x07);  /* Opcode 81 /7 */
        // Unlike the int immediate compares, this encoding is bracketed by
        // SetInstMark / ClearInstMark.
12306   ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
12307   ins_pipe( ialu_cr_reg_imm );
12308 %}
12309 
12310 // Cisc-spilled version of compP_eReg
12311 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
        // Pointer compare of a register with a loaded pointer: the LoadP is
        // folded into the CMP as a memory operand.
        // compP_mem_eReg has the same match rule, a predicate, and no ins_cost.
12312   match(Set cr (CmpP op1 (LoadP op2)));
12313 
12314   format %{ "CMPu   $op1,$op2" %}
12315   ins_cost(500);
12316   opcode(0x3B);  /* Opcode 3B /r */
12317   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12318   ins_pipe( ialu_cr_reg_mem );
12319 %}
12320 
12321 // // Cisc-spilled version of cmpP_eReg
12322 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12323 //  match(Set cr (CmpP (LoadP op1) op2));
12324 //
12325 //  format %{ "CMPu   $op1,$op2" %}
12326 //  ins_cost(500);
12327 //  opcode(0x39);  /* Opcode 39 /r */
12328 //  ins_encode( OpcP, RegMem( op1, op2) );
12329 //%}
12330 
12331 // Compare raw pointer (used in out-of-heap check).
12332 // Only works because non-oop pointers must be raw pointers
12333 // and raw pointers have no anti-dependencies.
12334 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
        // Restricted to loads whose address input has reloc() == relocInfo::none.
        // n->in(2) is the LoadP of the match rule and n->in(2)->in(2) its address
        // input -- presumably, going by the usual node input layout; confirm.
        // NOTE(review): despite the name, the operand order (register, memory) and
        // the match rule are the same as in compP_eReg_mem; this rule has no ins_cost.
12335   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12336   match(Set cr (CmpP op1 (LoadP op2)));
12337 
12338   format %{ "CMPu   $op1,$op2" %}
12339   opcode(0x3B);  /* Opcode 3B /r */
12340   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12341   ins_pipe( ialu_cr_reg_mem );
12342 %}
12343 
12344 //
12345 // This will generate a signed flags result. This should be ok
12346 // since any compare to a zero should be eq/neq.
12347 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
        // Pointer compare against the null constant (immP0), emitted as
        // "TEST src,src". Produces eFlagsReg rather than the eFlagsRegU used by
        // the other CmpP rules, as explained above.
12348   match(Set cr (CmpP src zero));
12349 
12350   format %{ "TEST   $src,$src" %}
12351   opcode(0x85);
12352   ins_encode( OpcP, RegReg( src, src ) );
12353   ins_pipe( ialu_cr_reg_imm );
12354 %}
12355 
12356 // Cisc-spilled version of testP_reg
12357 // This will generate a signed flags result. This should be ok
12358 // since any compare to a zero should be eq/neq.
12359 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
        // Tests a loaded pointer against zero by emitting "TEST [op],0xFFFFFFFF"
        // (F7 /0 with a 32-bit all-ones immediate).
        // NOTE(review): `zero` is declared immI_0 although it is the second input
        // of a CmpP; the register form testP_reg uses immP0. Confirm whether this
        // is intentional -- as written the rule may never be selected.
12360   match(Set cr (CmpP (LoadP op) zero));
12361 
12362   format %{ "TEST   $op,0xFFFFFFFF" %}
12363   ins_cost(500);
12364   opcode(0xF7);               /* Opcode F7 /0 */
12365   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
12366   ins_pipe( ialu_cr_reg_imm );
12367 %}
12368 
12369 // Yanked all unsigned pointer compare operations.
12370 // Pointer compares are done with CmpP which is already unsigned.
12371 
12372 //----------Max and Min--------------------------------------------------------
12373 // Min Instructions
12374 ////
12375 //   *** Min and Max using the conditional move are slower than the
12376 //   *** branch version on a Pentium III.
12377 // // Conditional move for min
12378 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12379 //  effect( USE_DEF op2, USE op1, USE cr );
12380 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12381 //  opcode(0x4C,0x0F);
12382 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12383 //  ins_pipe( pipe_cmov_reg );
12384 //%}
12385 //
12386 //// Min Register with Register (P6 version)
12387 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12388 //  predicate(VM_Version::supports_cmov() );
12389 //  match(Set op2 (MinI op1 op2));
12390 //  ins_cost(200);
12391 //  expand %{
12392 //    eFlagsReg cr;
12393 //    compI_eReg(cr,op1,op2);
12394 //    cmovI_reg_lt(op2,op1,cr);
12395 //  %}
12396 //%}
12397 
12398 // Min Register with Register (generic version)
12399 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // dst = MinI(dst, src), computed in place; the flags register is clobbered.
12400   match(Set dst (MinI dst src));
12401   effect(KILL flags);
12402   ins_cost(300);
12403 
12404   format %{ "MIN    $dst,$src" %}
        // The actual instruction sequence comes from the min_enc encoding class
        // defined elsewhere in this file.
        // NOTE(review): whether min_enc consumes opcode(0xCC) is not visible here.
12405   opcode(0xCC);
12406   ins_encode( min_enc(dst,src) );
12407   ins_pipe( pipe_slow );
12408 %}
12409 
12410 // Max Register with Register
12411 //   *** Min and Max using the conditional move are slower than the
12412 //   *** branch version on a Pentium III.
12413 // // Conditional move for max
12414 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12415 //  effect( USE_DEF op2, USE op1, USE cr );
12416 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12417 //  opcode(0x4F,0x0F);
12418 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12419 //  ins_pipe( pipe_cmov_reg );
12420 //%}
12421 //
12422 // // Max Register with Register (P6 version)
12423 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12424 //  predicate(VM_Version::supports_cmov() );
12425 //  match(Set op2 (MaxI op1 op2));
12426 //  ins_cost(200);
12427 //  expand %{
12428 //    eFlagsReg cr;
12429 //    compI_eReg(cr,op1,op2);
12430 //    cmovI_reg_gt(op2,op1,cr);
12431 //  %}
12432 //%}
12433 
12434 // Max Register with Register (generic version)
12435 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // dst = MaxI(dst, src), computed in place; the flags register is clobbered.
12436   match(Set dst (MaxI dst src));
12437   effect(KILL flags);
12438   ins_cost(300);
12439 
12440   format %{ "MAX    $dst,$src" %}
        // The actual instruction sequence comes from the max_enc encoding class
        // defined elsewhere in this file.
        // NOTE(review): whether max_enc consumes opcode(0xCC) is not visible here.
12441   opcode(0xCC);
12442   ins_encode( max_enc(dst,src) );
12443   ins_pipe( pipe_slow );
12444 %}
12445 
12446 // ============================================================================
12447 // Counted Loop limit node which represents exact final iterator value.
12448 // Note: the resulting value should fit into integer range since
12449 // counted loops have limit check on overflow.
12450 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
        // Computes, in place in $limit, the value described by the format text:
        //   limit = init + stride * ((limit - init + stride - 1) / stride)
        // The difference is formed as a 64-bit value in limit_hi:limit so the
        // intermediate sum cannot wrap before the division. The stride is a
        // compile-time constant and must not be +1 or -1 (asserted below).
        // limit_hi and tmp are TEMPs; the flags register is clobbered.
12451   match(Set limit (LoopLimit (Binary init limit) stride));
12452   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12453   ins_cost(300);
12454 
12455   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12456   ins_encode %{
12457     int strd = (int)$stride$$constant;
12458     assert(strd != 1 && strd != -1, "sanity");
12460     // Convert limit to long (EAX:EDX)
12461     __ cdql();
12462     // Convert init to long (init:tmp)
12463     __ movl($tmp$$Register, $init$$Register);
12464     __ sarl($tmp$$Register, 31);
12465     // $limit - $init
12466     __ subl($limit$$Register, $init$$Register);
12467     __ sbbl($limit_hi$$Register, $tmp$$Register);
12468     // + ($stride - 1)
12469     if (strd > 0) {
12470       __ addl($limit$$Register, (strd - 1));
12471       __ adcl($limit_hi$$Register, 0);
12472       __ movl($tmp$$Register, strd);
12473     } else {
            // Negative stride: add (stride + 1) with sign extension, then negate
            // the 64-bit value so the division below uses a positive divisor.
12474       __ addl($limit$$Register, (strd + 1));
12475       __ adcl($limit_hi$$Register, -1);
12476       __ lneg($limit_hi$$Register, $limit$$Register);
12477       __ movl($tmp$$Register, -strd);
12478     }
12479     // signed division: (EAX:EDX) / pos_stride
12480     __ idivl($tmp$$Register);
12481     if (strd < 0) {
12482       // restore sign
12483       __ negl($tmp$$Register);
12484     }
12485     // (EAX) * stride
12486     __ mull($tmp$$Register);
12487     // + init (ignore upper bits)
12488     __ addl($limit$$Register, $init$$Register);
12489   %}
12490   ins_pipe( pipe_slow );
12491 %}
12492 
12493 // ============================================================================
12494 // Branch Instructions
12495 // Jump Table
12496 instruct jumpXtnd(rRegI switch_val) %{
        // Jump-table dispatch for the Jump node: an indirect jump through the
        // table at $constantaddress, indexed by switch_val with scale factor 1.
12497   match(Jump switch_val);
12498   ins_cost(350);
12499   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12500   ins_encode %{
12501     // Jump to Address(table_base + switch_reg)
12502     Address index(noreg, $switch_val$$Register, Address::times_1);
          // noreg is passed as the second argument of jump().
12503     __ jump(ArrayAddress($constantaddress, index), noreg);
12504   %}
12505   ins_pipe(pipe_jmp);
12506 %}
12507 
12508 // Jump Direct - Label defines a relative address from JMP+1
12509 instruct jmpDir(label labl) %{
        // Unconditional branch for Goto, always emitted in the long form;
        // the rule declares a fixed size of 5 bytes.
12510   match(Goto);
12511   effect(USE labl);
12512 
12513   ins_cost(300);
12514   format %{ "JMP    $labl" %}
12515   size(5);
12516   ins_encode %{
12517     Label* L = $labl$$label;
12518     __ jmp(*L, false); // Always long jump
12519   %}
12520   ins_pipe( pipe_jmp );
12521 %}
12522 
12523 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12524 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch for If on signed flags (eFlagsReg), always emitted
        // in the long form; the rule declares a fixed size of 6 bytes.
12525   match(If cop cr);
12526   effect(USE labl);
12527 
12528   ins_cost(300);
12529   format %{ "J$cop    $labl" %}
12530   size(6);
12531   ins_encode %{
12532     Label* L = $labl$$label;
          // The condition code comes straight from the cmpOp operand.
12533     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12534   %}
12535   ins_pipe( pipe_jcc );
12536 %}
12537 
12538 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12539 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch for CountedLoopEnd on signed flags. Same encoding
        // and size as jmpCon; only the matched ideal node differs.
12540   match(CountedLoopEnd cop cr);
12541   effect(USE labl);
12542 
12543   ins_cost(300);
12544   format %{ "J$cop    $labl\t# Loop end" %}
12545   size(6);
12546   ins_encode %{
12547     Label* L = $labl$$label;
12548     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12549   %}
12550   ins_pipe( pipe_jcc );
12551 %}
12552 
12553 // Jump Direct Conditional - using unsigned comparison
12554 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Conditional branch for If on unsigned flags (eFlagsRegU with cmpOpU).
        // Same encoding and size as jmpCon.
12555   match(If cop cmp);
12556   effect(USE labl);
12557 
12558   ins_cost(300);
12559   format %{ "J$cop,u  $labl" %}
12560   size(6);
12561   ins_encode %{
12562     Label* L = $labl$$label;
12563     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12564   %}
12565   ins_pipe(pipe_jcc);
12566 %}
12567 
12568 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for If on eFlagsRegUCF flags with a cmpOpUCF
        // condition. Encoded like jmpConU but costed at 200 instead of 300.
12569   match(If cop cmp);
12570   effect(USE labl);
12571 
12572   ins_cost(200);
12573   format %{ "J$cop,u  $labl" %}
12574   size(6);
12575   ins_encode %{
12576     Label* L = $labl$$label;
12577     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12578   %}
12579   ins_pipe(pipe_jcc);
12580 %}
12581 
12582 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for If on eFlagsRegUCF flags with a cmpOpUCF2
        // condition. The encoding handles only equal / notEqual and also consults
        // the parity flag, so two jumps are emitted; no fixed size() is declared.
12583   match(If cop cmp);
12584   effect(USE labl);
12585 
12586   ins_cost(200);
12587   format %{ $$template
12588     if ($cop$$cmpcode == Assembler::notEqual) {
12589       $$emit$$"JP,u   $labl\n\t"
12590       $$emit$$"J$cop,u   $labl"
12591     } else {
12592       $$emit$$"JP,u   done\n\t"
12593       $$emit$$"J$cop,u   $labl\n\t"
12594       $$emit$$"done:"
12595     }
12596   %}
12597   ins_encode %{
12598     Label* l = $labl$$label;
12599     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target if parity is set OR the operands differ.
12600       __ jcc(Assembler::parity, *l, false);
12601       __ jcc(Assembler::notEqual, *l, false);
12602     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: skip the branch when parity is set, otherwise branch on equal.
12603       Label done;
12604       __ jccb(Assembler::parity, done);
12605       __ jcc(Assembler::equal, *l, false);
12606       __ bind(done);
12607     } else {
            // Any other condition code is treated as unreachable.
12608        ShouldNotReachHere();
12609     }
12610   %}
12611   ins_pipe(pipe_jcc);
12612 %}
12613 
12614 // ============================================================================
12615 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12616 // array for an instance of the superklass.  Set a hidden internal cache on a
12617 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12618 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12619 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Value-producing form of PartialSubtypeCheck: the result operand is set
        // from the scan (zero on a hit, per the format text below).
        // rcx and the flags are clobbered.
12620   match(Set result (PartialSubtypeCheck sub super));
12621   effect( KILL rcx, KILL cr );
12622 
12623   ins_cost(1100);  // slightly larger than the next version
        // Printed listing only; the instructions themselves come from
        // enc_PartialSubtypeCheck below.
12624   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12625             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12626             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12627             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12628             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12629             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12630             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12631      "miss:\t" %}
12632 
        // The primary opcode is used as a flag for the shared encoding class:
        // 0x1 here requests the XOR of EDI, 0x0 in the _vs_Zero form omits it.
12633   opcode(0x1); // Force a XOR of EDI
12634   ins_encode( enc_PartialSubtypeCheck() );
12635   ins_pipe( pipe_slow );
12636 %}
12637 
12638 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Flags-producing form: matches a PartialSubtypeCheck whose value is
        // only compared against null, so the rule defines cr and merely KILLs
        // the result register and rcx.
12639   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12640   effect( KILL rcx, KILL result );
12641 
        // Costed at 1000, below the 1100 of the value-producing form.
12642   ins_cost(1000);
12643   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12644             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12645             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12646             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12647             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12648             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12649      "miss:\t" %}
12650 
        // Same encoding class as partialSubtypeCheck; opcode 0x0 omits the XOR of EDI.
12651   opcode(0x0);  // No need to XOR EDI
12652   ins_encode( enc_PartialSubtypeCheck() );
12653   ins_pipe( pipe_slow );
12654 %}
12655 
12656 // ============================================================================
12657 // Branch Instructions -- short offset versions
12658 //
12659 // These instructions are used to replace jumps of a long offset (the default
12660 // match) with jumps of a shorter offset.  These instructions are all tagged
12661 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12662 // match rules in general matching.  Instead, the ADLC generates a conversion
12663 // method in the MachNode which can be used to do in-place replacement of the
12664 // long variant with the shorter variant.  The compiler will determine if a
12665 // branch can be taken by the is_short_branch_offset() predicate in the machine
12666 // specific code section of the file.
12667 
12668 // Jump Direct (short form) - Label defines a relative address from JMP+1
12669 instruct jmpDir_short(label labl) %{
12670   match(Goto);
12671   effect(USE labl);
12672 
12673   ins_cost(300);
12674   format %{ "JMP,s  $labl" %}
12675   size(2);  // Declared size: the short jump occupies two bytes
12676   ins_encode %{
12677     Label* L = $labl$$label;
12678     __ jmpb(*L);  // Short-displacement unconditional jump to the label
12679   %}
12680   ins_pipe( pipe_jmp );
12681   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12682 %}
12683 
12684 // Jump Direct Conditional (short form) - Label defines a relative address from Jcc+1
12685 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12686   match(If cop cr);
12687   effect(USE labl);
12688 
12689   ins_cost(300);
12690   format %{ "J$cop,s  $labl" %}
12691   size(2);  // Declared size: one short conditional jump
12692   ins_encode %{
12693     Label* L = $labl$$label;
12694     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);  // Condition code comes from the cop operand
12695   %}
12696   ins_pipe( pipe_jcc );
12697   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12698 %}
12699 
12700 // Jump Direct Conditional for a counted-loop end (short form) - Label defines a relative address from Jcc+1
12701 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12702   match(CountedLoopEnd cop cr);  // Same shape as jmpCon_short, but matches CountedLoopEnd instead of If
12703   effect(USE labl);
12704 
12705   ins_cost(300);
12706   format %{ "J$cop,s  $labl\t# Loop end" %}
12707   size(2);  // Declared size: one short conditional jump
12708   ins_encode %{
12709     Label* L = $labl$$label;
12710     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);  // Condition code comes from the cop operand
12711   %}
12712   ins_pipe( pipe_jcc );
12713   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12714 %}
12715 
12716 // Jump Direct Conditional (short form) - using unsigned comparison
12717 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12718   match(If cop cmp);  // Distinguished from jmpCon_short by the cmpOpU / eFlagsRegU operand classes
12719   effect(USE labl);
12720 
12721   ins_cost(300);
12722   format %{ "J$cop,us $labl" %}
12723   size(2);  // Declared size: one short conditional jump
12724   ins_encode %{
12725     Label* L = $labl$$label;
12726     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);  // Condition code comes from the cop operand
12727   %}
12728   ins_pipe( pipe_jcc );
12729   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12730 %}
12731 
12732 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12733   match(If cop cmp);  // Short conditional jump for the cmpOpUCF / eFlagsRegUCF operand classes
12734   effect(USE labl);
12735 
12736   ins_cost(300);
12737   format %{ "J$cop,us $labl" %}
12738   size(2);  // Declared size: one short conditional jump
12739   ins_encode %{
12740     Label* L = $labl$$label;
12741     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);  // Condition code comes from the cop operand
12742   %}
12743   ins_pipe( pipe_jcc );
12744   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12745 %}
12746 
12747 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12748   match(If cop cmp);  // Short conditional jump that also consults the parity flag; handles only EQ and NE
12749   effect(USE labl);
12750 
12751   ins_cost(300);
12752   format %{ $$template
12753     if ($cop$$cmpcode == Assembler::notEqual) {
12754       $$emit$$"JP,u,s   $labl\n\t"
12755       $$emit$$"J$cop,u,s   $labl"
12756     } else {
12757       $$emit$$"JP,u,s   done\n\t"
12758       $$emit$$"J$cop,u,s  $labl\n\t"
12759       $$emit$$"done:"
12760     }
12761   %}
12762   size(4);  // Declared size: two short conditional jumps in either arm
12763   ins_encode %{
12764     Label* l = $labl$$label;
12765     if ($cop$$cmpcode == Assembler::notEqual) {
12766       __ jccb(Assembler::parity, *l);  // NE: parity set also takes the branch (presumably the unordered case - confirm)
12767       __ jccb(Assembler::notEqual, *l);
12768     } else if ($cop$$cmpcode == Assembler::equal) {
12769       Label done;
12770       __ jccb(Assembler::parity, done);  // EQ: parity set skips the branch to the label
12771       __ jccb(Assembler::equal, *l);
12772       __ bind(done);
12773     } else {
12774        ShouldNotReachHere();  // Any condition other than equal / notEqual is not handled here
12775     }
12776   %}
12777   ins_pipe(pipe_jcc);
12778   ins_short_branch(1);  // Tags this as a short-branch replacement (see section comment above)
12779 %}
12780 
12781 // ============================================================================
12782 // Long Compare
12783 //
12784 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12785 // is tricky.  The flavor of compare used depends on whether we are testing
12786 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12787 // The GE test is the negated LT test.  The LE test can be had by commuting
12788 // the operands (yielding a GE test) and then negating; negate again for the
12789 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12790 // NE test is negated from that.
12791 
12792 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12793 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12794 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12795 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12796 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12797 // foo match ends up with the wrong leaf.  One fix is to not match both
12798 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12799 // both forms beat the trinary form of long-compare and both are very useful
12800 // on Intel which has so few registers.
12801 
12802 // Manifest a CmpL3 result (-1, 0 or +1) in an integer register.  Very painful.
12803 // This is the test to avoid.
12804 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12805   match(Set dst (CmpL3 src1 src2));
12806   effect( KILL flags );  // The CMPs, INC and DEC below clobber the flags
12807   ins_cost(1000);
12808   format %{ "XOR    $dst,$dst\n\t"
12809             "CMP    $src1.hi,$src2.hi\n\t"
12810             "JLT,s  m_one\n\t"
12811             "JGT,s  p_one\n\t"
12812             "CMP    $src1.lo,$src2.lo\n\t"
12813             "JB,s   m_one\n\t"
12814             "JEQ,s  done\n"
12815     "p_one:\tINC    $dst\n\t"
12816             "JMP,s  done\n"
12817     "m_one:\tDEC    $dst\n"
12818      "done:" %}
12819   ins_encode %{
12820     Label p_one, m_one, done;
12821     __ xorptr($dst$$Register, $dst$$Register);  // Start from 0, the result for equal inputs
12822     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));  // High halves first, tested with signed conditions
12823     __ jccb(Assembler::less,    m_one);
12824     __ jccb(Assembler::greater, p_one);
12825     __ cmpl($src1$$Register, $src2$$Register);  // High halves equal: low halves decide, tested with an unsigned condition
12826     __ jccb(Assembler::below,   m_one);
12827     __ jccb(Assembler::equal,   done);
12828     __ bind(p_one);
12829     __ incrementl($dst$$Register);  // 0 -> +1
12830     __ jmpb(done);
12831     __ bind(m_one);
12832     __ decrementl($dst$$Register);  // 0 -> -1
12833     __ bind(done);
12834   %}
12835   ins_pipe( pipe_slow );
12836 %}
12837 
12838 //======
12839 // Manifest a CmpL-against-zero result in the normal flags.  Only good for LT or GE
12840 // compares.  Can be used for LE or GT compares by reversing arguments.
12841 // NOT GOOD FOR EQ/NE tests.
12842 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12843   match( Set flags (CmpL src zero ));
12844   ins_cost(100);
12845   format %{ "TEST   $src.hi,$src.hi" %}
12846   opcode(0x85);
12847   ins_encode( OpcP, RegReg_Hi2( src, src ) );  // Only the high half is tested against itself; the low half is never read
12848   ins_pipe( ialu_cr_reg_reg );
12849 %}
12850 
12851 // Manifest a reg-reg CmpL result in the normal flags.  Only good for LT or GE
12852 // compares.  Can be used for LE or GT compares by reversing arguments.
12853 // NOT GOOD FOR EQ/NE tests.
12854 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12855   match( Set flags (CmpL src1 src2 ));
12856   effect( TEMP tmp );  // Scratch register receives src1.hi so the SBB does not modify src1
12857   ins_cost(300);
12858   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12859             "MOV    $tmp,$src1.hi\n\t"
12860             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12861   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12862   ins_pipe( ialu_cr_reg_reg );
12863 %}
12864 
12865 // Long compares reg < zero/reg OR reg >= zero/reg.
12866 // Just a wrapper for a normal branch, plus the predicate test.
12867 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12868   match(If cmp flags);
12869   effect(USE labl);
12870   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12871   expand %{
12872     jmpCon(cmp,flags,labl);    // JLT or JGE...
12873   %}
12874 %}
12875 
12876 //======
12877 // Manifest a CmpUL-against-zero result in the normal flags.  Only good for LT or GE
12878 // compares.  Can be used for LE or GT compares by reversing arguments.
12879 // NOT GOOD FOR EQ/NE tests.
12880 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12881   match(Set flags (CmpUL src zero));
12882   ins_cost(100);
12883   format %{ "TEST   $src.hi,$src.hi" %}
12884   opcode(0x85);
12885   ins_encode(OpcP, RegReg_Hi2(src, src));  // Same opcode and encoding as cmpL_zero_flags_LTGE; only the flags operand class differs
12886   ins_pipe(ialu_cr_reg_reg);
12887 %}
12888 
12889 // Manifest a reg-reg CmpUL result in the normal flags.  Only good for LT or GE
12890 // compares.  Can be used for LE or GT compares by reversing arguments.
12891 // NOT GOOD FOR EQ/NE tests.
12892 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12893   match(Set flags (CmpUL src1 src2));
12894   effect(TEMP tmp);  // Scratch register receives src1.hi so the SBB does not modify src1
12895   ins_cost(300);
12896   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12897             "MOV    $tmp,$src1.hi\n\t"
12898             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12899   ins_encode(long_cmp_flags2(src1, src2, tmp));  // Same encoding class as the signed cmpL_reg_flags_LTGE
12900   ins_pipe(ialu_cr_reg_reg);
12901 %}
12902 
12903 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12904 // Just a wrapper for a normal branch, plus the predicate test.
12905 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12906   match(If cmp flags);
12907   effect(USE labl);
12908   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12909   expand %{
12910     jmpCon(cmp, flags, labl);    // JLT or JGE...
12911   %}
12912 %}
12913 
12914 // Compare 2 longs and CMOVE longs (LT/GE tests only, register source).
12915 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12916   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12917   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12918   ins_cost(400);
12919   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12920             "CMOV$cmp $dst.hi,$src.hi" %}
12921   opcode(0x0F,0x40);
12922   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );  // One CMOV per 32-bit half, low then high
12923   ins_pipe( pipe_cmov_reg_long );
12924 %}
12925 
12926 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12927   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));  // Memory-source form: the LoadL is folded into the CMOVs
12928   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12929   ins_cost(500);
12930   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12931             "CMOV$cmp $dst.hi,$src.hi" %}
12932   opcode(0x0F,0x40);
12933   ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );  // One CMOV per 32-bit half, low then high
12934   ins_pipe( pipe_cmov_reg_long );
12935 %}
12936 
12937 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
12938   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));  // Unsigned-compare flavor (cmpOpU / flagsReg_ulong_LTGE operands)
12939   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12940   ins_cost(400);
12941   expand %{
12942     cmovLL_reg_LTGE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
12943   %}
12944 %}
12945 
12946 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
12947   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));  // Unsigned-compare flavor, memory source
12948   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12949   ins_cost(500);
12950   expand %{
12951     cmovLL_mem_LTGE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
12952   %}
12953 %}
12954 
12955 // Compare 2 longs and CMOVE ints (LT/GE tests only, register source).
12956 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12957   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12958   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12959   ins_cost(200);
12960   format %{ "CMOV$cmp $dst,$src" %}
12961   opcode(0x0F,0x40);
12962   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // A single CMOV suffices for a 32-bit value
12963   ins_pipe( pipe_cmov_reg );
12964 %}
12965 
12966 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12967   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12968   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // Memory-source form: the LoadI is folded into the CMOV
12969   ins_cost(250);
12970   format %{ "CMOV$cmp $dst,$src" %}
12971   opcode(0x0F,0x40);
12972   ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
12973   ins_pipe( pipe_cmov_mem );
12974 %}
12975 
12976 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
12977   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12978   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));  // Unsigned-compare flavor (cmpOpU / flagsReg_ulong_LTGE operands)
12979   ins_cost(200);
12980   expand %{
12981     cmovII_reg_LTGE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
12982   %}
12983 %}
12984 
12985 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
12986   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12987   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // Unsigned-compare flavor, memory source
12988   ins_cost(250);
12989   expand %{
12990     cmovII_mem_LTGE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
12991   %}
12992 %}
12993 
12994 // Compare 2 longs and CMOVE ptrs (LT/GE tests only, register source).
12995 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12996   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12997   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12998   ins_cost(200);
12999   format %{ "CMOV$cmp $dst,$src" %}
13000   opcode(0x0F,0x40);
13001   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // Same encoding as cmovII_reg_LTGE, applied to pointer registers
13002   ins_pipe( pipe_cmov_reg );
13003 %}
13004 
13005 // Compare 2 unsigned longs and CMOVE ptrs (LT/GE tests only).
13006 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13007   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13008   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13009   ins_cost(200);
13010   expand %{
13011     cmovPP_reg_LTGE(cmp,flags,dst,src);  // Reuses the signed instruction with the same operands
13012   %}
13013 %}
13014 
13015 // Compare 2 longs and CMOVE doubles (LT/GE tests only; regDPR operands, selected when UseSSE<=1)
13016 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13017   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13018   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13019   ins_cost(200);
13020   expand %{
13021     fcmovDPR_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition double move
13022   %}
13023 %}
13024 
13025 // Compare 2 longs and CMOVE doubles (LT/GE tests only; regD operands, selected when UseSSE>=2)
13026 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13027   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13028   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13029   ins_cost(200);
13030   expand %{
13031     fcmovD_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition double move
13032   %}
13033 %}
13034 
13035 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13036   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13037   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // Long compare (LT/GE) driving a float move; regFPR operands, UseSSE==0 only
13038   ins_cost(200);
13039   expand %{
13040     fcmovFPR_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition float move
13041   %}
13042 %}
13043 
13044 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13045   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13046   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // Long compare (LT/GE) driving a float move; regF operands, UseSSE>=1 only
13047   ins_cost(200);
13048   expand %{
13049     fcmovF_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition float move
13050   %}
13051 %}
13052 
13053 //======
13054 // Manifest a CmpL-against-zero result in the normal flags.  Only good for EQ/NE compares.
13055 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13056   match( Set flags (CmpL src zero ));
13057   effect(TEMP tmp);  // Scratch register holds the OR of both halves so src is left intact
13058   ins_cost(200);
13059   format %{ "MOV    $tmp,$src.lo\n\t"
13060             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13061   ins_encode( long_cmp_flags0( src, tmp ) );
13062   ins_pipe( ialu_reg_reg_long );
13063 %}
13064 
13065 // Manifest a reg-reg CmpL result in the normal flags.  Only good for EQ/NE compares.
13066 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13067   match( Set flags (CmpL src1 src2 ));
13068   ins_cost(200+300);
13069   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13070             "JNE,s  skip\n\t"
13071             "CMP    $src1.hi,$src2.hi\n\t"
13072      "skip:\t" %}
13073   ins_encode( long_cmp_flags1( src1, src2 ) );  // Per the format: high halves are compared only when the low halves are equal
13074   ins_pipe( ialu_cr_reg_reg );
13075 %}
13076 
13077 // Long compare reg == zero/reg OR reg != zero/reg.
13078 // Just a wrapper for a normal branch, plus the predicate test.
13079 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13080   match(If cmp flags);
13081   effect(USE labl);
13082   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13083   expand %{
13084     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13085   %}
13086 %}
13087 
13088 //======
13089 // Manifest a CmpUL-against-zero result in the normal flags.  Only good for EQ/NE compares.
13090 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13091   match(Set flags (CmpUL src zero));
13092   effect(TEMP tmp);  // Scratch register holds the OR of both halves so src is left intact
13093   ins_cost(200);
13094   format %{ "MOV    $tmp,$src.lo\n\t"
13095             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13096   ins_encode(long_cmp_flags0(src, tmp));  // Same encoding class as the signed cmpL_zero_flags_EQNE
13097   ins_pipe(ialu_reg_reg_long);
13098 %}
13099 
13100 // Manifest a reg-reg CmpUL result in the normal flags.  Only good for EQ/NE compares.
13101 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13102   match(Set flags (CmpUL src1 src2));
13103   ins_cost(200+300);
13104   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13105             "JNE,s  skip\n\t"
13106             "CMP    $src1.hi,$src2.hi\n\t"
13107      "skip:\t" %}
13108   ins_encode(long_cmp_flags1(src1, src2));  // Same encoding class as the signed cmpL_reg_flags_EQNE
13109   ins_pipe(ialu_cr_reg_reg);
13110 %}
13111 
13112 // Unsigned long compare reg == zero/reg OR reg != zero/reg.
13113 // Just a wrapper for a normal branch, plus the predicate test.
13114 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13115   match(If cmp flags);
13116   effect(USE labl);
13117   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13118   expand %{
13119     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13120   %}
13121 %}
13122 
13123 // Compare 2 longs and CMOVE longs (EQ/NE tests only, register source).
13124 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13125   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13126   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13127   ins_cost(400);
13128   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13129             "CMOV$cmp $dst.hi,$src.hi" %}
13130   opcode(0x0F,0x40);
13131   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );  // One CMOV per 32-bit half, low then high
13132   ins_pipe( pipe_cmov_reg_long );
13133 %}
13134 
13135 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13136   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));  // Memory-source form: the LoadL is folded into the CMOVs
13137   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13138   ins_cost(500);
13139   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13140             "CMOV$cmp $dst.hi,$src.hi" %}
13141   opcode(0x0F,0x40);
13142   ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );  // One CMOV per 32-bit half, low then high
13143   ins_pipe( pipe_cmov_reg_long );
13144 %}
13145 
13146 // Compare 2 longs and CMOVE ints (EQ/NE tests only, register source).
13147 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13148   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13149   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13150   ins_cost(200);
13151   format %{ "CMOV$cmp $dst,$src" %}
13152   opcode(0x0F,0x40);
13153   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // A single CMOV suffices for a 32-bit value
13154   ins_pipe( pipe_cmov_reg );
13155 %}
13156 
13157 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13158   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13159   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // Memory-source form: the LoadI is folded into the CMOV
13160   ins_cost(250);
13161   format %{ "CMOV$cmp $dst,$src" %}
13162   opcode(0x0F,0x40);
13163   ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13164   ins_pipe( pipe_cmov_mem );
13165 %}
13166 
13167 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13168   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13169   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));  // Unsigned-compare flavor (cmpOpU / flagsReg_ulong_EQNE operands)
13170   ins_cost(200);
13171   expand %{
13172     cmovII_reg_EQNE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
13173   %}
13174 %}
13175 
13176 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13177   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // Unsigned-compare flavor, memory source
13179   ins_cost(250);
13180   expand %{
13181     cmovII_mem_EQNE(cmp, flags, dst, src);  // Reuses the signed instruction with the same operands
13182   %}
13183 %}
13184 
13185 // Compare 2 longs and CMOVE ptrs (EQ/NE tests only, register source).
13186 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13187   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13188   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13189   ins_cost(200);
13190   format %{ "CMOV$cmp $dst,$src" %}
13191   opcode(0x0F,0x40);
13192   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // Same encoding as cmovII_reg_EQNE, applied to pointer registers
13193   ins_pipe( pipe_cmov_reg );
13194 %}
13195 
13196 // Compare 2 unsigned longs and CMOVE ptrs (EQ/NE tests only).
13197 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13198   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13199   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13200   ins_cost(200);
13201   expand %{
13202     cmovPP_reg_EQNE(cmp,flags,dst,src);  // Reuses the signed instruction with the same operands
13203   %}
13204 %}
13205 
13206 // Compare 2 longs and CMOVE doubles (EQ/NE tests only; regDPR operands, selected when UseSSE<=1)
13207 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13208   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13209   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13210   ins_cost(200);
13211   expand %{
13212     fcmovDPR_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition double move
13213   %}
13214 %}
13215 
13216 // Compare 2 longs and CMOVE doubles (EQ/NE tests only; regD operands, selected when UseSSE>=2)
13217 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13218   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13219   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13220   ins_cost(200);
13221   expand %{
13222     fcmovD_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition double move
13223   %}
13224 %}
13225 
13226 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13227   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13228   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // Long compare (EQ/NE) driving a float move; regFPR operands, UseSSE==0 only
13229   ins_cost(200);
13230   expand %{
13231     fcmovFPR_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition float move
13232   %}
13233 %}
13234 
13235 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13236   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13237   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // Long compare (EQ/NE) driving a float move; regF operands, UseSSE>=1 only
13238   ins_cost(200);
13239   expand %{
13240     fcmovF_regS(cmp,flags,dst,src);  // Delegates to the generic signed-condition float move
13241   %}
13242 %}
13243 
13244 //======
13245 // Manifest a CmpL-against-zero result in the normal flags.  Only good for LE or GT compares.
13246 // Same as cmpL_reg_flags_LEGT except must negate src
13247 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13248   match( Set flags (CmpL src zero ));
13249   effect( TEMP tmp );  // Scratch register is zeroed and then used as the minuend
13250   ins_cost(300);
13251   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13252             "CMP    $tmp,$src.lo\n\t"
13253             "SBB    $tmp,$src.hi\n\t" %}
13254   ins_encode( long_cmp_flags3(src, tmp) );  // Per the format: flags come from 0 - src across both halves
13255   ins_pipe( ialu_reg_reg_long );
13256 %}
13257 
13258 // Manifest a reg-reg CmpL result in the normal flags.  Only good for LE or GT compares.
13259 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13260 // requires a commuted test to get the same result.
13261 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13262   match( Set flags (CmpL src1 src2 ));
13263   effect( TEMP tmp );  // Scratch register receives src2.hi so the SBB does not modify src2
13264   ins_cost(300);
13265   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13266             "MOV    $tmp,$src2.hi\n\t"
13267             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13268   ins_encode( long_cmp_flags2( src2, src1, tmp ) );  // Note the argument order: src2 first, then src1
13269   ins_pipe( ialu_cr_reg_reg );
13270 %}
13271 
13272 // Long compares reg <= zero/reg OR reg > zero/reg.
13273 // Just a wrapper for a normal branch, plus the predicate test
13274 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13275   match(If cmp flags);  // cmpOp_commute presumably supplies the commuted condition - confirm against its operand definition
13276   effect(USE labl);
13277   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13278   ins_cost(300);
13279   expand %{
13280     jmpCon(cmp,flags,labl);    // JGT or JLE...
13281   %}
13282 %}
13283 
13284 //======
13285 // Manifest a CmpUL-against-zero result in the normal flags.  Only good for LE or GT compares.
13286 // Same as cmpUL_reg_flags_LEGT except must negate src
13287 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13288   match(Set flags (CmpUL src zero));
13289   effect(TEMP tmp);  // Scratch register is zeroed and then used as the minuend
13290   ins_cost(300);
13291   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13292             "CMP    $tmp,$src.lo\n\t"
13293             "SBB    $tmp,$src.hi\n\t" %}
13294   ins_encode(long_cmp_flags3(src, tmp));  // Same encoding class as the signed cmpL_zero_flags_LEGT
13295   ins_pipe(ialu_reg_reg_long);
13296 %}
13297 
13298 // Manifest a reg-reg CmpUL result in the normal flags.  Only good for LE or GT compares.
13299 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13300 // requires a commuted test to get the same result.
13301 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13302   match(Set flags (CmpUL src1 src2));
13303   effect(TEMP tmp);  // Scratch register receives src2.hi so the SBB does not modify src2
13304   ins_cost(300);
13305   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13306             "MOV    $tmp,$src2.hi\n\t"
13307             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13308   ins_encode(long_cmp_flags2( src2, src1, tmp));  // Note the argument order: src2 first, then src1
13309   ins_pipe(ialu_cr_reg_reg);
13310 %}
13311 
13312 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13313 // Just a wrapper for a normal branch, plus the predicate test
13314 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13315   match(If cmp flags);  // cmpOpU_commute presumably supplies the commuted condition - confirm against its operand definition
13316   effect(USE labl);
13317   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13318   ins_cost(300);
13319   expand %{
13320     jmpCon(cmp, flags, labl);    // JGT or JLE...
13321   %}
13322 %}
13323 
13324 // Compare 2 longs and CMOVE longs (LE/GT tests only, register source).
13325 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13326   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13327   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13328   ins_cost(400);
13329   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13330             "CMOV$cmp $dst.hi,$src.hi" %}
13331   opcode(0x0F,0x40);
13332   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );  // One CMOV per 32-bit half, low then high
13333   ins_pipe( pipe_cmov_reg_long );
13334 %}
13335 
13336 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13337   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));  // memory-source form: the LoadL of src is folded into the conditional move
13338   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13339   ins_cost(500);
13340   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13341             "CMOV$cmp $dst.hi,$src.hi+4" %}
13342   opcode(0x0F,0x40);  // 0F 40 is the base of the CMOVcc opcode range; enc_cmov presumably folds in the condition from cmp - TODO confirm
13343   ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );  // two CMOVs bracketed by the inst-mark set/clear encodings
13344   ins_pipe( pipe_cmov_reg_long );
13345 %}
13346 
13347 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13348   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));  // same tree as cmovLL_reg_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operand classes
13349   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13350   ins_cost(400);
13351   expand %{  // no encoding of its own: reuses cmovLL_reg_LEGT
13352     cmovLL_reg_LEGT(cmp, flags, dst, src);
13353   %}
13354 %}
13355 
13356 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13357   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));  // same tree as cmovLL_mem_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operand classes
13358   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13359   ins_cost(500);
13360   expand %{  // no encoding of its own: reuses cmovLL_mem_LEGT
13361     cmovLL_mem_LEGT(cmp, flags, dst, src);
13362   %}
13363 %}
13364 
13365 // Compare 2 longs and CMOVE ints (register source; LE/GT tests only, per the predicate).
13366 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13367   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13368   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));  // dst is both an input and the result
13369   ins_cost(200);
13370   format %{ "CMOV$cmp $dst,$src" %}
13371   opcode(0x0F,0x40);  // 0F 40 is the base of the CMOVcc opcode range; enc_cmov presumably folds in the condition from cmp - TODO confirm
13372   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // a single register-to-register CMOV
13373   ins_pipe( pipe_cmov_reg );
13374 %}
13375 
13376 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13377   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13378   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // memory-source form: the LoadI of src is folded into the conditional move
13379   ins_cost(250);
13380   format %{ "CMOV$cmp $dst,$src" %}
13381   opcode(0x0F,0x40);  // 0F 40 is the base of the CMOVcc opcode range; enc_cmov presumably folds in the condition from cmp - TODO confirm
13382   ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );  // one CMOV bracketed by the inst-mark set/clear encodings
13383   ins_pipe( pipe_cmov_mem );
13384 %}
13385 
13386 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13387   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13388   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));  // same tree as cmovII_reg_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operand classes
13389   ins_cost(200);
13390   expand %{  // no encoding of its own: reuses cmovII_reg_LEGT
13391     cmovII_reg_LEGT(cmp, flags, dst, src);
13392   %}
13393 %}
13394 
13395 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13396   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13397   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));  // same tree as cmovII_mem_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operand classes
13398   ins_cost(250);
13399   expand %{  // no encoding of its own: reuses cmovII_mem_LEGT
13400     cmovII_mem_LEGT(cmp, flags, dst, src);
13401   %}
13402 %}
13403 
13404 // Compare 2 longs and CMOVE ptrs (register source; LE/GT tests only, per the predicate).
13405 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13406   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13407   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));  // dst is both an input and the result
13408   ins_cost(200);
13409   format %{ "CMOV$cmp $dst,$src" %}
13410   opcode(0x0F,0x40);  // 0F 40 is the base of the CMOVcc opcode range; enc_cmov presumably folds in the condition from cmp - TODO confirm
13411   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );  // a single register-to-register CMOV
13412   ins_pipe( pipe_cmov_reg );
13413 %}
13414 
13415 // Compare 2 unsigned longs and CMOVE ptrs (LE/GT tests only, per the predicate).
13416 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13417   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // needs CMOV support and an le/gt test
13418   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13419   ins_cost(200);
13420   expand %{  // no encoding of its own: reuses cmovPP_reg_LEGT
13421     cmovPP_reg_LEGT(cmp,flags,dst,src);
13422   %}
13423 %}
13424 
13425 // Compare 2 longs and CMOVE doubles (regDPR operands; used when UseSSE <= 1).
13426 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13427   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // UseSSE gate plus an le/gt test
13428   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13429   ins_cost(200);
13430   expand %{  // no encoding of its own: delegates to fcmovDPR_regS
13431     fcmovDPR_regS(cmp,flags,dst,src);
13432   %}
13433 %}
13434 
13435 // Compare 2 longs and CMOVE doubles (regD operands; used when UseSSE >= 2).
13436 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13437   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // UseSSE gate plus an le/gt test
13438   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13439   ins_cost(200);
13440   expand %{  // no encoding of its own: delegates to fcmovD_regS
13441     fcmovD_regS(cmp,flags,dst,src);
13442   %}
13443 %}
13444 
13445 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13446   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // only when UseSSE is 0, and only for le/gt tests
13447   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // long-compare flags driving a conditional move of floats (regFPR operands)
13448   ins_cost(200);
13449   expand %{  // no encoding of its own: delegates to fcmovFPR_regS
13450     fcmovFPR_regS(cmp,flags,dst,src);
13451   %}
13452 %}
13453 
13454 
13455 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13456   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));  // only when UseSSE is at least 1, and only for le/gt tests
13457   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));  // long-compare flags driving a conditional move of floats (regF operands)
13458   ins_cost(200);
13459   expand %{  // no encoding of its own: delegates to fcmovF_regS
13460     fcmovF_regS(cmp,flags,dst,src);
13461   %}
13462 %}
13463 
13464 
13465 // ============================================================================
13466 // Procedure Call/Return Instructions
13467 // Call Java Static Instruction
13468 // Note: If this code changes, the corresponding ret_addr_offset() and
13469 //       compute_padding() functions will have to be adjusted.
13470 instruct CallStaticJavaDirect(method meth) %{
13471   match(CallStaticJava);  // selected for the ideal CallStaticJava node
13472   effect(USE meth);
13473 
13474   ins_cost(300);
13475   format %{ "CALL,static " %}
13476   opcode(0xE8); /* E8 cd: CALL rel32 */
13477   ins_encode( pre_call_resets,
13478               Java_Static_Call( meth ),
13479               call_epilog,
13480               post_call_FPU );
13481   ins_pipe( pipe_slow );
13482   ins_alignment(4);  // alignment request for this instruction; see the compute_padding() note above
13483 %}
13484 
13485 // Call Java Dynamic Instruction
13486 // Note: If this code changes, the corresponding ret_addr_offset() and
13487 //       compute_padding() functions will have to be adjusted.
13488 instruct CallDynamicJavaDirect(method meth) %{
13489   match(CallDynamicJava);  // selected for the ideal CallDynamicJava node
13490   effect(USE meth);
13491 
13492   ins_cost(300);
13493   format %{ "MOV    EAX,(oop)-1\n\t"
13494             "CALL,dynamic" %}
13495   opcode(0xE8); /* E8 cd: CALL rel32 */
13496   ins_encode( pre_call_resets,
13497               Java_Dynamic_Call( meth ),
13498               call_epilog,
13499               post_call_FPU );
13500   ins_pipe( pipe_slow );
13501   ins_alignment(4);  // alignment request for this instruction; see the compute_padding() note above
13502 %}
13503 
13504 // Call Runtime Instruction
13505 instruct CallRuntimeDirect(method meth) %{
13506   match(CallRuntime );  // selected for the ideal CallRuntime node
13507   effect(USE meth);
13508 
13509   ins_cost(300);
13510   format %{ "CALL,runtime " %}
13511   opcode(0xE8); /* E8 cd: CALL rel32 */
13512   // Use FFREEs to clear entries in float stack (the FFree_Float_Stack_All step below)
13513   ins_encode( pre_call_resets,
13514               FFree_Float_Stack_All,
13515               Java_To_Runtime( meth ),
13516               post_call_FPU );
13517   ins_pipe( pipe_slow );
13518 %}
13519 
13520 // Call runtime without safepoint
13521 instruct CallLeafDirect(method meth) %{
13522   match(CallLeaf);  // selected for the ideal CallLeaf node
13523   effect(USE meth);
13524 
13525   ins_cost(300);
13526   format %{ "CALL_LEAF,runtime " %}
13527   opcode(0xE8); /* E8 cd: CALL rel32 */
13528   ins_encode( pre_call_resets,
13529               FFree_Float_Stack_All,
13530               Java_To_Runtime( meth ),
13531               Verify_FPU_For_Leaf, post_call_FPU );
13532   ins_pipe( pipe_slow );
13533 %}
13534 
13535 instruct CallLeafNoFPDirect(method meth) %{
13536   match(CallLeafNoFP);  // selected for the ideal CallLeafNoFP node
13537   effect(USE meth);
13538 
13539   ins_cost(300);
13540   format %{ "CALL_LEAF_NOFP,runtime " %}
13541   opcode(0xE8); /* E8 cd: CALL rel32 */
13542   ins_encode(pre_call_resets, Java_To_Runtime(meth));  // unlike CallLeafDirect, no FFree / FPU verify / post-call FPU steps are listed
13543   ins_pipe( pipe_slow );
13544 %}
13545 
13546 
13547 // Return Instruction
13548 // Remove the return address & jump to it.
13549 instruct Ret() %{
13550   match(Return);  // selected for the ideal Return node
13551   format %{ "RET" %}
13552   opcode(0xC3);  // C3 is the near-return opcode
13553   ins_encode(OpcP);  // presumably emits only the primary opcode byte - TODO confirm against the OpcP enc_class
13554   ins_pipe( pipe_jmp );
13555 %}
13556 
13557 // Tail Call; Jump from runtime stub to Java code.
13558 // Also known as an 'interprocedural jump'.
13559 // Target of jump will eventually return to caller.
13560 // TailJump below removes the return address.
13561 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13562 // emitted just above the TailCall which has reset ebp to the caller state.
13563 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13564   match(TailCall jump_target method_ptr);  // method_ptr is an input only; the encoding below references just jump_target
13565   ins_cost(300);
13566   format %{ "JMP    $jump_target \t# EBX holds method" %}
13567   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13568   ins_encode( OpcP, RegOpc(jump_target) );  // indirect jump through the jump_target register
13569   ins_pipe( pipe_jmp );
13570 %}
13571 
13572 
13573 // Tail Jump; remove the return address; jump to target.
13574 // TailCall above leaves the return address around.
13575 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13576   match( TailJump jump_target ex_oop );  // ex_oop is an input only; the encoding below references just jump_target
13577   ins_cost(300);
13578   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13579             "JMP    $jump_target " %}
13580   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13581   ins_encode( enc_pop_rdx,
13582               OpcP, RegOpc(jump_target) );
13583   ins_pipe( pipe_jmp );
13584 %}
13585 
13586 // Create exception oop: created by stack-crawling runtime code.
13587 // Created exception is now available to this handler, and is set up
13588 // just prior to jumping to this handler.  No code emitted.
13589 instruct CreateException( eAXRegP ex_oop )
13590 %{
13591   match(Set ex_oop (CreateEx));
13592 
13593   size(0);  // zero bytes: consistent with the empty ins_encode() below
13594   // the format text only documents where the oop lives; nothing is encoded
13595   format %{ "# exception oop is in EAX; no code emitted" %}
13596   ins_encode();
13597   ins_pipe( empty );
13598 %}
13599 
13600 
13601 // Rethrow exception:
13602 // The exception oop will come in the first argument position.
13603 // Then JUMP (not call) to the rethrow stub code.
13604 instruct RethrowException()
13605 %{
13606   match(Rethrow);  // selected for the ideal Rethrow node; takes no explicit operands
13607 
13608   // the jump itself is produced by the enc_rethrow encoding class
13609   format %{ "JMP    rethrow_stub" %}
13610   ins_encode(enc_rethrow);
13611   ins_pipe( pipe_jmp );
13612 %}
13613 
13614 // inlined locking and unlocking
13615 
13616 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13617   predicate(LockingMode != LM_LIGHTWEIGHT);  // used for every LockingMode other than LM_LIGHTWEIGHT
13618   match(Set cr (FastLock object box));  // the result of the lock attempt is delivered in the flags register
13619   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);  // box is read then clobbered; tmp, scr and thread are scratch (thread is not listed in the format text)
13620   ins_cost(300);
13621   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13622   ins_encode %{
13623     // Load the current thread into the scratch register before the lock sequence.
13624     __ get_thread($thread$$Register);
13625     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13626                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr);  // NOTE(review): meaning of the noreg / nullptr arguments is not visible here - check fast_lock
13627   %}
13628   ins_pipe(pipe_slow);
13629 %}
13629 
13630 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13631   predicate(LockingMode != LM_LIGHTWEIGHT);  // used for every LockingMode other than LM_LIGHTWEIGHT
13632   match(Set cr (FastUnlock object box));  // the result of the unlock attempt is delivered in the flags register
13633   effect(TEMP tmp, USE_KILL box);  // box is read then clobbered; tmp is scratch
13634   ins_cost(300);
13635   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13636   ins_encode %{
13637     // The whole unlock sequence is emitted by the macro assembler helper.
13638     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
13639   %}
13640   ins_pipe(pipe_slow);
13641 %}
13641 
13642 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13643   predicate(LockingMode == LM_LIGHTWEIGHT);  // used only when LockingMode is LM_LIGHTWEIGHT
13644   match(Set cr (FastLock object box));  // the result of the lock attempt is delivered in the flags register
13645   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);  // box is read then clobbered; eax_reg, tmp and thread are scratch (thread is not listed in the format text)
13646   ins_cost(300);
13647   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13648   ins_encode %{
13649     // Load the current thread into the scratch register before the lock sequence.
13650     __ get_thread($thread$$Register);
13651     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13652   %}
13653   ins_pipe(pipe_slow);
13654 %}
13654 
13655 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
13656   predicate(LockingMode == LM_LIGHTWEIGHT);  // used only when LockingMode is LM_LIGHTWEIGHT
13657   match(Set cr (FastUnlock object eax_reg));  // eax_reg takes the operand position that is named box in cmpFastUnlock
13658   effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);  // eax_reg is read then clobbered; tmp and thread are scratch (thread is not listed in the format text)
13659   ins_cost(300);
13660   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13661   ins_encode %{
13662     // Load the current thread into the scratch register before the unlock sequence.
13663     __ get_thread($thread$$Register);
13664     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13665   %}
13666   ins_pipe(pipe_slow);
13667 %}
13667 
13668 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13669   match(Set dst (MaskAll src));  // MaskAll of a long source into a mask (kReg) register
13670   predicate(Matcher::vector_length(n) <= 32);  // vector length up to and including 32, despite the LT32 in the instruct name
13671   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13672   ins_encode %{
13673     const int vlen = Matcher::vector_length(this);  // length of the mask being produced
13674     __ vector_maskall_operation($dst$$KRegister, $src$$Register, vlen);
13675   %}
13676   ins_pipe( pipe_slow );
13677 %}
13678 
13679 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13680   match(Set dst (MaskAll src));  // MaskAll of a long source into a mask (kReg) register
13681   predicate(Matcher::vector_length(n) > 32);  // vector lengths above 32 only
13682   effect(TEMP ktmp);  // ktmp is a scratch mask register
13683   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13684   ins_encode %{
13685     const int vlen = Matcher::vector_length(this);  // length of the mask being produced
13686     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, vlen);
13687   %}
13688   ins_pipe( pipe_slow );
13689 %}
13690 
13691 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13692   match(Set dst (MaskAll src));  // MaskAll of an int source into a mask (kReg) register
13693   predicate(Matcher::vector_length(n) > 32);  // vector lengths above 32 only
13694   effect(TEMP ktmp);  // ktmp is a scratch mask register
13695   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13696   ins_encode %{
13697     const int vlen = Matcher::vector_length(this);  // length of the mask being produced
13698     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, vlen);
13699   %}
13700   ins_pipe( pipe_slow );
13701 %}
13702 
13703 // ============================================================================
13704 // Safepoint Instruction: polls by reading through the poll register with a TEST.
13705 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13706   match(SafePoint poll);
13707   effect(KILL cr, USE poll);  // the TEST clobbers the flags; poll is only read
13708 
13709   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13710   ins_cost(125);
13711   // EBP would need size(3), which is why poll uses the eRegP_no_EBP operand class
13712   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13713   ins_encode %{
13714     __ set_inst_mark();
13715     __ relocate(relocInfo::poll_type);  // tag the instruction emitted next as a poll site
13716     __ clear_inst_mark();
13717     address pre_pc = __ pc();  // start of the TEST instruction emitted below
13718     __ testl(rax, Address($poll$$Register, 0));
13719     // 0x85 is the opcode byte of TEST r/m32, r32; verify it is the first byte emitted.
13720     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13721   %}
13722   ins_pipe(ialu_reg_mem);
13723 %}
13724 
13725 
13726 // ============================================================================
13727 // The instruct name tlsLoadP is KNOWN by the ADLC and cannot be changed.
13728 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13729 // for this instruction.
13730 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13731   match(Set dst (ThreadLocal));
13732   effect(DEF dst, KILL cr);  // dst is written only; the flags are clobbered
13733 
13734   format %{ "MOV    $dst, Thread::current()" %}
13735   ins_encode %{
13736     const Register thread_reg = as_Register($dst$$reg);  // destination that receives the current thread
13737     __ get_thread(thread_reg);
13738   %}
13739   ins_pipe( ialu_reg_fat );
13740 %}
13741 
13742 
13743 
13744 //----------PEEPHOLE RULES-----------------------------------------------------
13745 // These must follow all instruction definitions as they use the names
13746 // defined in the instructions definitions.
13747 //
13748 // peepmatch ( root_instr_name [preceding_instruction]* );
13749 //
13750 // peepconstraint %{
13751 // (instruction_number.operand_name relational_op instruction_number.operand_name
13752 //  [, ...] );
13753 // // instruction numbers are zero-based using left to right order in peepmatch
13754 //
13755 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13756 // // provide an instruction_number.operand_name for each operand that appears
13757 // // in the replacement instruction's match rule
13758 //
13759 // ---------VM FLAGS---------------------------------------------------------
13760 //
13761 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13762 //
13763 // Each peephole rule is given an identifying number starting with zero and
13764 // increasing by one in the order seen by the parser.  An individual peephole
13765 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13766 // on the command-line.
13767 //
13768 // ---------CURRENT LIMITATIONS----------------------------------------------
13769 //
13770 // Only match adjacent instructions in same basic block
13771 // Only equality constraints
13772 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13773 // Only one replacement instruction
13774 //
13775 // ---------EXAMPLE----------------------------------------------------------
13776 //
13777 // // pertinent parts of existing instructions in architecture description
13778 // instruct movI(rRegI dst, rRegI src) %{
13779 //   match(Set dst (CopyI src));
13780 // %}
13781 //
13782 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13783 //   match(Set dst (AddI dst src));
13784 //   effect(KILL cr);
13785 // %}
13786 //
13787 // // Change (inc mov) to lea
13788 // peephole %{
13789 //   // increment preceded by register-register move
13790 //   peepmatch ( incI_eReg movI );
13791 //   // require that the destination register of the increment
13792 //   // match the destination register of the move
13793 //   peepconstraint ( 0.dst == 1.dst );
13794 //   // construct a replacement instruction that sets
13795 //   // the destination to ( move's source register + one )
13796 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13797 // %}
13798 //
13799 // Implementation no longer uses movX instructions since
13800 // machine-independent system no longer uses CopyX nodes.
13801 //
13802 // peephole %{
13803 //   peepmatch ( incI_eReg movI );
13804 //   peepconstraint ( 0.dst == 1.dst );
13805 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13806 // %}
13807 //
13808 // peephole %{
13809 //   peepmatch ( decI_eReg movI );
13810 //   peepconstraint ( 0.dst == 1.dst );
13811 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13812 // %}
13813 //
13814 // peephole %{
13815 //   peepmatch ( addI_eReg_imm movI );
13816 //   peepconstraint ( 0.dst == 1.dst );
13817 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13818 // %}
13819 //
13820 // peephole %{
13821 //   peepmatch ( addP_eReg_imm movP );
13822 //   peepconstraint ( 0.dst == 1.dst );
13823 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13824 // %}
13825 
13826 // // Change load of spilled value to only a spill
13827 // instruct storeI(memory mem, rRegI src) %{
13828 //   match(Set mem (StoreI mem src));
13829 // %}
13830 //
13831 // instruct loadI(rRegI dst, memory mem) %{
13832 //   match(Set dst (LoadI mem));
13833 // %}
13834 //
13835 peephole %{  // drop a loadI that re-reads what the adjacent storeI wrote
13836   peepmatch ( loadI storeI );  // instruction 0 is the root loadI, instruction 1 the storeI listed after it
13837   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );  // same register (store source = load destination) and same memory operand
13838   peepreplace ( storeI( 1.mem 1.mem 1.src ) );  // keep only the store, rebuilt from the storeI operands
13839 %}
13840 
13841 //----------SMARTSPILL RULES---------------------------------------------------
13842 // These must follow all instruction definitions as they use the names
13843 // defined in the instructions definitions.