1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
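// For example, the EBX definition below, reg_def EBX(SOC, SOE, Op_RegI, 3, ...),
// declares EBX save-on-call in compiled Java code, save-on-entry under the C
// calling convention, spillable as an integer (Op_RegI), with hardware encoding 3.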
   61 
   62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code,
// then SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are enabled as SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emission
// of assembly for a machnode.  During emission the FPU stack is pushed,
// making FPR1 == st(1) temporarily.  However, at any safepoint the stack
// will not have this extra element, so FPR1 == st(0) from the oopMap
// viewpoint.  This same weirdness with numbering forces the instruction
// encoding to play games with the register encode to correct for the 0/1
// issue.  See MachSpillCopyNode::implementation, where it does flt->flt
// moves, for an example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
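// Example: for the long pair EDX:EAX the low half is EAX (encoding 0), so
// HIGH_FROM_LOW_ENC(0) yields 2, the encoding of EDX.  The same +2 step maps
// ECX->EBX and EBP->EDI for the other two pairs.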
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
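// The signmask pools are AND masks that clear the sign bit(s) for AbsF/AbsD;
// the signflip pools are XOR masks that toggle the sign bit(s) for NegF/NegD.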
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
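// The 5 bytes above are the CALL rel32 instruction itself (opcode E8 plus a
// 32-bit displacement); the dynamic call is preceded by the 5-byte MOV EAX,imm32
// that loads the inline-cache holder, hence 10.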
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
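// Packs a ModRM byte: f1 is the mod field (bits 7:6), f2 the reg/opcode-extension
// field (bits 5:3), and f3 the r/m field (bits 2:0).  The same 2-3-3 layout is
// reused below to build SIB bytes (scale/index/base).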
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // 8-bit displacement
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // 32-bit displacement
  407   }
  408 }
  409 
// Encode a register-memory operand: reg, [base + index*scale + displacement]  (emit_reg_mem)
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
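  // Summary of the addressing forms chosen below:
  //   mod=00: no displacement (not usable with an EBP base),
  //   mod=01: 8-bit displacement, mod=10: 32-bit displacement.
  //   index == 0x4 encodes "no index"; base == -1 selects the absolute [disp32] form.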
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
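// Materialize a three-way compare result in 'dst' from the flags set by
// (u)comiss/(u)comisd: -1 if less or unordered (NaN), 0 if equal, +1 if greater.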
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
  622     // NOTE: We set the table base offset here because users might be
  623     // emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  661               "JA       #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
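  // 0x58 | reg is the one-byte POP r32 encoding; with EBP_enc this pops the saved EBP.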
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  712     }
  713     __ relocate(relocInfo::poll_return_type);
  714     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  715   }
  716 }
  717 
  718 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  719   return MachNode::size(ra_); // too many variables; just compute it
  720                               // the hard way
  721 }
  722 
  723 int MachEpilogNode::reloc() const {
  724   return 0; // a large enough number
  725 }
  726 
  727 const Pipeline * MachEpilogNode::pipeline() const {
  728   return MachNode::pipeline_class();
  729 }
  730 
  731 //=============================================================================
  732 
  733 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  734 static enum RC rc_class( OptoReg::Name reg ) {
  735 
  736   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  737   if (OptoReg::is_stack(reg)) return rc_stack;
  738 
  739   VMReg r = OptoReg::as_VMReg(reg);
  740   if (r->is_Register()) return rc_int;
  741   if (r->is_FloatRegister()) {
  742     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  743     return rc_float;
  744   }
  745   if (r->is_KRegister()) return rc_kreg;
  746   assert(r->is_XMMRegister(), "must be");
  747   return rc_xmm;
  748 }
  749 
  750 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  751                         int opcode, const char *op_str, int size, outputStream* st ) {
  752   if( cbuf ) {
  753     emit_opcode  (*cbuf, opcode );
  754     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  755 #ifndef PRODUCT
  756   } else if( !do_size ) {
  757     if( size != 0 ) st->print("\n\t");
  758     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  759       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  760       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  761     } else { // FLD, FST, PUSH, POP
  762       st->print("%s [ESP + #%d]",op_str,offset);
  763     }
  764 #endif
  765   }
  766   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  767   return size+3+offset_size;
  768 }
  769 
  770 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  771 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  772                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  773   int in_size_in_bits = Assembler::EVEX_32bit;
  774   int evex_encoding = 0;
  775   if (reg_lo+1 == reg_hi) {
  776     in_size_in_bits = Assembler::EVEX_64bit;
  777     evex_encoding = Assembler::VEX_W;
  778   }
  779   if (cbuf) {
  780     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
  782     //                          it maps more cases to single byte displacement
  783     _masm.set_managed();
  784     if (reg_lo+1 == reg_hi) { // double move?
  785       if (is_load) {
  786         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  787       } else {
  788         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  789       }
  790     } else {
  791       if (is_load) {
  792         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  793       } else {
  794         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  795       }
  796     }
  797 #ifndef PRODUCT
  798   } else if (!do_size) {
  799     if (size != 0) st->print("\n\t");
  800     if (reg_lo+1 == reg_hi) { // double move?
  801       if (is_load) st->print("%s %s,[ESP + #%d]",
  802                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  803                               Matcher::regName[reg_lo], offset);
  804       else         st->print("MOVSD  [ESP + #%d],%s",
  805                               offset, Matcher::regName[reg_lo]);
  806     } else {
  807       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  808                               Matcher::regName[reg_lo], offset);
  809       else         st->print("MOVSS  [ESP + #%d],%s",
  810                               offset, Matcher::regName[reg_lo]);
  811     }
  812 #endif
  813   }
  814   bool is_single_byte = false;
  815   if ((UseAVX > 2) && (offset != 0)) {
  816     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  817   }
  818   int offset_size = 0;
  819   if (UseAVX > 2 ) {
  820     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  821   } else {
  822     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  823   }
  824   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  825   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  826   return size+5+offset_size;
  827 }
  828 
  829 
  830 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  831                             int src_hi, int dst_hi, int size, outputStream* st ) {
  832   if (cbuf) {
  833     MacroAssembler _masm(cbuf);
  834     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  835     _masm.set_managed();
  836     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  837       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  838                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  839     } else {
  840       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  841                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  842     }
  843 #ifndef PRODUCT
  844   } else if (!do_size) {
  845     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  847       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  848         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  849       } else {
  850         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       }
  852     } else {
  853       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  854         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  855       } else {
  856         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       }
  858     }
  859 #endif
  860   }
  861   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  862   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  863   int sz = (UseAVX > 2) ? 6 : 4;
  864   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  865       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  866   return size + sz;
  867 }
  868 
  869 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  870                             int src_hi, int dst_hi, int size, outputStream* st ) {
  871   // 32-bit
  872   if (cbuf) {
  873     MacroAssembler _masm(cbuf);
  874     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  875     _masm.set_managed();
  876     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  877              as_Register(Matcher::_regEncode[src_lo]));
  878 #ifndef PRODUCT
  879   } else if (!do_size) {
  880     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  881 #endif
  882   }
  883   return (UseAVX> 2) ? 6 : 4;
  884 }
  885 
  886 
  887 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  888                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  889   // 32-bit
  890   if (cbuf) {
  891     MacroAssembler _masm(cbuf);
  892     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  893     _masm.set_managed();
  894     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  895              as_XMMRegister(Matcher::_regEncode[src_lo]));
  896 #ifndef PRODUCT
  897   } else if (!do_size) {
  898     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  899 #endif
  900   }
  901   return (UseAVX> 2) ? 6 : 4;
  902 }
  903 
  904 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  905   if( cbuf ) {
  906     emit_opcode(*cbuf, 0x8B );
  907     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  908 #ifndef PRODUCT
  909   } else if( !do_size ) {
  910     if( size != 0 ) st->print("\n\t");
  911     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  912 #endif
  913   }
  914   return size+2;
  915 }
  916 
  917 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  918                                  int offset, int size, outputStream* st ) {
  919   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  920     if( cbuf ) {
  921       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  922       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  923 #ifndef PRODUCT
  924     } else if( !do_size ) {
  925       if( size != 0 ) st->print("\n\t");
  926       st->print("FLD    %s",Matcher::regName[src_lo]);
  927 #endif
  928     }
  929     size += 2;
  930   }
  931 
  932   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
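  // EBX and EDX are named here only for their hardware encodings (3 and 2), which
  // become the /3 (store-and-pop) and /2 (store) opcode extension placed in the
  // ModRM reg field by impl_helper.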
  933   const char *op_str;
  934   int op;
  935   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  936     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  937     op = 0xDD;
  938   } else {                   // 32-bit store
  939     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  940     op = 0xD9;
  941     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  942   }
  943 
  944   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  945 }
  946 
  947 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  948 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  949                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  950 
  951 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  952                             int stack_offset, int reg, uint ireg, outputStream* st);
  953 
  954 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  955                                      int dst_offset, uint ireg, outputStream* st) {
  956   if (cbuf) {
  957     MacroAssembler _masm(cbuf);
  958     switch (ireg) {
  959     case Op_VecS:
  960       __ pushl(Address(rsp, src_offset));
  961       __ popl (Address(rsp, dst_offset));
  962       break;
  963     case Op_VecD:
  964       __ pushl(Address(rsp, src_offset));
  965       __ popl (Address(rsp, dst_offset));
  966       __ pushl(Address(rsp, src_offset+4));
  967       __ popl (Address(rsp, dst_offset+4));
  968       break;
  969     case Op_VecX:
  970       __ movdqu(Address(rsp, -16), xmm0);
  971       __ movdqu(xmm0, Address(rsp, src_offset));
  972       __ movdqu(Address(rsp, dst_offset), xmm0);
  973       __ movdqu(xmm0, Address(rsp, -16));
  974       break;
  975     case Op_VecY:
  976       __ vmovdqu(Address(rsp, -32), xmm0);
  977       __ vmovdqu(xmm0, Address(rsp, src_offset));
  978       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, -32));
  980       break;
  981     case Op_VecZ:
  982       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  983       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  984       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  986       break;
  987     default:
  988       ShouldNotReachHere();
  989     }
  990 #ifndef PRODUCT
  991   } else {
  992     switch (ireg) {
  993     case Op_VecS:
  994       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  995                 "popl    [rsp + #%d]",
  996                 src_offset, dst_offset);
  997       break;
  998     case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1003                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1004       break;
 1005      case Op_VecX:
 1006       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1007                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1008                 "movdqu  [rsp + #%d], xmm0\n\t"
 1009                 "movdqu  xmm0, [rsp - #16]",
 1010                 src_offset, dst_offset);
 1011       break;
 1012     case Op_VecY:
 1013       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1014                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1015                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1016                 "vmovdqu xmm0, [rsp - #32]",
 1017                 src_offset, dst_offset);
 1018       break;
 1019     case Op_VecZ:
 1020       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1021                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1022                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1023                 "vmovdqu xmm0, [rsp - #64]",
 1024                 src_offset, dst_offset);
 1025       break;
 1026     default:
 1027       ShouldNotReachHere();
 1028     }
 1029 #endif
 1030   }
 1031 }
 1032 
 1033 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1034   // Get registers to move
 1035   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1036   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1037   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1038   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1039 
 1040   enum RC src_second_rc = rc_class(src_second);
 1041   enum RC src_first_rc = rc_class(src_first);
 1042   enum RC dst_second_rc = rc_class(dst_second);
 1043   enum RC dst_first_rc = rc_class(dst_first);
 1044 
 1045   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1046 
 1047   // Generate spill code!
 1048   int size = 0;
 1049 
 1050   if( src_first == dst_first && src_second == dst_second )
 1051     return size;            // Self copy, no move
 1052 
 1053   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1054     uint ireg = ideal_reg();
 1055     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1056     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1057     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1058     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1059       // mem -> mem
 1060       int src_offset = ra_->reg2offset(src_first);
 1061       int dst_offset = ra_->reg2offset(dst_first);
 1062       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1063     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1064       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1066       int stack_offset = ra_->reg2offset(dst_first);
 1067       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1068     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1069       int stack_offset = ra_->reg2offset(src_first);
 1070       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1071     } else {
 1072       ShouldNotReachHere();
 1073     }
 1074     return 0;
 1075   }
 1076 
 1077   // --------------------------------------
 1078   // Check for mem-mem move.  push/pop to move.
 1079   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
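    // PUSH m32 is opcode 0xFF /6 and POP m32 is 0x8F /0: ESI (encoding 6) and
    // EAX (encoding 0) are passed only to supply those opcode extensions to impl_helper.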
 1080     if( src_second == dst_first ) { // overlapping stack copy ranges
 1081       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1082       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1083       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1084       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1085     }
 1086     // move low bits
 1087     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1088     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1089     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1090       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1091       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1092     }
 1093     return size;
 1094   }
 1095 
 1096   // --------------------------------------
 1097   // Check for integer reg-reg copy
 1098   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1099     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1100 
 1101   // Check for integer store
 1102   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1103     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1104 
 1105   // Check for integer load
 1106   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1107     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1108 
 1109   // Check for integer reg-xmm reg copy
 1110   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1111     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1112             "no 64 bit integer-float reg moves" );
 1113     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1114   }
 1115   // --------------------------------------
 1116   // Check for float reg-reg copy
 1117   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1118     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1119             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1120     if( cbuf ) {
 1121 
 1122       // Note the mucking with the register encode to compensate for the 0/1
 1123       // indexing issue mentioned in a comment in the reg_def sections
 1124       // for FPR registers many lines above here.
 1125 
 1126       if( src_first != FPR1L_num ) {
 1127         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1128         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1129         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1130         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1131      } else {
 1132         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1133         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1134      }
 1135 #ifndef PRODUCT
 1136     } else if( !do_size ) {
 1137       if( size != 0 ) st->print("\n\t");
 1138       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1139       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1140 #endif
 1141     }
 1142     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1143   }
 1144 
 1145   // Check for float store
 1146   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1147     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1148   }
 1149 
 1150   // Check for float load
 1151   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1152     int offset = ra_->reg2offset(src_first);
 1153     const char *op_str;
 1154     int op;
 1155     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1156       op_str = "FLD_D";
 1157       op = 0xDD;
 1158     } else {                   // 32-bit load
 1159       op_str = "FLD_S";
 1160       op = 0xD9;
 1161       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1162     }
 1163     if( cbuf ) {
 1164       emit_opcode  (*cbuf, op );
 1165       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1166       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1167       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1168 #ifndef PRODUCT
 1169     } else if( !do_size ) {
 1170       if( size != 0 ) st->print("\n\t");
 1171       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1172 #endif
 1173     }
 1174     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1175     return size + 3+offset_size+2;
 1176   }
 1177 
 1178   // Check for xmm reg-reg copy
 1179   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1180     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1181             (src_first+1 == src_second && dst_first+1 == dst_second),
 1182             "no non-adjacent float-moves" );
 1183     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1184   }
 1185 
 1186   // Check for xmm reg-integer reg copy
 1187   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1188     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1189             "no 64 bit float-integer reg moves" );
 1190     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1191   }
 1192 
 1193   // Check for xmm store
 1194   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1195     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1196   }
 1197 
 1198   // Check for float xmm load
 1199   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1200     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1201   }
 1202 
 1203   // Copy from float reg to xmm reg
 1204   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1205     // copy to the top of stack from floating point reg
 1206     // and use LEA to preserve flags
 1207     if( cbuf ) {
 1208       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1209       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1210       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1211       emit_d8(*cbuf,0xF8);
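      // Emits 8D 64 24 F8, i.e. LEA ESP,[ESP-8]: adjusts ESP without touching
      // the condition flags, unlike SUB.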
 1212 #ifndef PRODUCT
 1213     } else if( !do_size ) {
 1214       if( size != 0 ) st->print("\n\t");
 1215       st->print("LEA    ESP,[ESP-8]");
 1216 #endif
 1217     }
 1218     size += 4;
 1219 
 1220     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1221 
 1222     // Copy from the temp memory to the xmm reg.
 1223     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1224 
 1225     if( cbuf ) {
 1226       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1227       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1228       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1229       emit_d8(*cbuf,0x08);
 1230 #ifndef PRODUCT
 1231     } else if( !do_size ) {
 1232       if( size != 0 ) st->print("\n\t");
 1233       st->print("LEA    ESP,[ESP+8]");
 1234 #endif
 1235     }
 1236     size += 4;
 1237     return size;
 1238   }
 1239 
 1240   // AVX-512 opmask specific spilling.
 1241   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1242     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1243     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1244     MacroAssembler _masm(cbuf);
 1245     int offset = ra_->reg2offset(src_first);
 1246     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247     return 0;
 1248   }
 1249 
 1250   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1251     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1252     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1253     MacroAssembler _masm(cbuf);
 1254     int offset = ra_->reg2offset(dst_first);
 1255     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1256     return 0;
 1257   }
 1258 
 1259   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1260     Unimplemented();
 1261     return 0;
 1262   }
 1263 
 1264   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1265     Unimplemented();
 1266     return 0;
 1267   }
 1268 
 1269   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1270     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1271     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1272     MacroAssembler _masm(cbuf);
 1273     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1274     return 0;
 1275   }
 1276 
 1277   assert( size > 0, "missed a case" );
 1278 
 1279   // --------------------------------------------------------------------
 1280   // Check for second bits still needing moving.
 1281   if( src_second == dst_second )
 1282     return size;               // Self copy; no move
 1283   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1284 
 1285   // Check for second word int-int move
 1286   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1287     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1288 
 1289   // Check for second word integer store
 1290   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1291     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1292 
 1293   // Check for second word integer load
 1294   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1295     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1296 
 1297   Unimplemented();
 1298   return 0; // Mute compiler
 1299 }
 1300 
 1301 #ifndef PRODUCT
 1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1303   implementation( NULL, ra_, false, st );
 1304 }
 1305 #endif
 1306 
 1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1308   implementation( &cbuf, ra_, false, NULL );
 1309 }
 1310 
 1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1312   return MachNode::size(ra_);
 1313 }
 1314 
 1315 
 1316 //=============================================================================
 1317 #ifndef PRODUCT
 1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1320   int reg = ra_->get_reg_first(this);
 1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1322 }
 1323 #endif
 1324 
 1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1327   int reg = ra_->get_encode(this);
 1328   if( offset >= 128 ) {
 1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1330     emit_rm(cbuf, 0x2, reg, 0x04);
 1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1332     emit_d32(cbuf, offset);
 1333   }
 1334   else {
 1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1336     emit_rm(cbuf, 0x1, reg, 0x04);
 1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1338     emit_d8(cbuf, offset);
 1339   }
 1340 }
 1341 
 1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   if( offset >= 128 ) {
 1345     return 7;
 1346   }
 1347   else {
 1348     return 4;
 1349   }
 1350 }
 1351 
 1352 //=============================================================================
 1353 #ifndef PRODUCT
 1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1357   st->print_cr("\tNOP");
 1358   st->print_cr("\tNOP");
 1359   if( !OptoBreakpoint )
 1360     st->print_cr("\tNOP");
 1361 }
 1362 #endif
 1363 
 1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1365   MacroAssembler masm(&cbuf);
 1366 #ifdef ASSERT
 1367   uint insts_size = cbuf.insts_size();
 1368 #endif
 1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1370   masm.jump_cc(Assembler::notEqual,
 1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1374   int nops_cnt = 2;
 1375   if( !OptoBreakpoint ) // Leave space for int3
 1376      nops_cnt += 1;
 1377   masm.nop(nops_cnt);
 1378 
 1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1380 }
 1381 
 1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1383   return OptoBreakpoint ? 11 : 12;
 1384 }
 1385 
 1386 
 1387 //=============================================================================
 1388 
 1389 // Vector calling convention not supported.
 1390 const bool Matcher::supports_vector_calling_convention() {
 1391   return false;
 1392 }
 1393 
 1394 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1395   Unimplemented();
 1396   return OptoRegPair(0, 0);
 1397 }
 1398 
 1399 // Is this branch offset short enough that a short branch can be used?
 1400 //
 1401 // NOTE: If the platform does not provide any short branch variants, then
 1402 //       this method should return false for offset 0.
 1403 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1407   offset -= br_size;
 1408 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 1411   if (rule == jmpConUCF2_rule)
 1412     return (-126 <= offset && offset <= 125);
 1413   return (-128 <= offset && offset <= 127);
 1414 }
 1415 
 1416 // Return whether or not this register is ever used as an argument.  This
 1417 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1418 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1420 bool Matcher::can_be_java_arg( int reg ) {
 1421   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1422   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1423   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1424   return false;
 1425 }
 1426 
 1427 bool Matcher::is_spillable_arg( int reg ) {
 1428   return can_be_java_arg(reg);
 1429 }
 1430 
 1431 uint Matcher::int_pressure_limit()
 1432 {
 1433   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1434 }
 1435 
 1436 uint Matcher::float_pressure_limit()
 1437 {
 1438   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1439 }
 1440 
 1441 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply,
  // and only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating a negative
  // divisor still yields a correct positive 32-bit value).
 1447   return VM_Version::has_fast_idiv() &&
 1448          (divisor == (int)divisor && divisor != min_jint);
 1449 }
 1450 
 1451 // Register for DIVI projection of divmodI
 1452 RegMask Matcher::divI_proj_mask() {
 1453   return EAX_REG_mask();
 1454 }
 1455 
 1456 // Register for MODI projection of divmodI
 1457 RegMask Matcher::modI_proj_mask() {
 1458   return EDX_REG_mask();
 1459 }
 1460 
 1461 // Register for DIVL projection of divmodL
 1462 RegMask Matcher::divL_proj_mask() {
 1463   ShouldNotReachHere();
 1464   return RegMask();
 1465 }
 1466 
 1467 // Register for MODL projection of divmodL
 1468 RegMask Matcher::modL_proj_mask() {
 1469   ShouldNotReachHere();
 1470   return RegMask();
 1471 }
 1472 
 1473 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1474   return NO_REG_mask();
 1475 }
 1476 
// Returns true if the high 32 bits of the value are known to be zero.
 1478 bool is_operand_hi32_zero(Node* n) {
 1479   int opc = n->Opcode();
 1480   if (opc == Op_AndL) {
 1481     Node* o2 = n->in(2);
 1482     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1483       return true;
 1484     }
 1485   }
 1486   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1487     return true;
 1488   }
 1489   return false;
 1490 }
 1491 
 1492 %}
 1493 
 1494 //----------ENCODING BLOCK-----------------------------------------------------
 1495 // This block specifies the encoding classes used by the compiler to output
 1496 // byte streams.  Encoding classes generate functions which are called by
 1497 // Machine Instruction Nodes in order to generate the bit encoding of the
 1498 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are four interfaces currently supported:
 1500 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1501 // operand to generate a function which returns its register number when
 1502 // queried.   CONST_INTER causes an operand to generate a function which
 1503 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1504 // operand to generate four functions which return the Base Register, the
 1505 // Index Register, the Scale Value, and the Offset Value of the operand when
 1506 // queried.  COND_INTER causes an operand to generate six functions which
 1507 // return the encoding code (ie - encoding bits for the instruction)
 1508 // associated with each basic boolean condition for a conditional instruction.
 1509 // Instructions specify two basic values for encoding.  They use the
 1510 // ins_encode keyword to specify their encoding class (which must be one of
 1511 // the class names specified in the encoding block), and they use the
 1512 // opcode keyword to specify, in order, their primary, secondary, and
 1513 // tertiary opcode.  Only the opcode sections which a particular instruction
 1514 // needs for encoding need to be specified.
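// For illustration only -- a hypothetical instruct rule (not necessarily one
// defined verbatim in this file) showing how the two keywords cooperate with
// the enc_classes declared below: the opcode keyword supplies $primary (and
// optionally $secondary/$tertiary), and ins_encode lists the enc_classes that
// emit the actual bytes.
//
//   instruct addI_eReg_imm_example(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x81, 0x00);        // primary ALU opcode, /0 reg-field extension
//     ins_encode(OpcSErm(dst, src), Con8or32(src));
//     ins_pipe(ialu_reg);
//   %}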
 1515 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically.
 1522 
 1523   // Emit primary opcode
 1524   enc_class OpcP %{
 1525     emit_opcode(cbuf, $primary);
 1526   %}
 1527 
 1528   // Emit secondary opcode
 1529   enc_class OpcS %{
 1530     emit_opcode(cbuf, $secondary);
 1531   %}
 1532 
 1533   // Emit opcode directly
 1534   enc_class Opcode(immI d8) %{
 1535     emit_opcode(cbuf, $d8$$constant);
 1536   %}
 1537 
 1538   enc_class SizePrefix %{
 1539     emit_opcode(cbuf,0x66);
 1540   %}
 1541 
 1542   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1543     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1544   %}
 1545 
 1546   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1547     emit_opcode(cbuf,$opcode$$constant);
 1548     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1549   %}
 1550 
 1551   enc_class mov_r32_imm0( rRegI dst ) %{
 1552     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1553     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1554   %}
 1555 
 1556   enc_class cdq_enc %{
 1557     // Full implementation of Java idiv and irem; checks for
 1558     // special case as described in JVM spec., p.243 & p.271.
 1559     //
    //         normal case                           special case
    //
    // input : eax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
 1579     //
 1580     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1581     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1582     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1583     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1584     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1585     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1586     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1587     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1588     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1589     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1590     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1591     // normal_case:
 1592     emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // done: (target of the je above, immediately after the user-emitted idiv)
 1595   %}
 1596 
 1597   // Dense encoding for older common ops
 1598   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1599     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1600   %}
 1601 
 1602 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1604   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1605     // Check for 8-bit immediate, and set sign extend bit in opcode
 1606     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1607       emit_opcode(cbuf, $primary | 0x02);
 1608     }
 1609     else {                          // If 32-bit immediate
 1610       emit_opcode(cbuf, $primary);
 1611     }
 1612   %}
 1613 
 1614   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1615     // Emit primary opcode and set sign-extend bit
 1616     // Check for 8-bit immediate, and set sign extend bit in opcode
 1617     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1619     else {                          // If 32-bit immediate
 1620       emit_opcode(cbuf, $primary);
 1621     }
 1622     // Emit r/m byte with secondary opcode, after primary opcode.
 1623     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1624   %}
 1625 
 1626   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1627     // Check for 8-bit immediate, and set sign extend bit in opcode
 1628     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1629       $$$emit8$imm$$constant;
 1630     }
 1631     else {                          // If 32-bit immediate
 1632       // Output immediate
 1633       $$$emit32$imm$$constant;
 1634     }
 1635   %}
 1636 
 1637   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1638     // Emit primary opcode and set sign-extend bit
 1639     // Check for 8-bit immediate, and set sign extend bit in opcode
 1640     int con = (int)$imm$$constant; // Throw away top bits
 1641     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1642     // Emit r/m byte with secondary opcode, after primary opcode.
 1643     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1644     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1645     else                               emit_d32(cbuf,con);
 1646   %}
 1647 
 1648   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1649     // Emit primary opcode and set sign-extend bit
 1650     // Check for 8-bit immediate, and set sign extend bit in opcode
 1651     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1652     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1653     // Emit r/m byte with tertiary opcode, after primary opcode.
 1654     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1655     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1656     else                               emit_d32(cbuf,con);
 1657   %}
 1658 
 1659   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1660     emit_cc(cbuf, $secondary, $dst$$reg );
 1661   %}
 1662 
 1663   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1664     int destlo = $dst$$reg;
 1665     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1666     // bswap lo
 1667     emit_opcode(cbuf, 0x0F);
 1668     emit_cc(cbuf, 0xC8, destlo);
 1669     // bswap hi
 1670     emit_opcode(cbuf, 0x0F);
 1671     emit_cc(cbuf, 0xC8, desthi);
 1672     // xchg lo and hi
 1673     emit_opcode(cbuf, 0x87);
 1674     emit_rm(cbuf, 0x3, destlo, desthi);
 1675   %}
 1676 
 1677   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1678     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1679   %}
 1680 
 1681   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1682     $$$emit8$primary;
 1683     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1684   %}
 1685 
 1686   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1687     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1688     emit_d8(cbuf, op >> 8 );
 1689     emit_d8(cbuf, op & 255);
 1690   %}
 1691 
 1692   // emulate a CMOV with a conditional branch around a MOV
 1693   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1694     // Invert sense of branch from sense of CMOV
 1695     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1696     emit_d8( cbuf, $brOffs$$constant );
 1697   %}
 1698 
 1699   enc_class enc_PartialSubtypeCheck( ) %{
 1700     Register Redi = as_Register(EDI_enc); // result register
 1701     Register Reax = as_Register(EAX_enc); // super class
 1702     Register Recx = as_Register(ECX_enc); // killed
 1703     Register Resi = as_Register(ESI_enc); // sub class
 1704     Label miss;
 1705 
 1706     MacroAssembler _masm(&cbuf);
 1707     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1708                                      NULL, &miss,
 1709                                      /*set_cond_codes:*/ true);
 1710     if ($primary) {
 1711       __ xorptr(Redi, Redi);
 1712     }
 1713     __ bind(miss);
 1714   %}
 1715 
 1716   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1717     MacroAssembler masm(&cbuf);
 1718     int start = masm.offset();
 1719     if (UseSSE >= 2) {
 1720       if (VerifyFPU) {
 1721         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1722       }
 1723     } else {
 1724       // External c_calling_convention expects the FPU stack to be 'clean'.
 1725       // Compiled code leaves it dirty.  Do cleanup now.
 1726       masm.empty_FPU_stack();
 1727     }
 1728     if (sizeof_FFree_Float_Stack_All == -1) {
 1729       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1730     } else {
 1731       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1732     }
 1733   %}
 1734 
 1735   enc_class Verify_FPU_For_Leaf %{
 1736     if( VerifyFPU ) {
 1737       MacroAssembler masm(&cbuf);
 1738       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1739     }
 1740   %}
 1741 
 1742   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1743     // This is the instruction starting address for relocation info.
 1744     MacroAssembler _masm(&cbuf);
 1745     cbuf.set_insts_mark();
 1746     $$$emit8$primary;
 1747     // CALL directly to the runtime
 1748     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1749                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1750     __ post_call_nop();
 1751 
 1752     if (UseSSE >= 2) {
 1753       MacroAssembler _masm(&cbuf);
 1754       BasicType rt = tf()->return_type();
 1755 
 1756       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1757         // A C runtime call where the return value is unused.  In SSE2+
 1758         // mode the result needs to be removed from the FPU stack.  It's
 1759         // likely that this function call could be removed by the
 1760         // optimizer if the C function is a pure function.
 1761         __ ffree(0);
 1762       } else if (rt == T_FLOAT) {
 1763         __ lea(rsp, Address(rsp, -4));
 1764         __ fstp_s(Address(rsp, 0));
 1765         __ movflt(xmm0, Address(rsp, 0));
 1766         __ lea(rsp, Address(rsp,  4));
 1767       } else if (rt == T_DOUBLE) {
 1768         __ lea(rsp, Address(rsp, -8));
 1769         __ fstp_d(Address(rsp, 0));
 1770         __ movdbl(xmm0, Address(rsp, 0));
 1771         __ lea(rsp, Address(rsp,  8));
 1772       }
 1773     }
 1774   %}
 1775 
 1776   enc_class pre_call_resets %{
 1777     // If method sets FPU control word restore it here
 1778     debug_only(int off0 = cbuf.insts_size());
 1779     if (ra_->C->in_24_bit_fp_mode()) {
 1780       MacroAssembler _masm(&cbuf);
 1781       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1782     }
 1783     // Clear upper bits of YMM registers when current compiled code uses
 1784     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1785     MacroAssembler _masm(&cbuf);
 1786     __ vzeroupper();
 1787     debug_only(int off1 = cbuf.insts_size());
 1788     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1789   %}
 1790 
 1791   enc_class post_call_FPU %{
 1792     // If method sets FPU control word do it here also
 1793     if (Compile::current()->in_24_bit_fp_mode()) {
 1794       MacroAssembler masm(&cbuf);
 1795       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1796     }
 1797   %}
 1798 
 1799   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1800     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1801     // who we intended to call.
 1802     MacroAssembler _masm(&cbuf);
 1803     cbuf.set_insts_mark();
 1804     $$$emit8$primary;
 1805 
 1806     if (!_method) {
 1807       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1808                      runtime_call_Relocation::spec(),
 1809                      RELOC_IMM32);
 1810       __ post_call_nop();
 1811     } else {
 1812       int method_index = resolved_method_index(cbuf);
 1813       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1814                                                   : static_call_Relocation::spec(method_index);
 1815       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1816                      rspec, RELOC_DISP32);
 1817       __ post_call_nop();
 1818       address mark = cbuf.insts_mark();
 1819       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1820         // Calls of the same statically bound method can share
 1821         // a stub to the interpreter.
 1822         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1823       } else {
 1824         // Emit stubs for static call.
 1825         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1826         if (stub == NULL) {
 1827           ciEnv::current()->record_failure("CodeCache is full");
 1828           return;
 1829         }
 1830       }
 1831     }
 1832   %}
 1833 
 1834   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1835     MacroAssembler _masm(&cbuf);
 1836     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1837     __ post_call_nop();
 1838   %}
 1839 
 1840   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1841     int disp = in_bytes(Method::from_compiled_offset());
 1842     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1843 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1845     MacroAssembler _masm(&cbuf);
 1846     cbuf.set_insts_mark();
 1847     $$$emit8$primary;
 1848     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1849     emit_d8(cbuf, disp);             // Displacement
 1850     __ post_call_nop();
 1851   %}
 1852 
 1853 //   Following encoding is no longer used, but may be restored if calling
 1854 //   convention changes significantly.
 1855 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1856 //
 1857 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1858 //     // int ic_reg     = Matcher::inline_cache_reg();
 1859 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1860 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1861 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1862 //
 1863 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1864 //     // // so we load it immediately before the call
 1865 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1866 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1867 //
 1868 //     // xor rbp,ebp
 1869 //     emit_opcode(cbuf, 0x33);
 1870 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1871 //
 1872 //     // CALL to interpreter.
 1873 //     cbuf.set_insts_mark();
 1874 //     $$$emit8$primary;
 1875 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1876 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1877 //   %}
 1878 
 1879   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1880     $$$emit8$primary;
 1881     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1882     $$$emit8$shift$$constant;
 1883   %}
 1884 
 1885   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1886     // Load immediate does not have a zero or sign extended version
 1887     // for 8-bit immediates
 1888     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1889     $$$emit32$src$$constant;
 1890   %}
 1891 
 1892   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1893     // Load immediate does not have a zero or sign extended version
 1894     // for 8-bit immediates
 1895     emit_opcode(cbuf, $primary + $dst$$reg);
 1896     $$$emit32$src$$constant;
 1897   %}
 1898 
 1899   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1900     // Load immediate does not have a zero or sign extended version
 1901     // for 8-bit immediates
 1902     int dst_enc = $dst$$reg;
 1903     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1904     if (src_con == 0) {
 1905       // xor dst, dst
 1906       emit_opcode(cbuf, 0x33);
 1907       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1908     } else {
 1909       emit_opcode(cbuf, $primary + dst_enc);
 1910       emit_d32(cbuf, src_con);
 1911     }
 1912   %}
 1913 
 1914   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1915     // Load immediate does not have a zero or sign extended version
 1916     // for 8-bit immediates
 1917     int dst_enc = $dst$$reg + 2;
 1918     int src_con = ((julong)($src$$constant)) >> 32;
 1919     if (src_con == 0) {
 1920       // xor dst, dst
 1921       emit_opcode(cbuf, 0x33);
 1922       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1923     } else {
 1924       emit_opcode(cbuf, $primary + dst_enc);
 1925       emit_d32(cbuf, src_con);
 1926     }
 1927   %}
 1928 
 1929 
 1930   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1931   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1932     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1933   %}
 1934 
 1935   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1936     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1937   %}
 1938 
 1939   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1940     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1941   %}
 1942 
 1943   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1944     $$$emit8$primary;
 1945     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1946   %}
 1947 
 1948   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1949     $$$emit8$secondary;
 1950     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1951   %}
 1952 
 1953   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1954     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1955   %}
 1956 
 1957   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1958     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1959   %}
 1960 
 1961   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1962     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1963   %}
 1964 
 1965   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1966     // Output immediate
 1967     $$$emit32$src$$constant;
 1968   %}
 1969 
 1970   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1971     // Output Float immediate bits
 1972     jfloat jf = $src$$constant;
 1973     int    jf_as_bits = jint_cast( jf );
 1974     emit_d32(cbuf, jf_as_bits);
 1975   %}
 1976 
 1977   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1978     // Output Float immediate bits
 1979     jfloat jf = $src$$constant;
 1980     int    jf_as_bits = jint_cast( jf );
 1981     emit_d32(cbuf, jf_as_bits);
 1982   %}
 1983 
 1984   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1985     // Output immediate
 1986     $$$emit16$src$$constant;
 1987   %}
 1988 
 1989   enc_class Con_d32(immI src) %{
 1990     emit_d32(cbuf,$src$$constant);
 1991   %}
 1992 
 1993   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1994     // Output immediate memory reference
 1995     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1996     emit_d32(cbuf, 0x00);
 1997   %}
 1998 
 1999   enc_class lock_prefix( ) %{
 2000     emit_opcode(cbuf,0xF0);         // [Lock]
 2001   %}
 2002 
 2003   // Cmp-xchg long value.
  // Note: we need to swap EBX and ECX before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ECX as the high order word of the new value to store, but
  //       our register encoding uses EBX.
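  //       (CMPXCHG8B compares EDX:EAX with the 8-byte memory operand and, on a
  //       match, stores ECX:EBX into it; otherwise it loads the operand into
  //       EDX:EAX.)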
 2008   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2009 
 2010     // XCHG  rbx,ecx
 2011     emit_opcode(cbuf,0x87);
 2012     emit_opcode(cbuf,0xD9);
 2013     // [Lock]
 2014     emit_opcode(cbuf,0xF0);
 2015     // CMPXCHG8 [Eptr]
 2016     emit_opcode(cbuf,0x0F);
 2017     emit_opcode(cbuf,0xC7);
 2018     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2019     // XCHG  rbx,ecx
 2020     emit_opcode(cbuf,0x87);
 2021     emit_opcode(cbuf,0xD9);
 2022   %}
 2023 
 2024   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2025     // [Lock]
 2026     emit_opcode(cbuf,0xF0);
 2027 
 2028     // CMPXCHG [Eptr]
 2029     emit_opcode(cbuf,0x0F);
 2030     emit_opcode(cbuf,0xB1);
 2031     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2032   %}
 2033 
 2034   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2035     // [Lock]
 2036     emit_opcode(cbuf,0xF0);
 2037 
 2038     // CMPXCHGB [Eptr]
 2039     emit_opcode(cbuf,0x0F);
 2040     emit_opcode(cbuf,0xB0);
 2041     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2042   %}
 2043 
 2044   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2045     // [Lock]
 2046     emit_opcode(cbuf,0xF0);
 2047 
    // Operand-size prefix (16-bit operand)
 2049     emit_opcode(cbuf, 0x66);
 2050 
 2051     // CMPXCHGW [Eptr]
 2052     emit_opcode(cbuf,0x0F);
 2053     emit_opcode(cbuf,0xB1);
 2054     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2055   %}
 2056 
 2057   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2058     int res_encoding = $res$$reg;
 2059 
 2060     // MOV  res,0
 2061     emit_opcode( cbuf, 0xB8 + res_encoding);
 2062     emit_d32( cbuf, 0 );
 2063     // JNE,s  fail
 2064     emit_opcode(cbuf,0x75);
 2065     emit_d8(cbuf, 5 );
 2066     // MOV  res,1
 2067     emit_opcode( cbuf, 0xB8 + res_encoding);
 2068     emit_d32( cbuf, 1 );
 2069     // fail:
 2070   %}
 2071 
 2072   enc_class set_instruction_start( ) %{
 2073     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2074   %}
 2075 
 2076   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2077     int reg_encoding = $ereg$$reg;
 2078     int base  = $mem$$base;
 2079     int index = $mem$$index;
 2080     int scale = $mem$$scale;
 2081     int displace = $mem$$disp;
 2082     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2083     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2084   %}
 2085 
 2086   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2087     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2088     int base  = $mem$$base;
 2089     int index = $mem$$index;
 2090     int scale = $mem$$scale;
 2091     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2092     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2093     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2094   %}
 2095 
 2096   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2097     int r1, r2;
 2098     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2099     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2100     emit_opcode(cbuf,0x0F);
 2101     emit_opcode(cbuf,$tertiary);
 2102     emit_rm(cbuf, 0x3, r1, r2);
 2103     emit_d8(cbuf,$cnt$$constant);
 2104     emit_d8(cbuf,$primary);
 2105     emit_rm(cbuf, 0x3, $secondary, r1);
 2106     emit_d8(cbuf,$cnt$$constant);
 2107   %}
 2108 
 2109   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2110     emit_opcode( cbuf, 0x8B ); // Move
 2111     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2112     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2113       emit_d8(cbuf,$primary);
 2114       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2115       emit_d8(cbuf,$cnt$$constant-32);
 2116     }
 2117     emit_d8(cbuf,$primary);
 2118     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2119     emit_d8(cbuf,31);
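    // Illustrative case (assuming the SAR opcode is supplied via
    // $primary/$secondary): cnt == 40 produces MOV lo,hi; SAR lo,8; SAR hi,31,
    // while cnt == 32 skips the middle shift and just sign-fills hi.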
 2120   %}
 2121 
 2122   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2123     int r1, r2;
 2124     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2125     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2126 
 2127     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2128     emit_rm(cbuf, 0x3, r1, r2);
 2129     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2130       emit_opcode(cbuf,$primary);
 2131       emit_rm(cbuf, 0x3, $secondary, r1);
 2132       emit_d8(cbuf,$cnt$$constant-32);
 2133     }
 2134     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2135     emit_rm(cbuf, 0x3, r2, r2);
 2136   %}
 2137 
 2138   // Clone of RegMem but accepts an extra parameter to access each
 2139   // half of a double in memory; it never needs relocation info.
 2140   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2141     emit_opcode(cbuf,$opcode$$constant);
 2142     int reg_encoding = $rm_reg$$reg;
 2143     int base     = $mem$$base;
 2144     int index    = $mem$$index;
 2145     int scale    = $mem$$scale;
 2146     int displace = $mem$$disp + $disp_for_half$$constant;
 2147     relocInfo::relocType disp_reloc = relocInfo::none;
 2148     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2149   %}
 2150 
 2151   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2152   //
 2153   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2154   // and it never needs relocation information.
 2155   // Frequently used to move data between FPU's Stack Top and memory.
 2156   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2157     int rm_byte_opcode = $rm_opcode$$constant;
 2158     int base     = $mem$$base;
 2159     int index    = $mem$$index;
 2160     int scale    = $mem$$scale;
 2161     int displace = $mem$$disp;
 2162     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2163     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2164   %}
 2165 
 2166   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2167     int rm_byte_opcode = $rm_opcode$$constant;
 2168     int base     = $mem$$base;
 2169     int index    = $mem$$index;
 2170     int scale    = $mem$$scale;
 2171     int displace = $mem$$disp;
 2172     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2173     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2174   %}
 2175 
 2176   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2177     int reg_encoding = $dst$$reg;
 2178     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2179     int index        = 0x04;            // 0x04 indicates no index
 2180     int scale        = 0x00;            // 0x00 indicates no scale
 2181     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2182     relocInfo::relocType disp_reloc = relocInfo::none;
 2183     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2184   %}
 2185 
 2186   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2187     // Compare dst,src
 2188     emit_opcode(cbuf,0x3B);
 2189     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2190     // jmp dst < src around move
 2191     emit_opcode(cbuf,0x7C);
 2192     emit_d8(cbuf,2);
 2193     // move dst,src
 2194     emit_opcode(cbuf,0x8B);
 2195     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2196   %}
 2197 
 2198   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2199     // Compare dst,src
 2200     emit_opcode(cbuf,0x3B);
 2201     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2202     // jmp dst > src around move
 2203     emit_opcode(cbuf,0x7F);
 2204     emit_d8(cbuf,2);
 2205     // move dst,src
 2206     emit_opcode(cbuf,0x8B);
 2207     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2208   %}
 2209 
 2210   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2211     // If src is FPR1, we can just FST to store it.
 2212     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2213     int reg_encoding = 0x2; // Just store
 2214     int base  = $mem$$base;
 2215     int index = $mem$$index;
 2216     int scale = $mem$$scale;
 2217     int displace = $mem$$disp;
 2218     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2219     if( $src$$reg != FPR1L_enc ) {
 2220       reg_encoding = 0x3;  // Store & pop
 2221       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2222       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2223     }
 2224     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2225     emit_opcode(cbuf,$primary);
 2226     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2227   %}
 2228 
 2229   enc_class neg_reg(rRegI dst) %{
 2230     // NEG $dst
 2231     emit_opcode(cbuf,0xF7);
 2232     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2233   %}
 2234 
 2235   enc_class setLT_reg(eCXRegI dst) %{
 2236     // SETLT $dst
 2237     emit_opcode(cbuf,0x0F);
 2238     emit_opcode(cbuf,0x9C);
 2239     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2240   %}
 2241 
 2242   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2243     int tmpReg = $tmp$$reg;
 2244 
 2245     // SUB $p,$q
 2246     emit_opcode(cbuf,0x2B);
 2247     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2248     // SBB $tmp,$tmp
 2249     emit_opcode(cbuf,0x1B);
 2250     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2251     // AND $tmp,$y
 2252     emit_opcode(cbuf,0x23);
 2253     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2254     // ADD $p,$tmp
 2255     emit_opcode(cbuf,0x03);
 2256     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2257   %}
 2258 
 2259   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2260     // TEST shift,32
 2261     emit_opcode(cbuf,0xF7);
 2262     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2263     emit_d32(cbuf,0x20);
 2264     // JEQ,s small
 2265     emit_opcode(cbuf, 0x74);
 2266     emit_d8(cbuf, 0x04);
 2267     // MOV    $dst.hi,$dst.lo
 2268     emit_opcode( cbuf, 0x8B );
 2269     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2270     // CLR    $dst.lo
 2271     emit_opcode(cbuf, 0x33);
 2272     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2273 // small:
 2274     // SHLD   $dst.hi,$dst.lo,$shift
 2275     emit_opcode(cbuf,0x0F);
 2276     emit_opcode(cbuf,0xA5);
 2277     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2279     emit_opcode(cbuf,0xD3);
 2280     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2281   %}
 2282 
 2283   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2284     // TEST shift,32
 2285     emit_opcode(cbuf,0xF7);
 2286     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2287     emit_d32(cbuf,0x20);
 2288     // JEQ,s small
 2289     emit_opcode(cbuf, 0x74);
 2290     emit_d8(cbuf, 0x04);
 2291     // MOV    $dst.lo,$dst.hi
 2292     emit_opcode( cbuf, 0x8B );
 2293     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2294     // CLR    $dst.hi
 2295     emit_opcode(cbuf, 0x33);
 2296     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2297 // small:
 2298     // SHRD   $dst.lo,$dst.hi,$shift
 2299     emit_opcode(cbuf,0x0F);
 2300     emit_opcode(cbuf,0xAD);
 2301     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2303     emit_opcode(cbuf,0xD3);
 2304     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2305   %}
 2306 
 2307   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2308     // TEST shift,32
 2309     emit_opcode(cbuf,0xF7);
 2310     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2311     emit_d32(cbuf,0x20);
 2312     // JEQ,s small
 2313     emit_opcode(cbuf, 0x74);
 2314     emit_d8(cbuf, 0x05);
 2315     // MOV    $dst.lo,$dst.hi
 2316     emit_opcode( cbuf, 0x8B );
 2317     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2318     // SAR    $dst.hi,31
 2319     emit_opcode(cbuf, 0xC1);
 2320     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2321     emit_d8(cbuf, 0x1F );
 2322 // small:
 2323     // SHRD   $dst.lo,$dst.hi,$shift
 2324     emit_opcode(cbuf,0x0F);
 2325     emit_opcode(cbuf,0xAD);
 2326     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2328     emit_opcode(cbuf,0xD3);
 2329     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2330   %}
 2331 
 2332 
 2333   // ----------------- Encodings for floating point unit -----------------
 2334   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2335   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2336     $$$emit8$primary;
 2337     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2338   %}
 2339 
 2340   // Pop argument in FPR0 with FSTP ST(0)
 2341   enc_class PopFPU() %{
 2342     emit_opcode( cbuf, 0xDD );
 2343     emit_d8( cbuf, 0xD8 );
 2344   %}
 2345 
 2346   // !!!!! equivalent to Pop_Reg_F
 2347   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2348     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2349     emit_d8( cbuf, 0xD8+$dst$$reg );
 2350   %}
 2351 
 2352   enc_class Push_Reg_DPR( regDPR dst ) %{
 2353     emit_opcode( cbuf, 0xD9 );
 2354     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2355   %}
 2356 
 2357   enc_class strictfp_bias1( regDPR dst ) %{
 2358     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2359     emit_opcode( cbuf, 0x2D );
 2360     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2361     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2362     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2363   %}
 2364 
 2365   enc_class strictfp_bias2( regDPR dst ) %{
 2366     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2367     emit_opcode( cbuf, 0x2D );
 2368     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2369     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2370     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2371   %}
 2372 
 2373   // Special case for moving an integer register to a stack slot.
 2374   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2375     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2376   %}
 2377 
 2378   // Special case for moving a register to a stack slot.
 2379   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2380     // Opcode already emitted
 2381     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2382     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2383     emit_d32(cbuf, $dst$$disp);   // Displacement
 2384   %}
 2385 
 2386   // Push the integer in stackSlot 'src' onto FP-stack
 2387   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2388     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2389   %}
 2390 
 2391   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2392   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2393     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2394   %}
 2395 
 2396   // Same as Pop_Mem_F except for opcode
 2397   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2398   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2399     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2400   %}
 2401 
 2402   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2403     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2404     emit_d8( cbuf, 0xD8+$dst$$reg );
 2405   %}
 2406 
 2407   enc_class Push_Reg_FPR( regFPR dst ) %{
 2408     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2409     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2410   %}
 2411 
 2412   // Push FPU's float to a stack-slot, and pop FPU-stack
 2413   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2414     int pop = 0x02;
 2415     if ($src$$reg != FPR1L_enc) {
 2416       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2417       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2418       pop = 0x03;
 2419     }
 2420     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2421   %}
 2422 
 2423   // Push FPU's double to a stack-slot, and pop FPU-stack
 2424   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2425     int pop = 0x02;
 2426     if ($src$$reg != FPR1L_enc) {
 2427       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2428       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2429       pop = 0x03;
 2430     }
 2431     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2432   %}
 2433 
 2434   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2435   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2436     int pop = 0xD0 - 1; // -1 since we skip FLD
 2437     if ($src$$reg != FPR1L_enc) {
 2438       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2439       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2440       pop = 0xD8;
 2441     }
 2442     emit_opcode( cbuf, 0xDD );
 2443     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2444   %}
 2445 
 2446 
 2447   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2448     // load dst in FPR0
 2449     emit_opcode( cbuf, 0xD9 );
 2450     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2451     if ($src$$reg != FPR1L_enc) {
 2452       // fincstp
 2453       emit_opcode (cbuf, 0xD9);
 2454       emit_opcode (cbuf, 0xF7);
 2455       // swap src with FPR1:
 2456       // FXCH FPR1 with src
 2457       emit_opcode(cbuf, 0xD9);
 2458       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2459       // fdecstp
 2460       emit_opcode (cbuf, 0xD9);
 2461       emit_opcode (cbuf, 0xF6);
 2462     }
 2463   %}
 2464 
 2465   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2466     MacroAssembler _masm(&cbuf);
 2467     __ subptr(rsp, 8);
 2468     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2469     __ fld_d(Address(rsp, 0));
 2470     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2471     __ fld_d(Address(rsp, 0));
 2472   %}
 2473 
 2474   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2475     MacroAssembler _masm(&cbuf);
 2476     __ subptr(rsp, 4);
 2477     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2478     __ fld_s(Address(rsp, 0));
 2479     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2480     __ fld_s(Address(rsp, 0));
 2481   %}
 2482 
 2483   enc_class Push_ResultD(regD dst) %{
 2484     MacroAssembler _masm(&cbuf);
 2485     __ fstp_d(Address(rsp, 0));
 2486     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2487     __ addptr(rsp, 8);
 2488   %}
 2489 
 2490   enc_class Push_ResultF(regF dst, immI d8) %{
 2491     MacroAssembler _masm(&cbuf);
 2492     __ fstp_s(Address(rsp, 0));
 2493     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2494     __ addptr(rsp, $d8$$constant);
 2495   %}
 2496 
 2497   enc_class Push_SrcD(regD src) %{
 2498     MacroAssembler _masm(&cbuf);
 2499     __ subptr(rsp, 8);
 2500     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2501     __ fld_d(Address(rsp, 0));
 2502   %}
 2503 
 2504   enc_class push_stack_temp_qword() %{
 2505     MacroAssembler _masm(&cbuf);
 2506     __ subptr(rsp, 8);
 2507   %}
 2508 
 2509   enc_class pop_stack_temp_qword() %{
 2510     MacroAssembler _masm(&cbuf);
 2511     __ addptr(rsp, 8);
 2512   %}
 2513 
 2514   enc_class push_xmm_to_fpr1(regD src) %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2517     __ fld_d(Address(rsp, 0));
 2518   %}
 2519 
 2520   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2521     if ($src$$reg != FPR1L_enc) {
 2522       // fincstp
 2523       emit_opcode (cbuf, 0xD9);
 2524       emit_opcode (cbuf, 0xF7);
 2525       // FXCH FPR1 with src
 2526       emit_opcode(cbuf, 0xD9);
 2527       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2528       // fdecstp
 2529       emit_opcode (cbuf, 0xD9);
 2530       emit_opcode (cbuf, 0xF6);
 2531     }
 2532     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2533     // // FSTP   FPR$dst$$reg
 2534     // emit_opcode( cbuf, 0xDD );
 2535     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2536   %}
 2537 
 2538   enc_class fnstsw_sahf_skip_parity() %{
 2539     // fnstsw ax
 2540     emit_opcode( cbuf, 0xDF );
 2541     emit_opcode( cbuf, 0xE0 );
 2542     // sahf
 2543     emit_opcode( cbuf, 0x9E );
 2544     // jnp  ::skip
 2545     emit_opcode( cbuf, 0x7B );
 2546     emit_opcode( cbuf, 0x05 );
 2547   %}
 2548 
 2549   enc_class emitModDPR() %{
 2550     // fprem must be iterative
 2551     // :: loop
 2552     // fprem
 2553     emit_opcode( cbuf, 0xD9 );
 2554     emit_opcode( cbuf, 0xF8 );
 2555     // wait
 2556     emit_opcode( cbuf, 0x9b );
 2557     // fnstsw ax
 2558     emit_opcode( cbuf, 0xDF );
 2559     emit_opcode( cbuf, 0xE0 );
 2560     // sahf
 2561     emit_opcode( cbuf, 0x9E );
 2562     // jp  ::loop
 2563     emit_opcode( cbuf, 0x0F );
 2564     emit_opcode( cbuf, 0x8A );
 2565     emit_opcode( cbuf, 0xF4 );
 2566     emit_opcode( cbuf, 0xFF );
 2567     emit_opcode( cbuf, 0xFF );
 2568     emit_opcode( cbuf, 0xFF );
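    // (rel32 == 0xFFFFFFF4 == -12, i.e. back to the fprem at the top of the
    //  loop: D9 F8 + 9B + DF E0 + 9E + 0F 8A rel32 is 12 bytes)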
 2569   %}
 2570 
 2571   enc_class fpu_flags() %{
 2572     // fnstsw_ax
 2573     emit_opcode( cbuf, 0xDF);
 2574     emit_opcode( cbuf, 0xE0);
 2575     // test ax,0x0400
 2576     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2577     emit_opcode( cbuf, 0xA9 );
 2578     emit_d16   ( cbuf, 0x0400 );
 2579     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2580     // // test rax,0x0400
 2581     // emit_opcode( cbuf, 0xA9 );
 2582     // emit_d32   ( cbuf, 0x00000400 );
 2583     //
 2584     // jz exit (no unordered comparison)
 2585     emit_opcode( cbuf, 0x74 );
 2586     emit_d8    ( cbuf, 0x02 );
 2587     // mov ah,1 - treat as LT case (set carry flag)
 2588     emit_opcode( cbuf, 0xB4 );
 2589     emit_d8    ( cbuf, 0x01 );
 2590     // sahf
 2591     emit_opcode( cbuf, 0x9E);
 2592   %}
 2593 
 2594   enc_class cmpF_P6_fixup() %{
 2595     // Fixup the integer flags in case comparison involved a NaN
 2596     //
 2597     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2598     emit_opcode( cbuf, 0x7B );
 2599     emit_d8    ( cbuf, 0x03 );
 2600     // MOV AH,1 - treat as LT case (set carry flag)
 2601     emit_opcode( cbuf, 0xB4 );
 2602     emit_d8    ( cbuf, 0x01 );
 2603     // SAHF
 2604     emit_opcode( cbuf, 0x9E);
 2605     // NOP     // target for branch to avoid branch to branch
 2606     emit_opcode( cbuf, 0x90);
 2607   %}
 2608 
 2609 //     fnstsw_ax();
 2610 //     sahf();
 2611 //     movl(dst, nan_result);
 2612 //     jcc(Assembler::parity, exit);
 2613 //     movl(dst, less_result);
 2614 //     jcc(Assembler::below, exit);
 2615 //     movl(dst, equal_result);
 2616 //     jcc(Assembler::equal, exit);
 2617 //     movl(dst, greater_result);
 2618 
 2619 // less_result     =  1;
 2620 // greater_result  = -1;
 2621 // equal_result    = 0;
 2622 // nan_result      = -1;
 2623 
 2624   enc_class CmpF_Result(rRegI dst) %{
 2625     // fnstsw_ax();
 2626     emit_opcode( cbuf, 0xDF);
 2627     emit_opcode( cbuf, 0xE0);
 2628     // sahf
 2629     emit_opcode( cbuf, 0x9E);
 2630     // movl(dst, nan_result);
 2631     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2632     emit_d32( cbuf, -1 );
 2633     // jcc(Assembler::parity, exit);
 2634     emit_opcode( cbuf, 0x7A );
 2635     emit_d8    ( cbuf, 0x13 );
 2636     // movl(dst, less_result);
 2637     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2638     emit_d32( cbuf, -1 );
 2639     // jcc(Assembler::below, exit);
 2640     emit_opcode( cbuf, 0x72 );
 2641     emit_d8    ( cbuf, 0x0C );
 2642     // movl(dst, equal_result);
 2643     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2644     emit_d32( cbuf, 0 );
 2645     // jcc(Assembler::equal, exit);
 2646     emit_opcode( cbuf, 0x74 );
 2647     emit_d8    ( cbuf, 0x05 );
 2648     // movl(dst, greater_result);
 2649     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2650     emit_d32( cbuf, 1 );
 2651   %}
 2652 
 2653 
 2654   // Compare the longs and set flags
 2655   // BROKEN!  Do Not use as-is
 2656   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2657     // CMP    $src1.hi,$src2.hi
 2658     emit_opcode( cbuf, 0x3B );
 2659     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2660     // JNE,s  done
 2661     emit_opcode(cbuf,0x75);
 2662     emit_d8(cbuf, 2 );
 2663     // CMP    $src1.lo,$src2.lo
 2664     emit_opcode( cbuf, 0x3B );
 2665     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2666 // done:
 2667   %}
 2668 
 2669   enc_class convert_int_long( regL dst, rRegI src ) %{
 2670     // mov $dst.lo,$src
 2671     int dst_encoding = $dst$$reg;
 2672     int src_encoding = $src$$reg;
 2673     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2674     // mov $dst.hi,$src
 2675     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2676     // sar $dst.hi,31
 2677     emit_opcode( cbuf, 0xC1 );
 2678     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2679     emit_d8(cbuf, 0x1F );
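    // e.g. src == -5: dst.lo gets 0xFFFFFFFB, and the SAR by 31 leaves
    // dst.hi == 0xFFFFFFFF, completing the sign extension.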
 2680   %}
 2681 
 2682   enc_class convert_long_double( eRegL src ) %{
 2683     // push $src.hi
 2684     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2685     // push $src.lo
 2686     emit_opcode(cbuf, 0x50+$src$$reg  );
 2687     // fild 64-bits at [SP]
 2688     emit_opcode(cbuf,0xdf);
 2689     emit_d8(cbuf, 0x6C);
 2690     emit_d8(cbuf, 0x24);
 2691     emit_d8(cbuf, 0x00);
 2692     // pop stack
 2693     emit_opcode(cbuf, 0x83); // add  SP, #8
 2694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2695     emit_d8(cbuf, 0x8);
 2696   %}
 2697 
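  // The next encoding appears to implement a multiply-high idiom, roughly
  // dst = (int)(((long)$src1 * $src2) >> $cnt) with 32 <= cnt <= 63: the
  // one-operand IMUL leaves the 64-bit product in EDX:EAX, keeping only EDX
  // already accounts for a shift by 32, and the SAR by cnt-32 supplies the
  // rest.  (Informal sketch inferred from the operands and the emitted
  // instructions.)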
 2698   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2699     // IMUL   EDX:EAX,$src1
 2700     emit_opcode( cbuf, 0xF7 );
 2701     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2702     // SAR    EDX,$cnt-32
 2703     int shift_count = ((int)$cnt$$constant) - 32;
 2704     if (shift_count > 0) {
 2705       emit_opcode(cbuf, 0xC1);
 2706       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2707       emit_d8(cbuf, shift_count);
 2708     }
 2709   %}
 2710 
  // Same as convert_long_double above, but without the trailing ADD ESP,8
  // that pops the operand off the stack.
 2712   enc_class convert_long_double2( eRegL src ) %{
 2713     // push $src.hi
 2714     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2715     // push $src.lo
 2716     emit_opcode(cbuf, 0x50+$src$$reg  );
 2717     // fild 64-bits at [SP]
 2718     emit_opcode(cbuf,0xdf);
 2719     emit_d8(cbuf, 0x6C);
 2720     emit_d8(cbuf, 0x24);
 2721     emit_d8(cbuf, 0x00);
 2722   %}
 2723 
 2724   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2725     // Basic idea: long = (long)int * (long)int
 2726     // IMUL EDX:EAX, src
 2727     emit_opcode( cbuf, 0xF7 );
 2728     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2729   %}
 2730 
 2731   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2732     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2733     // MUL EDX:EAX, src
 2734     emit_opcode( cbuf, 0xF7 );
 2735     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2736   %}
 2737 
 2738   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2739     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2740     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
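    // Why those terms suffice (informal expansion):
    //   x*y = (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
    //       = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo
    // The 2^64 term vanishes modulo 2^64, so only the two low cross
    // products and the full x_lo*y_lo contribute to the 64-bit result.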
 2741     // MOV    $tmp,$src.lo
 2742     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2743     // IMUL   $tmp,EDX
 2744     emit_opcode( cbuf, 0x0F );
 2745     emit_opcode( cbuf, 0xAF );
 2746     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2747     // MOV    EDX,$src.hi
 2748     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2749     // IMUL   EDX,EAX
 2750     emit_opcode( cbuf, 0x0F );
 2751     emit_opcode( cbuf, 0xAF );
 2752     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2753     // ADD    $tmp,EDX
 2754     emit_opcode( cbuf, 0x03 );
 2755     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2756     // MUL   EDX:EAX,$src.lo
 2757     emit_opcode( cbuf, 0xF7 );
 2758     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2760     emit_opcode( cbuf, 0x03 );
 2761     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2762   %}
 2763 
 2764   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2765     // Basic idea: lo(result) = lo(src * y_lo)
 2766     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
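    // Same expansion as in long_multiply above; because $src is a small
    // non-negative constant (immL_127) its high word is zero, so the
    // remaining cross product drops out and only the two terms noted
    // survive.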
 2767     // IMUL   $tmp,EDX,$src
 2768     emit_opcode( cbuf, 0x6B );
 2769     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2770     emit_d8( cbuf, (int)$src$$constant );
 2771     // MOV    EDX,$src
 2772     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2773     emit_d32( cbuf, (int)$src$$constant );
 2774     // MUL   EDX:EAX,EDX
 2775     emit_opcode( cbuf, 0xF7 );
 2776     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2778     emit_opcode( cbuf, 0x03 );
 2779     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2780   %}
 2781 
 2782   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2783     // PUSH src1.hi
 2784     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2785     // PUSH src1.lo
 2786     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2787     // PUSH src2.hi
 2788     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2789     // PUSH src2.lo
 2790     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2791     // CALL directly to the runtime
 2792     MacroAssembler _masm(&cbuf);
 2793     cbuf.set_insts_mark();
 2794     emit_opcode(cbuf,0xE8);       // Call into runtime
 2795     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2796     __ post_call_nop();
 2797     // Restore stack
 2798     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2799     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2800     emit_d8(cbuf, 4*4);
 2801   %}
 2802 
 2803   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2804     // PUSH src1.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2806     // PUSH src1.lo
 2807     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2808     // PUSH src2.hi
 2809     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2810     // PUSH src2.lo
 2811     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2812     // CALL directly to the runtime
 2813     MacroAssembler _masm(&cbuf);
 2814     cbuf.set_insts_mark();
 2815     emit_opcode(cbuf,0xE8);       // Call into runtime
 2816     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2817     __ post_call_nop();
 2818     // Restore stack
 2819     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2820     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2821     emit_d8(cbuf, 4*4);
 2822   %}
 2823 
 2824   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2825     // MOV   $tmp,$src.lo
 2826     emit_opcode(cbuf, 0x8B);
 2827     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2828     // OR    $tmp,$src.hi
 2829     emit_opcode(cbuf, 0x0B);
 2830     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2831   %}
 2832 
 2833   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2834     // CMP    $src1.lo,$src2.lo
 2835     emit_opcode( cbuf, 0x3B );
 2836     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2837     // JNE,s  skip
 2838     emit_cc(cbuf, 0x70, 0x5);
 2839     emit_d8(cbuf,2);
 2840     // CMP    $src1.hi,$src2.hi
 2841     emit_opcode( cbuf, 0x3B );
 2842     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2843   %}
 2844 
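  // The CMP/SBB pair below is the usual multi-word signed-compare idiom: the
  // low-word CMP produces a borrow, and the SBB then computes
  // src1.hi - src2.hi - borrow, so the sign and overflow flags reflect the
  // full 64-bit signed comparison.  (ZF after the SBB only describes the
  // high words, which is presumably why this result feeds the LT/GE-style
  // flags operands rather than equality tests.)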
 2845   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2846     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2847     emit_opcode( cbuf, 0x3B );
 2848     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2849     // MOV    $tmp,$src1.hi
 2850     emit_opcode( cbuf, 0x8B );
 2851     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2852     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2853     emit_opcode( cbuf, 0x1B );
 2854     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2855   %}
 2856 
 2857   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2858     // XOR    $tmp,$tmp
 2859     emit_opcode(cbuf,0x33);  // XOR
 2860     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2861     // CMP    $tmp,$src.lo
 2862     emit_opcode( cbuf, 0x3B );
 2863     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2864     // SBB    $tmp,$src.hi
 2865     emit_opcode( cbuf, 0x1B );
 2866     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2867   %}
 2868 
 2869  // Sniff, sniff... smells like Gnu Superoptimizer
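  // Two's-complement negation of a 64-bit value split across two registers:
  // negate both halves, then subtract the borrow from the high half.  NEG of
  // the low half sets the carry flag exactly when the low half was non-zero,
  // which is precisely when the high half needs the extra -1 (e.g. negating
  // 1: lo becomes 0xFFFFFFFF and hi must become 0xFFFFFFFF, which only the
  // SBB provides).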
 2870   enc_class neg_long( eRegL dst ) %{
 2871     emit_opcode(cbuf,0xF7);    // NEG hi
 2872     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2873     emit_opcode(cbuf,0xF7);    // NEG lo
 2874     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2875     emit_opcode(cbuf,0x83);    // SBB hi,0
 2876     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2877     emit_d8    (cbuf,0 );
 2878   %}
 2879 
 2880   enc_class enc_pop_rdx() %{
 2881     emit_opcode(cbuf,0x5A);
 2882   %}
 2883 
 2884   enc_class enc_rethrow() %{
 2885     MacroAssembler _masm(&cbuf);
 2886     cbuf.set_insts_mark();
 2887     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2888     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2889                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2890     __ post_call_nop();
 2891   %}
 2892 
 2893 
  // Convert a double to an int.  Java semantics require we do complex
  // manglings in the corner cases.  So we set the rounding mode to
  // 'zero' (truncate), store the darned double down as an int, and reset
  // the rounding mode to 'nearest'.  Corner-case values are then patched
  // up by a slow-path call into the runtime.
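  // For reference, the Java d2i corner cases being handled are:
  //   NaN                  -> 0
  //   too large (>= 2^31)  -> Integer.MAX_VALUE
  //   too small (< -2^31)  -> Integer.MIN_VALUE
  // FISTP stores the "integer indefinite" pattern 0x80000000 for values it
  // cannot convert, which is why the code below compares the result against
  // 0x80000000 and only then falls into the runtime slow path.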
 2899   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2903     // However, I2C adapters and other float-stack manglers leave pending
 2904     // invalid-op exceptions hanging.  We would have to clear them before
 2905     // enabling them and that is more expensive than just testing for the
 2906     // invalid value Intel stores down in the corner cases.
 2907     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2908     emit_opcode(cbuf,0x2D);
 2909     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2910     // Allocate a word
 2911     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2912     emit_opcode(cbuf,0xEC);
 2913     emit_d8(cbuf,0x04);
 2914     // Encoding assumes a double has been pushed into FPR0.
 2915     // Store down the double as an int, popping the FPU stack
 2916     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2917     emit_opcode(cbuf,0x1C);
 2918     emit_d8(cbuf,0x24);
 2919     // Restore the rounding mode; mask the exception
 2920     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2921     emit_opcode(cbuf,0x2D);
 2922     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2923         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2924         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2925 
 2926     // Load the converted int; adjust CPU stack
 2927     emit_opcode(cbuf,0x58);       // POP EAX
 2928     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2929     emit_d32   (cbuf,0x80000000); //         0x80000000
 2930     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2931     emit_d8    (cbuf,0x07);       // Size of slow_call
 2932     // Push src onto stack slow-path
 2933     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2934     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2935     // CALL directly to the runtime
 2936     MacroAssembler _masm(&cbuf);
 2937     cbuf.set_insts_mark();
 2938     emit_opcode(cbuf,0xE8);       // Call into runtime
 2939     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2940     __ post_call_nop();
 2941     // Carry on here...
 2942   %}
 2943 
 2944   enc_class DPR2L_encoding( regDPR src ) %{
 2945     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2946     emit_opcode(cbuf,0x2D);
 2947     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the long result
 2949     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2950     emit_opcode(cbuf,0xEC);
 2951     emit_d8(cbuf,0x08);
 2952     // Encoding assumes a double has been pushed into FPR0.
 2953     // Store down the double as a long, popping the FPU stack
 2954     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2955     emit_opcode(cbuf,0x3C);
 2956     emit_d8(cbuf,0x24);
 2957     // Restore the rounding mode; mask the exception
 2958     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2959     emit_opcode(cbuf,0x2D);
 2960     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2961         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2962         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2963 
    // Load the converted long; adjust CPU stack
 2965     emit_opcode(cbuf,0x58);       // POP EAX
 2966     emit_opcode(cbuf,0x5A);       // POP EDX
 2967     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2968     emit_d8    (cbuf,0xFA);       // rdx
 2969     emit_d32   (cbuf,0x80000000); //         0x80000000
 2970     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2971     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2972     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2973     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2974     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2975     emit_d8    (cbuf,0x07);       // Size of slow_call
 2976     // Push src onto stack slow-path
 2977     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2978     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2979     // CALL directly to the runtime
 2980     MacroAssembler _masm(&cbuf);
 2981     cbuf.set_insts_mark();
 2982     emit_opcode(cbuf,0xE8);       // Call into runtime
 2983     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2984     __ post_call_nop();
 2985     // Carry on here...
 2986   %}
 2987 
 2988   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2989     // Operand was loaded from memory into fp ST (stack top)
 2990     // FMUL   ST,$src  /* D8 C8+i */
 2991     emit_opcode(cbuf, 0xD8);
 2992     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2993   %}
 2994 
 2995   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 2997     emit_opcode(cbuf, 0xD8);
 2998     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2999     //could use FADDP  src2,fpST  /* DE C0+i */
 3000   %}
 3001 
 3002   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3003     // FADDP  src2,ST  /* DE C0+i */
 3004     emit_opcode(cbuf, 0xDE);
 3005     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3006   %}
 3007 
 3008   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3009     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV   ST,$src2
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3017   %}
 3018 
 3019   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3020     // Operand was loaded from memory into fp ST (stack top)
 3021     // FADD   ST,$src  /* D8 C0+i */
 3022     emit_opcode(cbuf, 0xD8);
 3023     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3024 
 3025     // FMUL  ST,src2  /* D8 C*+i */
 3026     emit_opcode(cbuf, 0xD8);
 3027     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3028   %}
 3029 
 3030 
 3031   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3032     // Operand was loaded from memory into fp ST (stack top)
 3033     // FADD   ST,$src  /* D8 C0+i */
 3034     emit_opcode(cbuf, 0xD8);
 3035     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3036 
 3037     // FMULP  src2,ST  /* DE C8+i */
 3038     emit_opcode(cbuf, 0xDE);
 3039     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3040   %}
 3041 
 3042   // Atomically load the volatile long
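  // (Sketch of the emitted sequence: FILD qword [$mem] pulls the 64-bit
  //  value onto the x87 stack in a single access, then FISTP qword stores it
  //  to the stack slot $dst; the one 64-bit FP load is what provides the
  //  atomicity.)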
 3043   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3044     emit_opcode(cbuf,0xDF);
 3045     int rm_byte_opcode = 0x05;
 3046     int base     = $mem$$base;
 3047     int index    = $mem$$index;
 3048     int scale    = $mem$$scale;
 3049     int displace = $mem$$disp;
 3050     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3051     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3052     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3053   %}
 3054 
 3055   // Volatile Store Long.  Must be atomic, so move it into
 3056   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3057   // target address before the store (for null-ptr checks)
 3058   // so the memory operand is used twice in the encoding.
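  // (Sketch of the sequence emitted here: FILD qword from the stack slot
  //  holding $src, then FISTP qword to [$mem]; the preceding probe of the
  //  target address is presumably emitted by another encoding of the same
  //  instruct.)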
 3059   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3060     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3061     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3062     emit_opcode(cbuf,0xDF);
 3063     int rm_byte_opcode = 0x07;
 3064     int base     = $mem$$base;
 3065     int index    = $mem$$index;
 3066     int scale    = $mem$$scale;
 3067     int displace = $mem$$disp;
 3068     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3069     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3070   %}
 3071 
 3072 %}
 3073 
 3074 
 3075 //----------FRAME--------------------------------------------------------------
 3076 // Definition of frame structure and management information.
 3077 //
 3078 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3079 //                             |   (to get allocators register number
 3080 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3081 //  r   CALLER     |        |
 3082 //  o     |        +--------+      pad to even-align allocators stack-slot
 3083 //  w     V        |  pad0  |        numbers; owned by CALLER
 3084 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3085 //  h     ^        |   in   |  5
 3086 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3087 //  |     |        |        |  3
 3088 //  |     |        +--------+
 3089 //  V     |        | old out|      Empty on Intel, window on Sparc
 3090 //        |    old |preserve|      Must be even aligned.
 3091 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3092 //        |        |   in   |  3   area for Intel ret address
 3093 //     Owned by    |preserve|      Empty on Sparc.
 3094 //       SELF      +--------+
 3095 //        |        |  pad2  |  2   pad to align old SP
 3096 //        |        +--------+  1
 3097 //        |        | locks  |  0
 3098 //        |        +--------+----> OptoReg::stack0(), even aligned
 3099 //        |        |  pad1  | 11   pad to align new SP
 3100 //        |        +--------+
 3101 //        |        |        | 10
 3102 //        |        | spills |  9   spills
 3103 //        V        |        |  8   (pad0 slot for callee)
 3104 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3105 //        ^        |  out   |  7
 3106 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3107 //     Owned by    +--------+
 3108 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3109 //        |    new |preserve|      Must be even-aligned.
 3110 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3111 //        |        |        |
 3112 //
 3113 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3114 //         known from SELF's arguments and the Java calling convention.
 3115 //         Region 6-7 is determined per call site.
 3116 // Note 2: If the calling convention leaves holes in the incoming argument
 3117 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3118 //         are owned by the CALLEE.  Holes should not be necessary in the
 3119 //         incoming area, as the Java calling convention is completely under
 3120 //         the control of the AD file.  Doubles can be sorted and packed to
 3121 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3122 //         varargs C calling conventions.
 3123 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3124 //         even aligned with pad0 as needed.
 3125 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3126 //         region 6-11 is even aligned; it may be padded out more so that
 3127 //         the region from SP to FP meets the minimum stack alignment.
 3128 
 3129 frame %{
 3130   // These three registers define part of the calling convention
 3131   // between compiled code and the interpreter.
 3132   inline_cache_reg(EAX);                // Inline Cache Register
 3133 
 3134   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3135   cisc_spilling_operand_name(indOffset32);
 3136 
 3137   // Number of stack slots consumed by locking an object
 3138   sync_stack_slots(1);
 3139 
 3140   // Compiled code's Frame Pointer
 3141   frame_pointer(ESP);
 3142   // Interpreter stores its frame pointer in a register which is
 3143   // stored to the stack by I2CAdaptors.
 3144   // I2CAdaptors convert from interpreted java to compiled java.
 3145   interpreter_frame_pointer(EBP);
 3146 
 3147   // Stack alignment requirement
 3148   // Alignment size in bytes (128-bit -> 16 bytes)
 3149   stack_alignment(StackAlignmentInBytes);
 3150 
 3151   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3152   // for calls to C.  Supports the var-args backing area for register parms.
 3153   varargs_C_out_slots_killed(0);
 3154 
 3155   // The after-PROLOG location of the return address.  Location of
 3156   // return address specifies a type (REG or STACK) and a number
 3157   // representing the register number (i.e. - use a register name) or
 3158   // stack slot.
 3159   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3160   // Otherwise, it is above the locks and verification slot and alignment word
 3161   return_addr(STACK - 1 +
 3162               align_up((Compile::current()->in_preserve_stack_slots() +
 3163                         Compile::current()->fixed_slots()),
 3164                        stack_alignment_in_slots()));
 3165 
 3166   // Location of C & interpreter return values
 3167   c_return_value %{
 3168     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3169     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3170     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3171 
 3172     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3173     // that C functions return float and double results in XMM0.
 3174     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3175       return OptoRegPair(XMM0b_num,XMM0_num);
 3176     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3177       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3178 
 3179     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3180   %}
 3181 
 3182   // Location of return values
 3183   return_value %{
 3184     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3185     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3186     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3187     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3188       return OptoRegPair(XMM0b_num,XMM0_num);
 3189     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3190       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3191     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3192   %}
 3193 
 3194 %}
 3195 
 3196 //----------ATTRIBUTES---------------------------------------------------------
 3197 //----------Operand Attributes-------------------------------------------------
 3198 op_attrib op_cost(0);        // Required cost attribute
 3199 
 3200 //----------Instruction Attributes---------------------------------------------
 3201 ins_attrib ins_cost(100);       // Required cost attribute
 3202 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3203 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3204                                 // non-matching short branch variant of some
                                // long branch?
 3206 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3207                                 // specifies the alignment that some part of the instruction (not
 3208                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3209                                 // function must be provided for the instruction
 3210 
 3211 //----------OPERANDS-----------------------------------------------------------
 3212 // Operand definitions must precede instruction definitions for correct parsing
 3213 // in the ADLC because operands constitute user defined types which are used in
 3214 // instruction definitions.
 3215 
 3216 //----------Simple Operands----------------------------------------------------
 3217 // Immediate Operands
 3218 // Integer Immediate
 3219 operand immI() %{
 3220   match(ConI);
 3221 
 3222   op_cost(10);
 3223   format %{ %}
 3224   interface(CONST_INTER);
 3225 %}
 3226 
 3227 // Constant for test vs zero
 3228 operand immI_0() %{
 3229   predicate(n->get_int() == 0);
 3230   match(ConI);
 3231 
 3232   op_cost(0);
 3233   format %{ %}
 3234   interface(CONST_INTER);
 3235 %}
 3236 
 3237 // Constant for increment
 3238 operand immI_1() %{
 3239   predicate(n->get_int() == 1);
 3240   match(ConI);
 3241 
 3242   op_cost(0);
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 // Constant for decrement
 3248 operand immI_M1() %{
 3249   predicate(n->get_int() == -1);
 3250   match(ConI);
 3251 
 3252   op_cost(0);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Valid scale values for addressing modes
 3258 operand immI2() %{
 3259   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3260   match(ConI);
 3261 
 3262   format %{ %}
 3263   interface(CONST_INTER);
 3264 %}
 3265 
 3266 operand immI8() %{
 3267   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3268   match(ConI);
 3269 
 3270   op_cost(5);
 3271   format %{ %}
 3272   interface(CONST_INTER);
 3273 %}
 3274 
 3275 operand immU8() %{
 3276   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3277   match(ConI);
 3278 
 3279   op_cost(5);
 3280   format %{ %}
 3281   interface(CONST_INTER);
 3282 %}
 3283 
 3284 operand immI16() %{
 3285   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3286   match(ConI);
 3287 
 3288   op_cost(10);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 // Int Immediate non-negative
 3294 operand immU31()
 3295 %{
 3296   predicate(n->get_int() >= 0);
 3297   match(ConI);
 3298 
 3299   op_cost(0);
 3300   format %{ %}
 3301   interface(CONST_INTER);
 3302 %}
 3303 
 3304 // Constant for long shifts
 3305 operand immI_32() %{
 3306   predicate( n->get_int() == 32 );
 3307   match(ConI);
 3308 
 3309   op_cost(0);
 3310   format %{ %}
 3311   interface(CONST_INTER);
 3312 %}
 3313 
 3314 operand immI_1_31() %{
 3315   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3316   match(ConI);
 3317 
 3318   op_cost(0);
 3319   format %{ %}
 3320   interface(CONST_INTER);
 3321 %}
 3322 
 3323 operand immI_32_63() %{
 3324   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3325   match(ConI);
 3326   op_cost(0);
 3327 
 3328   format %{ %}
 3329   interface(CONST_INTER);
 3330 %}
 3331 
 3332 operand immI_2() %{
 3333   predicate( n->get_int() == 2 );
 3334   match(ConI);
 3335 
 3336   op_cost(0);
 3337   format %{ %}
 3338   interface(CONST_INTER);
 3339 %}
 3340 
 3341 operand immI_3() %{
 3342   predicate( n->get_int() == 3 );
 3343   match(ConI);
 3344 
 3345   op_cost(0);
 3346   format %{ %}
 3347   interface(CONST_INTER);
 3348 %}
 3349 
 3350 operand immI_4()
 3351 %{
 3352   predicate(n->get_int() == 4);
 3353   match(ConI);
 3354 
 3355   op_cost(0);
 3356   format %{ %}
 3357   interface(CONST_INTER);
 3358 %}
 3359 
 3360 operand immI_8()
 3361 %{
 3362   predicate(n->get_int() == 8);
 3363   match(ConI);
 3364 
 3365   op_cost(0);
 3366   format %{ %}
 3367   interface(CONST_INTER);
 3368 %}
 3369 
 3370 // Pointer Immediate
 3371 operand immP() %{
 3372   match(ConP);
 3373 
 3374   op_cost(10);
 3375   format %{ %}
 3376   interface(CONST_INTER);
 3377 %}
 3378 
 3379 // NULL Pointer Immediate
 3380 operand immP0() %{
 3381   predicate( n->get_ptr() == 0 );
 3382   match(ConP);
 3383   op_cost(0);
 3384 
 3385   format %{ %}
 3386   interface(CONST_INTER);
 3387 %}
 3388 
 3389 // Long Immediate
 3390 operand immL() %{
 3391   match(ConL);
 3392 
 3393   op_cost(20);
 3394   format %{ %}
 3395   interface(CONST_INTER);
 3396 %}
 3397 
 3398 // Long Immediate zero
 3399 operand immL0() %{
 3400   predicate( n->get_long() == 0L );
 3401   match(ConL);
 3402   op_cost(0);
 3403 
 3404   format %{ %}
 3405   interface(CONST_INTER);
 3406 %}
 3407 
// Long Immediate: -1
 3409 operand immL_M1() %{
 3410   predicate( n->get_long() == -1L );
 3411   match(ConL);
 3412   op_cost(0);
 3413 
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long immediate from 0 to 127.
 3419 // Used for a shorter form of long mul by 10.
 3420 operand immL_127() %{
 3421   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3422   match(ConL);
 3423   op_cost(0);
 3424 
 3425   format %{ %}
 3426   interface(CONST_INTER);
 3427 %}
 3428 
 3429 // Long Immediate: low 32-bit mask
 3430 operand immL_32bits() %{
 3431   predicate(n->get_long() == 0xFFFFFFFFL);
 3432   match(ConL);
 3433   op_cost(0);
 3434 
 3435   format %{ %}
 3436   interface(CONST_INTER);
 3437 %}
 3438 
// Long Immediate: value fits in a signed 32-bit int
 3440 operand immL32() %{
 3441   predicate(n->get_long() == (int)(n->get_long()));
 3442   match(ConL);
 3443   op_cost(20);
 3444 
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
// Double Immediate zero
 3450 operand immDPR0() %{
 3451   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3452   // bug that generates code such that NaNs compare equal to 0.0
 3453   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3454   match(ConD);
 3455 
 3456   op_cost(5);
 3457   format %{ %}
 3458   interface(CONST_INTER);
 3459 %}
 3460 
 3461 // Double Immediate one
 3462 operand immDPR1() %{
 3463   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3464   match(ConD);
 3465 
 3466   op_cost(5);
 3467   format %{ %}
 3468   interface(CONST_INTER);
 3469 %}
 3470 
 3471 // Double Immediate
 3472 operand immDPR() %{
 3473   predicate(UseSSE<=1);
 3474   match(ConD);
 3475 
 3476   op_cost(5);
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 operand immD() %{
 3482   predicate(UseSSE>=2);
 3483   match(ConD);
 3484 
 3485   op_cost(5);
 3486   format %{ %}
 3487   interface(CONST_INTER);
 3488 %}
 3489 
 3490 // Double Immediate zero
 3491 operand immD0() %{
 3492   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3493   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3494   // compare equal to -0.0.
 3495   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3496   match(ConD);
 3497 
 3498   format %{ %}
 3499   interface(CONST_INTER);
 3500 %}
 3501 
 3502 // Float Immediate zero
 3503 operand immFPR0() %{
 3504   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3505   match(ConF);
 3506 
 3507   op_cost(5);
 3508   format %{ %}
 3509   interface(CONST_INTER);
 3510 %}
 3511 
 3512 // Float Immediate one
 3513 operand immFPR1() %{
 3514   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3515   match(ConF);
 3516 
 3517   op_cost(5);
 3518   format %{ %}
 3519   interface(CONST_INTER);
 3520 %}
 3521 
 3522 // Float Immediate
 3523 operand immFPR() %{
 3524   predicate( UseSSE == 0 );
 3525   match(ConF);
 3526 
 3527   op_cost(5);
 3528   format %{ %}
 3529   interface(CONST_INTER);
 3530 %}
 3531 
 3532 // Float Immediate
 3533 operand immF() %{
 3534   predicate(UseSSE >= 1);
 3535   match(ConF);
 3536 
 3537   op_cost(5);
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 // Float Immediate zero.  Zero and not -0.0
 3543 operand immF0() %{
 3544   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3545   match(ConF);
 3546 
 3547   op_cost(5);
 3548   format %{ %}
 3549   interface(CONST_INTER);
 3550 %}
 3551 
 3552 // Immediates for special shifts (sign extend)
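// For example, (x << 24) >> 24 sign-extends a byte and (x << 16) >> 16
// sign-extends a short, which is presumably why 16 and 24 get dedicated
// operands here.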
 3553 
// Shift counts used for sign extension
 3555 operand immI_16() %{
 3556   predicate( n->get_int() == 16 );
 3557   match(ConI);
 3558 
 3559   format %{ %}
 3560   interface(CONST_INTER);
 3561 %}
 3562 
 3563 operand immI_24() %{
 3564   predicate( n->get_int() == 24 );
 3565   match(ConI);
 3566 
 3567   format %{ %}
 3568   interface(CONST_INTER);
 3569 %}
 3570 
 3571 // Constant for byte-wide masking
 3572 operand immI_255() %{
 3573   predicate( n->get_int() == 255 );
 3574   match(ConI);
 3575 
 3576   format %{ %}
 3577   interface(CONST_INTER);
 3578 %}
 3579 
 3580 // Constant for short-wide masking
 3581 operand immI_65535() %{
 3582   predicate(n->get_int() == 65535);
 3583   match(ConI);
 3584 
 3585   format %{ %}
 3586   interface(CONST_INTER);
 3587 %}
 3588 
 3589 operand kReg()
 3590 %{
 3591   constraint(ALLOC_IN_RC(vectmask_reg));
 3592   match(RegVectMask);
 3593   format %{%}
 3594   interface(REG_INTER);
 3595 %}
 3596 
 3597 operand kReg_K1()
 3598 %{
 3599   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3600   match(RegVectMask);
 3601   format %{%}
 3602   interface(REG_INTER);
 3603 %}
 3604 
 3605 operand kReg_K2()
 3606 %{
 3607   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3608   match(RegVectMask);
 3609   format %{%}
 3610   interface(REG_INTER);
 3611 %}
 3612 
 3613 // Special Registers
 3614 operand kReg_K3()
 3615 %{
 3616   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3617   match(RegVectMask);
 3618   format %{%}
 3619   interface(REG_INTER);
 3620 %}
 3621 
 3622 operand kReg_K4()
 3623 %{
 3624   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3625   match(RegVectMask);
 3626   format %{%}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 operand kReg_K5()
 3631 %{
 3632   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3633   match(RegVectMask);
 3634   format %{%}
 3635   interface(REG_INTER);
 3636 %}
 3637 
 3638 operand kReg_K6()
 3639 %{
 3640   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3641   match(RegVectMask);
 3642   format %{%}
 3643   interface(REG_INTER);
 3644 %}
 3645 
 3646 // Special Registers
 3647 operand kReg_K7()
 3648 %{
 3649   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3650   match(RegVectMask);
 3651   format %{%}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 // Register Operands
 3656 // Integer Register
 3657 operand rRegI() %{
 3658   constraint(ALLOC_IN_RC(int_reg));
 3659   match(RegI);
 3660   match(xRegI);
 3661   match(eAXRegI);
 3662   match(eBXRegI);
 3663   match(eCXRegI);
 3664   match(eDXRegI);
 3665   match(eDIRegI);
 3666   match(eSIRegI);
 3667 
 3668   format %{ %}
 3669   interface(REG_INTER);
 3670 %}
 3671 
 3672 // Subset of Integer Register
 3673 operand xRegI(rRegI reg) %{
 3674   constraint(ALLOC_IN_RC(int_x_reg));
 3675   match(reg);
 3676   match(eAXRegI);
 3677   match(eBXRegI);
 3678   match(eCXRegI);
 3679   match(eDXRegI);
 3680 
 3681   format %{ %}
 3682   interface(REG_INTER);
 3683 %}
 3684 
 3685 // Special Registers
 3686 operand eAXRegI(xRegI reg) %{
 3687   constraint(ALLOC_IN_RC(eax_reg));
 3688   match(reg);
 3689   match(rRegI);
 3690 
 3691   format %{ "EAX" %}
 3692   interface(REG_INTER);
 3693 %}
 3694 
 3695 // Special Registers
 3696 operand eBXRegI(xRegI reg) %{
 3697   constraint(ALLOC_IN_RC(ebx_reg));
 3698   match(reg);
 3699   match(rRegI);
 3700 
 3701   format %{ "EBX" %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 operand eCXRegI(xRegI reg) %{
 3706   constraint(ALLOC_IN_RC(ecx_reg));
 3707   match(reg);
 3708   match(rRegI);
 3709 
 3710   format %{ "ECX" %}
 3711   interface(REG_INTER);
 3712 %}
 3713 
 3714 operand eDXRegI(xRegI reg) %{
 3715   constraint(ALLOC_IN_RC(edx_reg));
 3716   match(reg);
 3717   match(rRegI);
 3718 
 3719   format %{ "EDX" %}
 3720   interface(REG_INTER);
 3721 %}
 3722 
 3723 operand eDIRegI(xRegI reg) %{
 3724   constraint(ALLOC_IN_RC(edi_reg));
 3725   match(reg);
 3726   match(rRegI);
 3727 
 3728   format %{ "EDI" %}
 3729   interface(REG_INTER);
 3730 %}
 3731 
 3732 operand naxRegI() %{
 3733   constraint(ALLOC_IN_RC(nax_reg));
 3734   match(RegI);
 3735   match(eCXRegI);
 3736   match(eDXRegI);
 3737   match(eSIRegI);
 3738   match(eDIRegI);
 3739 
 3740   format %{ %}
 3741   interface(REG_INTER);
 3742 %}
 3743 
 3744 operand nadxRegI() %{
 3745   constraint(ALLOC_IN_RC(nadx_reg));
 3746   match(RegI);
 3747   match(eBXRegI);
 3748   match(eCXRegI);
 3749   match(eSIRegI);
 3750   match(eDIRegI);
 3751 
 3752   format %{ %}
 3753   interface(REG_INTER);
 3754 %}
 3755 
 3756 operand ncxRegI() %{
 3757   constraint(ALLOC_IN_RC(ncx_reg));
 3758   match(RegI);
 3759   match(eAXRegI);
 3760   match(eDXRegI);
 3761   match(eSIRegI);
 3762   match(eDIRegI);
 3763 
 3764   format %{ %}
 3765   interface(REG_INTER);
 3766 %}
 3767 
 3768 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3769 // //
 3770 operand eSIRegI(xRegI reg) %{
 3771    constraint(ALLOC_IN_RC(esi_reg));
 3772    match(reg);
 3773    match(rRegI);
 3774 
 3775    format %{ "ESI" %}
 3776    interface(REG_INTER);
 3777 %}
 3778 
 3779 // Pointer Register
 3780 operand anyRegP() %{
 3781   constraint(ALLOC_IN_RC(any_reg));
 3782   match(RegP);
 3783   match(eAXRegP);
 3784   match(eBXRegP);
 3785   match(eCXRegP);
 3786   match(eDIRegP);
 3787   match(eRegP);
 3788 
 3789   format %{ %}
 3790   interface(REG_INTER);
 3791 %}
 3792 
 3793 operand eRegP() %{
 3794   constraint(ALLOC_IN_RC(int_reg));
 3795   match(RegP);
 3796   match(eAXRegP);
 3797   match(eBXRegP);
 3798   match(eCXRegP);
 3799   match(eDIRegP);
 3800 
 3801   format %{ %}
 3802   interface(REG_INTER);
 3803 %}
 3804 
 3805 operand rRegP() %{
 3806   constraint(ALLOC_IN_RC(int_reg));
 3807   match(RegP);
 3808   match(eAXRegP);
 3809   match(eBXRegP);
 3810   match(eCXRegP);
 3811   match(eDIRegP);
 3812 
 3813   format %{ %}
 3814   interface(REG_INTER);
 3815 %}
 3816 
 3817 // On windows95, EBP is not safe to use for implicit null tests.
 3818 operand eRegP_no_EBP() %{
 3819   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3820   match(RegP);
 3821   match(eAXRegP);
 3822   match(eBXRegP);
 3823   match(eCXRegP);
 3824   match(eDIRegP);
 3825 
 3826   op_cost(100);
 3827   format %{ %}
 3828   interface(REG_INTER);
 3829 %}
 3830 
 3831 operand naxRegP() %{
 3832   constraint(ALLOC_IN_RC(nax_reg));
 3833   match(RegP);
 3834   match(eBXRegP);
 3835   match(eDXRegP);
 3836   match(eCXRegP);
 3837   match(eSIRegP);
 3838   match(eDIRegP);
 3839 
 3840   format %{ %}
 3841   interface(REG_INTER);
 3842 %}
 3843 
 3844 operand nabxRegP() %{
 3845   constraint(ALLOC_IN_RC(nabx_reg));
 3846   match(RegP);
 3847   match(eCXRegP);
 3848   match(eDXRegP);
 3849   match(eSIRegP);
 3850   match(eDIRegP);
 3851 
 3852   format %{ %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 operand pRegP() %{
 3857   constraint(ALLOC_IN_RC(p_reg));
 3858   match(RegP);
 3859   match(eBXRegP);
 3860   match(eDXRegP);
 3861   match(eSIRegP);
 3862   match(eDIRegP);
 3863 
 3864   format %{ %}
 3865   interface(REG_INTER);
 3866 %}
 3867 
 3868 // Special Registers
 3869 // Return a pointer value
 3870 operand eAXRegP(eRegP reg) %{
 3871   constraint(ALLOC_IN_RC(eax_reg));
 3872   match(reg);
 3873   format %{ "EAX" %}
 3874   interface(REG_INTER);
 3875 %}
 3876 
 3877 // Used in AtomicAdd
 3878 operand eBXRegP(eRegP reg) %{
 3879   constraint(ALLOC_IN_RC(ebx_reg));
 3880   match(reg);
 3881   format %{ "EBX" %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Tail-call (interprocedural jump) to interpreter
 3886 operand eCXRegP(eRegP reg) %{
 3887   constraint(ALLOC_IN_RC(ecx_reg));
 3888   match(reg);
 3889   format %{ "ECX" %}
 3890   interface(REG_INTER);
 3891 %}
 3892 
 3893 operand eDXRegP(eRegP reg) %{
 3894   constraint(ALLOC_IN_RC(edx_reg));
 3895   match(reg);
 3896   format %{ "EDX" %}
 3897   interface(REG_INTER);
 3898 %}
 3899 
 3900 operand eSIRegP(eRegP reg) %{
 3901   constraint(ALLOC_IN_RC(esi_reg));
 3902   match(reg);
 3903   format %{ "ESI" %}
 3904   interface(REG_INTER);
 3905 %}
 3906 
 3907 // Used in rep stosw
 3908 operand eDIRegP(eRegP reg) %{
 3909   constraint(ALLOC_IN_RC(edi_reg));
 3910   match(reg);
 3911   format %{ "EDI" %}
 3912   interface(REG_INTER);
 3913 %}
 3914 
 3915 operand eRegL() %{
 3916   constraint(ALLOC_IN_RC(long_reg));
 3917   match(RegL);
 3918   match(eADXRegL);
 3919 
 3920   format %{ %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 operand eADXRegL( eRegL reg ) %{
 3925   constraint(ALLOC_IN_RC(eadx_reg));
 3926   match(reg);
 3927 
 3928   format %{ "EDX:EAX" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand eBCXRegL( eRegL reg ) %{
 3933   constraint(ALLOC_IN_RC(ebcx_reg));
 3934   match(reg);
 3935 
 3936   format %{ "EBX:ECX" %}
 3937   interface(REG_INTER);
 3938 %}
 3939 
 3940 operand eBDPRegL( eRegL reg ) %{
 3941   constraint(ALLOC_IN_RC(ebpd_reg));
 3942   match(reg);
 3943 
 3944   format %{ "EBP:EDI" %}
 3945   interface(REG_INTER);
 3946 %}
 3947 // Special case for integer high multiply
 3948 operand eADXRegL_low_only() %{
 3949   constraint(ALLOC_IN_RC(eadx_reg));
 3950   match(RegL);
 3951 
 3952   format %{ "EAX" %}
 3953   interface(REG_INTER);
 3954 %}
 3955 
 3956 // Flags register, used as output of compare instructions
 3957 operand rFlagsReg() %{
 3958   constraint(ALLOC_IN_RC(int_flags));
 3959   match(RegFlags);
 3960 
 3961   format %{ "EFLAGS" %}
 3962   interface(REG_INTER);
 3963 %}
 3964 
 3965 // Flags register, used as output of compare instructions
 3966 operand eFlagsReg() %{
 3967   constraint(ALLOC_IN_RC(int_flags));
 3968   match(RegFlags);
 3969 
 3970   format %{ "EFLAGS" %}
 3971   interface(REG_INTER);
 3972 %}
 3973 
 3974 // Flags register, used as output of FLOATING POINT compare instructions
 3975 operand eFlagsRegU() %{
 3976   constraint(ALLOC_IN_RC(int_flags));
 3977   match(RegFlags);
 3978 
 3979   format %{ "EFLAGS_U" %}
 3980   interface(REG_INTER);
 3981 %}
 3982 
 3983 operand eFlagsRegUCF() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986   predicate(false);
 3987 
 3988   format %{ "EFLAGS_U_CF" %}
 3989   interface(REG_INTER);
 3990 %}
 3991 
 3992 // Condition Code Register used by long compare
 3993 operand flagsReg_long_LTGE() %{
 3994   constraint(ALLOC_IN_RC(int_flags));
 3995   match(RegFlags);
 3996   format %{ "FLAGS_LTGE" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 operand flagsReg_long_EQNE() %{
 4000   constraint(ALLOC_IN_RC(int_flags));
 4001   match(RegFlags);
 4002   format %{ "FLAGS_EQNE" %}
 4003   interface(REG_INTER);
 4004 %}
 4005 operand flagsReg_long_LEGT() %{
 4006   constraint(ALLOC_IN_RC(int_flags));
 4007   match(RegFlags);
 4008   format %{ "FLAGS_LEGT" %}
 4009   interface(REG_INTER);
 4010 %}
 4011 
 4012 // Condition Code Register used by unsigned long compare
 4013 operand flagsReg_ulong_LTGE() %{
 4014   constraint(ALLOC_IN_RC(int_flags));
 4015   match(RegFlags);
 4016   format %{ "FLAGS_U_LTGE" %}
 4017   interface(REG_INTER);
 4018 %}
 4019 operand flagsReg_ulong_EQNE() %{
 4020   constraint(ALLOC_IN_RC(int_flags));
 4021   match(RegFlags);
 4022   format %{ "FLAGS_U_EQNE" %}
 4023   interface(REG_INTER);
 4024 %}
 4025 operand flagsReg_ulong_LEGT() %{
 4026   constraint(ALLOC_IN_RC(int_flags));
 4027   match(RegFlags);
 4028   format %{ "FLAGS_U_LEGT" %}
 4029   interface(REG_INTER);
 4030 %}
 4031 
 4032 // Float register operands
 4033 operand regDPR() %{
 4034   predicate( UseSSE < 2 );
 4035   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4036   match(RegD);
 4037   match(regDPR1);
 4038   match(regDPR2);
 4039   format %{ %}
 4040   interface(REG_INTER);
 4041 %}
 4042 
 4043 operand regDPR1(regDPR reg) %{
 4044   predicate( UseSSE < 2 );
 4045   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4046   match(reg);
 4047   format %{ "FPR1" %}
 4048   interface(REG_INTER);
 4049 %}
 4050 
 4051 operand regDPR2(regDPR reg) %{
 4052   predicate( UseSSE < 2 );
 4053   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4054   match(reg);
 4055   format %{ "FPR2" %}
 4056   interface(REG_INTER);
 4057 %}
 4058 
 4059 operand regnotDPR1(regDPR reg) %{
 4060   predicate( UseSSE < 2 );
 4061   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4062   match(reg);
 4063   format %{ %}
 4064   interface(REG_INTER);
 4065 %}
 4066 
 4067 // Float register operands
 4068 operand regFPR() %{
 4069   predicate( UseSSE < 2 );
 4070   constraint(ALLOC_IN_RC(fp_flt_reg));
 4071   match(RegF);
 4072   match(regFPR1);
 4073   format %{ %}
 4074   interface(REG_INTER);
 4075 %}
 4076 
 4077 // Float register operands
 4078 operand regFPR1(regFPR reg) %{
 4079   predicate( UseSSE < 2 );
 4080   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4081   match(reg);
 4082   format %{ "FPR1" %}
 4083   interface(REG_INTER);
 4084 %}
 4085 
 4086 // XMM Float register operands
 4087 operand regF() %{
 4088   predicate( UseSSE>=1 );
 4089   constraint(ALLOC_IN_RC(float_reg_legacy));
 4090   match(RegF);
 4091   format %{ %}
 4092   interface(REG_INTER);
 4093 %}
 4094 
 4095 operand legRegF() %{
 4096   predicate( UseSSE>=1 );
 4097   constraint(ALLOC_IN_RC(float_reg_legacy));
 4098   match(RegF);
 4099   format %{ %}
 4100   interface(REG_INTER);
 4101 %}
 4102 
 4103 // Float register operands
 4104 operand vlRegF() %{
 4105    constraint(ALLOC_IN_RC(float_reg_vl));
 4106    match(RegF);
 4107 
 4108    format %{ %}
 4109    interface(REG_INTER);
 4110 %}
 4111 
 4112 // XMM Double register operands
 4113 operand regD() %{
 4114   predicate( UseSSE>=2 );
 4115   constraint(ALLOC_IN_RC(double_reg_legacy));
 4116   match(RegD);
 4117   format %{ %}
 4118   interface(REG_INTER);
 4119 %}
 4120 
 4121 // Double register operands
 4122 operand legRegD() %{
 4123   predicate( UseSSE>=2 );
 4124   constraint(ALLOC_IN_RC(double_reg_legacy));
 4125   match(RegD);
 4126   format %{ %}
 4127   interface(REG_INTER);
 4128 %}
 4129 
 4130 operand vlRegD() %{
 4131    constraint(ALLOC_IN_RC(double_reg_vl));
 4132    match(RegD);
 4133 
 4134    format %{ %}
 4135    interface(REG_INTER);
 4136 %}
 4137 
 4138 //----------Memory Operands----------------------------------------------------
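// (Encoding conventions used by the MEMORY_INTER blocks below, as suggested
//  by the stack-slot operands further down: index(0x4) means "no index
//  register", since ESP cannot be used as an index, and base(0xFFFFFFFF)
//  appears to mean "no base register", leaving only the displacement.)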
 4139 // Direct Memory Operand
 4140 operand direct(immP addr) %{
 4141   match(addr);
 4142 
 4143   format %{ "[$addr]" %}
 4144   interface(MEMORY_INTER) %{
 4145     base(0xFFFFFFFF);
 4146     index(0x4);
 4147     scale(0x0);
 4148     disp($addr);
 4149   %}
 4150 %}
 4151 
 4152 // Indirect Memory Operand
 4153 operand indirect(eRegP reg) %{
 4154   constraint(ALLOC_IN_RC(int_reg));
 4155   match(reg);
 4156 
 4157   format %{ "[$reg]" %}
 4158   interface(MEMORY_INTER) %{
 4159     base($reg);
 4160     index(0x4);
 4161     scale(0x0);
 4162     disp(0x0);
 4163   %}
 4164 %}
 4165 
 4166 // Indirect Memory Plus Short Offset Operand
 4167 operand indOffset8(eRegP reg, immI8 off) %{
 4168   match(AddP reg off);
 4169 
 4170   format %{ "[$reg + $off]" %}
 4171   interface(MEMORY_INTER) %{
 4172     base($reg);
 4173     index(0x4);
 4174     scale(0x0);
 4175     disp($off);
 4176   %}
 4177 %}
 4178 
 4179 // Indirect Memory Plus Long Offset Operand
 4180 operand indOffset32(eRegP reg, immI off) %{
 4181   match(AddP reg off);
 4182 
 4183   format %{ "[$reg + $off]" %}
 4184   interface(MEMORY_INTER) %{
 4185     base($reg);
 4186     index(0x4);
 4187     scale(0x0);
 4188     disp($off);
 4189   %}
 4190 %}
 4191 
 4192 // Indirect Memory Plus Long Offset Operand
 4193 operand indOffset32X(rRegI reg, immP off) %{
 4194   match(AddP off reg);
 4195 
 4196   format %{ "[$reg + $off]" %}
 4197   interface(MEMORY_INTER) %{
 4198     base($reg);
 4199     index(0x4);
 4200     scale(0x0);
 4201     disp($off);
 4202   %}
 4203 %}
 4204 
 4205 // Indirect Memory Plus Index Register Plus Offset Operand
 4206 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4207   match(AddP (AddP reg ireg) off);
 4208 
 4209   op_cost(10);
 4210   format %{"[$reg + $off + $ireg]" %}
 4211   interface(MEMORY_INTER) %{
 4212     base($reg);
 4213     index($ireg);
 4214     scale(0x0);
 4215     disp($off);
 4216   %}
 4217 %}
 4218 
// Indirect Memory Plus Index Register Operand
 4220 operand indIndex(eRegP reg, rRegI ireg) %{
 4221   match(AddP reg ireg);
 4222 
 4223   op_cost(10);
 4224   format %{"[$reg + $ireg]" %}
 4225   interface(MEMORY_INTER) %{
 4226     base($reg);
 4227     index($ireg);
 4228     scale(0x0);
 4229     disp(0x0);
 4230   %}
 4231 %}
 4232 
 4233 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4235 // // -------------------------------------------------------------------------
 4236 // // Scaled Memory Operands
 4237 // // Indirect Memory Times Scale Plus Offset Operand
 4238 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4239 //   match(AddP off (LShiftI ireg scale));
 4240 //
 4241 //   op_cost(10);
 4242 //   format %{"[$off + $ireg << $scale]" %}
 4243 //   interface(MEMORY_INTER) %{
 4244 //     base(0x4);
 4245 //     index($ireg);
 4246 //     scale($scale);
 4247 //     disp($off);
 4248 //   %}
 4249 // %}
 4250 
 4251 // Indirect Memory Times Scale Plus Index Register
 4252 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4253   match(AddP reg (LShiftI ireg scale));
 4254 
 4255   op_cost(10);
 4256   format %{"[$reg + $ireg << $scale]" %}
 4257   interface(MEMORY_INTER) %{
 4258     base($reg);
 4259     index($ireg);
 4260     scale($scale);
 4261     disp(0x0);
 4262   %}
 4263 %}
 4264 
 4265 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4266 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4267   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4268 
 4269   op_cost(10);
 4270   format %{"[$reg + $off + $ireg << $scale]" %}
 4271   interface(MEMORY_INTER) %{
 4272     base($reg);
 4273     index($ireg);
 4274     scale($scale);
 4275     disp($off);
 4276   %}
 4277 %}
 4278 
 4279 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4281 // the first word of the long.  If the load-long destination overlaps with
 4282 // registers used in the addressing expression, the 2nd half will be loaded
 4283 // from a clobbered address.  Fix this by requiring that load-long use
 4284 // address registers that do not overlap with the load-long target.
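// A hypothetical example of the hazard: loading a long at [EDX] into EDX:EAX
// as "MOV EAX,[EDX]; MOV EDX,[EDX+4]" clobbers the base register before the
// second word is read; restricting the address to ESI (load_long_RegP below)
// sidesteps the overlap.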
 4285 
 4286 // load-long support
 4287 operand load_long_RegP() %{
 4288   constraint(ALLOC_IN_RC(esi_reg));
 4289   match(RegP);
 4290   match(eSIRegP);
 4291   op_cost(100);
 4292   format %{  %}
 4293   interface(REG_INTER);
 4294 %}
 4295 
 4296 // Indirect Memory Operand Long
 4297 operand load_long_indirect(load_long_RegP reg) %{
 4298   constraint(ALLOC_IN_RC(esi_reg));
 4299   match(reg);
 4300 
 4301   format %{ "[$reg]" %}
 4302   interface(MEMORY_INTER) %{
 4303     base($reg);
 4304     index(0x4);
 4305     scale(0x0);
 4306     disp(0x0);
 4307   %}
 4308 %}
 4309 
 4310 // Indirect Memory Plus Long Offset Operand
 4311 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4312   match(AddP reg off);
 4313 
 4314   format %{ "[$reg + $off]" %}
 4315   interface(MEMORY_INTER) %{
 4316     base($reg);
 4317     index(0x4);
 4318     scale(0x0);
 4319     disp($off);
 4320   %}
 4321 %}
 4322 
 4323 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4324 
 4325 
 4326 //----------Special Memory Operands--------------------------------------------
 4327 // Stack Slot Operand - This operand is used for loading and storing temporary
 4328 //                      values on the stack where a match requires a value to
 4329 //                      flow through memory.
 4330 operand stackSlotP(sRegP reg) %{
 4331   constraint(ALLOC_IN_RC(stack_slots));
 4332   // No match rule because this operand is only generated in matching
 4333   format %{ "[$reg]" %}
 4334   interface(MEMORY_INTER) %{
 4335     base(0x4);   // ESP
 4336     index(0x4);  // No Index
 4337     scale(0x0);  // No Scale
 4338     disp($reg);  // Stack Offset
 4339   %}
 4340 %}
 4341 
 4342 operand stackSlotI(sRegI reg) %{
 4343   constraint(ALLOC_IN_RC(stack_slots));
 4344   // No match rule because this operand is only generated in matching
 4345   format %{ "[$reg]" %}
 4346   interface(MEMORY_INTER) %{
 4347     base(0x4);   // ESP
 4348     index(0x4);  // No Index
 4349     scale(0x0);  // No Scale
 4350     disp($reg);  // Stack Offset
 4351   %}
 4352 %}
 4353 
 4354 operand stackSlotF(sRegF reg) %{
 4355   constraint(ALLOC_IN_RC(stack_slots));
 4356   // No match rule because this operand is only generated in matching
 4357   format %{ "[$reg]" %}
 4358   interface(MEMORY_INTER) %{
 4359     base(0x4);   // ESP
 4360     index(0x4);  // No Index
 4361     scale(0x0);  // No Scale
 4362     disp($reg);  // Stack Offset
 4363   %}
 4364 %}
 4365 
 4366 operand stackSlotD(sRegD reg) %{
 4367   constraint(ALLOC_IN_RC(stack_slots));
 4368   // No match rule because this operand is only generated in matching
 4369   format %{ "[$reg]" %}
 4370   interface(MEMORY_INTER) %{
 4371     base(0x4);   // ESP
 4372     index(0x4);  // No Index
 4373     scale(0x0);  // No Scale
 4374     disp($reg);  // Stack Offset
 4375   %}
 4376 %}
 4377 
 4378 operand stackSlotL(sRegL reg) %{
 4379   constraint(ALLOC_IN_RC(stack_slots));
 4380   // No match rule because this operand is only generated in matching
 4381   format %{ "[$reg]" %}
 4382   interface(MEMORY_INTER) %{
 4383     base(0x4);   // ESP
 4384     index(0x4);  // No Index
 4385     scale(0x0);  // No Scale
 4386     disp($reg);  // Stack Offset
 4387   %}
 4388 %}
 4389 
 4390 //----------Conditional Branch Operands----------------------------------------
 4391 // Comparison Op  - This is the operation of the comparison, and is limited to
 4392 //                  the following set of codes:
 4393 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4394 //
 4395 // Other attributes of the comparison, such as unsignedness, are specified
 4396 // by the comparison instruction that sets a condition code flags register.
 4397 // That result is represented by a flags operand whose subtype is appropriate
 4398 // to the unsignedness (etc.) of the comparison.
 4399 //
 4400 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4401 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4402 // by matching a specific subtype of Bool operand below, such as cmpOpU.
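// A sketch of how the pieces combine (illustrative only, not a rule taken
// from this file): a CmpI node produces an eFlagsReg result, its Bool user
// is matched as a cmpOp operand, and a conditional-branch instruct matching
// (If cmpOp eFlagsReg) emits a Jcc whose condition field comes from the
// encodings below (e.g. 0xC for signed "less").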
 4403 
 4404 // Comparison Code
 4405 operand cmpOp() %{
 4406   match(Bool);
 4407 
 4408   format %{ "" %}
 4409   interface(COND_INTER) %{
 4410     equal(0x4, "e");
 4411     not_equal(0x5, "ne");
 4412     less(0xC, "l");
 4413     greater_equal(0xD, "ge");
 4414     less_equal(0xE, "le");
 4415     greater(0xF, "g");
 4416     overflow(0x0, "o");
 4417     no_overflow(0x1, "no");
 4418   %}
 4419 %}
 4420 
 4421 // Comparison Code, unsigned compare.  Used by FP also, with
 4422 // C2 (unordered) turned into GT or LT already.  The other bits
 4423 // C0 and C3 are turned into Carry & Zero flags.
 4424 operand cmpOpU() %{
 4425   match(Bool);
 4426 
 4427   format %{ "" %}
 4428   interface(COND_INTER) %{
 4429     equal(0x4, "e");
 4430     not_equal(0x5, "ne");
 4431     less(0x2, "b");
 4432     greater_equal(0x3, "nb");
 4433     less_equal(0x6, "be");
 4434     greater(0x7, "nbe");
 4435     overflow(0x0, "o");
 4436     no_overflow(0x1, "no");
 4437   %}
 4438 %}
 4439 
 4440 // Floating comparisons that don't require any fixup for the unordered case
 4441 operand cmpOpUCF() %{
 4442   match(Bool);
 4443   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4444             n->as_Bool()->_test._test == BoolTest::ge ||
 4445             n->as_Bool()->_test._test == BoolTest::le ||
 4446             n->as_Bool()->_test._test == BoolTest::gt);
 4447   format %{ "" %}
 4448   interface(COND_INTER) %{
 4449     equal(0x4, "e");
 4450     not_equal(0x5, "ne");
 4451     less(0x2, "b");
 4452     greater_equal(0x3, "nb");
 4453     less_equal(0x6, "be");
 4454     greater(0x7, "nbe");
 4455     overflow(0x0, "o");
 4456     no_overflow(0x1, "no");
 4457   %}
 4458 %}
 4459 
 4460 
 4461 // Floating comparisons that can be fixed up with extra conditional jumps
 4462 operand cmpOpUCF2() %{
 4463   match(Bool);
 4464   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4465             n->as_Bool()->_test._test == BoolTest::eq);
 4466   format %{ "" %}
 4467   interface(COND_INTER) %{
 4468     equal(0x4, "e");
 4469     not_equal(0x5, "ne");
 4470     less(0x2, "b");
 4471     greater_equal(0x3, "nb");
 4472     less_equal(0x6, "be");
 4473     greater(0x7, "nbe");
 4474     overflow(0x0, "o");
 4475     no_overflow(0x1, "no");
 4476   %}
 4477 %}
 4478 
 4479 // Comparison Code for FP conditional move
 4480 operand cmpOp_fcmov() %{
 4481   match(Bool);
 4482 
 4483   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4484             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4485   format %{ "" %}
 4486   interface(COND_INTER) %{
 4487     equal        (0x0C8);
 4488     not_equal    (0x1C8);
 4489     less         (0x0C0);
 4490     greater_equal(0x1C0);
 4491     less_equal   (0x0D0);
 4492     greater      (0x1D0);
 4493     overflow(0x0, "o"); // not really supported by the instruction
 4494     no_overflow(0x1, "no"); // not really supported by the instruction
 4495   %}
 4496 %}
 4497 
 4498 // Comparison Code used in commuted (operand-swapped) long compares
 4499 operand cmpOp_commute() %{
 4500   match(Bool);
 4501 
 4502   format %{ "" %}
 4503   interface(COND_INTER) %{
 4504     equal(0x4, "e");
 4505     not_equal(0x5, "ne");
 4506     less(0xF, "g");
 4507     greater_equal(0xE, "le");
 4508     less_equal(0xD, "ge");
 4509     greater(0xC, "l");
 4510     overflow(0x0, "o");
 4511     no_overflow(0x1, "no");
 4512   %}
 4513 %}
 4514 
 4515 // Comparison Code used in commuted (operand-swapped) unsigned long compares
 4516 operand cmpOpU_commute() %{
 4517   match(Bool);
 4518 
 4519   format %{ "" %}
 4520   interface(COND_INTER) %{
 4521     equal(0x4, "e");
 4522     not_equal(0x5, "ne");
 4523     less(0x7, "nbe");
 4524     greater_equal(0x6, "be");
 4525     less_equal(0x3, "nb");
 4526     greater(0x2, "b");
 4527     overflow(0x0, "o");
 4528     no_overflow(0x1, "no");
 4529   %}
 4530 %}
 4531 
 4532 //----------OPERAND CLASSES----------------------------------------------------
 4533 // Operand Classes are groups of operands that are used to simplify
 4534 // instruction definitions by not requiring the AD writer to specify separate
 4535 // instructions for every form of operand when the instruction accepts
 4536 // multiple operand types with the same basic encoding and format.  The classic
 4537 // case of this is memory operands.
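// For example, the single rule
//   instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ... %}
// (defined later in this file) covers direct, indirect, indexed and scaled
// addressing without requiring one instruction definition per addressing mode.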
 4538 
 4539 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4540                indIndex, indIndexScale, indIndexScaleOffset);
 4541 
 4542 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4543 // This means some kind of offset is always required and you cannot use
 4544 // an oop as the offset (as is done when working on static globals).
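// For example, loadL below expands to "MOV $dst.lo,$mem" and "MOV $dst.hi,$mem+4",
// so the displacement must be a plain integer that can be adjusted by +4.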
 4545 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4546                     indIndex, indIndexScale, indIndexScaleOffset);
 4547 
 4548 
 4549 //----------PIPELINE-----------------------------------------------------------
 4550 // Rules which define the behavior of the target architecture's pipeline.
 4551 pipeline %{
 4552 
 4553 //----------ATTRIBUTES---------------------------------------------------------
 4554 attributes %{
 4555   variable_size_instructions;        // Variable-sized instructions
 4556   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4557   instruction_unit_size = 1;         // An instruction is 1 byte long
 4558   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4559   instruction_fetch_units = 1;       // of 16 bytes
 4560 
 4561   // List of nop instructions
 4562   nops( MachNop );
 4563 %}
 4564 
 4565 //----------RESOURCES----------------------------------------------------------
 4566 // Resources are the functional units available to the machine
 4567 
 4568 // Generic P2/P3 pipeline
 4569 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4570 // 3 instructions decoded per cycle.
 4571 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4572 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4573 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4574            MS0, MS1, MEM = MS0 | MS1,
 4575            BR, FPU,
 4576            ALU0, ALU1, ALU = ALU0 | ALU1 );
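// For example, a pipe_class stage written as "DECODE : S0" below may be
// satisfied by any of D0, D1 or D2, while "D0 : S0" pins the instruction to
// the big decoder.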
 4577 
 4578 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4579 // Pipeline Description specifies the stages in the machine's pipeline
 4580 
 4581 // Generic P2/P3 pipeline
 4582 pipe_desc(S0, S1, S2, S3, S4, S5);
 4583 
 4584 //----------PIPELINE CLASSES---------------------------------------------------
 4585 // Pipeline Classes describe the stages in which input and output are
 4586 // referenced by the hardware pipeline.
 4587 
 4588 // Naming convention: ialu or fpu
 4589 // Then: _reg
 4590 // Then: _reg if there is a 2nd register
 4591 // Then: _long if it's a pair of instructions implementing a long
 4592 // Then: _fat if it requires the big decoder
 4593 //   Or: _mem if it requires the big decoder and a memory unit.
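// For example, ialu_reg below is a single instruction that may use any
// decoder in stage S0 and any ALU in stage S3, reading its dst operand in S3
// and writing the result in S4.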
 4594 
 4595 // Integer ALU reg operation
 4596 pipe_class ialu_reg(rRegI dst) %{
 4597     single_instruction;
 4598     dst    : S4(write);
 4599     dst    : S3(read);
 4600     DECODE : S0;        // any decoder
 4601     ALU    : S3;        // any alu
 4602 %}
 4603 
 4604 // Long ALU reg operation
 4605 pipe_class ialu_reg_long(eRegL dst) %{
 4606     instruction_count(2);
 4607     dst    : S4(write);
 4608     dst    : S3(read);
 4609     DECODE : S0(2);     // any 2 decoders
 4610     ALU    : S3(2);     // both alus
 4611 %}
 4612 
 4613 // Integer ALU reg operation using big decoder
 4614 pipe_class ialu_reg_fat(rRegI dst) %{
 4615     single_instruction;
 4616     dst    : S4(write);
 4617     dst    : S3(read);
 4618     D0     : S0;        // big decoder only
 4619     ALU    : S3;        // any alu
 4620 %}
 4621 
 4622 // Long ALU reg operation using big decoder
 4623 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4624     instruction_count(2);
 4625     dst    : S4(write);
 4626     dst    : S3(read);
 4627     D0     : S0(2);     // big decoder only; twice
 4628     ALU    : S3(2);     // any 2 alus
 4629 %}
 4630 
 4631 // Integer ALU reg-reg operation
 4632 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4633     single_instruction;
 4634     dst    : S4(write);
 4635     src    : S3(read);
 4636     DECODE : S0;        // any decoder
 4637     ALU    : S3;        // any alu
 4638 %}
 4639 
 4640 // Long ALU reg-reg operation
 4641 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4642     instruction_count(2);
 4643     dst    : S4(write);
 4644     src    : S3(read);
 4645     DECODE : S0(2);     // any 2 decoders
 4646     ALU    : S3(2);     // both alus
 4647 %}
 4648 
 4649 // Integer ALU reg-reg operation using big decoder
 4650 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4651     single_instruction;
 4652     dst    : S4(write);
 4653     src    : S3(read);
 4654     D0     : S0;        // big decoder only
 4655     ALU    : S3;        // any alu
 4656 %}
 4657 
 4658 // Long ALU reg-reg operation using big decoder
 4659 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4660     instruction_count(2);
 4661     dst    : S4(write);
 4662     src    : S3(read);
 4663     D0     : S0(2);     // big decoder only; twice
 4664     ALU    : S3(2);     // both alus
 4665 %}
 4666 
 4667 // Integer ALU reg-mem operation
 4668 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4669     single_instruction;
 4670     dst    : S5(write);
 4671     mem    : S3(read);
 4672     D0     : S0;        // big decoder only
 4673     ALU    : S4;        // any alu
 4674     MEM    : S3;        // any mem
 4675 %}
 4676 
 4677 // Long ALU reg-mem operation
 4678 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4679     instruction_count(2);
 4680     dst    : S5(write);
 4681     mem    : S3(read);
 4682     D0     : S0(2);     // big decoder only; twice
 4683     ALU    : S4(2);     // any 2 alus
 4684     MEM    : S3(2);     // both mems
 4685 %}
 4686 
 4687 // Integer mem operation (prefetch)
 4688 pipe_class ialu_mem(memory mem)
 4689 %{
 4690     single_instruction;
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     MEM    : S3;        // any mem
 4694 %}
 4695 
 4696 // Integer Store to Memory
 4697 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4698     single_instruction;
 4699     mem    : S3(read);
 4700     src    : S5(read);
 4701     D0     : S0;        // big decoder only
 4702     ALU    : S4;        // any alu
 4703     MEM    : S3;
 4704 %}
 4705 
 4706 // Long Store to Memory
 4707 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4708     instruction_count(2);
 4709     mem    : S3(read);
 4710     src    : S5(read);
 4711     D0     : S0(2);     // big decoder only; twice
 4712     ALU    : S4(2);     // any 2 alus
 4713     MEM    : S3(2);     // Both mems
 4714 %}
 4715 
 4716 // Integer Store of immediate to Memory
 4717 pipe_class ialu_mem_imm(memory mem) %{
 4718     single_instruction;
 4719     mem    : S3(read);
 4720     D0     : S0;        // big decoder only
 4721     ALU    : S4;        // any alu
 4722     MEM    : S3;
 4723 %}
 4724 
 4725 // Integer ALU0 reg-reg operation
 4726 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4727     single_instruction;
 4728     dst    : S4(write);
 4729     src    : S3(read);
 4730     D0     : S0;        // Big decoder only
 4731     ALU0   : S3;        // only alu0
 4732 %}
 4733 
 4734 // Integer ALU0 reg-mem operation
 4735 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4736     single_instruction;
 4737     dst    : S5(write);
 4738     mem    : S3(read);
 4739     D0     : S0;        // big decoder only
 4740     ALU0   : S4;        // ALU0 only
 4741     MEM    : S3;        // any mem
 4742 %}
 4743 
 4744 // Integer ALU reg-reg operation that sets flags
 4745 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4746     single_instruction;
 4747     cr     : S4(write);
 4748     src1   : S3(read);
 4749     src2   : S3(read);
 4750     DECODE : S0;        // any decoder
 4751     ALU    : S3;        // any alu
 4752 %}
 4753 
 4754 // Integer ALU reg-imm operation that sets flags
 4755 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4756     single_instruction;
 4757     cr     : S4(write);
 4758     src1   : S3(read);
 4759     DECODE : S0;        // any decoder
 4760     ALU    : S3;        // any alu
 4761 %}
 4762 
 4763 // Integer ALU reg-mem operation that sets flags
 4764 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4765     single_instruction;
 4766     cr     : S4(write);
 4767     src1   : S3(read);
 4768     src2   : S3(read);
 4769     D0     : S0;        // big decoder only
 4770     ALU    : S4;        // any alu
 4771     MEM    : S3;
 4772 %}
 4773 
 4774 // Conditional move reg-reg
 4775 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4776     instruction_count(4);
 4777     y      : S4(read);
 4778     q      : S3(read);
 4779     p      : S3(read);
 4780     DECODE : S0(4);     // any decoder
 4781 %}
 4782 
 4783 // Conditional move reg-reg
 4784 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4785     single_instruction;
 4786     dst    : S4(write);
 4787     src    : S3(read);
 4788     cr     : S3(read);
 4789     DECODE : S0;        // any decoder
 4790 %}
 4791 
 4792 // Conditional move reg-mem
 4793 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4794     single_instruction;
 4795     dst    : S4(write);
 4796     src    : S3(read);
 4797     cr     : S3(read);
 4798     DECODE : S0;        // any decoder
 4799     MEM    : S3;
 4800 %}
 4801 
 4802 // Conditional move reg-reg long
 4803 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4804     single_instruction;
 4805     dst    : S4(write);
 4806     src    : S3(read);
 4807     cr     : S3(read);
 4808     DECODE : S0(2);     // any 2 decoders
 4809 %}
 4810 
 4811 // Conditional move double reg-reg
 4812 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4813     single_instruction;
 4814     dst    : S4(write);
 4815     src    : S3(read);
 4816     cr     : S3(read);
 4817     DECODE : S0;        // any decoder
 4818 %}
 4819 
 4820 // Float reg-reg operation
 4821 pipe_class fpu_reg(regDPR dst) %{
 4822     instruction_count(2);
 4823     dst    : S3(read);
 4824     DECODE : S0(2);     // any 2 decoders
 4825     FPU    : S3;
 4826 %}
 4827 
 4828 // Float reg-reg operation
 4829 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4830     instruction_count(2);
 4831     dst    : S4(write);
 4832     src    : S3(read);
 4833     DECODE : S0(2);     // any 2 decoders
 4834     FPU    : S3;
 4835 %}
 4836 
 4837 // Float reg-reg operation
 4838 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4839     instruction_count(3);
 4840     dst    : S4(write);
 4841     src1   : S3(read);
 4842     src2   : S3(read);
 4843     DECODE : S0(3);     // any 3 decoders
 4844     FPU    : S3(2);
 4845 %}
 4846 
 4847 // Float reg-reg operation
 4848 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4849     instruction_count(4);
 4850     dst    : S4(write);
 4851     src1   : S3(read);
 4852     src2   : S3(read);
 4853     src3   : S3(read);
 4854     DECODE : S0(4);     // any decoder, 4 decode slots
 4855     FPU    : S3(2);
 4856 %}
 4857 
 4858 // Float reg-mem-reg-reg operation
 4859 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4860     instruction_count(4);
 4861     dst    : S4(write);
 4862     src1   : S3(read);
 4863     src2   : S3(read);
 4864     src3   : S3(read);
 4865     DECODE : S1(3);     // any 3 decoders
 4866     D0     : S0;        // Big decoder only
 4867     FPU    : S3(2);
 4868     MEM    : S3;
 4869 %}
 4870 
 4871 // Float reg-mem operation
 4872 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4873     instruction_count(2);
 4874     dst    : S5(write);
 4875     mem    : S3(read);
 4876     D0     : S0;        // big decoder only
 4877     DECODE : S1;        // any decoder for FPU POP
 4878     FPU    : S4;
 4879     MEM    : S3;        // any mem
 4880 %}
 4881 
 4882 // Float reg-mem operation
 4883 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4884     instruction_count(3);
 4885     dst    : S5(write);
 4886     src1   : S3(read);
 4887     mem    : S3(read);
 4888     D0     : S0;        // big decoder only
 4889     DECODE : S1(2);     // any decoder for FPU POP
 4890     FPU    : S4;
 4891     MEM    : S3;        // any mem
 4892 %}
 4893 
 4894 // Float mem-reg operation
 4895 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4896     instruction_count(2);
 4897     src    : S5(read);
 4898     mem    : S3(read);
 4899     DECODE : S0;        // any decoder for FPU PUSH
 4900     D0     : S1;        // big decoder only
 4901     FPU    : S4;
 4902     MEM    : S3;        // any mem
 4903 %}
 4904 
 4905 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4906     instruction_count(3);
 4907     src1   : S3(read);
 4908     src2   : S3(read);
 4909     mem    : S3(read);
 4910     DECODE : S0(2);     // any decoder for FPU PUSH
 4911     D0     : S1;        // big decoder only
 4912     FPU    : S4;
 4913     MEM    : S3;        // any mem
 4914 %}
 4915 
 4916 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4917     instruction_count(3);
 4918     src1   : S3(read);
 4919     src2   : S3(read);
 4920     mem    : S4(read);
 4921     DECODE : S0;        // any decoder for FPU PUSH
 4922     D0     : S0(2);     // big decoder only
 4923     FPU    : S4;
 4924     MEM    : S3(2);     // any mem
 4925 %}
 4926 
 4927 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4928     instruction_count(2);
 4929     src1   : S3(read);
 4930     dst    : S4(read);
 4931     D0     : S0(2);     // big decoder only
 4932     MEM    : S3(2);     // any mem
 4933 %}
 4934 
 4935 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4936     instruction_count(3);
 4937     src1   : S3(read);
 4938     src2   : S3(read);
 4939     dst    : S4(read);
 4940     D0     : S0(3);     // big decoder only
 4941     FPU    : S4;
 4942     MEM    : S3(3);     // any mem
 4943 %}
 4944 
 4945 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4946     instruction_count(3);
 4947     src1   : S4(read);
 4948     mem    : S4(read);
 4949     DECODE : S0;        // any decoder for FPU PUSH
 4950     D0     : S0(2);     // big decoder only
 4951     FPU    : S4;
 4952     MEM    : S3(2);     // any mem
 4953 %}
 4954 
 4955 // Float load constant
 4956 pipe_class fpu_reg_con(regDPR dst) %{
 4957     instruction_count(2);
 4958     dst    : S5(write);
 4959     D0     : S0;        // big decoder only for the load
 4960     DECODE : S1;        // any decoder for FPU POP
 4961     FPU    : S4;
 4962     MEM    : S3;        // any mem
 4963 %}
 4964 
 4965 // Float load constant
 4966 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4967     instruction_count(3);
 4968     dst    : S5(write);
 4969     src    : S3(read);
 4970     D0     : S0;        // big decoder only for the load
 4971     DECODE : S1(2);     // any decoder for FPU POP
 4972     FPU    : S4;
 4973     MEM    : S3;        // any mem
 4974 %}
 4975 
 4976 // Unconditional branch
 4977 pipe_class pipe_jmp( label labl ) %{
 4978     single_instruction;
 4979     BR   : S3;
 4980 %}
 4981 
 4982 // Conditional branch
 4983 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4984     single_instruction;
 4985     cr    : S1(read);
 4986     BR    : S3;
 4987 %}
 4988 
 4989 // Allocation idiom
 4990 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4991     instruction_count(1); force_serialization;
 4992     fixed_latency(6);
 4993     heap_ptr : S3(read);
 4994     DECODE   : S0(3);
 4995     D0       : S2;
 4996     MEM      : S3;
 4997     ALU      : S3(2);
 4998     dst      : S5(write);
 4999     BR       : S5;
 5000 %}
 5001 
 5002 // Generic big/slow expanded idiom
 5003 pipe_class pipe_slow(  ) %{
 5004     instruction_count(10); multiple_bundles; force_serialization;
 5005     fixed_latency(100);
 5006     D0  : S0(2);
 5007     MEM : S3(2);
 5008 %}
 5009 
 5010 // The real do-nothing guy
 5011 pipe_class empty( ) %{
 5012     instruction_count(0);
 5013 %}
 5014 
 5015 // Define the class for the Nop node
 5016 define %{
 5017    MachNop = empty;
 5018 %}
 5019 
 5020 %}
 5021 
 5022 //----------INSTRUCTIONS-------------------------------------------------------
 5023 //
 5024 // match      -- States which machine-independent subtree may be replaced
 5025 //               by this instruction.
 5026 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5027 //               selection to identify a minimum cost tree of machine
 5028 //               instructions that matches a tree of machine-independent
 5029 //               instructions.
 5030 // format     -- A string providing the disassembly for this instruction.
 5031 //               The value of an instruction's operand may be inserted
 5032 //               by referring to it with a '$' prefix.
 5033 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5034 //               to within an encode class as $primary, $secondary, and $tertiary
 5035 //               respectively.  The primary opcode is commonly used to
 5036 //               indicate the type of machine instruction, while secondary
 5037 //               and tertiary are often used for prefix options or addressing
 5038 //               modes.
 5039 // ins_encode -- A list of encode classes with parameters. The encode class
 5040 //               name must have been defined in an 'enc_class' specification
 5041 //               in the encode section of the architecture description.
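//
// As an illustrative sketch only (exampleAddI is a hypothetical name, not a
// rule defined in this file), a minimal entry combining these pieces looks
// like:
//
//   instruct exampleAddI(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe( ialu_reg_reg );
//   %}
//
// The real AddI rules, with their own costs and encodings, appear later in
// this file.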
 5042 
 5043 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5044 // Load Float
 5045 instruct MoveF2LEG(legRegF dst, regF src) %{
 5046   match(Set dst src);
 5047   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5048   ins_encode %{
 5049     ShouldNotReachHere();
 5050   %}
 5051   ins_pipe( fpu_reg_reg );
 5052 %}
 5053 
 5054 // Load Float
 5055 instruct MoveLEG2F(regF dst, legRegF src) %{
 5056   match(Set dst src);
 5057   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5058   ins_encode %{
 5059     ShouldNotReachHere();
 5060   %}
 5061   ins_pipe( fpu_reg_reg );
 5062 %}
 5063 
 5064 // Load Float
 5065 instruct MoveF2VL(vlRegF dst, regF src) %{
 5066   match(Set dst src);
 5067   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5068   ins_encode %{
 5069     ShouldNotReachHere();
 5070   %}
 5071   ins_pipe( fpu_reg_reg );
 5072 %}
 5073 
 5074 // Load Float
 5075 instruct MoveVL2F(regF dst, vlRegF src) %{
 5076   match(Set dst src);
 5077   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5078   ins_encode %{
 5079     ShouldNotReachHere();
 5080   %}
 5081   ins_pipe( fpu_reg_reg );
 5082 %}
 5083 
 5084 
 5085 
 5086 // Load Double
 5087 instruct MoveD2LEG(legRegD dst, regD src) %{
 5088   match(Set dst src);
 5089   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5090   ins_encode %{
 5091     ShouldNotReachHere();
 5092   %}
 5093   ins_pipe( fpu_reg_reg );
 5094 %}
 5095 
 5096 // Load Double
 5097 instruct MoveLEG2D(regD dst, legRegD src) %{
 5098   match(Set dst src);
 5099   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5100   ins_encode %{
 5101     ShouldNotReachHere();
 5102   %}
 5103   ins_pipe( fpu_reg_reg );
 5104 %}
 5105 
 5106 // Load Double
 5107 instruct MoveD2VL(vlRegD dst, regD src) %{
 5108   match(Set dst src);
 5109   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5110   ins_encode %{
 5111     ShouldNotReachHere();
 5112   %}
 5113   ins_pipe( fpu_reg_reg );
 5114 %}
 5115 
 5116 // Load Double
 5117 instruct MoveVL2D(regD dst, vlRegD src) %{
 5118   match(Set dst src);
 5119   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5120   ins_encode %{
 5121     ShouldNotReachHere();
 5122   %}
 5123   ins_pipe( fpu_reg_reg );
 5124 %}
 5125 
 5126 //----------BSWAP-Instruction--------------------------------------------------
 5127 instruct bytes_reverse_int(rRegI dst) %{
 5128   match(Set dst (ReverseBytesI dst));
 5129 
 5130   format %{ "BSWAP  $dst" %}
 5131   opcode(0x0F, 0xC8);
 5132   ins_encode( OpcP, OpcSReg(dst) );
 5133   ins_pipe( ialu_reg );
 5134 %}
 5135 
 5136 instruct bytes_reverse_long(eRegL dst) %{
 5137   match(Set dst (ReverseBytesL dst));
 5138 
 5139   format %{ "BSWAP  $dst.lo\n\t"
 5140             "BSWAP  $dst.hi\n\t"
 5141             "XCHG   $dst.lo $dst.hi" %}
 5142 
 5143   ins_cost(125);
 5144   ins_encode( bswap_long_bytes(dst) );
 5145   ins_pipe( ialu_reg_reg);
 5146 %}
 5147 
 5148 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5149   match(Set dst (ReverseBytesUS dst));
 5150   effect(KILL cr);
 5151 
 5152   format %{ "BSWAP  $dst\n\t"
 5153             "SHR    $dst,16\n\t" %}
 5154   ins_encode %{
 5155     __ bswapl($dst$$Register);
 5156     __ shrl($dst$$Register, 16);
 5157   %}
 5158   ins_pipe( ialu_reg );
 5159 %}
 5160 
 5161 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5162   match(Set dst (ReverseBytesS dst));
 5163   effect(KILL cr);
 5164 
 5165   format %{ "BSWAP  $dst\n\t"
 5166             "SAR    $dst,16\n\t" %}
 5167   ins_encode %{
 5168     __ bswapl($dst$$Register);
 5169     __ sarl($dst$$Register, 16);
 5170   %}
 5171   ins_pipe( ialu_reg );
 5172 %}
 5173 
 5174 
 5175 //---------- Zeros Count Instructions ------------------------------------------
 5176 
 5177 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5178   predicate(UseCountLeadingZerosInstruction);
 5179   match(Set dst (CountLeadingZerosI src));
 5180   effect(KILL cr);
 5181 
 5182   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5183   ins_encode %{
 5184     __ lzcntl($dst$$Register, $src$$Register);
 5185   %}
 5186   ins_pipe(ialu_reg);
 5187 %}
 5188 
 5189 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5190   predicate(!UseCountLeadingZerosInstruction);
 5191   match(Set dst (CountLeadingZerosI src));
 5192   effect(KILL cr);
 5193 
 5194   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5195             "JNZ    skip\n\t"
 5196             "MOV    $dst, -1\n"
 5197       "skip:\n\t"
 5198             "NEG    $dst\n\t"
 5199             "ADD    $dst, 31" %}
 5200   ins_encode %{
 5201     Register Rdst = $dst$$Register;
 5202     Register Rsrc = $src$$Register;
 5203     Label skip;
 5204     __ bsrl(Rdst, Rsrc);
 5205     __ jccb(Assembler::notZero, skip);
 5206     __ movl(Rdst, -1);
 5207     __ bind(skip);
 5208     __ negl(Rdst);
 5209     __ addl(Rdst, BitsPerInt - 1);
 5210   %}
 5211   ins_pipe(ialu_reg);
 5212 %}
 5213 
 5214 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5215   predicate(UseCountLeadingZerosInstruction);
 5216   match(Set dst (CountLeadingZerosL src));
 5217   effect(TEMP dst, KILL cr);
 5218 
 5219   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5220             "JNC    done\n\t"
 5221             "LZCNT  $dst, $src.lo\n\t"
 5222             "ADD    $dst, 32\n"
 5223       "done:" %}
 5224   ins_encode %{
 5225     Register Rdst = $dst$$Register;
 5226     Register Rsrc = $src$$Register;
 5227     Label done;
 5228     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5229     __ jccb(Assembler::carryClear, done);
 5230     __ lzcntl(Rdst, Rsrc);
 5231     __ addl(Rdst, BitsPerInt);
 5232     __ bind(done);
 5233   %}
 5234   ins_pipe(ialu_reg);
 5235 %}
 5236 
 5237 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5238   predicate(!UseCountLeadingZerosInstruction);
 5239   match(Set dst (CountLeadingZerosL src));
 5240   effect(TEMP dst, KILL cr);
 5241 
 5242   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5243             "JZ     msw_is_zero\n\t"
 5244             "ADD    $dst, 32\n\t"
 5245             "JMP    not_zero\n"
 5246       "msw_is_zero:\n\t"
 5247             "BSR    $dst, $src.lo\n\t"
 5248             "JNZ    not_zero\n\t"
 5249             "MOV    $dst, -1\n"
 5250       "not_zero:\n\t"
 5251             "NEG    $dst\n\t"
 5252             "ADD    $dst, 63\n" %}
 5253   ins_encode %{
 5254     Register Rdst = $dst$$Register;
 5255     Register Rsrc = $src$$Register;
 5256     Label msw_is_zero;
 5257     Label not_zero;
 5258     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5259     __ jccb(Assembler::zero, msw_is_zero);
 5260     __ addl(Rdst, BitsPerInt);
 5261     __ jmpb(not_zero);
 5262     __ bind(msw_is_zero);
 5263     __ bsrl(Rdst, Rsrc);
 5264     __ jccb(Assembler::notZero, not_zero);
 5265     __ movl(Rdst, -1);
 5266     __ bind(not_zero);
 5267     __ negl(Rdst);
 5268     __ addl(Rdst, BitsPerLong - 1);
 5269   %}
 5270   ins_pipe(ialu_reg);
 5271 %}
 5272 
 5273 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5274   predicate(UseCountTrailingZerosInstruction);
 5275   match(Set dst (CountTrailingZerosI src));
 5276   effect(KILL cr);
 5277 
 5278   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5279   ins_encode %{
 5280     __ tzcntl($dst$$Register, $src$$Register);
 5281   %}
 5282   ins_pipe(ialu_reg);
 5283 %}
 5284 
 5285 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5286   predicate(!UseCountTrailingZerosInstruction);
 5287   match(Set dst (CountTrailingZerosI src));
 5288   effect(KILL cr);
 5289 
 5290   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5291             "JNZ    done\n\t"
 5292             "MOV    $dst, 32\n"
 5293       "done:" %}
 5294   ins_encode %{
 5295     Register Rdst = $dst$$Register;
 5296     Label done;
 5297     __ bsfl(Rdst, $src$$Register);
 5298     __ jccb(Assembler::notZero, done);
 5299     __ movl(Rdst, BitsPerInt);
 5300     __ bind(done);
 5301   %}
 5302   ins_pipe(ialu_reg);
 5303 %}
 5304 
 5305 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5306   predicate(UseCountTrailingZerosInstruction);
 5307   match(Set dst (CountTrailingZerosL src));
 5308   effect(TEMP dst, KILL cr);
 5309 
 5310   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5311             "JNC    done\n\t"
 5312             "TZCNT  $dst, $src.hi\n\t"
 5313             "ADD    $dst, 32\n"
 5314             "done:" %}
 5315   ins_encode %{
 5316     Register Rdst = $dst$$Register;
 5317     Register Rsrc = $src$$Register;
 5318     Label done;
 5319     __ tzcntl(Rdst, Rsrc);
 5320     __ jccb(Assembler::carryClear, done);
 5321     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5322     __ addl(Rdst, BitsPerInt);
 5323     __ bind(done);
 5324   %}
 5325   ins_pipe(ialu_reg);
 5326 %}
 5327 
 5328 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5329   predicate(!UseCountTrailingZerosInstruction);
 5330   match(Set dst (CountTrailingZerosL src));
 5331   effect(TEMP dst, KILL cr);
 5332 
 5333   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5334             "JNZ    done\n\t"
 5335             "BSF    $dst, $src.hi\n\t"
 5336             "JNZ    msw_not_zero\n\t"
 5337             "MOV    $dst, 32\n"
 5338       "msw_not_zero:\n\t"
 5339             "ADD    $dst, 32\n"
 5340       "done:" %}
 5341   ins_encode %{
 5342     Register Rdst = $dst$$Register;
 5343     Register Rsrc = $src$$Register;
 5344     Label msw_not_zero;
 5345     Label done;
 5346     __ bsfl(Rdst, Rsrc);
 5347     __ jccb(Assembler::notZero, done);
 5348     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5349     __ jccb(Assembler::notZero, msw_not_zero);
 5350     __ movl(Rdst, BitsPerInt);
 5351     __ bind(msw_not_zero);
 5352     __ addl(Rdst, BitsPerInt);
 5353     __ bind(done);
 5354   %}
 5355   ins_pipe(ialu_reg);
 5356 %}
 5357 
 5358 
 5359 //---------- Population Count Instructions -------------------------------------
 5360 
 5361 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5362   predicate(UsePopCountInstruction);
 5363   match(Set dst (PopCountI src));
 5364   effect(KILL cr);
 5365 
 5366   format %{ "POPCNT $dst, $src" %}
 5367   ins_encode %{
 5368     __ popcntl($dst$$Register, $src$$Register);
 5369   %}
 5370   ins_pipe(ialu_reg);
 5371 %}
 5372 
 5373 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5374   predicate(UsePopCountInstruction);
 5375   match(Set dst (PopCountI (LoadI mem)));
 5376   effect(KILL cr);
 5377 
 5378   format %{ "POPCNT $dst, $mem" %}
 5379   ins_encode %{
 5380     __ popcntl($dst$$Register, $mem$$Address);
 5381   %}
 5382   ins_pipe(ialu_reg);
 5383 %}
 5384 
 5385 // Note: Long.bitCount(long) returns an int.
 5386 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5387   predicate(UsePopCountInstruction);
 5388   match(Set dst (PopCountL src));
 5389   effect(KILL cr, TEMP tmp, TEMP dst);
 5390 
 5391   format %{ "POPCNT $dst, $src.lo\n\t"
 5392             "POPCNT $tmp, $src.hi\n\t"
 5393             "ADD    $dst, $tmp" %}
 5394   ins_encode %{
 5395     __ popcntl($dst$$Register, $src$$Register);
 5396     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5397     __ addl($dst$$Register, $tmp$$Register);
 5398   %}
 5399   ins_pipe(ialu_reg);
 5400 %}
 5401 
 5402 // Note: Long.bitCount(long) returns an int.
 5403 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5404   predicate(UsePopCountInstruction);
 5405   match(Set dst (PopCountL (LoadL mem)));
 5406   effect(KILL cr, TEMP tmp, TEMP dst);
 5407 
 5408   format %{ "POPCNT $dst, $mem\n\t"
 5409             "POPCNT $tmp, $mem+4\n\t"
 5410             "ADD    $dst, $tmp" %}
 5411   ins_encode %{
 5412     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5413     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5414     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5415     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5416     __ addl($dst$$Register, $tmp$$Register);
 5417   %}
 5418   ins_pipe(ialu_reg);
 5419 %}
 5420 
 5421 
 5422 //----------Load/Store/Move Instructions---------------------------------------
 5423 //----------Load Instructions--------------------------------------------------
 5424 // Load Byte (8bit signed)
 5425 instruct loadB(xRegI dst, memory mem) %{
 5426   match(Set dst (LoadB mem));
 5427 
 5428   ins_cost(125);
 5429   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5430 
 5431   ins_encode %{
 5432     __ movsbl($dst$$Register, $mem$$Address);
 5433   %}
 5434 
 5435   ins_pipe(ialu_reg_mem);
 5436 %}
 5437 
 5438 // Load Byte (8bit signed) into Long Register
 5439 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5440   match(Set dst (ConvI2L (LoadB mem)));
 5441   effect(KILL cr);
 5442 
 5443   ins_cost(375);
 5444   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5445             "MOV    $dst.hi,$dst.lo\n\t"
 5446             "SAR    $dst.hi,7" %}
 5447 
 5448   ins_encode %{
 5449     __ movsbl($dst$$Register, $mem$$Address);
 5450     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5451     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5452   %}
 5453 
 5454   ins_pipe(ialu_reg_mem);
 5455 %}
 5456 
 5457 // Load Unsigned Byte (8bit UNsigned)
 5458 instruct loadUB(xRegI dst, memory mem) %{
 5459   match(Set dst (LoadUB mem));
 5460 
 5461   ins_cost(125);
 5462   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5463 
 5464   ins_encode %{
 5465     __ movzbl($dst$$Register, $mem$$Address);
 5466   %}
 5467 
 5468   ins_pipe(ialu_reg_mem);
 5469 %}
 5470 
 5471 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5472 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5473   match(Set dst (ConvI2L (LoadUB mem)));
 5474   effect(KILL cr);
 5475 
 5476   ins_cost(250);
 5477   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5478             "XOR    $dst.hi,$dst.hi" %}
 5479 
 5480   ins_encode %{
 5481     Register Rdst = $dst$$Register;
 5482     __ movzbl(Rdst, $mem$$Address);
 5483     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5484   %}
 5485 
 5486   ins_pipe(ialu_reg_mem);
 5487 %}
 5488 
 5489 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5490 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5491   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5492   effect(KILL cr);
 5493 
 5494   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5495             "XOR    $dst.hi,$dst.hi\n\t"
 5496             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5497   ins_encode %{
 5498     Register Rdst = $dst$$Register;
 5499     __ movzbl(Rdst, $mem$$Address);
 5500     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5501     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5502   %}
 5503   ins_pipe(ialu_reg_mem);
 5504 %}
 5505 
 5506 // Load Short (16bit signed)
 5507 instruct loadS(rRegI dst, memory mem) %{
 5508   match(Set dst (LoadS mem));
 5509 
 5510   ins_cost(125);
 5511   format %{ "MOVSX  $dst,$mem\t# short" %}
 5512 
 5513   ins_encode %{
 5514     __ movswl($dst$$Register, $mem$$Address);
 5515   %}
 5516 
 5517   ins_pipe(ialu_reg_mem);
 5518 %}
 5519 
 5520 // Load Short (16 bit signed) to Byte (8 bit signed)
 5521 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5522   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5523 
 5524   ins_cost(125);
 5525   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5526   ins_encode %{
 5527     __ movsbl($dst$$Register, $mem$$Address);
 5528   %}
 5529   ins_pipe(ialu_reg_mem);
 5530 %}
 5531 
 5532 // Load Short (16bit signed) into Long Register
 5533 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5534   match(Set dst (ConvI2L (LoadS mem)));
 5535   effect(KILL cr);
 5536 
 5537   ins_cost(375);
 5538   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5539             "MOV    $dst.hi,$dst.lo\n\t"
 5540             "SAR    $dst.hi,15" %}
 5541 
 5542   ins_encode %{
 5543     __ movswl($dst$$Register, $mem$$Address);
 5544     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5545     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5546   %}
 5547 
 5548   ins_pipe(ialu_reg_mem);
 5549 %}
 5550 
 5551 // Load Unsigned Short/Char (16bit unsigned)
 5552 instruct loadUS(rRegI dst, memory mem) %{
 5553   match(Set dst (LoadUS mem));
 5554 
 5555   ins_cost(125);
 5556   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5557 
 5558   ins_encode %{
 5559     __ movzwl($dst$$Register, $mem$$Address);
 5560   %}
 5561 
 5562   ins_pipe(ialu_reg_mem);
 5563 %}
 5564 
 5565 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5566 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5567   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5568 
 5569   ins_cost(125);
 5570   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5571   ins_encode %{
 5572     __ movsbl($dst$$Register, $mem$$Address);
 5573   %}
 5574   ins_pipe(ialu_reg_mem);
 5575 %}
 5576 
 5577 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5578 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5579   match(Set dst (ConvI2L (LoadUS mem)));
 5580   effect(KILL cr);
 5581 
 5582   ins_cost(250);
 5583   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5584             "XOR    $dst.hi,$dst.hi" %}
 5585 
 5586   ins_encode %{
 5587     __ movzwl($dst$$Register, $mem$$Address);
 5588     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5589   %}
 5590 
 5591   ins_pipe(ialu_reg_mem);
 5592 %}
 5593 
 5594 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5595 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5596   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5597   effect(KILL cr);
 5598 
 5599   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5600             "XOR    $dst.hi,$dst.hi" %}
 5601   ins_encode %{
 5602     Register Rdst = $dst$$Register;
 5603     __ movzbl(Rdst, $mem$$Address);
 5604     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5605   %}
 5606   ins_pipe(ialu_reg_mem);
 5607 %}
 5608 
 5609 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5610 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5611   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5612   effect(KILL cr);
 5613 
 5614   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5615             "XOR    $dst.hi,$dst.hi\n\t"
 5616             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5617   ins_encode %{
 5618     Register Rdst = $dst$$Register;
 5619     __ movzwl(Rdst, $mem$$Address);
 5620     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5621     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Integer
 5627 instruct loadI(rRegI dst, memory mem) %{
 5628   match(Set dst (LoadI mem));
 5629 
 5630   ins_cost(125);
 5631   format %{ "MOV    $dst,$mem\t# int" %}
 5632 
 5633   ins_encode %{
 5634     __ movl($dst$$Register, $mem$$Address);
 5635   %}
 5636 
 5637   ins_pipe(ialu_reg_mem);
 5638 %}
 5639 
 5640 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5641 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5642   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5643 
 5644   ins_cost(125);
 5645   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5646   ins_encode %{
 5647     __ movsbl($dst$$Register, $mem$$Address);
 5648   %}
 5649   ins_pipe(ialu_reg_mem);
 5650 %}
 5651 
 5652 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5653 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5654   match(Set dst (AndI (LoadI mem) mask));
 5655 
 5656   ins_cost(125);
 5657   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5658   ins_encode %{
 5659     __ movzbl($dst$$Register, $mem$$Address);
 5660   %}
 5661   ins_pipe(ialu_reg_mem);
 5662 %}
 5663 
 5664 // Load Integer (32 bit signed) to Short (16 bit signed)
 5665 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5666   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5667 
 5668   ins_cost(125);
 5669   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5670   ins_encode %{
 5671     __ movswl($dst$$Register, $mem$$Address);
 5672   %}
 5673   ins_pipe(ialu_reg_mem);
 5674 %}
 5675 
 5676 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5677 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5678   match(Set dst (AndI (LoadI mem) mask));
 5679 
 5680   ins_cost(125);
 5681   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5682   ins_encode %{
 5683     __ movzwl($dst$$Register, $mem$$Address);
 5684   %}
 5685   ins_pipe(ialu_reg_mem);
 5686 %}
 5687 
 5688 // Load Integer into Long Register
 5689 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5690   match(Set dst (ConvI2L (LoadI mem)));
 5691   effect(KILL cr);
 5692 
 5693   ins_cost(375);
 5694   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5695             "MOV    $dst.hi,$dst.lo\n\t"
 5696             "SAR    $dst.hi,31" %}
 5697 
 5698   ins_encode %{
 5699     __ movl($dst$$Register, $mem$$Address);
 5700     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5701     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5702   %}
 5703 
 5704   ins_pipe(ialu_reg_mem);
 5705 %}
 5706 
 5707 // Load Integer with mask 0xFF into Long Register
 5708 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5709   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5710   effect(KILL cr);
 5711 
 5712   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5713             "XOR    $dst.hi,$dst.hi" %}
 5714   ins_encode %{
 5715     Register Rdst = $dst$$Register;
 5716     __ movzbl(Rdst, $mem$$Address);
 5717     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5718   %}
 5719   ins_pipe(ialu_reg_mem);
 5720 %}
 5721 
 5722 // Load Integer with mask 0xFFFF into Long Register
 5723 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5724   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5725   effect(KILL cr);
 5726 
 5727   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5728             "XOR    $dst.hi,$dst.hi" %}
 5729   ins_encode %{
 5730     Register Rdst = $dst$$Register;
 5731     __ movzwl(Rdst, $mem$$Address);
 5732     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5733   %}
 5734   ins_pipe(ialu_reg_mem);
 5735 %}
 5736 
 5737 // Load Integer with 31-bit mask into Long Register
 5738 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5739   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5740   effect(KILL cr);
 5741 
 5742   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5743             "XOR    $dst.hi,$dst.hi\n\t"
 5744             "AND    $dst.lo,$mask" %}
 5745   ins_encode %{
 5746     Register Rdst = $dst$$Register;
 5747     __ movl(Rdst, $mem$$Address);
 5748     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5749     __ andl(Rdst, $mask$$constant);
 5750   %}
 5751   ins_pipe(ialu_reg_mem);
 5752 %}
 5753 
 5754 // Load Unsigned Integer into Long Register
 5755 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5756   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5757   effect(KILL cr);
 5758 
 5759   ins_cost(250);
 5760   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5761             "XOR    $dst.hi,$dst.hi" %}
 5762 
 5763   ins_encode %{
 5764     __ movl($dst$$Register, $mem$$Address);
 5765     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5766   %}
 5767 
 5768   ins_pipe(ialu_reg_mem);
 5769 %}
 5770 
 5771 // Load Long.  Cannot clobber address while loading, so restrict address
 5772 // register to ESI
 5773 instruct loadL(eRegL dst, load_long_memory mem) %{
 5774   predicate(!((LoadLNode*)n)->require_atomic_access());
 5775   match(Set dst (LoadL mem));
 5776 
 5777   ins_cost(250);
 5778   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5779             "MOV    $dst.hi,$mem+4" %}
 5780 
 5781   ins_encode %{
 5782     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5783     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5784     __ movl($dst$$Register, Amemlo);
 5785     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5786   %}
 5787 
 5788   ins_pipe(ialu_reg_long_mem);
 5789 %}
 5790 
 5791 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5792 // then store it down to the stack and reload on the int
 5793 // side.
 5794 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5795   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5796   match(Set dst (LoadL mem));
 5797 
 5798   ins_cost(200);
 5799   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5800             "FISTp  $dst" %}
 5801   ins_encode(enc_loadL_volatile(mem,dst));
 5802   ins_pipe( fpu_reg_mem );
 5803 %}
 5804 
 5805 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5806   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5807   match(Set dst (LoadL mem));
 5808   effect(TEMP tmp);
 5809   ins_cost(180);
 5810   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5811             "MOVSD  $dst,$tmp" %}
 5812   ins_encode %{
 5813     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5814     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5815   %}
 5816   ins_pipe( pipe_slow );
 5817 %}
 5818 
 5819 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5820   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5821   match(Set dst (LoadL mem));
 5822   effect(TEMP tmp);
 5823   ins_cost(160);
 5824   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5825             "MOVD   $dst.lo,$tmp\n\t"
 5826             "PSRLQ  $tmp,32\n\t"
 5827             "MOVD   $dst.hi,$tmp" %}
 5828   ins_encode %{
 5829     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5830     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5831     __ psrlq($tmp$$XMMRegister, 32);
 5832     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5833   %}
 5834   ins_pipe( pipe_slow );
 5835 %}
 5836 
 5837 // Load Range
 5838 instruct loadRange(rRegI dst, memory mem) %{
 5839   match(Set dst (LoadRange mem));
 5840 
 5841   ins_cost(125);
 5842   format %{ "MOV    $dst,$mem" %}
 5843   opcode(0x8B);
 5844   ins_encode( OpcP, RegMem(dst,mem));
 5845   ins_pipe( ialu_reg_mem );
 5846 %}
 5847 
 5848 
 5849 // Load Pointer
 5850 instruct loadP(eRegP dst, memory mem) %{
 5851   match(Set dst (LoadP mem));
 5852 
 5853   ins_cost(125);
 5854   format %{ "MOV    $dst,$mem" %}
 5855   opcode(0x8B);
 5856   ins_encode( OpcP, RegMem(dst,mem));
 5857   ins_pipe( ialu_reg_mem );
 5858 %}
 5859 
 5860 // Load Klass Pointer
 5861 instruct loadKlass(eRegP dst, memory mem) %{
 5862   match(Set dst (LoadKlass mem));
 5863 
 5864   ins_cost(125);
 5865   format %{ "MOV    $dst,$mem" %}
 5866   opcode(0x8B);
 5867   ins_encode( OpcP, RegMem(dst,mem));
 5868   ins_pipe( ialu_reg_mem );
 5869 %}
 5870 
 5871 // Load Double
 5872 instruct loadDPR(regDPR dst, memory mem) %{
 5873   predicate(UseSSE<=1);
 5874   match(Set dst (LoadD mem));
 5875 
 5876   ins_cost(150);
 5877   format %{ "FLD_D  ST,$mem\n\t"
 5878             "FSTP   $dst" %}
 5879   opcode(0xDD);               /* DD /0 */
 5880   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5881               Pop_Reg_DPR(dst) );
 5882   ins_pipe( fpu_reg_mem );
 5883 %}
 5884 
 5885 // Load Double to XMM
 5886 instruct loadD(regD dst, memory mem) %{
 5887   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5888   match(Set dst (LoadD mem));
 5889   ins_cost(145);
 5890   format %{ "MOVSD  $dst,$mem" %}
 5891   ins_encode %{
 5892     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5893   %}
 5894   ins_pipe( pipe_slow );
 5895 %}
 5896 
 5897 instruct loadD_partial(regD dst, memory mem) %{
 5898   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5899   match(Set dst (LoadD mem));
 5900   ins_cost(145);
 5901   format %{ "MOVLPD $dst,$mem" %}
 5902   ins_encode %{
 5903     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5904   %}
 5905   ins_pipe( pipe_slow );
 5906 %}
 5907 
 5908 // Load to XMM register (single-precision floating point)
 5909 // MOVSS instruction
 5910 instruct loadF(regF dst, memory mem) %{
 5911   predicate(UseSSE>=1);
 5912   match(Set dst (LoadF mem));
 5913   ins_cost(145);
 5914   format %{ "MOVSS  $dst,$mem" %}
 5915   ins_encode %{
 5916     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5917   %}
 5918   ins_pipe( pipe_slow );
 5919 %}
 5920 
 5921 // Load Float
 5922 instruct loadFPR(regFPR dst, memory mem) %{
 5923   predicate(UseSSE==0);
 5924   match(Set dst (LoadF mem));
 5925 
 5926   ins_cost(150);
 5927   format %{ "FLD_S  ST,$mem\n\t"
 5928             "FSTP   $dst" %}
 5929   opcode(0xD9);               /* D9 /0 */
 5930   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5931               Pop_Reg_FPR(dst) );
 5932   ins_pipe( fpu_reg_mem );
 5933 %}
 5934 
 5935 // Load Effective Address
 5936 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5937   match(Set dst mem);
 5938 
 5939   ins_cost(110);
 5940   format %{ "LEA    $dst,$mem" %}
 5941   opcode(0x8D);
 5942   ins_encode( OpcP, RegMem(dst,mem));
 5943   ins_pipe( ialu_reg_reg_fat );
 5944 %}
 5945 
 5946 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5947   match(Set dst mem);
 5948 
 5949   ins_cost(110);
 5950   format %{ "LEA    $dst,$mem" %}
 5951   opcode(0x8D);
 5952   ins_encode( OpcP, RegMem(dst,mem));
 5953   ins_pipe( ialu_reg_reg_fat );
 5954 %}
 5955 
 5956 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5957   match(Set dst mem);
 5958 
 5959   ins_cost(110);
 5960   format %{ "LEA    $dst,$mem" %}
 5961   opcode(0x8D);
 5962   ins_encode( OpcP, RegMem(dst,mem));
 5963   ins_pipe( ialu_reg_reg_fat );
 5964 %}
 5965 
 5966 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5967   match(Set dst mem);
 5968 
 5969   ins_cost(110);
 5970   format %{ "LEA    $dst,$mem" %}
 5971   opcode(0x8D);
 5972   ins_encode( OpcP, RegMem(dst,mem));
 5973   ins_pipe( ialu_reg_reg_fat );
 5974 %}
 5975 
 5976 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5977   match(Set dst mem);
 5978 
 5979   ins_cost(110);
 5980   format %{ "LEA    $dst,$mem" %}
 5981   opcode(0x8D);
 5982   ins_encode( OpcP, RegMem(dst,mem));
 5983   ins_pipe( ialu_reg_reg_fat );
 5984 %}
 5985 
 5986 // Load Constant
 5987 instruct loadConI(rRegI dst, immI src) %{
 5988   match(Set dst src);
 5989 
 5990   format %{ "MOV    $dst,$src" %}
 5991   ins_encode( LdImmI(dst, src) );
 5992   ins_pipe( ialu_reg_fat );
 5993 %}
 5994 
 5995 // Load Constant zero
 5996 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5997   match(Set dst src);
 5998   effect(KILL cr);
 5999 
 6000   ins_cost(50);
 6001   format %{ "XOR    $dst,$dst" %}
 6002   opcode(0x33);  /* + rd */
 6003   ins_encode( OpcP, RegReg( dst, dst ) );
 6004   ins_pipe( ialu_reg );
 6005 %}
 6006 
 6007 instruct loadConP(eRegP dst, immP src) %{
 6008   match(Set dst src);
 6009 
 6010   format %{ "MOV    $dst,$src" %}
 6011   opcode(0xB8);  /* + rd */
 6012   ins_encode( LdImmP(dst, src) );
 6013   ins_pipe( ialu_reg_fat );
 6014 %}
 6015 
 6016 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6017   match(Set dst src);
 6018   effect(KILL cr);
 6019   ins_cost(200);
 6020   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6021             "MOV    $dst.hi,$src.hi" %}
 6022   opcode(0xB8);
 6023   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6024   ins_pipe( ialu_reg_long_fat );
 6025 %}
 6026 
 6027 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6028   match(Set dst src);
 6029   effect(KILL cr);
 6030   ins_cost(150);
 6031   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6032             "XOR    $dst.hi,$dst.hi" %}
 6033   opcode(0x33,0x33);
 6034   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6035   ins_pipe( ialu_reg_long );
 6036 %}
 6037 
 6038 // The instruction usage is guarded by predicate in operand immFPR().
 6039 instruct loadConFPR(regFPR dst, immFPR con) %{
 6040   match(Set dst con);
 6041   ins_cost(125);
 6042   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6043             "FSTP   $dst" %}
 6044   ins_encode %{
 6045     __ fld_s($constantaddress($con));
 6046     __ fstp_d($dst$$reg);
 6047   %}
 6048   ins_pipe(fpu_reg_con);
 6049 %}
 6050 
 6051 // The instruction usage is guarded by predicate in operand immFPR0().
 6052 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6053   match(Set dst con);
 6054   ins_cost(125);
 6055   format %{ "FLDZ   ST\n\t"
 6056             "FSTP   $dst" %}
 6057   ins_encode %{
 6058     __ fldz();
 6059     __ fstp_d($dst$$reg);
 6060   %}
 6061   ins_pipe(fpu_reg_con);
 6062 %}
 6063 
 6064 // The instruction usage is guarded by predicate in operand immFPR1().
 6065 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6066   match(Set dst con);
 6067   ins_cost(125);
 6068   format %{ "FLD1   ST\n\t"
 6069             "FSTP   $dst" %}
 6070   ins_encode %{
 6071     __ fld1();
 6072     __ fstp_d($dst$$reg);
 6073   %}
 6074   ins_pipe(fpu_reg_con);
 6075 %}
 6076 
 6077 // The instruction usage is guarded by predicate in operand immF().
 6078 instruct loadConF(regF dst, immF con) %{
 6079   match(Set dst con);
 6080   ins_cost(125);
 6081   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6082   ins_encode %{
 6083     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6084   %}
 6085   ins_pipe(pipe_slow);
 6086 %}
 6087 
 6088 // The instruction usage is guarded by predicate in operand immF0().
 6089 instruct loadConF0(regF dst, immF0 src) %{
 6090   match(Set dst src);
 6091   ins_cost(100);
 6092   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6093   ins_encode %{
 6094     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6095   %}
 6096   ins_pipe(pipe_slow);
 6097 %}
 6098 
 6099 // The instruction usage is guarded by predicate in operand immDPR().
 6100 instruct loadConDPR(regDPR dst, immDPR con) %{
 6101   match(Set dst con);
 6102   ins_cost(125);
 6103 
 6104   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6105             "FSTP   $dst" %}
 6106   ins_encode %{
 6107     __ fld_d($constantaddress($con));
 6108     __ fstp_d($dst$$reg);
 6109   %}
 6110   ins_pipe(fpu_reg_con);
 6111 %}
 6112 
 6113 // The instruction usage is guarded by predicate in operand immDPR0().
 6114 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6115   match(Set dst con);
 6116   ins_cost(125);
 6117 
 6118   format %{ "FLDZ   ST\n\t"
 6119             "FSTP   $dst" %}
 6120   ins_encode %{
 6121     __ fldz();
 6122     __ fstp_d($dst$$reg);
 6123   %}
 6124   ins_pipe(fpu_reg_con);
 6125 %}
 6126 
 6127 // The instruction usage is guarded by predicate in operand immDPR1().
 6128 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6129   match(Set dst con);
 6130   ins_cost(125);
 6131 
 6132   format %{ "FLD1   ST\n\t"
 6133             "FSTP   $dst" %}
 6134   ins_encode %{
 6135     __ fld1();
 6136     __ fstp_d($dst$$reg);
 6137   %}
 6138   ins_pipe(fpu_reg_con);
 6139 %}
 6140 
 6141 // The instruction usage is guarded by predicate in operand immD().
 6142 instruct loadConD(regD dst, immD con) %{
 6143   match(Set dst con);
 6144   ins_cost(125);
 6145   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6146   ins_encode %{
 6147     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6148   %}
 6149   ins_pipe(pipe_slow);
 6150 %}
 6151 
 6152 // The instruction usage is guarded by predicate in operand immD0().
 6153 instruct loadConD0(regD dst, immD0 src) %{
 6154   match(Set dst src);
 6155   ins_cost(100);
 6156   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6157   ins_encode %{
 6158     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6159   %}
 6160   ins_pipe( pipe_slow );
 6161 %}
 6162 
 6163 // Load Stack Slot
 6164 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6165   match(Set dst src);
 6166   ins_cost(125);
 6167 
 6168   format %{ "MOV    $dst,$src" %}
 6169   opcode(0x8B);
 6170   ins_encode( OpcP, RegMem(dst,src));
 6171   ins_pipe( ialu_reg_mem );
 6172 %}
 6173 
 6174 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6175   match(Set dst src);
 6176 
 6177   ins_cost(200);
 6178   format %{ "MOV    $dst,$src.lo\n\t"
 6179             "MOV    $dst+4,$src.hi" %}
 6180   opcode(0x8B, 0x8B);
 6181   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6182   ins_pipe( ialu_mem_long_reg );
 6183 %}
 6184 
 6185 // Load Stack Slot
 6186 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6187   match(Set dst src);
 6188   ins_cost(125);
 6189 
 6190   format %{ "MOV    $dst,$src" %}
 6191   opcode(0x8B);
 6192   ins_encode( OpcP, RegMem(dst,src));
 6193   ins_pipe( ialu_reg_mem );
 6194 %}
 6195 
 6196 // Load Stack Slot
 6197 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6198   match(Set dst src);
 6199   ins_cost(125);
 6200 
 6201   format %{ "FLD_S  $src\n\t"
 6202             "FSTP   $dst" %}
 6203   opcode(0xD9);               /* D9 /0, FLD m32real */
 6204   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6205               Pop_Reg_FPR(dst) );
 6206   ins_pipe( fpu_reg_mem );
 6207 %}
 6208 
 6209 // Load Stack Slot
 6210 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6211   match(Set dst src);
 6212   ins_cost(125);
 6213 
 6214   format %{ "FLD_D  $src\n\t"
 6215             "FSTP   $dst" %}
 6216   opcode(0xDD);               /* DD /0, FLD m64real */
 6217   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6218               Pop_Reg_DPR(dst) );
 6219   ins_pipe( fpu_reg_mem );
 6220 %}
 6221 
 6222 // Prefetch instructions for allocation.
 6223 // Must be safe to execute with invalid address (cannot fault).
 6224 
 6225 instruct prefetchAlloc0( memory mem ) %{
 6226   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6227   match(PrefetchAllocation mem);
 6228   ins_cost(0);
 6229   size(0);
 6230   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6231   ins_encode();
 6232   ins_pipe(empty);
 6233 %}
 6234 
 6235 instruct prefetchAlloc( memory mem ) %{
 6236   predicate(AllocatePrefetchInstr==3);
 6237   match( PrefetchAllocation mem );
 6238   ins_cost(100);
 6239 
 6240   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6241   ins_encode %{
 6242     __ prefetchw($mem$$Address);
 6243   %}
 6244   ins_pipe(ialu_mem);
 6245 %}
 6246 
 6247 instruct prefetchAllocNTA( memory mem ) %{
 6248   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6249   match(PrefetchAllocation mem);
 6250   ins_cost(100);
 6251 
 6252   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6253   ins_encode %{
 6254     __ prefetchnta($mem$$Address);
 6255   %}
 6256   ins_pipe(ialu_mem);
 6257 %}
 6258 
 6259 instruct prefetchAllocT0( memory mem ) %{
 6260   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6261   match(PrefetchAllocation mem);
 6262   ins_cost(100);
 6263 
 6264   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6265   ins_encode %{
 6266     __ prefetcht0($mem$$Address);
 6267   %}
 6268   ins_pipe(ialu_mem);
 6269 %}
 6270 
 6271 instruct prefetchAllocT2( memory mem ) %{
 6272   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6273   match(PrefetchAllocation mem);
 6274   ins_cost(100);
 6275 
 6276   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6277   ins_encode %{
 6278     __ prefetcht2($mem$$Address);
 6279   %}
 6280   ins_pipe(ialu_mem);
 6281 %}
 6282 
 6283 //----------Store Instructions-------------------------------------------------
 6284 
 6285 // Store Byte
 6286 instruct storeB(memory mem, xRegI src) %{
 6287   match(Set mem (StoreB mem src));
 6288 
 6289   ins_cost(125);
 6290   format %{ "MOV8   $mem,$src" %}
 6291   opcode(0x88);
 6292   ins_encode( OpcP, RegMem( src, mem ) );
 6293   ins_pipe( ialu_mem_reg );
 6294 %}
 6295 
 6296 // Store Char/Short
 6297 instruct storeC(memory mem, rRegI src) %{
 6298   match(Set mem (StoreC mem src));
 6299 
 6300   ins_cost(125);
 6301   format %{ "MOV16  $mem,$src" %}
 6302   opcode(0x89, 0x66);
 6303   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6304   ins_pipe( ialu_mem_reg );
 6305 %}
 6306 
 6307 // Store Integer
 6308 instruct storeI(memory mem, rRegI src) %{
 6309   match(Set mem (StoreI mem src));
 6310 
 6311   ins_cost(125);
 6312   format %{ "MOV    $mem,$src" %}
 6313   opcode(0x89);
 6314   ins_encode( OpcP, RegMem( src, mem ) );
 6315   ins_pipe( ialu_mem_reg );
 6316 %}
 6317 
 6318 // Store Long
 6319 instruct storeL(long_memory mem, eRegL src) %{
 6320   predicate(!((StoreLNode*)n)->require_atomic_access());
 6321   match(Set mem (StoreL mem src));
 6322 
 6323   ins_cost(200);
 6324   format %{ "MOV    $mem,$src.lo\n\t"
 6325             "MOV    $mem+4,$src.hi" %}
 6326   opcode(0x89, 0x89);
 6327   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6328   ins_pipe( ialu_mem_long_reg );
 6329 %}
 6330 
 6331 // Store Long to Integer
 6332 instruct storeL2I(memory mem, eRegL src) %{
 6333   match(Set mem (StoreI mem (ConvL2I src)));
 6334 
 6335   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6336   ins_encode %{
 6337     __ movl($mem$$Address, $src$$Register);
 6338   %}
 6339   ins_pipe(ialu_mem_reg);
 6340 %}
 6341 
 6342 // Volatile Store Long.  Must be atomic, so move it into
 6343 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6344 // target address before the store (for null-ptr checks)
 6345 // so the memory operand is used twice in the encoding.
 6346 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6347   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6348   match(Set mem (StoreL mem src));
 6349   effect( KILL cr );
 6350   ins_cost(400);
 6351   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6352             "FILD   $src\n\t"
 6353             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6354   opcode(0x3B);
 6355   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6356   ins_pipe( fpu_reg_mem );
 6357 %}
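
      // What the x87 sequence above achieves, sketched as C (comment only, not a
      // matcher rule): one 64-bit load paired with one 64-bit store, so the value
      // never becomes visible half-written; the leading CMP merely touches $mem so
      // a null pointer faults before any bytes are stored.  The names mem and
      // src_slot are illustrative stand-ins for the operands above.
      //
      //   int64_t v = *(int64_t *)src_slot;     // FILD  qword ptr [src]
      //   *(volatile int64_t *)mem = v;         // FISTp qword ptr [mem]
      //
      // Atomicity comes from the single 8-byte FILD/FISTP transfers, not from
      // anything a C compiler would guarantee for the lines above.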
 6358 
 6359 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6360   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6361   match(Set mem (StoreL mem src));
 6362   effect( TEMP tmp, KILL cr );
 6363   ins_cost(380);
 6364   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6365             "MOVSD  $tmp,$src\n\t"
 6366             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6367   ins_encode %{
 6368     __ cmpl(rax, $mem$$Address);
 6369     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6370     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6371   %}
 6372   ins_pipe( pipe_slow );
 6373 %}
 6374 
 6375 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6376   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6377   match(Set mem (StoreL mem src));
 6378   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6379   ins_cost(360);
 6380   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6381             "MOVD   $tmp,$src.lo\n\t"
 6382             "MOVD   $tmp2,$src.hi\n\t"
 6383             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6384             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6385   ins_encode %{
 6386     __ cmpl(rax, $mem$$Address);
 6387     __ movdl($tmp$$XMMRegister, $src$$Register);
 6388     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6389     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6390     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6391   %}
 6392   ins_pipe( pipe_slow );
 6393 %}
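
      // Sketch of the MOVD/MOVD/PUNPCKLDQ packing used above (comment only):
      // PUNPCKLDQ interleaves the low dwords, leaving $src.lo in bits 31..0 of
      // $tmp and $src.hi in bits 63..32, after which the single aligned 8-byte
      // MOVSD store supplies the atomicity.  In C terms, with the helper name
      // made up for illustration:
      //
      //   static uint64_t pack_halves(uint32_t lo, uint32_t hi) {
      //     return ((uint64_t)hi << 32) | lo;   // the value the MOVSD stores
      //   }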
 6394 
 6395 // Store Pointer; for storing unknown oops and raw pointers
 6396 instruct storeP(memory mem, anyRegP src) %{
 6397   match(Set mem (StoreP mem src));
 6398 
 6399   ins_cost(125);
 6400   format %{ "MOV    $mem,$src" %}
 6401   opcode(0x89);
 6402   ins_encode( OpcP, RegMem( src, mem ) );
 6403   ins_pipe( ialu_mem_reg );
 6404 %}
 6405 
 6406 // Store Integer Immediate
 6407 instruct storeImmI(memory mem, immI src) %{
 6408   match(Set mem (StoreI mem src));
 6409 
 6410   ins_cost(150);
 6411   format %{ "MOV    $mem,$src" %}
 6412   opcode(0xC7);               /* C7 /0 */
 6413   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6414   ins_pipe( ialu_mem_imm );
 6415 %}
 6416 
 6417 // Store Short/Char Immediate
 6418 instruct storeImmI16(memory mem, immI16 src) %{
 6419   predicate(UseStoreImmI16);
 6420   match(Set mem (StoreC mem src));
 6421 
 6422   ins_cost(150);
 6423   format %{ "MOV16  $mem,$src" %}
 6424   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6425   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6426   ins_pipe( ialu_mem_imm );
 6427 %}
 6428 
 6429 // Store Pointer Immediate; null pointers or constant oops that do not
 6430 // need card-mark barriers.
 6431 instruct storeImmP(memory mem, immP src) %{
 6432   match(Set mem (StoreP mem src));
 6433 
 6434   ins_cost(150);
 6435   format %{ "MOV    $mem,$src" %}
 6436   opcode(0xC7);               /* C7 /0 */
 6437   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6438   ins_pipe( ialu_mem_imm );
 6439 %}
 6440 
 6441 // Store Byte Immediate
 6442 instruct storeImmB(memory mem, immI8 src) %{
 6443   match(Set mem (StoreB mem src));
 6444 
 6445   ins_cost(150);
 6446   format %{ "MOV8   $mem,$src" %}
 6447   opcode(0xC6);               /* C6 /0 */
 6448   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6449   ins_pipe( ialu_mem_imm );
 6450 %}
 6451 
 6452 // Store CMS card-mark Immediate
 6453 instruct storeImmCM(memory mem, immI8 src) %{
 6454   match(Set mem (StoreCM mem src));
 6455 
 6456   ins_cost(150);
 6457   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6458   opcode(0xC6);               /* C6 /0 */
 6459   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6460   ins_pipe( ialu_mem_imm );
 6461 %}
 6462 
 6463 // Store Double
 6464 instruct storeDPR( memory mem, regDPR1 src) %{
 6465   predicate(UseSSE<=1);
 6466   match(Set mem (StoreD mem src));
 6467 
 6468   ins_cost(100);
 6469   format %{ "FST_D  $mem,$src" %}
 6470   opcode(0xDD);       /* DD /2 */
 6471   ins_encode( enc_FPR_store(mem,src) );
 6472   ins_pipe( fpu_mem_reg );
 6473 %}
 6474 
 6475 // Store double does rounding on x86
 6476 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6477   predicate(UseSSE<=1);
 6478   match(Set mem (StoreD mem (RoundDouble src)));
 6479 
 6480   ins_cost(100);
 6481   format %{ "FST_D  $mem,$src\t# round" %}
 6482   opcode(0xDD);       /* DD /2 */
 6483   ins_encode( enc_FPR_store(mem,src) );
 6484   ins_pipe( fpu_mem_reg );
 6485 %}
 6486 
 6487 // Store XMM register to memory (double-precision floating point)
 6488 // MOVSD instruction
 6489 instruct storeD(memory mem, regD src) %{
 6490   predicate(UseSSE>=2);
 6491   match(Set mem (StoreD mem src));
 6492   ins_cost(95);
 6493   format %{ "MOVSD  $mem,$src" %}
 6494   ins_encode %{
 6495     __ movdbl($mem$$Address, $src$$XMMRegister);
 6496   %}
 6497   ins_pipe( pipe_slow );
 6498 %}
 6499 
 6500 // Store XMM register to memory (single-precision floating point)
 6501 // MOVSS instruction
 6502 instruct storeF(memory mem, regF src) %{
 6503   predicate(UseSSE>=1);
 6504   match(Set mem (StoreF mem src));
 6505   ins_cost(95);
 6506   format %{ "MOVSS  $mem,$src" %}
 6507   ins_encode %{
 6508     __ movflt($mem$$Address, $src$$XMMRegister);
 6509   %}
 6510   ins_pipe( pipe_slow );
 6511 %}
 6512 
 6513 
 6514 // Store Float
 6515 instruct storeFPR( memory mem, regFPR1 src) %{
 6516   predicate(UseSSE==0);
 6517   match(Set mem (StoreF mem src));
 6518 
 6519   ins_cost(100);
 6520   format %{ "FST_S  $mem,$src" %}
 6521   opcode(0xD9);       /* D9 /2 */
 6522   ins_encode( enc_FPR_store(mem,src) );
 6523   ins_pipe( fpu_mem_reg );
 6524 %}
 6525 
 6526 // Store Float does rounding on x86
 6527 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6528   predicate(UseSSE==0);
 6529   match(Set mem (StoreF mem (RoundFloat src)));
 6530 
 6531   ins_cost(100);
 6532   format %{ "FST_S  $mem,$src\t# round" %}
 6533   opcode(0xD9);       /* D9 /2 */
 6534   ins_encode( enc_FPR_store(mem,src) );
 6535   ins_pipe( fpu_mem_reg );
 6536 %}
 6537 
 6538 // Store Float from a Double; the D2F conversion is folded into the store and rounds on x86
 6539 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6540   predicate(UseSSE<=1);
 6541   match(Set mem (StoreF mem (ConvD2F src)));
 6542 
 6543   ins_cost(100);
 6544   format %{ "FST_S  $mem,$src\t# D-round" %}
 6545   opcode(0xD9);       /* D9 /2 */
 6546   ins_encode( enc_FPR_store(mem,src) );
 6547   ins_pipe( fpu_mem_reg );
 6548 %}
 6549 
 6550 // Store immediate Float value (it is faster than store from FPU register)
 6551 // The instruction usage is guarded by predicate in operand immFPR().
 6552 instruct storeFPR_imm( memory mem, immFPR src) %{
 6553   match(Set mem (StoreF mem src));
 6554 
 6555   ins_cost(50);
 6556   format %{ "MOV    $mem,$src\t# store float" %}
 6557   opcode(0xC7);               /* C7 /0 */
 6558   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6559   ins_pipe( ialu_mem_imm );
 6560 %}
 6561 
 6562 // Store immediate Float value (it is faster than store from XMM register)
 6563 // The instruction usage is guarded by predicate in operand immF().
 6564 instruct storeF_imm( memory mem, immF src) %{
 6565   match(Set mem (StoreF mem src));
 6566 
 6567   ins_cost(50);
 6568   format %{ "MOV    $mem,$src\t# store float" %}
 6569   opcode(0xC7);               /* C7 /0 */
 6570   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6571   ins_pipe( ialu_mem_imm );
 6572 %}
 6573 
 6574 // Store Integer to stack slot
 6575 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6576   match(Set dst src);
 6577 
 6578   ins_cost(100);
 6579   format %{ "MOV    $dst,$src" %}
 6580   opcode(0x89);
 6581   ins_encode( OpcPRegSS( dst, src ) );
 6582   ins_pipe( ialu_mem_reg );
 6583 %}
 6584 
 6585 // Store Pointer to stack slot
 6586 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6587   match(Set dst src);
 6588 
 6589   ins_cost(100);
 6590   format %{ "MOV    $dst,$src" %}
 6591   opcode(0x89);
 6592   ins_encode( OpcPRegSS( dst, src ) );
 6593   ins_pipe( ialu_mem_reg );
 6594 %}
 6595 
 6596 // Store Long to stack slot
 6597 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6598   match(Set dst src);
 6599 
 6600   ins_cost(200);
 6601   format %{ "MOV    $dst,$src.lo\n\t"
 6602             "MOV    $dst+4,$src.hi" %}
 6603   opcode(0x89, 0x89);
 6604   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6605   ins_pipe( ialu_mem_long_reg );
 6606 %}
 6607 
 6608 //----------MemBar Instructions-----------------------------------------------
 6609 // Memory barrier flavors
 6610 
 6611 instruct membar_acquire() %{
 6612   match(MemBarAcquire);
 6613   match(LoadFence);
 6614   ins_cost(400);
 6615 
 6616   size(0);
 6617   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6618   ins_encode();
 6619   ins_pipe(empty);
 6620 %}
 6621 
 6622 instruct membar_acquire_lock() %{
 6623   match(MemBarAcquireLock);
 6624   ins_cost(0);
 6625 
 6626   size(0);
 6627   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6628   ins_encode( );
 6629   ins_pipe(empty);
 6630 %}
 6631 
 6632 instruct membar_release() %{
 6633   match(MemBarRelease);
 6634   match(StoreFence);
 6635   ins_cost(400);
 6636 
 6637   size(0);
 6638   format %{ "MEMBAR-release ! (empty encoding)" %}
 6639   ins_encode( );
 6640   ins_pipe(empty);
 6641 %}
 6642 
 6643 instruct membar_release_lock() %{
 6644   match(MemBarReleaseLock);
 6645   ins_cost(0);
 6646 
 6647   size(0);
 6648   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6649   ins_encode( );
 6650   ins_pipe(empty);
 6651 %}
 6652 
 6653 instruct membar_volatile(eFlagsReg cr) %{
 6654   match(MemBarVolatile);
 6655   effect(KILL cr);
 6656   ins_cost(400);
 6657 
 6658   format %{
 6659     $$template
 6660     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6661   %}
 6662   ins_encode %{
 6663     __ membar(Assembler::StoreLoad);
 6664   %}
 6665   ins_pipe(pipe_slow);
 6666 %}
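
      // The StoreLoad barrier above is the classic locked read-modify-write of a
      // stack slot.  A GCC-style C equivalent for an IA-32 target would be roughly
      // the following (illustrative sketch only; the real emission goes through
      // MacroAssembler::membar, and the helper name is made up):
      //
      //   static inline void store_load_fence(void) {
      //     __asm__ volatile("lock; addl $0,0(%%esp)" : : : "cc", "memory");
      //   }
      //
      // Any LOCK-prefixed instruction orders prior stores before later loads,
      // which is all MemBarVolatile needs here; targeting [ESP] hits a cache line
      // the thread almost certainly already owns.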
 6667 
 6668 instruct unnecessary_membar_volatile() %{
 6669   match(MemBarVolatile);
 6670   predicate(Matcher::post_store_load_barrier(n));
 6671   ins_cost(0);
 6672 
 6673   size(0);
 6674   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6675   ins_encode( );
 6676   ins_pipe(empty);
 6677 %}
 6678 
 6679 instruct membar_storestore() %{
 6680   match(MemBarStoreStore);
 6681   match(StoreStoreFence);
 6682   ins_cost(0);
 6683 
 6684   size(0);
 6685   format %{ "MEMBAR-storestore (empty encoding)" %}
 6686   ins_encode( );
 6687   ins_pipe(empty);
 6688 %}
 6689 
 6690 //----------Move Instructions--------------------------------------------------
 6691 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6692   match(Set dst (CastX2P src));
 6693   format %{ "# X2P  $dst, $src" %}
 6694   ins_encode( /*empty encoding*/ );
 6695   ins_cost(0);
 6696   ins_pipe(empty);
 6697 %}
 6698 
 6699 instruct castP2X(rRegI dst, eRegP src ) %{
 6700   match(Set dst (CastP2X src));
 6701   ins_cost(50);
 6702   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6703   ins_encode( enc_Copy( dst, src) );
 6704   ins_pipe( ialu_reg_reg );
 6705 %}
 6706 
 6707 //----------Conditional Move---------------------------------------------------
 6708 // Conditional move
 6709 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6710   predicate(!VM_Version::supports_cmov() );
 6711   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6712   ins_cost(200);
 6713   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6714             "MOV    $dst,$src\n"
 6715       "skip:" %}
 6716   ins_encode %{
 6717     Label Lskip;
 6718     // Invert sense of branch from sense of CMOV
 6719     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6720     __ movl($dst$$Register, $src$$Register);
 6721     __ bind(Lskip);
 6722   %}
 6723   ins_pipe( pipe_cmov_reg );
 6724 %}
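
      // Sketch of the branch emulation above (and of the unsigned variant that
      // follows), comment only: the CMOV is replaced by a conditional jump over a
      // plain MOV.  IA-32 condition codes come in complementary pairs that differ
      // only in bit 0 (e.g. 0x4 JE / 0x5 JNE), so $cop$$cmpcode^1 is the inverted
      // test.
      //
      //   if (!cond) goto skip;   // Jcc with cmpcode^1 (sense inverted)
      //   dst = src;              // MOV dst,src
      //   skip: ;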
 6725 
 6726 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6727   predicate(!VM_Version::supports_cmov() );
 6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6729   ins_cost(200);
 6730   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6731             "MOV    $dst,$src\n"
 6732       "skip:" %}
 6733   ins_encode %{
 6734     Label Lskip;
 6735     // Invert sense of branch from sense of CMOV
 6736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6737     __ movl($dst$$Register, $src$$Register);
 6738     __ bind(Lskip);
 6739   %}
 6740   ins_pipe( pipe_cmov_reg );
 6741 %}
 6742 
 6743 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6744   predicate(VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   format %{ "CMOV$cop $dst,$src" %}
 6748   opcode(0x0F,0x40);
 6749   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6750   ins_pipe( pipe_cmov_reg );
 6751 %}
 6752 
 6753 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6754   predicate(VM_Version::supports_cmov() );
 6755   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6756   ins_cost(200);
 6757   format %{ "CMOV$cop $dst,$src" %}
 6758   opcode(0x0F,0x40);
 6759   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6760   ins_pipe( pipe_cmov_reg );
 6761 %}
 6762 
 6763 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6764   predicate(VM_Version::supports_cmov() );
 6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6766   ins_cost(200);
 6767   expand %{
 6768     cmovI_regU(cop, cr, dst, src);
 6769   %}
 6770 %}
 6771 
 6772 // Conditional move
 6773 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6774   predicate(VM_Version::supports_cmov() );
 6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6776   ins_cost(250);
 6777   format %{ "CMOV$cop $dst,$src" %}
 6778   opcode(0x0F,0x40);
 6779   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6780   ins_pipe( pipe_cmov_mem );
 6781 %}
 6782 
 6783 // Conditional move
 6784 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6785   predicate(VM_Version::supports_cmov() );
 6786   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6787   ins_cost(250);
 6788   format %{ "CMOV$cop $dst,$src" %}
 6789   opcode(0x0F,0x40);
 6790   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6791   ins_pipe( pipe_cmov_mem );
 6792 %}
 6793 
 6794 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6795   predicate(VM_Version::supports_cmov() );
 6796   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6797   ins_cost(250);
 6798   expand %{
 6799     cmovI_memU(cop, cr, dst, src);
 6800   %}
 6801 %}
 6802 
 6803 // Conditional move
 6804 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6805   predicate(VM_Version::supports_cmov() );
 6806   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6807   ins_cost(200);
 6808   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6809   opcode(0x0F,0x40);
 6810   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6811   ins_pipe( pipe_cmov_reg );
 6812 %}
 6813 
 6814 // Conditional move (non-P6 version)
 6815 // Note:  a CMoveP is generated for  stubs and native wrappers
 6816 //        regardless of whether we are on a P6, so we
 6817 //        emulate a cmov here
 6818 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6819   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6820   ins_cost(300);
 6821   format %{ "Jn$cop   skip\n\t"
 6822           "MOV    $dst,$src\t# pointer\n"
 6823       "skip:" %}
 6824   opcode(0x8b);
 6825   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6826   ins_pipe( pipe_cmov_reg );
 6827 %}
 6828 
 6829 // Conditional move
 6830 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6831   predicate(VM_Version::supports_cmov() );
 6832   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6833   ins_cost(200);
 6834   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6835   opcode(0x0F,0x40);
 6836   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6837   ins_pipe( pipe_cmov_reg );
 6838 %}
 6839 
 6840 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6841   predicate(VM_Version::supports_cmov() );
 6842   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6843   ins_cost(200);
 6844   expand %{
 6845     cmovP_regU(cop, cr, dst, src);
 6846   %}
 6847 %}
 6848 
 6849 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6850 // correctly meets the two pointer arguments; one is an incoming
 6851 // register but the other is a memory operand.  ALSO appears to
 6852 // be buggy with implicit null checks.
 6853 //
 6854 //// Conditional move
 6855 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6856 //  predicate(VM_Version::supports_cmov() );
 6857 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6858 //  ins_cost(250);
 6859 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6860 //  opcode(0x0F,0x40);
 6861 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6862 //  ins_pipe( pipe_cmov_mem );
 6863 //%}
 6864 //
 6865 //// Conditional move
 6866 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6867 //  predicate(VM_Version::supports_cmov() );
 6868 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6869 //  ins_cost(250);
 6870 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6871 //  opcode(0x0F,0x40);
 6872 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6873 //  ins_pipe( pipe_cmov_mem );
 6874 //%}
 6875 
 6876 // Conditional move
 6877 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6878   predicate(UseSSE<=1);
 6879   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6880   ins_cost(200);
 6881   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6882   opcode(0xDA);
 6883   ins_encode( enc_cmov_dpr(cop,src) );
 6884   ins_pipe( pipe_cmovDPR_reg );
 6885 %}
 6886 
 6887 // Conditional move
 6888 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6889   predicate(UseSSE==0);
 6890   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6891   ins_cost(200);
 6892   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6893   opcode(0xDA);
 6894   ins_encode( enc_cmov_dpr(cop,src) );
 6895   ins_pipe( pipe_cmovDPR_reg );
 6896 %}
 6897 
 6898 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6899 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6900   predicate(UseSSE<=1);
 6901   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6902   ins_cost(200);
 6903   format %{ "Jn$cop   skip\n\t"
 6904             "MOV    $dst,$src\t# double\n"
 6905       "skip:" %}
 6906   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6907   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6908   ins_pipe( pipe_cmovDPR_reg );
 6909 %}
 6910 
 6911 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6912 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6913   predicate(UseSSE==0);
 6914   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6915   ins_cost(200);
 6916   format %{ "Jn$cop    skip\n\t"
 6917             "MOV    $dst,$src\t# float\n"
 6918       "skip:" %}
 6919   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6920   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6921   ins_pipe( pipe_cmovDPR_reg );
 6922 %}
 6923 
 6924 // No CMOV exists for XMM registers, so emulate CMoveF with a short branch
 6925 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6926   predicate (UseSSE>=1);
 6927   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6928   ins_cost(200);
 6929   format %{ "Jn$cop   skip\n\t"
 6930             "MOVSS  $dst,$src\t# float\n"
 6931       "skip:" %}
 6932   ins_encode %{
 6933     Label skip;
 6934     // Invert sense of branch from sense of CMOV
 6935     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6936     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6937     __ bind(skip);
 6938   %}
 6939   ins_pipe( pipe_slow );
 6940 %}
 6941 
 6942 // No CMOV exists for XMM registers, so emulate CMoveD with a short branch
 6943 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6944   predicate (UseSSE>=2);
 6945   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6946   ins_cost(200);
 6947   format %{ "Jn$cop   skip\n\t"
 6948             "MOVSD  $dst,$src\t# double\n"
 6949       "skip:" %}
 6950   ins_encode %{
 6951     Label skip;
 6952     // Invert sense of branch from sense of CMOV
 6953     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6954     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6955     __ bind(skip);
 6956   %}
 6957   ins_pipe( pipe_slow );
 6958 %}
 6959 
 6960 // unsigned version
 6961 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6962   predicate (UseSSE>=1);
 6963   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6964   ins_cost(200);
 6965   format %{ "Jn$cop   skip\n\t"
 6966             "MOVSS  $dst,$src\t# float\n"
 6967       "skip:" %}
 6968   ins_encode %{
 6969     Label skip;
 6970     // Invert sense of branch from sense of CMOV
 6971     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6972     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6973     __ bind(skip);
 6974   %}
 6975   ins_pipe( pipe_slow );
 6976 %}
 6977 
 6978 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6979   predicate (UseSSE>=1);
 6980   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6981   ins_cost(200);
 6982   expand %{
 6983     fcmovF_regU(cop, cr, dst, src);
 6984   %}
 6985 %}
 6986 
 6987 // unsigned version
 6988 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6989   predicate (UseSSE>=2);
 6990   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6991   ins_cost(200);
 6992   format %{ "Jn$cop   skip\n\t"
 6993             "MOVSD  $dst,$src\t# double\n"
 6994       "skip:" %}
 6995   ins_encode %{
 6996     Label skip;
 6997     // Invert sense of branch from sense of CMOV
 6998     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6999     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7000     __ bind(skip);
 7001   %}
 7002   ins_pipe( pipe_slow );
 7003 %}
 7004 
 7005 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7006   predicate (UseSSE>=2);
 7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7008   ins_cost(200);
 7009   expand %{
 7010     fcmovD_regU(cop, cr, dst, src);
 7011   %}
 7012 %}
 7013 
 7014 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7015   predicate(VM_Version::supports_cmov() );
 7016   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7017   ins_cost(200);
 7018   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7019             "CMOV$cop $dst.hi,$src.hi" %}
 7020   opcode(0x0F,0x40);
 7021   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7022   ins_pipe( pipe_cmov_reg_long );
 7023 %}
 7024 
 7025 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7026   predicate(VM_Version::supports_cmov() );
 7027   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7028   ins_cost(200);
 7029   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7030             "CMOV$cop $dst.hi,$src.hi" %}
 7031   opcode(0x0F,0x40);
 7032   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7033   ins_pipe( pipe_cmov_reg_long );
 7034 %}
 7035 
 7036 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7037   predicate(VM_Version::supports_cmov() );
 7038   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7039   ins_cost(200);
 7040   expand %{
 7041     cmovL_regU(cop, cr, dst, src);
 7042   %}
 7043 %}
 7044 
 7045 //----------Arithmetic Instructions--------------------------------------------
 7046 //----------Addition Instructions----------------------------------------------
 7047 
 7048 // Integer Addition Instructions
 7049 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7050   match(Set dst (AddI dst src));
 7051   effect(KILL cr);
 7052 
 7053   size(2);
 7054   format %{ "ADD    $dst,$src" %}
 7055   opcode(0x03);
 7056   ins_encode( OpcP, RegReg( dst, src) );
 7057   ins_pipe( ialu_reg_reg );
 7058 %}
 7059 
 7060 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7061   match(Set dst (AddI dst src));
 7062   effect(KILL cr);
 7063 
 7064   format %{ "ADD    $dst,$src" %}
 7065   opcode(0x81, 0x00); /* /0 id */
 7066   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7067   ins_pipe( ialu_reg );
 7068 %}
 7069 
 7070 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7071   predicate(UseIncDec);
 7072   match(Set dst (AddI dst src));
 7073   effect(KILL cr);
 7074 
 7075   size(1);
 7076   format %{ "INC    $dst" %}
 7077   opcode(0x40); /*  */
 7078   ins_encode( Opc_plus( primary, dst ) );
 7079   ins_pipe( ialu_reg );
 7080 %}
 7081 
 7082 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7083   match(Set dst (AddI src0 src1));
 7084   ins_cost(110);
 7085 
 7086   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7087   opcode(0x8D); /* 0x8D /r */
 7088   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7089   ins_pipe( ialu_reg_reg );
 7090 %}
 7091 
 7092 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7093   match(Set dst (AddP src0 src1));
 7094   ins_cost(110);
 7095 
 7096   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7097   opcode(0x8D); /* 0x8D /r */
 7098   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7099   ins_pipe( ialu_reg_reg );
 7100 %}
 7101 
 7102 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7103   predicate(UseIncDec);
 7104   match(Set dst (AddI dst src));
 7105   effect(KILL cr);
 7106 
 7107   size(1);
 7108   format %{ "DEC    $dst" %}
 7109   opcode(0x48); /*  */
 7110   ins_encode( Opc_plus( primary, dst ) );
 7111   ins_pipe( ialu_reg );
 7112 %}
 7113 
 7114 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7115   match(Set dst (AddP dst src));
 7116   effect(KILL cr);
 7117 
 7118   size(2);
 7119   format %{ "ADD    $dst,$src" %}
 7120   opcode(0x03);
 7121   ins_encode( OpcP, RegReg( dst, src) );
 7122   ins_pipe( ialu_reg_reg );
 7123 %}
 7124 
 7125 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7126   match(Set dst (AddP dst src));
 7127   effect(KILL cr);
 7128 
 7129   format %{ "ADD    $dst,$src" %}
 7130   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7131   // ins_encode( RegImm( dst, src) );
 7132   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7133   ins_pipe( ialu_reg );
 7134 %}
 7135 
 7136 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7137   match(Set dst (AddI dst (LoadI src)));
 7138   effect(KILL cr);
 7139 
 7140   ins_cost(150);
 7141   format %{ "ADD    $dst,$src" %}
 7142   opcode(0x03);
 7143   ins_encode( OpcP, RegMem( dst, src) );
 7144   ins_pipe( ialu_reg_mem );
 7145 %}
 7146 
 7147 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7148   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7149   effect(KILL cr);
 7150 
 7151   ins_cost(150);
 7152   format %{ "ADD    $dst,$src" %}
 7153   opcode(0x01);  /* Opcode 01 /r */
 7154   ins_encode( OpcP, RegMem( src, dst ) );
 7155   ins_pipe( ialu_mem_reg );
 7156 %}
 7157 
 7158 // Add Memory with Immediate
 7159 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7160   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7161   effect(KILL cr);
 7162 
 7163   ins_cost(125);
 7164   format %{ "ADD    $dst,$src" %}
 7165   opcode(0x81);               /* Opcode 81 /0 id */
 7166   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7167   ins_pipe( ialu_mem_imm );
 7168 %}
 7169 
 7170 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7171   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7172   effect(KILL cr);
 7173 
 7174   ins_cost(125);
 7175   format %{ "INC    $dst" %}
 7176   opcode(0xFF);               /* Opcode FF /0 */
 7177   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7178   ins_pipe( ialu_mem_imm );
 7179 %}
 7180 
 7181 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7182   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7183   effect(KILL cr);
 7184 
 7185   ins_cost(125);
 7186   format %{ "DEC    $dst" %}
 7187   opcode(0xFF);               /* Opcode FF /1 */
 7188   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7189   ins_pipe( ialu_mem_imm );
 7190 %}
 7191 
 7192 
 7193 instruct checkCastPP( eRegP dst ) %{
 7194   match(Set dst (CheckCastPP dst));
 7195 
 7196   size(0);
 7197   format %{ "#checkcastPP of $dst" %}
 7198   ins_encode( /*empty encoding*/ );
 7199   ins_pipe( empty );
 7200 %}
 7201 
 7202 instruct castPP( eRegP dst ) %{
 7203   match(Set dst (CastPP dst));
 7204   format %{ "#castPP of $dst" %}
 7205   ins_encode( /*empty encoding*/ );
 7206   ins_pipe( empty );
 7207 %}
 7208 
 7209 instruct castII( rRegI dst ) %{
 7210   match(Set dst (CastII dst));
 7211   format %{ "#castII of $dst" %}
 7212   ins_encode( /*empty encoding*/ );
 7213   ins_cost(0);
 7214   ins_pipe( empty );
 7215 %}
 7216 
 7217 instruct castLL( eRegL dst ) %{
 7218   match(Set dst (CastLL dst));
 7219   format %{ "#castLL of $dst" %}
 7220   ins_encode( /*empty encoding*/ );
 7221   ins_cost(0);
 7222   ins_pipe( empty );
 7223 %}
 7224 
 7225 instruct castFF( regF dst ) %{
 7226   predicate(UseSSE >= 1);
 7227   match(Set dst (CastFF dst));
 7228   format %{ "#castFF of $dst" %}
 7229   ins_encode( /*empty encoding*/ );
 7230   ins_cost(0);
 7231   ins_pipe( empty );
 7232 %}
 7233 
 7234 instruct castDD( regD dst ) %{
 7235   predicate(UseSSE >= 2);
 7236   match(Set dst (CastDD dst));
 7237   format %{ "#castDD of $dst" %}
 7238   ins_encode( /*empty encoding*/ );
 7239   ins_cost(0);
 7240   ins_pipe( empty );
 7241 %}
 7242 
 7243 instruct castFF_PR( regFPR dst ) %{
 7244   predicate(UseSSE < 1);
 7245   match(Set dst (CastFF dst));
 7246   format %{ "#castFF of $dst" %}
 7247   ins_encode( /*empty encoding*/ );
 7248   ins_cost(0);
 7249   ins_pipe( empty );
 7250 %}
 7251 
 7252 instruct castDD_PR( regDPR dst ) %{
 7253   predicate(UseSSE < 2);
 7254   match(Set dst (CastDD dst));
 7255   format %{ "#castDD of $dst" %}
 7256   ins_encode( /*empty encoding*/ );
 7257   ins_cost(0);
 7258   ins_pipe( empty );
 7259 %}
 7260 
 7261 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7262 
 7263 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7264   predicate(VM_Version::supports_cx8());
 7265   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7266   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7267   effect(KILL cr, KILL oldval);
 7268   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7269             "MOV    $res,0\n\t"
 7270             "JNE,s  fail\n\t"
 7271             "MOV    $res,1\n"
 7272           "fail:" %}
 7273   ins_encode( enc_cmpxchg8(mem_ptr),
 7274               enc_flags_ne_to_boolean(res) );
 7275   ins_pipe( pipe_cmpxchg );
 7276 %}
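
      // C model of the LOCK CMPXCHG8B semantics relied on above (comment only;
      // the helper name is illustrative): EDX:EAX holds the expected value,
      // ECX:EBX the new value, and ZF is what enc_flags_ne_to_boolean turns into
      // $res.  The narrower CMPXCHG forms below follow the same pattern with just
      // EAX as the expected value.
      //
      //   static int cas64(volatile uint64_t *mem, uint64_t *expected,
      //                    uint64_t newval) {
      //     // performed as one atomic read-modify-write by the hardware
      //     if (*mem == *expected) { *mem = newval; return 1; }  // ZF=1 -> $res=1
      //     *expected = *mem;       return 0;                    // ZF=0 -> $res=0
      //   }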
 7277 
 7278 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7279   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7280   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7281   effect(KILL cr, KILL oldval);
 7282   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7283             "MOV    $res,0\n\t"
 7284             "JNE,s  fail\n\t"
 7285             "MOV    $res,1\n"
 7286           "fail:" %}
 7287   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7288   ins_pipe( pipe_cmpxchg );
 7289 %}
 7290 
 7291 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7292   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7293   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7294   effect(KILL cr, KILL oldval);
 7295   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7296             "MOV    $res,0\n\t"
 7297             "JNE,s  fail\n\t"
 7298             "MOV    $res,1\n"
 7299           "fail:" %}
 7300   ins_encode( enc_cmpxchgb(mem_ptr),
 7301               enc_flags_ne_to_boolean(res) );
 7302   ins_pipe( pipe_cmpxchg );
 7303 %}
 7304 
 7305 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7306   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7307   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7308   effect(KILL cr, KILL oldval);
 7309   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7310             "MOV    $res,0\n\t"
 7311             "JNE,s  fail\n\t"
 7312             "MOV    $res,1\n"
 7313           "fail:" %}
 7314   ins_encode( enc_cmpxchgw(mem_ptr),
 7315               enc_flags_ne_to_boolean(res) );
 7316   ins_pipe( pipe_cmpxchg );
 7317 %}
 7318 
 7319 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7320   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7321   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7322   effect(KILL cr, KILL oldval);
 7323   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7324             "MOV    $res,0\n\t"
 7325             "JNE,s  fail\n\t"
 7326             "MOV    $res,1\n"
 7327           "fail:" %}
 7328   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7329   ins_pipe( pipe_cmpxchg );
 7330 %}
 7331 
 7332 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7333   predicate(VM_Version::supports_cx8());
 7334   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7335   effect(KILL cr);
 7336   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7337   ins_encode( enc_cmpxchg8(mem_ptr) );
 7338   ins_pipe( pipe_cmpxchg );
 7339 %}
 7340 
 7341 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7342   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7343   effect(KILL cr);
 7344   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7345   ins_encode( enc_cmpxchg(mem_ptr) );
 7346   ins_pipe( pipe_cmpxchg );
 7347 %}
 7348 
 7349 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7350   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7351   effect(KILL cr);
 7352   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7353   ins_encode( enc_cmpxchgb(mem_ptr) );
 7354   ins_pipe( pipe_cmpxchg );
 7355 %}
 7356 
 7357 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7358   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7359   effect(KILL cr);
 7360   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7361   ins_encode( enc_cmpxchgw(mem_ptr) );
 7362   ins_pipe( pipe_cmpxchg );
 7363 %}
 7364 
 7365 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7366   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7367   effect(KILL cr);
 7368   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7369   ins_encode( enc_cmpxchg(mem_ptr) );
 7370   ins_pipe( pipe_cmpxchg );
 7371 %}
 7372 
 7373 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7374   predicate(n->as_LoadStore()->result_not_used());
 7375   match(Set dummy (GetAndAddB mem add));
 7376   effect(KILL cr);
 7377   format %{ "ADDB  [$mem],$add" %}
 7378   ins_encode %{
 7379     __ lock();
 7380     __ addb($mem$$Address, $add$$constant);
 7381   %}
 7382   ins_pipe( pipe_cmpxchg );
 7383 %}
 7384 
 7385 // Important to match to xRegI: only 8-bit regs.
 7386 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7387   match(Set newval (GetAndAddB mem newval));
 7388   effect(KILL cr);
 7389   format %{ "XADDB  [$mem],$newval" %}
 7390   ins_encode %{
 7391     __ lock();
 7392     __ xaddb($mem$$Address, $newval$$Register);
 7393   %}
 7394   ins_pipe( pipe_cmpxchg );
 7395 %}
 7396 
 7397 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7398   predicate(n->as_LoadStore()->result_not_used());
 7399   match(Set dummy (GetAndAddS mem add));
 7400   effect(KILL cr);
 7401   format %{ "ADDS  [$mem],$add" %}
 7402   ins_encode %{
 7403     __ lock();
 7404     __ addw($mem$$Address, $add$$constant);
 7405   %}
 7406   ins_pipe( pipe_cmpxchg );
 7407 %}
 7408 
 7409 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7410   match(Set newval (GetAndAddS mem newval));
 7411   effect(KILL cr);
 7412   format %{ "XADDS  [$mem],$newval" %}
 7413   ins_encode %{
 7414     __ lock();
 7415     __ xaddw($mem$$Address, $newval$$Register);
 7416   %}
 7417   ins_pipe( pipe_cmpxchg );
 7418 %}
 7419 
 7420 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7421   predicate(n->as_LoadStore()->result_not_used());
 7422   match(Set dummy (GetAndAddI mem add));
 7423   effect(KILL cr);
 7424   format %{ "ADDL  [$mem],$add" %}
 7425   ins_encode %{
 7426     __ lock();
 7427     __ addl($mem$$Address, $add$$constant);
 7428   %}
 7429   ins_pipe( pipe_cmpxchg );
 7430 %}
 7431 
 7432 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7433   match(Set newval (GetAndAddI mem newval));
 7434   effect(KILL cr);
 7435   format %{ "XADDL  [$mem],$newval" %}
 7436   ins_encode %{
 7437     __ lock();
 7438     __ xaddl($mem$$Address, $newval$$Register);
 7439   %}
 7440   ins_pipe( pipe_cmpxchg );
 7441 %}
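
      // C model of the LOCK XADD used by the xadd* rules above (comment only;
      // helper name is illustrative).  The *_no_res forms use a plain LOCK ADD
      // because the old value is dead; otherwise GetAndAdd* must produce the old
      // memory value, which XADD writes back into its register operand:
      //
      //   static int32_t fetch_and_add(volatile int32_t *mem, int32_t add) {
      //     int32_t old = *mem;   // one atomic read-modify-write in hardware
      //     *mem = old + add;
      //     return old;           // ends up in $newval
      //   }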
 7442 
 7443 // Important to match to xRegI: only 8-bit regs.
 7444 instruct xchgB( memory mem, xRegI newval) %{
 7445   match(Set newval (GetAndSetB mem newval));
 7446   format %{ "XCHGB  $newval,[$mem]" %}
 7447   ins_encode %{
 7448     __ xchgb($newval$$Register, $mem$$Address);
 7449   %}
 7450   ins_pipe( pipe_cmpxchg );
 7451 %}
 7452 
 7453 instruct xchgS( memory mem, rRegI newval) %{
 7454   match(Set newval (GetAndSetS mem newval));
 7455   format %{ "XCHGW  $newval,[$mem]" %}
 7456   ins_encode %{
 7457     __ xchgw($newval$$Register, $mem$$Address);
 7458   %}
 7459   ins_pipe( pipe_cmpxchg );
 7460 %}
 7461 
 7462 instruct xchgI( memory mem, rRegI newval) %{
 7463   match(Set newval (GetAndSetI mem newval));
 7464   format %{ "XCHGL  $newval,[$mem]" %}
 7465   ins_encode %{
 7466     __ xchgl($newval$$Register, $mem$$Address);
 7467   %}
 7468   ins_pipe( pipe_cmpxchg );
 7469 %}
 7470 
 7471 instruct xchgP( memory mem, pRegP newval) %{
 7472   match(Set newval (GetAndSetP mem newval));
 7473   format %{ "XCHGL  $newval,[$mem]" %}
 7474   ins_encode %{
 7475     __ xchgl($newval$$Register, $mem$$Address);
 7476   %}
 7477   ins_pipe( pipe_cmpxchg );
 7478 %}
 7479 
 7480 //----------Subtraction Instructions-------------------------------------------
 7481 
 7482 // Integer Subtraction Instructions
 7483 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7484   match(Set dst (SubI dst src));
 7485   effect(KILL cr);
 7486 
 7487   size(2);
 7488   format %{ "SUB    $dst,$src" %}
 7489   opcode(0x2B);
 7490   ins_encode( OpcP, RegReg( dst, src) );
 7491   ins_pipe( ialu_reg_reg );
 7492 %}
 7493 
 7494 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7495   match(Set dst (SubI dst src));
 7496   effect(KILL cr);
 7497 
 7498   format %{ "SUB    $dst,$src" %}
 7499   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7500   // ins_encode( RegImm( dst, src) );
 7501   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7502   ins_pipe( ialu_reg );
 7503 %}
 7504 
 7505 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7506   match(Set dst (SubI dst (LoadI src)));
 7507   effect(KILL cr);
 7508 
 7509   ins_cost(150);
 7510   format %{ "SUB    $dst,$src" %}
 7511   opcode(0x2B);
 7512   ins_encode( OpcP, RegMem( dst, src) );
 7513   ins_pipe( ialu_reg_mem );
 7514 %}
 7515 
 7516 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7517   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7518   effect(KILL cr);
 7519 
 7520   ins_cost(150);
 7521   format %{ "SUB    $dst,$src" %}
 7522   opcode(0x29);  /* Opcode 29 /r */
 7523   ins_encode( OpcP, RegMem( src, dst ) );
 7524   ins_pipe( ialu_mem_reg );
 7525 %}
 7526 
 7527 // Subtract from a pointer
 7528 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7529   match(Set dst (AddP dst (SubI zero src)));
 7530   effect(KILL cr);
 7531 
 7532   size(2);
 7533   format %{ "SUB    $dst,$src" %}
 7534   opcode(0x2B);
 7535   ins_encode( OpcP, RegReg( dst, src) );
 7536   ins_pipe( ialu_reg_reg );
 7537 %}
 7538 
 7539 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7540   match(Set dst (SubI zero dst));
 7541   effect(KILL cr);
 7542 
 7543   size(2);
 7544   format %{ "NEG    $dst" %}
 7545   opcode(0xF7,0x03);  // Opcode F7 /3
 7546   ins_encode( OpcP, RegOpc( dst ) );
 7547   ins_pipe( ialu_reg );
 7548 %}
 7549 
 7550 //----------Multiplication/Division Instructions-------------------------------
 7551 // Integer Multiplication Instructions
 7552 // Multiply Register
 7553 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7554   match(Set dst (MulI dst src));
 7555   effect(KILL cr);
 7556 
 7557   size(3);
 7558   ins_cost(300);
 7559   format %{ "IMUL   $dst,$src" %}
 7560   opcode(0xAF, 0x0F);
 7561   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7562   ins_pipe( ialu_reg_reg_alu0 );
 7563 %}
 7564 
 7565 // Multiply 32-bit Immediate
 7566 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7567   match(Set dst (MulI src imm));
 7568   effect(KILL cr);
 7569 
 7570   ins_cost(300);
 7571   format %{ "IMUL   $dst,$src,$imm" %}
 7572   opcode(0x69);  /* 69 /r id */
 7573   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7574   ins_pipe( ialu_reg_reg_alu0 );
 7575 %}
 7576 
 7577 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7578   match(Set dst src);
 7579   effect(KILL cr);
 7580 
 7581   // Note that this is artificially increased to make it more expensive than loadConL
 7582   ins_cost(250);
 7583   format %{ "MOV    EAX,$src\t// low word only" %}
 7584   opcode(0xB8);
 7585   ins_encode( LdImmL_Lo(dst, src) );
 7586   ins_pipe( ialu_reg_fat );
 7587 %}
 7588 
 7589 // Multiply by 32-bit Immediate, taking the shifted high order results
 7590 //  (special case for shift by 32)
 7591 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7592   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7593   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7594              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7595              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7596   effect(USE src1, KILL cr);
 7597 
 7598   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7599   ins_cost(0*100 + 1*400 - 150);
 7600   format %{ "IMUL   EDX:EAX,$src1" %}
 7601   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7602   ins_pipe( pipe_slow );
 7603 %}
 7604 
 7605 // Multiply by 32-bit Immediate, taking the shifted high order results
 7606 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7607   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7608   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7609              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7610              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7611   effect(USE src1, KILL cr);
 7612 
 7613   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7614   ins_cost(1*100 + 1*400 - 150);
 7615   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7616             "SAR    EDX,$cnt-32" %}
 7617   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7618   ins_pipe( pipe_slow );
 7619 %}
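
      // C model of the two rules above (comment only; helper name is
      // illustrative): they pick up (int)(((long)src1 * con) >> cnt) where con
      // fits in a signed 32-bit value and 32 <= cnt <= 63, so the whole answer
      // lives in EDX after the widening IMUL (a shift by exactly 32 needs no SAR
      // at all):
      //
      //   static int32_t mul_shift_high(int32_t a, int32_t con, int cnt) {
      //     int64_t prod = (int64_t)a * (int64_t)con;  // one-operand IMUL -> EDX:EAX
      //     return (int32_t)(prod >> cnt);             // SAR EDX,cnt-32 when cnt > 32
      //   }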
 7620 
 7621 // Multiply Memory 32-bit Immediate
 7622 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7623   match(Set dst (MulI (LoadI src) imm));
 7624   effect(KILL cr);
 7625 
 7626   ins_cost(300);
 7627   format %{ "IMUL   $dst,$src,$imm" %}
 7628   opcode(0x69);  /* 69 /r id */
 7629   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7630   ins_pipe( ialu_reg_mem_alu0 );
 7631 %}
 7632 
 7633 // Multiply Memory
 7634 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7635   match(Set dst (MulI dst (LoadI src)));
 7636   effect(KILL cr);
 7637 
 7638   ins_cost(350);
 7639   format %{ "IMUL   $dst,$src" %}
 7640   opcode(0xAF, 0x0F);
 7641   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7642   ins_pipe( ialu_reg_mem_alu0 );
 7643 %}
 7644 
 7645 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7646 %{
 7647   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7648   effect(KILL cr, KILL src2);
 7649 
 7650   expand %{ mulI_eReg(dst, src1, cr);
 7651            mulI_eReg(src2, src3, cr);
 7652            addI_eReg(dst, src2, cr); %}
 7653 %}
 7654 
 7655 // Multiply Register Int to Long
 7656 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7657   // Basic Idea: long = (long)int * (long)int
 7658   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7659   effect(DEF dst, USE src, USE src1, KILL flags);
 7660 
 7661   ins_cost(300);
 7662   format %{ "IMUL   $dst,$src1" %}
 7663 
 7664   ins_encode( long_int_multiply( dst, src1 ) );
 7665   ins_pipe( ialu_reg_reg_alu0 );
 7666 %}
 7667 
 7668 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7669   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7670   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7671   effect(KILL flags);
 7672 
 7673   ins_cost(300);
 7674   format %{ "MUL    $dst,$src1" %}
 7675 
 7676   ins_encode( long_uint_multiply(dst, src1) );
 7677   ins_pipe( ialu_reg_reg_alu0 );
 7678 %}
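
      // C models of the two widening multiplies above (comment only; helper names
      // are illustrative): mulI2L is the signed one-operand IMUL, mulIS_eReg the
      // unsigned one-operand MUL; both leave the 64-bit product in EDX:EAX.
      //
      //   static int64_t  mul_s32(int32_t a, int32_t b)   { return (int64_t)a * b; }
      //   static uint64_t mul_u32(uint32_t a, uint32_t b) { return (uint64_t)a * b; }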
 7679 
 7680 // Multiply Register Long
 7681 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7682   match(Set dst (MulL dst src));
 7683   effect(KILL cr, TEMP tmp);
 7684   ins_cost(4*100+3*400);
 7685 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7686 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7687   format %{ "MOV    $tmp,$src.lo\n\t"
 7688             "IMUL   $tmp,EDX\n\t"
 7689             "MOV    EDX,$src.hi\n\t"
 7690             "IMUL   EDX,EAX\n\t"
 7691             "ADD    $tmp,EDX\n\t"
 7692             "MUL    EDX:EAX,$src.lo\n\t"
 7693             "ADD    EDX,$tmp" %}
 7694   ins_encode( long_multiply( dst, src, tmp ) );
 7695   ins_pipe( pipe_slow );
 7696 %}
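
      // C model of the schoolbook decomposition above (comment only; helper name
      // is illustrative).  The mulL_eReg_* variants that follow are the cases
      // where one or both high halves are known to be zero, or the multiplier is
      // a small constant:
      //
      //   static uint64_t mul64(uint32_t x_lo, uint32_t x_hi,
      //                         uint32_t y_lo, uint32_t y_hi) {
      //     uint64_t p  = (uint64_t)x_lo * y_lo;         // MUL  EDX:EAX,$src.lo
      //     uint32_t hi = (uint32_t)(p >> 32)
      //                 + x_hi * y_lo + x_lo * y_hi;     // the two IMULs + ADDs
      //     return ((uint64_t)hi << 32) | (uint32_t)p;   // bits beyond 2^64 are dropped
      //   }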
 7697 
 7698 // Multiply Register Long where the left operand's high 32 bits are zero
 7699 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7700   predicate(is_operand_hi32_zero(n->in(1)));
 7701   match(Set dst (MulL dst src));
 7702   effect(KILL cr, TEMP tmp);
 7703   ins_cost(2*100+2*400);
 7704 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7705 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7706   format %{ "MOV    $tmp,$src.hi\n\t"
 7707             "IMUL   $tmp,EAX\n\t"
 7708             "MUL    EDX:EAX,$src.lo\n\t"
 7709             "ADD    EDX,$tmp" %}
 7710   ins_encode %{
 7711     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7712     __ imull($tmp$$Register, rax);
 7713     __ mull($src$$Register);
 7714     __ addl(rdx, $tmp$$Register);
 7715   %}
 7716   ins_pipe( pipe_slow );
 7717 %}
 7718 
 7719 // Multiply Register Long where the right operand's high 32 bits are zero
 7720 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7721   predicate(is_operand_hi32_zero(n->in(2)));
 7722   match(Set dst (MulL dst src));
 7723   effect(KILL cr, TEMP tmp);
 7724   ins_cost(2*100+2*400);
 7725 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7726 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7727   format %{ "MOV    $tmp,$src.lo\n\t"
 7728             "IMUL   $tmp,EDX\n\t"
 7729             "MUL    EDX:EAX,$src.lo\n\t"
 7730             "ADD    EDX,$tmp" %}
 7731   ins_encode %{
 7732     __ movl($tmp$$Register, $src$$Register);
 7733     __ imull($tmp$$Register, rdx);
 7734     __ mull($src$$Register);
 7735     __ addl(rdx, $tmp$$Register);
 7736   %}
 7737   ins_pipe( pipe_slow );
 7738 %}
 7739 
 7740 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7741 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7742   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7743   match(Set dst (MulL dst src));
 7744   effect(KILL cr);
 7745   ins_cost(1*400);
 7746 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7747 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7748   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7749   ins_encode %{
 7750     __ mull($src$$Register);
 7751   %}
 7752   ins_pipe( pipe_slow );
 7753 %}
 7754 
 7755 // Multiply Register Long by small constant
 7756 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7757   match(Set dst (MulL dst src));
 7758   effect(KILL cr, TEMP tmp);
 7759   ins_cost(2*100+2*400);
 7760   size(12);
 7761 // Basic idea: lo(result) = lo(src * EAX)
 7762 //             hi(result) = hi(src * EAX) + lo(src * EDX)
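      // Derivation sketch: dst*$src = (EDX*2^32 + EAX)*$src; after reduction mod
      // 2^64 only the low 32 bits of EDX*$src survive, and they are added into
      // the high word on top of hi(EAX*$src), as laid out above.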
 7763   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7764             "MOV    EDX,$src\n\t"
 7765             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7766             "ADD    EDX,$tmp" %}
 7767   ins_encode( long_multiply_con( dst, src, tmp ) );
 7768   ins_pipe( pipe_slow );
 7769 %}
 7770 
 7771 // Integer DIV with Register
 7772 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7773   match(Set rax (DivI rax div));
 7774   effect(KILL rdx, KILL cr);
 7775   size(26);
 7776   ins_cost(30*100+10*100);
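        // The guard below skips IDIV for min_jint / -1, which would raise #DE;
        // on that path EAX already holds the correct quotient (min_jint) and EDX
        // is zeroed for the remainder, matching Java semantics.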
 7777   format %{ "CMP    EAX,0x80000000\n\t"
 7778             "JNE,s  normal\n\t"
 7779             "XOR    EDX,EDX\n\t"
 7780             "CMP    ECX,-1\n\t"
 7781             "JE,s   done\n"
 7782     "normal: CDQ\n\t"
 7783             "IDIV   $div\n\t"
 7784     "done:"        %}
 7785   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7786   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7787   ins_pipe( ialu_reg_reg_alu0 );
 7788 %}
 7789 
 7790 // Divide Register Long
 7791 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7792   match(Set dst (DivL src1 src2));
 7793   effect(CALL);
 7794   ins_cost(10000);
 7795   format %{ "PUSH   $src1.hi\n\t"
 7796             "PUSH   $src1.lo\n\t"
 7797             "PUSH   $src2.hi\n\t"
 7798             "PUSH   $src2.lo\n\t"
 7799             "CALL   SharedRuntime::ldiv\n\t"
 7800             "ADD    ESP,16" %}
 7801   ins_encode( long_div(src1,src2) );
 7802   ins_pipe( pipe_slow );
 7803 %}
 7804 
 7805 // Integer DIVMOD with Register, both quotient and mod results
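      // A single IDIV leaves the quotient in EAX and the remainder in EDX, so the
      // fused DivModI node reuses the DivI sequence with both outputs live.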
 7806 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7807   match(DivModI rax div);
 7808   effect(KILL cr);
 7809   size(26);
 7810   ins_cost(30*100+10*100);
 7811   format %{ "CMP    EAX,0x80000000\n\t"
 7812             "JNE,s  normal\n\t"
 7813             "XOR    EDX,EDX\n\t"
 7814             "CMP    ECX,-1\n\t"
 7815             "JE,s   done\n"
 7816     "normal: CDQ\n\t"
 7817             "IDIV   $div\n\t"
 7818     "done:"        %}
 7819   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7820   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7821   ins_pipe( pipe_slow );
 7822 %}
 7823 
 7824 // Integer MOD with Register
 7825 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7826   match(Set rdx (ModI rax div));
 7827   effect(KILL rax, KILL cr);
 7828 
 7829   size(26);
 7830   ins_cost(300);
 7831   format %{ "CDQ\n\t"
 7832             "IDIV   $div" %}
 7833   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7834   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7835   ins_pipe( ialu_reg_reg_alu0 );
 7836 %}
 7837 
 7838 // Remainder Register Long
 7839 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7840   match(Set dst (ModL src1 src2));
 7841   effect(CALL);
 7842   ins_cost(10000);
 7843   format %{ "PUSH   $src1.hi\n\t"
 7844             "PUSH   $src1.lo\n\t"
 7845             "PUSH   $src2.hi\n\t"
 7846             "PUSH   $src2.lo\n\t"
 7847             "CALL   SharedRuntime::lrem\n\t"
 7848             "ADD    ESP,16" %}
 7849   ins_encode( long_mod(src1,src2) );
 7850   ins_pipe( pipe_slow );
 7851 %}
 7852 
 7853 // Divide Register Long (no special case since divisor != -1)
 7854 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7855   match(Set dst (DivL dst imm));
 7856   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7857   ins_cost(1000);
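        // Algorithm sketch: divide the magnitude in EDX:EAX by abs($imm) with
        // schoolbook long division using two unsigned 32-bit DIVs (high word
        // first, then remainder:low word); take the single-DIV fast path when the
        // high word is already below abs($imm), with sign fixups for a negative
        // dividend or divisor.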
 7858   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7859             "XOR    $tmp2,$tmp2\n\t"
 7860             "CMP    $tmp,EDX\n\t"
 7861             "JA,s   fast\n\t"
 7862             "MOV    $tmp2,EAX\n\t"
 7863             "MOV    EAX,EDX\n\t"
 7864             "MOV    EDX,0\n\t"
 7865             "JLE,s  pos\n\t"
 7866             "LNEG   EAX : $tmp2\n\t"
 7867             "DIV    $tmp # unsigned division\n\t"
 7868             "XCHG   EAX,$tmp2\n\t"
 7869             "DIV    $tmp\n\t"
 7870             "LNEG   $tmp2 : EAX\n\t"
 7871             "JMP,s  done\n"
 7872     "pos:\n\t"
 7873             "DIV    $tmp\n\t"
 7874             "XCHG   EAX,$tmp2\n"
 7875     "fast:\n\t"
 7876             "DIV    $tmp\n"
 7877     "done:\n\t"
 7878             "MOV    EDX,$tmp2\n\t"
 7879             "NEG    EDX:EAX # if $imm < 0" %}
 7880   ins_encode %{
 7881     int con = (int)$imm$$constant;
 7882     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7883     int pcon = (con > 0) ? con : -con;
 7884     Label Lfast, Lpos, Ldone;
 7885 
 7886     __ movl($tmp$$Register, pcon);
 7887     __ xorl($tmp2$$Register,$tmp2$$Register);
 7888     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7889     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7890 
 7891     __ movl($tmp2$$Register, $dst$$Register); // save
 7892     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7893     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7894     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7895 
 7896     // Negative dividend.
 7897     // convert value to positive to use unsigned division
 7898     __ lneg($dst$$Register, $tmp2$$Register);
 7899     __ divl($tmp$$Register);
 7900     __ xchgl($dst$$Register, $tmp2$$Register);
 7901     __ divl($tmp$$Register);
 7902     // revert result back to negative
 7903     __ lneg($tmp2$$Register, $dst$$Register);
 7904     __ jmpb(Ldone);
 7905 
 7906     __ bind(Lpos);
 7907     __ divl($tmp$$Register); // Use unsigned division
 7908     __ xchgl($dst$$Register, $tmp2$$Register);
 7909     // Fallthrough for final divide, tmp2 has 32-bit hi result
 7910 
 7911     __ bind(Lfast);
 7912     // fast path: src is positive
 7913     __ divl($tmp$$Register); // Use unsigned division
 7914 
 7915     __ bind(Ldone);
 7916     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7917     if (con < 0) {
 7918       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7919     }
 7920   %}
 7921   ins_pipe( pipe_slow );
 7922 %}
 7923 
 7924 // Remainder Register Long (remainder fits into 32 bits)
 7925 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7926   match(Set dst (ModL dst imm));
 7927   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7928   ins_cost(1000);
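        // Same magnitude-division scheme as divL_eReg_imm32 above, but keeping
        // the 32-bit remainder; it carries the dividend's sign, and the trailing
        // MOV/SAR sign-extend it into EDX:EAX.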
 7929   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7930             "CMP    $tmp,EDX\n\t"
 7931             "JA,s   fast\n\t"
 7932             "MOV    $tmp2,EAX\n\t"
 7933             "MOV    EAX,EDX\n\t"
 7934             "MOV    EDX,0\n\t"
 7935             "JLE,s  pos\n\t"
 7936             "LNEG   EAX : $tmp2\n\t"
 7937             "DIV    $tmp # unsigned division\n\t"
 7938             "MOV    EAX,$tmp2\n\t"
 7939             "DIV    $tmp\n\t"
 7940             "NEG    EDX\n\t"
 7941             "JMP,s  done\n"
 7942     "pos:\n\t"
 7943             "DIV    $tmp\n\t"
 7944             "MOV    EAX,$tmp2\n"
 7945     "fast:\n\t"
 7946             "DIV    $tmp\n"
 7947     "done:\n\t"
 7948             "MOV    EAX,EDX\n\t"
 7949             "SAR    EDX,31\n\t" %}
 7950   ins_encode %{
 7951     int con = (int)$imm$$constant;
 7952     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7953     int pcon = (con > 0) ? con : -con;
 7954     Label  Lfast, Lpos, Ldone;
 7955 
 7956     __ movl($tmp$$Register, pcon);
 7957     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7958     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7959 
 7960     __ movl($tmp2$$Register, $dst$$Register); // save
 7961     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7962     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7963     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7964 
 7965     // Negative dividend.
 7966     // convert value to positive to use unsigned division
 7967     __ lneg($dst$$Register, $tmp2$$Register);
 7968     __ divl($tmp$$Register);
 7969     __ movl($dst$$Register, $tmp2$$Register);
 7970     __ divl($tmp$$Register);
 7971     // revert remainder back to negative
 7972     __ negl(HIGH_FROM_LOW($dst$$Register));
 7973     __ jmpb(Ldone);
 7974 
 7975     __ bind(Lpos);
 7976     __ divl($tmp$$Register);
 7977     __ movl($dst$$Register, $tmp2$$Register);
 7978 
 7979     __ bind(Lfast);
 7980     // fast path: src is positive
 7981     __ divl($tmp$$Register);
 7982 
 7983     __ bind(Ldone);
 7984     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7985     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7986 
 7987   %}
 7988   ins_pipe( pipe_slow );
 7989 %}
 7990 
 7991 // Integer Shift Instructions
 7992 // Shift Left by one
 7993 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7994   match(Set dst (LShiftI dst shift));
 7995   effect(KILL cr);
 7996 
 7997   size(2);
 7998   format %{ "SHL    $dst,$shift" %}
 7999   opcode(0xD1, 0x4);  /* D1 /4 */
 8000   ins_encode( OpcP, RegOpc( dst ) );
 8001   ins_pipe( ialu_reg );
 8002 %}
 8003 
 8004 // Shift Left by 8-bit immediate
 8005 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8006   match(Set dst (LShiftI dst shift));
 8007   effect(KILL cr);
 8008 
 8009   size(3);
 8010   format %{ "SHL    $dst,$shift" %}
 8011   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8012   ins_encode( RegOpcImm( dst, shift) );
 8013   ins_pipe( ialu_reg );
 8014 %}
 8015 
 8016 // Shift Left by variable
 8017 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8018   match(Set dst (LShiftI dst shift));
 8019   effect(KILL cr);
 8020 
 8021   size(2);
 8022   format %{ "SHL    $dst,$shift" %}
 8023   opcode(0xD3, 0x4);  /* D3 /4 */
 8024   ins_encode( OpcP, RegOpc( dst ) );
 8025   ins_pipe( ialu_reg_reg );
 8026 %}
 8027 
 8028 // Arithmetic shift right by one
 8029 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8030   match(Set dst (RShiftI dst shift));
 8031   effect(KILL cr);
 8032 
 8033   size(2);
 8034   format %{ "SAR    $dst,$shift" %}
 8035   opcode(0xD1, 0x7);  /* D1 /7 */
 8036   ins_encode( OpcP, RegOpc( dst ) );
 8037   ins_pipe( ialu_reg );
 8038 %}
 8039 
 8040 // Arithmetic shift right by one
 8041 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8042   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8043   effect(KILL cr);
 8044   format %{ "SAR    $dst,$shift" %}
 8045   opcode(0xD1, 0x7);  /* D1 /7 */
 8046   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8047   ins_pipe( ialu_mem_imm );
 8048 %}
 8049 
 8050 // Arithmetic Shift Right by 8-bit immediate
 8051 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8052   match(Set dst (RShiftI dst shift));
 8053   effect(KILL cr);
 8054 
 8055   size(3);
 8056   format %{ "SAR    $dst,$shift" %}
 8057   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8058   ins_encode( RegOpcImm( dst, shift ) );
 8059   ins_pipe( ialu_mem_imm );
 8060 %}
 8061 
 8062 // Arithmetic Shift Right by 8-bit immediate
 8063 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8064   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8065   effect(KILL cr);
 8066 
 8067   format %{ "SAR    $dst,$shift" %}
 8068   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8069   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8070   ins_pipe( ialu_mem_imm );
 8071 %}
 8072 
 8073 // Arithmetic Shift Right by variable
 8074 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8075   match(Set dst (RShiftI dst shift));
 8076   effect(KILL cr);
 8077 
 8078   size(2);
 8079   format %{ "SAR    $dst,$shift" %}
 8080   opcode(0xD3, 0x7);  /* D3 /7 */
 8081   ins_encode( OpcP, RegOpc( dst ) );
 8082   ins_pipe( ialu_reg_reg );
 8083 %}
 8084 
 8085 // Logical shift right by one
 8086 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8087   match(Set dst (URShiftI dst shift));
 8088   effect(KILL cr);
 8089 
 8090   size(2);
 8091   format %{ "SHR    $dst,$shift" %}
 8092   opcode(0xD1, 0x5);  /* D1 /5 */
 8093   ins_encode( OpcP, RegOpc( dst ) );
 8094   ins_pipe( ialu_reg );
 8095 %}
 8096 
 8097 // Logical Shift Right by 8-bit immediate
 8098 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8099   match(Set dst (URShiftI dst shift));
 8100   effect(KILL cr);
 8101 
 8102   size(3);
 8103   format %{ "SHR    $dst,$shift" %}
 8104   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8105   ins_encode( RegOpcImm( dst, shift) );
 8106   ins_pipe( ialu_reg );
 8107 %}
 8108 
 8109 
 8110 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
 8111 // This idiom is used by the compiler for the i2b bytecode.
 8112 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8113   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8114 
 8115   size(3);
 8116   format %{ "MOVSX  $dst,$src :8" %}
 8117   ins_encode %{
 8118     __ movsbl($dst$$Register, $src$$Register);
 8119   %}
 8120   ins_pipe(ialu_reg_reg);
 8121 %}
 8122 
 8123 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
 8124 // This idiom is used by the compiler for the i2s bytecode.
 8125 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8126   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8127 
 8128   size(3);
 8129   format %{ "MOVSX  $dst,$src :16" %}
 8130   ins_encode %{
 8131     __ movswl($dst$$Register, $src$$Register);
 8132   %}
 8133   ins_pipe(ialu_reg_reg);
 8134 %}
 8135 
 8136 
 8137 // Logical Shift Right by variable
 8138 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8139   match(Set dst (URShiftI dst shift));
 8140   effect(KILL cr);
 8141 
 8142   size(2);
 8143   format %{ "SHR    $dst,$shift" %}
 8144   opcode(0xD3, 0x5);  /* D3 /5 */
 8145   ins_encode( OpcP, RegOpc( dst ) );
 8146   ins_pipe( ialu_reg_reg );
 8147 %}
 8148 
 8149 
 8150 //----------Logical Instructions-----------------------------------------------
 8151 //----------Integer Logical Instructions---------------------------------------
 8152 // And Instructions
 8153 // And Register with Register
 8154 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8155   match(Set dst (AndI dst src));
 8156   effect(KILL cr);
 8157 
 8158   size(2);
 8159   format %{ "AND    $dst,$src" %}
 8160   opcode(0x23);
 8161   ins_encode( OpcP, RegReg( dst, src) );
 8162   ins_pipe( ialu_reg_reg );
 8163 %}
 8164 
 8165 // And Register with Immediate
 8166 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8167   match(Set dst (AndI dst src));
 8168   effect(KILL cr);
 8169 
 8170   format %{ "AND    $dst,$src" %}
 8171   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8172   // ins_encode( RegImm( dst, src) );
 8173   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8174   ins_pipe( ialu_reg );
 8175 %}
 8176 
 8177 // And Register with Memory
 8178 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8179   match(Set dst (AndI dst (LoadI src)));
 8180   effect(KILL cr);
 8181 
 8182   ins_cost(150);
 8183   format %{ "AND    $dst,$src" %}
 8184   opcode(0x23);
 8185   ins_encode( OpcP, RegMem( dst, src) );
 8186   ins_pipe( ialu_reg_mem );
 8187 %}
 8188 
 8189 // And Memory with Register
 8190 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8191   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8192   effect(KILL cr);
 8193 
 8194   ins_cost(150);
 8195   format %{ "AND    $dst,$src" %}
 8196   opcode(0x21);  /* Opcode 21 /r */
 8197   ins_encode( OpcP, RegMem( src, dst ) );
 8198   ins_pipe( ialu_mem_reg );
 8199 %}
 8200 
 8201 // And Memory with Immediate
 8202 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8203   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8204   effect(KILL cr);
 8205 
 8206   ins_cost(125);
 8207   format %{ "AND    $dst,$src" %}
 8208   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8209   // ins_encode( MemImm( dst, src) );
 8210   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8211   ins_pipe( ialu_mem_imm );
 8212 %}
 8213 
 8214 // BMI1 instructions
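      // These match the canonical bit-twiddling idioms and map each to a single
      // BMI1 instruction when UseBMI1Instructions is set:
      //   ANDN:   ~x & y
      //   BLSI:   x & -x       (isolate lowest set bit)
      //   BLSMSK: x ^ (x - 1)  (mask up to and including lowest set bit)
      //   BLSR:   x & (x - 1)  (clear lowest set bit)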
 8215 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8216   match(Set dst (AndI (XorI src1 minus_1) src2));
 8217   predicate(UseBMI1Instructions);
 8218   effect(KILL cr);
 8219 
 8220   format %{ "ANDNL  $dst, $src1, $src2" %}
 8221 
 8222   ins_encode %{
 8223     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8224   %}
 8225   ins_pipe(ialu_reg);
 8226 %}
 8227 
 8228 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8229   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8230   predicate(UseBMI1Instructions);
 8231   effect(KILL cr);
 8232 
 8233   ins_cost(125);
 8234   format %{ "ANDNL  $dst, $src1, $src2" %}
 8235 
 8236   ins_encode %{
 8237     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8238   %}
 8239   ins_pipe(ialu_reg_mem);
 8240 %}
 8241 
 8242 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8243   match(Set dst (AndI (SubI imm_zero src) src));
 8244   predicate(UseBMI1Instructions);
 8245   effect(KILL cr);
 8246 
 8247   format %{ "BLSIL  $dst, $src" %}
 8248 
 8249   ins_encode %{
 8250     __ blsil($dst$$Register, $src$$Register);
 8251   %}
 8252   ins_pipe(ialu_reg);
 8253 %}
 8254 
 8255 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8256   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8257   predicate(UseBMI1Instructions);
 8258   effect(KILL cr);
 8259 
 8260   ins_cost(125);
 8261   format %{ "BLSIL  $dst, $src" %}
 8262 
 8263   ins_encode %{
 8264     __ blsil($dst$$Register, $src$$Address);
 8265   %}
 8266   ins_pipe(ialu_reg_mem);
 8267 %}
 8268 
 8269 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8270 %{
 8271   match(Set dst (XorI (AddI src minus_1) src));
 8272   predicate(UseBMI1Instructions);
 8273   effect(KILL cr);
 8274 
 8275   format %{ "BLSMSKL $dst, $src" %}
 8276 
 8277   ins_encode %{
 8278     __ blsmskl($dst$$Register, $src$$Register);
 8279   %}
 8280 
 8281   ins_pipe(ialu_reg);
 8282 %}
 8283 
 8284 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8285 %{
 8286   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8287   predicate(UseBMI1Instructions);
 8288   effect(KILL cr);
 8289 
 8290   ins_cost(125);
 8291   format %{ "BLSMSKL $dst, $src" %}
 8292 
 8293   ins_encode %{
 8294     __ blsmskl($dst$$Register, $src$$Address);
 8295   %}
 8296 
 8297   ins_pipe(ialu_reg_mem);
 8298 %}
 8299 
 8300 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8301 %{
 8302   match(Set dst (AndI (AddI src minus_1) src) );
 8303   predicate(UseBMI1Instructions);
 8304   effect(KILL cr);
 8305 
 8306   format %{ "BLSRL  $dst, $src" %}
 8307 
 8308   ins_encode %{
 8309     __ blsrl($dst$$Register, $src$$Register);
 8310   %}
 8311 
 8312   ins_pipe(ialu_reg);
 8313 %}
 8314 
 8315 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8316 %{
 8317   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8318   predicate(UseBMI1Instructions);
 8319   effect(KILL cr);
 8320 
 8321   ins_cost(125);
 8322   format %{ "BLSRL  $dst, $src" %}
 8323 
 8324   ins_encode %{
 8325     __ blsrl($dst$$Register, $src$$Address);
 8326   %}
 8327 
 8328   ins_pipe(ialu_reg_mem);
 8329 %}
 8330 
 8331 // Or Instructions
 8332 // Or Register with Register
 8333 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8334   match(Set dst (OrI dst src));
 8335   effect(KILL cr);
 8336 
 8337   size(2);
 8338   format %{ "OR     $dst,$src" %}
 8339   opcode(0x0B);
 8340   ins_encode( OpcP, RegReg( dst, src) );
 8341   ins_pipe( ialu_reg_reg );
 8342 %}
 8343 
 8344 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8345   match(Set dst (OrI dst (CastP2X src)));
 8346   effect(KILL cr);
 8347 
 8348   size(2);
 8349   format %{ "OR     $dst,$src" %}
 8350   opcode(0x0B);
 8351   ins_encode( OpcP, RegReg( dst, src) );
 8352   ins_pipe( ialu_reg_reg );
 8353 %}
 8354 
 8355 
 8356 // Or Register with Immediate
 8357 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8358   match(Set dst (OrI dst src));
 8359   effect(KILL cr);
 8360 
 8361   format %{ "OR     $dst,$src" %}
 8362   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8363   // ins_encode( RegImm( dst, src) );
 8364   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8365   ins_pipe( ialu_reg );
 8366 %}
 8367 
 8368 // Or Register with Memory
 8369 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8370   match(Set dst (OrI dst (LoadI src)));
 8371   effect(KILL cr);
 8372 
 8373   ins_cost(150);
 8374   format %{ "OR     $dst,$src" %}
 8375   opcode(0x0B);
 8376   ins_encode( OpcP, RegMem( dst, src) );
 8377   ins_pipe( ialu_reg_mem );
 8378 %}
 8379 
 8380 // Or Memory with Register
 8381 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8382   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8383   effect(KILL cr);
 8384 
 8385   ins_cost(150);
 8386   format %{ "OR     $dst,$src" %}
 8387   opcode(0x09);  /* Opcode 09 /r */
 8388   ins_encode( OpcP, RegMem( src, dst ) );
 8389   ins_pipe( ialu_mem_reg );
 8390 %}
 8391 
 8392 // Or Memory with Immediate
 8393 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8394   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8395   effect(KILL cr);
 8396 
 8397   ins_cost(125);
 8398   format %{ "OR     $dst,$src" %}
 8399   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8400   // ins_encode( MemImm( dst, src) );
 8401   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8402   ins_pipe( ialu_mem_imm );
 8403 %}
 8404 
 8405 // ROL/ROR
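      // A rotate is matched as the OR of two opposing shifts, e.g. a left-rotate
      // by n is (x << n) | (x >>> (32 - n)); the imm8 forms check via their
      // predicate that the two shift counts sum to 0 mod 32.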
 8406 // ROL expand
 8407 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8408   effect(USE_DEF dst, USE shift, KILL cr);
 8409 
 8410   format %{ "ROL    $dst, $shift" %}
 8411   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8412   ins_encode( OpcP, RegOpc( dst ));
 8413   ins_pipe( ialu_reg );
 8414 %}
 8415 
 8416 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8417   effect(USE_DEF dst, USE shift, KILL cr);
 8418 
 8419   format %{ "ROL    $dst, $shift" %}
 8420   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8421   ins_encode( RegOpcImm(dst, shift) );
 8422   ins_pipe(ialu_reg);
 8423 %}
 8424 
 8425 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8426   effect(USE_DEF dst, USE shift, KILL cr);
 8427 
 8428   format %{ "ROL    $dst, $shift" %}
 8429   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8430   ins_encode(OpcP, RegOpc(dst));
 8431   ins_pipe( ialu_reg_reg );
 8432 %}
 8433 // end of ROL expand
 8434 
 8435 // ROL 32bit by one once
 8436 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8437   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8438 
 8439   expand %{
 8440     rolI_eReg_imm1(dst, lshift, cr);
 8441   %}
 8442 %}
 8443 
 8444 // ROL 32bit var by imm8 once
 8445 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8446   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8447   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8448 
 8449   expand %{
 8450     rolI_eReg_imm8(dst, lshift, cr);
 8451   %}
 8452 %}
 8453 
 8454 // ROL 32bit var by var once
 8455 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8456   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8457 
 8458   expand %{
 8459     rolI_eReg_CL(dst, shift, cr);
 8460   %}
 8461 %}
 8462 
 8463 // ROL 32bit var by var once
 8464 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8465   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8466 
 8467   expand %{
 8468     rolI_eReg_CL(dst, shift, cr);
 8469   %}
 8470 %}
 8471 
 8472 // ROR expand
 8473 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8474   effect(USE_DEF dst, USE shift, KILL cr);
 8475 
 8476   format %{ "ROR    $dst, $shift" %}
 8477   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8478   ins_encode( OpcP, RegOpc( dst ) );
 8479   ins_pipe( ialu_reg );
 8480 %}
 8481 
 8482 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8483   effect(USE_DEF dst, USE shift, KILL cr);
 8484 
 8485   format %{ "ROR    $dst, $shift" %}
 8486   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8487   ins_encode( RegOpcImm(dst, shift) );
 8488   ins_pipe( ialu_reg );
 8489 %}
 8490 
 8491 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8492   effect(USE_DEF dst, USE shift, KILL cr);
 8493 
 8494   format %{ "ROR    $dst, $shift" %}
 8495   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8496   ins_encode(OpcP, RegOpc(dst));
 8497   ins_pipe( ialu_reg_reg );
 8498 %}
 8499 // end of ROR expand
 8500 
 8501 // ROR right once
 8502 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8503   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8504 
 8505   expand %{
 8506     rorI_eReg_imm1(dst, rshift, cr);
 8507   %}
 8508 %}
 8509 
 8510 // ROR 32bit by immI8 once
 8511 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8512   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8513   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8514 
 8515   expand %{
 8516     rorI_eReg_imm8(dst, rshift, cr);
 8517   %}
 8518 %}
 8519 
 8520 // ROR 32bit var by var once
 8521 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8522   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8523 
 8524   expand %{
 8525     rorI_eReg_CL(dst, shift, cr);
 8526   %}
 8527 %}
 8528 
 8529 // ROR 32bit var by var once
 8530 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8531   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8532 
 8533   expand %{
 8534     rorI_eReg_CL(dst, shift, cr);
 8535   %}
 8536 %}
 8537 
 8538 // Xor Instructions
 8539 // Xor Register with Register
 8540 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8541   match(Set dst (XorI dst src));
 8542   effect(KILL cr);
 8543 
 8544   size(2);
 8545   format %{ "XOR    $dst,$src" %}
 8546   opcode(0x33);
 8547   ins_encode( OpcP, RegReg( dst, src) );
 8548   ins_pipe( ialu_reg_reg );
 8549 %}
 8550 
 8551 // Xor Register with Immediate -1
 8552 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8553   match(Set dst (XorI dst imm));
 8554 
 8555   size(2);
 8556   format %{ "NOT    $dst" %}
 8557   ins_encode %{
 8558      __ notl($dst$$Register);
 8559   %}
 8560   ins_pipe( ialu_reg );
 8561 %}
 8562 
 8563 // Xor Register with Immediate
 8564 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8565   match(Set dst (XorI dst src));
 8566   effect(KILL cr);
 8567 
 8568   format %{ "XOR    $dst,$src" %}
 8569   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8570   // ins_encode( RegImm( dst, src) );
 8571   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8572   ins_pipe( ialu_reg );
 8573 %}
 8574 
 8575 // Xor Register with Memory
 8576 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8577   match(Set dst (XorI dst (LoadI src)));
 8578   effect(KILL cr);
 8579 
 8580   ins_cost(150);
 8581   format %{ "XOR    $dst,$src" %}
 8582   opcode(0x33);
 8583   ins_encode( OpcP, RegMem(dst, src) );
 8584   ins_pipe( ialu_reg_mem );
 8585 %}
 8586 
 8587 // Xor Memory with Register
 8588 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8589   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8590   effect(KILL cr);
 8591 
 8592   ins_cost(150);
 8593   format %{ "XOR    $dst,$src" %}
 8594   opcode(0x31);  /* Opcode 31 /r */
 8595   ins_encode( OpcP, RegMem( src, dst ) );
 8596   ins_pipe( ialu_mem_reg );
 8597 %}
 8598 
 8599 // Xor Memory with Immediate
 8600 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8601   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8602   effect(KILL cr);
 8603 
 8604   ins_cost(125);
 8605   format %{ "XOR    $dst,$src" %}
 8606   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8607   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8608   ins_pipe( ialu_mem_imm );
 8609 %}
 8610 
 8611 //----------Convert Int to Boolean---------------------------------------------
 8612 
 8613 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8614   effect( DEF dst, USE src );
 8615   format %{ "MOV    $dst,$src" %}
 8616   ins_encode( enc_Copy( dst, src) );
 8617   ins_pipe( ialu_reg_reg );
 8618 %}
 8619 
 8620 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8621   effect( USE_DEF dst, USE src, KILL cr );
 8622 
 8623   size(4);
 8624   format %{ "NEG    $dst\n\t"
 8625             "ADC    $dst,$src" %}
 8626   ins_encode( neg_reg(dst),
 8627               OpcRegReg(0x13,dst,src) );
 8628   ins_pipe( ialu_reg_reg_long );
 8629 %}
 8630 
 8631 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8632   match(Set dst (Conv2B src));
 8633 
 8634   expand %{
 8635     movI_nocopy(dst,src);
 8636     ci2b(dst,src,cr);
 8637   %}
 8638 %}
 8639 
 8640 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8641   effect( DEF dst, USE src );
 8642   format %{ "MOV    $dst,$src" %}
 8643   ins_encode( enc_Copy( dst, src) );
 8644   ins_pipe( ialu_reg_reg );
 8645 %}
 8646 
 8647 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8648   effect( USE_DEF dst, USE src, KILL cr );
 8649   format %{ "NEG    $dst\n\t"
 8650             "ADC    $dst,$src" %}
 8651   ins_encode( neg_reg(dst),
 8652               OpcRegReg(0x13,dst,src) );
 8653   ins_pipe( ialu_reg_reg_long );
 8654 %}
 8655 
 8656 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8657   match(Set dst (Conv2B src));
 8658 
 8659   expand %{
 8660     movP_nocopy(dst,src);
 8661     cp2b(dst,src,cr);
 8662   %}
 8663 %}
 8664 
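      // CmpLTMask produces an all-ones mask when p < q and zero otherwise; the
      // branch-free SETlt/NEG sequence below widens the single result bit into
      // the full 32-bit mask.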
 8665 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8666   match(Set dst (CmpLTMask p q));
 8667   effect(KILL cr);
 8668   ins_cost(400);
 8669 
 8670   // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8671   format %{ "XOR    $dst,$dst\n\t"
 8672             "CMP    $p,$q\n\t"
 8673             "SETlt  $dst\n\t"
 8674             "NEG    $dst" %}
 8675   ins_encode %{
 8676     Register Rp = $p$$Register;
 8677     Register Rq = $q$$Register;
 8678     Register Rd = $dst$$Register;
 8679     Label done;
 8680     __ xorl(Rd, Rd);
 8681     __ cmpl(Rp, Rq);
 8682     __ setb(Assembler::less, Rd);
 8683     __ negl(Rd);
 8684   %}
 8685 
 8686   ins_pipe(pipe_slow);
 8687 %}
 8688 
 8689 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8690   match(Set dst (CmpLTMask dst zero));
 8691   effect(DEF dst, KILL cr);
 8692   ins_cost(100);
 8693 
 8694   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8695   ins_encode %{
 8696   __ sarl($dst$$Register, 31);
 8697   %}
 8698   ins_pipe(ialu_reg);
 8699 %}
 8700 
 8701 /* better to save a register than avoid a branch */
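      // Matches p = ((p < q) ? y : 0) + (p - q): subtract first, then add y back
      // only when the subtraction went negative (the JGE skips the add).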
 8702 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8703   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8704   effect(KILL cr);
 8705   ins_cost(400);
 8706   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8707             "JGE    done\n\t"
 8708             "ADD    $p,$y\n"
 8709             "done:  " %}
 8710   ins_encode %{
 8711     Register Rp = $p$$Register;
 8712     Register Rq = $q$$Register;
 8713     Register Ry = $y$$Register;
 8714     Label done;
 8715     __ subl(Rp, Rq);
 8716     __ jccb(Assembler::greaterEqual, done);
 8717     __ addl(Rp, Ry);
 8718     __ bind(done);
 8719   %}
 8720 
 8721   ins_pipe(pipe_cmplt);
 8722 %}
 8723 
 8724 /* better to save a register than avoid a branch */
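      // Matches y = (p < q) ? y : 0: keep y when p < q, otherwise clear it.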
 8725 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8726   match(Set y (AndI (CmpLTMask p q) y));
 8727   effect(KILL cr);
 8728 
 8729   ins_cost(300);
 8730 
 8731   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8732             "JLT      done\n\t"
 8733             "XORL     $y, $y\n"
 8734             "done:  " %}
 8735   ins_encode %{
 8736     Register Rp = $p$$Register;
 8737     Register Rq = $q$$Register;
 8738     Register Ry = $y$$Register;
 8739     Label done;
 8740     __ cmpl(Rp, Rq);
 8741     __ jccb(Assembler::less, done);
 8742     __ xorl(Ry, Ry);
 8743     __ bind(done);
 8744   %}
 8745 
 8746   ins_pipe(pipe_cmplt);
 8747 %}
 8748 
 8749 /* If I enable this, I encourage spilling in the inner loop of compress.
 8750 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8751   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8752 */
 8753 //----------Overflow Math Instructions-----------------------------------------
 8754 
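      // The Overflow{Add,Sub,Mul}I nodes (typically generated for the Math.*Exact
      // intrinsics) produce only condition codes; the consumer branches on the
      // overflow flag left by the ALU operation encoded below.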
 8755 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8756 %{
 8757   match(Set cr (OverflowAddI op1 op2));
 8758   effect(DEF cr, USE_KILL op1, USE op2);
 8759 
 8760   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8761 
 8762   ins_encode %{
 8763     __ addl($op1$$Register, $op2$$Register);
 8764   %}
 8765   ins_pipe(ialu_reg_reg);
 8766 %}
 8767 
 8768 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8769 %{
 8770   match(Set cr (OverflowAddI op1 op2));
 8771   effect(DEF cr, USE_KILL op1, USE op2);
 8772 
 8773   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8774 
 8775   ins_encode %{
 8776     __ addl($op1$$Register, $op2$$constant);
 8777   %}
 8778   ins_pipe(ialu_reg_reg);
 8779 %}
 8780 
 8781 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8782 %{
 8783   match(Set cr (OverflowSubI op1 op2));
 8784 
 8785   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8786   ins_encode %{
 8787     __ cmpl($op1$$Register, $op2$$Register);
 8788   %}
 8789   ins_pipe(ialu_reg_reg);
 8790 %}
 8791 
 8792 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8793 %{
 8794   match(Set cr (OverflowSubI op1 op2));
 8795 
 8796   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8797   ins_encode %{
 8798     __ cmpl($op1$$Register, $op2$$constant);
 8799   %}
 8800   ins_pipe(ialu_reg_reg);
 8801 %}
 8802 
 8803 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8804 %{
 8805   match(Set cr (OverflowSubI zero op2));
 8806   effect(DEF cr, USE_KILL op2);
 8807 
 8808   format %{ "NEG    $op2\t# overflow check int" %}
 8809   ins_encode %{
 8810     __ negl($op2$$Register);
 8811   %}
 8812   ins_pipe(ialu_reg_reg);
 8813 %}
 8814 
 8815 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8816 %{
 8817   match(Set cr (OverflowMulI op1 op2));
 8818   effect(DEF cr, USE_KILL op1, USE op2);
 8819 
 8820   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8821   ins_encode %{
 8822     __ imull($op1$$Register, $op2$$Register);
 8823   %}
 8824   ins_pipe(ialu_reg_reg_alu0);
 8825 %}
 8826 
 8827 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8828 %{
 8829   match(Set cr (OverflowMulI op1 op2));
 8830   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8831 
 8832   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8833   ins_encode %{
 8834     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8835   %}
 8836   ins_pipe(ialu_reg_reg_alu0);
 8837 %}
 8838 
 8839 // Integer Absolute Instructions
 8840 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8841 %{
 8842   match(Set dst (AbsI src));
 8843   effect(TEMP dst, TEMP tmp, KILL cr);
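        // Branch-free abs: $tmp = $src >> 31 is 0 or -1, so ($src ^ $tmp) - $tmp
        // leaves non-negative values unchanged and negates negative ones.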
 8844   format %{ "movl $tmp, $src\n\t"
 8845             "sarl $tmp, 31\n\t"
 8846             "movl $dst, $src\n\t"
 8847             "xorl $dst, $tmp\n\t"
 8848             "subl $dst, $tmp\n"
 8849           %}
 8850   ins_encode %{
 8851     __ movl($tmp$$Register, $src$$Register);
 8852     __ sarl($tmp$$Register, 31);
 8853     __ movl($dst$$Register, $src$$Register);
 8854     __ xorl($dst$$Register, $tmp$$Register);
 8855     __ subl($dst$$Register, $tmp$$Register);
 8856   %}
 8857 
 8858   ins_pipe(ialu_reg_reg);
 8859 %}
 8860 
 8861 //----------Long Instructions------------------------------------------------
 8862 // Add Long Register with Register
 8863 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8864   match(Set dst (AddL dst src));
 8865   effect(KILL cr);
 8866   ins_cost(200);
 8867   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8868             "ADC    $dst.hi,$src.hi" %}
 8869   opcode(0x03, 0x13);
 8870   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8871   ins_pipe( ialu_reg_reg_long );
 8872 %}
 8873 
 8874 // Add Long Register with Immediate
 8875 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8876   match(Set dst (AddL dst src));
 8877   effect(KILL cr);
 8878   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8879             "ADC    $dst.hi,$src.hi" %}
 8880   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8881   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8882   ins_pipe( ialu_reg_long );
 8883 %}
 8884 
 8885 // Add Long Register with Memory
 8886 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8887   match(Set dst (AddL dst (LoadL mem)));
 8888   effect(KILL cr);
 8889   ins_cost(125);
 8890   format %{ "ADD    $dst.lo,$mem\n\t"
 8891             "ADC    $dst.hi,$mem+4" %}
 8892   opcode(0x03, 0x13);
 8893   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8894   ins_pipe( ialu_reg_long_mem );
 8895 %}
 8896 
 8897 // Subtract Long Register with Register.
 8898 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8899   match(Set dst (SubL dst src));
 8900   effect(KILL cr);
 8901   ins_cost(200);
 8902   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8903             "SBB    $dst.hi,$src.hi" %}
 8904   opcode(0x2B, 0x1B);
 8905   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8906   ins_pipe( ialu_reg_reg_long );
 8907 %}
 8908 
 8909 // Subtract Long Register with Immediate
 8910 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8911   match(Set dst (SubL dst src));
 8912   effect(KILL cr);
 8913   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8914             "SBB    $dst.hi,$src.hi" %}
 8915   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8916   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8917   ins_pipe( ialu_reg_long );
 8918 %}
 8919 
 8920 // Subtract Long Register with Memory
 8921 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8922   match(Set dst (SubL dst (LoadL mem)));
 8923   effect(KILL cr);
 8924   ins_cost(125);
 8925   format %{ "SUB    $dst.lo,$mem\n\t"
 8926             "SBB    $dst.hi,$mem+4" %}
 8927   opcode(0x2B, 0x1B);
 8928   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8929   ins_pipe( ialu_reg_long_mem );
 8930 %}
 8931 
 8932 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8933   match(Set dst (SubL zero dst));
 8934   effect(KILL cr);
 8935   ins_cost(300);
 8936   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8937   ins_encode( neg_long(dst) );
 8938   ins_pipe( ialu_reg_reg_long );
 8939 %}
 8940 
 8941 // And Long Register with Register
 8942 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8943   match(Set dst (AndL dst src));
 8944   effect(KILL cr);
 8945   format %{ "AND    $dst.lo,$src.lo\n\t"
 8946             "AND    $dst.hi,$src.hi" %}
 8947   opcode(0x23,0x23);
 8948   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8949   ins_pipe( ialu_reg_reg_long );
 8950 %}
 8951 
 8952 // And Long Register with Immediate
 8953 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8954   match(Set dst (AndL dst src));
 8955   effect(KILL cr);
 8956   format %{ "AND    $dst.lo,$src.lo\n\t"
 8957             "AND    $dst.hi,$src.hi" %}
 8958   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8959   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8960   ins_pipe( ialu_reg_long );
 8961 %}
 8962 
 8963 // And Long Register with Memory
 8964 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8965   match(Set dst (AndL dst (LoadL mem)));
 8966   effect(KILL cr);
 8967   ins_cost(125);
 8968   format %{ "AND    $dst.lo,$mem\n\t"
 8969             "AND    $dst.hi,$mem+4" %}
 8970   opcode(0x23, 0x23);
 8971   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8972   ins_pipe( ialu_reg_long_mem );
 8973 %}
 8974 
 8975 // BMI1 instructions
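      // The 64-bit forms below are composed from two 32-bit BMI1 operations on
      // the low and high words; the flags set by the low-word operation decide
      // whether the high word still needs processing (e.g. BLSIL of the low word
      // leaves ZF clear when the lowest set bit was found there, so the high
      // result word can stay zero).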
 8976 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8977   match(Set dst (AndL (XorL src1 minus_1) src2));
 8978   predicate(UseBMI1Instructions);
 8979   effect(KILL cr, TEMP dst);
 8980 
 8981   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8982             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8983          %}
 8984 
 8985   ins_encode %{
 8986     Register Rdst = $dst$$Register;
 8987     Register Rsrc1 = $src1$$Register;
 8988     Register Rsrc2 = $src2$$Register;
 8989     __ andnl(Rdst, Rsrc1, Rsrc2);
 8990     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8991   %}
 8992   ins_pipe(ialu_reg_reg_long);
 8993 %}
 8994 
 8995 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8996   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8997   predicate(UseBMI1Instructions);
 8998   effect(KILL cr, TEMP dst);
 8999 
 9000   ins_cost(125);
 9001   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9002             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9003          %}
 9004 
 9005   ins_encode %{
 9006     Register Rdst = $dst$$Register;
 9007     Register Rsrc1 = $src1$$Register;
 9008     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9009 
 9010     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9011     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9012   %}
 9013   ins_pipe(ialu_reg_mem);
 9014 %}
 9015 
 9016 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9017   match(Set dst (AndL (SubL imm_zero src) src));
 9018   predicate(UseBMI1Instructions);
 9019   effect(KILL cr, TEMP dst);
 9020 
 9021   format %{ "MOVL   $dst.hi, 0\n\t"
 9022             "BLSIL  $dst.lo, $src.lo\n\t"
 9023             "JNZ    done\n\t"
 9024             "BLSIL  $dst.hi, $src.hi\n"
 9025             "done:"
 9026          %}
 9027 
 9028   ins_encode %{
 9029     Label done;
 9030     Register Rdst = $dst$$Register;
 9031     Register Rsrc = $src$$Register;
 9032     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9033     __ blsil(Rdst, Rsrc);
 9034     __ jccb(Assembler::notZero, done);
 9035     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9036     __ bind(done);
 9037   %}
 9038   ins_pipe(ialu_reg);
 9039 %}
 9040 
 9041 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9042   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9043   predicate(UseBMI1Instructions);
 9044   effect(KILL cr, TEMP dst);
 9045 
 9046   ins_cost(125);
 9047   format %{ "MOVL   $dst.hi, 0\n\t"
 9048             "BLSIL  $dst.lo, $src\n\t"
 9049             "JNZ    done\n\t"
 9050             "BLSIL  $dst.hi, $src+4\n"
 9051             "done:"
 9052          %}
 9053 
 9054   ins_encode %{
 9055     Label done;
 9056     Register Rdst = $dst$$Register;
 9057     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9058 
 9059     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9060     __ blsil(Rdst, $src$$Address);
 9061     __ jccb(Assembler::notZero, done);
 9062     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9063     __ bind(done);
 9064   %}
 9065   ins_pipe(ialu_reg_mem);
 9066 %}
 9067 
 9068 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9069 %{
 9070   match(Set dst (XorL (AddL src minus_1) src));
 9071   predicate(UseBMI1Instructions);
 9072   effect(KILL cr, TEMP dst);
 9073 
 9074   format %{ "MOVL    $dst.hi, 0\n\t"
 9075             "BLSMSKL $dst.lo, $src.lo\n\t"
 9076             "JNC     done\n\t"
 9077             "BLSMSKL $dst.hi, $src.hi\n"
 9078             "done:"
 9079          %}
 9080 
 9081   ins_encode %{
 9082     Label done;
 9083     Register Rdst = $dst$$Register;
 9084     Register Rsrc = $src$$Register;
 9085     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9086     __ blsmskl(Rdst, Rsrc);
 9087     __ jccb(Assembler::carryClear, done);
 9088     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9089     __ bind(done);
 9090   %}
 9091 
 9092   ins_pipe(ialu_reg);
 9093 %}
 9094 
 9095 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9096 %{
 9097   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9098   predicate(UseBMI1Instructions);
 9099   effect(KILL cr, TEMP dst);
 9100 
 9101   ins_cost(125);
 9102   format %{ "MOVL    $dst.hi, 0\n\t"
 9103             "BLSMSKL $dst.lo, $src\n\t"
 9104             "JNC     done\n\t"
 9105             "BLSMSKL $dst.hi, $src+4\n"
 9106             "done:"
 9107          %}
 9108 
 9109   ins_encode %{
 9110     Label done;
 9111     Register Rdst = $dst$$Register;
 9112     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9113 
 9114     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9115     __ blsmskl(Rdst, $src$$Address);
 9116     __ jccb(Assembler::carryClear, done);
 9117     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9118     __ bind(done);
 9119   %}
 9120 
 9121   ins_pipe(ialu_reg_mem);
 9122 %}
 9123 
 9124 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9125 %{
 9126   match(Set dst (AndL (AddL src minus_1) src) );
 9127   predicate(UseBMI1Instructions);
 9128   effect(KILL cr, TEMP dst);
 9129 
 9130   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9131             "BLSRL  $dst.lo, $src.lo\n\t"
 9132             "JNC    done\n\t"
 9133             "BLSRL  $dst.hi, $src.hi\n"
 9134             "done:"
 9135   %}
 9136 
 9137   ins_encode %{
 9138     Label done;
 9139     Register Rdst = $dst$$Register;
 9140     Register Rsrc = $src$$Register;
 9141     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9142     __ blsrl(Rdst, Rsrc);
 9143     __ jccb(Assembler::carryClear, done);
 9144     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9145     __ bind(done);
 9146   %}
 9147 
 9148   ins_pipe(ialu_reg);
 9149 %}
 9150 
 9151 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9152 %{
 9153   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9154   predicate(UseBMI1Instructions);
 9155   effect(KILL cr, TEMP dst);
 9156 
 9157   ins_cost(125);
 9158   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9159             "BLSRL  $dst.lo, $src\n\t"
 9160             "JNC    done\n\t"
 9161             "BLSRL  $dst.hi, $src+4\n"
 9162             "done:"
 9163   %}
 9164 
 9165   ins_encode %{
 9166     Label done;
 9167     Register Rdst = $dst$$Register;
 9168     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9169     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9170     __ blsrl(Rdst, $src$$Address);
 9171     __ jccb(Assembler::carryClear, done);
 9172     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9173     __ bind(done);
 9174   %}
 9175 
 9176   ins_pipe(ialu_reg_mem);
 9177 %}
 9178 
 9179 // Or Long Register with Register
 9180 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9181   match(Set dst (OrL dst src));
 9182   effect(KILL cr);
 9183   format %{ "OR     $dst.lo,$src.lo\n\t"
 9184             "OR     $dst.hi,$src.hi" %}
 9185   opcode(0x0B,0x0B);
 9186   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9187   ins_pipe( ialu_reg_reg_long );
 9188 %}
 9189 
 9190 // Or Long Register with Immediate
 9191 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9192   match(Set dst (OrL dst src));
 9193   effect(KILL cr);
 9194   format %{ "OR     $dst.lo,$src.lo\n\t"
 9195             "OR     $dst.hi,$src.hi" %}
 9196   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9197   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9198   ins_pipe( ialu_reg_long );
 9199 %}
 9200 
 9201 // Or Long Register with Memory
 9202 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9203   match(Set dst (OrL dst (LoadL mem)));
 9204   effect(KILL cr);
 9205   ins_cost(125);
 9206   format %{ "OR     $dst.lo,$mem\n\t"
 9207             "OR     $dst.hi,$mem+4" %}
 9208   opcode(0x0B,0x0B);
 9209   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9210   ins_pipe( ialu_reg_long_mem );
 9211 %}
 9212 
 9213 // Xor Long Register with Register
 9214 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9215   match(Set dst (XorL dst src));
 9216   effect(KILL cr);
 9217   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9218             "XOR    $dst.hi,$src.hi" %}
 9219   opcode(0x33,0x33);
 9220   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9221   ins_pipe( ialu_reg_reg_long );
 9222 %}
 9223 
 9224 // Xor Long Register with Immediate -1
 9225 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9226   match(Set dst (XorL dst imm));
 9227   format %{ "NOT    $dst.lo\n\t"
 9228             "NOT    $dst.hi" %}
 9229   ins_encode %{
 9230      __ notl($dst$$Register);
 9231      __ notl(HIGH_FROM_LOW($dst$$Register));
 9232   %}
 9233   ins_pipe( ialu_reg_long );
 9234 %}
 9235 
 9236 // Xor Long Register with Immediate
 9237 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9238   match(Set dst (XorL dst src));
 9239   effect(KILL cr);
 9240   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9241             "XOR    $dst.hi,$src.hi" %}
 9242   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9243   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9244   ins_pipe( ialu_reg_long );
 9245 %}
 9246 
 9247 // Xor Long Register with Memory
 9248 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9249   match(Set dst (XorL dst (LoadL mem)));
 9250   effect(KILL cr);
 9251   ins_cost(125);
 9252   format %{ "XOR    $dst.lo,$mem\n\t"
 9253             "XOR    $dst.hi,$mem+4" %}
 9254   opcode(0x33,0x33);
 9255   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9256   ins_pipe( ialu_reg_long_mem );
 9257 %}
 9258 
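      // A 64-bit left shift by one is an add-with-carry of the value onto itself
      // (ADD lo,lo; ADC hi,hi); the small-constant forms below repeat that pair
      // once per shifted bit when UseNewLongLShift is set.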
 9259 // Shift Left Long by 1
 9260 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9261   predicate(UseNewLongLShift);
 9262   match(Set dst (LShiftL dst cnt));
 9263   effect(KILL cr);
 9264   ins_cost(100);
 9265   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9266             "ADC    $dst.hi,$dst.hi" %}
 9267   ins_encode %{
 9268     __ addl($dst$$Register,$dst$$Register);
 9269     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9270   %}
 9271   ins_pipe( ialu_reg_long );
 9272 %}
 9273 
 9274 // Shift Left Long by 2
 9275 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9276   predicate(UseNewLongLShift);
 9277   match(Set dst (LShiftL dst cnt));
 9278   effect(KILL cr);
 9279   ins_cost(100);
 9280   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9281             "ADC    $dst.hi,$dst.hi\n\t"
 9282             "ADD    $dst.lo,$dst.lo\n\t"
 9283             "ADC    $dst.hi,$dst.hi" %}
 9284   ins_encode %{
 9285     __ addl($dst$$Register,$dst$$Register);
 9286     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9287     __ addl($dst$$Register,$dst$$Register);
 9288     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9289   %}
 9290   ins_pipe( ialu_reg_long );
 9291 %}
 9292 
 9293 // Shift Left Long by 3
 9294 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9295   predicate(UseNewLongLShift);
 9296   match(Set dst (LShiftL dst cnt));
 9297   effect(KILL cr);
 9298   ins_cost(100);
 9299   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9300             "ADC    $dst.hi,$dst.hi\n\t"
 9301             "ADD    $dst.lo,$dst.lo\n\t"
 9302             "ADC    $dst.hi,$dst.hi\n\t"
 9303             "ADD    $dst.lo,$dst.lo\n\t"
 9304             "ADC    $dst.hi,$dst.hi" %}
 9305   ins_encode %{
 9306     __ addl($dst$$Register,$dst$$Register);
 9307     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9308     __ addl($dst$$Register,$dst$$Register);
 9309     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9310     __ addl($dst$$Register,$dst$$Register);
 9311     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9312   %}
 9313   ins_pipe( ialu_reg_long );
 9314 %}
 9315 
 9316 // Shift Left Long by 1-31
 9317 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9318   match(Set dst (LShiftL dst cnt));
 9319   effect(KILL cr);
 9320   ins_cost(200);
 9321   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9322             "SHL    $dst.lo,$cnt" %}
 9323   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9324   ins_encode( move_long_small_shift(dst,cnt) );
 9325   ins_pipe( ialu_reg_long );
 9326 %}
 9327 
 9328 // Shift Left Long by 32-63
 9329 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9330   match(Set dst (LShiftL dst cnt));
 9331   effect(KILL cr);
 9332   ins_cost(300);
 9333   format %{ "MOV    $dst.hi,$dst.lo\n"
 9334           "\tSHL    $dst.hi,$cnt-32\n"
 9335           "\tXOR    $dst.lo,$dst.lo" %}
 9336   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9337   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9338   ins_pipe( ialu_reg_long );
 9339 %}
 9340 
 9341 // Shift Left Long by variable
 9342 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9343   match(Set dst (LShiftL dst shift));
 9344   effect(KILL cr);
 9345   ins_cost(500+200);
 9346   size(17);
 9347   format %{ "TEST   $shift,32\n\t"
 9348             "JEQ,s  small\n\t"
 9349             "MOV    $dst.hi,$dst.lo\n\t"
 9350             "XOR    $dst.lo,$dst.lo\n"
 9351     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9352             "SHL    $dst.lo,$shift" %}
 9353   ins_encode( shift_left_long( dst, shift ) );
 9354   ins_pipe( pipe_slow );
 9355 %}
 9356 
 9357 // Shift Right Long by 1-31
 9358 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9359   match(Set dst (URShiftL dst cnt));
 9360   effect(KILL cr);
 9361   ins_cost(200);
 9362   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9363             "SHR    $dst.hi,$cnt" %}
 9364   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9365   ins_encode( move_long_small_shift(dst,cnt) );
 9366   ins_pipe( ialu_reg_long );
 9367 %}
 9368 
 9369 // Shift Right Long by 32-63
 9370 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9371   match(Set dst (URShiftL dst cnt));
 9372   effect(KILL cr);
 9373   ins_cost(300);
 9374   format %{ "MOV    $dst.lo,$dst.hi\n"
 9375           "\tSHR    $dst.lo,$cnt-32\n"
 9376           "\tXOR    $dst.hi,$dst.hi" %}
 9377   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9378   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9379   ins_pipe( ialu_reg_long );
 9380 %}
 9381 
 9382 // Shift Right Long by variable
 9383 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9384   match(Set dst (URShiftL dst shift));
 9385   effect(KILL cr);
 9386   ins_cost(600);
 9387   size(17);
 9388   format %{ "TEST   $shift,32\n\t"
 9389             "JEQ,s  small\n\t"
 9390             "MOV    $dst.lo,$dst.hi\n\t"
 9391             "XOR    $dst.hi,$dst.hi\n"
 9392     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9393             "SHR    $dst.hi,$shift" %}
 9394   ins_encode( shift_right_long( dst, shift ) );
 9395   ins_pipe( pipe_slow );
 9396 %}
 9397 
 9398 // Shift Right Long by 1-31
 9399 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9400   match(Set dst (RShiftL dst cnt));
 9401   effect(KILL cr);
 9402   ins_cost(200);
 9403   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9404             "SAR    $dst.hi,$cnt" %}
 9405   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9406   ins_encode( move_long_small_shift(dst,cnt) );
 9407   ins_pipe( ialu_reg_long );
 9408 %}
 9409 
 9410 // Shift Right Long by 32-63
 9411 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9412   match(Set dst (RShiftL dst cnt));
 9413   effect(KILL cr);
 9414   ins_cost(300);
 9415   format %{ "MOV    $dst.lo,$dst.hi\n"
 9416           "\tSAR    $dst.lo,$cnt-32\n"
 9417           "\tSAR    $dst.hi,31" %}
 9418   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9419   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9420   ins_pipe( ialu_reg_long );
 9421 %}
 9422 
 9423 // Shift Right arithmetic Long by variable
 9424 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9425   match(Set dst (RShiftL dst shift));
 9426   effect(KILL cr);
 9427   ins_cost(600);
 9428   size(18);
 9429   format %{ "TEST   $shift,32\n\t"
 9430             "JEQ,s  small\n\t"
 9431             "MOV    $dst.lo,$dst.hi\n\t"
 9432             "SAR    $dst.hi,31\n"
 9433     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9434             "SAR    $dst.hi,$shift" %}
 9435   ins_encode( shift_right_arith_long( dst, shift ) );
 9436   ins_pipe( pipe_slow );
 9437 %}
 9438 
 9439 
 9440 //----------Double Instructions------------------------------------------------
 9441 // Double Math
 9442 
 9443 // Compare & branch
 9444 
 9445 // P6 version of float compare, sets condition codes in EFLAGS
 9446 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9447   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9448   match(Set cr (CmpD src1 src2));
 9449   effect(KILL rax);
 9450   ins_cost(150);
 9451   format %{ "FLD    $src1\n\t"
 9452             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9453             "JNP    exit\n\t"
 9454             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9455             "SAHF\n"
 9456      "exit:\tNOP               // avoid branch to branch" %}
 9457   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9458   ins_encode( Push_Reg_DPR(src1),
 9459               OpcP, RegOpc(src2),
 9460               cmpF_P6_fixup );
 9461   ins_pipe( pipe_slow );
 9462 %}
 9463 
 9464 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9465   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9466   match(Set cr (CmpD src1 src2));
 9467   ins_cost(150);
 9468   format %{ "FLD    $src1\n\t"
 9469             "FUCOMIP ST,$src2  // P6 instruction" %}
 9470   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9471   ins_encode( Push_Reg_DPR(src1),
 9472               OpcP, RegOpc(src2));
 9473   ins_pipe( pipe_slow );
 9474 %}
 9475 
 9476 // Compare & branch
 9477 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9478   predicate(UseSSE<=1);
 9479   match(Set cr (CmpD src1 src2));
 9480   effect(KILL rax);
 9481   ins_cost(200);
 9482   format %{ "FLD    $src1\n\t"
 9483             "FCOMp  $src2\n\t"
 9484             "FNSTSW AX\n\t"
 9485             "TEST   AX,0x400\n\t"
 9486             "JZ,s   flags\n\t"
 9487             "MOV    AH,1\t# unordered treat as LT\n"
 9488     "flags:\tSAHF" %}
 9489   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9490   ins_encode( Push_Reg_DPR(src1),
 9491               OpcP, RegOpc(src2),
 9492               fpu_flags);
 9493   ins_pipe( pipe_slow );
 9494 %}
 9495 
 9496 // Compare vs zero into -1,0,1
 9497 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9498   predicate(UseSSE<=1);
 9499   match(Set dst (CmpD3 src1 zero));
 9500   effect(KILL cr, KILL rax);
 9501   ins_cost(280);
 9502   format %{ "FTSTD  $dst,$src1" %}
 9503   opcode(0xE4, 0xD9);
 9504   ins_encode( Push_Reg_DPR(src1),
 9505               OpcS, OpcP, PopFPU,
 9506               CmpF_Result(dst));
 9507   ins_pipe( pipe_slow );
 9508 %}
 9509 
 9510 // Compare into -1,0,1
 9511 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9512   predicate(UseSSE<=1);
 9513   match(Set dst (CmpD3 src1 src2));
 9514   effect(KILL cr, KILL rax);
 9515   ins_cost(300);
 9516   format %{ "FCMPD  $dst,$src1,$src2" %}
 9517   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9518   ins_encode( Push_Reg_DPR(src1),
 9519               OpcP, RegOpc(src2),
 9520               CmpF_Result(dst));
 9521   ins_pipe( pipe_slow );
 9522 %}
 9523 
 9524 // float compare and set condition codes in EFLAGS by XMM regs
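// Note on the NaN fixup below: an unordered UCOMISD sets ZF, PF and CF all to
// one.  JNP skips the fixup for ordered results; otherwise the saved flags are
// ANDed with 0xffffff2b, which clears ZF and PF but leaves CF set, so an
// unordered compare reads back as "below" (unordered treated as LT, the same
// convention the x87 paths use).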
 9525 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9526   predicate(UseSSE>=2);
 9527   match(Set cr (CmpD src1 src2));
 9528   ins_cost(145);
 9529   format %{ "UCOMISD $src1,$src2\n\t"
 9530             "JNP,s   exit\n\t"
 9531             "PUSHF\t# saw NaN, set CF\n\t"
 9532             "AND     [rsp], #0xffffff2b\n\t"
 9533             "POPF\n"
 9534     "exit:" %}
 9535   ins_encode %{
 9536     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9537     emit_cmpfp_fixup(_masm);
 9538   %}
 9539   ins_pipe( pipe_slow );
 9540 %}
 9541 
 9542 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9543   predicate(UseSSE>=2);
 9544   match(Set cr (CmpD src1 src2));
 9545   ins_cost(100);
 9546   format %{ "UCOMISD $src1,$src2" %}
 9547   ins_encode %{
 9548     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9549   %}
 9550   ins_pipe( pipe_slow );
 9551 %}
 9552 
 9553 // float compare and set condition codes in EFLAGS by XMM regs
 9554 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9555   predicate(UseSSE>=2);
 9556   match(Set cr (CmpD src1 (LoadD src2)));
 9557   ins_cost(145);
 9558   format %{ "UCOMISD $src1,$src2\n\t"
 9559             "JNP,s   exit\n\t"
 9560             "PUSHF\t# saw NaN, set CF\n\t"
 9561             "AND     [rsp], #0xffffff2b\n\t"
 9562             "POPF\n"
 9563     "exit:" %}
 9564   ins_encode %{
 9565     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9566     emit_cmpfp_fixup(_masm);
 9567   %}
 9568   ins_pipe( pipe_slow );
 9569 %}
 9570 
 9571 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9572   predicate(UseSSE>=2);
 9573   match(Set cr (CmpD src1 (LoadD src2)));
 9574   ins_cost(100);
 9575   format %{ "UCOMISD $src1,$src2" %}
 9576   ins_encode %{
 9577     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9578   %}
 9579   ins_pipe( pipe_slow );
 9580 %}
 9581 
 9582 // Compare into -1,0,1 in XMM
 9583 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9584   predicate(UseSSE>=2);
 9585   match(Set dst (CmpD3 src1 src2));
 9586   effect(KILL cr);
 9587   ins_cost(255);
 9588   format %{ "UCOMISD $src1, $src2\n\t"
 9589             "MOV     $dst, #-1\n\t"
 9590             "JP,s    done\n\t"
 9591             "JB,s    done\n\t"
 9592             "SETNE   $dst\n\t"
 9593             "MOVZB   $dst, $dst\n"
 9594     "done:" %}
 9595   ins_encode %{
 9596     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9597     emit_cmpfp3(_masm, $dst$$Register);
 9598   %}
 9599   ins_pipe( pipe_slow );
 9600 %}
 9601 
 9602 // Compare into -1,0,1 in XMM and memory
 9603 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9604   predicate(UseSSE>=2);
 9605   match(Set dst (CmpD3 src1 (LoadD src2)));
 9606   effect(KILL cr);
 9607   ins_cost(275);
 9608   format %{ "UCOMISD $src1, $src2\n\t"
 9609             "MOV     $dst, #-1\n\t"
 9610             "JP,s    done\n\t"
 9611             "JB,s    done\n\t"
 9612             "SETNE   $dst\n\t"
 9613             "MOVZB   $dst, $dst\n"
 9614     "done:" %}
 9615   ins_encode %{
 9616     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9617     emit_cmpfp3(_masm, $dst$$Register);
 9618   %}
 9619   ins_pipe( pipe_slow );
 9620 %}
 9621 
 9622 
 9623 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9624   predicate (UseSSE <=1);
 9625   match(Set dst (SubD dst src));
 9626 
 9627   format %{ "FLD    $src\n\t"
 9628             "DSUBp  $dst,ST" %}
 9629   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9630   ins_cost(150);
 9631   ins_encode( Push_Reg_DPR(src),
 9632               OpcP, RegOpc(dst) );
 9633   ins_pipe( fpu_reg_reg );
 9634 %}
 9635 
 9636 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9637   predicate (UseSSE <=1);
 9638   match(Set dst (RoundDouble (SubD src1 src2)));
 9639   ins_cost(250);
 9640 
 9641   format %{ "FLD    $src2\n\t"
 9642             "DSUB   ST,$src1\n\t"
 9643             "FSTP_D $dst\t# D-round" %}
 9644   opcode(0xD8, 0x5);
 9645   ins_encode( Push_Reg_DPR(src2),
 9646               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9647   ins_pipe( fpu_mem_reg_reg );
 9648 %}
 9649 
 9650 
 9651 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9652   predicate (UseSSE <=1);
 9653   match(Set dst (SubD dst (LoadD src)));
 9654   ins_cost(150);
 9655 
 9656   format %{ "FLD    $src\n\t"
 9657             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9659   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9660               OpcP, RegOpc(dst) );
 9661   ins_pipe( fpu_reg_mem );
 9662 %}
 9663 
 9664 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9665   predicate (UseSSE<=1);
 9666   match(Set dst (AbsD src));
 9667   ins_cost(100);
 9668   format %{ "FABS" %}
 9669   opcode(0xE1, 0xD9);
 9670   ins_encode( OpcS, OpcP );
 9671   ins_pipe( fpu_reg_reg );
 9672 %}
 9673 
 9674 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9675   predicate(UseSSE<=1);
 9676   match(Set dst (NegD src));
 9677   ins_cost(100);
 9678   format %{ "FCHS" %}
 9679   opcode(0xE0, 0xD9);
 9680   ins_encode( OpcS, OpcP );
 9681   ins_pipe( fpu_reg_reg );
 9682 %}
 9683 
 9684 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9685   predicate(UseSSE<=1);
 9686   match(Set dst (AddD dst src));
 9687   format %{ "FLD    $src\n\t"
 9688             "DADD   $dst,ST" %}
 9689   size(4);
 9690   ins_cost(150);
 9691   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9692   ins_encode( Push_Reg_DPR(src),
 9693               OpcP, RegOpc(dst) );
 9694   ins_pipe( fpu_reg_reg );
 9695 %}
 9696 
 9697 
 9698 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9699   predicate(UseSSE<=1);
 9700   match(Set dst (RoundDouble (AddD src1 src2)));
 9701   ins_cost(250);
 9702 
 9703   format %{ "FLD    $src2\n\t"
 9704             "DADD   ST,$src1\n\t"
 9705             "FSTP_D $dst\t# D-round" %}
 9706   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9707   ins_encode( Push_Reg_DPR(src2),
 9708               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9709   ins_pipe( fpu_mem_reg_reg );
 9710 %}
 9711 
 9712 
 9713 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9714   predicate(UseSSE<=1);
 9715   match(Set dst (AddD dst (LoadD src)));
 9716   ins_cost(150);
 9717 
 9718   format %{ "FLD    $src\n\t"
 9719             "DADDp  $dst,ST" %}
 9720   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9721   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9722               OpcP, RegOpc(dst) );
 9723   ins_pipe( fpu_reg_mem );
 9724 %}
 9725 
 9726 // add-to-memory
 9727 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9728   predicate(UseSSE<=1);
 9729   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9730   ins_cost(150);
 9731 
 9732   format %{ "FLD_D  $dst\n\t"
 9733             "DADD   ST,$src\n\t"
 9734             "FST_D  $dst" %}
 9735   opcode(0xDD, 0x0);
 9736   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9737               Opcode(0xD8), RegOpc(src),
 9738               set_instruction_start,
 9739               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9740   ins_pipe( fpu_reg_mem );
 9741 %}
 9742 
 9743 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9744   predicate(UseSSE<=1);
 9745   match(Set dst (AddD dst con));
 9746   ins_cost(125);
 9747   format %{ "FLD1\n\t"
 9748             "DADDp  $dst,ST" %}
 9749   ins_encode %{
 9750     __ fld1();
 9751     __ faddp($dst$$reg);
 9752   %}
 9753   ins_pipe(fpu_reg);
 9754 %}
 9755 
 9756 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9757   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9758   match(Set dst (AddD dst con));
 9759   ins_cost(200);
 9760   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9761             "DADDp  $dst,ST" %}
 9762   ins_encode %{
 9763     __ fld_d($constantaddress($con));
 9764     __ faddp($dst$$reg);
 9765   %}
 9766   ins_pipe(fpu_reg_mem);
 9767 %}
 9768 
 9769 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9770   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9771   match(Set dst (RoundDouble (AddD src con)));
 9772   ins_cost(200);
 9773   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9774             "DADD   ST,$src\n\t"
 9775             "FSTP_D $dst\t# D-round" %}
 9776   ins_encode %{
 9777     __ fld_d($constantaddress($con));
 9778     __ fadd($src$$reg);
 9779     __ fstp_d(Address(rsp, $dst$$disp));
 9780   %}
 9781   ins_pipe(fpu_mem_reg_con);
 9782 %}
 9783 
 9784 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9785   predicate(UseSSE<=1);
 9786   match(Set dst (MulD dst src));
 9787   format %{ "FLD    $src\n\t"
 9788             "DMULp  $dst,ST" %}
 9789   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9790   ins_cost(150);
 9791   ins_encode( Push_Reg_DPR(src),
 9792               OpcP, RegOpc(dst) );
 9793   ins_pipe( fpu_reg_reg );
 9794 %}
 9795 
 9796 // Strict FP instruction biases argument before multiply then
 9797 // biases result to avoid double rounding of subnormals.
 9798 //
 9799 // scale arg1 by multiplying arg1 by 2^(-15360)
 9800 // load arg2
 9801 // multiply scaled arg1 by arg2
 9802 // rescale product by 2^(15360)
 9803 //
 9804 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9805   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9806   match(Set dst (MulD dst src));
 9807   ins_cost(1);   // Select this instruction for all FP double multiplies
 9808 
 9809   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9810             "DMULp  $dst,ST\n\t"
 9811             "FLD    $src\n\t"
 9812             "DMULp  $dst,ST\n\t"
 9813             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9814             "DMULp  $dst,ST\n\t" %}
 9815   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9816   ins_encode( strictfp_bias1(dst),
 9817               Push_Reg_DPR(src),
 9818               OpcP, RegOpc(dst),
 9819               strictfp_bias2(dst) );
 9820   ins_pipe( fpu_reg_reg );
 9821 %}
 9822 
 9823 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9824   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9825   match(Set dst (MulD dst con));
 9826   ins_cost(200);
 9827   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9828             "DMULp  $dst,ST" %}
 9829   ins_encode %{
 9830     __ fld_d($constantaddress($con));
 9831     __ fmulp($dst$$reg);
 9832   %}
 9833   ins_pipe(fpu_reg_mem);
 9834 %}
 9835 
 9836 
 9837 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9838   predicate( UseSSE<=1 );
 9839   match(Set dst (MulD dst (LoadD src)));
 9840   ins_cost(200);
 9841   format %{ "FLD_D  $src\n\t"
 9842             "DMULp  $dst,ST" %}
 9843   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9844   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9845               OpcP, RegOpc(dst) );
 9846   ins_pipe( fpu_reg_mem );
 9847 %}
 9848 
 9849 //
 9850 // Cisc-alternate to reg-reg multiply
 9851 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9852   predicate( UseSSE<=1 );
 9853   match(Set dst (MulD src (LoadD mem)));
 9854   ins_cost(250);
 9855   format %{ "FLD_D  $mem\n\t"
 9856             "DMUL   ST,$src\n\t"
 9857             "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
 9859   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9860               OpcReg_FPR(src),
 9861               Pop_Reg_DPR(dst) );
 9862   ins_pipe( fpu_reg_reg_mem );
 9863 %}
 9864 
 9865 
 9866 // MACRO3 -- addDPR a mulDPR
 9867 // This instruction is a '2-address' instruction in that the result goes
 9868 // back to src2.  This eliminates a move from the macro; possibly the
 9869 // register allocator will have to add it back (and maybe not).
 9870 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9871   predicate( UseSSE<=1 );
 9872   match(Set src2 (AddD (MulD src0 src1) src2));
 9873   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9874             "DMUL   ST,$src1\n\t"
 9875             "DADDp  $src2,ST" %}
 9876   ins_cost(250);
 9877   opcode(0xDD); /* LoadD DD /0 */
 9878   ins_encode( Push_Reg_FPR(src0),
 9879               FMul_ST_reg(src1),
 9880               FAddP_reg_ST(src2) );
 9881   ins_pipe( fpu_reg_reg_reg );
 9882 %}
 9883 
 9884 
 9885 // MACRO3 -- subDPR a mulDPR
 9886 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9887   predicate( UseSSE<=1 );
 9888   match(Set src2 (SubD (MulD src0 src1) src2));
 9889   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9890             "DMUL   ST,$src1\n\t"
 9891             "DSUBRp $src2,ST" %}
 9892   ins_cost(250);
 9893   ins_encode( Push_Reg_FPR(src0),
 9894               FMul_ST_reg(src1),
 9895               Opcode(0xDE), Opc_plus(0xE0,src2));
 9896   ins_pipe( fpu_reg_reg_reg );
 9897 %}
 9898 
 9899 
 9900 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9901   predicate( UseSSE<=1 );
 9902   match(Set dst (DivD dst src));
 9903 
 9904   format %{ "FLD    $src\n\t"
 9905             "FDIVp  $dst,ST" %}
 9906   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9907   ins_cost(150);
 9908   ins_encode( Push_Reg_DPR(src),
 9909               OpcP, RegOpc(dst) );
 9910   ins_pipe( fpu_reg_reg );
 9911 %}
 9912 
 9913 // Strict FP instruction biases argument before division then
 9914 // biases result, to avoid double rounding of subnormals.
 9915 //
 9916 // scale dividend by multiplying dividend by 2^(-15360)
 9917 // load divisor
 9918 // divide scaled dividend by divisor
 9919 // rescale quotient by 2^(15360)
 9920 //
 9921 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9926 
 9927   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9928             "DMULp  $dst,ST\n\t"
 9929             "FLD    $src\n\t"
 9930             "FDIVp  $dst,ST\n\t"
 9931             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9932             "DMULp  $dst,ST\n\t" %}
 9933   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9934   ins_encode( strictfp_bias1(dst),
 9935               Push_Reg_DPR(src),
 9936               OpcP, RegOpc(dst),
 9937               strictfp_bias2(dst) );
 9938   ins_pipe( fpu_reg_reg );
 9939 %}
 9940 
 9941 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9942   predicate(UseSSE<=1);
 9943   match(Set dst (ModD dst src));
 9944   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9945 
 9946   format %{ "DMOD   $dst,$src" %}
 9947   ins_cost(250);
 9948   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9949               emitModDPR(),
 9950               Push_Result_Mod_DPR(src),
 9951               Pop_Reg_DPR(dst));
 9952   ins_pipe( pipe_slow );
 9953 %}
 9954 
 9955 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9956   predicate(UseSSE>=2);
 9957   match(Set dst (ModD src0 src1));
 9958   effect(KILL rax, KILL cr);
 9959 
 9960   format %{ "SUB    ESP,8\t # DMOD\n"
 9961           "\tMOVSD  [ESP+0],$src1\n"
 9962           "\tFLD_D  [ESP+0]\n"
 9963           "\tMOVSD  [ESP+0],$src0\n"
 9964           "\tFLD_D  [ESP+0]\n"
 9965      "loop:\tFPREM\n"
 9966           "\tFWAIT\n"
 9967           "\tFNSTSW AX\n"
 9968           "\tSAHF\n"
 9969           "\tJP     loop\n"
 9970           "\tFSTP_D [ESP+0]\n"
 9971           "\tMOVSD  $dst,[ESP+0]\n"
 9972           "\tADD    ESP,8\n"
 9973           "\tFSTP   ST0\t # Restore FPU Stack"
 9974     %}
 9975   ins_cost(250);
 9976   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9977   ins_pipe( pipe_slow );
 9978 %}
 9979 
 9980 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9981   predicate (UseSSE<=1);
 9982   match(Set dst(AtanD dst src));
 9983   format %{ "DATA   $dst,$src" %}
 9984   opcode(0xD9, 0xF3);
 9985   ins_encode( Push_Reg_DPR(src),
 9986               OpcP, OpcS, RegOpc(dst) );
 9987   ins_pipe( pipe_slow );
 9988 %}
 9989 
 9990 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9991   predicate (UseSSE>=2);
 9992   match(Set dst(AtanD dst src));
 9993   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9994   format %{ "DATA   $dst,$src" %}
 9995   opcode(0xD9, 0xF3);
 9996   ins_encode( Push_SrcD(src),
 9997               OpcP, OpcS, Push_ResultD(dst) );
 9998   ins_pipe( pipe_slow );
 9999 %}
10000 
10001 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10002   predicate (UseSSE<=1);
10003   match(Set dst (SqrtD src));
10004   format %{ "DSQRT  $dst,$src" %}
10005   opcode(0xFA, 0xD9);
10006   ins_encode( Push_Reg_DPR(src),
10007               OpcS, OpcP, Pop_Reg_DPR(dst) );
10008   ins_pipe( pipe_slow );
10009 %}
10010 
10011 //-------------Float Instructions-------------------------------
10012 // Float Math
10013 
10014 // Code for float compare:
10015 //     fcompp();
10016 //     fwait(); fnstsw_ax();
10017 //     sahf();
10018 //     movl(dst, unordered_result);
10019 //     jcc(Assembler::parity, exit);
10020 //     movl(dst, less_result);
10021 //     jcc(Assembler::below, exit);
10022 //     movl(dst, equal_result);
10023 //     jcc(Assembler::equal, exit);
10024 //     movl(dst, greater_result);
10025 //   exit:
10026 
10027 // P6 version of float compare, sets condition codes in EFLAGS
10028 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10029   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10030   match(Set cr (CmpF src1 src2));
10031   effect(KILL rax);
10032   ins_cost(150);
10033   format %{ "FLD    $src1\n\t"
10034             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10035             "JNP    exit\n\t"
10036             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10037             "SAHF\n"
10038      "exit:\tNOP               // avoid branch to branch" %}
10039   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10040   ins_encode( Push_Reg_DPR(src1),
10041               OpcP, RegOpc(src2),
10042               cmpF_P6_fixup );
10043   ins_pipe( pipe_slow );
10044 %}
10045 
10046 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10047   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10048   match(Set cr (CmpF src1 src2));
10049   ins_cost(100);
10050   format %{ "FLD    $src1\n\t"
10051             "FUCOMIP ST,$src2  // P6 instruction" %}
10052   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10053   ins_encode( Push_Reg_DPR(src1),
10054               OpcP, RegOpc(src2));
10055   ins_pipe( pipe_slow );
10056 %}
10057 
10058 
10059 // Compare & branch
10060 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10061   predicate(UseSSE == 0);
10062   match(Set cr (CmpF src1 src2));
10063   effect(KILL rax);
10064   ins_cost(200);
10065   format %{ "FLD    $src1\n\t"
10066             "FCOMp  $src2\n\t"
10067             "FNSTSW AX\n\t"
10068             "TEST   AX,0x400\n\t"
10069             "JZ,s   flags\n\t"
10070             "MOV    AH,1\t# unordered treat as LT\n"
10071     "flags:\tSAHF" %}
10072   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10073   ins_encode( Push_Reg_DPR(src1),
10074               OpcP, RegOpc(src2),
10075               fpu_flags);
10076   ins_pipe( pipe_slow );
10077 %}
10078 
10079 // Compare vs zero into -1,0,1
10080 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10081   predicate(UseSSE == 0);
10082   match(Set dst (CmpF3 src1 zero));
10083   effect(KILL cr, KILL rax);
10084   ins_cost(280);
10085   format %{ "FTSTF  $dst,$src1" %}
10086   opcode(0xE4, 0xD9);
10087   ins_encode( Push_Reg_DPR(src1),
10088               OpcS, OpcP, PopFPU,
10089               CmpF_Result(dst));
10090   ins_pipe( pipe_slow );
10091 %}
10092 
10093 // Compare into -1,0,1
10094 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10095   predicate(UseSSE == 0);
10096   match(Set dst (CmpF3 src1 src2));
10097   effect(KILL cr, KILL rax);
10098   ins_cost(300);
10099   format %{ "FCMPF  $dst,$src1,$src2" %}
10100   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10101   ins_encode( Push_Reg_DPR(src1),
10102               OpcP, RegOpc(src2),
10103               CmpF_Result(dst));
10104   ins_pipe( pipe_slow );
10105 %}
10106 
10107 // float compare and set condition codes in EFLAGS by XMM regs
10108 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10109   predicate(UseSSE>=1);
10110   match(Set cr (CmpF src1 src2));
10111   ins_cost(145);
10112   format %{ "UCOMISS $src1,$src2\n\t"
10113             "JNP,s   exit\n\t"
10114             "PUSHF\t# saw NaN, set CF\n\t"
10115             "AND     [rsp], #0xffffff2b\n\t"
10116             "POPF\n"
10117     "exit:" %}
10118   ins_encode %{
10119     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10120     emit_cmpfp_fixup(_masm);
10121   %}
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10126   predicate(UseSSE>=1);
10127   match(Set cr (CmpF src1 src2));
10128   ins_cost(100);
10129   format %{ "UCOMISS $src1,$src2" %}
10130   ins_encode %{
10131     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10132   %}
10133   ins_pipe( pipe_slow );
10134 %}
10135 
10136 // float compare and set condition codes in EFLAGS by XMM regs
10137 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10138   predicate(UseSSE>=1);
10139   match(Set cr (CmpF src1 (LoadF src2)));
10140   ins_cost(165);
10141   format %{ "UCOMISS $src1,$src2\n\t"
10142             "JNP,s   exit\n\t"
10143             "PUSHF\t# saw NaN, set CF\n\t"
10144             "AND     [rsp], #0xffffff2b\n\t"
10145             "POPF\n"
10146     "exit:" %}
10147   ins_encode %{
10148     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10149     emit_cmpfp_fixup(_masm);
10150   %}
10151   ins_pipe( pipe_slow );
10152 %}
10153 
10154 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10155   predicate(UseSSE>=1);
10156   match(Set cr (CmpF src1 (LoadF src2)));
10157   ins_cost(100);
10158   format %{ "UCOMISS $src1,$src2" %}
10159   ins_encode %{
10160     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10161   %}
10162   ins_pipe( pipe_slow );
10163 %}
10164 
10165 // Compare into -1,0,1 in XMM
10166 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10167   predicate(UseSSE>=1);
10168   match(Set dst (CmpF3 src1 src2));
10169   effect(KILL cr);
10170   ins_cost(255);
10171   format %{ "UCOMISS $src1, $src2\n\t"
10172             "MOV     $dst, #-1\n\t"
10173             "JP,s    done\n\t"
10174             "JB,s    done\n\t"
10175             "SETNE   $dst\n\t"
10176             "MOVZB   $dst, $dst\n"
10177     "done:" %}
10178   ins_encode %{
10179     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10180     emit_cmpfp3(_masm, $dst$$Register);
10181   %}
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 // Compare into -1,0,1 in XMM and memory
10186 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10187   predicate(UseSSE>=1);
10188   match(Set dst (CmpF3 src1 (LoadF src2)));
10189   effect(KILL cr);
10190   ins_cost(275);
10191   format %{ "UCOMISS $src1, $src2\n\t"
10192             "MOV     $dst, #-1\n\t"
10193             "JP,s    done\n\t"
10194             "JB,s    done\n\t"
10195             "SETNE   $dst\n\t"
10196             "MOVZB   $dst, $dst\n"
10197     "done:" %}
10198   ins_encode %{
10199     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10200     emit_cmpfp3(_masm, $dst$$Register);
10201   %}
10202   ins_pipe( pipe_slow );
10203 %}
10204 
10205 // Spill to obtain 24-bit precision
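// In the "Spill to obtain 24-bit precision" variants the result operand is a
// stack slot: storing the x87 value with FSTP_S rounds it to single-precision
// format, which is how strict 24-bit results are produced when the compile is
// in 24-bit mode.  The non-spilling variants leave the (wider) value on the
// FPU stack and do not round.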
10206 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10207   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10208   match(Set dst (SubF src1 src2));
10209 
10210   format %{ "FSUB   $dst,$src1 - $src2" %}
10211   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10212   ins_encode( Push_Reg_FPR(src1),
10213               OpcReg_FPR(src2),
10214               Pop_Mem_FPR(dst) );
10215   ins_pipe( fpu_mem_reg_reg );
10216 %}
10217 //
10218 // This instruction does not round to 24-bits
10219 instruct subFPR_reg(regFPR dst, regFPR src) %{
10220   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10221   match(Set dst (SubF dst src));
10222 
10223   format %{ "FSUB   $dst,$src" %}
10224   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10225   ins_encode( Push_Reg_FPR(src),
10226               OpcP, RegOpc(dst) );
10227   ins_pipe( fpu_reg_reg );
10228 %}
10229 
10230 // Spill to obtain 24-bit precision
10231 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10232   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10233   match(Set dst (AddF src1 src2));
10234 
10235   format %{ "FADD   $dst,$src1,$src2" %}
10236   opcode(0xD8, 0x0); /* D8 C0+i */
10237   ins_encode( Push_Reg_FPR(src2),
10238               OpcReg_FPR(src1),
10239               Pop_Mem_FPR(dst) );
10240   ins_pipe( fpu_mem_reg_reg );
10241 %}
10242 //
10243 // This instruction does not round to 24-bits
10244 instruct addFPR_reg(regFPR dst, regFPR src) %{
10245   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10246   match(Set dst (AddF dst src));
10247 
10248   format %{ "FLD    $src\n\t"
10249             "FADDp  $dst,ST" %}
10250   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10251   ins_encode( Push_Reg_FPR(src),
10252               OpcP, RegOpc(dst) );
10253   ins_pipe( fpu_reg_reg );
10254 %}
10255 
10256 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10257   predicate(UseSSE==0);
10258   match(Set dst (AbsF src));
10259   ins_cost(100);
10260   format %{ "FABS" %}
10261   opcode(0xE1, 0xD9);
10262   ins_encode( OpcS, OpcP );
10263   ins_pipe( fpu_reg_reg );
10264 %}
10265 
10266 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10267   predicate(UseSSE==0);
10268   match(Set dst (NegF src));
10269   ins_cost(100);
10270   format %{ "FCHS" %}
10271   opcode(0xE0, 0xD9);
10272   ins_encode( OpcS, OpcP );
10273   ins_pipe( fpu_reg_reg );
10274 %}
10275 
10276 // Cisc-alternate to addFPR_reg
10277 // Spill to obtain 24-bit precision
10278 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10279   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10280   match(Set dst (AddF src1 (LoadF src2)));
10281 
10282   format %{ "FLD    $src2\n\t"
10283             "FADD   ST,$src1\n\t"
10284             "FSTP_S $dst" %}
10285   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10286   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10287               OpcReg_FPR(src1),
10288               Pop_Mem_FPR(dst) );
10289   ins_pipe( fpu_mem_reg_mem );
10290 %}
10291 //
10292 // Cisc-alternate to addFPR_reg
10293 // This instruction does not round to 24-bits
10294 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10295   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10296   match(Set dst (AddF dst (LoadF src)));
10297 
10298   format %{ "FADD   $dst,$src" %}
10299   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10300   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10301               OpcP, RegOpc(dst) );
10302   ins_pipe( fpu_reg_mem );
10303 %}
10304 
// Following two instructions for _222_mpegaudio
10306 // Spill to obtain 24-bit precision
10307 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10308   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10309   match(Set dst (AddF src1 src2));
10310 
10311   format %{ "FADD   $dst,$src1,$src2" %}
10312   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10313   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10314               OpcReg_FPR(src2),
10315               Pop_Mem_FPR(dst) );
10316   ins_pipe( fpu_mem_reg_mem );
10317 %}
10318 
10319 // Cisc-spill variant
10320 // Spill to obtain 24-bit precision
10321 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10322   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10323   match(Set dst (AddF src1 (LoadF src2)));
10324 
10325   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10326   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10327   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10328               set_instruction_start,
10329               OpcP, RMopc_Mem(secondary,src1),
10330               Pop_Mem_FPR(dst) );
10331   ins_pipe( fpu_mem_mem_mem );
10332 %}
10333 
10334 // Spill to obtain 24-bit precision
10335 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10336   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10337   match(Set dst (AddF src1 src2));
10338 
10339   format %{ "FADD   $dst,$src1,$src2" %}
10340   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10341   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10342               set_instruction_start,
10343               OpcP, RMopc_Mem(secondary,src1),
10344               Pop_Mem_FPR(dst) );
10345   ins_pipe( fpu_mem_mem_mem );
10346 %}
10347 
10348 
10349 // Spill to obtain 24-bit precision
10350 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10351   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10352   match(Set dst (AddF src con));
10353   format %{ "FLD    $src\n\t"
10354             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10355             "FSTP_S $dst"  %}
10356   ins_encode %{
10357     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10358     __ fadd_s($constantaddress($con));
10359     __ fstp_s(Address(rsp, $dst$$disp));
10360   %}
10361   ins_pipe(fpu_mem_reg_con);
10362 %}
10363 //
10364 // This instruction does not round to 24-bits
10365 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10366   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10367   match(Set dst (AddF src con));
10368   format %{ "FLD    $src\n\t"
10369             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10370             "FSTP   $dst"  %}
10371   ins_encode %{
10372     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10373     __ fadd_s($constantaddress($con));
10374     __ fstp_d($dst$$reg);
10375   %}
10376   ins_pipe(fpu_reg_reg_con);
10377 %}
10378 
10379 // Spill to obtain 24-bit precision
10380 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10381   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10382   match(Set dst (MulF src1 src2));
10383 
10384   format %{ "FLD    $src1\n\t"
10385             "FMUL   $src2\n\t"
10386             "FSTP_S $dst"  %}
10387   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10388   ins_encode( Push_Reg_FPR(src1),
10389               OpcReg_FPR(src2),
10390               Pop_Mem_FPR(dst) );
10391   ins_pipe( fpu_mem_reg_reg );
10392 %}
10393 //
10394 // This instruction does not round to 24-bits
10395 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10396   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10397   match(Set dst (MulF src1 src2));
10398 
10399   format %{ "FLD    $src1\n\t"
10400             "FMUL   $src2\n\t"
10401             "FSTP_S $dst"  %}
10402   opcode(0xD8, 0x1); /* D8 C8+i */
10403   ins_encode( Push_Reg_FPR(src2),
10404               OpcReg_FPR(src1),
10405               Pop_Reg_FPR(dst) );
10406   ins_pipe( fpu_reg_reg_reg );
10407 %}
10408 
10409 
10410 // Spill to obtain 24-bit precision
10411 // Cisc-alternate to reg-reg multiply
10412 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10413   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10414   match(Set dst (MulF src1 (LoadF src2)));
10415 
10416   format %{ "FLD_S  $src2\n\t"
10417             "FMUL   $src1\n\t"
10418             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10420   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10421               OpcReg_FPR(src1),
10422               Pop_Mem_FPR(dst) );
10423   ins_pipe( fpu_mem_reg_mem );
10424 %}
10425 //
10426 // This instruction does not round to 24-bits
10427 // Cisc-alternate to reg-reg multiply
10428 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10429   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10430   match(Set dst (MulF src1 (LoadF src2)));
10431 
10432   format %{ "FMUL   $dst,$src1,$src2" %}
10433   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10434   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10435               OpcReg_FPR(src1),
10436               Pop_Reg_FPR(dst) );
10437   ins_pipe( fpu_reg_reg_mem );
10438 %}
10439 
10440 // Spill to obtain 24-bit precision
10441 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10442   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10443   match(Set dst (MulF src1 src2));
10444 
10445   format %{ "FMUL   $dst,$src1,$src2" %}
10446   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10447   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10448               set_instruction_start,
10449               OpcP, RMopc_Mem(secondary,src1),
10450               Pop_Mem_FPR(dst) );
10451   ins_pipe( fpu_mem_mem_mem );
10452 %}
10453 
10454 // Spill to obtain 24-bit precision
10455 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10456   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10457   match(Set dst (MulF src con));
10458 
10459   format %{ "FLD    $src\n\t"
10460             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10461             "FSTP_S $dst"  %}
10462   ins_encode %{
10463     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10464     __ fmul_s($constantaddress($con));
10465     __ fstp_s(Address(rsp, $dst$$disp));
10466   %}
10467   ins_pipe(fpu_mem_reg_con);
10468 %}
10469 //
10470 // This instruction does not round to 24-bits
10471 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10472   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10473   match(Set dst (MulF src con));
10474 
10475   format %{ "FLD    $src\n\t"
10476             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10477             "FSTP   $dst"  %}
10478   ins_encode %{
10479     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10480     __ fmul_s($constantaddress($con));
10481     __ fstp_d($dst$$reg);
10482   %}
10483   ins_pipe(fpu_reg_reg_con);
10484 %}
10485 
10486 
10487 //
10488 // MACRO1 -- subsume unshared load into mulFPR
10489 // This instruction does not round to 24-bits
10490 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10491   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10492   match(Set dst (MulF (LoadF mem1) src));
10493 
10494   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10495             "FMUL   ST,$src\n\t"
10496             "FSTP   $dst" %}
10497   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10498   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10499               OpcReg_FPR(src),
10500               Pop_Reg_FPR(dst) );
10501   ins_pipe( fpu_reg_reg_mem );
10502 %}
10503 //
10504 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10505 // This instruction does not round to 24-bits
10506 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10507   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10508   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10509   ins_cost(95);
10510 
10511   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10512             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10513             "FADD   ST,$src2\n\t"
10514             "FSTP   $dst" %}
10515   opcode(0xD9); /* LoadF D9 /0 */
10516   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10517               FMul_ST_reg(src1),
10518               FAdd_ST_reg(src2),
10519               Pop_Reg_FPR(dst) );
10520   ins_pipe( fpu_reg_mem_reg_reg );
10521 %}
10522 
10523 // MACRO3 -- addFPR a mulFPR
10524 // This instruction does not round to 24-bits.  It is a '2-address'
10525 // instruction in that the result goes back to src2.  This eliminates
10526 // a move from the macro; possibly the register allocator will have
10527 // to add it back (and maybe not).
10528 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10529   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10530   match(Set src2 (AddF (MulF src0 src1) src2));
10531 
10532   format %{ "FLD    $src0     ===MACRO3===\n\t"
10533             "FMUL   ST,$src1\n\t"
10534             "FADDP  $src2,ST" %}
10535   opcode(0xD9); /* LoadF D9 /0 */
10536   ins_encode( Push_Reg_FPR(src0),
10537               FMul_ST_reg(src1),
10538               FAddP_reg_ST(src2) );
10539   ins_pipe( fpu_reg_reg_reg );
10540 %}
10541 
10542 // MACRO4 -- divFPR subFPR
10543 // This instruction does not round to 24-bits
10544 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10545   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10546   match(Set dst (DivF (SubF src2 src1) src3));
10547 
10548   format %{ "FLD    $src2   ===MACRO4===\n\t"
10549             "FSUB   ST,$src1\n\t"
10550             "FDIV   ST,$src3\n\t"
10551             "FSTP  $dst" %}
10552   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10553   ins_encode( Push_Reg_FPR(src2),
10554               subFPR_divFPR_encode(src1,src3),
10555               Pop_Reg_FPR(dst) );
10556   ins_pipe( fpu_reg_reg_reg_reg );
10557 %}
10558 
10559 // Spill to obtain 24-bit precision
10560 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10561   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10562   match(Set dst (DivF src1 src2));
10563 
10564   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10566   ins_encode( Push_Reg_FPR(src1),
10567               OpcReg_FPR(src2),
10568               Pop_Mem_FPR(dst) );
10569   ins_pipe( fpu_mem_reg_reg );
10570 %}
10571 //
10572 // This instruction does not round to 24-bits
10573 instruct divFPR_reg(regFPR dst, regFPR src) %{
10574   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10575   match(Set dst (DivF dst src));
10576 
10577   format %{ "FDIV   $dst,$src" %}
10578   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10579   ins_encode( Push_Reg_FPR(src),
10580               OpcP, RegOpc(dst) );
10581   ins_pipe( fpu_reg_reg );
10582 %}
10583 
10584 
10585 // Spill to obtain 24-bit precision
10586 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10587   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10588   match(Set dst (ModF src1 src2));
10589   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10590 
10591   format %{ "FMOD   $dst,$src1,$src2" %}
10592   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10593               emitModDPR(),
10594               Push_Result_Mod_DPR(src2),
10595               Pop_Mem_FPR(dst));
10596   ins_pipe( pipe_slow );
10597 %}
10598 //
10599 // This instruction does not round to 24-bits
10600 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10601   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10602   match(Set dst (ModF dst src));
10603   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10604 
10605   format %{ "FMOD   $dst,$src" %}
10606   ins_encode(Push_Reg_Mod_DPR(dst, src),
10607               emitModDPR(),
10608               Push_Result_Mod_DPR(src),
10609               Pop_Reg_FPR(dst));
10610   ins_pipe( pipe_slow );
10611 %}
10612 
10613 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10614   predicate(UseSSE>=1);
10615   match(Set dst (ModF src0 src1));
10616   effect(KILL rax, KILL cr);
10617   format %{ "SUB    ESP,4\t # FMOD\n"
10618           "\tMOVSS  [ESP+0],$src1\n"
10619           "\tFLD_S  [ESP+0]\n"
10620           "\tMOVSS  [ESP+0],$src0\n"
10621           "\tFLD_S  [ESP+0]\n"
10622      "loop:\tFPREM\n"
10623           "\tFWAIT\n"
10624           "\tFNSTSW AX\n"
10625           "\tSAHF\n"
10626           "\tJP     loop\n"
10627           "\tFSTP_S [ESP+0]\n"
10628           "\tMOVSS  $dst,[ESP+0]\n"
10629           "\tADD    ESP,4\n"
10630           "\tFSTP   ST0\t # Restore FPU Stack"
10631     %}
10632   ins_cost(250);
10633   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10634   ins_pipe( pipe_slow );
10635 %}
10636 
10637 
10638 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10640 
10641 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10642   predicate(UseSSE==0);
10643   match(Set dst (RoundFloat src));
10644   ins_cost(125);
10645   format %{ "FST_S  $dst,$src\t# F-round" %}
10646   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10647   ins_pipe( fpu_mem_reg );
10648 %}
10649 
10650 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10651   predicate(UseSSE<=1);
10652   match(Set dst (RoundDouble src));
10653   ins_cost(125);
10654   format %{ "FST_D  $dst,$src\t# D-round" %}
10655   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10656   ins_pipe( fpu_mem_reg );
10657 %}
10658 
// Force rounding to 24-bit precision and 8-bit exponent
10660 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10661   predicate(UseSSE==0);
10662   match(Set dst (ConvD2F src));
10663   format %{ "FST_S  $dst,$src\t# F-round" %}
10664   expand %{
10665     roundFloat_mem_reg(dst,src);
10666   %}
10667 %}
10668 
// Force rounding to 24-bit precision and 8-bit exponent
10670 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10671   predicate(UseSSE==1);
10672   match(Set dst (ConvD2F src));
10673   effect( KILL cr );
10674   format %{ "SUB    ESP,4\n\t"
10675             "FST_S  [ESP],$src\t# F-round\n\t"
10676             "MOVSS  $dst,[ESP]\n\t"
10677             "ADD ESP,4" %}
10678   ins_encode %{
10679     __ subptr(rsp, 4);
10680     if ($src$$reg != FPR1L_enc) {
10681       __ fld_s($src$$reg-1);
10682       __ fstp_s(Address(rsp, 0));
10683     } else {
10684       __ fst_s(Address(rsp, 0));
10685     }
10686     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10687     __ addptr(rsp, 4);
10688   %}
10689   ins_pipe( pipe_slow );
10690 %}
10691 
10692 // Force rounding double precision to single precision
10693 instruct convD2F_reg(regF dst, regD src) %{
10694   predicate(UseSSE>=2);
10695   match(Set dst (ConvD2F src));
10696   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10697   ins_encode %{
10698     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10699   %}
10700   ins_pipe( pipe_slow );
10701 %}
10702 
10703 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10704   predicate(UseSSE==0);
10705   match(Set dst (ConvF2D src));
10706   format %{ "FST_S  $dst,$src\t# D-round" %}
10707   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10708   ins_pipe( fpu_reg_reg );
10709 %}
10710 
10711 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10712   predicate(UseSSE==1);
10713   match(Set dst (ConvF2D src));
10714   format %{ "FST_D  $dst,$src\t# D-round" %}
10715   expand %{
10716     roundDouble_mem_reg(dst,src);
10717   %}
10718 %}
10719 
10720 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10721   predicate(UseSSE==1);
10722   match(Set dst (ConvF2D src));
10723   effect( KILL cr );
10724   format %{ "SUB    ESP,4\n\t"
10725             "MOVSS  [ESP] $src\n\t"
10726             "FLD_S  [ESP]\n\t"
10727             "ADD    ESP,4\n\t"
10728             "FSTP   $dst\t# D-round" %}
10729   ins_encode %{
10730     __ subptr(rsp, 4);
10731     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10732     __ fld_s(Address(rsp, 0));
10733     __ addptr(rsp, 4);
10734     __ fstp_d($dst$$reg);
10735   %}
10736   ins_pipe( pipe_slow );
10737 %}
10738 
10739 instruct convF2D_reg(regD dst, regF src) %{
10740   predicate(UseSSE>=2);
10741   match(Set dst (ConvF2D src));
10742   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10743   ins_encode %{
10744     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10745   %}
10746   ins_pipe( pipe_slow );
10747 %}
10748 
10749 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10750 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10751   predicate(UseSSE<=1);
10752   match(Set dst (ConvD2I src));
10753   effect( KILL tmp, KILL cr );
10754   format %{ "FLD    $src\t# Convert double to int \n\t"
10755             "FLDCW  trunc mode\n\t"
10756             "SUB    ESP,4\n\t"
10757             "FISTp  [ESP + #0]\n\t"
10758             "FLDCW  std/24-bit mode\n\t"
10759             "POP    EAX\n\t"
10760             "CMP    EAX,0x80000000\n\t"
10761             "JNE,s  fast\n\t"
10762             "FLD_D  $src\n\t"
10763             "CALL   d2i_wrapper\n"
10764       "fast:" %}
10765   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10766   ins_pipe( pipe_slow );
10767 %}
10768 
10769 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10770 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10771   predicate(UseSSE>=2);
10772   match(Set dst (ConvD2I src));
10773   effect( KILL tmp, KILL cr );
10774   format %{ "CVTTSD2SI $dst, $src\n\t"
10775             "CMP    $dst,0x80000000\n\t"
10776             "JNE,s  fast\n\t"
10777             "SUB    ESP, 8\n\t"
10778             "MOVSD  [ESP], $src\n\t"
10779             "FLD_D  [ESP]\n\t"
10780             "ADD    ESP, 8\n\t"
10781             "CALL   d2i_wrapper\n"
10782       "fast:" %}
10783   ins_encode %{
10784     Label fast;
10785     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10786     __ cmpl($dst$$Register, 0x80000000);
10787     __ jccb(Assembler::notEqual, fast);
10788     __ subptr(rsp, 8);
10789     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10790     __ fld_d(Address(rsp, 0));
10791     __ addptr(rsp, 8);
10792     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10793     __ post_call_nop();
10794     __ bind(fast);
10795   %}
10796   ins_pipe( pipe_slow );
10797 %}
10798 
10799 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10800   predicate(UseSSE<=1);
10801   match(Set dst (ConvD2L src));
10802   effect( KILL cr );
10803   format %{ "FLD    $src\t# Convert double to long\n\t"
10804             "FLDCW  trunc mode\n\t"
10805             "SUB    ESP,8\n\t"
10806             "FISTp  [ESP + #0]\n\t"
10807             "FLDCW  std/24-bit mode\n\t"
10808             "POP    EAX\n\t"
10809             "POP    EDX\n\t"
10810             "CMP    EDX,0x80000000\n\t"
10811             "JNE,s  fast\n\t"
10812             "TEST   EAX,EAX\n\t"
10813             "JNE,s  fast\n\t"
10814             "FLD    $src\n\t"
10815             "CALL   d2l_wrapper\n"
10816       "fast:" %}
10817   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10818   ins_pipe( pipe_slow );
10819 %}
10820 
10821 // XMM lacks a float/double->long conversion, so use the old FPU stack.
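// (CVTTSS2SI/CVTTSD2SI can only produce a 32-bit result here, since the REX.W
// form does not exist in 32-bit mode, so the long conversions go through FISTP
// on the x87 stack instead.)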
10822 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10823   predicate (UseSSE>=2);
10824   match(Set dst (ConvD2L src));
10825   effect( KILL cr );
10826   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10827             "MOVSD  [ESP],$src\n\t"
10828             "FLD_D  [ESP]\n\t"
10829             "FLDCW  trunc mode\n\t"
10830             "FISTp  [ESP + #0]\n\t"
10831             "FLDCW  std/24-bit mode\n\t"
10832             "POP    EAX\n\t"
10833             "POP    EDX\n\t"
10834             "CMP    EDX,0x80000000\n\t"
10835             "JNE,s  fast\n\t"
10836             "TEST   EAX,EAX\n\t"
10837             "JNE,s  fast\n\t"
10838             "SUB    ESP,8\n\t"
10839             "MOVSD  [ESP],$src\n\t"
10840             "FLD_D  [ESP]\n\t"
10841             "ADD    ESP,8\n\t"
10842             "CALL   d2l_wrapper\n"
10843       "fast:" %}
10844   ins_encode %{
10845     Label fast;
10846     __ subptr(rsp, 8);
10847     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10848     __ fld_d(Address(rsp, 0));
10849     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10850     __ fistp_d(Address(rsp, 0));
10851     // Restore the rounding mode, mask the exception
10852     if (Compile::current()->in_24_bit_fp_mode()) {
10853       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10854     } else {
10855       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10856     }
10857     // Load the converted long, adjust CPU stack
10858     __ pop(rax);
10859     __ pop(rdx);
10860     __ cmpl(rdx, 0x80000000);
10861     __ jccb(Assembler::notEqual, fast);
10862     __ testl(rax, rax);
10863     __ jccb(Assembler::notEqual, fast);
10864     __ subptr(rsp, 8);
10865     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10866     __ fld_d(Address(rsp, 0));
10867     __ addptr(rsp, 8);
10868     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10869     __ post_call_nop();
10870     __ bind(fast);
10871   %}
10872   ins_pipe( pipe_slow );
10873 %}
10874 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
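// For reference, the required semantics, as illustrative pseudo-code only
// (not the emitted sequence):
//
//   jint f2i(jfloat f) {
//     if (f != f)               return 0;           // NaN maps to zero
//     if (f >=  2147483648.0f)  return 0x7FFFFFFF;  // too large -> max_jint
//     if (f <= -2147483648.0f)  return 0x80000000;  // too small -> min_jint
//     return (jint) f;                              // truncate toward zero
//   }
//
// FIST stores the sentinel 0x80000000 for NaN and out-of-range inputs, so only
// those cases reach the wrapper stub.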
10881 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10882   predicate(UseSSE==0);
10883   match(Set dst (ConvF2I src));
10884   effect( KILL tmp, KILL cr );
10885   format %{ "FLD    $src\t# Convert float to int \n\t"
10886             "FLDCW  trunc mode\n\t"
10887             "SUB    ESP,4\n\t"
10888             "FISTp  [ESP + #0]\n\t"
10889             "FLDCW  std/24-bit mode\n\t"
10890             "POP    EAX\n\t"
10891             "CMP    EAX,0x80000000\n\t"
10892             "JNE,s  fast\n\t"
10893             "FLD    $src\n\t"
10894             "CALL   d2i_wrapper\n"
10895       "fast:" %}
10896   // DPR2I_encoding works for FPR2I
10897   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10898   ins_pipe( pipe_slow );
10899 %}
10900 
10901 // Convert a float in xmm to an int reg.
10902 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10903   predicate(UseSSE>=1);
10904   match(Set dst (ConvF2I src));
10905   effect( KILL tmp, KILL cr );
10906   format %{ "CVTTSS2SI $dst, $src\n\t"
10907             "CMP    $dst,0x80000000\n\t"
10908             "JNE,s  fast\n\t"
10909             "SUB    ESP, 4\n\t"
10910             "MOVSS  [ESP], $src\n\t"
10911             "FLD    [ESP]\n\t"
10912             "ADD    ESP, 4\n\t"
10913             "CALL   d2i_wrapper\n"
10914       "fast:" %}
10915   ins_encode %{
10916     Label fast;
10917     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
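    // CVTTSS2SI returns the "integer indefinite" 0x80000000 on overflow or NaN,
    // so only that result (which may also be a genuine Integer.MIN_VALUE) needs
    // the d2i_wrapper slow path.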
10918     __ cmpl($dst$$Register, 0x80000000);
10919     __ jccb(Assembler::notEqual, fast);
10920     __ subptr(rsp, 4);
10921     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10922     __ fld_s(Address(rsp, 0));
10923     __ addptr(rsp, 4);
10924     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10925     __ post_call_nop();
10926     __ bind(fast);
10927   %}
10928   ins_pipe( pipe_slow );
10929 %}
10930 
10931 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10932   predicate(UseSSE==0);
10933   match(Set dst (ConvF2L src));
10934   effect( KILL cr );
10935   format %{ "FLD    $src\t# Convert float to long\n\t"
10936             "FLDCW  trunc mode\n\t"
10937             "SUB    ESP,8\n\t"
10938             "FISTp  [ESP + #0]\n\t"
10939             "FLDCW  std/24-bit mode\n\t"
10940             "POP    EAX\n\t"
10941             "POP    EDX\n\t"
10942             "CMP    EDX,0x80000000\n\t"
10943             "JNE,s  fast\n\t"
10944             "TEST   EAX,EAX\n\t"
10945             "JNE,s  fast\n\t"
10946             "FLD    $src\n\t"
10947             "CALL   d2l_wrapper\n"
10948       "fast:" %}
10949   // DPR2L_encoding works for FPR2L
10950   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10951   ins_pipe( pipe_slow );
10952 %}
10953 
10954 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10955 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10956   predicate (UseSSE>=1);
10957   match(Set dst (ConvF2L src));
10958   effect( KILL cr );
10959   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10960             "MOVSS  [ESP],$src\n\t"
10961             "FLD_S  [ESP]\n\t"
10962             "FLDCW  trunc mode\n\t"
10963             "FISTp  [ESP + #0]\n\t"
10964             "FLDCW  std/24-bit mode\n\t"
10965             "POP    EAX\n\t"
10966             "POP    EDX\n\t"
10967             "CMP    EDX,0x80000000\n\t"
10968             "JNE,s  fast\n\t"
10969             "TEST   EAX,EAX\n\t"
10970             "JNE,s  fast\n\t"
10971             "SUB    ESP,4\t# Convert float to long\n\t"
10972             "MOVSS  [ESP],$src\n\t"
10973             "FLD_S  [ESP]\n\t"
10974             "ADD    ESP,4\n\t"
10975             "CALL   d2l_wrapper\n"
10976       "fast:" %}
10977   ins_encode %{
10978     Label fast;
10979     __ subptr(rsp, 8);
10980     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10981     __ fld_s(Address(rsp, 0));
10982     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10983     __ fistp_d(Address(rsp, 0));
10984     // Restore the rounding mode, mask the exception
10985     if (Compile::current()->in_24_bit_fp_mode()) {
10986       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10987     } else {
10988       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10989     }
10990     // Load the converted long, adjust CPU stack
10991     __ pop(rax);
10992     __ pop(rdx);
10993     __ cmpl(rdx, 0x80000000);
10994     __ jccb(Assembler::notEqual, fast);
10995     __ testl(rax, rax);
10996     __ jccb(Assembler::notEqual, fast);
10997     __ subptr(rsp, 4);
10998     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10999     __ fld_s(Address(rsp, 0));
11000     __ addptr(rsp, 4);
11001     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11002     __ post_call_nop();
11003     __ bind(fast);
11004   %}
11005   ins_pipe( pipe_slow );
11006 %}
11007 
11008 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11009   predicate( UseSSE<=1 );
11010   match(Set dst (ConvI2D src));
11011   format %{ "FILD   $src\n\t"
11012             "FSTP   $dst" %}
11013   opcode(0xDB, 0x0);  /* DB /0 */
11014   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11015   ins_pipe( fpu_reg_mem );
11016 %}
11017 
11018 instruct convI2D_reg(regD dst, rRegI src) %{
11019   predicate( UseSSE>=2 && !UseXmmI2D );
11020   match(Set dst (ConvI2D src));
11021   format %{ "CVTSI2SD $dst,$src" %}
11022   ins_encode %{
11023     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11024   %}
11025   ins_pipe( pipe_slow );
11026 %}
11027 
11028 instruct convI2D_mem(regD dst, memory mem) %{
11029   predicate( UseSSE>=2 );
11030   match(Set dst (ConvI2D (LoadI mem)));
11031   format %{ "CVTSI2SD $dst,$mem" %}
11032   ins_encode %{
11033     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11034   %}
11035   ins_pipe( pipe_slow );
11036 %}
11037 
11038 instruct convXI2D_reg(regD dst, rRegI src)
11039 %{
11040   predicate( UseSSE>=2 && UseXmmI2D );
11041   match(Set dst (ConvI2D src));
11042 
11043   format %{ "MOVD  $dst,$src\n\t"
11044             "CVTDQ2PD $dst,$dst\t# i2d" %}
11045   ins_encode %{
11046     __ movdl($dst$$XMMRegister, $src$$Register);
11047     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11048   %}
11049   ins_pipe(pipe_slow); // XXX
11050 %}
11051 
11052 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11053   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11054   match(Set dst (ConvI2D (LoadI mem)));
11055   format %{ "FILD   $mem\n\t"
11056             "FSTP   $dst" %}
11057   opcode(0xDB);      /* DB /0 */
11058   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11059               Pop_Reg_DPR(dst));
11060   ins_pipe( fpu_reg_mem );
11061 %}
11062 
// Convert a byte to a float; no rounding step is needed because every value
// in [0,255] is exactly representable in single precision.
11064 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11065   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11066   match(Set dst (ConvI2F src));
11067   format %{ "FILD   $src\n\t"
11068             "FSTP   $dst" %}
11069 
11070   opcode(0xDB, 0x0);  /* DB /0 */
11071   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11072   ins_pipe( fpu_reg_mem );
11073 %}
11074 
11075 // In 24-bit mode, force exponent rounding by storing back out
11076 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11077   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11078   match(Set dst (ConvI2F src));
11079   ins_cost(200);
11080   format %{ "FILD   $src\n\t"
11081             "FSTP_S $dst" %}
11082   opcode(0xDB, 0x0);  /* DB /0 */
11083   ins_encode( Push_Mem_I(src),
11084               Pop_Mem_FPR(dst));
11085   ins_pipe( fpu_mem_mem );
11086 %}
11087 
11088 // In 24-bit mode, force exponent rounding by storing back out
11089 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11090   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11091   match(Set dst (ConvI2F (LoadI mem)));
11092   ins_cost(200);
11093   format %{ "FILD   $mem\n\t"
11094             "FSTP_S $dst" %}
11095   opcode(0xDB);  /* DB /0 */
11096   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11097               Pop_Mem_FPR(dst));
11098   ins_pipe( fpu_mem_mem );
11099 %}
11100 
11101 // This instruction does not round to 24-bits
11102 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11103   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11104   match(Set dst (ConvI2F src));
11105   format %{ "FILD   $src\n\t"
11106             "FSTP   $dst" %}
11107   opcode(0xDB, 0x0);  /* DB /0 */
11108   ins_encode( Push_Mem_I(src),
11109               Pop_Reg_FPR(dst));
11110   ins_pipe( fpu_reg_mem );
11111 %}
11112 
11113 // This instruction does not round to 24-bits
11114 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11115   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11116   match(Set dst (ConvI2F (LoadI mem)));
11117   format %{ "FILD   $mem\n\t"
11118             "FSTP   $dst" %}
11119   opcode(0xDB);      /* DB /0 */
11120   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11121               Pop_Reg_FPR(dst));
11122   ins_pipe( fpu_reg_mem );
11123 %}
11124 
11125 // Convert an int to a float in xmm; no rounding step needed.
11126 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11128   match(Set dst (ConvI2F src));
11129   format %{ "CVTSI2SS $dst, $src" %}
11130   ins_encode %{
11131     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11132   %}
11133   ins_pipe( pipe_slow );
11134 %}
11135 
instruct convXI2F_reg(regF dst, rRegI src)
11137 %{
11138   predicate( UseSSE>=2 && UseXmmI2F );
11139   match(Set dst (ConvI2F src));
11140 
11141   format %{ "MOVD  $dst,$src\n\t"
11142             "CVTDQ2PS $dst,$dst\t# i2f" %}
11143   ins_encode %{
11144     __ movdl($dst$$XMMRegister, $src$$Register);
11145     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11146   %}
11147   ins_pipe(pipe_slow); // XXX
11148 %}
11149 
11150 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11151   match(Set dst (ConvI2L src));
11152   effect(KILL cr);
11153   ins_cost(375);
11154   format %{ "MOV    $dst.lo,$src\n\t"
11155             "MOV    $dst.hi,$src\n\t"
11156             "SAR    $dst.hi,31" %}
11157   ins_encode(convert_int_long(dst,src));
11158   ins_pipe( ialu_reg_reg_long );
11159 %}
11160 
11161 // Zero-extend convert int to long
11162 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11163   match(Set dst (AndL (ConvI2L src) mask) );
11164   effect( KILL flags );
11165   ins_cost(250);
11166   format %{ "MOV    $dst.lo,$src\n\t"
11167             "XOR    $dst.hi,$dst.hi" %}
11168   opcode(0x33); // XOR
11169   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11170   ins_pipe( ialu_reg_reg_long );
11171 %}
11172 
11173 // Zero-extend long
11174 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11175   match(Set dst (AndL src mask) );
11176   effect( KILL flags );
11177   ins_cost(250);
11178   format %{ "MOV    $dst.lo,$src.lo\n\t"
11179             "XOR    $dst.hi,$dst.hi\n\t" %}
11180   opcode(0x33); // XOR
11181   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11182   ins_pipe( ialu_reg_reg_long );
11183 %}
11184 
11185 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11186   predicate (UseSSE<=1);
11187   match(Set dst (ConvL2D src));
11188   effect( KILL cr );
11189   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11190             "PUSH   $src.lo\n\t"
11191             "FILD   ST,[ESP + #0]\n\t"
11192             "ADD    ESP,8\n\t"
11193             "FSTP_D $dst\t# D-round" %}
11194   opcode(0xDF, 0x5);  /* DF /5 */
11195   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11196   ins_pipe( pipe_slow );
11197 %}
11198 
11199 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11200   predicate (UseSSE>=2);
11201   match(Set dst (ConvL2D src));
11202   effect( KILL cr );
11203   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11204             "PUSH   $src.lo\n\t"
11205             "FILD_D [ESP]\n\t"
11206             "FSTP_D [ESP]\n\t"
11207             "MOVSD  $dst,[ESP]\n\t"
11208             "ADD    ESP,8" %}
11209   opcode(0xDF, 0x5);  /* DF /5 */
11210   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11211   ins_pipe( pipe_slow );
11212 %}
11213 
11214 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11215   predicate (UseSSE>=1);
11216   match(Set dst (ConvL2F src));
11217   effect( KILL cr );
11218   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11219             "PUSH   $src.lo\n\t"
11220             "FILD_D [ESP]\n\t"
11221             "FSTP_S [ESP]\n\t"
11222             "MOVSS  $dst,[ESP]\n\t"
11223             "ADD    ESP,8" %}
11224   opcode(0xDF, 0x5);  /* DF /5 */
11225   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11226   ins_pipe( pipe_slow );
11227 %}
11228 
11229 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11230   match(Set dst (ConvL2F src));
11231   effect( KILL cr );
11232   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11233             "PUSH   $src.lo\n\t"
11234             "FILD   ST,[ESP + #0]\n\t"
11235             "ADD    ESP,8\n\t"
11236             "FSTP_S $dst\t# F-round" %}
11237   opcode(0xDF, 0x5);  /* DF /5 */
11238   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11239   ins_pipe( pipe_slow );
11240 %}
11241 
11242 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11243   match(Set dst (ConvL2I src));
11244   effect( DEF dst, USE src );
11245   format %{ "MOV    $dst,$src.lo" %}
11246   ins_encode(enc_CopyL_Lo(dst,src));
11247   ins_pipe( ialu_reg_reg );
11248 %}
11249 
11250 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11251   match(Set dst (MoveF2I src));
11252   effect( DEF dst, USE src );
11253   ins_cost(100);
11254   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11255   ins_encode %{
11256     __ movl($dst$$Register, Address(rsp, $src$$disp));
11257   %}
11258   ins_pipe( ialu_reg_mem );
11259 %}
11260 
11261 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11262   predicate(UseSSE==0);
11263   match(Set dst (MoveF2I src));
11264   effect( DEF dst, USE src );
11265 
11266   ins_cost(125);
11267   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11268   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11269   ins_pipe( fpu_mem_reg );
11270 %}
11271 
11272 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11273   predicate(UseSSE>=1);
11274   match(Set dst (MoveF2I src));
11275   effect( DEF dst, USE src );
11276 
11277   ins_cost(95);
11278   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11279   ins_encode %{
11280     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11281   %}
11282   ins_pipe( pipe_slow );
11283 %}
11284 
11285 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11286   predicate(UseSSE>=2);
11287   match(Set dst (MoveF2I src));
11288   effect( DEF dst, USE src );
11289   ins_cost(85);
11290   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11291   ins_encode %{
11292     __ movdl($dst$$Register, $src$$XMMRegister);
11293   %}
11294   ins_pipe( pipe_slow );
11295 %}
11296 
11297 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11298   match(Set dst (MoveI2F src));
11299   effect( DEF dst, USE src );
11300 
11301   ins_cost(100);
11302   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11303   ins_encode %{
11304     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11305   %}
11306   ins_pipe( ialu_mem_reg );
11307 %}
11308 
11309 
11310 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11311   predicate(UseSSE==0);
11312   match(Set dst (MoveI2F src));
11313   effect(DEF dst, USE src);
11314 
11315   ins_cost(125);
11316   format %{ "FLD_S  $src\n\t"
11317             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11318   opcode(0xD9);               /* D9 /0, FLD m32real */
11319   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11320               Pop_Reg_FPR(dst) );
11321   ins_pipe( fpu_reg_mem );
11322 %}
11323 
11324 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11325   predicate(UseSSE>=1);
11326   match(Set dst (MoveI2F src));
11327   effect( DEF dst, USE src );
11328 
11329   ins_cost(95);
11330   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11331   ins_encode %{
11332     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11333   %}
11334   ins_pipe( pipe_slow );
11335 %}
11336 
11337 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11338   predicate(UseSSE>=2);
11339   match(Set dst (MoveI2F src));
11340   effect( DEF dst, USE src );
11341 
11342   ins_cost(85);
11343   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11344   ins_encode %{
11345     __ movdl($dst$$XMMRegister, $src$$Register);
11346   %}
11347   ins_pipe( pipe_slow );
11348 %}
11349 
11350 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11351   match(Set dst (MoveD2L src));
11352   effect(DEF dst, USE src);
11353 
11354   ins_cost(250);
11355   format %{ "MOV    $dst.lo,$src\n\t"
11356             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11357   opcode(0x8B, 0x8B);
11358   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11359   ins_pipe( ialu_mem_long_reg );
11360 %}
11361 
11362 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11363   predicate(UseSSE<=1);
11364   match(Set dst (MoveD2L src));
11365   effect(DEF dst, USE src);
11366 
11367   ins_cost(125);
11368   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11369   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11370   ins_pipe( fpu_mem_reg );
11371 %}
11372 
11373 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11374   predicate(UseSSE>=2);
11375   match(Set dst (MoveD2L src));
11376   effect(DEF dst, USE src);
11377   ins_cost(95);
11378   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11379   ins_encode %{
11380     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11381   %}
11382   ins_pipe( pipe_slow );
11383 %}
11384 
11385 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11386   predicate(UseSSE>=2);
11387   match(Set dst (MoveD2L src));
11388   effect(DEF dst, USE src, TEMP tmp);
11389   ins_cost(85);
11390   format %{ "MOVD   $dst.lo,$src\n\t"
11391             "PSHUFLW $tmp,$src,0x4E\n\t"
11392             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11393   ins_encode %{
11394     __ movdl($dst$$Register, $src$$XMMRegister);
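    // 0x4E swaps the two 32-bit halves of the low quadword, so the upper half
    // of the double lands in the low dword of $tmp for the following MOVD.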
11395     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11396     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11397   %}
11398   ins_pipe( pipe_slow );
11399 %}
11400 
11401 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11402   match(Set dst (MoveL2D src));
11403   effect(DEF dst, USE src);
11404 
11405   ins_cost(200);
11406   format %{ "MOV    $dst,$src.lo\n\t"
11407             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11408   opcode(0x89, 0x89);
11409   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11410   ins_pipe( ialu_mem_long_reg );
11411 %}
11412 
11413 
11414 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11415   predicate(UseSSE<=1);
11416   match(Set dst (MoveL2D src));
11417   effect(DEF dst, USE src);
11418   ins_cost(125);
11419 
11420   format %{ "FLD_D  $src\n\t"
11421             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11422   opcode(0xDD);               /* DD /0, FLD m64real */
11423   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11424               Pop_Reg_DPR(dst) );
11425   ins_pipe( fpu_reg_mem );
11426 %}
11427 
11428 
11429 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11430   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11431   match(Set dst (MoveL2D src));
11432   effect(DEF dst, USE src);
11433 
11434   ins_cost(95);
11435   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11436   ins_encode %{
11437     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11438   %}
11439   ins_pipe( pipe_slow );
11440 %}
11441 
11442 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11443   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11444   match(Set dst (MoveL2D src));
11445   effect(DEF dst, USE src);
11446 
11447   ins_cost(95);
11448   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11449   ins_encode %{
11450     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11451   %}
11452   ins_pipe( pipe_slow );
11453 %}
11454 
11455 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11456   predicate(UseSSE>=2);
11457   match(Set dst (MoveL2D src));
11458   effect(TEMP dst, USE src, TEMP tmp);
11459   ins_cost(85);
11460   format %{ "MOVD   $dst,$src.lo\n\t"
11461             "MOVD   $tmp,$src.hi\n\t"
11462             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11463   ins_encode %{
11464     __ movdl($dst$$XMMRegister, $src$$Register);
11465     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
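    // PUNPCKLDQ interleaves the low dwords: $dst[31:0] keeps the low word of the
    // long and $dst[63:32] becomes $tmp[31:0], reassembling the 64-bit value.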
11466     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11467   %}
11468   ins_pipe( pipe_slow );
11469 %}
11470 
11471 //----------------------------- CompressBits/ExpandBits ------------------------
11472 
11473 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11474   predicate(n->bottom_type()->isa_long());
11475   match(Set dst (CompressBits src mask));
11476   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11477   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11478   ins_encode %{
    Label exit, partial_result;
    // In parallel, extract both the upper and lower 32 bits of the source into
    // the destination register pair, then merge the two results so that the
    // upper destination bits are laid out contiguously after the lower ones.
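    // Worked example (illustrative values): mask = 0x0000000F_F0000000.
    // pext(src.lo, 0xF0000000) packs src.lo[31:28] into dst.lo[3:0];
    // pext(src.hi, 0x0000000F) packs src.hi[3:0] into dst.hi[3:0].
    // popcnt(mask.lo) == 4, so dst.hi is shifted left by 4 and OR-ed into
    // dst.lo (giving 8 contiguous result bits), then dst.hi is shifted right
    // by 32 - 4 = 28 so only bits belonging in the upper result word remain
    // (none in this example, hence dst.hi == 0).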
11483     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11484     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11485     __ popcntl($rtmp$$Register, $mask$$Register);
11486     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11487     __ cmpl($rtmp$$Register, 32);
11488     __ jccb(Assembler::equal, exit);
    // Due to the constraint on the number of GPRs on a 32-bit target, an XMM
    // register is used as a spill slot.
11490     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11491     // Shift left the contents of upper destination register by true bit count of lower mask register
11492     // and merge with lower destination register.
11493     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11494     __ orl($dst$$Register, $rtmp$$Register);
11495     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out the upper destination register if the true bit count of the
    // lower 32-bit mask is zero, since the contents of the upper destination
    // have already been copied into the lower destination register.
11499     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11501     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11502     __ jmp(exit);
    __ bind(partial_result);
11504     // Perform right shift over upper destination register to move out bits already copied
11505     // to lower destination register.
11506     __ subl($rtmp$$Register, 32);
11507     __ negl($rtmp$$Register);
11508     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11509     __ bind(exit);
11510   %}
11511   ins_pipe( pipe_slow );
11512 %}
11513 
11514 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11515   predicate(n->bottom_type()->isa_long());
11516   match(Set dst (ExpandBits src mask));
11517   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11518   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11519   ins_encode %{
    // The deposit operation reads bits from the source register sequentially,
    // starting at the LSB, and lays them out in the destination register at the
    // bit positions corresponding to true bits in the mask register.  Thus the
    // number of source bits consumed equals the combined true bit count of the
    // mask register pair.
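    // Worked example (illustrative values): mask = 0x000000FF_0FFFFFFF.
    // pdep(src.lo, 0x0FFFFFFF) fills dst.lo[27:0] from src.lo[27:0];
    // popcnt(mask.lo) == 28, so the 4 unconsumed bits src.lo[31:28] are shifted
    // down and deposited into the 4 lowest set bits of mask.hi (dst.hi[3:0]).
    // The mask-clipping loop then clears those 4 bits of the upper mask
    // (32 - 28 = 4 iterations), and src.hi[3:0] is deposited into the bits that
    // remain (dst.hi[7:4]) and OR-ed in, completing the 36-bit expansion.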
11524     Label exit, mask_clipping;
11525     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11526     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11527     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, then no bits of
    // the lower source register feed into the upper destination register.
11530     __ cmpl($rtmp$$Register, 32);
11531     __ jccb(Assembler::equal, exit);
    // Due to the constraint on the number of GPRs on a 32-bit target, an XMM
    // register is used as a spill slot.
11533     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11534     // Shift right the contents of lower source register to remove already consumed bits.
11535     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Deposit the remaining lower-source bits, starting from the LSB, at the
    // positions selected by the upper mask register.
11538     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11539     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11540     __ subl($rtmp$$Register, 32);
11541     __ negl($rtmp$$Register);
11542     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11543     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in the upper mask register that have already been
    // consumed by the bits deposited from the lower source register.
11546     __ bind(mask_clipping);
11547     __ blsrl($mask$$Register, $mask$$Register);
11548     __ decrementl($rtmp$$Register, 1);
11549     __ jccb(Assembler::greater, mask_clipping);
    // Starting from the LSB, deposit the bits of the upper source register at
    // the remaining set bits of the upper mask register.
11552     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11553     // Merge the partial results extracted from lower and upper source register bits.
11554     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11555     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11556     __ bind(exit);
11557   %}
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 // =======================================================================
11562 // fast clearing of an array
11563 // Small ClearArray non-AVX512.
11564 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11565   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11566   match(Set dummy (ClearArray cnt base));
11567   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11568 
11569   format %{ $$template
11570     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11571     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11572     $$emit$$"JG     LARGE\n\t"
11573     $$emit$$"SHL    ECX, 1\n\t"
11574     $$emit$$"DEC    ECX\n\t"
11575     $$emit$$"JS     DONE\t# Zero length\n\t"
11576     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11577     $$emit$$"DEC    ECX\n\t"
11578     $$emit$$"JGE    LOOP\n\t"
11579     $$emit$$"JMP    DONE\n\t"
11580     $$emit$$"# LARGE:\n\t"
11581     if (UseFastStosb) {
11582        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11583        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11584     } else if (UseXMMForObjInit) {
11585        $$emit$$"MOV     RDI,RAX\n\t"
11586        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11587        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11588        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11589        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11590        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11591        $$emit$$"ADD     0x40,RAX\n\t"
11592        $$emit$$"# L_zero_64_bytes:\n\t"
11593        $$emit$$"SUB     0x8,RCX\n\t"
11594        $$emit$$"JGE     L_loop\n\t"
11595        $$emit$$"ADD     0x4,RCX\n\t"
11596        $$emit$$"JL      L_tail\n\t"
11597        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11598        $$emit$$"ADD     0x20,RAX\n\t"
11599        $$emit$$"SUB     0x4,RCX\n\t"
11600        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11601        $$emit$$"ADD     0x4,RCX\n\t"
11602        $$emit$$"JLE     L_end\n\t"
11603        $$emit$$"DEC     RCX\n\t"
11604        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11605        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11606        $$emit$$"ADD     0x8,RAX\n\t"
11607        $$emit$$"DEC     RCX\n\t"
11608        $$emit$$"JGE     L_sloop\n\t"
11609        $$emit$$"# L_end:\n\t"
11610     } else {
11611        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11612        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11613     }
11614     $$emit$$"# DONE"
11615   %}
11616   ins_encode %{
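    // The trailing false/knoreg arguments select the small-array, non-AVX-512
    // path of clear_mem; the large and _evex variants below pass true and a
    // real mask register instead.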
11617     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11618                  $tmp$$XMMRegister, false, knoreg);
11619   %}
11620   ins_pipe( pipe_slow );
11621 %}
11622 
11623 // Small ClearArray AVX512 non-constant length.
11624 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11625   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11626   match(Set dummy (ClearArray cnt base));
11627   ins_cost(125);
11628   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11629 
11630   format %{ $$template
11631     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11632     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11633     $$emit$$"JG     LARGE\n\t"
11634     $$emit$$"SHL    ECX, 1\n\t"
11635     $$emit$$"DEC    ECX\n\t"
11636     $$emit$$"JS     DONE\t# Zero length\n\t"
11637     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11638     $$emit$$"DEC    ECX\n\t"
11639     $$emit$$"JGE    LOOP\n\t"
11640     $$emit$$"JMP    DONE\n\t"
11641     $$emit$$"# LARGE:\n\t"
11642     if (UseFastStosb) {
11643        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11644        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11645     } else if (UseXMMForObjInit) {
11646        $$emit$$"MOV     RDI,RAX\n\t"
11647        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11648        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11649        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11650        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11651        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11652        $$emit$$"ADD     0x40,RAX\n\t"
11653        $$emit$$"# L_zero_64_bytes:\n\t"
11654        $$emit$$"SUB     0x8,RCX\n\t"
11655        $$emit$$"JGE     L_loop\n\t"
11656        $$emit$$"ADD     0x4,RCX\n\t"
11657        $$emit$$"JL      L_tail\n\t"
11658        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11659        $$emit$$"ADD     0x20,RAX\n\t"
11660        $$emit$$"SUB     0x4,RCX\n\t"
11661        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11662        $$emit$$"ADD     0x4,RCX\n\t"
11663        $$emit$$"JLE     L_end\n\t"
11664        $$emit$$"DEC     RCX\n\t"
11665        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11666        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11667        $$emit$$"ADD     0x8,RAX\n\t"
11668        $$emit$$"DEC     RCX\n\t"
11669        $$emit$$"JGE     L_sloop\n\t"
11670        $$emit$$"# L_end:\n\t"
11671     } else {
11672        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11673        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11674     }
11675     $$emit$$"# DONE"
11676   %}
11677   ins_encode %{
11678     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11679                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11680   %}
11681   ins_pipe( pipe_slow );
11682 %}
11683 
11684 // Large ClearArray non-AVX512.
11685 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11686   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11687   match(Set dummy (ClearArray cnt base));
11688   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11689   format %{ $$template
11690     if (UseFastStosb) {
11691        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11692        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11693        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11694     } else if (UseXMMForObjInit) {
11695        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11696        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11697        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11698        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11699        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11700        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11701        $$emit$$"ADD     0x40,RAX\n\t"
11702        $$emit$$"# L_zero_64_bytes:\n\t"
11703        $$emit$$"SUB     0x8,RCX\n\t"
11704        $$emit$$"JGE     L_loop\n\t"
11705        $$emit$$"ADD     0x4,RCX\n\t"
11706        $$emit$$"JL      L_tail\n\t"
11707        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11708        $$emit$$"ADD     0x20,RAX\n\t"
11709        $$emit$$"SUB     0x4,RCX\n\t"
11710        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11711        $$emit$$"ADD     0x4,RCX\n\t"
11712        $$emit$$"JLE     L_end\n\t"
11713        $$emit$$"DEC     RCX\n\t"
11714        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11715        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11716        $$emit$$"ADD     0x8,RAX\n\t"
11717        $$emit$$"DEC     RCX\n\t"
11718        $$emit$$"JGE     L_sloop\n\t"
11719        $$emit$$"# L_end:\n\t"
11720     } else {
11721        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11722        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11723        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11724     }
11725     $$emit$$"# DONE"
11726   %}
11727   ins_encode %{
11728     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11729                  $tmp$$XMMRegister, true, knoreg);
11730   %}
11731   ins_pipe( pipe_slow );
11732 %}
11733 
11734 // Large ClearArray AVX512.
11735 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11736   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11737   match(Set dummy (ClearArray cnt base));
11738   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11739   format %{ $$template
11740     if (UseFastStosb) {
11741        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11742        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11743        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11744     } else if (UseXMMForObjInit) {
11745        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11746        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11747        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11748        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11749        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11750        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11751        $$emit$$"ADD     0x40,RAX\n\t"
11752        $$emit$$"# L_zero_64_bytes:\n\t"
11753        $$emit$$"SUB     0x8,RCX\n\t"
11754        $$emit$$"JGE     L_loop\n\t"
11755        $$emit$$"ADD     0x4,RCX\n\t"
11756        $$emit$$"JL      L_tail\n\t"
11757        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11758        $$emit$$"ADD     0x20,RAX\n\t"
11759        $$emit$$"SUB     0x4,RCX\n\t"
11760        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11761        $$emit$$"ADD     0x4,RCX\n\t"
11762        $$emit$$"JLE     L_end\n\t"
11763        $$emit$$"DEC     RCX\n\t"
11764        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11765        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11766        $$emit$$"ADD     0x8,RAX\n\t"
11767        $$emit$$"DEC     RCX\n\t"
11768        $$emit$$"JGE     L_sloop\n\t"
11769        $$emit$$"# L_end:\n\t"
11770     } else {
11771        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11772        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11773        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11774     }
11775     $$emit$$"# DONE"
11776   %}
11777   ins_encode %{
11778     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11779                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11780   %}
11781   ins_pipe( pipe_slow );
11782 %}
11783 
11784 // Small ClearArray AVX512 constant length.
11785 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11786 %{
11787   predicate(!((ClearArrayNode*)n)->is_large() &&
11788                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11789   match(Set dummy (ClearArray cnt base));
11790   ins_cost(100);
11791   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11792   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11793   ins_encode %{
11794    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11795   %}
11796   ins_pipe(pipe_slow);
11797 %}
11798 
11799 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11800                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11801   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11802   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11803   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11804 
11805   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11806   ins_encode %{
11807     __ string_compare($str1$$Register, $str2$$Register,
11808                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11809                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11810   %}
11811   ins_pipe( pipe_slow );
11812 %}
11813 
11814 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11815                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11816   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11817   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11818   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11819 
11820   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11821   ins_encode %{
11822     __ string_compare($str1$$Register, $str2$$Register,
11823                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11824                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11825   %}
11826   ins_pipe( pipe_slow );
11827 %}
11828 
11829 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11830                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11831   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11832   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11833   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11834 
11835   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11836   ins_encode %{
11837     __ string_compare($str1$$Register, $str2$$Register,
11838                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11839                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11840   %}
11841   ins_pipe( pipe_slow );
11842 %}
11843 
11844 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11845                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11846   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11847   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11848   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11849 
11850   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11851   ins_encode %{
11852     __ string_compare($str1$$Register, $str2$$Register,
11853                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11854                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11855   %}
11856   ins_pipe( pipe_slow );
11857 %}
11858 
11859 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11860                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11861   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11862   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11863   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11864 
11865   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11866   ins_encode %{
11867     __ string_compare($str1$$Register, $str2$$Register,
11868                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11869                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11870   %}
11871   ins_pipe( pipe_slow );
11872 %}
11873 
11874 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11875                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11876   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11877   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11878   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11879 
11880   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11881   ins_encode %{
11882     __ string_compare($str1$$Register, $str2$$Register,
11883                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11884                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11885   %}
11886   ins_pipe( pipe_slow );
11887 %}
11888 
11889 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11890                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11891   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11892   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11893   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11894 
11895   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11896   ins_encode %{
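    // Note: str2/cnt2 and str1/cnt1 are intentionally passed swapped relative to
    // the match rule for the UL encoding (the _evex variant below does the same).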
11897     __ string_compare($str2$$Register, $str1$$Register,
11898                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11899                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11900   %}
11901   ins_pipe( pipe_slow );
11902 %}
11903 
11904 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11905                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11906   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11907   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11908   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11909 
11910   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11911   ins_encode %{
11912     __ string_compare($str2$$Register, $str1$$Register,
11913                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11914                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11915   %}
11916   ins_pipe( pipe_slow );
11917 %}
11918 
11919 // fast string equals
11920 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11921                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11922   predicate(!VM_Version::supports_avx512vlbw());
11923   match(Set result (StrEquals (Binary str1 str2) cnt));
11924   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11925 
11926   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11927   ins_encode %{
11928     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11929                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11930                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11931   %}
11932 
11933   ins_pipe( pipe_slow );
11934 %}
11935 
11936 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11937                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11938   predicate(VM_Version::supports_avx512vlbw());
11939   match(Set result (StrEquals (Binary str1 str2) cnt));
11940   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11941 
11942   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11943   ins_encode %{
11944     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11945                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11946                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11947   %}
11948 
11949   ins_pipe( pipe_slow );
11950 %}
11951 
11952 
11953 // fast search of substring with known size.
11954 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11955                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11956   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11957   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11958   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11959 
11960   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11961   ins_encode %{
11962     int icnt2 = (int)$int_cnt2$$constant;
11963     if (icnt2 >= 16) {
11964       // IndexOf for constant substrings with size >= 16 elements
11965       // which don't need to be loaded through stack.
11966       __ string_indexofC8($str1$$Register, $str2$$Register,
11967                           $cnt1$$Register, $cnt2$$Register,
11968                           icnt2, $result$$Register,
11969                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11970     } else {
11971       // Small strings are loaded through stack if they cross page boundary.
11972       __ string_indexof($str1$$Register, $str2$$Register,
11973                         $cnt1$$Register, $cnt2$$Register,
11974                         icnt2, $result$$Register,
11975                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11976     }
11977   %}
11978   ins_pipe( pipe_slow );
11979 %}
11980 
11981 // fast search of substring with known size.
11982 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11983                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11984   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11985   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11986   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11987 
11988   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11989   ins_encode %{
11990     int icnt2 = (int)$int_cnt2$$constant;
11991     if (icnt2 >= 8) {
11992       // IndexOf for constant substrings with size >= 8 elements
11993       // which don't need to be loaded through stack.
11994       __ string_indexofC8($str1$$Register, $str2$$Register,
11995                           $cnt1$$Register, $cnt2$$Register,
11996                           icnt2, $result$$Register,
11997                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11998     } else {
11999       // Small strings are loaded through stack if they cross page boundary.
12000       __ string_indexof($str1$$Register, $str2$$Register,
12001                         $cnt1$$Register, $cnt2$$Register,
12002                         icnt2, $result$$Register,
12003                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12004     }
12005   %}
12006   ins_pipe( pipe_slow );
12007 %}
12008 
12009 // fast search of substring with known size.
12010 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12011                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12012   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12013   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12014   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12015 
12016   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12017   ins_encode %{
12018     int icnt2 = (int)$int_cnt2$$constant;
12019     if (icnt2 >= 8) {
12020       // IndexOf for constant substrings with size >= 8 elements
12021       // which don't need to be loaded through stack.
12022       __ string_indexofC8($str1$$Register, $str2$$Register,
12023                           $cnt1$$Register, $cnt2$$Register,
12024                           icnt2, $result$$Register,
12025                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12026     } else {
12027       // Small strings are loaded through stack if they cross page boundary.
12028       __ string_indexof($str1$$Register, $str2$$Register,
12029                         $cnt1$$Register, $cnt2$$Register,
12030                         icnt2, $result$$Register,
12031                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12032     }
12033   %}
12034   ins_pipe( pipe_slow );
12035 %}
12036 
12037 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12038                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12039   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12040   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12041   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12042 
12043   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12044   ins_encode %{
12045     __ string_indexof($str1$$Register, $str2$$Register,
12046                       $cnt1$$Register, $cnt2$$Register,
12047                       (-1), $result$$Register,
12048                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12049   %}
12050   ins_pipe( pipe_slow );
12051 %}
12052 
12053 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12054                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12055   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12056   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12057   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12058 
12059   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12060   ins_encode %{
12061     __ string_indexof($str1$$Register, $str2$$Register,
12062                       $cnt1$$Register, $cnt2$$Register,
12063                       (-1), $result$$Register,
12064                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12065   %}
12066   ins_pipe( pipe_slow );
12067 %}
12068 
12069 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12070                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12071   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12072   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12073   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12074 
12075   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12076   ins_encode %{
12077     __ string_indexof($str1$$Register, $str2$$Register,
12078                       $cnt1$$Register, $cnt2$$Register,
12079                       (-1), $result$$Register,
12080                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12081   %}
12082   ins_pipe( pipe_slow );
12083 %}
12084 
12085 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12086                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12087   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12088   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12089   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12090   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12091   ins_encode %{
12092     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12093                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12094   %}
12095   ins_pipe( pipe_slow );
12096 %}
12097 
12098 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12099                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12100   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12101   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12102   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12103   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12104   ins_encode %{
12105     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12106                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12107   %}
12108   ins_pipe( pipe_slow );
12109 %}
12110 
12111 
12112 // fast array equals
12113 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12114                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12115 %{
12116   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12117   match(Set result (AryEq ary1 ary2));
12118   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12119   //ins_cost(300);
12120 
12121   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12122   ins_encode %{
12123     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12124                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12125                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12126   %}
12127   ins_pipe( pipe_slow );
12128 %}
12129 
12130 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12131                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12132 %{
12133   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12134   match(Set result (AryEq ary1 ary2));
12135   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12136   //ins_cost(300);
12137 
12138   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12139   ins_encode %{
12140     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12141                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12142                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12143   %}
12144   ins_pipe( pipe_slow );
12145 %}
12146 
12147 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12148                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12149 %{
12150   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12151   match(Set result (AryEq ary1 ary2));
12152   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12153   //ins_cost(300);
12154 
12155   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12156   ins_encode %{
12157     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12158                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12159                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12160   %}
12161   ins_pipe( pipe_slow );
12162 %}
12163 
12164 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12165                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12166 %{
12167   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12168   match(Set result (AryEq ary1 ary2));
12169   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12170   //ins_cost(300);
12171 
12172   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12173   ins_encode %{
12174     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12175                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12176                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12177   %}
12178   ins_pipe( pipe_slow );
12179 %}
12180 
12181 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12182                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12183 %{
12184   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12185   match(Set result (CountPositives ary1 len));
12186   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12187 
12188   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12189   ins_encode %{
12190     __ count_positives($ary1$$Register, $len$$Register,
12191                        $result$$Register, $tmp3$$Register,
12192                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12193   %}
12194   ins_pipe( pipe_slow );
12195 %}
12196 
12197 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12198                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12199 %{
12200   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12201   match(Set result (CountPositives ary1 len));
12202   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12203 
12204   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12205   ins_encode %{
12206     __ count_positives($ary1$$Register, $len$$Register,
12207                        $result$$Register, $tmp3$$Register,
12208                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12209   %}
12210   ins_pipe( pipe_slow );
12211 %}
12212 
12213 
12214 // fast char[] to byte[] compression
12215 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12216                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12217   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12218   match(Set result (StrCompressedCopy src (Binary dst len)));
12219   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12220 
12221   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12222   ins_encode %{
12223     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12224                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12225                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12226                            knoreg, knoreg);
12227   %}
12228   ins_pipe( pipe_slow );
12229 %}
12230 
12231 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12232                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12233   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12234   match(Set result (StrCompressedCopy src (Binary dst len)));
12235   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12236 
12237   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12238   ins_encode %{
12239     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12240                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12241                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12242                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12243   %}
12244   ins_pipe( pipe_slow );
12245 %}
12246 
12247 // fast byte[] to char[] inflation
12248 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12249                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12250   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12251   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12252   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12253 
12254   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12255   ins_encode %{
12256     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12257                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12258   %}
12259   ins_pipe( pipe_slow );
12260 %}
12261 
12262 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12263                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12264   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12265   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12266   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12267 
12268   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12269   ins_encode %{
12270     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12271                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12272   %}
12273   ins_pipe( pipe_slow );
12274 %}
12275 
12276 // encode char[] to byte[] in ISO_8859_1
12277 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12278                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12279                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12280   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12281   match(Set result (EncodeISOArray src (Binary dst len)));
12282   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12283 
12284   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12285   ins_encode %{
12286     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12287                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12288                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12289   %}
12290   ins_pipe( pipe_slow );
12291 %}
12292 
12293 // encode char[] to byte[] in ASCII
12294 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12295                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12296                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12297   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12298   match(Set result (EncodeISOArray src (Binary dst len)));
12299   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12300 
12301   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12302   ins_encode %{
12303     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12304                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12305                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12306   %}
12307   ins_pipe( pipe_slow );
12308 %}
12309 
12310 //----------Control Flow Instructions------------------------------------------
12311 // Signed compare Instructions
12312 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12313   match(Set cr (CmpI op1 op2));
12314   effect( DEF cr, USE op1, USE op2 );
12315   format %{ "CMP    $op1,$op2" %}
12316   opcode(0x3B);  /* Opcode 3B /r */
12317   ins_encode( OpcP, RegReg( op1, op2) );
12318   ins_pipe( ialu_cr_reg_reg );
12319 %}
12320 
12321 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12322   match(Set cr (CmpI op1 op2));
12323   effect( DEF cr, USE op1 );
12324   format %{ "CMP    $op1,$op2" %}
12325   opcode(0x81,0x07);  /* Opcode 81 /7 */
12326   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12327   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12328   ins_pipe( ialu_cr_reg_imm );
12329 %}
12330 
12331 // Cisc-spilled version of cmpI_eReg
12332 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12333   match(Set cr (CmpI op1 (LoadI op2)));
12334 
12335   format %{ "CMP    $op1,$op2" %}
12336   ins_cost(500);
12337   opcode(0x3B);  /* Opcode 3B /r */
12338   ins_encode( OpcP, RegMem( op1, op2) );
12339   ins_pipe( ialu_cr_reg_mem );
12340 %}
12341 
12342 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12343   match(Set cr (CmpI src zero));
12344   effect( DEF cr, USE src );
12345 
12346   format %{ "TEST   $src,$src" %}
12347   opcode(0x85);
12348   ins_encode( OpcP, RegReg( src, src ) );
12349   ins_pipe( ialu_cr_reg_imm );
12350 %}
12351 
12352 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12353   match(Set cr (CmpI (AndI src con) zero));
12354 
12355   format %{ "TEST   $src,$con" %}
12356   opcode(0xF7,0x00);
12357   ins_encode( OpcP, RegOpc(src), Con32(con) );
12358   ins_pipe( ialu_cr_reg_imm );
12359 %}
12360 
12361 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12362   match(Set cr (CmpI (AndI src mem) zero));
12363 
12364   format %{ "TEST   $src,$mem" %}
12365   opcode(0x85);
12366   ins_encode( OpcP, RegMem( src, mem ) );
12367   ins_pipe( ialu_cr_reg_mem );
12368 %}
12369 
12370 // Unsigned compare Instructions; really, same as signed except they
12371 // produce an eFlagsRegU instead of eFlagsReg.
12372 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12373   match(Set cr (CmpU op1 op2));
12374 
12375   format %{ "CMPu   $op1,$op2" %}
12376   opcode(0x3B);  /* Opcode 3B /r */
12377   ins_encode( OpcP, RegReg( op1, op2) );
12378   ins_pipe( ialu_cr_reg_reg );
12379 %}
12380 
12381 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12382   match(Set cr (CmpU op1 op2));
12383 
12384   format %{ "CMPu   $op1,$op2" %}
12385   opcode(0x81,0x07);  /* Opcode 81 /7 */
12386   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12387   ins_pipe( ialu_cr_reg_imm );
12388 %}
12389 
// Cisc-spilled version of cmpU_eReg
12391 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12392   match(Set cr (CmpU op1 (LoadI op2)));
12393 
12394   format %{ "CMPu   $op1,$op2" %}
12395   ins_cost(500);
12396   opcode(0x3B);  /* Opcode 3B /r */
12397   ins_encode( OpcP, RegMem( op1, op2) );
12398   ins_pipe( ialu_cr_reg_mem );
12399 %}
12400 
12401 // // Cisc-spilled version of cmpU_eReg
12402 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12403 //  match(Set cr (CmpU (LoadI op1) op2));
12404 //
12405 //  format %{ "CMPu   $op1,$op2" %}
12406 //  ins_cost(500);
12407 //  opcode(0x39);  /* Opcode 39 /r */
12408 //  ins_encode( OpcP, RegMem( op1, op2) );
12409 //%}
12410 
12411 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12412   match(Set cr (CmpU src zero));
12413 
12414   format %{ "TESTu  $src,$src" %}
12415   opcode(0x85);
12416   ins_encode( OpcP, RegReg( src, src ) );
12417   ins_pipe( ialu_cr_reg_imm );
12418 %}
12419 
12420 // Unsigned pointer compare Instructions
12421 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12422   match(Set cr (CmpP op1 op2));
12423 
12424   format %{ "CMPu   $op1,$op2" %}
12425   opcode(0x3B);  /* Opcode 3B /r */
12426   ins_encode( OpcP, RegReg( op1, op2) );
12427   ins_pipe( ialu_cr_reg_reg );
12428 %}
12429 
12430 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12431   match(Set cr (CmpP op1 op2));
12432 
12433   format %{ "CMPu   $op1,$op2" %}
12434   opcode(0x81,0x07);  /* Opcode 81 /7 */
12435   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12436   ins_pipe( ialu_cr_reg_imm );
12437 %}
12438 
// Cisc-spilled version of cmpP_eReg
12440 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12441   match(Set cr (CmpP op1 (LoadP op2)));
12442 
12443   format %{ "CMPu   $op1,$op2" %}
12444   ins_cost(500);
12445   opcode(0x3B);  /* Opcode 3B /r */
12446   ins_encode( OpcP, RegMem( op1, op2) );
12447   ins_pipe( ialu_cr_reg_mem );
12448 %}
12449 
12450 // // Cisc-spilled version of cmpP_eReg
12451 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12452 //  match(Set cr (CmpP (LoadP op1) op2));
12453 //
12454 //  format %{ "CMPu   $op1,$op2" %}
12455 //  ins_cost(500);
12456 //  opcode(0x39);  /* Opcode 39 /r */
12457 //  ins_encode( OpcP, RegMem( op1, op2) );
12458 //%}
12459 
12460 // Compare raw pointer (used in out-of-heap check).
12461 // Only works because non-oop pointers must be raw pointers
12462 // and raw pointers have no anti-dependencies.
12463 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12464   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12465   match(Set cr (CmpP op1 (LoadP op2)));
12466 
12467   format %{ "CMPu   $op1,$op2" %}
12468   opcode(0x3B);  /* Opcode 3B /r */
12469   ins_encode( OpcP, RegMem( op1, op2) );
12470   ins_pipe( ialu_cr_reg_mem );
12471 %}
12472 
12473 //
12474 // This will generate a signed flags result. This should be ok
12475 // since any compare to a zero should be eq/neq.
12476 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12477   match(Set cr (CmpP src zero));
12478 
12479   format %{ "TEST   $src,$src" %}
12480   opcode(0x85);
12481   ins_encode( OpcP, RegReg( src, src ) );
12482   ins_pipe( ialu_cr_reg_imm );
12483 %}
12484 
12485 // Cisc-spilled version of testP_reg
12486 // This will generate a signed flags result. This should be ok
12487 // since any compare to a zero should be eq/neq.
12488 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12489   match(Set cr (CmpP (LoadP op) zero));
12490 
12491   format %{ "TEST   $op,0xFFFFFFFF" %}
12492   ins_cost(500);
12493   opcode(0xF7);               /* Opcode F7 /0 */
12494   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12495   ins_pipe( ialu_cr_reg_imm );
12496 %}
12497 
12498 // Yanked all unsigned pointer compare operations.
12499 // Pointer compares are done with CmpP which is already unsigned.
12500 
12501 //----------Max and Min--------------------------------------------------------
12502 // Min Instructions
12503 ////
12504 //   *** Min and Max using the conditional move are slower than the
12505 //   *** branch version on a Pentium III.
12506 // // Conditional move for min
12507 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12508 //  effect( USE_DEF op2, USE op1, USE cr );
12509 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12510 //  opcode(0x4C,0x0F);
12511 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12512 //  ins_pipe( pipe_cmov_reg );
12513 //%}
12514 //
12515 //// Min Register with Register (P6 version)
12516 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12517 //  predicate(VM_Version::supports_cmov() );
12518 //  match(Set op2 (MinI op1 op2));
12519 //  ins_cost(200);
12520 //  expand %{
12521 //    eFlagsReg cr;
12522 //    compI_eReg(cr,op1,op2);
12523 //    cmovI_reg_lt(op2,op1,cr);
12524 //  %}
12525 //%}
12526 
12527 // Min Register with Register (generic version)
12528 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12529   match(Set dst (MinI dst src));
12530   effect(KILL flags);
12531   ins_cost(300);
12532 
12533   format %{ "MIN    $dst,$src" %}
12534   opcode(0xCC);
12535   ins_encode( min_enc(dst,src) );
12536   ins_pipe( pipe_slow );
12537 %}
12538 
12539 // Max Register with Register
12540 //   *** Min and Max using the conditional move are slower than the
12541 //   *** branch version on a Pentium III.
12542 // // Conditional move for max
12543 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12544 //  effect( USE_DEF op2, USE op1, USE cr );
12545 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12546 //  opcode(0x4F,0x0F);
12547 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12548 //  ins_pipe( pipe_cmov_reg );
12549 //%}
12550 //
12551 // // Max Register with Register (P6 version)
12552 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12553 //  predicate(VM_Version::supports_cmov() );
12554 //  match(Set op2 (MaxI op1 op2));
12555 //  ins_cost(200);
12556 //  expand %{
12557 //    eFlagsReg cr;
12558 //    compI_eReg(cr,op1,op2);
12559 //    cmovI_reg_gt(op2,op1,cr);
12560 //  %}
12561 //%}
12562 
12563 // Max Register with Register (generic version)
12564 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12565   match(Set dst (MaxI dst src));
12566   effect(KILL flags);
12567   ins_cost(300);
12568 
12569   format %{ "MAX    $dst,$src" %}
12570   opcode(0xCC);
12571   ins_encode( max_enc(dst,src) );
12572   ins_pipe( pipe_slow );
12573 %}
12574 
12575 // ============================================================================
12576 // Counted Loop limit node which represents exact final iterator value.
12577 // Note: the resulting value should fit into integer range since
12578 // counted loops have limit check on overflow.
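// A worked example (illustrative only): with init = 0, limit = 10 and
// stride = 3 the exact final value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * (12 / 3) = 12,
// the first induction-variable value at or beyond the original limit.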
12579 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12580   match(Set limit (LoopLimit (Binary init limit) stride));
12581   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12582   ins_cost(300);
12583 
12584   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12585   ins_encode %{
12586     int strd = (int)$stride$$constant;
12587     assert(strd != 1 && strd != -1, "sanity");
12588     int m1 = (strd > 0) ? 1 : -1;
    // Sign-extend limit to a long in EDX:EAX
12590     __ cdql();
    // Sign-extend init to a long in tmp:init (tmp holds the high word)
12592     __ movl($tmp$$Register, $init$$Register);
12593     __ sarl($tmp$$Register, 31);
12594     // $limit - $init
12595     __ subl($limit$$Register, $init$$Register);
12596     __ sbbl($limit_hi$$Register, $tmp$$Register);
12597     // + ($stride - 1)
12598     if (strd > 0) {
12599       __ addl($limit$$Register, (strd - 1));
12600       __ adcl($limit_hi$$Register, 0);
12601       __ movl($tmp$$Register, strd);
12602     } else {
12603       __ addl($limit$$Register, (strd + 1));
12604       __ adcl($limit_hi$$Register, -1);
12605       __ lneg($limit_hi$$Register, $limit$$Register);
12606       __ movl($tmp$$Register, -strd);
12607     }
    // signed division: (EDX:EAX) / pos_stride
12609     __ idivl($tmp$$Register);
12610     if (strd < 0) {
12611       // restore sign
12612       __ negl($tmp$$Register);
12613     }
12614     // (EAX) * stride
12615     __ mull($tmp$$Register);
12616     // + init (ignore upper bits)
12617     __ addl($limit$$Register, $init$$Register);
12618   %}
12619   ins_pipe( pipe_slow );
12620 %}
12621 
12622 // ============================================================================
12623 // Branch Instructions
12624 // Jump Table
12625 instruct jumpXtnd(rRegI switch_val) %{
12626   match(Jump switch_val);
12627   ins_cost(350);
12628   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12629   ins_encode %{
12630     // Jump to Address(table_base + switch_reg)
12631     Address index(noreg, $switch_val$$Register, Address::times_1);
12632     __ jump(ArrayAddress($constantaddress, index), noreg);
12633   %}
12634   ins_pipe(pipe_jmp);
12635 %}
12636 
12637 // Jump Direct - Label defines a relative address from JMP+1
12638 instruct jmpDir(label labl) %{
12639   match(Goto);
12640   effect(USE labl);
12641 
12642   ins_cost(300);
12643   format %{ "JMP    $labl" %}
12644   size(5);
12645   ins_encode %{
12646     Label* L = $labl$$label;
12647     __ jmp(*L, false); // Always long jump
12648   %}
12649   ins_pipe( pipe_jmp );
12650 %}
12651 
12652 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12653 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12654   match(If cop cr);
12655   effect(USE labl);
12656 
12657   ins_cost(300);
12658   format %{ "J$cop    $labl" %}
12659   size(6);
12660   ins_encode %{
12661     Label* L = $labl$$label;
12662     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12663   %}
12664   ins_pipe( pipe_jcc );
12665 %}
12666 
12667 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12668 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12669   match(CountedLoopEnd cop cr);
12670   effect(USE labl);
12671 
12672   ins_cost(300);
12673   format %{ "J$cop    $labl\t# Loop end" %}
12674   size(6);
12675   ins_encode %{
12676     Label* L = $labl$$label;
12677     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12678   %}
12679   ins_pipe( pipe_jcc );
12680 %}
12681 
12682 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12683 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12684   match(CountedLoopEnd cop cmp);
12685   effect(USE labl);
12686 
12687   ins_cost(300);
12688   format %{ "J$cop,u  $labl\t# Loop end" %}
12689   size(6);
12690   ins_encode %{
12691     Label* L = $labl$$label;
12692     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12693   %}
12694   ins_pipe( pipe_jcc );
12695 %}
12696 
12697 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12698   match(CountedLoopEnd cop cmp);
12699   effect(USE labl);
12700 
12701   ins_cost(200);
12702   format %{ "J$cop,u  $labl\t# Loop end" %}
12703   size(6);
12704   ins_encode %{
12705     Label* L = $labl$$label;
12706     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12707   %}
12708   ins_pipe( pipe_jcc );
12709 %}
12710 
12711 // Jump Direct Conditional - using unsigned comparison
12712 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12713   match(If cop cmp);
12714   effect(USE labl);
12715 
12716   ins_cost(300);
12717   format %{ "J$cop,u  $labl" %}
12718   size(6);
12719   ins_encode %{
12720     Label* L = $labl$$label;
12721     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12722   %}
12723   ins_pipe(pipe_jcc);
12724 %}
12725 
12726 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12727   match(If cop cmp);
12728   effect(USE labl);
12729 
12730   ins_cost(200);
12731   format %{ "J$cop,u  $labl" %}
12732   size(6);
12733   ins_encode %{
12734     Label* L = $labl$$label;
12735     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12736   %}
12737   ins_pipe(pipe_jcc);
12738 %}
12739 
12740 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12741   match(If cop cmp);
12742   effect(USE labl);
12743 
12744   ins_cost(200);
12745   format %{ $$template
12746     if ($cop$$cmpcode == Assembler::notEqual) {
12747       $$emit$$"JP,u   $labl\n\t"
12748       $$emit$$"J$cop,u   $labl"
12749     } else {
12750       $$emit$$"JP,u   done\n\t"
12751       $$emit$$"J$cop,u   $labl\n\t"
12752       $$emit$$"done:"
12753     }
12754   %}
12755   ins_encode %{
12756     Label* l = $labl$$label;
12757     if ($cop$$cmpcode == Assembler::notEqual) {
12758       __ jcc(Assembler::parity, *l, false);
12759       __ jcc(Assembler::notEqual, *l, false);
12760     } else if ($cop$$cmpcode == Assembler::equal) {
12761       Label done;
12762       __ jccb(Assembler::parity, done);
12763       __ jcc(Assembler::equal, *l, false);
12764       __ bind(done);
12765     } else {
12766        ShouldNotReachHere();
12767     }
12768   %}
12769   ins_pipe(pipe_jcc);
12770 %}
12771 
12772 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
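//
// Roughly, the REPNE SCASD scan below corresponds to this hypothetical C
// sketch (field names simplified, not the emitted code):
//
//   for (int i = 0; i < secondary_supers->length; i++) {
//     if (secondary_supers->data[i] == super) {
//       sub->secondary_super_cache = super;   // hit: update the cache
//       return 0;                             // zero result, flags Z
//     }
//   }
//   return 1;                                 // miss: non-zero, flags NZ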
12777 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12778   match(Set result (PartialSubtypeCheck sub super));
12779   effect( KILL rcx, KILL cr );
12780 
12781   ins_cost(1100);  // slightly larger than the next version
12782   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12783             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12784             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12785             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12786             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12787             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12788             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12789      "miss:\t" %}
12790 
12791   opcode(0x1); // Force a XOR of EDI
12792   ins_encode( enc_PartialSubtypeCheck() );
12793   ins_pipe( pipe_slow );
12794 %}
12795 
12796 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12797   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12798   effect( KILL rcx, KILL result );
12799 
12800   ins_cost(1000);
12801   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12802             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12803             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12804             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12805             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12806             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12807      "miss:\t" %}
12808 
12809   opcode(0x0);  // No need to XOR EDI
12810   ins_encode( enc_PartialSubtypeCheck() );
12811   ins_pipe( pipe_slow );
12812 %}
12813 
12814 // ============================================================================
12815 // Branch Instructions -- short offset versions
12816 //
12817 // These instructions are used to replace jumps of a long offset (the default
12818 // match) with jumps of a shorter offset.  These instructions are all tagged
12819 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12820 // match rules in general matching.  Instead, the ADLC generates a conversion
12821 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
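//
// For example, the unconditional jump shrinks from the 5-byte JMP rel32 form
// (opcode E9) to the 2-byte JMP rel8 form (opcode EB), and conditional jumps
// from 6 bytes (0F 8x rel32) to 2 bytes (7x rel8); the size() attributes on
// the long and short variants reflect this.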
12825 
12826 // Jump Direct - Label defines a relative address from JMP+1
12827 instruct jmpDir_short(label labl) %{
12828   match(Goto);
12829   effect(USE labl);
12830 
12831   ins_cost(300);
12832   format %{ "JMP,s  $labl" %}
12833   size(2);
12834   ins_encode %{
12835     Label* L = $labl$$label;
12836     __ jmpb(*L);
12837   %}
12838   ins_pipe( pipe_jmp );
12839   ins_short_branch(1);
12840 %}
12841 
12842 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12843 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12844   match(If cop cr);
12845   effect(USE labl);
12846 
12847   ins_cost(300);
12848   format %{ "J$cop,s  $labl" %}
12849   size(2);
12850   ins_encode %{
12851     Label* L = $labl$$label;
12852     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12853   %}
12854   ins_pipe( pipe_jcc );
12855   ins_short_branch(1);
12856 %}
12857 
12858 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12859 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12860   match(CountedLoopEnd cop cr);
12861   effect(USE labl);
12862 
12863   ins_cost(300);
12864   format %{ "J$cop,s  $labl\t# Loop end" %}
12865   size(2);
12866   ins_encode %{
12867     Label* L = $labl$$label;
12868     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12869   %}
12870   ins_pipe( pipe_jcc );
12871   ins_short_branch(1);
12872 %}
12873 
12874 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12875 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12876   match(CountedLoopEnd cop cmp);
12877   effect(USE labl);
12878 
12879   ins_cost(300);
12880   format %{ "J$cop,us $labl\t# Loop end" %}
12881   size(2);
12882   ins_encode %{
12883     Label* L = $labl$$label;
12884     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12885   %}
12886   ins_pipe( pipe_jcc );
12887   ins_short_branch(1);
12888 %}
12889 
12890 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12891   match(CountedLoopEnd cop cmp);
12892   effect(USE labl);
12893 
12894   ins_cost(300);
12895   format %{ "J$cop,us $labl\t# Loop end" %}
12896   size(2);
12897   ins_encode %{
12898     Label* L = $labl$$label;
12899     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12900   %}
12901   ins_pipe( pipe_jcc );
12902   ins_short_branch(1);
12903 %}
12904 
12905 // Jump Direct Conditional - using unsigned comparison
12906 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12907   match(If cop cmp);
12908   effect(USE labl);
12909 
12910   ins_cost(300);
12911   format %{ "J$cop,us $labl" %}
12912   size(2);
12913   ins_encode %{
12914     Label* L = $labl$$label;
12915     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12916   %}
12917   ins_pipe( pipe_jcc );
12918   ins_short_branch(1);
12919 %}
12920 
12921 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12922   match(If cop cmp);
12923   effect(USE labl);
12924 
12925   ins_cost(300);
12926   format %{ "J$cop,us $labl" %}
12927   size(2);
12928   ins_encode %{
12929     Label* L = $labl$$label;
12930     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12931   %}
12932   ins_pipe( pipe_jcc );
12933   ins_short_branch(1);
12934 %}
12935 
12936 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12937   match(If cop cmp);
12938   effect(USE labl);
12939 
12940   ins_cost(300);
12941   format %{ $$template
12942     if ($cop$$cmpcode == Assembler::notEqual) {
12943       $$emit$$"JP,u,s   $labl\n\t"
12944       $$emit$$"J$cop,u,s   $labl"
12945     } else {
12946       $$emit$$"JP,u,s   done\n\t"
12947       $$emit$$"J$cop,u,s  $labl\n\t"
12948       $$emit$$"done:"
12949     }
12950   %}
12951   size(4);
12952   ins_encode %{
12953     Label* l = $labl$$label;
12954     if ($cop$$cmpcode == Assembler::notEqual) {
12955       __ jccb(Assembler::parity, *l);
12956       __ jccb(Assembler::notEqual, *l);
12957     } else if ($cop$$cmpcode == Assembler::equal) {
12958       Label done;
12959       __ jccb(Assembler::parity, done);
12960       __ jccb(Assembler::equal, *l);
12961       __ bind(done);
12962     } else {
12963        ShouldNotReachHere();
12964     }
12965   %}
12966   ins_pipe(pipe_jcc);
12967   ins_short_branch(1);
12968 %}
12969 
12970 // ============================================================================
12971 // Long Compare
12972 //
12973 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12974 // is tricky.  The flavor of compare used depends on whether we are testing
12975 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12976 // The GE test is the negated LT test.  The LE test can be had by commuting
12977 // the operands (yielding a GE test) and then negating; negate again for the
12978 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12979 // NE test is negated from that.
12980 
12981 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12982 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12983 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12984 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12985 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12986 // foo match ends up with the wrong leaf.  One fix is to not match both
12987 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12988 // both forms beat the trinary form of long-compare and both are very useful
12989 // on Intel which has so few registers.
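//
// As a rough illustration (hypothetical C sketch, not the emitted code), a
// two-register signed compare decomposes as:
//
//   bool lt = (int32_t)x_hi <  (int32_t)y_hi ||
//             (x_hi == y_hi && (uint32_t)x_lo < (uint32_t)y_lo);
//   bool eq = ((x_lo ^ y_lo) | (x_hi ^ y_hi)) == 0;   // OR/XOR the halves
//
// The CMP/SBB and OR sequences below compute the same answers directly in the
// flags register.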
12990 
12991 // Manifest a CmpL result in an integer register.  Very painful.
12992 // This is the test to avoid.
12993 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12994   match(Set dst (CmpL3 src1 src2));
12995   effect( KILL flags );
12996   ins_cost(1000);
12997   format %{ "XOR    $dst,$dst\n\t"
12998             "CMP    $src1.hi,$src2.hi\n\t"
12999             "JLT,s  m_one\n\t"
13000             "JGT,s  p_one\n\t"
13001             "CMP    $src1.lo,$src2.lo\n\t"
13002             "JB,s   m_one\n\t"
13003             "JEQ,s  done\n"
13004     "p_one:\tINC    $dst\n\t"
13005             "JMP,s  done\n"
13006     "m_one:\tDEC    $dst\n"
13007      "done:" %}
13008   ins_encode %{
13009     Label p_one, m_one, done;
13010     __ xorptr($dst$$Register, $dst$$Register);
13011     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13012     __ jccb(Assembler::less,    m_one);
13013     __ jccb(Assembler::greater, p_one);
13014     __ cmpl($src1$$Register, $src2$$Register);
13015     __ jccb(Assembler::below,   m_one);
13016     __ jccb(Assembler::equal,   done);
13017     __ bind(p_one);
13018     __ incrementl($dst$$Register);
13019     __ jmpb(done);
13020     __ bind(m_one);
13021     __ decrementl($dst$$Register);
13022     __ bind(done);
13023   %}
13024   ins_pipe( pipe_slow );
13025 %}
13026 
13027 //======
13028 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13029 // compares.  Can be used for LE or GT compares by reversing arguments.
13030 // NOT GOOD FOR EQ/NE tests.
13031 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13032   match( Set flags (CmpL src zero ));
13033   ins_cost(100);
13034   format %{ "TEST   $src.hi,$src.hi" %}
13035   opcode(0x85);
13036   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13037   ins_pipe( ialu_cr_reg_reg );
13038 %}
13039 
13040 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13041 // compares.  Can be used for LE or GT compares by reversing arguments.
13042 // NOT GOOD FOR EQ/NE tests.
13043 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13044   match( Set flags (CmpL src1 src2 ));
13045   effect( TEMP tmp );
13046   ins_cost(300);
13047   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13048             "MOV    $tmp,$src1.hi\n\t"
13049             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13050   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13051   ins_pipe( ialu_cr_reg_reg );
13052 %}
13053 
// Long compares reg < zero/reg OR reg >= zero/reg.
13055 // Just a wrapper for a normal branch, plus the predicate test.
13056 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13057   match(If cmp flags);
13058   effect(USE labl);
13059   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13060   expand %{
13061     jmpCon(cmp,flags,labl);    // JLT or JGE...
13062   %}
13063 %}
13064 
13065 //======
13066 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13067 // compares.  Can be used for LE or GT compares by reversing arguments.
13068 // NOT GOOD FOR EQ/NE tests.
13069 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13070   match(Set flags (CmpUL src zero));
13071   ins_cost(100);
13072   format %{ "TEST   $src.hi,$src.hi" %}
13073   opcode(0x85);
13074   ins_encode(OpcP, RegReg_Hi2(src, src));
13075   ins_pipe(ialu_cr_reg_reg);
13076 %}
13077 
13078 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13079 // compares.  Can be used for LE or GT compares by reversing arguments.
13080 // NOT GOOD FOR EQ/NE tests.
13081 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13082   match(Set flags (CmpUL src1 src2));
13083   effect(TEMP tmp);
13084   ins_cost(300);
13085   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13086             "MOV    $tmp,$src1.hi\n\t"
13087             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13088   ins_encode(long_cmp_flags2(src1, src2, tmp));
13089   ins_pipe(ialu_cr_reg_reg);
13090 %}
13091 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13093 // Just a wrapper for a normal branch, plus the predicate test.
13094 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13095   match(If cmp flags);
13096   effect(USE labl);
13097   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13098   expand %{
13099     jmpCon(cmp, flags, labl);    // JLT or JGE...
13100   %}
13101 %}
13102 
13103 // Compare 2 longs and CMOVE longs.
13104 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13105   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13106   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13107   ins_cost(400);
13108   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13109             "CMOV$cmp $dst.hi,$src.hi" %}
13110   opcode(0x0F,0x40);
13111   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13112   ins_pipe( pipe_cmov_reg_long );
13113 %}
13114 
13115 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13116   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13117   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13118   ins_cost(500);
13119   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13120             "CMOV$cmp $dst.hi,$src.hi" %}
13121   opcode(0x0F,0x40);
13122   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13123   ins_pipe( pipe_cmov_reg_long );
13124 %}
13125 
13126 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13127   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13128   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13129   ins_cost(400);
13130   expand %{
13131     cmovLL_reg_LTGE(cmp, flags, dst, src);
13132   %}
13133 %}
13134 
13135 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13136   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13137   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13138   ins_cost(500);
13139   expand %{
13140     cmovLL_mem_LTGE(cmp, flags, dst, src);
13141   %}
13142 %}
13143 
13144 // Compare 2 longs and CMOVE ints.
13145 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13146   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13147   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13148   ins_cost(200);
13149   format %{ "CMOV$cmp $dst,$src" %}
13150   opcode(0x0F,0x40);
13151   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13152   ins_pipe( pipe_cmov_reg );
13153 %}
13154 
13155 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13156   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13157   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13158   ins_cost(250);
13159   format %{ "CMOV$cmp $dst,$src" %}
13160   opcode(0x0F,0x40);
13161   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13162   ins_pipe( pipe_cmov_mem );
13163 %}
13164 
13165 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13166   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13167   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13168   ins_cost(200);
13169   expand %{
13170     cmovII_reg_LTGE(cmp, flags, dst, src);
13171   %}
13172 %}
13173 
13174 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13175   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13176   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13177   ins_cost(250);
13178   expand %{
13179     cmovII_mem_LTGE(cmp, flags, dst, src);
13180   %}
13181 %}
13182 
13183 // Compare 2 longs and CMOVE ptrs.
13184 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13185   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13186   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13187   ins_cost(200);
13188   format %{ "CMOV$cmp $dst,$src" %}
13189   opcode(0x0F,0x40);
13190   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13191   ins_pipe( pipe_cmov_reg );
13192 %}
13193 
13194 // Compare 2 unsigned longs and CMOVE ptrs.
13195 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13196   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13197   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13198   ins_cost(200);
13199   expand %{
13200     cmovPP_reg_LTGE(cmp,flags,dst,src);
13201   %}
13202 %}
13203 
13204 // Compare 2 longs and CMOVE doubles
13205 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13207   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13208   ins_cost(200);
13209   expand %{
13210     fcmovDPR_regS(cmp,flags,dst,src);
13211   %}
13212 %}
13213 
13214 // Compare 2 longs and CMOVE doubles
13215 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13217   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13218   ins_cost(200);
13219   expand %{
13220     fcmovD_regS(cmp,flags,dst,src);
13221   %}
13222 %}
13223 
13224 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13226   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13227   ins_cost(200);
13228   expand %{
13229     fcmovFPR_regS(cmp,flags,dst,src);
13230   %}
13231 %}
13232 
13233 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13235   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13236   ins_cost(200);
13237   expand %{
13238     fcmovF_regS(cmp,flags,dst,src);
13239   %}
13240 %}
13241 
13242 //======
13243 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13244 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13245   match( Set flags (CmpL src zero ));
13246   effect(TEMP tmp);
13247   ins_cost(200);
13248   format %{ "MOV    $tmp,$src.lo\n\t"
13249             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13250   ins_encode( long_cmp_flags0( src, tmp ) );
13251   ins_pipe( ialu_reg_reg_long );
13252 %}
13253 
13254 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13255 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13256   match( Set flags (CmpL src1 src2 ));
13257   ins_cost(200+300);
13258   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13259             "JNE,s  skip\n\t"
13260             "CMP    $src1.hi,$src2.hi\n\t"
13261      "skip:\t" %}
13262   ins_encode( long_cmp_flags1( src1, src2 ) );
13263   ins_pipe( ialu_cr_reg_reg );
13264 %}
13265 
13266 // Long compare reg == zero/reg OR reg != zero/reg
13267 // Just a wrapper for a normal branch, plus the predicate test.
13268 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13269   match(If cmp flags);
13270   effect(USE labl);
13271   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13272   expand %{
13273     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13274   %}
13275 %}
13276 
13277 //======
13278 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13279 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13280   match(Set flags (CmpUL src zero));
13281   effect(TEMP tmp);
13282   ins_cost(200);
13283   format %{ "MOV    $tmp,$src.lo\n\t"
13284             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13285   ins_encode(long_cmp_flags0(src, tmp));
13286   ins_pipe(ialu_reg_reg_long);
13287 %}
13288 
13289 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13290 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13291   match(Set flags (CmpUL src1 src2));
13292   ins_cost(200+300);
13293   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13294             "JNE,s  skip\n\t"
13295             "CMP    $src1.hi,$src2.hi\n\t"
13296      "skip:\t" %}
13297   ins_encode(long_cmp_flags1(src1, src2));
13298   ins_pipe(ialu_cr_reg_reg);
13299 %}
13300 
13301 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13302 // Just a wrapper for a normal branch, plus the predicate test.
13303 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13304   match(If cmp flags);
13305   effect(USE labl);
13306   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13307   expand %{
13308     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13309   %}
13310 %}
13311 
13312 // Compare 2 longs and CMOVE longs.
13313 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13314   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13315   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13316   ins_cost(400);
13317   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13318             "CMOV$cmp $dst.hi,$src.hi" %}
13319   opcode(0x0F,0x40);
13320   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13321   ins_pipe( pipe_cmov_reg_long );
13322 %}
13323 
13324 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13325   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13326   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13327   ins_cost(500);
13328   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13329             "CMOV$cmp $dst.hi,$src.hi" %}
13330   opcode(0x0F,0x40);
13331   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13332   ins_pipe( pipe_cmov_reg_long );
13333 %}
13334 
13335 // Compare 2 longs and CMOVE ints.
13336 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13337   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13338   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13339   ins_cost(200);
13340   format %{ "CMOV$cmp $dst,$src" %}
13341   opcode(0x0F,0x40);
13342   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13343   ins_pipe( pipe_cmov_reg );
13344 %}
13345 
13346 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13347   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13348   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13349   ins_cost(250);
13350   format %{ "CMOV$cmp $dst,$src" %}
13351   opcode(0x0F,0x40);
13352   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13353   ins_pipe( pipe_cmov_mem );
13354 %}
13355 
13356 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13357   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13358   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13359   ins_cost(200);
13360   expand %{
13361     cmovII_reg_EQNE(cmp, flags, dst, src);
13362   %}
13363 %}
13364 
13365 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13366   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13367   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13368   ins_cost(250);
13369   expand %{
13370     cmovII_mem_EQNE(cmp, flags, dst, src);
13371   %}
13372 %}
13373 
13374 // Compare 2 longs and CMOVE ptrs.
13375 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13376   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13377   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13378   ins_cost(200);
13379   format %{ "CMOV$cmp $dst,$src" %}
13380   opcode(0x0F,0x40);
13381   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13382   ins_pipe( pipe_cmov_reg );
13383 %}
13384 
13385 // Compare 2 unsigned longs and CMOVE ptrs.
13386 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13387   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13388   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13389   ins_cost(200);
13390   expand %{
13391     cmovPP_reg_EQNE(cmp,flags,dst,src);
13392   %}
13393 %}
13394 
13395 // Compare 2 longs and CMOVE doubles
13396 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13398   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13399   ins_cost(200);
13400   expand %{
13401     fcmovDPR_regS(cmp,flags,dst,src);
13402   %}
13403 %}
13404 
13405 // Compare 2 longs and CMOVE doubles
13406 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13408   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13409   ins_cost(200);
13410   expand %{
13411     fcmovD_regS(cmp,flags,dst,src);
13412   %}
13413 %}
13414 
13415 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13417   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13418   ins_cost(200);
13419   expand %{
13420     fcmovFPR_regS(cmp,flags,dst,src);
13421   %}
13422 %}
13423 
13424 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13426   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13427   ins_cost(200);
13428   expand %{
13429     fcmovF_regS(cmp,flags,dst,src);
13430   %}
13431 %}
13432 
13433 //======
13434 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13435 // Same as cmpL_reg_flags_LEGT except must negate src
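// For example (illustrative): the XOR/CMP/SBB sequence computes 0 - $src, so
// the flags describe the comparison (0 vs $src); the commuted condition
// (e.g. 0 >= $src for $src <= 0) then yields the LE/GT answer against zero.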
13436 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13437   match( Set flags (CmpL src zero ));
13438   effect( TEMP tmp );
13439   ins_cost(300);
13440   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13441             "CMP    $tmp,$src.lo\n\t"
13442             "SBB    $tmp,$src.hi\n\t" %}
13443   ins_encode( long_cmp_flags3(src, tmp) );
13444   ins_pipe( ialu_reg_reg_long );
13445 %}
13446 
13447 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13448 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13449 // requires a commuted test to get the same result.
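// For example (illustrative): src1 <= src2 is equivalent to src2 >= src1, so
// computing flags for (src2 - src1) and branching on the commuted condition
// (GE/LT of the swapped compare) yields the LE/GT result for (src1, src2).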
13450 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13451   match( Set flags (CmpL src1 src2 ));
13452   effect( TEMP tmp );
13453   ins_cost(300);
13454   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13455             "MOV    $tmp,$src2.hi\n\t"
13456             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13457   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13458   ins_pipe( ialu_cr_reg_reg );
13459 %}
13460 
// Long compares reg <= zero/reg OR reg > zero/reg.
13462 // Just a wrapper for a normal branch, plus the predicate test
13463 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13464   match(If cmp flags);
13465   effect(USE labl);
13466   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13467   ins_cost(300);
13468   expand %{
13469     jmpCon(cmp,flags,labl);    // JGT or JLE...
13470   %}
13471 %}
13472 
13473 //======
13474 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13475 // Same as cmpUL_reg_flags_LEGT except must negate src
13476 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13477   match(Set flags (CmpUL src zero));
13478   effect(TEMP tmp);
13479   ins_cost(300);
13480   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13481             "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi" %}
13483   ins_encode(long_cmp_flags3(src, tmp));
13484   ins_pipe(ialu_reg_reg_long);
13485 %}
13486 
13487 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13488 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13489 // requires a commuted test to get the same result.
13490 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13491   match(Set flags (CmpUL src1 src2));
13492   effect(TEMP tmp);
13493   ins_cost(300);
13494   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13495             "MOV    $tmp,$src2.hi\n\t"
13496             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13497   ins_encode(long_cmp_flags2( src2, src1, tmp));
13498   ins_pipe(ialu_cr_reg_reg);
13499 %}
13500 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13502 // Just a wrapper for a normal branch, plus the predicate test
13503 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13504   match(If cmp flags);
13505   effect(USE labl);
13506   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13507   ins_cost(300);
13508   expand %{
13509     jmpCon(cmp, flags, labl);    // JGT or JLE...
13510   %}
13511 %}
13512 
13513 // Compare 2 longs and CMOVE longs.
13514 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13515   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13516   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13517   ins_cost(400);
13518   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13519             "CMOV$cmp $dst.hi,$src.hi" %}
13520   opcode(0x0F,0x40);
13521   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13522   ins_pipe( pipe_cmov_reg_long );
13523 %}
13524 
13525 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13526   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13527   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13528   ins_cost(500);
13529   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13530             "CMOV$cmp $dst.hi,$src.hi+4" %}
13531   opcode(0x0F,0x40);
13532   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13533   ins_pipe( pipe_cmov_reg_long );
13534 %}
13535 
13536 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13537   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13538   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13539   ins_cost(400);
13540   expand %{
13541     cmovLL_reg_LEGT(cmp, flags, dst, src);
13542   %}
13543 %}
13544 
13545 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13546   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13547   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13548   ins_cost(500);
13549   expand %{
13550     cmovLL_mem_LEGT(cmp, flags, dst, src);
13551   %}
13552 %}
13553 
13554 // Compare 2 longs and CMOVE ints.
13555 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13556   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13557   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13558   ins_cost(200);
13559   format %{ "CMOV$cmp $dst,$src" %}
13560   opcode(0x0F,0x40);
13561   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13562   ins_pipe( pipe_cmov_reg );
13563 %}
13564 
13565 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13566   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13567   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13568   ins_cost(250);
13569   format %{ "CMOV$cmp $dst,$src" %}
13570   opcode(0x0F,0x40);
13571   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13572   ins_pipe( pipe_cmov_mem );
13573 %}
13574 
13575 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13576   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13577   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13578   ins_cost(200);
13579   expand %{
13580     cmovII_reg_LEGT(cmp, flags, dst, src);
13581   %}
13582 %}
13583 
13584 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13585   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13586   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13587   ins_cost(250);
13588   expand %{
13589     cmovII_mem_LEGT(cmp, flags, dst, src);
13590   %}
13591 %}
13592 
13593 // Compare 2 longs and CMOVE ptrs.
13594 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13595   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13596   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13597   ins_cost(200);
13598   format %{ "CMOV$cmp $dst,$src" %}
13599   opcode(0x0F,0x40);
13600   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13601   ins_pipe( pipe_cmov_reg );
13602 %}
13603 
13604 // Compare 2 unsigned longs and CMOVE ptrs.
13605 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13606   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13607   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13608   ins_cost(200);
13609   expand %{
13610     cmovPP_reg_LEGT(cmp,flags,dst,src);
13611   %}
13612 %}
13613 
13614 // Compare 2 longs and CMOVE doubles
13615 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13617   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13618   ins_cost(200);
13619   expand %{
13620     fcmovDPR_regS(cmp,flags,dst,src);
13621   %}
13622 %}
13623 
13624 // Compare 2 longs and CMOVE doubles
13625 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13627   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13628   ins_cost(200);
13629   expand %{
13630     fcmovD_regS(cmp,flags,dst,src);
13631   %}
13632 %}
13633 
13634 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13636   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13637   ins_cost(200);
13638   expand %{
13639     fcmovFPR_regS(cmp,flags,dst,src);
13640   %}
13641 %}
13642 
13643 
13644 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13646   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13647   ins_cost(200);
13648   expand %{
13649     fcmovF_regS(cmp,flags,dst,src);
13650   %}
13651 %}
13652 
13653 
13654 // ============================================================================
13655 // Procedure Call/Return Instructions
13656 // Call Java Static Instruction
13657 // Note: If this code changes, the corresponding ret_addr_offset() and
13658 //       compute_padding() functions will have to be adjusted.
13659 instruct CallStaticJavaDirect(method meth) %{
13660   match(CallStaticJava);
13661   effect(USE meth);
13662 
13663   ins_cost(300);
13664   format %{ "CALL,static " %}
13665   opcode(0xE8); /* E8 cd */
13666   ins_encode( pre_call_resets,
13667               Java_Static_Call( meth ),
13668               call_epilog,
13669               post_call_FPU );
13670   ins_pipe( pipe_slow );
13671   ins_alignment(4);
13672 %}
13673 
13674 // Call Java Dynamic Instruction
13675 // Note: If this code changes, the corresponding ret_addr_offset() and
13676 //       compute_padding() functions will have to be adjusted.
13677 instruct CallDynamicJavaDirect(method meth) %{
13678   match(CallDynamicJava);
13679   effect(USE meth);
13680 
13681   ins_cost(300);
13682   format %{ "MOV    EAX,(oop)-1\n\t"
13683             "CALL,dynamic" %}
13684   opcode(0xE8); /* E8 cd */
13685   ins_encode( pre_call_resets,
13686               Java_Dynamic_Call( meth ),
13687               call_epilog,
13688               post_call_FPU );
13689   ins_pipe( pipe_slow );
13690   ins_alignment(4);
13691 %}
13692 
13693 // Call Runtime Instruction
13694 instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
13696   effect(USE meth);
13697 
13698   ins_cost(300);
13699   format %{ "CALL,runtime " %}
13700   opcode(0xE8); /* E8 cd */
13701   // Use FFREEs to clear entries in float stack
13702   ins_encode( pre_call_resets,
13703               FFree_Float_Stack_All,
13704               Java_To_Runtime( meth ),
13705               post_call_FPU );
13706   ins_pipe( pipe_slow );
13707 %}
13708 
13709 // Call runtime without safepoint
13710 instruct CallLeafDirect(method meth) %{
13711   match(CallLeaf);
13712   effect(USE meth);
13713 
13714   ins_cost(300);
13715   format %{ "CALL_LEAF,runtime " %}
13716   opcode(0xE8); /* E8 cd */
13717   ins_encode( pre_call_resets,
13718               FFree_Float_Stack_All,
13719               Java_To_Runtime( meth ),
13720               Verify_FPU_For_Leaf, post_call_FPU );
13721   ins_pipe( pipe_slow );
13722 %}
13723 
13724 instruct CallLeafNoFPDirect(method meth) %{
13725   match(CallLeafNoFP);
13726   effect(USE meth);
13727 
13728   ins_cost(300);
13729   format %{ "CALL_LEAF_NOFP,runtime " %}
13730   opcode(0xE8); /* E8 cd */
13731   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13732   ins_pipe( pipe_slow );
13733 %}
13734 
13735 
13736 // Return Instruction
13737 // Remove the return address & jump to it.
13738 instruct Ret() %{
13739   match(Return);
13740   format %{ "RET" %}
13741   opcode(0xC3);
13742   ins_encode(OpcP);
13743   ins_pipe( pipe_jmp );
13744 %}
13745 
13746 // Tail Call; Jump from runtime stub to Java code.
13747 // Also known as an 'interprocedural jump'.
13748 // Target of jump will eventually return to caller.
13749 // TailJump below removes the return address.
13750 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13751   match(TailCall jump_target method_ptr);
13752   ins_cost(300);
13753   format %{ "JMP    $jump_target \t# EBX holds method" %}
13754   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13755   ins_encode( OpcP, RegOpc(jump_target) );
13756   ins_pipe( pipe_jmp );
13757 %}
13758 
13759 
13760 // Tail Jump; remove the return address; jump to target.
13761 // TailCall above leaves the return address around.
13762 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13763   match( TailJump jump_target ex_oop );
13764   ins_cost(300);
13765   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13766             "JMP    $jump_target " %}
13767   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13768   ins_encode( enc_pop_rdx,
13769               OpcP, RegOpc(jump_target) );
13770   ins_pipe( pipe_jmp );
13771 %}
13772 
13773 // Create exception oop: created by stack-crawling runtime code.
13774 // Created exception is now available to this handler, and is setup
13775 // just prior to jumping to this handler.  No code emitted.
13776 instruct CreateException( eAXRegP ex_oop )
13777 %{
13778   match(Set ex_oop (CreateEx));
13779 
13780   size(0);
13781   // use the following format syntax
13782   format %{ "# exception oop is in EAX; no code emitted" %}
13783   ins_encode();
13784   ins_pipe( empty );
13785 %}
13786 
13787 
13788 // Rethrow exception:
13789 // The exception oop will come in the first argument position.
13790 // Then JUMP (not call) to the rethrow stub code.
13791 instruct RethrowException()
13792 %{
13793   match(Rethrow);
13794 
13795   // use the following format syntax
13796   format %{ "JMP    rethrow_stub" %}
13797   ins_encode(enc_rethrow);
13798   ins_pipe( pipe_jmp );
13799 %}
13800 
13801 // inlined locking and unlocking
13802 
13803 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13804   predicate(Compile::current()->use_rtm());
13805   match(Set cr (FastLock object box));
13806   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13807   ins_cost(300);
13808   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13809   ins_encode %{
13810     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13811                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13812                  _rtm_counters, _stack_rtm_counters,
13813                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13814                  true, ra_->C->profile_rtm());
13815   %}
13816   ins_pipe(pipe_slow);
13817 %}
13818 
13819 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13820   predicate(!Compile::current()->use_rtm());
13821   match(Set cr (FastLock object box));
13822   effect(TEMP tmp, TEMP scr, USE_KILL box);
13823   ins_cost(300);
13824   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13825   ins_encode %{
13826     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13827                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13828   %}
13829   ins_pipe(pipe_slow);
13830 %}
13831 
13832 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13833   match(Set cr (FastUnlock object box));
13834   effect(TEMP tmp, USE_KILL box);
13835   ins_cost(300);
13836   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13837   ins_encode %{
13838     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13839   %}
13840   ins_pipe(pipe_slow);
13841 %}
13842 
13843 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13844   predicate(Matcher::vector_length(n) <= 32);
13845   match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LT32 $dst, $src" %}
13847   ins_encode %{
13848     int mask_len = Matcher::vector_length(this);
13849     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13850   %}
13851   ins_pipe( pipe_slow );
13852 %}
13853 
13854 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13855   predicate(Matcher::vector_length(n) > 32);
13856   match(Set dst (MaskAll src));
13857   effect(TEMP ktmp);
13858   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13859   ins_encode %{
13860     int mask_len = Matcher::vector_length(this);
13861     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13862   %}
13863   ins_pipe( pipe_slow );
13864 %}
13865 
13866 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13867   predicate(Matcher::vector_length(n) > 32);
13868   match(Set dst (MaskAll src));
13869   effect(TEMP ktmp);
13870   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13871   ins_encode %{
13872     int mask_len = Matcher::vector_length(this);
13873     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13874   %}
13875   ins_pipe( pipe_slow );
13876 %}
13877 
13878 // ============================================================================
13879 // Safepoint Instruction
13880 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13881   match(SafePoint poll);
13882   effect(KILL cr, USE poll);
13883 
13884   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13885   ins_cost(125);
13886   // EBP would need size(3)
13887   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13888   ins_encode %{
13889     __ relocate(relocInfo::poll_type);
13890     address pre_pc = __ pc();
13891     __ testl(rax, Address($poll$$Register, 0));
13892     address post_pc = __ pc();
13893     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13894   %}
13895   ins_pipe(ialu_reg_mem);
13896 %}
13897 
13898 
13899 // ============================================================================
13900 // This name is KNOWN by the ADLC and cannot be changed.
13901 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13902 // for this guy.
13903 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13904   match(Set dst (ThreadLocal));
13905   effect(DEF dst, KILL cr);
13906 
13907   format %{ "MOV    $dst, Thread::current()" %}
13908   ins_encode %{
13909     Register dstReg = as_Register($dst$$reg);
13910     __ get_thread(dstReg);
13911   %}
13912   ins_pipe( ialu_reg_fat );
13913 %}
13914 
13915 
13916 
13917 //----------PEEPHOLE RULES-----------------------------------------------------
13918 // These must follow all instruction definitions as they use the names
13919 // defined in the instructions definitions.
13920 //
13921 // peepmatch ( root_instr_name [preceding_instruction]* );
13922 //
13923 // peepconstraint %{
13924 // (instruction_number.operand_name relational_op instruction_number.operand_name
13925 //  [, ...] );
13926 // // instruction numbers are zero-based using left to right order in peepmatch
13927 //
13928 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13929 // // provide an instruction_number.operand_name for each operand that appears
13930 // // in the replacement instruction's match rule
13931 //
13932 // ---------VM FLAGS---------------------------------------------------------
13933 //
13934 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13935 //
13936 // Each peephole rule is given an identifying number starting with zero and
13937 // increasing by one in the order seen by the parser.  An individual peephole
13938 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13939 // on the command-line.
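//
// For example (illustrative; these are develop flags, so product builds may
// not accept them on the command line):
//   java -XX:-OptoPeephole ...        disables all peephole rules
//   java -XX:OptoPeepholeAt=3 ...     enables only peephole rule number 3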
13940 //
13941 // ---------CURRENT LIMITATIONS----------------------------------------------
13942 //
13943 // Only match adjacent instructions in same basic block
13944 // Only equality constraints
13945 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13946 // Only one replacement instruction
13947 //
13948 // ---------EXAMPLE----------------------------------------------------------
13949 //
13950 // // pertinent parts of existing instructions in architecture description
13951 // instruct movI(rRegI dst, rRegI src) %{
13952 //   match(Set dst (CopyI src));
13953 // %}
13954 //
13955 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13956 //   match(Set dst (AddI dst src));
13957 //   effect(KILL cr);
13958 // %}
13959 //
13960 // // Change (inc mov) to lea
13961 // peephole %{
13962 //   // increment preceded by register-register move
13963 //   peepmatch ( incI_eReg movI );
13964 //   // require that the destination register of the increment
13965 //   // match the destination register of the move
13966 //   peepconstraint ( 0.dst == 1.dst );
13967 //   // construct a replacement instruction that sets
13968 //   // the destination to ( move's source register + one )
13969 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13970 // %}
13971 //
13972 // Implementation no longer uses movX instructions since
13973 // machine-independent system no longer uses CopyX nodes.
13974 //
13975 // peephole %{
13976 //   peepmatch ( incI_eReg movI );
13977 //   peepconstraint ( 0.dst == 1.dst );
13978 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13979 // %}
13980 //
13981 // peephole %{
13982 //   peepmatch ( decI_eReg movI );
13983 //   peepconstraint ( 0.dst == 1.dst );
13984 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13985 // %}
13986 //
13987 // peephole %{
13988 //   peepmatch ( addI_eReg_imm movI );
13989 //   peepconstraint ( 0.dst == 1.dst );
13990 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13991 // %}
13992 //
13993 // peephole %{
13994 //   peepmatch ( addP_eReg_imm movP );
13995 //   peepconstraint ( 0.dst == 1.dst );
13996 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13997 // %}
13998 
13999 // // Change load of spilled value to only a spill
14000 // instruct storeI(memory mem, rRegI src) %{
14001 //   match(Set mem (StoreI mem src));
14002 // %}
14003 //
14004 // instruct loadI(rRegI dst, memory mem) %{
14005 //   match(Set dst (LoadI mem));
14006 // %}
14007 //
14008 peephole %{
14009   peepmatch ( loadI storeI );
14010   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14011   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14012 %}
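// In other words, the rule above matches a load (instruction 0) whose immediately
// preceding instruction is a store (instruction 1) of the same register to the
// same memory location, and replaces the pair with just the store, eliding the
// redundant reload.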
14013 
14014 //----------SMARTSPILL RULES---------------------------------------------------
14015 // These must follow all instruction definitions as they use the names
14016 // defined in the instructions definitions.