//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
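// For example, the entry "reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());"
// below declares EAX as save-on-call for both the allocator and the C calling
// convention, spilled/reloaded as an integer (LoadI/StoreI), with hardware
// encoding 0.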

// General Registers
// EBX, ESI, and EDI were previously save-on-entry for Java code, then SOE was
// turned off because of the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode. During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily. However, at any safepoint the stack will not
// have this element, so FPR1 == st(0) from the oopMap viewpoint. This same
// numbering weirdness forces the instruction encoding to play games with the
// register encode to correct for the 0/1 issue. See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define   HIGH_FROM_LOW_ENC(x) ((x)+2)
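// For the long pairs listed in the register block (EDX:EAX, EBX:ECX, EDI:EBP)
// the high half's hardware encoding is always the low half's encoding plus 2,
// e.g. HIGH_FROM_LOW(rax) == rdx and HIGH_FROM_LOW_ENC(ECX_enc) == EBX_enc.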

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
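// Illustration only (the real uses are in the instruct rules further down):
// AbsF/AbsD AND the operand with a signmask constant to clear the sign bit,
// while NegF/NegD XOR it with a signflip constant to toggle the sign bit.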

// Offset hacking within calls.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}
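// The 5 bytes of the static call are the E8 opcode plus a 32-bit relative
// displacement; the dynamic call is preceded by a 5-byte MOV-immediate that
// sets up the inline cache before the same 5-byte call (see the corresponding
// compute_padding() below, which skips that MOV when aligning the call).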

static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement
  }
}
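// For example, store_to_stackslot(cbuf, 0xDB, 0x0, 8) emits FILD DWORD PTR
// [ESP+8] as the byte sequence DB 44 24 08: opcode, ModRM (mod=01, reg=000,
// rm=100), SIB selecting ESP as base, and an 8-bit displacement.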

   // rRegI ereg, memory mem) %{    // emit_reg_mem
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}
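// Net effect: dst ends up -1 when the compare was below or unordered (NaN),
// 0 when equal, and +1 when above, i.e. NaN is treated as 'less than' by this
// three-way compare helper.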


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  int max_monitors = C->method() != NULL ? C->max_monitors() : 0;
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL, max_monitors);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL    rsp, poll_offset[thread]  \n\t"
              "JA      #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
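// Example: an OptoReg that maps to EAX classifies as rc_int, one that maps to
// an XMM register as rc_xmm, an x87 register as rc_float (UseSSE < 2 only),
// and any register-allocator stack slot as rc_stack.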

static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to a single byte displacement.
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD  [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS  [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4;
}


static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4;
}

static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu  xmm0, [rsp + #%d]\n\t"
                "movdqu  [rsp + #%d], xmm0\n\t"
                "movdqu  xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
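// Note: the Op_VecX/VecY/VecZ cases above temporarily park xmm0 just below
// ESP (negative offsets) so the stack-to-stack copy does not need a free XMM
// register, restoring xmm0 afterwards.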

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
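  // LEA reg,[ESP+offset] is opcode (0x8D) + ModRM + SIB = 3 bytes, plus a
  // 1-byte (disp8) or 4-byte (disp32) displacement, matching the encoding
  // emitted above.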
 1347   if( offset >= 128 ) {
 1348     return 7;
 1349   }
 1350   else {
 1351     return 4;
 1352   }
 1353 }
 1354 
 1355 //=============================================================================
 1356 #ifndef PRODUCT
 1357 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1358   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1359   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1360   st->print_cr("\tNOP");
 1361   st->print_cr("\tNOP");
 1362   if( !OptoBreakpoint )
 1363     st->print_cr("\tNOP");
 1364 }
 1365 #endif
 1366 
 1367 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1368   MacroAssembler masm(&cbuf);
 1369 #ifdef ASSERT
 1370   uint insts_size = cbuf.insts_size();
 1371 #endif
 1372   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1373   masm.jump_cc(Assembler::notEqual,
 1374                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
 1377   int nops_cnt = 2;
 1378   if( !OptoBreakpoint ) // Leave space for int3
 1379      nops_cnt += 1;
 1380   masm.nop(nops_cnt);
 1381 
 1382   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1383 }
 1384 
 1385 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
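  // 3-byte CMP (3B /r with a disp8) + 6-byte long JNE (0F 85 rel32) + 2 or 3
  // one-byte NOPs.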
 1386   return OptoBreakpoint ? 11 : 12;
 1387 }
 1388 
 1389 
 1390 //=============================================================================
 1391 
 1392 // Vector calling convention not supported.
 1393 const bool Matcher::supports_vector_calling_convention() {
 1394   return false;
 1395 }
 1396 
 1397 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1398   Unimplemented();
 1399   return OptoRegPair(0, 0);
 1400 }
 1401 
 1402 // Is this branch offset short enough that a short branch can be used?
 1403 //
 1404 // NOTE: If the platform does not provide any short branch variants, then
 1405 //       this method should return false for offset 0.
 1406 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1407   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1410   offset -= br_size;
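  // Example: a 2-byte short jcc whose target is 100 bytes past the branch
  // address adjusts to 98 and fits in a signed byte; a target 200 bytes away
  // adjusts to 198 and must use the long form.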
 1411 
 1412   // the short version of jmpConUCF2 contains multiple branches,
 1413   // making the reach slightly less
 1414   if (rule == jmpConUCF2_rule)
 1415     return (-126 <= offset && offset <= 125);
 1416   return (-128 <= offset && offset <= 127);
 1417 }
 1418 
// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1423 bool Matcher::can_be_java_arg( int reg ) {
 1424   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1425   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1426   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1427   return false;
 1428 }
 1429 
 1430 bool Matcher::is_spillable_arg( int reg ) {
 1431   return can_be_java_arg(reg);
 1432 }
 1433 
 1434 uint Matcher::int_pressure_limit()
 1435 {
 1436   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1437 }
 1438 
 1439 uint Matcher::float_pressure_limit()
 1440 {
 1441   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1442 }
 1443 
 1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // the multiply-based code.  Only do so when the constant divisor
  // fits into 32 bits (min_jint is excluded because negating it does
  // not yield a correct positive 32-bit value).
 1450   return VM_Version::has_fast_idiv() &&
 1451          (divisor == (int)divisor && divisor != min_jint);
 1452 }
 1453 
 1454 // Register for DIVI projection of divmodI
 1455 RegMask Matcher::divI_proj_mask() {
 1456   return EAX_REG_mask();
 1457 }
 1458 
 1459 // Register for MODI projection of divmodI
 1460 RegMask Matcher::modI_proj_mask() {
 1461   return EDX_REG_mask();
 1462 }
 1463 
 1464 // Register for DIVL projection of divmodL
 1465 RegMask Matcher::divL_proj_mask() {
 1466   ShouldNotReachHere();
 1467   return RegMask();
 1468 }
 1469 
 1470 // Register for MODL projection of divmodL
 1471 RegMask Matcher::modL_proj_mask() {
 1472   ShouldNotReachHere();
 1473   return RegMask();
 1474 }
 1475 
 1476 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1477   return NO_REG_mask();
 1478 }
 1479 
// Returns true if the high 32 bits of the value are known to be zero.
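// Examples: (AndL x, con) where con has a zero high word, and a ConL constant
// whose value fits in the low 32 bits, both qualify.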
 1481 bool is_operand_hi32_zero(Node* n) {
 1482   int opc = n->Opcode();
 1483   if (opc == Op_AndL) {
 1484     Node* o2 = n->in(2);
 1485     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1486       return true;
 1487     }
 1488   }
 1489   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1490     return true;
 1491   }
 1492   return false;
 1493 }
 1494 
 1495 %}
 1496 
 1497 //----------ENCODING BLOCK-----------------------------------------------------
 1498 // This block specifies the encoding classes used by the compiler to output
 1499 // byte streams.  Encoding classes generate functions which are called by
 1500 // Machine Instruction Nodes in order to generate the bit encoding of the
 1501 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1504 // operand to generate a function which returns its register number when
 1505 // queried.   CONST_INTER causes an operand to generate a function which
 1506 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1507 // operand to generate four functions which return the Base Register, the
 1508 // Index Register, the Scale Value, and the Offset Value of the operand when
 1509 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1511 // associated with each basic boolean condition for a conditional instruction.
 1512 // Instructions specify two basic values for encoding.  They use the
 1513 // ins_encode keyword to specify their encoding class (which must be one of
 1514 // the class names specified in the encoding block), and they use the
 1515 // opcode keyword to specify, in order, their primary, secondary, and
 1516 // tertiary opcode.  Only the opcode sections which a particular instruction
 1517 // needs for encoding need to be specified.
 1518 encode %{
 1519   // Build emit functions for each basic byte or larger field in the intel
 1520   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1521   // code in the enc_class source block.  Emit functions will live in the
 1522   // main source block for now.  In future, we can generalize this by
 1523   // adding a syntax that specifies the sizes of fields in an order,
 1524   // so that the adlc can build the emit functions automagically
 1525 
 1526   // Emit primary opcode
 1527   enc_class OpcP %{
 1528     emit_opcode(cbuf, $primary);
 1529   %}
 1530 
 1531   // Emit secondary opcode
 1532   enc_class OpcS %{
 1533     emit_opcode(cbuf, $secondary);
 1534   %}
 1535 
 1536   // Emit opcode directly
 1537   enc_class Opcode(immI d8) %{
 1538     emit_opcode(cbuf, $d8$$constant);
 1539   %}
 1540 
 1541   enc_class SizePrefix %{
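    // 0x66 is the operand-size override prefix: it makes the following
    // instruction operate on 16-bit operands.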
 1542     emit_opcode(cbuf,0x66);
 1543   %}
 1544 
 1545   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1546     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1547   %}
 1548 
 1549   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1550     emit_opcode(cbuf,$opcode$$constant);
 1551     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1552   %}
 1553 
 1554   enc_class mov_r32_imm0( rRegI dst ) %{
 1555     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1556     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1557   %}
 1558 
 1559   enc_class cdq_enc %{
 1560     // Full implementation of Java idiv and irem; checks for
 1561     // special case as described in JVM spec., p.243 & p.271.
 1562     //
 1563     //         normal case                           special case
 1564     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
 1572     //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
 1581     //                  done:
 1582     //
 1583     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1584     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1585     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1586     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1587     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1588     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1589     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1590     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1591     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1592     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1594     // normal_case:
 1595     emit_opcode(cbuf,0x99);                                         // cdq
 1596     // idiv (note: must be emitted by the user of this rule)
 1597     // normal:
 1598   %}
 1599 
 1600   // Dense encoding for older common ops
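  // (the register number is folded into the opcode byte, e.g. PUSH r32 is
  // 0x50+rd and MOV r32,imm32 is 0xB8+rd)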
 1601   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1602     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1603   %}
 1604 
 1605 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1607   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1608     // Check for 8-bit immediate, and set sign extend bit in opcode
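    // (e.g. the group-1 ALU primary 0x81, the imm32 form, becomes 0x83, the
    // sign-extended imm8 form)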
 1609     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1610       emit_opcode(cbuf, $primary | 0x02);
 1611     }
 1612     else {                          // If 32-bit immediate
 1613       emit_opcode(cbuf, $primary);
 1614     }
 1615   %}
 1616 
 1617   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1618     // Emit primary opcode and set sign-extend bit
 1619     // Check for 8-bit immediate, and set sign extend bit in opcode
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1622     else {                          // If 32-bit immediate
 1623       emit_opcode(cbuf, $primary);
 1624     }
 1625     // Emit r/m byte with secondary opcode, after primary opcode.
 1626     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1627   %}
 1628 
 1629   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1630     // Check for 8-bit immediate, and set sign extend bit in opcode
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       $$$emit8$imm$$constant;
 1633     }
 1634     else {                          // If 32-bit immediate
 1635       // Output immediate
 1636       $$$emit32$imm$$constant;
 1637     }
 1638   %}
 1639 
 1640   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1641     // Emit primary opcode and set sign-extend bit
 1642     // Check for 8-bit immediate, and set sign extend bit in opcode
 1643     int con = (int)$imm$$constant; // Throw away top bits
 1644     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1645     // Emit r/m byte with secondary opcode, after primary opcode.
 1646     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1647     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1648     else                               emit_d32(cbuf,con);
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1652     // Emit primary opcode and set sign-extend bit
 1653     // Check for 8-bit immediate, and set sign extend bit in opcode
 1654     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1655     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with tertiary opcode, after primary opcode.
 1657     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1659     else                               emit_d32(cbuf,con);
 1660   %}
 1661 
 1662   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1663     emit_cc(cbuf, $secondary, $dst$$reg );
 1664   %}
 1665 
 1666   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1667     int destlo = $dst$$reg;
 1668     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1669     // bswap lo
 1670     emit_opcode(cbuf, 0x0F);
 1671     emit_cc(cbuf, 0xC8, destlo);
 1672     // bswap hi
 1673     emit_opcode(cbuf, 0x0F);
 1674     emit_cc(cbuf, 0xC8, desthi);
 1675     // xchg lo and hi
 1676     emit_opcode(cbuf, 0x87);
 1677     emit_rm(cbuf, 0x3, destlo, desthi);
 1678   %}
 1679 
 1680   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1681     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1682   %}
 1683 
 1684   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1685     $$$emit8$primary;
 1686     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1687   %}
 1688 
 1689   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1690     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1691     emit_d8(cbuf, op >> 8 );
 1692     emit_d8(cbuf, op & 255);
 1693   %}
 1694 
 1695   // emulate a CMOV with a conditional branch around a MOV
 1696   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1697     // Invert sense of branch from sense of CMOV
 1698     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1699     emit_d8( cbuf, $brOffs$$constant );
 1700   %}
 1701 
 1702   enc_class enc_PartialSubtypeCheck( ) %{
 1703     Register Redi = as_Register(EDI_enc); // result register
 1704     Register Reax = as_Register(EAX_enc); // super class
 1705     Register Recx = as_Register(ECX_enc); // killed
 1706     Register Resi = as_Register(ESI_enc); // sub class
 1707     Label miss;
 1708 
 1709     MacroAssembler _masm(&cbuf);
 1710     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1711                                      NULL, &miss,
 1712                                      /*set_cond_codes:*/ true);
 1713     if ($primary) {
 1714       __ xorptr(Redi, Redi);
 1715     }
 1716     __ bind(miss);
 1717   %}
 1718 
 1719   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1720     MacroAssembler masm(&cbuf);
 1721     int start = masm.offset();
 1722     if (UseSSE >= 2) {
 1723       if (VerifyFPU) {
 1724         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1725       }
 1726     } else {
 1727       // External c_calling_convention expects the FPU stack to be 'clean'.
 1728       // Compiled code leaves it dirty.  Do cleanup now.
 1729       masm.empty_FPU_stack();
 1730     }
 1731     if (sizeof_FFree_Float_Stack_All == -1) {
 1732       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1733     } else {
 1734       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1735     }
 1736   %}
 1737 
 1738   enc_class Verify_FPU_For_Leaf %{
 1739     if( VerifyFPU ) {
 1740       MacroAssembler masm(&cbuf);
 1741       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1742     }
 1743   %}
 1744 
 1745   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1746     // This is the instruction starting address for relocation info.
 1747     MacroAssembler _masm(&cbuf);
 1748     cbuf.set_insts_mark();
 1749     $$$emit8$primary;
 1750     // CALL directly to the runtime
 1751     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1752                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1753     __ post_call_nop();
 1754 
 1755     if (UseSSE >= 2) {
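      // The 32-bit C calling convention returns float/double results in ST(0),
      // but in SSE2+ mode the compiled caller expects the value in XMM0, so
      // move it through a stack temp (or just pop it if the result is unused).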
 1756       MacroAssembler _masm(&cbuf);
 1757       BasicType rt = tf()->return_type();
 1758 
 1759       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1760         // A C runtime call where the return value is unused.  In SSE2+
 1761         // mode the result needs to be removed from the FPU stack.  It's
 1762         // likely that this function call could be removed by the
 1763         // optimizer if the C function is a pure function.
 1764         __ ffree(0);
 1765       } else if (rt == T_FLOAT) {
 1766         __ lea(rsp, Address(rsp, -4));
 1767         __ fstp_s(Address(rsp, 0));
 1768         __ movflt(xmm0, Address(rsp, 0));
 1769         __ lea(rsp, Address(rsp,  4));
 1770       } else if (rt == T_DOUBLE) {
 1771         __ lea(rsp, Address(rsp, -8));
 1772         __ fstp_d(Address(rsp, 0));
 1773         __ movdbl(xmm0, Address(rsp, 0));
 1774         __ lea(rsp, Address(rsp,  8));
 1775       }
 1776     }
 1777   %}
 1778 
 1779   enc_class pre_call_resets %{
 1780     // If method sets FPU control word restore it here
 1781     debug_only(int off0 = cbuf.insts_size());
 1782     if (ra_->C->in_24_bit_fp_mode()) {
 1783       MacroAssembler _masm(&cbuf);
 1784       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1785     }
 1786     // Clear upper bits of YMM registers when current compiled code uses
 1787     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1788     MacroAssembler _masm(&cbuf);
 1789     __ vzeroupper();
 1790     debug_only(int off1 = cbuf.insts_size());
 1791     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1792   %}
 1793 
 1794   enc_class post_call_FPU %{
 1795     // If method sets FPU control word do it here also
 1796     if (Compile::current()->in_24_bit_fp_mode()) {
 1797       MacroAssembler masm(&cbuf);
 1798       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1799     }
 1800   %}
 1801 
 1802   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1803     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1804     // who we intended to call.
 1805     MacroAssembler _masm(&cbuf);
 1806     cbuf.set_insts_mark();
 1807     $$$emit8$primary;
 1808 
 1809     if (!_method) {
 1810       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1811                      runtime_call_Relocation::spec(),
 1812                      RELOC_IMM32);
 1813       __ post_call_nop();
 1814     } else {
 1815       int method_index = resolved_method_index(cbuf);
 1816       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1817                                                   : static_call_Relocation::spec(method_index);
 1818       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1819                      rspec, RELOC_DISP32);
 1820       __ post_call_nop();
 1821       address mark = cbuf.insts_mark();
 1822       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1823         // Calls of the same statically bound method can share
 1824         // a stub to the interpreter.
 1825         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1826       } else {
 1827         // Emit stubs for static call.
 1828         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1829         if (stub == NULL) {
 1830           ciEnv::current()->record_failure("CodeCache is full");
 1831           return;
 1832         }
 1833       }
 1834     }
 1835   %}
 1836 
 1837   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1838     MacroAssembler _masm(&cbuf);
 1839     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1840     __ post_call_nop();
 1841   %}
 1842 
 1843   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1844     int disp = in_bytes(Method::from_compiled_offset());
 1845     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1846 
 1847     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1848     MacroAssembler _masm(&cbuf);
 1849     cbuf.set_insts_mark();
 1850     $$$emit8$primary;
 1851     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1852     emit_d8(cbuf, disp);             // Displacement
 1853     __ post_call_nop();
 1854   %}
 1855 
 1856 //   Following encoding is no longer used, but may be restored if calling
 1857 //   convention changes significantly.
 1858 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1859 //
 1860 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1861 //     // int ic_reg     = Matcher::inline_cache_reg();
 1862 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1863 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1864 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1865 //
 1866 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1867 //     // // so we load it immediately before the call
 1868 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1869 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1870 //
 1871 //     // xor rbp,ebp
 1872 //     emit_opcode(cbuf, 0x33);
 1873 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1874 //
 1875 //     // CALL to interpreter.
 1876 //     cbuf.set_insts_mark();
 1877 //     $$$emit8$primary;
 1878 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1879 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1880 //   %}
 1881 
 1882   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1883     $$$emit8$primary;
 1884     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1885     $$$emit8$shift$$constant;
 1886   %}
 1887 
 1888   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1889     // Load immediate does not have a zero or sign extended version
 1890     // for 8-bit immediates
 1891     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1892     $$$emit32$src$$constant;
 1893   %}
 1894 
 1895   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1896     // Load immediate does not have a zero or sign extended version
 1897     // for 8-bit immediates
 1898     emit_opcode(cbuf, $primary + $dst$$reg);
 1899     $$$emit32$src$$constant;
 1900   %}
 1901 
 1902   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1903     // Load immediate does not have a zero or sign extended version
 1904     // for 8-bit immediates
 1905     int dst_enc = $dst$$reg;
 1906     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1907     if (src_con == 0) {
 1908       // xor dst, dst
 1909       emit_opcode(cbuf, 0x33);
 1910       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1911     } else {
 1912       emit_opcode(cbuf, $primary + dst_enc);
 1913       emit_d32(cbuf, src_con);
 1914     }
 1915   %}
 1916 
 1917   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1918     // Load immediate does not have a zero or sign extended version
 1919     // for 8-bit immediates
 1920     int dst_enc = $dst$$reg + 2;
 1921     int src_con = ((julong)($src$$constant)) >> 32;
 1922     if (src_con == 0) {
 1923       // xor dst, dst
 1924       emit_opcode(cbuf, 0x33);
 1925       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1926     } else {
 1927       emit_opcode(cbuf, $primary + dst_enc);
 1928       emit_d32(cbuf, src_con);
 1929     }
 1930   %}
 1931 
 1932 
 1933   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1934   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1935     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1936   %}
 1937 
 1938   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1939     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1940   %}
 1941 
 1942   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1943     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1944   %}
 1945 
 1946   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1947     $$$emit8$primary;
 1948     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1949   %}
 1950 
 1951   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1952     $$$emit8$secondary;
 1953     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1954   %}
 1955 
 1956   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1957     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1958   %}
 1959 
 1960   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1961     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1962   %}
 1963 
 1964   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1965     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1966   %}
 1967 
 1968   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1969     // Output immediate
 1970     $$$emit32$src$$constant;
 1971   %}
 1972 
 1973   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1974     // Output Float immediate bits
 1975     jfloat jf = $src$$constant;
 1976     int    jf_as_bits = jint_cast( jf );
 1977     emit_d32(cbuf, jf_as_bits);
 1978   %}
 1979 
 1980   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1981     // Output Float immediate bits
 1982     jfloat jf = $src$$constant;
 1983     int    jf_as_bits = jint_cast( jf );
 1984     emit_d32(cbuf, jf_as_bits);
 1985   %}
 1986 
 1987   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1988     // Output immediate
 1989     $$$emit16$src$$constant;
 1990   %}
 1991 
 1992   enc_class Con_d32(immI src) %{
 1993     emit_d32(cbuf,$src$$constant);
 1994   %}
 1995 
 1996   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1997     // Output immediate memory reference
 1998     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1999     emit_d32(cbuf, 0x00);
 2000   %}
 2001 
 2002   enc_class lock_prefix( ) %{
 2003     emit_opcode(cbuf,0xF0);         // [Lock]
 2004   %}
 2005 
 2006   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2011   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2012 
 2013     // XCHG  rbx,ecx
 2014     emit_opcode(cbuf,0x87);
 2015     emit_opcode(cbuf,0xD9);
 2016     // [Lock]
 2017     emit_opcode(cbuf,0xF0);
 2018     // CMPXCHG8 [Eptr]
 2019     emit_opcode(cbuf,0x0F);
 2020     emit_opcode(cbuf,0xC7);
 2021     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2022     // XCHG  rbx,ecx
 2023     emit_opcode(cbuf,0x87);
 2024     emit_opcode(cbuf,0xD9);
 2025   %}
 2026 
 2027   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2028     // [Lock]
 2029     emit_opcode(cbuf,0xF0);
 2030 
 2031     // CMPXCHG [Eptr]
 2032     emit_opcode(cbuf,0x0F);
 2033     emit_opcode(cbuf,0xB1);
 2034     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2035   %}
 2036 
 2037   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2038     // [Lock]
 2039     emit_opcode(cbuf,0xF0);
 2040 
 2041     // CMPXCHGB [Eptr]
 2042     emit_opcode(cbuf,0x0F);
 2043     emit_opcode(cbuf,0xB0);
 2044     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2045   %}
 2046 
 2047   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2048     // [Lock]
 2049     emit_opcode(cbuf,0xF0);
 2050 
    // operand-size prefix (16-bit operand)
 2052     emit_opcode(cbuf, 0x66);
 2053 
 2054     // CMPXCHGW [Eptr]
 2055     emit_opcode(cbuf,0x0F);
 2056     emit_opcode(cbuf,0xB1);
 2057     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2058   %}
 2059 
 2060   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2061     int res_encoding = $res$$reg;
 2062 
 2063     // MOV  res,0
 2064     emit_opcode( cbuf, 0xB8 + res_encoding);
 2065     emit_d32( cbuf, 0 );
 2066     // JNE,s  fail
 2067     emit_opcode(cbuf,0x75);
 2068     emit_d8(cbuf, 5 );
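    // (the 5-byte skip is exactly the MOV res,1 below: 0xB8+reg plus a 4-byte immediate)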
 2069     // MOV  res,1
 2070     emit_opcode( cbuf, 0xB8 + res_encoding);
 2071     emit_d32( cbuf, 1 );
 2072     // fail:
 2073   %}
 2074 
 2075   enc_class set_instruction_start( ) %{
 2076     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2077   %}
 2078 
 2079   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2080     int reg_encoding = $ereg$$reg;
 2081     int base  = $mem$$base;
 2082     int index = $mem$$index;
 2083     int scale = $mem$$scale;
 2084     int displace = $mem$$disp;
 2085     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2086     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2087   %}
 2088 
 2089   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2090     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2091     int base  = $mem$$base;
 2092     int index = $mem$$index;
 2093     int scale = $mem$$scale;
 2094     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2095     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2096     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2097   %}
 2098 
 2099   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
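    // 0F A4 encodes SHLD and 0F AC encodes SHRD; r2 is picked as the half that
    // receives the bits shifted across (the r/m operand of the double shift)
    // and r1 as the half that gets the plain follow-up shift.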
 2100     int r1, r2;
 2101     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2102     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2103     emit_opcode(cbuf,0x0F);
 2104     emit_opcode(cbuf,$tertiary);
 2105     emit_rm(cbuf, 0x3, r1, r2);
 2106     emit_d8(cbuf,$cnt$$constant);
 2107     emit_d8(cbuf,$primary);
 2108     emit_rm(cbuf, 0x3, $secondary, r1);
 2109     emit_d8(cbuf,$cnt$$constant);
 2110   %}
 2111 
 2112   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2113     emit_opcode( cbuf, 0x8B ); // Move
 2114     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2115     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2116       emit_d8(cbuf,$primary);
 2117       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2118       emit_d8(cbuf,$cnt$$constant-32);
 2119     }
 2120     emit_d8(cbuf,$primary);
 2121     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2122     emit_d8(cbuf,31);
 2123   %}
 2124 
 2125   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2126     int r1, r2;
 2127     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2128     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2129 
 2130     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2131     emit_rm(cbuf, 0x3, r1, r2);
 2132     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2133       emit_opcode(cbuf,$primary);
 2134       emit_rm(cbuf, 0x3, $secondary, r1);
 2135       emit_d8(cbuf,$cnt$$constant-32);
 2136     }
 2137     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2138     emit_rm(cbuf, 0x3, r2, r2);
 2139   %}
 2140 
 2141   // Clone of RegMem but accepts an extra parameter to access each
 2142   // half of a double in memory; it never needs relocation info.
 2143   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2144     emit_opcode(cbuf,$opcode$$constant);
 2145     int reg_encoding = $rm_reg$$reg;
 2146     int base     = $mem$$base;
 2147     int index    = $mem$$index;
 2148     int scale    = $mem$$scale;
 2149     int displace = $mem$$disp + $disp_for_half$$constant;
 2150     relocInfo::relocType disp_reloc = relocInfo::none;
 2151     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2152   %}
 2153 
  // !!!!! Special Custom Code used by MemMove and stack access instructions !!!!!
 2155   //
 2156   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2157   // and it never needs relocation information.
 2158   // Frequently used to move data between FPU's Stack Top and memory.
 2159   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2160     int rm_byte_opcode = $rm_opcode$$constant;
 2161     int base     = $mem$$base;
 2162     int index    = $mem$$index;
 2163     int scale    = $mem$$scale;
 2164     int displace = $mem$$disp;
 2165     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2166     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2167   %}
 2168 
 2169   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2170     int rm_byte_opcode = $rm_opcode$$constant;
 2171     int base     = $mem$$base;
 2172     int index    = $mem$$index;
 2173     int scale    = $mem$$scale;
 2174     int displace = $mem$$disp;
 2175     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2176     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2177   %}
 2178 
 2179   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2180     int reg_encoding = $dst$$reg;
 2181     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2182     int index        = 0x04;            // 0x04 indicates no index
 2183     int scale        = 0x00;            // 0x00 indicates no scale
 2184     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2185     relocInfo::relocType disp_reloc = relocInfo::none;
 2186     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2187   %}
 2188 
 2189   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2190     // Compare dst,src
 2191     emit_opcode(cbuf,0x3B);
 2192     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2193     // jmp dst < src around move
 2194     emit_opcode(cbuf,0x7C);
 2195     emit_d8(cbuf,2);
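    // (the 2-byte skip is exactly the MOV below: opcode 0x8B plus a ModRM byte)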
 2196     // move dst,src
 2197     emit_opcode(cbuf,0x8B);
 2198     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2199   %}
 2200 
 2201   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2202     // Compare dst,src
 2203     emit_opcode(cbuf,0x3B);
 2204     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2205     // jmp dst > src around move
 2206     emit_opcode(cbuf,0x7F);
 2207     emit_d8(cbuf,2);
 2208     // move dst,src
 2209     emit_opcode(cbuf,0x8B);
 2210     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2211   %}
 2212 
 2213   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2214     // If src is FPR1, we can just FST to store it.
 2215     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2216     int reg_encoding = 0x2; // Just store
 2217     int base  = $mem$$base;
 2218     int index = $mem$$index;
 2219     int scale = $mem$$scale;
 2220     int displace = $mem$$disp;
 2221     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2222     if( $src$$reg != FPR1L_enc ) {
 2223       reg_encoding = 0x3;  // Store & pop
 2224       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2225       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2226     }
 2227     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2228     emit_opcode(cbuf,$primary);
 2229     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2230   %}
 2231 
 2232   enc_class neg_reg(rRegI dst) %{
 2233     // NEG $dst
 2234     emit_opcode(cbuf,0xF7);
 2235     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2236   %}
 2237 
 2238   enc_class setLT_reg(eCXRegI dst) %{
 2239     // SETLT $dst
 2240     emit_opcode(cbuf,0x0F);
 2241     emit_opcode(cbuf,0x9C);
 2242     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2243   %}
 2244 
 2245   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
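    // Branch-free conditional add: SBB materializes -1 (borrow) or 0 from the
    // SUB's carry, masks $y with it, and the final ADD yields p-q+y when the
    // subtraction borrowed and p-q otherwise.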
 2246     int tmpReg = $tmp$$reg;
 2247 
 2248     // SUB $p,$q
 2249     emit_opcode(cbuf,0x2B);
 2250     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2251     // SBB $tmp,$tmp
 2252     emit_opcode(cbuf,0x1B);
 2253     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2254     // AND $tmp,$y
 2255     emit_opcode(cbuf,0x23);
 2256     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2257     // ADD $p,$tmp
 2258     emit_opcode(cbuf,0x03);
 2259     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2260   %}
 2261 
 2262   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2263     // TEST shift,32
 2264     emit_opcode(cbuf,0xF7);
 2265     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2266     emit_d32(cbuf,0x20);
 2267     // JEQ,s small
 2268     emit_opcode(cbuf, 0x74);
 2269     emit_d8(cbuf, 0x04);
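    // (the 4-byte skip covers the 2-byte MOV and the 2-byte XOR below)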
 2270     // MOV    $dst.hi,$dst.lo
 2271     emit_opcode( cbuf, 0x8B );
 2272     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2273     // CLR    $dst.lo
 2274     emit_opcode(cbuf, 0x33);
 2275     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2276 // small:
 2277     // SHLD   $dst.hi,$dst.lo,$shift
 2278     emit_opcode(cbuf,0x0F);
 2279     emit_opcode(cbuf,0xA5);
 2280     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2282     emit_opcode(cbuf,0xD3);
 2283     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2284   %}
 2285 
 2286   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2287     // TEST shift,32
 2288     emit_opcode(cbuf,0xF7);
 2289     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2290     emit_d32(cbuf,0x20);
 2291     // JEQ,s small
 2292     emit_opcode(cbuf, 0x74);
 2293     emit_d8(cbuf, 0x04);
 2294     // MOV    $dst.lo,$dst.hi
 2295     emit_opcode( cbuf, 0x8B );
 2296     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2297     // CLR    $dst.hi
 2298     emit_opcode(cbuf, 0x33);
 2299     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2300 // small:
 2301     // SHRD   $dst.lo,$dst.hi,$shift
 2302     emit_opcode(cbuf,0x0F);
 2303     emit_opcode(cbuf,0xAD);
 2304     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2306     emit_opcode(cbuf,0xD3);
 2307     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2308   %}
 2309 
 2310   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2311     // TEST shift,32
 2312     emit_opcode(cbuf,0xF7);
 2313     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2314     emit_d32(cbuf,0x20);
 2315     // JEQ,s small
 2316     emit_opcode(cbuf, 0x74);
 2317     emit_d8(cbuf, 0x05);
 2318     // MOV    $dst.lo,$dst.hi
 2319     emit_opcode( cbuf, 0x8B );
 2320     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2321     // SAR    $dst.hi,31
 2322     emit_opcode(cbuf, 0xC1);
 2323     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2324     emit_d8(cbuf, 0x1F );
 2325 // small:
 2326     // SHRD   $dst.lo,$dst.hi,$shift
 2327     emit_opcode(cbuf,0x0F);
 2328     emit_opcode(cbuf,0xAD);
 2329     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2331     emit_opcode(cbuf,0xD3);
 2332     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2333   %}
 2334 
 2335 
 2336   // ----------------- Encodings for floating point unit -----------------
 2337   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2338   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2339     $$$emit8$primary;
 2340     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2341   %}
 2342 
 2343   // Pop argument in FPR0 with FSTP ST(0)
 2344   enc_class PopFPU() %{
 2345     emit_opcode( cbuf, 0xDD );
 2346     emit_d8( cbuf, 0xD8 );
 2347   %}
 2348 
 2349   // !!!!! equivalent to Pop_Reg_F
 2350   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2351     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2352     emit_d8( cbuf, 0xD8+$dst$$reg );
 2353   %}
 2354 
 2355   enc_class Push_Reg_DPR( regDPR dst ) %{
 2356     emit_opcode( cbuf, 0xD9 );
 2357     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2358   %}
 2359 
 2360   enc_class strictfp_bias1( regDPR dst ) %{
 2361     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2362     emit_opcode( cbuf, 0x2D );
 2363     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2364     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2365     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2366   %}
 2367 
 2368   enc_class strictfp_bias2( regDPR dst ) %{
 2369     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2370     emit_opcode( cbuf, 0x2D );
 2371     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2372     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2373     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2374   %}
 2375 
 2376   // Special case for moving an integer register to a stack slot.
 2377   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2378     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2379   %}
 2380 
 2381   // Special case for moving a register to a stack slot.
 2382   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2383     // Opcode already emitted
 2384     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2385     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2386     emit_d32(cbuf, $dst$$disp);   // Displacement
 2387   %}
 2388 
 2389   // Push the integer in stackSlot 'src' onto FP-stack
 2390   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2391     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2392   %}
 2393 
 2394   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2395   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2396     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2397   %}
 2398 
 2399   // Same as Pop_Mem_F except for opcode
 2400   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2401   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2402     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2403   %}
 2404 
 2405   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2406     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2407     emit_d8( cbuf, 0xD8+$dst$$reg );
 2408   %}
 2409 
 2410   enc_class Push_Reg_FPR( regFPR dst ) %{
 2411     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2412     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2413   %}
 2414 
 2415   // Push FPU's float to a stack-slot, and pop FPU-stack
 2416   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2417     int pop = 0x02;
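    // D9 /2 is FST (store, keep TOS), used when src is already FPR1; otherwise
    // the extra FLD below pushes a copy and /3 (FSTP) stores and pops it.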
 2418     if ($src$$reg != FPR1L_enc) {
 2419       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2420       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2421       pop = 0x03;
 2422     }
 2423     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2424   %}
 2425 
 2426   // Push FPU's double to a stack-slot, and pop FPU-stack
 2427   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2428     int pop = 0x02;
 2429     if ($src$$reg != FPR1L_enc) {
 2430       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2431       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2432       pop = 0x03;
 2433     }
 2434     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2435   %}
 2436 
 2437   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2438   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2439     int pop = 0xD0 - 1; // -1 since we skip FLD
 2440     if ($src$$reg != FPR1L_enc) {
 2441       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2442       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2443       pop = 0xD8;
 2444     }
 2445     emit_opcode( cbuf, 0xDD );
 2446     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2447   %}
 2448 
 2449 
 2450   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2451     // load dst in FPR0
 2452     emit_opcode( cbuf, 0xD9 );
 2453     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2454     if ($src$$reg != FPR1L_enc) {
 2455       // fincstp
 2456       emit_opcode (cbuf, 0xD9);
 2457       emit_opcode (cbuf, 0xF7);
 2458       // swap src with FPR1:
 2459       // FXCH FPR1 with src
 2460       emit_opcode(cbuf, 0xD9);
 2461       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2462       // fdecstp
 2463       emit_opcode (cbuf, 0xD9);
 2464       emit_opcode (cbuf, 0xF6);
 2465     }
 2466   %}
 2467 
 2468   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2469     MacroAssembler _masm(&cbuf);
 2470     __ subptr(rsp, 8);
 2471     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2472     __ fld_d(Address(rsp, 0));
 2473     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2474     __ fld_d(Address(rsp, 0));
 2475   %}
 2476 
 2477   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2478     MacroAssembler _masm(&cbuf);
 2479     __ subptr(rsp, 4);
 2480     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2481     __ fld_s(Address(rsp, 0));
 2482     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2483     __ fld_s(Address(rsp, 0));
 2484   %}
 2485 
 2486   enc_class Push_ResultD(regD dst) %{
 2487     MacroAssembler _masm(&cbuf);
 2488     __ fstp_d(Address(rsp, 0));
 2489     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2490     __ addptr(rsp, 8);
 2491   %}
 2492 
 2493   enc_class Push_ResultF(regF dst, immI d8) %{
 2494     MacroAssembler _masm(&cbuf);
 2495     __ fstp_s(Address(rsp, 0));
 2496     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2497     __ addptr(rsp, $d8$$constant);
 2498   %}
 2499 
 2500   enc_class Push_SrcD(regD src) %{
 2501     MacroAssembler _masm(&cbuf);
 2502     __ subptr(rsp, 8);
 2503     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2504     __ fld_d(Address(rsp, 0));
 2505   %}
 2506 
 2507   enc_class push_stack_temp_qword() %{
 2508     MacroAssembler _masm(&cbuf);
 2509     __ subptr(rsp, 8);
 2510   %}
 2511 
 2512   enc_class pop_stack_temp_qword() %{
 2513     MacroAssembler _masm(&cbuf);
 2514     __ addptr(rsp, 8);
 2515   %}
 2516 
 2517   enc_class push_xmm_to_fpr1(regD src) %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2520     __ fld_d(Address(rsp, 0));
 2521   %}
 2522 
 2523   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2524     if ($src$$reg != FPR1L_enc) {
 2525       // fincstp
 2526       emit_opcode (cbuf, 0xD9);
 2527       emit_opcode (cbuf, 0xF7);
 2528       // FXCH FPR1 with src
 2529       emit_opcode(cbuf, 0xD9);
 2530       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2531       // fdecstp
 2532       emit_opcode (cbuf, 0xD9);
 2533       emit_opcode (cbuf, 0xF6);
 2534     }
 2535     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2536     // // FSTP   FPR$dst$$reg
 2537     // emit_opcode( cbuf, 0xDD );
 2538     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2539   %}
 2540 
 2541   enc_class fnstsw_sahf_skip_parity() %{
 2542     // fnstsw ax
 2543     emit_opcode( cbuf, 0xDF );
 2544     emit_opcode( cbuf, 0xE0 );
 2545     // sahf
 2546     emit_opcode( cbuf, 0x9E );
 2547     // jnp  ::skip
 2548     emit_opcode( cbuf, 0x7B );
 2549     emit_opcode( cbuf, 0x05 );
 2550   %}
 2551 
 2552   enc_class emitModDPR() %{
 2553     // fprem must be iterative
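    // FPREM sets C2 while the reduction is incomplete; after FNSTSW AX / SAHF,
    // C2 lands in PF, so the JP below repeats the loop until it is done.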
 2554     // :: loop
 2555     // fprem
 2556     emit_opcode( cbuf, 0xD9 );
 2557     emit_opcode( cbuf, 0xF8 );
 2558     // wait
 2559     emit_opcode( cbuf, 0x9b );
 2560     // fnstsw ax
 2561     emit_opcode( cbuf, 0xDF );
 2562     emit_opcode( cbuf, 0xE0 );
 2563     // sahf
 2564     emit_opcode( cbuf, 0x9E );
 2565     // jp  ::loop
 2566     emit_opcode( cbuf, 0x0F );
 2567     emit_opcode( cbuf, 0x8A );
 2568     emit_opcode( cbuf, 0xF4 );
 2569     emit_opcode( cbuf, 0xFF );
 2570     emit_opcode( cbuf, 0xFF );
 2571     emit_opcode( cbuf, 0xFF );
 2572   %}
 2573 
 2574   enc_class fpu_flags() %{
 2575     // fnstsw_ax
 2576     emit_opcode( cbuf, 0xDF);
 2577     emit_opcode( cbuf, 0xE0);
 2578     // test ax,0x0400
 2579     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2580     emit_opcode( cbuf, 0xA9 );
 2581     emit_d16   ( cbuf, 0x0400 );
 2582     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2583     // // test rax,0x0400
 2584     // emit_opcode( cbuf, 0xA9 );
 2585     // emit_d32   ( cbuf, 0x00000400 );
 2586     //
 2587     // jz exit (no unordered comparison)
 2588     emit_opcode( cbuf, 0x74 );
 2589     emit_d8    ( cbuf, 0x02 );
 2590     // mov ah,1 - treat as LT case (set carry flag)
 2591     emit_opcode( cbuf, 0xB4 );
 2592     emit_d8    ( cbuf, 0x01 );
 2593     // sahf
 2594     emit_opcode( cbuf, 0x9E);
 2595   %}
 2596 
 2597   enc_class cmpF_P6_fixup() %{
 2598     // Fixup the integer flags in case comparison involved a NaN
 2599     //
 2600     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2601     emit_opcode( cbuf, 0x7B );
 2602     emit_d8    ( cbuf, 0x03 );
 2603     // MOV AH,1 - treat as LT case (set carry flag)
 2604     emit_opcode( cbuf, 0xB4 );
 2605     emit_d8    ( cbuf, 0x01 );
 2606     // SAHF
 2607     emit_opcode( cbuf, 0x9E);
 2608     // NOP     // target for branch to avoid branch to branch
 2609     emit_opcode( cbuf, 0x90);
 2610   %}
 2611 
 2612 //     fnstsw_ax();
 2613 //     sahf();
 2614 //     movl(dst, nan_result);
 2615 //     jcc(Assembler::parity, exit);
 2616 //     movl(dst, less_result);
 2617 //     jcc(Assembler::below, exit);
 2618 //     movl(dst, equal_result);
 2619 //     jcc(Assembler::equal, exit);
 2620 //     movl(dst, greater_result);
 2621 
 2622 // less_result     =  1;
 2623 // greater_result  = -1;
 2624 // equal_result    = 0;
 2625 // nan_result      = -1;
 2626 
 2627   enc_class CmpF_Result(rRegI dst) %{
 2628     // fnstsw_ax();
 2629     emit_opcode( cbuf, 0xDF);
 2630     emit_opcode( cbuf, 0xE0);
 2631     // sahf
 2632     emit_opcode( cbuf, 0x9E);
 2633     // movl(dst, nan_result);
 2634     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2635     emit_d32( cbuf, -1 );
 2636     // jcc(Assembler::parity, exit);
 2637     emit_opcode( cbuf, 0x7A );
 2638     emit_d8    ( cbuf, 0x13 );
 2639     // movl(dst, less_result);
 2640     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2641     emit_d32( cbuf, -1 );
 2642     // jcc(Assembler::below, exit);
 2643     emit_opcode( cbuf, 0x72 );
 2644     emit_d8    ( cbuf, 0x0C );
 2645     // movl(dst, equal_result);
 2646     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2647     emit_d32( cbuf, 0 );
 2648     // jcc(Assembler::equal, exit);
 2649     emit_opcode( cbuf, 0x74 );
 2650     emit_d8    ( cbuf, 0x05 );
 2651     // movl(dst, greater_result);
 2652     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2653     emit_d32( cbuf, 1 );
 2654   %}
 2655 
 2656 
 2657   // Compare the longs and set flags
 2658   // BROKEN!  Do Not use as-is
 2659   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2660     // CMP    $src1.hi,$src2.hi
 2661     emit_opcode( cbuf, 0x3B );
 2662     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2663     // JNE,s  done
 2664     emit_opcode(cbuf,0x75);
 2665     emit_d8(cbuf, 2 );
 2666     // CMP    $src1.lo,$src2.lo
 2667     emit_opcode( cbuf, 0x3B );
 2668     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2669 // done:
 2670   %}
 2671 
 2672   enc_class convert_int_long( regL dst, rRegI src ) %{
 2673     // mov $dst.lo,$src
 2674     int dst_encoding = $dst$$reg;
 2675     int src_encoding = $src$$reg;
 2676     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2677     // mov $dst.hi,$src
 2678     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2679     // sar $dst.hi,31
 2680     emit_opcode( cbuf, 0xC1 );
 2681     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2682     emit_d8(cbuf, 0x1F );
 2683   %}
 2684 
 2685   enc_class convert_long_double( eRegL src ) %{
 2686     // push $src.hi
 2687     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2688     // push $src.lo
 2689     emit_opcode(cbuf, 0x50+$src$$reg  );
 2690     // fild 64-bits at [SP]
 2691     emit_opcode(cbuf,0xdf);
 2692     emit_d8(cbuf, 0x6C);
 2693     emit_d8(cbuf, 0x24);
 2694     emit_d8(cbuf, 0x00);
 2695     // pop stack
 2696     emit_opcode(cbuf, 0x83); // add  SP, #8
 2697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2698     emit_d8(cbuf, 0x8);
 2699   %}
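
        // Illustrative sketch (not generated code; src_hi/src_lo are the two
        // halves of the long): what the PUSH/PUSH/FILD sequence above amounts
        // to.  Pushing $src.hi and then $src.lo leaves the eight bytes at [ESP]
        // laid out (little-endian) as one 64-bit integer, which FILD then
        // converts to a value on the FPU stack.
        //
        //   int64_t v = ((int64_t)src_hi << 32) | (uint32_t)src_lo;
        //   double  d = (double)v;                           // FILD qword ptr [ESP]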
 2700 
 2701   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2702     // IMUL   EDX:EAX,$src1
 2703     emit_opcode( cbuf, 0xF7 );
 2704     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2705     // SAR    EDX,$cnt-32
 2706     int shift_count = ((int)$cnt$$constant) - 32;
 2707     if (shift_count > 0) {
 2708       emit_opcode(cbuf, 0xC1);
 2709       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2710       emit_d8(cbuf, shift_count);
 2711     }
 2712   %}
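
        // Illustrative sketch (not generated code; src1 and src2 name the two
        // 32-bit inputs, the second of which lives in EAX): the "widening
        // multiply, keep the high part" pattern encoded above, for a shift
        // count in [32,63].  After the one-operand IMUL the upper 32 bits of
        // the product are already in EDX, so only (cnt-32) bits remain to shift.
        //
        //   int64_t prod = (int64_t)src1 * (int64_t)src2;    // IMUL EDX:EAX,$src1
        //   int32_t dst  = (int32_t)(prod >> cnt);           // == EDX >> (cnt-32)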
 2713 
 2714   // This version does not restore the stack (no trailing ADD ESP,8)
 2715   enc_class convert_long_double2( eRegL src ) %{
 2716     // push $src.hi
 2717     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2718     // push $src.lo
 2719     emit_opcode(cbuf, 0x50+$src$$reg  );
 2720     // fild 64-bits at [SP]
 2721     emit_opcode(cbuf,0xdf);
 2722     emit_d8(cbuf, 0x6C);
 2723     emit_d8(cbuf, 0x24);
 2724     emit_d8(cbuf, 0x00);
 2725   %}
 2726 
 2727   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2728     // Basic idea: long = (long)int * (long)int
 2729     // IMUL EDX:EAX, src
 2730     emit_opcode( cbuf, 0xF7 );
 2731     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2732   %}
 2733 
 2734   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2735     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2736     // MUL EDX:EAX, src
 2737     emit_opcode( cbuf, 0xF7 );
 2738     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2739   %}
 2740 
 2741   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2742     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2743     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2744     // MOV    $tmp,$src.lo
 2745     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2746     // IMUL   $tmp,EDX
 2747     emit_opcode( cbuf, 0x0F );
 2748     emit_opcode( cbuf, 0xAF );
 2749     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2750     // MOV    EDX,$src.hi
 2751     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2752     // IMUL   EDX,EAX
 2753     emit_opcode( cbuf, 0x0F );
 2754     emit_opcode( cbuf, 0xAF );
 2755     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2756     // ADD    $tmp,EDX
 2757     emit_opcode( cbuf, 0x03 );
 2758     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2759     // MUL   EDX:EAX,$src.lo
 2760     emit_opcode( cbuf, 0xF7 );
 2761     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2762     // ADD    EDX,$tmp
 2763     emit_opcode( cbuf, 0x03 );
 2764     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2765   %}
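
        // Illustrative sketch (not generated code; x_lo/x_hi and y_lo/y_hi are
        // the 32-bit halves of the two operands): the 64x64->64 bit multiply
        // decomposition described above.  Only the low 64 bits of the product
        // are kept, so the x_hi*y_hi term never matters.
        //
        //   uint64_t p  = (uint64_t)x_lo * y_lo;             // MUL  EDX:EAX,$src.lo
        //   uint32_t hi = (uint32_t)(p >> 32)                // hi(x_lo*y_lo)
        //               + x_hi * y_lo                        // lo(x_hi*y_lo)
        //               + x_lo * y_hi;                       // lo(x_lo*y_hi)
        //   uint64_t result = ((uint64_t)hi << 32) | (uint32_t)p;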
 2766 
 2767   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2768     // Basic idea: lo(result) = lo(src * y_lo)
 2769     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2770     // IMUL   $tmp,EDX,$src
 2771     emit_opcode( cbuf, 0x6B );
 2772     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2773     emit_d8( cbuf, (int)$src$$constant );
 2774     // MOV    EDX,$src
 2775     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2776     emit_d32( cbuf, (int)$src$$constant );
 2777     // MUL   EDX:EAX,EDX
 2778     emit_opcode( cbuf, 0xF7 );
 2779     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2780     // ADD    EDX,$tmp
 2781     emit_opcode( cbuf, 0x03 );
 2782     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2783   %}
 2784 
 2785   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2786     // PUSH src1.hi
 2787     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2788     // PUSH src1.lo
 2789     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2790     // PUSH src2.hi
 2791     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2792     // PUSH src2.lo
 2793     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2794     // CALL directly to the runtime
 2795     MacroAssembler _masm(&cbuf);
 2796     cbuf.set_insts_mark();
 2797     emit_opcode(cbuf,0xE8);       // Call into runtime
 2798     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2799     __ post_call_nop();
 2800     // Restore stack
 2801     emit_opcode(cbuf, 0x83); // ADD  ESP,#16 (pop the four pushed words)
 2802     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2803     emit_d8(cbuf, 4*4);
 2804   %}
 2805 
 2806   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2807     // PUSH src1.hi
 2808     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2809     // PUSH src1.lo
 2810     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2811     // PUSH src2.hi
 2812     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2813     // PUSH src2.lo
 2814     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2815     // CALL directly to the runtime
 2816     MacroAssembler _masm(&cbuf);
 2817     cbuf.set_insts_mark();
 2818     emit_opcode(cbuf,0xE8);       // Call into runtime
 2819     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2820     __ post_call_nop();
 2821     // Restore stack
 2822     emit_opcode(cbuf, 0x83); // ADD  ESP,#16 (pop the four pushed words)
 2823     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2824     emit_d8(cbuf, 4*4);
 2825   %}
 2826 
 2827   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2828     // MOV   $tmp,$src.lo
 2829     emit_opcode(cbuf, 0x8B);
 2830     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2831     // OR    $tmp,$src.hi
 2832     emit_opcode(cbuf, 0x0B);
 2833     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2834   %}
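
        // Illustrative sketch (not generated code; src_lo/src_hi are the two
        // halves of the long): testing a 64-bit value for zero with 32-bit
        // operations.  ORing the halves together sets ZF exactly when the
        // whole long is zero, which is all a following EQ/NE test needs.
        //
        //   bool is_zero = ((src_lo | src_hi) == 0);         // MOV tmp,lo; OR tmp,hi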
 2835 
 2836   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2837     // CMP    $src1.lo,$src2.lo
 2838     emit_opcode( cbuf, 0x3B );
 2839     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2840     // JNE,s  skip
 2841     emit_cc(cbuf, 0x70, 0x5);
 2842     emit_d8(cbuf,2);
 2843     // CMP    $src1.hi,$src2.hi
 2844     emit_opcode( cbuf, 0x3B );
 2845     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2846   %}
 2847 
 2848   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2849     // CMP    $src1.lo,$src2.lo   ! Long compare; set flags for low bits
 2850     emit_opcode( cbuf, 0x3B );
 2851     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2852     // MOV    $tmp,$src1.hi
 2853     emit_opcode( cbuf, 0x8B );
 2854     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2855     // SBB    $tmp,$src2.hi   ! Compute flags for long compare
 2856     emit_opcode( cbuf, 0x1B );
 2857     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2858   %}
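
        // Illustrative sketch (not generated code): the CMP/SBB pair above
        // leaves the flags of the full 64-bit subtract src1 - src2 without
        // keeping the difference: subtract the low halves to get the borrow,
        // then subtract the high halves with that borrow.  Signed LT/GE tests
        // can then be made directly on the resulting flags.
        //
        //   unsigned borrow = (src1_lo < src2_lo);           // CMP $src1.lo,$src2.lo
        //   int32_t  hi_dif = src1_hi - src2_hi - borrow;    // SBB $tmp,$src2.hi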
 2859 
 2860   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2861     // XOR    $tmp,$tmp
 2862     emit_opcode(cbuf,0x33);  // XOR
 2863     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2864     // CMP    $tmp,$src.lo
 2865     emit_opcode( cbuf, 0x3B );
 2866     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2867     // SBB    $tmp,$src.hi
 2868     emit_opcode( cbuf, 0x1B );
 2869     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2870   %}
 2871 
 2872  // Sniff, sniff... smells like Gnu Superoptimizer
 2873   enc_class neg_long( eRegL dst ) %{
 2874     emit_opcode(cbuf,0xF7);    // NEG hi
 2875     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2876     emit_opcode(cbuf,0xF7);    // NEG lo
 2877     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2878     emit_opcode(cbuf,0x83);    // SBB hi,0
 2879     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2880     emit_d8    (cbuf,0 );
 2881   %}
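
        // Illustrative sketch (not generated code; x_lo/x_hi are the halves of
        // the long being negated): 64-bit negation out of 32-bit pieces.
        // Negate both halves, then subtract from the high half the borrow
        // generated by negating a non-zero low half.
        //
        //   uint32_t lo = -x_lo;                             // NEG lo (CF = x_lo != 0)
        //   uint32_t hi = -x_hi - (x_lo != 0);               // NEG hi, then SBB hi,0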
 2882 
 2883   enc_class enc_pop_rdx() %{
 2884     emit_opcode(cbuf,0x5A);
 2885   %}
 2886 
 2887   enc_class enc_rethrow() %{
 2888     MacroAssembler _masm(&cbuf);
 2889     cbuf.set_insts_mark();
 2890     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2891     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2892                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2893     __ post_call_nop();
 2894   %}
 2895 
 2896 
 2897   // Convert a double to an int.  Java semantics require we do complex
 2898   // manglelations in the corner cases.  So we set the rounding mode to
 2899   // 'zero', store the darned double down as an int, and reset the
 2900   // rounding mode to 'nearest'.  For the corner cases the hardware stores an
 2901   // indefinite value, which the code below detects and fixes up with a runtime call.
 2902   enc_class DPR2I_encoding( regDPR src ) %{
 2903     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2904     // exceptions here, so that a NaN or other corner-case value will
 2905     // throw an exception (but normal values get converted at full speed).
 2906     // However, I2C adapters and other float-stack manglers leave pending
 2907     // invalid-op exceptions hanging.  We would have to clear them before
 2908     // enabling them and that is more expensive than just testing for the
 2909     // invalid value Intel stores down in the corner cases.
 2910     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2911     emit_opcode(cbuf,0x2D);
 2912     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2913     // Allocate a word
 2914     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2915     emit_opcode(cbuf,0xEC);
 2916     emit_d8(cbuf,0x04);
 2917     // Encoding assumes a double has been pushed into FPR0.
 2918     // Store down the double as an int, popping the FPU stack
 2919     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2920     emit_opcode(cbuf,0x1C);
 2921     emit_d8(cbuf,0x24);
 2922     // Restore the rounding mode; mask the exception
 2923     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2924     emit_opcode(cbuf,0x2D);
 2925     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2926         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2927         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2928 
 2929     // Load the converted int; adjust CPU stack
 2930     emit_opcode(cbuf,0x58);       // POP EAX
 2931     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2932     emit_d32   (cbuf,0x80000000); //         0x80000000
 2933     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2934     emit_d8    (cbuf,0x07);       // Size of slow_call
 2935     // Push src onto stack slow-path
 2936     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2937     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2938     // CALL directly to the runtime
 2939     MacroAssembler _masm(&cbuf);
 2940     cbuf.set_insts_mark();
 2941     emit_opcode(cbuf,0xE8);       // Call into runtime
 2942     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2943     __ post_call_nop();
 2944     // Carry on here...
 2945   %}
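
        // Illustrative sketch (not generated code; the helper names are
        // placeholders): the fast/slow split used above.  With the invalid-op
        // exception masked, a truncating FISTP stores the integer-indefinite
        // value 0x80000000 for NaN and out-of-range inputs, so only that one
        // value needs the runtime call that produces the Java-specified answer.
        //
        //   int32_t r = fistp_truncated(d);                  // fast path
        //   if ((uint32_t)r == 0x80000000u)                  // CMP EAX,0x80000000
        //     r = d2i_slow_path(d);                          // JNE skips this call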
 2946 
 2947   enc_class DPR2L_encoding( regDPR src ) %{
 2948     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2949     emit_opcode(cbuf,0x2D);
 2950     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2951     // Allocate two words (8 bytes)
 2952     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2953     emit_opcode(cbuf,0xEC);
 2954     emit_d8(cbuf,0x08);
 2955     // Encoding assumes a double has been pushed into FPR0.
 2956     // Store down the double as a long, popping the FPU stack
 2957     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2958     emit_opcode(cbuf,0x3C);
 2959     emit_d8(cbuf,0x24);
 2960     // Restore the rounding mode; mask the exception
 2961     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2962     emit_opcode(cbuf,0x2D);
 2963     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2964         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2965         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2966 
 2967     // Load the converted long; adjust CPU stack
 2968     emit_opcode(cbuf,0x58);       // POP EAX
 2969     emit_opcode(cbuf,0x5A);       // POP EDX
 2970     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2971     emit_d8    (cbuf,0xFA);       // rdx
 2972     emit_d32   (cbuf,0x80000000); //         0x80000000
 2973     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2974     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2975     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2976     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2977     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2978     emit_d8    (cbuf,0x07);       // Size of slow_call
 2979     // Push src onto stack slow-path
 2980     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2981     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2982     // CALL directly to the runtime
 2983     MacroAssembler _masm(&cbuf);
 2984     cbuf.set_insts_mark();
 2985     emit_opcode(cbuf,0xE8);       // Call into runtime
 2986     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2987     __ post_call_nop();
 2988     // Carry on here...
 2989   %}
 2990 
 2991   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2992     // Operand was loaded from memory into fp ST (stack top)
 2993     // FMUL   ST,$src  /* D8 C8+i */
 2994     emit_opcode(cbuf, 0xD8);
 2995     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2996   %}
 2997 
 2998   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2999     // FADD   ST,$src2  /* D8 C0+i */
 3000     emit_opcode(cbuf, 0xD8);
 3001     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3002     // could use FADDP  src2,fpST  /* DE C0+i */
 3003   %}
 3004 
 3005   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3006     // FADDP  src2,ST  /* DE C0+i */
 3007     emit_opcode(cbuf, 0xDE);
 3008     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3009   %}
 3010 
 3011   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3012     // Operand has been loaded into fp ST (stack top)
 3013     // FSUB   ST,$src1
 3014     emit_opcode(cbuf, 0xD8);
 3015     emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3016 
 3017     // FDIV   ST,$src2
 3018     emit_opcode(cbuf, 0xD8);
 3019     emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3020   %}
 3021 
 3022   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3023     // Operand was loaded from memory into fp ST (stack top)
 3024     // FADD   ST,$src  /* D8 C0+i */
 3025     emit_opcode(cbuf, 0xD8);
 3026     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3027 
 3028     // FMUL   ST,$src2  /* D8 C8+i */
 3029     emit_opcode(cbuf, 0xD8);
 3030     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3031   %}
 3032 
 3033 
 3034   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3035     // Operand was loaded from memory into fp ST (stack top)
 3036     // FADD   ST,$src  /* D8 C0+i */
 3037     emit_opcode(cbuf, 0xD8);
 3038     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3039 
 3040     // FMULP  src2,ST  /* DE C8+i */
 3041     emit_opcode(cbuf, 0xDE);
 3042     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3043   %}
 3044 
 3045   // Atomically load the volatile long
 3046   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3047     emit_opcode(cbuf,0xDF);
 3048     int rm_byte_opcode = 0x05;
 3049     int base     = $mem$$base;
 3050     int index    = $mem$$index;
 3051     int scale    = $mem$$scale;
 3052     int displace = $mem$$disp;
 3053     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3054     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3055     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3056   %}
 3057 
 3058   // Volatile Store Long.  Must be atomic, so move it into
 3059   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3060   // target address before the store (for null-ptr checks)
 3061   // so the memory operand is used twice in the encoding.
 3062   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3063     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3064     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3065     emit_opcode(cbuf,0xDF);
 3066     int rm_byte_opcode = 0x07;
 3067     int base     = $mem$$base;
 3068     int index    = $mem$$index;
 3069     int scale    = $mem$$scale;
 3070     int displace = $mem$$disp;
 3071     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3072     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3073   %}
 3074 
 3075 %}
 3076 
 3077 
 3078 //----------FRAME--------------------------------------------------------------
 3079 // Definition of frame structure and management information.
 3080 //
 3081 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3082 //                             |   (to get allocators register number
 3083 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3084 //  r   CALLER     |        |
 3085 //  o     |        +--------+      pad to even-align allocators stack-slot
 3086 //  w     V        |  pad0  |        numbers; owned by CALLER
 3087 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3088 //  h     ^        |   in   |  5
 3089 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3090 //  |     |        |        |  3
 3091 //  |     |        +--------+
 3092 //  V     |        | old out|      Empty on Intel, window on Sparc
 3093 //        |    old |preserve|      Must be even aligned.
 3094 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3095 //        |        |   in   |  3   area for Intel ret address
 3096 //     Owned by    |preserve|      Empty on Sparc.
 3097 //       SELF      +--------+
 3098 //        |        |  pad2  |  2   pad to align old SP
 3099 //        |        +--------+  1
 3100 //        |        | locks  |  0
 3101 //        |        +--------+----> OptoReg::stack0(), even aligned
 3102 //        |        |  pad1  | 11   pad to align new SP
 3103 //        |        +--------+
 3104 //        |        |        | 10
 3105 //        |        | spills |  9   spills
 3106 //        V        |        |  8   (pad0 slot for callee)
 3107 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3108 //        ^        |  out   |  7
 3109 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3110 //     Owned by    +--------+
 3111 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3112 //        |    new |preserve|      Must be even-aligned.
 3113 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3114 //        |        |        |
 3115 //
 3116 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3117 //         known from SELF's arguments and the Java calling convention.
 3118 //         Region 6-7 is determined per call site.
 3119 // Note 2: If the calling convention leaves holes in the incoming argument
 3120 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3121 //         are owned by the CALLEE.  Holes should not be necessary in the
 3122 //         incoming area, as the Java calling convention is completely under
 3123 //         the control of the AD file.  Doubles can be sorted and packed to
 3124 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3125 //         varargs C calling conventions.
 3126 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3127 //         even aligned with pad0 as needed.
 3128 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3129 //         region 6-11 is even aligned; it may be padded out more so that
 3130 //         the region from SP to FP meets the minimum stack alignment.
 3131 
 3132 frame %{
 3133   // These three registers define part of the calling convention
 3134   // between compiled code and the interpreter.
 3135   inline_cache_reg(EAX);                // Inline Cache Register
 3136 
 3137   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3138   cisc_spilling_operand_name(indOffset32);
 3139 
 3140   // Number of stack slots consumed by locking an object
 3141   sync_stack_slots(1);
 3142 
 3143   // Compiled code's Frame Pointer
 3144   frame_pointer(ESP);
 3145   // Interpreter stores its frame pointer in a register which is
 3146   // stored to the stack by I2CAdaptors.
 3147   // I2CAdaptors convert from interpreted Java to compiled Java.
 3148   interpreter_frame_pointer(EBP);
 3149 
 3150   // Stack alignment requirement
 3151   // Alignment size in bytes (128-bit -> 16 bytes)
 3152   stack_alignment(StackAlignmentInBytes);
 3153 
 3154   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3155   // for calls to C.  Supports the var-args backing area for register parms.
 3156   varargs_C_out_slots_killed(0);
 3157 
 3158   // The after-PROLOG location of the return address.  Location of
 3159   // return address specifies a type (REG or STACK) and a number
 3160   // representing the register number (i.e. - use a register name) or
 3161   // stack slot.
 3162   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3163   // Otherwise, it is above the locks and verification slot and alignment word.
 3164   return_addr(STACK - 1 +
 3165               align_up((Compile::current()->in_preserve_stack_slots() +
 3166                         Compile::current()->fixed_slots()),
 3167                        stack_alignment_in_slots()));
 3168 
 3169   // Location of C & interpreter return values
 3170   c_return_value %{
 3171     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3172     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3173     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3174 
 3175     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3176     // that C functions return float and double results in XMM0.
 3177     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3178       return OptoRegPair(XMM0b_num,XMM0_num);
 3179     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3180       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3181 
 3182     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3183   %}
 3184 
 3185   // Location of return values
 3186   return_value %{
 3187     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3188     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3189     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3190     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3191       return OptoRegPair(XMM0b_num,XMM0_num);
 3192     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3193       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3194     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3195   %}
 3196 
 3197 %}
 3198 
 3199 //----------ATTRIBUTES---------------------------------------------------------
 3200 //----------Operand Attributes-------------------------------------------------
 3201 op_attrib op_cost(0);        // Required cost attribute
 3202 
 3203 //----------Instruction Attributes---------------------------------------------
 3204 ins_attrib ins_cost(100);       // Required cost attribute
 3205 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3206 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3207                                 // non-matching short branch variant of some
 3208                                 // long branch?
 3209 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3210                                 // specifies the alignment that some part of the instruction (not
 3211                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3212                                 // function must be provided for the instruction
 3213 
 3214 //----------OPERANDS-----------------------------------------------------------
 3215 // Operand definitions must precede instruction definitions for correct parsing
 3216 // in the ADLC because operands constitute user defined types which are used in
 3217 // instruction definitions.
 3218 
 3219 //----------Simple Operands----------------------------------------------------
 3220 // Immediate Operands
 3221 // Integer Immediate
 3222 operand immI() %{
 3223   match(ConI);
 3224 
 3225   op_cost(10);
 3226   format %{ %}
 3227   interface(CONST_INTER);
 3228 %}
 3229 
 3230 // Constant for test vs zero
 3231 operand immI_0() %{
 3232   predicate(n->get_int() == 0);
 3233   match(ConI);
 3234 
 3235   op_cost(0);
 3236   format %{ %}
 3237   interface(CONST_INTER);
 3238 %}
 3239 
 3240 // Constant for increment
 3241 operand immI_1() %{
 3242   predicate(n->get_int() == 1);
 3243   match(ConI);
 3244 
 3245   op_cost(0);
 3246   format %{ %}
 3247   interface(CONST_INTER);
 3248 %}
 3249 
 3250 // Constant for decrement
 3251 operand immI_M1() %{
 3252   predicate(n->get_int() == -1);
 3253   match(ConI);
 3254 
 3255   op_cost(0);
 3256   format %{ %}
 3257   interface(CONST_INTER);
 3258 %}
 3259 
 3260 // Valid scale values for addressing modes
 3261 operand immI2() %{
 3262   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3263   match(ConI);
 3264 
 3265   format %{ %}
 3266   interface(CONST_INTER);
 3267 %}
 3268 
 3269 operand immI8() %{
 3270   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3271   match(ConI);
 3272 
 3273   op_cost(5);
 3274   format %{ %}
 3275   interface(CONST_INTER);
 3276 %}
 3277 
 3278 operand immU8() %{
 3279   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3280   match(ConI);
 3281 
 3282   op_cost(5);
 3283   format %{ %}
 3284   interface(CONST_INTER);
 3285 %}
 3286 
 3287 operand immI16() %{
 3288   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3289   match(ConI);
 3290 
 3291   op_cost(10);
 3292   format %{ %}
 3293   interface(CONST_INTER);
 3294 %}
 3295 
 3296 // Int Immediate non-negative
 3297 operand immU31()
 3298 %{
 3299   predicate(n->get_int() >= 0);
 3300   match(ConI);
 3301 
 3302   op_cost(0);
 3303   format %{ %}
 3304   interface(CONST_INTER);
 3305 %}
 3306 
 3307 // Constant for long shifts
 3308 operand immI_32() %{
 3309   predicate( n->get_int() == 32 );
 3310   match(ConI);
 3311 
 3312   op_cost(0);
 3313   format %{ %}
 3314   interface(CONST_INTER);
 3315 %}
 3316 
 3317 operand immI_1_31() %{
 3318   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3319   match(ConI);
 3320 
 3321   op_cost(0);
 3322   format %{ %}
 3323   interface(CONST_INTER);
 3324 %}
 3325 
 3326 operand immI_32_63() %{
 3327   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3328   match(ConI);
 3329   op_cost(0);
 3330 
 3331   format %{ %}
 3332   interface(CONST_INTER);
 3333 %}
 3334 
 3335 operand immI_2() %{
 3336   predicate( n->get_int() == 2 );
 3337   match(ConI);
 3338 
 3339   op_cost(0);
 3340   format %{ %}
 3341   interface(CONST_INTER);
 3342 %}
 3343 
 3344 operand immI_3() %{
 3345   predicate( n->get_int() == 3 );
 3346   match(ConI);
 3347 
 3348   op_cost(0);
 3349   format %{ %}
 3350   interface(CONST_INTER);
 3351 %}
 3352 
 3353 operand immI_4()
 3354 %{
 3355   predicate(n->get_int() == 4);
 3356   match(ConI);
 3357 
 3358   op_cost(0);
 3359   format %{ %}
 3360   interface(CONST_INTER);
 3361 %}
 3362 
 3363 operand immI_8()
 3364 %{
 3365   predicate(n->get_int() == 8);
 3366   match(ConI);
 3367 
 3368   op_cost(0);
 3369   format %{ %}
 3370   interface(CONST_INTER);
 3371 %}
 3372 
 3373 // Pointer Immediate
 3374 operand immP() %{
 3375   match(ConP);
 3376 
 3377   op_cost(10);
 3378   format %{ %}
 3379   interface(CONST_INTER);
 3380 %}
 3381 
 3382 // NULL Pointer Immediate
 3383 operand immP0() %{
 3384   predicate( n->get_ptr() == 0 );
 3385   match(ConP);
 3386   op_cost(0);
 3387 
 3388   format %{ %}
 3389   interface(CONST_INTER);
 3390 %}
 3391 
 3392 // Long Immediate
 3393 operand immL() %{
 3394   match(ConL);
 3395 
 3396   op_cost(20);
 3397   format %{ %}
 3398   interface(CONST_INTER);
 3399 %}
 3400 
 3401 // Long Immediate zero
 3402 operand immL0() %{
 3403   predicate( n->get_long() == 0L );
 3404   match(ConL);
 3405   op_cost(0);
 3406 
 3407   format %{ %}
 3408   interface(CONST_INTER);
 3409 %}
 3410 
 3411 // Long Immediate minus one
 3412 operand immL_M1() %{
 3413   predicate( n->get_long() == -1L );
 3414   match(ConL);
 3415   op_cost(0);
 3416 
 3417   format %{ %}
 3418   interface(CONST_INTER);
 3419 %}
 3420 
 3421 // Long immediate from 0 to 127.
 3422 // Used for a shorter form of long mul by 10.
 3423 operand immL_127() %{
 3424   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3425   match(ConL);
 3426   op_cost(0);
 3427 
 3428   format %{ %}
 3429   interface(CONST_INTER);
 3430 %}
 3431 
 3432 // Long Immediate: low 32-bit mask
 3433 operand immL_32bits() %{
 3434   predicate(n->get_long() == 0xFFFFFFFFL);
 3435   match(ConL);
 3436   op_cost(0);
 3437 
 3438   format %{ %}
 3439   interface(CONST_INTER);
 3440 %}
 3441 
 3442 // Long Immediate: fits in 32-bit signed range
 3443 operand immL32() %{
 3444   predicate(n->get_long() == (int)(n->get_long()));
 3445   match(ConL);
 3446   op_cost(20);
 3447 
 3448   format %{ %}
 3449   interface(CONST_INTER);
 3450 %}
 3451 
 3452 // Double Immediate zero
 3453 operand immDPR0() %{
 3454   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3455   // bug that generates code such that NaNs compare equal to 0.0
 3456   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3457   match(ConD);
 3458 
 3459   op_cost(5);
 3460   format %{ %}
 3461   interface(CONST_INTER);
 3462 %}
 3463 
 3464 // Double Immediate one
 3465 operand immDPR1() %{
 3466   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3467   match(ConD);
 3468 
 3469   op_cost(5);
 3470   format %{ %}
 3471   interface(CONST_INTER);
 3472 %}
 3473 
 3474 // Double Immediate
 3475 operand immDPR() %{
 3476   predicate(UseSSE<=1);
 3477   match(ConD);
 3478 
 3479   op_cost(5);
 3480   format %{ %}
 3481   interface(CONST_INTER);
 3482 %}
 3483 
 3484 operand immD() %{
 3485   predicate(UseSSE>=2);
 3486   match(ConD);
 3487 
 3488   op_cost(5);
 3489   format %{ %}
 3490   interface(CONST_INTER);
 3491 %}
 3492 
 3493 // Double Immediate zero
 3494 operand immD0() %{
 3495   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3496   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3497   // compare equal to -0.0.
 3498   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3499   match(ConD);
 3500 
 3501   format %{ %}
 3502   interface(CONST_INTER);
 3503 %}
 3504 
 3505 // Float Immediate zero
 3506 operand immFPR0() %{
 3507   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3508   match(ConF);
 3509 
 3510   op_cost(5);
 3511   format %{ %}
 3512   interface(CONST_INTER);
 3513 %}
 3514 
 3515 // Float Immediate one
 3516 operand immFPR1() %{
 3517   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3518   match(ConF);
 3519 
 3520   op_cost(5);
 3521   format %{ %}
 3522   interface(CONST_INTER);
 3523 %}
 3524 
 3525 // Float Immediate
 3526 operand immFPR() %{
 3527   predicate( UseSSE == 0 );
 3528   match(ConF);
 3529 
 3530   op_cost(5);
 3531   format %{ %}
 3532   interface(CONST_INTER);
 3533 %}
 3534 
 3535 // Float Immediate
 3536 operand immF() %{
 3537   predicate(UseSSE >= 1);
 3538   match(ConF);
 3539 
 3540   op_cost(5);
 3541   format %{ %}
 3542   interface(CONST_INTER);
 3543 %}
 3544 
 3545 // Float Immediate zero.  Zero and not -0.0
 3546 operand immF0() %{
 3547   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3548   match(ConF);
 3549 
 3550   op_cost(5);
 3551   format %{ %}
 3552   interface(CONST_INTER);
 3553 %}
 3554 
 3555 // Immediates for special shifts (sign extend)
 3556 
 3557 // Constants for sign-extend shift counts
 3558 operand immI_16() %{
 3559   predicate( n->get_int() == 16 );
 3560   match(ConI);
 3561 
 3562   format %{ %}
 3563   interface(CONST_INTER);
 3564 %}
 3565 
 3566 operand immI_24() %{
 3567   predicate( n->get_int() == 24 );
 3568   match(ConI);
 3569 
 3570   format %{ %}
 3571   interface(CONST_INTER);
 3572 %}
 3573 
 3574 // Constant for byte-wide masking
 3575 operand immI_255() %{
 3576   predicate( n->get_int() == 255 );
 3577   match(ConI);
 3578 
 3579   format %{ %}
 3580   interface(CONST_INTER);
 3581 %}
 3582 
 3583 // Constant for short-wide masking
 3584 operand immI_65535() %{
 3585   predicate(n->get_int() == 65535);
 3586   match(ConI);
 3587 
 3588   format %{ %}
 3589   interface(CONST_INTER);
 3590 %}
 3591 
 3592 operand kReg()
 3593 %{
 3594   constraint(ALLOC_IN_RC(vectmask_reg));
 3595   match(RegVectMask);
 3596   format %{%}
 3597   interface(REG_INTER);
 3598 %}
 3599 
 3600 operand kReg_K1()
 3601 %{
 3602   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3603   match(RegVectMask);
 3604   format %{%}
 3605   interface(REG_INTER);
 3606 %}
 3607 
 3608 operand kReg_K2()
 3609 %{
 3610   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3611   match(RegVectMask);
 3612   format %{%}
 3613   interface(REG_INTER);
 3614 %}
 3615 
 3616 // Special Registers
 3617 operand kReg_K3()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 operand kReg_K4()
 3626 %{
 3627   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3628   match(RegVectMask);
 3629   format %{%}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 operand kReg_K5()
 3634 %{
 3635   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3636   match(RegVectMask);
 3637   format %{%}
 3638   interface(REG_INTER);
 3639 %}
 3640 
 3641 operand kReg_K6()
 3642 %{
 3643   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3644   match(RegVectMask);
 3645   format %{%}
 3646   interface(REG_INTER);
 3647 %}
 3648 
 3649 // Special Registers
 3650 operand kReg_K7()
 3651 %{
 3652   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3653   match(RegVectMask);
 3654   format %{%}
 3655   interface(REG_INTER);
 3656 %}
 3657 
 3658 // Register Operands
 3659 // Integer Register
 3660 operand rRegI() %{
 3661   constraint(ALLOC_IN_RC(int_reg));
 3662   match(RegI);
 3663   match(xRegI);
 3664   match(eAXRegI);
 3665   match(eBXRegI);
 3666   match(eCXRegI);
 3667   match(eDXRegI);
 3668   match(eDIRegI);
 3669   match(eSIRegI);
 3670 
 3671   format %{ %}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 // Subset of Integer Register
 3676 operand xRegI(rRegI reg) %{
 3677   constraint(ALLOC_IN_RC(int_x_reg));
 3678   match(reg);
 3679   match(eAXRegI);
 3680   match(eBXRegI);
 3681   match(eCXRegI);
 3682   match(eDXRegI);
 3683 
 3684   format %{ %}
 3685   interface(REG_INTER);
 3686 %}
 3687 
 3688 // Special Registers
 3689 operand eAXRegI(xRegI reg) %{
 3690   constraint(ALLOC_IN_RC(eax_reg));
 3691   match(reg);
 3692   match(rRegI);
 3693 
 3694   format %{ "EAX" %}
 3695   interface(REG_INTER);
 3696 %}
 3697 
 3698 // Special Registers
 3699 operand eBXRegI(xRegI reg) %{
 3700   constraint(ALLOC_IN_RC(ebx_reg));
 3701   match(reg);
 3702   match(rRegI);
 3703 
 3704   format %{ "EBX" %}
 3705   interface(REG_INTER);
 3706 %}
 3707 
 3708 operand eCXRegI(xRegI reg) %{
 3709   constraint(ALLOC_IN_RC(ecx_reg));
 3710   match(reg);
 3711   match(rRegI);
 3712 
 3713   format %{ "ECX" %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 operand eDXRegI(xRegI reg) %{
 3718   constraint(ALLOC_IN_RC(edx_reg));
 3719   match(reg);
 3720   match(rRegI);
 3721 
 3722   format %{ "EDX" %}
 3723   interface(REG_INTER);
 3724 %}
 3725 
 3726 operand eDIRegI(xRegI reg) %{
 3727   constraint(ALLOC_IN_RC(edi_reg));
 3728   match(reg);
 3729   match(rRegI);
 3730 
 3731   format %{ "EDI" %}
 3732   interface(REG_INTER);
 3733 %}
 3734 
 3735 operand naxRegI() %{
 3736   constraint(ALLOC_IN_RC(nax_reg));
 3737   match(RegI);
 3738   match(eCXRegI);
 3739   match(eDXRegI);
 3740   match(eSIRegI);
 3741   match(eDIRegI);
 3742 
 3743   format %{ %}
 3744   interface(REG_INTER);
 3745 %}
 3746 
 3747 operand nadxRegI() %{
 3748   constraint(ALLOC_IN_RC(nadx_reg));
 3749   match(RegI);
 3750   match(eBXRegI);
 3751   match(eCXRegI);
 3752   match(eSIRegI);
 3753   match(eDIRegI);
 3754 
 3755   format %{ %}
 3756   interface(REG_INTER);
 3757 %}
 3758 
 3759 operand ncxRegI() %{
 3760   constraint(ALLOC_IN_RC(ncx_reg));
 3761   match(RegI);
 3762   match(eAXRegI);
 3763   match(eDXRegI);
 3764   match(eSIRegI);
 3765   match(eDIRegI);
 3766 
 3767   format %{ %}
 3768   interface(REG_INTER);
 3769 %}
 3770 
 3771 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg.
 3773 operand eSIRegI(xRegI reg) %{
 3774    constraint(ALLOC_IN_RC(esi_reg));
 3775    match(reg);
 3776    match(rRegI);
 3777 
 3778    format %{ "ESI" %}
 3779    interface(REG_INTER);
 3780 %}
 3781 
 3782 // Pointer Register
 3783 operand anyRegP() %{
 3784   constraint(ALLOC_IN_RC(any_reg));
 3785   match(RegP);
 3786   match(eAXRegP);
 3787   match(eBXRegP);
 3788   match(eCXRegP);
 3789   match(eDIRegP);
 3790   match(eRegP);
 3791 
 3792   format %{ %}
 3793   interface(REG_INTER);
 3794 %}
 3795 
 3796 operand eRegP() %{
 3797   constraint(ALLOC_IN_RC(int_reg));
 3798   match(RegP);
 3799   match(eAXRegP);
 3800   match(eBXRegP);
 3801   match(eCXRegP);
 3802   match(eDIRegP);
 3803 
 3804   format %{ %}
 3805   interface(REG_INTER);
 3806 %}
 3807 
 3808 operand rRegP() %{
 3809   constraint(ALLOC_IN_RC(int_reg));
 3810   match(RegP);
 3811   match(eAXRegP);
 3812   match(eBXRegP);
 3813   match(eCXRegP);
 3814   match(eDIRegP);
 3815 
 3816   format %{ %}
 3817   interface(REG_INTER);
 3818 %}
 3819 
 3820 // On windows95, EBP is not safe to use for implicit null tests.
 3821 operand eRegP_no_EBP() %{
 3822   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3823   match(RegP);
 3824   match(eAXRegP);
 3825   match(eBXRegP);
 3826   match(eCXRegP);
 3827   match(eDIRegP);
 3828 
 3829   op_cost(100);
 3830   format %{ %}
 3831   interface(REG_INTER);
 3832 %}
 3833 
 3834 operand naxRegP() %{
 3835   constraint(ALLOC_IN_RC(nax_reg));
 3836   match(RegP);
 3837   match(eBXRegP);
 3838   match(eDXRegP);
 3839   match(eCXRegP);
 3840   match(eSIRegP);
 3841   match(eDIRegP);
 3842 
 3843   format %{ %}
 3844   interface(REG_INTER);
 3845 %}
 3846 
 3847 operand nabxRegP() %{
 3848   constraint(ALLOC_IN_RC(nabx_reg));
 3849   match(RegP);
 3850   match(eCXRegP);
 3851   match(eDXRegP);
 3852   match(eSIRegP);
 3853   match(eDIRegP);
 3854 
 3855   format %{ %}
 3856   interface(REG_INTER);
 3857 %}
 3858 
 3859 operand pRegP() %{
 3860   constraint(ALLOC_IN_RC(p_reg));
 3861   match(RegP);
 3862   match(eBXRegP);
 3863   match(eDXRegP);
 3864   match(eSIRegP);
 3865   match(eDIRegP);
 3866 
 3867   format %{ %}
 3868   interface(REG_INTER);
 3869 %}
 3870 
 3871 // Special Registers
 3872 // Return a pointer value
 3873 operand eAXRegP(eRegP reg) %{
 3874   constraint(ALLOC_IN_RC(eax_reg));
 3875   match(reg);
 3876   format %{ "EAX" %}
 3877   interface(REG_INTER);
 3878 %}
 3879 
 3880 // Used in AtomicAdd
 3881 operand eBXRegP(eRegP reg) %{
 3882   constraint(ALLOC_IN_RC(ebx_reg));
 3883   match(reg);
 3884   format %{ "EBX" %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 // Tail-call (interprocedural jump) to interpreter
 3889 operand eCXRegP(eRegP reg) %{
 3890   constraint(ALLOC_IN_RC(ecx_reg));
 3891   match(reg);
 3892   format %{ "ECX" %}
 3893   interface(REG_INTER);
 3894 %}
 3895 
 3896 operand eDXRegP(eRegP reg) %{
 3897   constraint(ALLOC_IN_RC(edx_reg));
 3898   match(reg);
 3899   format %{ "EDX" %}
 3900   interface(REG_INTER);
 3901 %}
 3902 
 3903 operand eSIRegP(eRegP reg) %{
 3904   constraint(ALLOC_IN_RC(esi_reg));
 3905   match(reg);
 3906   format %{ "ESI" %}
 3907   interface(REG_INTER);
 3908 %}
 3909 
 3910 // Used in rep stosw
 3911 operand eDIRegP(eRegP reg) %{
 3912   constraint(ALLOC_IN_RC(edi_reg));
 3913   match(reg);
 3914   format %{ "EDI" %}
 3915   interface(REG_INTER);
 3916 %}
 3917 
 3918 operand eRegL() %{
 3919   constraint(ALLOC_IN_RC(long_reg));
 3920   match(RegL);
 3921   match(eADXRegL);
 3922 
 3923   format %{ %}
 3924   interface(REG_INTER);
 3925 %}
 3926 
 3927 operand eADXRegL( eRegL reg ) %{
 3928   constraint(ALLOC_IN_RC(eadx_reg));
 3929   match(reg);
 3930 
 3931   format %{ "EDX:EAX" %}
 3932   interface(REG_INTER);
 3933 %}
 3934 
 3935 operand eBCXRegL( eRegL reg ) %{
 3936   constraint(ALLOC_IN_RC(ebcx_reg));
 3937   match(reg);
 3938 
 3939   format %{ "EBX:ECX" %}
 3940   interface(REG_INTER);
 3941 %}
 3942 
 3943 operand eBDPRegL( eRegL reg ) %{
 3944   constraint(ALLOC_IN_RC(ebpd_reg));
 3945   match(reg);
 3946 
 3947   format %{ "EBP:EDI" %}
 3948   interface(REG_INTER);
 3949 %}
 3950 // Special case for integer high multiply
 3951 operand eADXRegL_low_only() %{
 3952   constraint(ALLOC_IN_RC(eadx_reg));
 3953   match(RegL);
 3954 
 3955   format %{ "EAX" %}
 3956   interface(REG_INTER);
 3957 %}
 3958 
 3959 // Flags register, used as output of compare instructions
 3960 operand rFlagsReg() %{
 3961   constraint(ALLOC_IN_RC(int_flags));
 3962   match(RegFlags);
 3963 
 3964   format %{ "EFLAGS" %}
 3965   interface(REG_INTER);
 3966 %}
 3967 
 3968 // Flags register, used as output of compare instructions
 3969 operand eFlagsReg() %{
 3970   constraint(ALLOC_IN_RC(int_flags));
 3971   match(RegFlags);
 3972 
 3973   format %{ "EFLAGS" %}
 3974   interface(REG_INTER);
 3975 %}
 3976 
 3977 // Flags register, used as output of FLOATING POINT compare instructions
 3978 operand eFlagsRegU() %{
 3979   constraint(ALLOC_IN_RC(int_flags));
 3980   match(RegFlags);
 3981 
 3982   format %{ "EFLAGS_U" %}
 3983   interface(REG_INTER);
 3984 %}
 3985 
 3986 operand eFlagsRegUCF() %{
 3987   constraint(ALLOC_IN_RC(int_flags));
 3988   match(RegFlags);
 3989   predicate(false);
 3990 
 3991   format %{ "EFLAGS_U_CF" %}
 3992   interface(REG_INTER);
 3993 %}
 3994 
 3995 // Condition Code Register used by long compare
 3996 operand flagsReg_long_LTGE() %{
 3997   constraint(ALLOC_IN_RC(int_flags));
 3998   match(RegFlags);
 3999   format %{ "FLAGS_LTGE" %}
 4000   interface(REG_INTER);
 4001 %}
 4002 operand flagsReg_long_EQNE() %{
 4003   constraint(ALLOC_IN_RC(int_flags));
 4004   match(RegFlags);
 4005   format %{ "FLAGS_EQNE" %}
 4006   interface(REG_INTER);
 4007 %}
 4008 operand flagsReg_long_LEGT() %{
 4009   constraint(ALLOC_IN_RC(int_flags));
 4010   match(RegFlags);
 4011   format %{ "FLAGS_LEGT" %}
 4012   interface(REG_INTER);
 4013 %}
 4014 
 4015 // Condition Code Register used by unsigned long compare
 4016 operand flagsReg_ulong_LTGE() %{
 4017   constraint(ALLOC_IN_RC(int_flags));
 4018   match(RegFlags);
 4019   format %{ "FLAGS_U_LTGE" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 operand flagsReg_ulong_EQNE() %{
 4023   constraint(ALLOC_IN_RC(int_flags));
 4024   match(RegFlags);
 4025   format %{ "FLAGS_U_EQNE" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 operand flagsReg_ulong_LEGT() %{
 4029   constraint(ALLOC_IN_RC(int_flags));
 4030   match(RegFlags);
 4031   format %{ "FLAGS_U_LEGT" %}
 4032   interface(REG_INTER);
 4033 %}
 4034 
 4035 // Float register operands
 4036 operand regDPR() %{
 4037   predicate( UseSSE < 2 );
 4038   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4039   match(RegD);
 4040   match(regDPR1);
 4041   match(regDPR2);
 4042   format %{ %}
 4043   interface(REG_INTER);
 4044 %}
 4045 
 4046 operand regDPR1(regDPR reg) %{
 4047   predicate( UseSSE < 2 );
 4048   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4049   match(reg);
 4050   format %{ "FPR1" %}
 4051   interface(REG_INTER);
 4052 %}
 4053 
 4054 operand regDPR2(regDPR reg) %{
 4055   predicate( UseSSE < 2 );
 4056   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4057   match(reg);
 4058   format %{ "FPR2" %}
 4059   interface(REG_INTER);
 4060 %}
 4061 
 4062 operand regnotDPR1(regDPR reg) %{
 4063   predicate( UseSSE < 2 );
 4064   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4065   match(reg);
 4066   format %{ %}
 4067   interface(REG_INTER);
 4068 %}
 4069 
 4070 // Float register operands
 4071 operand regFPR() %{
 4072   predicate( UseSSE < 2 );
 4073   constraint(ALLOC_IN_RC(fp_flt_reg));
 4074   match(RegF);
 4075   match(regFPR1);
 4076   format %{ %}
 4077   interface(REG_INTER);
 4078 %}
 4079 
 4080 // Float register operands
 4081 operand regFPR1(regFPR reg) %{
 4082   predicate( UseSSE < 2 );
 4083   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4084   match(reg);
 4085   format %{ "FPR1" %}
 4086   interface(REG_INTER);
 4087 %}
 4088 
 4089 // XMM Float register operands
 4090 operand regF() %{
 4091   predicate( UseSSE>=1 );
 4092   constraint(ALLOC_IN_RC(float_reg_legacy));
 4093   match(RegF);
 4094   format %{ %}
 4095   interface(REG_INTER);
 4096 %}
 4097 
 4098 operand legRegF() %{
 4099   predicate( UseSSE>=1 );
 4100   constraint(ALLOC_IN_RC(float_reg_legacy));
 4101   match(RegF);
 4102   format %{ %}
 4103   interface(REG_INTER);
 4104 %}
 4105 
 4106 // Float register operands
 4107 operand vlRegF() %{
 4108    constraint(ALLOC_IN_RC(float_reg_vl));
 4109    match(RegF);
 4110 
 4111    format %{ %}
 4112    interface(REG_INTER);
 4113 %}
 4114 
 4115 // XMM Double register operands
 4116 operand regD() %{
 4117   predicate( UseSSE>=2 );
 4118   constraint(ALLOC_IN_RC(double_reg_legacy));
 4119   match(RegD);
 4120   format %{ %}
 4121   interface(REG_INTER);
 4122 %}
 4123 
 4124 // Double register operands
 4125 operand legRegD() %{
 4126   predicate( UseSSE>=2 );
 4127   constraint(ALLOC_IN_RC(double_reg_legacy));
 4128   match(RegD);
 4129   format %{ %}
 4130   interface(REG_INTER);
 4131 %}
 4132 
 4133 operand vlRegD() %{
 4134    constraint(ALLOC_IN_RC(double_reg_vl));
 4135    match(RegD);
 4136 
 4137    format %{ %}
 4138    interface(REG_INTER);
 4139 %}
 4140 
 4141 //----------Memory Operands----------------------------------------------------
 4142 // Direct Memory Operand
 4143 operand direct(immP addr) %{
 4144   match(addr);
 4145 
 4146   format %{ "[$addr]" %}
 4147   interface(MEMORY_INTER) %{
 4148     base(0xFFFFFFFF);
 4149     index(0x4);
 4150     scale(0x0);
 4151     disp($addr);
 4152   %}
 4153 %}
 4154 
 4155 // Indirect Memory Operand
 4156 operand indirect(eRegP reg) %{
 4157   constraint(ALLOC_IN_RC(int_reg));
 4158   match(reg);
 4159 
 4160   format %{ "[$reg]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base($reg);
 4163     index(0x4);
 4164     scale(0x0);
 4165     disp(0x0);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Plus Short Offset Operand
 4170 operand indOffset8(eRegP reg, immI8 off) %{
 4171   match(AddP reg off);
 4172 
 4173   format %{ "[$reg + $off]" %}
 4174   interface(MEMORY_INTER) %{
 4175     base($reg);
 4176     index(0x4);
 4177     scale(0x0);
 4178     disp($off);
 4179   %}
 4180 %}
 4181 
 4182 // Indirect Memory Plus Long Offset Operand
 4183 operand indOffset32(eRegP reg, immI off) %{
 4184   match(AddP reg off);
 4185 
 4186   format %{ "[$reg + $off]" %}
 4187   interface(MEMORY_INTER) %{
 4188     base($reg);
 4189     index(0x4);
 4190     scale(0x0);
 4191     disp($off);
 4192   %}
 4193 %}
 4194 
 4195 // Indirect Memory Plus Long Offset Operand
 4196 operand indOffset32X(rRegI reg, immP off) %{
 4197   match(AddP off reg);
 4198 
 4199   format %{ "[$reg + $off]" %}
 4200   interface(MEMORY_INTER) %{
 4201     base($reg);
 4202     index(0x4);
 4203     scale(0x0);
 4204     disp($off);
 4205   %}
 4206 %}
 4207 
 4208 // Indirect Memory Plus Index Register Plus Offset Operand
 4209 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4210   match(AddP (AddP reg ireg) off);
 4211 
 4212   op_cost(10);
 4213   format %{"[$reg + $off + $ireg]" %}
 4214   interface(MEMORY_INTER) %{
 4215     base($reg);
 4216     index($ireg);
 4217     scale(0x0);
 4218     disp($off);
 4219   %}
 4220 %}
 4221 
 4222 // Indirect Memory Plus Index Register Plus Offset Operand
 4223 operand indIndex(eRegP reg, rRegI ireg) %{
 4224   match(AddP reg ireg);
 4225 
 4226   op_cost(10);
 4227   format %{"[$reg + $ireg]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base($reg);
 4230     index($ireg);
 4231     scale(0x0);
 4232     disp(0x0);
 4233   %}
 4234 %}
 4235 
 4236 // // -------------------------------------------------------------------------
 4237 // // 486 architecture doesn't support "scale * index + offset" without a base
 4238 // // -------------------------------------------------------------------------
 4239 // // Scaled Memory Operands
 4240 // // Indirect Memory Times Scale Plus Offset Operand
 4241 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4242 //   match(AddP off (LShiftI ireg scale));
 4243 //
 4244 //   op_cost(10);
 4245 //   format %{"[$off + $ireg << $scale]" %}
 4246 //   interface(MEMORY_INTER) %{
 4247 //     base(0x4);
 4248 //     index($ireg);
 4249 //     scale($scale);
 4250 //     disp($off);
 4251 //   %}
 4252 // %}
 4253 
 4254 // Indirect Memory Times Scale Plus Index Register
 4255 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4256   match(AddP reg (LShiftI ireg scale));
 4257 
 4258   op_cost(10);
 4259   format %{"[$reg + $ireg << $scale]" %}
 4260   interface(MEMORY_INTER) %{
 4261     base($reg);
 4262     index($ireg);
 4263     scale($scale);
 4264     disp(0x0);
 4265   %}
 4266 %}
 4267 
 4268 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4269 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4270   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4271 
 4272   op_cost(10);
 4273   format %{"[$reg + $off + $ireg << $scale]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index($ireg);
 4277     scale($scale);
 4278     disp($off);
 4279   %}
 4280 %}
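
      // The [base + index*2^scale + disp] effective address described by the
      // memory operands above is just the following pointer arithmetic
      // (illustrative only):
      //
      //   char* ea = (char*)base + ((intptr_t)index << scale) + disp;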
 4281 
 4282 //----------Load Long Memory Operands------------------------------------------
 4283 // The load-long idiom will use its address expression again after loading
 4284 // the first word of the long.  If the load-long destination overlaps with
 4285 // registers used in the addressing expression, the 2nd half will be loaded
 4286 // from a clobbered address.  Fix this by requiring that load-long use
 4287 // address registers that do not overlap with the load-long target.
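
      // Illustrative sketch (not actual matcher code): the hazard being avoided.
      // A long load is two 32-bit loads that reuse the address expression, so if
      // the first half overwrites a register used by that address, the second
      // half reads from a clobbered location:
      //
      //   dst_lo = *(int32_t*)(addr + 0);   // if dst_lo aliases addr ...
      //   dst_hi = *(int32_t*)(addr + 4);   // ... this second load is garbage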
 4288 
 4289 // load-long support
 4290 operand load_long_RegP() %{
 4291   constraint(ALLOC_IN_RC(esi_reg));
 4292   match(RegP);
 4293   match(eSIRegP);
 4294   op_cost(100);
 4295   format %{  %}
 4296   interface(REG_INTER);
 4297 %}
 4298 
 4299 // Indirect Memory Operand Long
 4300 operand load_long_indirect(load_long_RegP reg) %{
 4301   constraint(ALLOC_IN_RC(esi_reg));
 4302   match(reg);
 4303 
 4304   format %{ "[$reg]" %}
 4305   interface(MEMORY_INTER) %{
 4306     base($reg);
 4307     index(0x4);
 4308     scale(0x0);
 4309     disp(0x0);
 4310   %}
 4311 %}
 4312 
 4313 // Indirect Memory Plus Long Offset Operand
 4314 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4315   match(AddP reg off);
 4316 
 4317   format %{ "[$reg + $off]" %}
 4318   interface(MEMORY_INTER) %{
 4319     base($reg);
 4320     index(0x4);
 4321     scale(0x0);
 4322     disp($off);
 4323   %}
 4324 %}
 4325 
 4326 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4327 
 4328 
 4329 //----------Special Memory Operands--------------------------------------------
 4330 // Stack Slot Operand - This operand is used for loading and storing temporary
 4331 //                      values on the stack where a match requires a value to
 4332 //                      flow through memory.
 4333 operand stackSlotP(sRegP reg) %{
 4334   constraint(ALLOC_IN_RC(stack_slots));
 4335   // No match rule because this operand is only generated in matching
 4336   format %{ "[$reg]" %}
 4337   interface(MEMORY_INTER) %{
 4338     base(0x4);   // ESP
 4339     index(0x4);  // No Index
 4340     scale(0x0);  // No Scale
 4341     disp($reg);  // Stack Offset
 4342   %}
 4343 %}
 4344 
 4345 operand stackSlotI(sRegI reg) %{
 4346   constraint(ALLOC_IN_RC(stack_slots));
 4347   // No match rule because this operand is only generated in matching
 4348   format %{ "[$reg]" %}
 4349   interface(MEMORY_INTER) %{
 4350     base(0x4);   // ESP
 4351     index(0x4);  // No Index
 4352     scale(0x0);  // No Scale
 4353     disp($reg);  // Stack Offset
 4354   %}
 4355 %}
 4356 
 4357 operand stackSlotF(sRegF reg) %{
 4358   constraint(ALLOC_IN_RC(stack_slots));
 4359   // No match rule because this operand is only generated in matching
 4360   format %{ "[$reg]" %}
 4361   interface(MEMORY_INTER) %{
 4362     base(0x4);   // ESP
 4363     index(0x4);  // No Index
 4364     scale(0x0);  // No Scale
 4365     disp($reg);  // Stack Offset
 4366   %}
 4367 %}
 4368 
 4369 operand stackSlotD(sRegD reg) %{
 4370   constraint(ALLOC_IN_RC(stack_slots));
 4371   // No match rule because this operand is only generated in matching
 4372   format %{ "[$reg]" %}
 4373   interface(MEMORY_INTER) %{
 4374     base(0x4);   // ESP
 4375     index(0x4);  // No Index
 4376     scale(0x0);  // No Scale
 4377     disp($reg);  // Stack Offset
 4378   %}
 4379 %}
 4380 
 4381 operand stackSlotL(sRegL reg) %{
 4382   constraint(ALLOC_IN_RC(stack_slots));
 4383   // No match rule because this operand is only generated in matching
 4384   format %{ "[$reg]" %}
 4385   interface(MEMORY_INTER) %{
 4386     base(0x4);   // ESP
 4387     index(0x4);  // No Index
 4388     scale(0x0);  // No Scale
 4389     disp($reg);  // Stack Offset
 4390   %}
 4391 %}
 4392 
 4393 //----------Conditional Branch Operands----------------------------------------
 4394 // Comparison Op  - This is the operation of the comparison, and is limited to
 4395 //                  the following set of codes:
 4396 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4397 //
 4398 // Other attributes of the comparison, such as unsignedness, are specified
 4399 // by the comparison instruction that sets a condition code flags register.
 4400 // That result is represented by a flags operand whose subtype is appropriate
 4401 // to the unsignedness (etc.) of the comparison.
 4402 //
 4403 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4404 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4405 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4406 
 4407 // Comparison Code
 4408 operand cmpOp() %{
 4409   match(Bool);
 4410 
 4411   format %{ "" %}
 4412   interface(COND_INTER) %{
 4413     equal(0x4, "e");
 4414     not_equal(0x5, "ne");
 4415     less(0xC, "l");
 4416     greater_equal(0xD, "ge");
 4417     less_equal(0xE, "le");
 4418     greater(0xF, "g");
 4419     overflow(0x0, "o");
 4420     no_overflow(0x1, "no");
 4421   %}
 4422 %}
 4423 
 4424 // Comparison Code, unsigned compare.  Used by FP also, with
 4425 // C2 (unordered) turned into GT or LT already.  The other bits
 4426 // C0 and C3 are turned into Carry & Zero flags.
 4427 operand cmpOpU() %{
 4428   match(Bool);
 4429 
 4430   format %{ "" %}
 4431   interface(COND_INTER) %{
 4432     equal(0x4, "e");
 4433     not_equal(0x5, "ne");
 4434     less(0x2, "b");
 4435     greater_equal(0x3, "nb");
 4436     less_equal(0x6, "be");
 4437     greater(0x7, "nbe");
 4438     overflow(0x0, "o");
 4439     no_overflow(0x1, "no");
 4440   %}
 4441 %}
 4442 
 4443 // Floating comparisons that don't require any fixup for the unordered case
 4444 operand cmpOpUCF() %{
 4445   match(Bool);
 4446   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4447             n->as_Bool()->_test._test == BoolTest::ge ||
 4448             n->as_Bool()->_test._test == BoolTest::le ||
 4449             n->as_Bool()->_test._test == BoolTest::gt);
 4450   format %{ "" %}
 4451   interface(COND_INTER) %{
 4452     equal(0x4, "e");
 4453     not_equal(0x5, "ne");
 4454     less(0x2, "b");
 4455     greater_equal(0x3, "nb");
 4456     less_equal(0x6, "be");
 4457     greater(0x7, "nbe");
 4458     overflow(0x0, "o");
 4459     no_overflow(0x1, "no");
 4460   %}
 4461 %}
 4462 
 4463 
 4464 // Floating comparisons that can be fixed up with extra conditional jumps
 4465 operand cmpOpUCF2() %{
 4466   match(Bool);
 4467   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4468             n->as_Bool()->_test._test == BoolTest::eq);
 4469   format %{ "" %}
 4470   interface(COND_INTER) %{
 4471     equal(0x4, "e");
 4472     not_equal(0x5, "ne");
 4473     less(0x2, "b");
 4474     greater_equal(0x3, "nb");
 4475     less_equal(0x6, "be");
 4476     greater(0x7, "nbe");
 4477     overflow(0x0, "o");
 4478     no_overflow(0x1, "no");
 4479   %}
 4480 %}
 4481 
 4482 // Comparison Code for FP conditional move
 4483 operand cmpOp_fcmov() %{
 4484   match(Bool);
 4485 
 4486   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4487             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4488   format %{ "" %}
 4489   interface(COND_INTER) %{
 4490     equal        (0x0C8);
 4491     not_equal    (0x1C8);
 4492     less         (0x0C0);
 4493     greater_equal(0x1C0);
 4494     less_equal   (0x0D0);
 4495     greater      (0x1D0);
 4496     overflow(0x0, "o"); // not really supported by the instruction
 4497     no_overflow(0x1, "no"); // not really supported by the instruction
 4498   %}
 4499 %}
 4500 
 4501 // Comparison Code used in long compares
 4502 operand cmpOp_commute() %{
 4503   match(Bool);
 4504 
 4505   format %{ "" %}
 4506   interface(COND_INTER) %{
 4507     equal(0x4, "e");
 4508     not_equal(0x5, "ne");
 4509     less(0xF, "g");
 4510     greater_equal(0xE, "le");
 4511     less_equal(0xD, "ge");
 4512     greater(0xC, "l");
 4513     overflow(0x0, "o");
 4514     no_overflow(0x1, "no");
 4515   %}
 4516 %}
 4517 
 4518 // Comparison Code used in unsigned long compares
 4519 operand cmpOpU_commute() %{
 4520   match(Bool);
 4521 
 4522   format %{ "" %}
 4523   interface(COND_INTER) %{
 4524     equal(0x4, "e");
 4525     not_equal(0x5, "ne");
 4526     less(0x7, "nbe");
 4527     greater_equal(0x6, "be");
 4528     less_equal(0x3, "nb");
 4529     greater(0x2, "b");
 4530     overflow(0x0, "o");
 4531     no_overflow(0x1, "no");
 4532   %}
 4533 %}
 4534 
 4535 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4537 // instruction definitions by not requiring the AD writer to specify separate
 4538 // instructions for every form of operand when the instruction accepts
 4539 // multiple operand types with the same basic encoding and format.  The classic
 4540 // case of this is memory operands.
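//
// For example, the loadI instruct further below matches (Set dst (LoadI mem))
// exactly once; because mem uses the memory opclass, that single definition
// covers direct, register-indirect, indexed and scaled-index addressing forms.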
 4541 
 4542 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4543                indIndex, indIndexScale, indIndexScaleOffset);
 4544 
 4545 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4546 // This means some kind of offset is always required and you cannot use
 4547 // an oop as the offset (done when working on static globals).
 4548 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4549                     indIndex, indIndexScale, indIndexScaleOffset);
 4550 
 4551 
 4552 //----------PIPELINE-----------------------------------------------------------
 4553 // Rules which define the behavior of the target architectures pipeline.
 4554 pipeline %{
 4555 
 4556 //----------ATTRIBUTES---------------------------------------------------------
 4557 attributes %{
  variable_size_instructions;        // Variable size instructions
 4559   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4561   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4562   instruction_fetch_units = 1;       // of 16 bytes
 4563 
 4564   // List of nop instructions
 4565   nops( MachNop );
 4566 %}
 4567 
 4568 //----------RESOURCES----------------------------------------------------------
 4569 // Resources are the functional units available to the machine
 4570 
 4571 // Generic P2/P3 pipeline
 4572 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4573 // 3 instructions decoded per cycle.
 4574 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
 4576 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4577            MS0, MS1, MEM = MS0 | MS1,
 4578            BR, FPU,
 4579            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4580 
 4581 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4582 // Pipeline Description specifies the stages in the machine's pipeline
 4583 
 4584 // Generic P2/P3 pipeline
 4585 pipe_desc(S0, S1, S2, S3, S4, S5);
 4586 
 4587 //----------PIPELINE CLASSES---------------------------------------------------
 4588 // Pipeline Classes describe the stages in which input and output are
 4589 // referenced by the hardware pipeline.
 4590 
 4591 // Naming convention: ialu or fpu
 4592 // Then: _reg
 4593 // Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long operation
 4595 // Then: _fat if it requires the big decoder
 4596 //   Or: _mem if it requires the big decoder and a memory unit.
 4597 
 4598 // Integer ALU reg operation
 4599 pipe_class ialu_reg(rRegI dst) %{
 4600     single_instruction;
 4601     dst    : S4(write);
 4602     dst    : S3(read);
 4603     DECODE : S0;        // any decoder
 4604     ALU    : S3;        // any alu
 4605 %}
 4606 
 4607 // Long ALU reg operation
 4608 pipe_class ialu_reg_long(eRegL dst) %{
 4609     instruction_count(2);
 4610     dst    : S4(write);
 4611     dst    : S3(read);
 4612     DECODE : S0(2);     // any 2 decoders
 4613     ALU    : S3(2);     // both alus
 4614 %}
 4615 
 4616 // Integer ALU reg operation using big decoder
 4617 pipe_class ialu_reg_fat(rRegI dst) %{
 4618     single_instruction;
 4619     dst    : S4(write);
 4620     dst    : S3(read);
 4621     D0     : S0;        // big decoder only
 4622     ALU    : S3;        // any alu
 4623 %}
 4624 
 4625 // Long ALU reg operation using big decoder
 4626 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4627     instruction_count(2);
 4628     dst    : S4(write);
 4629     dst    : S3(read);
 4630     D0     : S0(2);     // big decoder only; twice
 4631     ALU    : S3(2);     // any 2 alus
 4632 %}
 4633 
 4634 // Integer ALU reg-reg operation
 4635 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4636     single_instruction;
 4637     dst    : S4(write);
 4638     src    : S3(read);
 4639     DECODE : S0;        // any decoder
 4640     ALU    : S3;        // any alu
 4641 %}
 4642 
 4643 // Long ALU reg-reg operation
 4644 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4645     instruction_count(2);
 4646     dst    : S4(write);
 4647     src    : S3(read);
 4648     DECODE : S0(2);     // any 2 decoders
 4649     ALU    : S3(2);     // both alus
 4650 %}
 4651 
// Integer ALU reg-reg operation using big decoder
 4653 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4654     single_instruction;
 4655     dst    : S4(write);
 4656     src    : S3(read);
 4657     D0     : S0;        // big decoder only
 4658     ALU    : S3;        // any alu
 4659 %}
 4660 
// Long ALU reg-reg operation using big decoder
 4662 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4663     instruction_count(2);
 4664     dst    : S4(write);
 4665     src    : S3(read);
 4666     D0     : S0(2);     // big decoder only; twice
 4667     ALU    : S3(2);     // both alus
 4668 %}
 4669 
 4670 // Integer ALU reg-mem operation
 4671 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4672     single_instruction;
 4673     dst    : S5(write);
 4674     mem    : S3(read);
 4675     D0     : S0;        // big decoder only
 4676     ALU    : S4;        // any alu
 4677     MEM    : S3;        // any mem
 4678 %}
 4679 
 4680 // Long ALU reg-mem operation
 4681 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4682     instruction_count(2);
 4683     dst    : S5(write);
 4684     mem    : S3(read);
 4685     D0     : S0(2);     // big decoder only; twice
 4686     ALU    : S4(2);     // any 2 alus
 4687     MEM    : S3(2);     // both mems
 4688 %}
 4689 
 4690 // Integer mem operation (prefetch)
 4691 pipe_class ialu_mem(memory mem)
 4692 %{
 4693     single_instruction;
 4694     mem    : S3(read);
 4695     D0     : S0;        // big decoder only
 4696     MEM    : S3;        // any mem
 4697 %}
 4698 
 4699 // Integer Store to Memory
 4700 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4701     single_instruction;
 4702     mem    : S3(read);
 4703     src    : S5(read);
 4704     D0     : S0;        // big decoder only
 4705     ALU    : S4;        // any alu
 4706     MEM    : S3;
 4707 %}
 4708 
 4709 // Long Store to Memory
 4710 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4711     instruction_count(2);
 4712     mem    : S3(read);
 4713     src    : S5(read);
 4714     D0     : S0(2);     // big decoder only; twice
 4715     ALU    : S4(2);     // any 2 alus
 4716     MEM    : S3(2);     // Both mems
 4717 %}
 4718 
// Integer Store immediate to Memory
 4720 pipe_class ialu_mem_imm(memory mem) %{
 4721     single_instruction;
 4722     mem    : S3(read);
 4723     D0     : S0;        // big decoder only
 4724     ALU    : S4;        // any alu
 4725     MEM    : S3;
 4726 %}
 4727 
 4728 // Integer ALU0 reg-reg operation
 4729 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4730     single_instruction;
 4731     dst    : S4(write);
 4732     src    : S3(read);
 4733     D0     : S0;        // Big decoder only
 4734     ALU0   : S3;        // only alu0
 4735 %}
 4736 
 4737 // Integer ALU0 reg-mem operation
 4738 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4739     single_instruction;
 4740     dst    : S5(write);
 4741     mem    : S3(read);
 4742     D0     : S0;        // big decoder only
 4743     ALU0   : S4;        // ALU0 only
 4744     MEM    : S3;        // any mem
 4745 %}
 4746 
 4747 // Integer ALU reg-reg operation
 4748 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4749     single_instruction;
 4750     cr     : S4(write);
 4751     src1   : S3(read);
 4752     src2   : S3(read);
 4753     DECODE : S0;        // any decoder
 4754     ALU    : S3;        // any alu
 4755 %}
 4756 
 4757 // Integer ALU reg-imm operation
 4758 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4759     single_instruction;
 4760     cr     : S4(write);
 4761     src1   : S3(read);
 4762     DECODE : S0;        // any decoder
 4763     ALU    : S3;        // any alu
 4764 %}
 4765 
 4766 // Integer ALU reg-mem operation
 4767 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4768     single_instruction;
 4769     cr     : S4(write);
 4770     src1   : S3(read);
 4771     src2   : S3(read);
 4772     D0     : S0;        // big decoder only
 4773     ALU    : S4;        // any alu
 4774     MEM    : S3;
 4775 %}
 4776 
 4777 // Conditional move reg-reg
 4778 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4779     instruction_count(4);
 4780     y      : S4(read);
 4781     q      : S3(read);
 4782     p      : S3(read);
 4783     DECODE : S0(4);     // any decoder
 4784 %}
 4785 
 4786 // Conditional move reg-reg
 4787 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4788     single_instruction;
 4789     dst    : S4(write);
 4790     src    : S3(read);
 4791     cr     : S3(read);
 4792     DECODE : S0;        // any decoder
 4793 %}
 4794 
 4795 // Conditional move reg-mem
 4796 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4797     single_instruction;
 4798     dst    : S4(write);
 4799     src    : S3(read);
 4800     cr     : S3(read);
 4801     DECODE : S0;        // any decoder
 4802     MEM    : S3;
 4803 %}
 4804 
 4805 // Conditional move reg-reg long
 4806 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4807     single_instruction;
 4808     dst    : S4(write);
 4809     src    : S3(read);
 4810     cr     : S3(read);
 4811     DECODE : S0(2);     // any 2 decoders
 4812 %}
 4813 
 4814 // Conditional move double reg-reg
 4815 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4816     single_instruction;
 4817     dst    : S4(write);
 4818     src    : S3(read);
 4819     cr     : S3(read);
 4820     DECODE : S0;        // any decoder
 4821 %}
 4822 
 4823 // Float reg-reg operation
 4824 pipe_class fpu_reg(regDPR dst) %{
 4825     instruction_count(2);
 4826     dst    : S3(read);
 4827     DECODE : S0(2);     // any 2 decoders
 4828     FPU    : S3;
 4829 %}
 4830 
 4831 // Float reg-reg operation
 4832 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4833     instruction_count(2);
 4834     dst    : S4(write);
 4835     src    : S3(read);
 4836     DECODE : S0(2);     // any 2 decoders
 4837     FPU    : S3;
 4838 %}
 4839 
 4840 // Float reg-reg operation
 4841 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4842     instruction_count(3);
 4843     dst    : S4(write);
 4844     src1   : S3(read);
 4845     src2   : S3(read);
 4846     DECODE : S0(3);     // any 3 decoders
 4847     FPU    : S3(2);
 4848 %}
 4849 
 4850 // Float reg-reg operation
 4851 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4852     instruction_count(4);
 4853     dst    : S4(write);
 4854     src1   : S3(read);
 4855     src2   : S3(read);
 4856     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4858     FPU    : S3(2);
 4859 %}
 4860 
 4861 // Float reg-reg operation
 4862 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4863     instruction_count(4);
 4864     dst    : S4(write);
 4865     src1   : S3(read);
 4866     src2   : S3(read);
 4867     src3   : S3(read);
 4868     DECODE : S1(3);     // any 3 decoders
 4869     D0     : S0;        // Big decoder only
 4870     FPU    : S3(2);
 4871     MEM    : S3;
 4872 %}
 4873 
 4874 // Float reg-mem operation
 4875 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4876     instruction_count(2);
 4877     dst    : S5(write);
 4878     mem    : S3(read);
 4879     D0     : S0;        // big decoder only
 4880     DECODE : S1;        // any decoder for FPU POP
 4881     FPU    : S4;
 4882     MEM    : S3;        // any mem
 4883 %}
 4884 
 4885 // Float reg-mem operation
 4886 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4887     instruction_count(3);
 4888     dst    : S5(write);
 4889     src1   : S3(read);
 4890     mem    : S3(read);
 4891     D0     : S0;        // big decoder only
 4892     DECODE : S1(2);     // any decoder for FPU POP
 4893     FPU    : S4;
 4894     MEM    : S3;        // any mem
 4895 %}
 4896 
 4897 // Float mem-reg operation
 4898 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4899     instruction_count(2);
 4900     src    : S5(read);
 4901     mem    : S3(read);
 4902     DECODE : S0;        // any decoder for FPU PUSH
 4903     D0     : S1;        // big decoder only
 4904     FPU    : S4;
 4905     MEM    : S3;        // any mem
 4906 %}
 4907 
 4908 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4909     instruction_count(3);
 4910     src1   : S3(read);
 4911     src2   : S3(read);
 4912     mem    : S3(read);
 4913     DECODE : S0(2);     // any decoder for FPU PUSH
 4914     D0     : S1;        // big decoder only
 4915     FPU    : S4;
 4916     MEM    : S3;        // any mem
 4917 %}
 4918 
 4919 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4920     instruction_count(3);
 4921     src1   : S3(read);
 4922     src2   : S3(read);
 4923     mem    : S4(read);
 4924     DECODE : S0;        // any decoder for FPU PUSH
 4925     D0     : S0(2);     // big decoder only
 4926     FPU    : S4;
 4927     MEM    : S3(2);     // any mem
 4928 %}
 4929 
 4930 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4931     instruction_count(2);
 4932     src1   : S3(read);
 4933     dst    : S4(read);
 4934     D0     : S0(2);     // big decoder only
 4935     MEM    : S3(2);     // any mem
 4936 %}
 4937 
 4938 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4939     instruction_count(3);
 4940     src1   : S3(read);
 4941     src2   : S3(read);
 4942     dst    : S4(read);
 4943     D0     : S0(3);     // big decoder only
 4944     FPU    : S4;
 4945     MEM    : S3(3);     // any mem
 4946 %}
 4947 
 4948 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4949     instruction_count(3);
 4950     src1   : S4(read);
 4951     mem    : S4(read);
 4952     DECODE : S0;        // any decoder for FPU PUSH
 4953     D0     : S0(2);     // big decoder only
 4954     FPU    : S4;
 4955     MEM    : S3(2);     // any mem
 4956 %}
 4957 
 4958 // Float load constant
 4959 pipe_class fpu_reg_con(regDPR dst) %{
 4960     instruction_count(2);
 4961     dst    : S5(write);
 4962     D0     : S0;        // big decoder only for the load
 4963     DECODE : S1;        // any decoder for FPU POP
 4964     FPU    : S4;
 4965     MEM    : S3;        // any mem
 4966 %}
 4967 
 4968 // Float load constant
 4969 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4970     instruction_count(3);
 4971     dst    : S5(write);
 4972     src    : S3(read);
 4973     D0     : S0;        // big decoder only for the load
 4974     DECODE : S1(2);     // any decoder for FPU POP
 4975     FPU    : S4;
 4976     MEM    : S3;        // any mem
 4977 %}
 4978 
 4979 // UnConditional branch
 4980 pipe_class pipe_jmp( label labl ) %{
 4981     single_instruction;
 4982     BR   : S3;
 4983 %}
 4984 
 4985 // Conditional branch
 4986 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4987     single_instruction;
 4988     cr    : S1(read);
 4989     BR    : S3;
 4990 %}
 4991 
 4992 // Allocation idiom
 4993 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4994     instruction_count(1); force_serialization;
 4995     fixed_latency(6);
 4996     heap_ptr : S3(read);
 4997     DECODE   : S0(3);
 4998     D0       : S2;
 4999     MEM      : S3;
 5000     ALU      : S3(2);
 5001     dst      : S5(write);
 5002     BR       : S5;
 5003 %}
 5004 
 5005 // Generic big/slow expanded idiom
 5006 pipe_class pipe_slow(  ) %{
 5007     instruction_count(10); multiple_bundles; force_serialization;
 5008     fixed_latency(100);
 5009     D0  : S0(2);
 5010     MEM : S3(2);
 5011 %}
 5012 
 5013 // The real do-nothing guy
 5014 pipe_class empty( ) %{
 5015     instruction_count(0);
 5016 %}
 5017 
 5018 // Define the class for the Nop node
 5019 define %{
 5020    MachNop = empty;
 5021 %}
 5022 
 5023 %}
 5024 
 5025 //----------INSTRUCTIONS-------------------------------------------------------
 5026 //
 5027 // match      -- States which machine-independent subtree may be replaced
 5028 //               by this instruction.
 5029 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5030 //               selection to identify a minimum cost tree of machine
 5031 //               instructions that matches a tree of machine-independent
 5032 //               instructions.
 5033 // format     -- A string providing the disassembly for this instruction.
 5034 //               The value of an instruction's operand may be inserted
 5035 //               by referring to it with a '$' prefix.
 5036 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5037 //               to within an encode class as $primary, $secondary, and $tertiary
 5038 //               respectively.  The primary opcode is commonly used to
 5039 //               indicate the type of machine instruction, while secondary
 5040 //               and tertiary are often used for prefix options or addressing
 5041 //               modes.
 5042 // ins_encode -- A list of encode classes with parameters. The encode class
 5043 //               name must have been defined in an 'enc_class' specification
 5044 //               in the encode section of the architecture description.
 5045 
 5046 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5047 // Load Float
 5048 instruct MoveF2LEG(legRegF dst, regF src) %{
 5049   match(Set dst src);
 5050   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5051   ins_encode %{
 5052     ShouldNotReachHere();
 5053   %}
 5054   ins_pipe( fpu_reg_reg );
 5055 %}
 5056 
 5057 // Load Float
 5058 instruct MoveLEG2F(regF dst, legRegF src) %{
 5059   match(Set dst src);
 5060   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5061   ins_encode %{
 5062     ShouldNotReachHere();
 5063   %}
 5064   ins_pipe( fpu_reg_reg );
 5065 %}
 5066 
 5067 // Load Float
 5068 instruct MoveF2VL(vlRegF dst, regF src) %{
 5069   match(Set dst src);
 5070   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5071   ins_encode %{
 5072     ShouldNotReachHere();
 5073   %}
 5074   ins_pipe( fpu_reg_reg );
 5075 %}
 5076 
 5077 // Load Float
 5078 instruct MoveVL2F(regF dst, vlRegF src) %{
 5079   match(Set dst src);
 5080   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5081   ins_encode %{
 5082     ShouldNotReachHere();
 5083   %}
 5084   ins_pipe( fpu_reg_reg );
 5085 %}
 5086 
 5087 
 5088 
 5089 // Load Double
 5090 instruct MoveD2LEG(legRegD dst, regD src) %{
 5091   match(Set dst src);
 5092   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5093   ins_encode %{
 5094     ShouldNotReachHere();
 5095   %}
 5096   ins_pipe( fpu_reg_reg );
 5097 %}
 5098 
 5099 // Load Double
 5100 instruct MoveLEG2D(regD dst, legRegD src) %{
 5101   match(Set dst src);
 5102   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5103   ins_encode %{
 5104     ShouldNotReachHere();
 5105   %}
 5106   ins_pipe( fpu_reg_reg );
 5107 %}
 5108 
 5109 // Load Double
 5110 instruct MoveD2VL(vlRegD dst, regD src) %{
 5111   match(Set dst src);
 5112   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5113   ins_encode %{
 5114     ShouldNotReachHere();
 5115   %}
 5116   ins_pipe( fpu_reg_reg );
 5117 %}
 5118 
 5119 // Load Double
 5120 instruct MoveVL2D(regD dst, vlRegD src) %{
 5121   match(Set dst src);
 5122   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5123   ins_encode %{
 5124     ShouldNotReachHere();
 5125   %}
 5126   ins_pipe( fpu_reg_reg );
 5127 %}
 5128 
 5129 //----------BSWAP-Instruction--------------------------------------------------
 5130 instruct bytes_reverse_int(rRegI dst) %{
 5131   match(Set dst (ReverseBytesI dst));
 5132 
 5133   format %{ "BSWAP  $dst" %}
 5134   opcode(0x0F, 0xC8);
 5135   ins_encode( OpcP, OpcSReg(dst) );
 5136   ins_pipe( ialu_reg );
 5137 %}
 5138 
 5139 instruct bytes_reverse_long(eRegL dst) %{
 5140   match(Set dst (ReverseBytesL dst));
 5141 
 5142   format %{ "BSWAP  $dst.lo\n\t"
 5143             "BSWAP  $dst.hi\n\t"
 5144             "XCHG   $dst.lo $dst.hi" %}
 5145 
 5146   ins_cost(125);
 5147   ins_encode( bswap_long_bytes(dst) );
 5148   ins_pipe( ialu_reg_reg);
 5149 %}
 5150 
 5151 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5152   match(Set dst (ReverseBytesUS dst));
 5153   effect(KILL cr);
 5154 
 5155   format %{ "BSWAP  $dst\n\t"
 5156             "SHR    $dst,16\n\t" %}
 5157   ins_encode %{
 5158     __ bswapl($dst$$Register);
 5159     __ shrl($dst$$Register, 16);
 5160   %}
 5161   ins_pipe( ialu_reg );
 5162 %}
 5163 
 5164 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5165   match(Set dst (ReverseBytesS dst));
 5166   effect(KILL cr);
 5167 
 5168   format %{ "BSWAP  $dst\n\t"
 5169             "SAR    $dst,16\n\t" %}
 5170   ins_encode %{
 5171     __ bswapl($dst$$Register);
 5172     __ sarl($dst$$Register, 16);
 5173   %}
 5174   ins_pipe( ialu_reg );
 5175 %}
 5176 
 5177 
 5178 //---------- Zeros Count Instructions ------------------------------------------
 5179 
 5180 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5181   predicate(UseCountLeadingZerosInstruction);
 5182   match(Set dst (CountLeadingZerosI src));
 5183   effect(KILL cr);
 5184 
 5185   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5186   ins_encode %{
 5187     __ lzcntl($dst$$Register, $src$$Register);
 5188   %}
 5189   ins_pipe(ialu_reg);
 5190 %}
 5191 
 5192 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5193   predicate(!UseCountLeadingZerosInstruction);
 5194   match(Set dst (CountLeadingZerosI src));
 5195   effect(KILL cr);
 5196 
 5197   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5198             "JNZ    skip\n\t"
 5199             "MOV    $dst, -1\n"
 5200       "skip:\n\t"
 5201             "NEG    $dst\n\t"
 5202             "ADD    $dst, 31" %}
 5203   ins_encode %{
 5204     Register Rdst = $dst$$Register;
 5205     Register Rsrc = $src$$Register;
 5206     Label skip;
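    // BSR returns the index of the highest set bit and leaves the destination
    // undefined (ZF set) for a zero input.  Substituting -1 for the zero case
    // and then computing 31 - index via NEG/ADD yields the leading-zero count
    // (32 when the input is zero).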
 5207     __ bsrl(Rdst, Rsrc);
 5208     __ jccb(Assembler::notZero, skip);
 5209     __ movl(Rdst, -1);
 5210     __ bind(skip);
 5211     __ negl(Rdst);
 5212     __ addl(Rdst, BitsPerInt - 1);
 5213   %}
 5214   ins_pipe(ialu_reg);
 5215 %}
 5216 
 5217 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5218   predicate(UseCountLeadingZerosInstruction);
 5219   match(Set dst (CountLeadingZerosL src));
 5220   effect(TEMP dst, KILL cr);
 5221 
 5222   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5223             "JNC    done\n\t"
 5224             "LZCNT  $dst, $src.lo\n\t"
 5225             "ADD    $dst, 32\n"
 5226       "done:" %}
 5227   ins_encode %{
 5228     Register Rdst = $dst$$Register;
 5229     Register Rsrc = $src$$Register;
 5230     Label done;
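    // LZCNT sets CF when its source is zero, so carryClear means the high word
    // was non-zero and already holds the final count; otherwise count the low
    // word and bias the result by 32.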
 5231     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5232     __ jccb(Assembler::carryClear, done);
 5233     __ lzcntl(Rdst, Rsrc);
 5234     __ addl(Rdst, BitsPerInt);
 5235     __ bind(done);
 5236   %}
 5237   ins_pipe(ialu_reg);
 5238 %}
 5239 
 5240 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5241   predicate(!UseCountLeadingZerosInstruction);
 5242   match(Set dst (CountLeadingZerosL src));
 5243   effect(TEMP dst, KILL cr);
 5244 
 5245   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5246             "JZ     msw_is_zero\n\t"
 5247             "ADD    $dst, 32\n\t"
 5248             "JMP    not_zero\n"
 5249       "msw_is_zero:\n\t"
 5250             "BSR    $dst, $src.lo\n\t"
 5251             "JNZ    not_zero\n\t"
 5252             "MOV    $dst, -1\n"
 5253       "not_zero:\n\t"
 5254             "NEG    $dst\n\t"
 5255             "ADD    $dst, 63\n" %}
  ins_encode %{
 5257     Register Rdst = $dst$$Register;
 5258     Register Rsrc = $src$$Register;
 5259     Label msw_is_zero;
 5260     Label not_zero;
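    // Same 63 - bit_index trick as the 32-bit case: the BSR result is biased
    // by 32 when the high word is non-zero, forced to -1 when the whole value
    // is zero, then NEG/ADD 63 produces the leading-zero count (64 for zero).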
 5261     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5262     __ jccb(Assembler::zero, msw_is_zero);
 5263     __ addl(Rdst, BitsPerInt);
 5264     __ jmpb(not_zero);
 5265     __ bind(msw_is_zero);
 5266     __ bsrl(Rdst, Rsrc);
 5267     __ jccb(Assembler::notZero, not_zero);
 5268     __ movl(Rdst, -1);
 5269     __ bind(not_zero);
 5270     __ negl(Rdst);
 5271     __ addl(Rdst, BitsPerLong - 1);
 5272   %}
 5273   ins_pipe(ialu_reg);
 5274 %}
 5275 
 5276 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5277   predicate(UseCountTrailingZerosInstruction);
 5278   match(Set dst (CountTrailingZerosI src));
 5279   effect(KILL cr);
 5280 
 5281   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5282   ins_encode %{
 5283     __ tzcntl($dst$$Register, $src$$Register);
 5284   %}
 5285   ins_pipe(ialu_reg);
 5286 %}
 5287 
 5288 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5289   predicate(!UseCountTrailingZerosInstruction);
 5290   match(Set dst (CountTrailingZerosI src));
 5291   effect(KILL cr);
 5292 
 5293   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5294             "JNZ    done\n\t"
 5295             "MOV    $dst, 32\n"
 5296       "done:" %}
 5297   ins_encode %{
 5298     Register Rdst = $dst$$Register;
 5299     Label done;
 5300     __ bsfl(Rdst, $src$$Register);
 5301     __ jccb(Assembler::notZero, done);
 5302     __ movl(Rdst, BitsPerInt);
 5303     __ bind(done);
 5304   %}
 5305   ins_pipe(ialu_reg);
 5306 %}
 5307 
 5308 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5309   predicate(UseCountTrailingZerosInstruction);
 5310   match(Set dst (CountTrailingZerosL src));
 5311   effect(TEMP dst, KILL cr);
 5312 
 5313   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5314             "JNC    done\n\t"
 5315             "TZCNT  $dst, $src.hi\n\t"
 5316             "ADD    $dst, 32\n"
 5317             "done:" %}
 5318   ins_encode %{
 5319     Register Rdst = $dst$$Register;
 5320     Register Rsrc = $src$$Register;
 5321     Label done;
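    // TZCNT sets CF when its source is zero, so carryClear means the low word
    // was non-zero and the count is final; otherwise count the high word and
    // bias the result by 32.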
 5322     __ tzcntl(Rdst, Rsrc);
 5323     __ jccb(Assembler::carryClear, done);
 5324     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5325     __ addl(Rdst, BitsPerInt);
 5326     __ bind(done);
 5327   %}
 5328   ins_pipe(ialu_reg);
 5329 %}
 5330 
 5331 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5332   predicate(!UseCountTrailingZerosInstruction);
 5333   match(Set dst (CountTrailingZerosL src));
 5334   effect(TEMP dst, KILL cr);
 5335 
 5336   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5337             "JNZ    done\n\t"
 5338             "BSF    $dst, $src.hi\n\t"
 5339             "JNZ    msw_not_zero\n\t"
 5340             "MOV    $dst, 32\n"
 5341       "msw_not_zero:\n\t"
 5342             "ADD    $dst, 32\n"
 5343       "done:" %}
 5344   ins_encode %{
 5345     Register Rdst = $dst$$Register;
 5346     Register Rsrc = $src$$Register;
 5347     Label msw_not_zero;
 5348     Label done;
 5349     __ bsfl(Rdst, Rsrc);
 5350     __ jccb(Assembler::notZero, done);
 5351     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5352     __ jccb(Assembler::notZero, msw_not_zero);
 5353     __ movl(Rdst, BitsPerInt);
 5354     __ bind(msw_not_zero);
 5355     __ addl(Rdst, BitsPerInt);
 5356     __ bind(done);
 5357   %}
 5358   ins_pipe(ialu_reg);
 5359 %}
 5360 
 5361 
 5362 //---------- Population Count Instructions -------------------------------------
 5363 
 5364 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5365   predicate(UsePopCountInstruction);
 5366   match(Set dst (PopCountI src));
 5367   effect(KILL cr);
 5368 
 5369   format %{ "POPCNT $dst, $src" %}
 5370   ins_encode %{
 5371     __ popcntl($dst$$Register, $src$$Register);
 5372   %}
 5373   ins_pipe(ialu_reg);
 5374 %}
 5375 
 5376 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5377   predicate(UsePopCountInstruction);
 5378   match(Set dst (PopCountI (LoadI mem)));
 5379   effect(KILL cr);
 5380 
 5381   format %{ "POPCNT $dst, $mem" %}
 5382   ins_encode %{
 5383     __ popcntl($dst$$Register, $mem$$Address);
 5384   %}
 5385   ins_pipe(ialu_reg);
 5386 %}
 5387 
 5388 // Note: Long.bitCount(long) returns an int.
 5389 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5390   predicate(UsePopCountInstruction);
 5391   match(Set dst (PopCountL src));
 5392   effect(KILL cr, TEMP tmp, TEMP dst);
 5393 
 5394   format %{ "POPCNT $dst, $src.lo\n\t"
 5395             "POPCNT $tmp, $src.hi\n\t"
 5396             "ADD    $dst, $tmp" %}
 5397   ins_encode %{
 5398     __ popcntl($dst$$Register, $src$$Register);
 5399     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5400     __ addl($dst$$Register, $tmp$$Register);
 5401   %}
 5402   ins_pipe(ialu_reg);
 5403 %}
 5404 
 5405 // Note: Long.bitCount(long) returns an int.
 5406 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5407   predicate(UsePopCountInstruction);
 5408   match(Set dst (PopCountL (LoadL mem)));
 5409   effect(KILL cr, TEMP tmp, TEMP dst);
 5410 
 5411   format %{ "POPCNT $dst, $mem\n\t"
 5412             "POPCNT $tmp, $mem+4\n\t"
 5413             "ADD    $dst, $tmp" %}
 5414   ins_encode %{
 5415     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5416     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5417     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5418     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5419     __ addl($dst$$Register, $tmp$$Register);
 5420   %}
 5421   ins_pipe(ialu_reg);
 5422 %}
 5423 
 5424 
 5425 //----------Load/Store/Move Instructions---------------------------------------
 5426 //----------Load Instructions--------------------------------------------------
 5427 // Load Byte (8bit signed)
 5428 instruct loadB(xRegI dst, memory mem) %{
 5429   match(Set dst (LoadB mem));
 5430 
 5431   ins_cost(125);
 5432   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5433 
 5434   ins_encode %{
 5435     __ movsbl($dst$$Register, $mem$$Address);
 5436   %}
 5437 
 5438   ins_pipe(ialu_reg_mem);
 5439 %}
 5440 
 5441 // Load Byte (8bit signed) into Long Register
 5442 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5443   match(Set dst (ConvI2L (LoadB mem)));
 5444   effect(KILL cr);
 5445 
 5446   ins_cost(375);
 5447   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5448             "MOV    $dst.hi,$dst.lo\n\t"
 5449             "SAR    $dst.hi,7" %}
 5450 
 5451   ins_encode %{
 5452     __ movsbl($dst$$Register, $mem$$Address);
 5453     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5455   %}
 5456 
 5457   ins_pipe(ialu_reg_mem);
 5458 %}
 5459 
 5460 // Load Unsigned Byte (8bit UNsigned)
 5461 instruct loadUB(xRegI dst, memory mem) %{
 5462   match(Set dst (LoadUB mem));
 5463 
 5464   ins_cost(125);
 5465   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5466 
 5467   ins_encode %{
 5468     __ movzbl($dst$$Register, $mem$$Address);
 5469   %}
 5470 
 5471   ins_pipe(ialu_reg_mem);
 5472 %}
 5473 
 5474 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5475 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5476   match(Set dst (ConvI2L (LoadUB mem)));
 5477   effect(KILL cr);
 5478 
 5479   ins_cost(250);
 5480   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5481             "XOR    $dst.hi,$dst.hi" %}
 5482 
 5483   ins_encode %{
 5484     Register Rdst = $dst$$Register;
 5485     __ movzbl(Rdst, $mem$$Address);
 5486     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5487   %}
 5488 
 5489   ins_pipe(ialu_reg_mem);
 5490 %}
 5491 
 5492 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5493 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5494   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5495   effect(KILL cr);
 5496 
 5497   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5498             "XOR    $dst.hi,$dst.hi\n\t"
 5499             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5500   ins_encode %{
 5501     Register Rdst = $dst$$Register;
 5502     __ movzbl(Rdst, $mem$$Address);
 5503     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5504     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5505   %}
 5506   ins_pipe(ialu_reg_mem);
 5507 %}
 5508 
 5509 // Load Short (16bit signed)
 5510 instruct loadS(rRegI dst, memory mem) %{
 5511   match(Set dst (LoadS mem));
 5512 
 5513   ins_cost(125);
 5514   format %{ "MOVSX  $dst,$mem\t# short" %}
 5515 
 5516   ins_encode %{
 5517     __ movswl($dst$$Register, $mem$$Address);
 5518   %}
 5519 
 5520   ins_pipe(ialu_reg_mem);
 5521 %}
 5522 
 5523 // Load Short (16 bit signed) to Byte (8 bit signed)
 5524 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5525   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5526 
 5527   ins_cost(125);
 5528   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5529   ins_encode %{
 5530     __ movsbl($dst$$Register, $mem$$Address);
 5531   %}
 5532   ins_pipe(ialu_reg_mem);
 5533 %}
 5534 
 5535 // Load Short (16bit signed) into Long Register
 5536 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5537   match(Set dst (ConvI2L (LoadS mem)));
 5538   effect(KILL cr);
 5539 
 5540   ins_cost(375);
 5541   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5542             "MOV    $dst.hi,$dst.lo\n\t"
 5543             "SAR    $dst.hi,15" %}
 5544 
 5545   ins_encode %{
 5546     __ movswl($dst$$Register, $mem$$Address);
 5547     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5549   %}
 5550 
 5551   ins_pipe(ialu_reg_mem);
 5552 %}
 5553 
 5554 // Load Unsigned Short/Char (16bit unsigned)
 5555 instruct loadUS(rRegI dst, memory mem) %{
 5556   match(Set dst (LoadUS mem));
 5557 
 5558   ins_cost(125);
 5559   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5560 
 5561   ins_encode %{
 5562     __ movzwl($dst$$Register, $mem$$Address);
 5563   %}
 5564 
 5565   ins_pipe(ialu_reg_mem);
 5566 %}
 5567 
 5568 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5569 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5570   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5571 
 5572   ins_cost(125);
 5573   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5574   ins_encode %{
 5575     __ movsbl($dst$$Register, $mem$$Address);
 5576   %}
 5577   ins_pipe(ialu_reg_mem);
 5578 %}
 5579 
 5580 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5581 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5582   match(Set dst (ConvI2L (LoadUS mem)));
 5583   effect(KILL cr);
 5584 
 5585   ins_cost(250);
 5586   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5587             "XOR    $dst.hi,$dst.hi" %}
 5588 
 5589   ins_encode %{
 5590     __ movzwl($dst$$Register, $mem$$Address);
 5591     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5592   %}
 5593 
 5594   ins_pipe(ialu_reg_mem);
 5595 %}
 5596 
 5597 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5598 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5599   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5600   effect(KILL cr);
 5601 
 5602   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5603             "XOR    $dst.hi,$dst.hi" %}
 5604   ins_encode %{
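    // The 0xFF mask keeps only the low byte, so a single 8-bit zero-extending
    // load performs both the load and the mask.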
 5605     Register Rdst = $dst$$Register;
 5606     __ movzbl(Rdst, $mem$$Address);
 5607     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5608   %}
 5609   ins_pipe(ialu_reg_mem);
 5610 %}
 5611 
 5612 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5613 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5614   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5615   effect(KILL cr);
 5616 
 5617   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5618             "XOR    $dst.hi,$dst.hi\n\t"
 5619             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5620   ins_encode %{
 5621     Register Rdst = $dst$$Register;
 5622     __ movzwl(Rdst, $mem$$Address);
 5623     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5624     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Integer
 5630 instruct loadI(rRegI dst, memory mem) %{
 5631   match(Set dst (LoadI mem));
 5632 
 5633   ins_cost(125);
 5634   format %{ "MOV    $dst,$mem\t# int" %}
 5635 
 5636   ins_encode %{
 5637     __ movl($dst$$Register, $mem$$Address);
 5638   %}
 5639 
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5644 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5645   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5646 
 5647   ins_cost(125);
 5648   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5649   ins_encode %{
 5650     __ movsbl($dst$$Register, $mem$$Address);
 5651   %}
 5652   ins_pipe(ialu_reg_mem);
 5653 %}
 5654 
 5655 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5656 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5657   match(Set dst (AndI (LoadI mem) mask));
 5658 
 5659   ins_cost(125);
 5660   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5661   ins_encode %{
 5662     __ movzbl($dst$$Register, $mem$$Address);
 5663   %}
 5664   ins_pipe(ialu_reg_mem);
 5665 %}
 5666 
 5667 // Load Integer (32 bit signed) to Short (16 bit signed)
 5668 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5669   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5670 
 5671   ins_cost(125);
 5672   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5673   ins_encode %{
 5674     __ movswl($dst$$Register, $mem$$Address);
 5675   %}
 5676   ins_pipe(ialu_reg_mem);
 5677 %}
 5678 
 5679 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5680 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5681   match(Set dst (AndI (LoadI mem) mask));
 5682 
 5683   ins_cost(125);
 5684   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5685   ins_encode %{
 5686     __ movzwl($dst$$Register, $mem$$Address);
 5687   %}
 5688   ins_pipe(ialu_reg_mem);
 5689 %}
 5690 
 5691 // Load Integer into Long Register
 5692 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5693   match(Set dst (ConvI2L (LoadI mem)));
 5694   effect(KILL cr);
 5695 
 5696   ins_cost(375);
 5697   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5698             "MOV    $dst.hi,$dst.lo\n\t"
 5699             "SAR    $dst.hi,31" %}
 5700 
 5701   ins_encode %{
 5702     __ movl($dst$$Register, $mem$$Address);
 5703     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5704     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5705   %}
 5706 
 5707   ins_pipe(ialu_reg_mem);
 5708 %}
 5709 
 5710 // Load Integer with mask 0xFF into Long Register
 5711 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5712   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5713   effect(KILL cr);
 5714 
 5715   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5716             "XOR    $dst.hi,$dst.hi" %}
 5717   ins_encode %{
 5718     Register Rdst = $dst$$Register;
 5719     __ movzbl(Rdst, $mem$$Address);
 5720     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5721   %}
 5722   ins_pipe(ialu_reg_mem);
 5723 %}
 5724 
 5725 // Load Integer with mask 0xFFFF into Long Register
 5726 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5727   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5728   effect(KILL cr);
 5729 
 5730   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5731             "XOR    $dst.hi,$dst.hi" %}
 5732   ins_encode %{
 5733     Register Rdst = $dst$$Register;
 5734     __ movzwl(Rdst, $mem$$Address);
 5735     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5736   %}
 5737   ins_pipe(ialu_reg_mem);
 5738 %}
 5739 
 5740 // Load Integer with 31-bit mask into Long Register
 5741 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5742   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5743   effect(KILL cr);
 5744 
 5745   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5746             "XOR    $dst.hi,$dst.hi\n\t"
 5747             "AND    $dst.lo,$mask" %}
 5748   ins_encode %{
 5749     Register Rdst = $dst$$Register;
 5750     __ movl(Rdst, $mem$$Address);
 5751     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5752     __ andl(Rdst, $mask$$constant);
 5753   %}
 5754   ins_pipe(ialu_reg_mem);
 5755 %}
 5756 
 5757 // Load Unsigned Integer into Long Register
 5758 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5759   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5760   effect(KILL cr);
 5761 
 5762   ins_cost(250);
 5763   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5764             "XOR    $dst.hi,$dst.hi" %}
 5765 
 5766   ins_encode %{
 5767     __ movl($dst$$Register, $mem$$Address);
 5768     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5769   %}
 5770 
 5771   ins_pipe(ialu_reg_mem);
 5772 %}
 5773 
 5774 // Load Long.  Cannot clobber address while loading, so restrict address
 5775 // register to ESI
 5776 instruct loadL(eRegL dst, load_long_memory mem) %{
 5777   predicate(!((LoadLNode*)n)->require_atomic_access());
 5778   match(Set dst (LoadL mem));
 5779 
 5780   ins_cost(250);
 5781   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5782             "MOV    $dst.hi,$mem+4" %}
 5783 
 5784   ins_encode %{
 5785     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5786     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5787     __ movl($dst$$Register, Amemlo);
 5788     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5789   %}
 5790 
 5791   ins_pipe(ialu_reg_long_mem);
 5792 %}
 5793 
 5794 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5795 // then store it down to the stack and reload on the int
 5796 // side.
 5797 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5798   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5799   match(Set dst (LoadL mem));
 5800 
 5801   ins_cost(200);
 5802   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5803             "FISTp  $dst" %}
 5804   ins_encode(enc_loadL_volatile(mem,dst));
 5805   ins_pipe( fpu_reg_mem );
 5806 %}
 5807 
 5808 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5809   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5810   match(Set dst (LoadL mem));
 5811   effect(TEMP tmp);
 5812   ins_cost(180);
 5813   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5814             "MOVSD  $dst,$tmp" %}
 5815   ins_encode %{
 5816     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5817     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5818   %}
 5819   ins_pipe( pipe_slow );
 5820 %}
 5821 
 5822 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5823   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5824   match(Set dst (LoadL mem));
 5825   effect(TEMP tmp);
 5826   ins_cost(160);
 5827   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5828             "MOVD   $dst.lo,$tmp\n\t"
 5829             "PSRLQ  $tmp,32\n\t"
 5830             "MOVD   $dst.hi,$tmp" %}
 5831   ins_encode %{
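    // One atomic 64-bit XMM load, then extract the halves: MOVD takes the low
    // 32 bits, PSRLQ shifts the quadword right by 32 so a second MOVD yields
    // the high 32 bits.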
 5832     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5833     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5834     __ psrlq($tmp$$XMMRegister, 32);
 5835     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5836   %}
 5837   ins_pipe( pipe_slow );
 5838 %}
 5839 
 5840 // Load Range
 5841 instruct loadRange(rRegI dst, memory mem) %{
 5842   match(Set dst (LoadRange mem));
 5843 
 5844   ins_cost(125);
 5845   format %{ "MOV    $dst,$mem" %}
 5846   opcode(0x8B);
 5847   ins_encode( OpcP, RegMem(dst,mem));
 5848   ins_pipe( ialu_reg_mem );
 5849 %}
 5850 
 5851 
 5852 // Load Pointer
 5853 instruct loadP(eRegP dst, memory mem) %{
 5854   match(Set dst (LoadP mem));
 5855 
 5856   ins_cost(125);
 5857   format %{ "MOV    $dst,$mem" %}
 5858   opcode(0x8B);
 5859   ins_encode( OpcP, RegMem(dst,mem));
 5860   ins_pipe( ialu_reg_mem );
 5861 %}
 5862 
 5863 // Load Klass Pointer
 5864 instruct loadKlass(eRegP dst, memory mem) %{
 5865   match(Set dst (LoadKlass mem));
 5866 
 5867   ins_cost(125);
 5868   format %{ "MOV    $dst,$mem" %}
 5869   opcode(0x8B);
 5870   ins_encode( OpcP, RegMem(dst,mem));
 5871   ins_pipe( ialu_reg_mem );
 5872 %}
 5873 
 5874 // Load Double
 5875 instruct loadDPR(regDPR dst, memory mem) %{
 5876   predicate(UseSSE<=1);
 5877   match(Set dst (LoadD mem));
 5878 
 5879   ins_cost(150);
 5880   format %{ "FLD_D  ST,$mem\n\t"
 5881             "FSTP   $dst" %}
 5882   opcode(0xDD);               /* DD /0 */
 5883   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5884               Pop_Reg_DPR(dst) );
 5885   ins_pipe( fpu_reg_mem );
 5886 %}
 5887 
 5888 // Load Double to XMM
 5889 instruct loadD(regD dst, memory mem) %{
 5890   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5891   match(Set dst (LoadD mem));
 5892   ins_cost(145);
 5893   format %{ "MOVSD  $dst,$mem" %}
 5894   ins_encode %{
 5895     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5896   %}
 5897   ins_pipe( pipe_slow );
 5898 %}
 5899 
 5900 instruct loadD_partial(regD dst, memory mem) %{
 5901   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5902   match(Set dst (LoadD mem));
 5903   ins_cost(145);
 5904   format %{ "MOVLPD $dst,$mem" %}
 5905   ins_encode %{
 5906     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5907   %}
 5908   ins_pipe( pipe_slow );
 5909 %}
 5910 
 5911 // Load to XMM register (single-precision floating point)
 5912 // MOVSS instruction
 5913 instruct loadF(regF dst, memory mem) %{
 5914   predicate(UseSSE>=1);
 5915   match(Set dst (LoadF mem));
 5916   ins_cost(145);
 5917   format %{ "MOVSS  $dst,$mem" %}
 5918   ins_encode %{
 5919     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5920   %}
 5921   ins_pipe( pipe_slow );
 5922 %}
 5923 
 5924 // Load Float
 5925 instruct loadFPR(regFPR dst, memory mem) %{
 5926   predicate(UseSSE==0);
 5927   match(Set dst (LoadF mem));
 5928 
 5929   ins_cost(150);
 5930   format %{ "FLD_S  ST,$mem\n\t"
 5931             "FSTP   $dst" %}
 5932   opcode(0xD9);               /* D9 /0 */
 5933   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5934               Pop_Reg_FPR(dst) );
 5935   ins_pipe( fpu_reg_mem );
 5936 %}
 5937 
 5938 // Load Effective Address
 5939 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5940   match(Set dst mem);
 5941 
 5942   ins_cost(110);
 5943   format %{ "LEA    $dst,$mem" %}
 5944   opcode(0x8D);
 5945   ins_encode( OpcP, RegMem(dst,mem));
 5946   ins_pipe( ialu_reg_reg_fat );
 5947 %}
 5948 
 5949 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5950   match(Set dst mem);
 5951 
 5952   ins_cost(110);
 5953   format %{ "LEA    $dst,$mem" %}
 5954   opcode(0x8D);
 5955   ins_encode( OpcP, RegMem(dst,mem));
 5956   ins_pipe( ialu_reg_reg_fat );
 5957 %}
 5958 
 5959 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5960   match(Set dst mem);
 5961 
 5962   ins_cost(110);
 5963   format %{ "LEA    $dst,$mem" %}
 5964   opcode(0x8D);
 5965   ins_encode( OpcP, RegMem(dst,mem));
 5966   ins_pipe( ialu_reg_reg_fat );
 5967 %}
 5968 
 5969 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5970   match(Set dst mem);
 5971 
 5972   ins_cost(110);
 5973   format %{ "LEA    $dst,$mem" %}
 5974   opcode(0x8D);
 5975   ins_encode( OpcP, RegMem(dst,mem));
 5976   ins_pipe( ialu_reg_reg_fat );
 5977 %}
 5978 
 5979 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5980   match(Set dst mem);
 5981 
 5982   ins_cost(110);
 5983   format %{ "LEA    $dst,$mem" %}
 5984   opcode(0x8D);
 5985   ins_encode( OpcP, RegMem(dst,mem));
 5986   ins_pipe( ialu_reg_reg_fat );
 5987 %}
 5988 
 5989 // Load Constant
 5990 instruct loadConI(rRegI dst, immI src) %{
 5991   match(Set dst src);
 5992 
 5993   format %{ "MOV    $dst,$src" %}
 5994   ins_encode( LdImmI(dst, src) );
 5995   ins_pipe( ialu_reg_fat );
 5996 %}
 5997 
 5998 // Load Constant zero
 5999 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6000   match(Set dst src);
 6001   effect(KILL cr);
 6002 
 6003   ins_cost(50);
 6004   format %{ "XOR    $dst,$dst" %}
 6005   opcode(0x33);  /* + rd */
 6006   ins_encode( OpcP, RegReg( dst, dst ) );
 6007   ins_pipe( ialu_reg );
 6008 %}
 6009 
 6010 instruct loadConP(eRegP dst, immP src) %{
 6011   match(Set dst src);
 6012 
 6013   format %{ "MOV    $dst,$src" %}
 6014   opcode(0xB8);  /* + rd */
 6015   ins_encode( LdImmP(dst, src) );
 6016   ins_pipe( ialu_reg_fat );
 6017 %}
 6018 
 6019 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6020   match(Set dst src);
 6021   effect(KILL cr);
 6022   ins_cost(200);
 6023   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6024             "MOV    $dst.hi,$src.hi" %}
 6025   opcode(0xB8);
 6026   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6027   ins_pipe( ialu_reg_long_fat );
 6028 %}
 6029 
 6030 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6031   match(Set dst src);
 6032   effect(KILL cr);
 6033   ins_cost(150);
 6034   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6035             "XOR    $dst.hi,$dst.hi" %}
 6036   opcode(0x33,0x33);
 6037   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6038   ins_pipe( ialu_reg_long );
 6039 %}
 6040 
 6041 // The instruction usage is guarded by predicate in operand immFPR().
 6042 instruct loadConFPR(regFPR dst, immFPR con) %{
 6043   match(Set dst con);
 6044   ins_cost(125);
 6045   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6046             "FSTP   $dst" %}
 6047   ins_encode %{
 6048     __ fld_s($constantaddress($con));
 6049     __ fstp_d($dst$$reg);
 6050   %}
 6051   ins_pipe(fpu_reg_con);
 6052 %}
 6053 
 6054 // The instruction usage is guarded by predicate in operand immFPR0().
 6055 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6056   match(Set dst con);
 6057   ins_cost(125);
 6058   format %{ "FLDZ   ST\n\t"
 6059             "FSTP   $dst" %}
 6060   ins_encode %{
 6061     __ fldz();
 6062     __ fstp_d($dst$$reg);
 6063   %}
 6064   ins_pipe(fpu_reg_con);
 6065 %}
 6066 
 6067 // The instruction usage is guarded by predicate in operand immFPR1().
 6068 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6069   match(Set dst con);
 6070   ins_cost(125);
 6071   format %{ "FLD1   ST\n\t"
 6072             "FSTP   $dst" %}
 6073   ins_encode %{
 6074     __ fld1();
 6075     __ fstp_d($dst$$reg);
 6076   %}
 6077   ins_pipe(fpu_reg_con);
 6078 %}
 6079 
 6080 // The instruction usage is guarded by predicate in operand immF().
 6081 instruct loadConF(regF dst, immF con) %{
 6082   match(Set dst con);
 6083   ins_cost(125);
 6084   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6085   ins_encode %{
 6086     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6087   %}
 6088   ins_pipe(pipe_slow);
 6089 %}
 6090 
 6091 // The instruction usage is guarded by predicate in operand immF0().
 6092 instruct loadConF0(regF dst, immF0 src) %{
 6093   match(Set dst src);
 6094   ins_cost(100);
 6095   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6096   ins_encode %{
 6097     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6098   %}
 6099   ins_pipe(pipe_slow);
 6100 %}
 6101 
 6102 // The instruction usage is guarded by predicate in operand immDPR().
 6103 instruct loadConDPR(regDPR dst, immDPR con) %{
 6104   match(Set dst con);
 6105   ins_cost(125);
 6106 
 6107   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6108             "FSTP   $dst" %}
 6109   ins_encode %{
 6110     __ fld_d($constantaddress($con));
 6111     __ fstp_d($dst$$reg);
 6112   %}
 6113   ins_pipe(fpu_reg_con);
 6114 %}
 6115 
 6116 // The instruction usage is guarded by predicate in operand immDPR0().
 6117 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6118   match(Set dst con);
 6119   ins_cost(125);
 6120 
 6121   format %{ "FLDZ   ST\n\t"
 6122             "FSTP   $dst" %}
 6123   ins_encode %{
 6124     __ fldz();
 6125     __ fstp_d($dst$$reg);
 6126   %}
 6127   ins_pipe(fpu_reg_con);
 6128 %}
 6129 
 6130 // The instruction usage is guarded by predicate in operand immDPR1().
 6131 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6132   match(Set dst con);
 6133   ins_cost(125);
 6134 
 6135   format %{ "FLD1   ST\n\t"
 6136             "FSTP   $dst" %}
 6137   ins_encode %{
 6138     __ fld1();
 6139     __ fstp_d($dst$$reg);
 6140   %}
 6141   ins_pipe(fpu_reg_con);
 6142 %}
 6143 
 6144 // The instruction usage is guarded by predicate in operand immD().
 6145 instruct loadConD(regD dst, immD con) %{
 6146   match(Set dst con);
 6147   ins_cost(125);
 6148   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6149   ins_encode %{
 6150     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6151   %}
 6152   ins_pipe(pipe_slow);
 6153 %}
 6154 
 6155 // The instruction usage is guarded by predicate in operand immD0().
 6156 instruct loadConD0(regD dst, immD0 src) %{
 6157   match(Set dst src);
 6158   ins_cost(100);
 6159   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6160   ins_encode %{
 6161     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6162   %}
 6163   ins_pipe( pipe_slow );
 6164 %}
 6165 
 6166 // Load Stack Slot
 6167 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6168   match(Set dst src);
 6169   ins_cost(125);
 6170 
 6171   format %{ "MOV    $dst,$src" %}
 6172   opcode(0x8B);
 6173   ins_encode( OpcP, RegMem(dst,src));
 6174   ins_pipe( ialu_reg_mem );
 6175 %}
 6176 
 6177 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6178   match(Set dst src);
 6179 
 6180   ins_cost(200);
 6181   format %{ "MOV    $dst,$src.lo\n\t"
 6182             "MOV    $dst+4,$src.hi" %}
 6183   opcode(0x8B, 0x8B);
 6184   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6185   ins_pipe( ialu_mem_long_reg );
 6186 %}
 6187 
 6188 // Load Stack Slot
 6189 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6190   match(Set dst src);
 6191   ins_cost(125);
 6192 
 6193   format %{ "MOV    $dst,$src" %}
 6194   opcode(0x8B);
 6195   ins_encode( OpcP, RegMem(dst,src));
 6196   ins_pipe( ialu_reg_mem );
 6197 %}
 6198 
 6199 // Load Stack Slot
 6200 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6201   match(Set dst src);
 6202   ins_cost(125);
 6203 
 6204   format %{ "FLD_S  $src\n\t"
 6205             "FSTP   $dst" %}
 6206   opcode(0xD9);               /* D9 /0, FLD m32real */
 6207   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6208               Pop_Reg_FPR(dst) );
 6209   ins_pipe( fpu_reg_mem );
 6210 %}
 6211 
 6212 // Load Stack Slot
 6213 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6214   match(Set dst src);
 6215   ins_cost(125);
 6216 
 6217   format %{ "FLD_D  $src\n\t"
 6218             "FSTP   $dst" %}
 6219   opcode(0xDD);               /* DD /0, FLD m64real */
 6220   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6221               Pop_Reg_DPR(dst) );
 6222   ins_pipe( fpu_reg_mem );
 6223 %}
 6224 
 6225 // Prefetch instructions for allocation.
 6226 // Must be safe to execute with invalid address (cannot fault).
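// AllocatePrefetchInstr selects the variant: 0 -> PREFETCHNTA, 1 -> PREFETCHT0,
// 2 -> PREFETCHT2 (all three require UseSSE >= 1), 3 -> PREFETCHW; with UseSSE == 0 and
// AllocatePrefetchInstr != 3 the prefetch is an empty encoding.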
 6227 
 6228 instruct prefetchAlloc0( memory mem ) %{
 6229   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6230   match(PrefetchAllocation mem);
 6231   ins_cost(0);
 6232   size(0);
 6233   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6234   ins_encode();
 6235   ins_pipe(empty);
 6236 %}
 6237 
 6238 instruct prefetchAlloc( memory mem ) %{
 6239   predicate(AllocatePrefetchInstr==3);
 6240   match( PrefetchAllocation mem );
 6241   ins_cost(100);
 6242 
 6243   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6244   ins_encode %{
 6245     __ prefetchw($mem$$Address);
 6246   %}
 6247   ins_pipe(ialu_mem);
 6248 %}
 6249 
 6250 instruct prefetchAllocNTA( memory mem ) %{
 6251   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6252   match(PrefetchAllocation mem);
 6253   ins_cost(100);
 6254 
 6255   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6256   ins_encode %{
 6257     __ prefetchnta($mem$$Address);
 6258   %}
 6259   ins_pipe(ialu_mem);
 6260 %}
 6261 
 6262 instruct prefetchAllocT0( memory mem ) %{
 6263   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6264   match(PrefetchAllocation mem);
 6265   ins_cost(100);
 6266 
 6267   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6268   ins_encode %{
 6269     __ prefetcht0($mem$$Address);
 6270   %}
 6271   ins_pipe(ialu_mem);
 6272 %}
 6273 
 6274 instruct prefetchAllocT2( memory mem ) %{
 6275   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6276   match(PrefetchAllocation mem);
 6277   ins_cost(100);
 6278 
 6279   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6280   ins_encode %{
 6281     __ prefetcht2($mem$$Address);
 6282   %}
 6283   ins_pipe(ialu_mem);
 6284 %}
 6285 
 6286 //----------Store Instructions-------------------------------------------------
 6287 
 6288 // Store Byte
 6289 instruct storeB(memory mem, xRegI src) %{
 6290   match(Set mem (StoreB mem src));
 6291 
 6292   ins_cost(125);
 6293   format %{ "MOV8   $mem,$src" %}
 6294   opcode(0x88);
 6295   ins_encode( OpcP, RegMem( src, mem ) );
 6296   ins_pipe( ialu_mem_reg );
 6297 %}
 6298 
 6299 // Store Char/Short
 6300 instruct storeC(memory mem, rRegI src) %{
 6301   match(Set mem (StoreC mem src));
 6302 
 6303   ins_cost(125);
 6304   format %{ "MOV16  $mem,$src" %}
 6305   opcode(0x89, 0x66);
 6306   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6307   ins_pipe( ialu_mem_reg );
 6308 %}
 6309 
 6310 // Store Integer
 6311 instruct storeI(memory mem, rRegI src) %{
 6312   match(Set mem (StoreI mem src));
 6313 
 6314   ins_cost(125);
 6315   format %{ "MOV    $mem,$src" %}
 6316   opcode(0x89);
 6317   ins_encode( OpcP, RegMem( src, mem ) );
 6318   ins_pipe( ialu_mem_reg );
 6319 %}
 6320 
 6321 // Store Long
 6322 instruct storeL(long_memory mem, eRegL src) %{
 6323   predicate(!((StoreLNode*)n)->require_atomic_access());
 6324   match(Set mem (StoreL mem src));
 6325 
 6326   ins_cost(200);
 6327   format %{ "MOV    $mem,$src.lo\n\t"
 6328             "MOV    $mem+4,$src.hi" %}
 6329   opcode(0x89, 0x89);
 6330   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6331   ins_pipe( ialu_mem_long_reg );
 6332 %}
 6333 
 6334 // Store Long to Integer
 6335 instruct storeL2I(memory mem, eRegL src) %{
 6336   match(Set mem (StoreI mem (ConvL2I src)));
 6337 
 6338   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6339   ins_encode %{
 6340     __ movl($mem$$Address, $src$$Register);
 6341   %}
 6342   ins_pipe(ialu_mem_reg);
 6343 %}
 6344 
 6345 // Volatile Store Long.  Must be atomic, so move it into
 6346 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6347 // target address before the store (for null-ptr checks)
 6348 // so the memory operand is used twice in the encoding.
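// The leading CMP EAX,[$mem] is a dummy read that touches the target address, so a null
// or unmapped pointer faults before any bytes are written; the FILD/FISTP pair then
// performs the store as a single 64-bit memory access.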
 6349 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6350   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6351   match(Set mem (StoreL mem src));
 6352   effect( KILL cr );
 6353   ins_cost(400);
 6354   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6355             "FILD   $src\n\t"
 6356             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6357   opcode(0x3B);
 6358   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6359   ins_pipe( fpu_reg_mem );
 6360 %}
 6361 
 6362 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6363   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6364   match(Set mem (StoreL mem src));
 6365   effect( TEMP tmp, KILL cr );
 6366   ins_cost(380);
 6367   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6368             "MOVSD  $tmp,$src\n\t"
 6369             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6370   ins_encode %{
 6371     __ cmpl(rax, $mem$$Address);
 6372     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6373     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6374   %}
 6375   ins_pipe( pipe_slow );
 6376 %}
 6377 
 6378 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6379   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6380   match(Set mem (StoreL mem src));
 6381   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6382   ins_cost(360);
 6383   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6384             "MOVD   $tmp,$src.lo\n\t"
 6385             "MOVD   $tmp2,$src.hi\n\t"
 6386             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6387             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6388   ins_encode %{
 6389     __ cmpl(rax, $mem$$Address);
 6390     __ movdl($tmp$$XMMRegister, $src$$Register);
 6391     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6392     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6393     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6394   %}
 6395   ins_pipe( pipe_slow );
 6396 %}
 6397 
 6398 // Store Pointer; for storing unknown oops and raw pointers
 6399 instruct storeP(memory mem, anyRegP src) %{
 6400   match(Set mem (StoreP mem src));
 6401 
 6402   ins_cost(125);
 6403   format %{ "MOV    $mem,$src" %}
 6404   opcode(0x89);
 6405   ins_encode( OpcP, RegMem( src, mem ) );
 6406   ins_pipe( ialu_mem_reg );
 6407 %}
 6408 
 6409 // Store Integer Immediate
 6410 instruct storeImmI(memory mem, immI src) %{
 6411   match(Set mem (StoreI mem src));
 6412 
 6413   ins_cost(150);
 6414   format %{ "MOV    $mem,$src" %}
 6415   opcode(0xC7);               /* C7 /0 */
 6416   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6417   ins_pipe( ialu_mem_imm );
 6418 %}
 6419 
 6420 // Store Short/Char Immediate
 6421 instruct storeImmI16(memory mem, immI16 src) %{
 6422   predicate(UseStoreImmI16);
 6423   match(Set mem (StoreC mem src));
 6424 
 6425   ins_cost(150);
 6426   format %{ "MOV16  $mem,$src" %}
 6427   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6428   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6429   ins_pipe( ialu_mem_imm );
 6430 %}
 6431 
 6432 // Store Pointer Immediate; null pointers or constant oops that do not
 6433 // need card-mark barriers.
 6434 instruct storeImmP(memory mem, immP src) %{
 6435   match(Set mem (StoreP mem src));
 6436 
 6437   ins_cost(150);
 6438   format %{ "MOV    $mem,$src" %}
 6439   opcode(0xC7);               /* C7 /0 */
 6440   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6441   ins_pipe( ialu_mem_imm );
 6442 %}
 6443 
 6444 // Store Byte Immediate
 6445 instruct storeImmB(memory mem, immI8 src) %{
 6446   match(Set mem (StoreB mem src));
 6447 
 6448   ins_cost(150);
 6449   format %{ "MOV8   $mem,$src" %}
 6450   opcode(0xC6);               /* C6 /0 */
 6451   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6452   ins_pipe( ialu_mem_imm );
 6453 %}
 6454 
 6455 // Store CMS card-mark Immediate
 6456 instruct storeImmCM(memory mem, immI8 src) %{
 6457   match(Set mem (StoreCM mem src));
 6458 
 6459   ins_cost(150);
 6460   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6461   opcode(0xC6);               /* C6 /0 */
 6462   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6463   ins_pipe( ialu_mem_imm );
 6464 %}
 6465 
 6466 // Store Double
 6467 instruct storeDPR( memory mem, regDPR1 src) %{
 6468   predicate(UseSSE<=1);
 6469   match(Set mem (StoreD mem src));
 6470 
 6471   ins_cost(100);
 6472   format %{ "FST_D  $mem,$src" %}
 6473   opcode(0xDD);       /* DD /2 */
 6474   ins_encode( enc_FPR_store(mem,src) );
 6475   ins_pipe( fpu_mem_reg );
 6476 %}
 6477 
 6478 // Store double does rounding on x86
 6479 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6480   predicate(UseSSE<=1);
 6481   match(Set mem (StoreD mem (RoundDouble src)));
 6482 
 6483   ins_cost(100);
 6484   format %{ "FST_D  $mem,$src\t# round" %}
 6485   opcode(0xDD);       /* DD /2 */
 6486   ins_encode( enc_FPR_store(mem,src) );
 6487   ins_pipe( fpu_mem_reg );
 6488 %}
 6489 
// Store XMM register to memory (double-precision floating point)
 6491 // MOVSD instruction
 6492 instruct storeD(memory mem, regD src) %{
 6493   predicate(UseSSE>=2);
 6494   match(Set mem (StoreD mem src));
 6495   ins_cost(95);
 6496   format %{ "MOVSD  $mem,$src" %}
 6497   ins_encode %{
 6498     __ movdbl($mem$$Address, $src$$XMMRegister);
 6499   %}
 6500   ins_pipe( pipe_slow );
 6501 %}
 6502 
 6503 // Store XMM register to memory (single-precision floating point)
 6504 // MOVSS instruction
 6505 instruct storeF(memory mem, regF src) %{
 6506   predicate(UseSSE>=1);
 6507   match(Set mem (StoreF mem src));
 6508   ins_cost(95);
 6509   format %{ "MOVSS  $mem,$src" %}
 6510   ins_encode %{
 6511     __ movflt($mem$$Address, $src$$XMMRegister);
 6512   %}
 6513   ins_pipe( pipe_slow );
 6514 %}
 6515 
 6516 
 6517 // Store Float
 6518 instruct storeFPR( memory mem, regFPR1 src) %{
 6519   predicate(UseSSE==0);
 6520   match(Set mem (StoreF mem src));
 6521 
 6522   ins_cost(100);
 6523   format %{ "FST_S  $mem,$src" %}
 6524   opcode(0xD9);       /* D9 /2 */
 6525   ins_encode( enc_FPR_store(mem,src) );
 6526   ins_pipe( fpu_mem_reg );
 6527 %}
 6528 
 6529 // Store Float does rounding on x86
 6530 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6531   predicate(UseSSE==0);
 6532   match(Set mem (StoreF mem (RoundFloat src)));
 6533 
 6534   ins_cost(100);
 6535   format %{ "FST_S  $mem,$src\t# round" %}
 6536   opcode(0xD9);       /* D9 /2 */
 6537   ins_encode( enc_FPR_store(mem,src) );
 6538   ins_pipe( fpu_mem_reg );
 6539 %}
 6540 
// Store Float (converted from Double) does rounding on x86
 6542 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6543   predicate(UseSSE<=1);
 6544   match(Set mem (StoreF mem (ConvD2F src)));
 6545 
 6546   ins_cost(100);
 6547   format %{ "FST_S  $mem,$src\t# D-round" %}
 6548   opcode(0xD9);       /* D9 /2 */
 6549   ins_encode( enc_FPR_store(mem,src) );
 6550   ins_pipe( fpu_mem_reg );
 6551 %}
 6552 
 6553 // Store immediate Float value (it is faster than store from FPU register)
 6554 // The instruction usage is guarded by predicate in operand immFPR().
 6555 instruct storeFPR_imm( memory mem, immFPR src) %{
 6556   match(Set mem (StoreF mem src));
 6557 
 6558   ins_cost(50);
 6559   format %{ "MOV    $mem,$src\t# store float" %}
 6560   opcode(0xC7);               /* C7 /0 */
 6561   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6562   ins_pipe( ialu_mem_imm );
 6563 %}
 6564 
 6565 // Store immediate Float value (it is faster than store from XMM register)
 6566 // The instruction usage is guarded by predicate in operand immF().
 6567 instruct storeF_imm( memory mem, immF src) %{
 6568   match(Set mem (StoreF mem src));
 6569 
 6570   ins_cost(50);
 6571   format %{ "MOV    $mem,$src\t# store float" %}
 6572   opcode(0xC7);               /* C7 /0 */
 6573   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6574   ins_pipe( ialu_mem_imm );
 6575 %}
 6576 
 6577 // Store Integer to stack slot
 6578 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6579   match(Set dst src);
 6580 
 6581   ins_cost(100);
 6582   format %{ "MOV    $dst,$src" %}
 6583   opcode(0x89);
 6584   ins_encode( OpcPRegSS( dst, src ) );
 6585   ins_pipe( ialu_mem_reg );
 6586 %}
 6587 
// Store Pointer to stack slot
 6589 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6590   match(Set dst src);
 6591 
 6592   ins_cost(100);
 6593   format %{ "MOV    $dst,$src" %}
 6594   opcode(0x89);
 6595   ins_encode( OpcPRegSS( dst, src ) );
 6596   ins_pipe( ialu_mem_reg );
 6597 %}
 6598 
 6599 // Store Long to stack slot
 6600 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6601   match(Set dst src);
 6602 
 6603   ins_cost(200);
 6604   format %{ "MOV    $dst,$src.lo\n\t"
 6605             "MOV    $dst+4,$src.hi" %}
 6606   opcode(0x89, 0x89);
 6607   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6608   ins_pipe( ialu_mem_long_reg );
 6609 %}
 6610 
 6611 //----------MemBar Instructions-----------------------------------------------
 6612 // Memory barrier flavors
 6613 
 6614 instruct membar_acquire() %{
 6615   match(MemBarAcquire);
 6616   match(LoadFence);
 6617   ins_cost(400);
 6618 
 6619   size(0);
 6620   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6621   ins_encode();
 6622   ins_pipe(empty);
 6623 %}
 6624 
 6625 instruct membar_acquire_lock() %{
 6626   match(MemBarAcquireLock);
 6627   ins_cost(0);
 6628 
 6629   size(0);
 6630   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6631   ins_encode( );
 6632   ins_pipe(empty);
 6633 %}
 6634 
 6635 instruct membar_release() %{
 6636   match(MemBarRelease);
 6637   match(StoreFence);
 6638   ins_cost(400);
 6639 
 6640   size(0);
 6641   format %{ "MEMBAR-release ! (empty encoding)" %}
 6642   ins_encode( );
 6643   ins_pipe(empty);
 6644 %}
 6645 
 6646 instruct membar_release_lock() %{
 6647   match(MemBarReleaseLock);
 6648   ins_cost(0);
 6649 
 6650   size(0);
 6651   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6652   ins_encode( );
 6653   ins_pipe(empty);
 6654 %}
 6655 
 6656 instruct membar_volatile(eFlagsReg cr) %{
 6657   match(MemBarVolatile);
 6658   effect(KILL cr);
 6659   ins_cost(400);
 6660 
 6661   format %{
 6662     $$template
 6663     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6664   %}
 6665   ins_encode %{
 6666     __ membar(Assembler::StoreLoad);
 6667   %}
 6668   ins_pipe(pipe_slow);
 6669 %}
 6670 
 6671 instruct unnecessary_membar_volatile() %{
 6672   match(MemBarVolatile);
 6673   predicate(Matcher::post_store_load_barrier(n));
 6674   ins_cost(0);
 6675 
 6676   size(0);
 6677   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6678   ins_encode( );
 6679   ins_pipe(empty);
 6680 %}
 6681 
 6682 instruct membar_storestore() %{
 6683   match(MemBarStoreStore);
 6684   match(StoreStoreFence);
 6685   ins_cost(0);
 6686 
 6687   size(0);
 6688   format %{ "MEMBAR-storestore (empty encoding)" %}
 6689   ins_encode( );
 6690   ins_pipe(empty);
 6691 %}
 6692 
 6693 //----------Move Instructions--------------------------------------------------
 6694 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6695   match(Set dst (CastX2P src));
 6696   format %{ "# X2P  $dst, $src" %}
 6697   ins_encode( /*empty encoding*/ );
 6698   ins_cost(0);
 6699   ins_pipe(empty);
 6700 %}
 6701 
 6702 instruct castP2X(rRegI dst, eRegP src ) %{
 6703   match(Set dst (CastP2X src));
 6704   ins_cost(50);
 6705   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6706   ins_encode( enc_Copy( dst, src) );
 6707   ins_pipe( ialu_reg_reg );
 6708 %}
 6709 
 6710 //----------Conditional Move---------------------------------------------------
 6711 // Conditional move
 6712 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6713   predicate(!VM_Version::supports_cmov() );
 6714   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6715   ins_cost(200);
 6716   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6717             "MOV    $dst,$src\n"
 6718       "skip:" %}
 6719   ins_encode %{
 6720     Label Lskip;
 6721     // Invert sense of branch from sense of CMOV
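    // (x86 condition codes come in even/odd negated pairs, e.g. 'equal'/'notEqual',
    // so flipping bit 0 of the cmpcode yields the opposite condition.)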
 6722     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6723     __ movl($dst$$Register, $src$$Register);
 6724     __ bind(Lskip);
 6725   %}
 6726   ins_pipe( pipe_cmov_reg );
 6727 %}
 6728 
 6729 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6730   predicate(!VM_Version::supports_cmov() );
 6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6732   ins_cost(200);
 6733   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6734             "MOV    $dst,$src\n"
 6735       "skip:" %}
 6736   ins_encode %{
 6737     Label Lskip;
 6738     // Invert sense of branch from sense of CMOV
 6739     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6740     __ movl($dst$$Register, $src$$Register);
 6741     __ bind(Lskip);
 6742   %}
 6743   ins_pipe( pipe_cmov_reg );
 6744 %}
 6745 
 6746 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6747   predicate(VM_Version::supports_cmov() );
 6748   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6749   ins_cost(200);
 6750   format %{ "CMOV$cop $dst,$src" %}
 6751   opcode(0x0F,0x40);
 6752   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6753   ins_pipe( pipe_cmov_reg );
 6754 %}
 6755 
 6756 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6757   predicate(VM_Version::supports_cmov() );
 6758   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6759   ins_cost(200);
 6760   format %{ "CMOV$cop $dst,$src" %}
 6761   opcode(0x0F,0x40);
 6762   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6763   ins_pipe( pipe_cmov_reg );
 6764 %}
 6765 
 6766 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6767   predicate(VM_Version::supports_cmov() );
 6768   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6769   ins_cost(200);
 6770   expand %{
 6771     cmovI_regU(cop, cr, dst, src);
 6772   %}
 6773 %}
 6774 
 6775 // Conditional move
 6776 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6777   predicate(VM_Version::supports_cmov() );
 6778   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6779   ins_cost(250);
 6780   format %{ "CMOV$cop $dst,$src" %}
 6781   opcode(0x0F,0x40);
 6782   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6783   ins_pipe( pipe_cmov_mem );
 6784 %}
 6785 
 6786 // Conditional move
 6787 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6788   predicate(VM_Version::supports_cmov() );
 6789   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6790   ins_cost(250);
 6791   format %{ "CMOV$cop $dst,$src" %}
 6792   opcode(0x0F,0x40);
 6793   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6794   ins_pipe( pipe_cmov_mem );
 6795 %}
 6796 
 6797 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6798   predicate(VM_Version::supports_cmov() );
 6799   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6800   ins_cost(250);
 6801   expand %{
 6802     cmovI_memU(cop, cr, dst, src);
 6803   %}
 6804 %}
 6805 
 6806 // Conditional move
 6807 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6808   predicate(VM_Version::supports_cmov() );
 6809   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6810   ins_cost(200);
 6811   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6812   opcode(0x0F,0x40);
 6813   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6814   ins_pipe( pipe_cmov_reg );
 6815 %}
 6816 
 6817 // Conditional move (non-P6 version)
 6818 // Note:  a CMoveP is generated for  stubs and native wrappers
 6819 //        regardless of whether we are on a P6, so we
 6820 //        emulate a cmov here
 6821 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6822   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6823   ins_cost(300);
 6824   format %{ "Jn$cop   skip\n\t"
 6825           "MOV    $dst,$src\t# pointer\n"
 6826       "skip:" %}
 6827   opcode(0x8b);
 6828   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6829   ins_pipe( pipe_cmov_reg );
 6830 %}
 6831 
 6832 // Conditional move
 6833 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6834   predicate(VM_Version::supports_cmov() );
 6835   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6836   ins_cost(200);
 6837   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6838   opcode(0x0F,0x40);
 6839   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6840   ins_pipe( pipe_cmov_reg );
 6841 %}
 6842 
 6843 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6844   predicate(VM_Version::supports_cmov() );
 6845   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6846   ins_cost(200);
 6847   expand %{
 6848     cmovP_regU(cop, cr, dst, src);
 6849   %}
 6850 %}
 6851 
 6852 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6853 // correctly meets the two pointer arguments; one is an incoming
 6854 // register but the other is a memory operand.  ALSO appears to
 6855 // be buggy with implicit null checks.
 6856 //
 6857 //// Conditional move
 6858 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6859 //  predicate(VM_Version::supports_cmov() );
 6860 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6861 //  ins_cost(250);
 6862 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6863 //  opcode(0x0F,0x40);
 6864 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6865 //  ins_pipe( pipe_cmov_mem );
 6866 //%}
 6867 //
 6868 //// Conditional move
 6869 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6870 //  predicate(VM_Version::supports_cmov() );
 6871 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6872 //  ins_cost(250);
 6873 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6874 //  opcode(0x0F,0x40);
 6875 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6876 //  ins_pipe( pipe_cmov_mem );
 6877 //%}
 6878 
 6879 // Conditional move
 6880 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6881   predicate(UseSSE<=1);
 6882   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6883   ins_cost(200);
 6884   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6885   opcode(0xDA);
 6886   ins_encode( enc_cmov_dpr(cop,src) );
 6887   ins_pipe( pipe_cmovDPR_reg );
 6888 %}
 6889 
 6890 // Conditional move
 6891 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6892   predicate(UseSSE==0);
 6893   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6894   ins_cost(200);
 6895   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6896   opcode(0xDA);
 6897   ins_encode( enc_cmov_dpr(cop,src) );
 6898   ins_pipe( pipe_cmovDPR_reg );
 6899 %}
 6900 
 6901 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6902 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6903   predicate(UseSSE<=1);
 6904   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6905   ins_cost(200);
 6906   format %{ "Jn$cop   skip\n\t"
 6907             "MOV    $dst,$src\t# double\n"
 6908       "skip:" %}
 6909   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6910   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6911   ins_pipe( pipe_cmovDPR_reg );
 6912 %}
 6913 
 6914 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6915 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6916   predicate(UseSSE==0);
 6917   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6918   ins_cost(200);
 6919   format %{ "Jn$cop    skip\n\t"
 6920             "MOV    $dst,$src\t# float\n"
 6921       "skip:" %}
 6922   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6923   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6924   ins_pipe( pipe_cmovDPR_reg );
 6925 %}
 6926 
 6927 // No CMOVE with SSE/SSE2
 6928 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6929   predicate (UseSSE>=1);
 6930   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6931   ins_cost(200);
 6932   format %{ "Jn$cop   skip\n\t"
 6933             "MOVSS  $dst,$src\t# float\n"
 6934       "skip:" %}
 6935   ins_encode %{
 6936     Label skip;
 6937     // Invert sense of branch from sense of CMOV
 6938     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6939     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6940     __ bind(skip);
 6941   %}
 6942   ins_pipe( pipe_slow );
 6943 %}
 6944 
 6945 // No CMOVE with SSE/SSE2
 6946 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6947   predicate (UseSSE>=2);
 6948   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6949   ins_cost(200);
 6950   format %{ "Jn$cop   skip\n\t"
 6951             "MOVSD  $dst,$src\t# float\n"
 6952       "skip:" %}
 6953   ins_encode %{
 6954     Label skip;
 6955     // Invert sense of branch from sense of CMOV
 6956     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6957     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6958     __ bind(skip);
 6959   %}
 6960   ins_pipe( pipe_slow );
 6961 %}
 6962 
 6963 // unsigned version
 6964 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6965   predicate (UseSSE>=1);
 6966   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6967   ins_cost(200);
 6968   format %{ "Jn$cop   skip\n\t"
 6969             "MOVSS  $dst,$src\t# float\n"
 6970       "skip:" %}
 6971   ins_encode %{
 6972     Label skip;
 6973     // Invert sense of branch from sense of CMOV
 6974     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6975     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6976     __ bind(skip);
 6977   %}
 6978   ins_pipe( pipe_slow );
 6979 %}
 6980 
 6981 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6982   predicate (UseSSE>=1);
 6983   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   expand %{
 6986     fcmovF_regU(cop, cr, dst, src);
 6987   %}
 6988 %}
 6989 
 6990 // unsigned version
 6991 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6992   predicate (UseSSE>=2);
 6993   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6994   ins_cost(200);
 6995   format %{ "Jn$cop   skip\n\t"
 6996             "MOVSD  $dst,$src\t# float\n"
 6997       "skip:" %}
 6998   ins_encode %{
 6999     Label skip;
 7000     // Invert sense of branch from sense of CMOV
 7001     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7002     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7003     __ bind(skip);
 7004   %}
 7005   ins_pipe( pipe_slow );
 7006 %}
 7007 
 7008 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7009   predicate (UseSSE>=2);
 7010   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7011   ins_cost(200);
 7012   expand %{
 7013     fcmovD_regU(cop, cr, dst, src);
 7014   %}
 7015 %}
 7016 
 7017 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7018   predicate(VM_Version::supports_cmov() );
 7019   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7020   ins_cost(200);
 7021   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7022             "CMOV$cop $dst.hi,$src.hi" %}
 7023   opcode(0x0F,0x40);
 7024   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7025   ins_pipe( pipe_cmov_reg_long );
 7026 %}
 7027 
 7028 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7029   predicate(VM_Version::supports_cmov() );
 7030   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7031   ins_cost(200);
 7032   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7033             "CMOV$cop $dst.hi,$src.hi" %}
 7034   opcode(0x0F,0x40);
 7035   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7036   ins_pipe( pipe_cmov_reg_long );
 7037 %}
 7038 
 7039 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7040   predicate(VM_Version::supports_cmov() );
 7041   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7042   ins_cost(200);
 7043   expand %{
 7044     cmovL_regU(cop, cr, dst, src);
 7045   %}
 7046 %}
 7047 
 7048 //----------Arithmetic Instructions--------------------------------------------
 7049 //----------Addition Instructions----------------------------------------------
 7050 
 7051 // Integer Addition Instructions
 7052 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7053   match(Set dst (AddI dst src));
 7054   effect(KILL cr);
 7055 
 7056   size(2);
 7057   format %{ "ADD    $dst,$src" %}
 7058   opcode(0x03);
 7059   ins_encode( OpcP, RegReg( dst, src) );
 7060   ins_pipe( ialu_reg_reg );
 7061 %}
 7062 
 7063 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7064   match(Set dst (AddI dst src));
 7065   effect(KILL cr);
 7066 
 7067   format %{ "ADD    $dst,$src" %}
 7068   opcode(0x81, 0x00); /* /0 id */
 7069   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7070   ins_pipe( ialu_reg );
 7071 %}
 7072 
 7073 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7074   predicate(UseIncDec);
 7075   match(Set dst (AddI dst src));
 7076   effect(KILL cr);
 7077 
 7078   size(1);
 7079   format %{ "INC    $dst" %}
  opcode(0x40); /* INC r32 is encoded as 0x40 + reg */
 7081   ins_encode( Opc_plus( primary, dst ) );
 7082   ins_pipe( ialu_reg );
 7083 %}
 7084 
 7085 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7086   match(Set dst (AddI src0 src1));
 7087   ins_cost(110);
 7088 
 7089   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7090   opcode(0x8D); /* 0x8D /r */
 7091   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7092   ins_pipe( ialu_reg_reg );
 7093 %}
 7094 
 7095 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7096   match(Set dst (AddP src0 src1));
 7097   ins_cost(110);
 7098 
 7099   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7100   opcode(0x8D); /* 0x8D /r */
 7101   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7102   ins_pipe( ialu_reg_reg );
 7103 %}
 7104 
 7105 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7106   predicate(UseIncDec);
 7107   match(Set dst (AddI dst src));
 7108   effect(KILL cr);
 7109 
 7110   size(1);
 7111   format %{ "DEC    $dst" %}
  opcode(0x48); /* DEC r32 is encoded as 0x48 + reg */
 7113   ins_encode( Opc_plus( primary, dst ) );
 7114   ins_pipe( ialu_reg );
 7115 %}
 7116 
 7117 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7118   match(Set dst (AddP dst src));
 7119   effect(KILL cr);
 7120 
 7121   size(2);
 7122   format %{ "ADD    $dst,$src" %}
 7123   opcode(0x03);
 7124   ins_encode( OpcP, RegReg( dst, src) );
 7125   ins_pipe( ialu_reg_reg );
 7126 %}
 7127 
 7128 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7129   match(Set dst (AddP dst src));
 7130   effect(KILL cr);
 7131 
 7132   format %{ "ADD    $dst,$src" %}
 7133   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7134   // ins_encode( RegImm( dst, src) );
 7135   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7136   ins_pipe( ialu_reg );
 7137 %}
 7138 
 7139 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7140   match(Set dst (AddI dst (LoadI src)));
 7141   effect(KILL cr);
 7142 
 7143   ins_cost(150);
 7144   format %{ "ADD    $dst,$src" %}
 7145   opcode(0x03);
 7146   ins_encode( OpcP, RegMem( dst, src) );
 7147   ins_pipe( ialu_reg_mem );
 7148 %}
 7149 
 7150 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7151   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7152   effect(KILL cr);
 7153 
 7154   ins_cost(150);
 7155   format %{ "ADD    $dst,$src" %}
 7156   opcode(0x01);  /* Opcode 01 /r */
 7157   ins_encode( OpcP, RegMem( src, dst ) );
 7158   ins_pipe( ialu_mem_reg );
 7159 %}
 7160 
 7161 // Add Memory with Immediate
 7162 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7163   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7164   effect(KILL cr);
 7165 
 7166   ins_cost(125);
 7167   format %{ "ADD    $dst,$src" %}
 7168   opcode(0x81);               /* Opcode 81 /0 id */
 7169   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7170   ins_pipe( ialu_mem_imm );
 7171 %}
 7172 
 7173 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7174   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7175   effect(KILL cr);
 7176 
 7177   ins_cost(125);
 7178   format %{ "INC    $dst" %}
 7179   opcode(0xFF);               /* Opcode FF /0 */
 7180   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7181   ins_pipe( ialu_mem_imm );
 7182 %}
 7183 
 7184 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7185   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7186   effect(KILL cr);
 7187 
 7188   ins_cost(125);
 7189   format %{ "DEC    $dst" %}
 7190   opcode(0xFF);               /* Opcode FF /1 */
 7191   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7192   ins_pipe( ialu_mem_imm );
 7193 %}
 7194 
 7195 
 7196 instruct checkCastPP( eRegP dst ) %{
 7197   match(Set dst (CheckCastPP dst));
 7198 
 7199   size(0);
 7200   format %{ "#checkcastPP of $dst" %}
 7201   ins_encode( /*empty encoding*/ );
 7202   ins_pipe( empty );
 7203 %}
 7204 
 7205 instruct castPP( eRegP dst ) %{
 7206   match(Set dst (CastPP dst));
 7207   format %{ "#castPP of $dst" %}
 7208   ins_encode( /*empty encoding*/ );
 7209   ins_pipe( empty );
 7210 %}
 7211 
 7212 instruct castII( rRegI dst ) %{
 7213   match(Set dst (CastII dst));
 7214   format %{ "#castII of $dst" %}
 7215   ins_encode( /*empty encoding*/ );
 7216   ins_cost(0);
 7217   ins_pipe( empty );
 7218 %}
 7219 
 7220 instruct castLL( eRegL dst ) %{
 7221   match(Set dst (CastLL dst));
 7222   format %{ "#castLL of $dst" %}
 7223   ins_encode( /*empty encoding*/ );
 7224   ins_cost(0);
 7225   ins_pipe( empty );
 7226 %}
 7227 
 7228 instruct castFF( regF dst ) %{
 7229   predicate(UseSSE >= 1);
 7230   match(Set dst (CastFF dst));
 7231   format %{ "#castFF of $dst" %}
 7232   ins_encode( /*empty encoding*/ );
 7233   ins_cost(0);
 7234   ins_pipe( empty );
 7235 %}
 7236 
 7237 instruct castDD( regD dst ) %{
 7238   predicate(UseSSE >= 2);
 7239   match(Set dst (CastDD dst));
 7240   format %{ "#castDD of $dst" %}
 7241   ins_encode( /*empty encoding*/ );
 7242   ins_cost(0);
 7243   ins_pipe( empty );
 7244 %}
 7245 
 7246 instruct castFF_PR( regFPR dst ) %{
 7247   predicate(UseSSE < 1);
 7248   match(Set dst (CastFF dst));
 7249   format %{ "#castFF of $dst" %}
 7250   ins_encode( /*empty encoding*/ );
 7251   ins_cost(0);
 7252   ins_pipe( empty );
 7253 %}
 7254 
 7255 instruct castDD_PR( regDPR dst ) %{
 7256   predicate(UseSSE < 2);
 7257   match(Set dst (CastDD dst));
 7258   format %{ "#castDD of $dst" %}
 7259   ins_encode( /*empty encoding*/ );
 7260   ins_cost(0);
 7261   ins_pipe( empty );
 7262 %}
 7263 
 7264 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
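//
// CMPXCHG8B compares EDX:EAX with the 8-byte memory operand and, on success, stores
// ECX:EBX into it; that is why the 8-byte forms below pin oldval to EDX:EAX (eADXRegL),
// newval to ECX:EBX (eBCXRegL) and the address to ESI (eSIRegP), one of the registers
// the instruction does not consume.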
 7265 
 7266 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7267   predicate(VM_Version::supports_cx8());
 7268   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7269   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7270   effect(KILL cr, KILL oldval);
 7271   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7272             "MOV    $res,0\n\t"
 7273             "JNE,s  fail\n\t"
 7274             "MOV    $res,1\n"
 7275           "fail:" %}
 7276   ins_encode( enc_cmpxchg8(mem_ptr),
 7277               enc_flags_ne_to_boolean(res) );
 7278   ins_pipe( pipe_cmpxchg );
 7279 %}
 7280 
 7281 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7282   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7283   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7284   effect(KILL cr, KILL oldval);
 7285   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7286             "MOV    $res,0\n\t"
 7287             "JNE,s  fail\n\t"
 7288             "MOV    $res,1\n"
 7289           "fail:" %}
 7290   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7291   ins_pipe( pipe_cmpxchg );
 7292 %}
 7293 
 7294 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7295   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7296   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7297   effect(KILL cr, KILL oldval);
 7298   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7299             "MOV    $res,0\n\t"
 7300             "JNE,s  fail\n\t"
 7301             "MOV    $res,1\n"
 7302           "fail:" %}
 7303   ins_encode( enc_cmpxchgb(mem_ptr),
 7304               enc_flags_ne_to_boolean(res) );
 7305   ins_pipe( pipe_cmpxchg );
 7306 %}
 7307 
 7308 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7309   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7310   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7311   effect(KILL cr, KILL oldval);
 7312   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7313             "MOV    $res,0\n\t"
 7314             "JNE,s  fail\n\t"
 7315             "MOV    $res,1\n"
 7316           "fail:" %}
 7317   ins_encode( enc_cmpxchgw(mem_ptr),
 7318               enc_flags_ne_to_boolean(res) );
 7319   ins_pipe( pipe_cmpxchg );
 7320 %}
 7321 
 7322 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7323   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7324   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7325   effect(KILL cr, KILL oldval);
 7326   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7327             "MOV    $res,0\n\t"
 7328             "JNE,s  fail\n\t"
 7329             "MOV    $res,1\n"
 7330           "fail:" %}
 7331   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7332   ins_pipe( pipe_cmpxchg );
 7333 %}
 7334 
 7335 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7336   predicate(VM_Version::supports_cx8());
 7337   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7338   effect(KILL cr);
 7339   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7340   ins_encode( enc_cmpxchg8(mem_ptr) );
 7341   ins_pipe( pipe_cmpxchg );
 7342 %}
 7343 
 7344 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7345   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7346   effect(KILL cr);
 7347   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7348   ins_encode( enc_cmpxchg(mem_ptr) );
 7349   ins_pipe( pipe_cmpxchg );
 7350 %}
 7351 
 7352 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7353   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7354   effect(KILL cr);
 7355   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7356   ins_encode( enc_cmpxchgb(mem_ptr) );
 7357   ins_pipe( pipe_cmpxchg );
 7358 %}
 7359 
 7360 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7361   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7362   effect(KILL cr);
 7363   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7364   ins_encode( enc_cmpxchgw(mem_ptr) );
 7365   ins_pipe( pipe_cmpxchg );
 7366 %}
 7367 
 7368 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7369   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7370   effect(KILL cr);
 7371   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7372   ins_encode( enc_cmpxchg(mem_ptr) );
 7373   ins_pipe( pipe_cmpxchg );
 7374 %}
 7375 
 7376 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7377   predicate(n->as_LoadStore()->result_not_used());
 7378   match(Set dummy (GetAndAddB mem add));
 7379   effect(KILL cr);
 7380   format %{ "ADDB  [$mem],$add" %}
 7381   ins_encode %{
 7382     __ lock();
 7383     __ addb($mem$$Address, $add$$constant);
 7384   %}
 7385   ins_pipe( pipe_cmpxchg );
 7386 %}
 7387 
 7388 // Important to match to xRegI: only 8-bit regs.
 7389 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7390   match(Set newval (GetAndAddB mem newval));
 7391   effect(KILL cr);
 7392   format %{ "XADDB  [$mem],$newval" %}
 7393   ins_encode %{
 7394     __ lock();
 7395     __ xaddb($mem$$Address, $newval$$Register);
 7396   %}
 7397   ins_pipe( pipe_cmpxchg );
 7398 %}
 7399 
 7400 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7401   predicate(n->as_LoadStore()->result_not_used());
 7402   match(Set dummy (GetAndAddS mem add));
 7403   effect(KILL cr);
 7404   format %{ "ADDS  [$mem],$add" %}
 7405   ins_encode %{
 7406     __ lock();
 7407     __ addw($mem$$Address, $add$$constant);
 7408   %}
 7409   ins_pipe( pipe_cmpxchg );
 7410 %}
 7411 
 7412 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7413   match(Set newval (GetAndAddS mem newval));
 7414   effect(KILL cr);
 7415   format %{ "XADDS  [$mem],$newval" %}
 7416   ins_encode %{
 7417     __ lock();
 7418     __ xaddw($mem$$Address, $newval$$Register);
 7419   %}
 7420   ins_pipe( pipe_cmpxchg );
 7421 %}
 7422 
 7423 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7424   predicate(n->as_LoadStore()->result_not_used());
 7425   match(Set dummy (GetAndAddI mem add));
 7426   effect(KILL cr);
 7427   format %{ "ADDL  [$mem],$add" %}
 7428   ins_encode %{
 7429     __ lock();
 7430     __ addl($mem$$Address, $add$$constant);
 7431   %}
 7432   ins_pipe( pipe_cmpxchg );
 7433 %}
 7434 
 7435 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7436   match(Set newval (GetAndAddI mem newval));
 7437   effect(KILL cr);
 7438   format %{ "XADDL  [$mem],$newval" %}
 7439   ins_encode %{
 7440     __ lock();
 7441     __ xaddl($mem$$Address, $newval$$Register);
 7442   %}
 7443   ins_pipe( pipe_cmpxchg );
 7444 %}
 7445 
 7446 // Important to match to xRegI: only 8-bit regs.
 7447 instruct xchgB( memory mem, xRegI newval) %{
 7448   match(Set newval (GetAndSetB mem newval));
 7449   format %{ "XCHGB  $newval,[$mem]" %}
 7450   ins_encode %{
 7451     __ xchgb($newval$$Register, $mem$$Address);
 7452   %}
 7453   ins_pipe( pipe_cmpxchg );
 7454 %}
 7455 
 7456 instruct xchgS( memory mem, rRegI newval) %{
 7457   match(Set newval (GetAndSetS mem newval));
 7458   format %{ "XCHGW  $newval,[$mem]" %}
 7459   ins_encode %{
 7460     __ xchgw($newval$$Register, $mem$$Address);
 7461   %}
 7462   ins_pipe( pipe_cmpxchg );
 7463 %}
 7464 
 7465 instruct xchgI( memory mem, rRegI newval) %{
 7466   match(Set newval (GetAndSetI mem newval));
 7467   format %{ "XCHGL  $newval,[$mem]" %}
 7468   ins_encode %{
 7469     __ xchgl($newval$$Register, $mem$$Address);
 7470   %}
 7471   ins_pipe( pipe_cmpxchg );
 7472 %}
 7473 
 7474 instruct xchgP( memory mem, pRegP newval) %{
 7475   match(Set newval (GetAndSetP mem newval));
 7476   format %{ "XCHGL  $newval,[$mem]" %}
 7477   ins_encode %{
 7478     __ xchgl($newval$$Register, $mem$$Address);
 7479   %}
 7480   ins_pipe( pipe_cmpxchg );
 7481 %}
 7482 
 7483 //----------Subtraction Instructions-------------------------------------------
 7484 
 7485 // Integer Subtraction Instructions
 7486 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7487   match(Set dst (SubI dst src));
 7488   effect(KILL cr);
 7489 
 7490   size(2);
 7491   format %{ "SUB    $dst,$src" %}
 7492   opcode(0x2B);
 7493   ins_encode( OpcP, RegReg( dst, src) );
 7494   ins_pipe( ialu_reg_reg );
 7495 %}
 7496 
 7497 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7498   match(Set dst (SubI dst src));
 7499   effect(KILL cr);
 7500 
 7501   format %{ "SUB    $dst,$src" %}
 7502   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7503   // ins_encode( RegImm( dst, src) );
 7504   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7505   ins_pipe( ialu_reg );
 7506 %}
 7507 
 7508 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7509   match(Set dst (SubI dst (LoadI src)));
 7510   effect(KILL cr);
 7511 
 7512   ins_cost(150);
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x2B);
 7515   ins_encode( OpcP, RegMem( dst, src) );
 7516   ins_pipe( ialu_reg_mem );
 7517 %}
 7518 
 7519 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7520   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7521   effect(KILL cr);
 7522 
 7523   ins_cost(150);
 7524   format %{ "SUB    $dst,$src" %}
 7525   opcode(0x29);  /* Opcode 29 /r */
 7526   ins_encode( OpcP, RegMem( src, dst ) );
 7527   ins_pipe( ialu_mem_reg );
 7528 %}
 7529 
 7530 // Subtract from a pointer
 7531 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7532   match(Set dst (AddP dst (SubI zero src)));
 7533   effect(KILL cr);
 7534 
 7535   size(2);
 7536   format %{ "SUB    $dst,$src" %}
 7537   opcode(0x2B);
 7538   ins_encode( OpcP, RegReg( dst, src) );
 7539   ins_pipe( ialu_reg_reg );
 7540 %}
 7541 
 7542 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7543   match(Set dst (SubI zero dst));
 7544   effect(KILL cr);
 7545 
 7546   size(2);
 7547   format %{ "NEG    $dst" %}
 7548   opcode(0xF7,0x03);  // Opcode F7 /3
 7549   ins_encode( OpcP, RegOpc( dst ) );
 7550   ins_pipe( ialu_reg );
 7551 %}
 7552 
 7553 //----------Multiplication/Division Instructions-------------------------------
 7554 // Integer Multiplication Instructions
 7555 // Multiply Register
 7556 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7557   match(Set dst (MulI dst src));
 7558   effect(KILL cr);
 7559 
 7560   size(3);
 7561   ins_cost(300);
 7562   format %{ "IMUL   $dst,$src" %}
 7563   opcode(0xAF, 0x0F);
 7564   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7565   ins_pipe( ialu_reg_reg_alu0 );
 7566 %}
 7567 
 7568 // Multiply 32-bit Immediate
 7569 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7570   match(Set dst (MulI src imm));
 7571   effect(KILL cr);
 7572 
 7573   ins_cost(300);
 7574   format %{ "IMUL   $dst,$src,$imm" %}
 7575   opcode(0x69);  /* 69 /r id */
 7576   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7577   ins_pipe( ialu_reg_reg_alu0 );
 7578 %}
 7579 
 7580 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7581   match(Set dst src);
 7582   effect(KILL cr);
 7583 
 7584   // Note that this is artificially increased to make it more expensive than loadConL
 7585   ins_cost(250);
 7586   format %{ "MOV    EAX,$src\t// low word only" %}
 7587   opcode(0xB8);
 7588   ins_encode( LdImmL_Lo(dst, src) );
 7589   ins_pipe( ialu_reg_fat );
 7590 %}
 7591 
 7592 // Multiply by 32-bit Immediate, taking the shifted high order results
 7593 //  (special case for shift by 32)
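// This matches IR of the shape (int)(((long)src1 * con) >> 32) with con a 32-bit long
// constant, e.g. the multiply-by-magic-reciprocal form produced when integer division by
// a constant is strength-reduced; only the high half of the product (EDX) is kept.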
 7594 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7595   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7596   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7597              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7598              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7599   effect(USE src1, KILL cr);
 7600 
 7601   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7602   ins_cost(0*100 + 1*400 - 150);
 7603   format %{ "IMUL   EDX:EAX,$src1" %}
 7604   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7605   ins_pipe( pipe_slow );
 7606 %}
 7607 
 7608 // Multiply by 32-bit Immediate, taking the shifted high order results
 7609 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7610   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7611   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7613              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7614   effect(USE src1, KILL cr);
 7615 
 7616   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7617   ins_cost(1*100 + 1*400 - 150);
 7618   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7619             "SAR    EDX,$cnt-32" %}
 7620   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7621   ins_pipe( pipe_slow );
 7622 %}
 7623 
 7624 // Multiply Memory 32-bit Immediate
 7625 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7626   match(Set dst (MulI (LoadI src) imm));
 7627   effect(KILL cr);
 7628 
 7629   ins_cost(300);
 7630   format %{ "IMUL   $dst,$src,$imm" %}
 7631   opcode(0x69);  /* 69 /r id */
 7632   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7633   ins_pipe( ialu_reg_mem_alu0 );
 7634 %}
 7635 
 7636 // Multiply Memory
 7637 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7638   match(Set dst (MulI dst (LoadI src)));
 7639   effect(KILL cr);
 7640 
 7641   ins_cost(350);
 7642   format %{ "IMUL   $dst,$src" %}
 7643   opcode(0xAF, 0x0F);
 7644   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7645   ins_pipe( ialu_reg_mem_alu0 );
 7646 %}
 7647 
 7648 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7649 %{
 7650   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7651   effect(KILL cr, KILL src2);
 7652 
 7653   expand %{ mulI_eReg(dst, src1, cr);
 7654            mulI_eReg(src2, src3, cr);
 7655            addI_eReg(dst, src2, cr); %}
 7656 %}
 7657 
 7658 // Multiply Register Int to Long
 7659 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7660   // Basic Idea: long = (long)int * (long)int
 7661   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7662   effect(DEF dst, USE src, USE src1, KILL flags);
 7663 
 7664   ins_cost(300);
 7665   format %{ "IMUL   $dst,$src1" %}
 7666 
 7667   ins_encode( long_int_multiply( dst, src1 ) );
 7668   ins_pipe( ialu_reg_reg_alu0 );
 7669 %}
 7670 
 7671 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7672   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7673   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7674   effect(KILL flags);
 7675 
 7676   ins_cost(300);
 7677   format %{ "MUL    $dst,$src1" %}
 7678 
 7679   ins_encode( long_uint_multiply(dst, src1) );
 7680   ins_pipe( ialu_reg_reg_alu0 );
 7681 %}
 7682 
 7683 // Multiply Register Long
 7684 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7685   match(Set dst (MulL dst src));
 7686   effect(KILL cr, TEMP tmp);
 7687   ins_cost(4*100+3*400);
 7688 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7689 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7690   format %{ "MOV    $tmp,$src.lo\n\t"
 7691             "IMUL   $tmp,EDX\n\t"
 7692             "MOV    EDX,$src.hi\n\t"
 7693             "IMUL   EDX,EAX\n\t"
 7694             "ADD    $tmp,EDX\n\t"
 7695             "MUL    EDX:EAX,$src.lo\n\t"
 7696             "ADD    EDX,$tmp" %}
 7697   ins_encode( long_multiply( dst, src, tmp ) );
 7698   ins_pipe( pipe_slow );
 7699 %}
 7700 
 7701 // Multiply Register Long where the left operand's high 32 bits are zero
 7702 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7703   predicate(is_operand_hi32_zero(n->in(1)));
 7704   match(Set dst (MulL dst src));
 7705   effect(KILL cr, TEMP tmp);
 7706   ins_cost(2*100+2*400);
 7707 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7708 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7709   format %{ "MOV    $tmp,$src.hi\n\t"
 7710             "IMUL   $tmp,EAX\n\t"
 7711             "MUL    EDX:EAX,$src.lo\n\t"
 7712             "ADD    EDX,$tmp" %}
 7713   ins_encode %{
 7714     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7715     __ imull($tmp$$Register, rax);
 7716     __ mull($src$$Register);
 7717     __ addl(rdx, $tmp$$Register);
 7718   %}
 7719   ins_pipe( pipe_slow );
 7720 %}
 7721 
 7722 // Multiply Register Long where the right operand's high 32 bits are zero
 7723 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7724   predicate(is_operand_hi32_zero(n->in(2)));
 7725   match(Set dst (MulL dst src));
 7726   effect(KILL cr, TEMP tmp);
 7727   ins_cost(2*100+2*400);
 7728 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7729 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7730   format %{ "MOV    $tmp,$src.lo\n\t"
 7731             "IMUL   $tmp,EDX\n\t"
 7732             "MUL    EDX:EAX,$src.lo\n\t"
 7733             "ADD    EDX,$tmp" %}
 7734   ins_encode %{
 7735     __ movl($tmp$$Register, $src$$Register);
 7736     __ imull($tmp$$Register, rdx);
 7737     __ mull($src$$Register);
 7738     __ addl(rdx, $tmp$$Register);
 7739   %}
 7740   ins_pipe( pipe_slow );
 7741 %}
 7742 
 7743 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7744 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7745   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7746   match(Set dst (MulL dst src));
 7747   effect(KILL cr);
 7748   ins_cost(1*400);
 7749 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7750 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7751   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7752   ins_encode %{
 7753     __ mull($src$$Register);
 7754   %}
 7755   ins_pipe( pipe_slow );
 7756 %}
 7757 
 7758 // Multiply Register Long by small constant
 7759 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7760   match(Set dst (MulL dst src));
 7761   effect(KILL cr, TEMP tmp);
 7762   ins_cost(2*100+2*400);
 7763   size(12);
 7764 // Basic idea: lo(result) = lo(src * EAX)
 7765 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7766   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7767             "MOV    EDX,$src\n\t"
 7768             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7769             "ADD    EDX,$tmp" %}
 7770   ins_encode( long_multiply_con( dst, src, tmp ) );
 7771   ins_pipe( pipe_slow );
 7772 %}
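
// Illustrative sketch of the constant multiply above, assuming a small
// non-negative constant so only the low 32 bits of the immediate matter
// (reference only, not the generated code; lo/hi stand for EAX/EDX):
//
//   uint64_t mul64_con(uint32_t lo, uint32_t hi, uint32_t con) {
//     uint32_t tmp     = hi * con;                          // IMUL tmp,EDX,con
//     uint64_t lo_prod = (uint64_t)lo * con;                // MUL  EDX:EAX
//     uint32_t new_hi  = (uint32_t)(lo_prod >> 32) + tmp;   // ADD  EDX,tmp
//     return ((uint64_t)new_hi << 32) | (uint32_t)lo_prod;
//   }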
 7773 
 7774 // Integer DIV with Register
 7775 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7776   match(Set rax (DivI rax div));
 7777   effect(KILL rdx, KILL cr);
 7778   size(26);
 7779   ins_cost(30*100+10*100);
 7780   format %{ "CMP    EAX,0x80000000\n\t"
 7781             "JNE,s  normal\n\t"
 7782             "XOR    EDX,EDX\n\t"
 7783             "CMP    ECX,-1\n\t"
 7784             "JE,s   done\n"
 7785     "normal: CDQ\n\t"
 7786             "IDIV   $div\n\t"
 7787     "done:"        %}
 7788   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7789   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7790   ins_pipe( ialu_reg_reg_alu0 );
 7791 %}
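
// The compare/branch above guards the one case where IDIV itself would fault:
// in Java, min_jint / -1 must wrap back to min_jint (and min_jint % -1 is 0)
// instead of trapping.  C++ sketch of the required result (reference only;
// the helper name is just for exposition):
//
//   jint java_idiv(jint dividend, jint divisor) {   // divisor is known non-zero
//     if (dividend == min_jint && divisor == -1) return min_jint;
//     return dividend / divisor;
//   }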
 7792 
 7793 // Divide Register Long
 7794 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7795   match(Set dst (DivL src1 src2));
 7796   effect(CALL);
 7797   ins_cost(10000);
 7798   format %{ "PUSH   $src1.hi\n\t"
 7799             "PUSH   $src1.lo\n\t"
 7800             "PUSH   $src2.hi\n\t"
 7801             "PUSH   $src2.lo\n\t"
 7802             "CALL   SharedRuntime::ldiv\n\t"
 7803             "ADD    ESP,16" %}
 7804   ins_encode( long_div(src1,src2) );
 7805   ins_pipe( pipe_slow );
 7806 %}
 7807 
 7808 // Integer DIVMOD with Register, both quotient and mod results
 7809 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7810   match(DivModI rax div);
 7811   effect(KILL cr);
 7812   size(26);
 7813   ins_cost(30*100+10*100);
 7814   format %{ "CMP    EAX,0x80000000\n\t"
 7815             "JNE,s  normal\n\t"
 7816             "XOR    EDX,EDX\n\t"
 7817             "CMP    ECX,-1\n\t"
 7818             "JE,s   done\n"
 7819     "normal: CDQ\n\t"
 7820             "IDIV   $div\n\t"
 7821     "done:"        %}
 7822   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7823   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7824   ins_pipe( pipe_slow );
 7825 %}
 7826 
 7827 // Integer MOD with Register
 7828 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7829   match(Set rdx (ModI rax div));
 7830   effect(KILL rax, KILL cr);
 7831 
 7832   size(26);
 7833   ins_cost(300);
 7834   format %{ "CDQ\n\t"
 7835             "IDIV   $div" %}
 7836   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7837   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7838   ins_pipe( ialu_reg_reg_alu0 );
 7839 %}
 7840 
 7841 // Remainder Register Long
 7842 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7843   match(Set dst (ModL src1 src2));
 7844   effect(CALL);
 7845   ins_cost(10000);
 7846   format %{ "PUSH   $src1.hi\n\t"
 7847             "PUSH   $src1.lo\n\t"
 7848             "PUSH   $src2.hi\n\t"
 7849             "PUSH   $src2.lo\n\t"
 7850             "CALL   SharedRuntime::lrem\n\t"
 7851             "ADD    ESP,16" %}
 7852   ins_encode( long_mod(src1,src2) );
 7853   ins_pipe( pipe_slow );
 7854 %}
 7855 
 7856 // Divide Register Long (no special case since divisor != -1)
 7857 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7858   match(Set dst (DivL dst imm));
 7859   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7860   ins_cost(1000);
 7861   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7862             "XOR    $tmp2,$tmp2\n\t"
 7863             "CMP    $tmp,EDX\n\t"
 7864             "JA,s   fast\n\t"
 7865             "MOV    $tmp2,EAX\n\t"
 7866             "MOV    EAX,EDX\n\t"
 7867             "MOV    EDX,0\n\t"
 7868             "JLE,s  pos\n\t"
 7869             "LNEG   EAX : $tmp2\n\t"
 7870             "DIV    $tmp # unsigned division\n\t"
 7871             "XCHG   EAX,$tmp2\n\t"
 7872             "DIV    $tmp\n\t"
 7873             "LNEG   $tmp2 : EAX\n\t"
 7874             "JMP,s  done\n"
 7875     "pos:\n\t"
 7876             "DIV    $tmp\n\t"
 7877             "XCHG   EAX,$tmp2\n"
 7878     "fast:\n\t"
 7879             "DIV    $tmp\n"
 7880     "done:\n\t"
 7881             "MOV    EDX,$tmp2\n\t"
 7882             "NEG    EDX:EAX # if $imm < 0" %}
 7883   ins_encode %{
 7884     int con = (int)$imm$$constant;
 7885     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7886     int pcon = (con > 0) ? con : -con;
 7887     Label Lfast, Lpos, Ldone;
 7888 
 7889     __ movl($tmp$$Register, pcon);
 7890     __ xorl($tmp2$$Register,$tmp2$$Register);
 7891     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7892     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7893 
 7894     __ movl($tmp2$$Register, $dst$$Register); // save
 7895     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7896     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7897     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7898 
 7899     // Negative dividend.
 7900     // convert value to positive to use unsigned division
 7901     __ lneg($dst$$Register, $tmp2$$Register);
 7902     __ divl($tmp$$Register);
 7903     __ xchgl($dst$$Register, $tmp2$$Register);
 7904     __ divl($tmp$$Register);
 7905     // revert result back to negative
 7906     __ lneg($tmp2$$Register, $dst$$Register);
 7907     __ jmpb(Ldone);
 7908 
 7909     __ bind(Lpos);
 7910     __ divl($tmp$$Register); // Use unsigned division
 7911     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7913 
 7914     __ bind(Lfast);
 7915     // fast path: src is positive
 7916     __ divl($tmp$$Register); // Use unsigned division
 7917 
 7918     __ bind(Ldone);
 7919     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7920     if (con < 0) {
 7921       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7922     }
 7923   %}
 7924   ins_pipe( pipe_slow );
 7925 %}
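
// Illustrative sketch of the two-step schoolbook division used above, after
// the dividend has been made non-negative and with 0 < d <= 0x7fffffff, so
// neither 32-bit DIV can overflow (reference only, not the generated code):
//
//   uint64_t udiv64_by_32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     uint32_t q_hi = n_hi / d;                              // first  DIV
//     uint64_t rest = ((uint64_t)(n_hi % d) << 32) | n_lo;
//     uint32_t q_lo = (uint32_t)(rest / d);                  // second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;                  // quotient
//   }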
 7926 
// Remainder Register Long (remainder fits into 32 bits)
 7928 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7929   match(Set dst (ModL dst imm));
 7930   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7931   ins_cost(1000);
 7932   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7933             "CMP    $tmp,EDX\n\t"
 7934             "JA,s   fast\n\t"
 7935             "MOV    $tmp2,EAX\n\t"
 7936             "MOV    EAX,EDX\n\t"
 7937             "MOV    EDX,0\n\t"
 7938             "JLE,s  pos\n\t"
 7939             "LNEG   EAX : $tmp2\n\t"
 7940             "DIV    $tmp # unsigned division\n\t"
 7941             "MOV    EAX,$tmp2\n\t"
 7942             "DIV    $tmp\n\t"
 7943             "NEG    EDX\n\t"
 7944             "JMP,s  done\n"
 7945     "pos:\n\t"
 7946             "DIV    $tmp\n\t"
 7947             "MOV    EAX,$tmp2\n"
 7948     "fast:\n\t"
 7949             "DIV    $tmp\n"
 7950     "done:\n\t"
 7951             "MOV    EAX,EDX\n\t"
            "SAR    EDX,31" %}
 7953   ins_encode %{
 7954     int con = (int)$imm$$constant;
 7955     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7956     int pcon = (con > 0) ? con : -con;
 7957     Label  Lfast, Lpos, Ldone;
 7958 
 7959     __ movl($tmp$$Register, pcon);
 7960     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7961     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7962 
 7963     __ movl($tmp2$$Register, $dst$$Register); // save
 7964     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7965     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7966     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7967 
 7968     // Negative dividend.
 7969     // convert value to positive to use unsigned division
 7970     __ lneg($dst$$Register, $tmp2$$Register);
 7971     __ divl($tmp$$Register);
 7972     __ movl($dst$$Register, $tmp2$$Register);
 7973     __ divl($tmp$$Register);
 7974     // revert remainder back to negative
 7975     __ negl(HIGH_FROM_LOW($dst$$Register));
 7976     __ jmpb(Ldone);
 7977 
 7978     __ bind(Lpos);
 7979     __ divl($tmp$$Register);
 7980     __ movl($dst$$Register, $tmp2$$Register);
 7981 
 7982     __ bind(Lfast);
 7983     // fast path: src is positive
 7984     __ divl($tmp$$Register);
 7985 
 7986     __ bind(Ldone);
 7987     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7988     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7989 
 7990   %}
 7991   ins_pipe( pipe_slow );
 7992 %}
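
// The remainder variant uses the same two-step scheme; the second DIV leaves
// the 32-bit remainder in EDX, which is then sign-corrected.  Sketch of the
// underlying identity, under the same assumptions as the divide above
// (reference only):
//
//   uint32_t urem64_by_32(uint32_t n_hi, uint32_t n_lo, uint32_t d) {
//     uint64_t rest = ((uint64_t)(n_hi % d) << 32) | n_lo;   // first  DIV
//     return (uint32_t)(rest % d);                           // second DIV
//   }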
 7993 
 7994 // Integer Shift Instructions
 7995 // Shift Left by one
 7996 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7997   match(Set dst (LShiftI dst shift));
 7998   effect(KILL cr);
 7999 
 8000   size(2);
 8001   format %{ "SHL    $dst,$shift" %}
 8002   opcode(0xD1, 0x4);  /* D1 /4 */
 8003   ins_encode( OpcP, RegOpc( dst ) );
 8004   ins_pipe( ialu_reg );
 8005 %}
 8006 
 8007 // Shift Left by 8-bit immediate
 8008 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8009   match(Set dst (LShiftI dst shift));
 8010   effect(KILL cr);
 8011 
 8012   size(3);
 8013   format %{ "SHL    $dst,$shift" %}
 8014   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8015   ins_encode( RegOpcImm( dst, shift) );
 8016   ins_pipe( ialu_reg );
 8017 %}
 8018 
 8019 // Shift Left by variable
 8020 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8021   match(Set dst (LShiftI dst shift));
 8022   effect(KILL cr);
 8023 
 8024   size(2);
 8025   format %{ "SHL    $dst,$shift" %}
 8026   opcode(0xD3, 0x4);  /* D3 /4 */
 8027   ins_encode( OpcP, RegOpc( dst ) );
 8028   ins_pipe( ialu_reg_reg );
 8029 %}
 8030 
 8031 // Arithmetic shift right by one
 8032 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8033   match(Set dst (RShiftI dst shift));
 8034   effect(KILL cr);
 8035 
 8036   size(2);
 8037   format %{ "SAR    $dst,$shift" %}
 8038   opcode(0xD1, 0x7);  /* D1 /7 */
 8039   ins_encode( OpcP, RegOpc( dst ) );
 8040   ins_pipe( ialu_reg );
 8041 %}
 8042 
 8043 // Arithmetic shift right by one
 8044 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8045   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8046   effect(KILL cr);
 8047   format %{ "SAR    $dst,$shift" %}
 8048   opcode(0xD1, 0x7);  /* D1 /7 */
 8049   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8050   ins_pipe( ialu_mem_imm );
 8051 %}
 8052 
 8053 // Arithmetic Shift Right by 8-bit immediate
 8054 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8055   match(Set dst (RShiftI dst shift));
 8056   effect(KILL cr);
 8057 
 8058   size(3);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8061   ins_encode( RegOpcImm( dst, shift ) );
 8062   ins_pipe( ialu_mem_imm );
 8063 %}
 8064 
 8065 // Arithmetic Shift Right by 8-bit immediate
 8066 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8067   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8068   effect(KILL cr);
 8069 
 8070   format %{ "SAR    $dst,$shift" %}
 8071   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8072   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8073   ins_pipe( ialu_mem_imm );
 8074 %}
 8075 
 8076 // Arithmetic Shift Right by variable
 8077 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8078   match(Set dst (RShiftI dst shift));
 8079   effect(KILL cr);
 8080 
 8081   size(2);
 8082   format %{ "SAR    $dst,$shift" %}
 8083   opcode(0xD3, 0x7);  /* D3 /7 */
 8084   ins_encode( OpcP, RegOpc( dst ) );
 8085   ins_pipe( ialu_reg_reg );
 8086 %}
 8087 
 8088 // Logical shift right by one
 8089 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8090   match(Set dst (URShiftI dst shift));
 8091   effect(KILL cr);
 8092 
 8093   size(2);
 8094   format %{ "SHR    $dst,$shift" %}
 8095   opcode(0xD1, 0x5);  /* D1 /5 */
 8096   ins_encode( OpcP, RegOpc( dst ) );
 8097   ins_pipe( ialu_reg );
 8098 %}
 8099 
 8100 // Logical Shift Right by 8-bit immediate
 8101 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8102   match(Set dst (URShiftI dst shift));
 8103   effect(KILL cr);
 8104 
 8105   size(3);
 8106   format %{ "SHR    $dst,$shift" %}
 8107   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8108   ins_encode( RegOpcImm( dst, shift) );
 8109   ins_pipe( ialu_reg );
 8110 %}
 8111 
 8112 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
 8115 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8116   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8117 
 8118   size(3);
 8119   format %{ "MOVSX  $dst,$src :8" %}
 8120   ins_encode %{
 8121     __ movsbl($dst$$Register, $src$$Register);
 8122   %}
 8123   ins_pipe(ialu_reg_reg);
 8124 %}
 8125 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8128 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8129   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8130 
 8131   size(3);
 8132   format %{ "MOVSX  $dst,$src :16" %}
 8133   ins_encode %{
 8134     __ movswl($dst$$Register, $src$$Register);
 8135   %}
 8136   ins_pipe(ialu_reg_reg);
 8137 %}
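
// Both forms above fold the shift pair generated for the Java (byte) and
// (short) narrowing casts into one sign-extending move.  C++ sketch of the
// matched shape (reference only):
//
//   jint i2b(jint x) { return (x << 24) >> 24; }   // -> MOVSX r,r:8
//   jint i2s(jint x) { return (x << 16) >> 16; }   // -> MOVSX r,r:16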
 8138 
 8139 
 8140 // Logical Shift Right by variable
 8141 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8142   match(Set dst (URShiftI dst shift));
 8143   effect(KILL cr);
 8144 
 8145   size(2);
 8146   format %{ "SHR    $dst,$shift" %}
 8147   opcode(0xD3, 0x5);  /* D3 /5 */
 8148   ins_encode( OpcP, RegOpc( dst ) );
 8149   ins_pipe( ialu_reg_reg );
 8150 %}
 8151 
 8152 
 8153 //----------Logical Instructions-----------------------------------------------
 8154 //----------Integer Logical Instructions---------------------------------------
 8155 // And Instructions
 8156 // And Register with Register
 8157 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8158   match(Set dst (AndI dst src));
 8159   effect(KILL cr);
 8160 
 8161   size(2);
 8162   format %{ "AND    $dst,$src" %}
 8163   opcode(0x23);
 8164   ins_encode( OpcP, RegReg( dst, src) );
 8165   ins_pipe( ialu_reg_reg );
 8166 %}
 8167 
 8168 // And Register with Immediate
 8169 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8170   match(Set dst (AndI dst src));
 8171   effect(KILL cr);
 8172 
 8173   format %{ "AND    $dst,$src" %}
 8174   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8175   // ins_encode( RegImm( dst, src) );
 8176   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8177   ins_pipe( ialu_reg );
 8178 %}
 8179 
 8180 // And Register with Memory
 8181 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8182   match(Set dst (AndI dst (LoadI src)));
 8183   effect(KILL cr);
 8184 
 8185   ins_cost(150);
 8186   format %{ "AND    $dst,$src" %}
 8187   opcode(0x23);
 8188   ins_encode( OpcP, RegMem( dst, src) );
 8189   ins_pipe( ialu_reg_mem );
 8190 %}
 8191 
 8192 // And Memory with Register
 8193 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8194   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8195   effect(KILL cr);
 8196 
 8197   ins_cost(150);
 8198   format %{ "AND    $dst,$src" %}
 8199   opcode(0x21);  /* Opcode 21 /r */
 8200   ins_encode( OpcP, RegMem( src, dst ) );
 8201   ins_pipe( ialu_mem_reg );
 8202 %}
 8203 
 8204 // And Memory with Immediate
 8205 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8206   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8207   effect(KILL cr);
 8208 
 8209   ins_cost(125);
 8210   format %{ "AND    $dst,$src" %}
 8211   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8212   // ins_encode( MemImm( dst, src) );
 8213   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8214   ins_pipe( ialu_mem_imm );
 8215 %}
 8216 
 8217 // BMI1 instructions
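// The patterns below recognize the usual bit-twiddling idioms and map them
// onto single BMI1 instructions.  C++ sketch of the matched expressions
// (reference only; x and y stand for the source operands):
//
//   andn(x, y)  : ~x & y         // AndI (XorI x -1) y
//   blsi(x)     :  x & -x        // AndI (SubI 0 x) x   -> isolate lowest set bit
//   blsmsk(x)   :  x ^ (x - 1)   // XorI (AddI x -1) x  -> mask up to lowest set bit
//   blsr(x)     :  x & (x - 1)   // AndI (AddI x -1) x  -> clear lowest set bit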
 8218 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8219   match(Set dst (AndI (XorI src1 minus_1) src2));
 8220   predicate(UseBMI1Instructions);
 8221   effect(KILL cr);
 8222 
 8223   format %{ "ANDNL  $dst, $src1, $src2" %}
 8224 
 8225   ins_encode %{
 8226     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8227   %}
 8228   ins_pipe(ialu_reg);
 8229 %}
 8230 
 8231 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8232   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8233   predicate(UseBMI1Instructions);
 8234   effect(KILL cr);
 8235 
 8236   ins_cost(125);
 8237   format %{ "ANDNL  $dst, $src1, $src2" %}
 8238 
 8239   ins_encode %{
 8240     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8241   %}
 8242   ins_pipe(ialu_reg_mem);
 8243 %}
 8244 
 8245 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8246   match(Set dst (AndI (SubI imm_zero src) src));
 8247   predicate(UseBMI1Instructions);
 8248   effect(KILL cr);
 8249 
 8250   format %{ "BLSIL  $dst, $src" %}
 8251 
 8252   ins_encode %{
 8253     __ blsil($dst$$Register, $src$$Register);
 8254   %}
 8255   ins_pipe(ialu_reg);
 8256 %}
 8257 
 8258 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8259   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8260   predicate(UseBMI1Instructions);
 8261   effect(KILL cr);
 8262 
 8263   ins_cost(125);
 8264   format %{ "BLSIL  $dst, $src" %}
 8265 
 8266   ins_encode %{
 8267     __ blsil($dst$$Register, $src$$Address);
 8268   %}
 8269   ins_pipe(ialu_reg_mem);
 8270 %}
 8271 
 8272 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8273 %{
 8274   match(Set dst (XorI (AddI src minus_1) src));
 8275   predicate(UseBMI1Instructions);
 8276   effect(KILL cr);
 8277 
 8278   format %{ "BLSMSKL $dst, $src" %}
 8279 
 8280   ins_encode %{
 8281     __ blsmskl($dst$$Register, $src$$Register);
 8282   %}
 8283 
 8284   ins_pipe(ialu_reg);
 8285 %}
 8286 
 8287 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8288 %{
 8289   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8290   predicate(UseBMI1Instructions);
 8291   effect(KILL cr);
 8292 
 8293   ins_cost(125);
 8294   format %{ "BLSMSKL $dst, $src" %}
 8295 
 8296   ins_encode %{
 8297     __ blsmskl($dst$$Register, $src$$Address);
 8298   %}
 8299 
 8300   ins_pipe(ialu_reg_mem);
 8301 %}
 8302 
 8303 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8304 %{
 8305   match(Set dst (AndI (AddI src minus_1) src) );
 8306   predicate(UseBMI1Instructions);
 8307   effect(KILL cr);
 8308 
 8309   format %{ "BLSRL  $dst, $src" %}
 8310 
 8311   ins_encode %{
 8312     __ blsrl($dst$$Register, $src$$Register);
 8313   %}
 8314 
 8315   ins_pipe(ialu_reg);
 8316 %}
 8317 
 8318 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8319 %{
 8320   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8321   predicate(UseBMI1Instructions);
 8322   effect(KILL cr);
 8323 
 8324   ins_cost(125);
 8325   format %{ "BLSRL  $dst, $src" %}
 8326 
 8327   ins_encode %{
 8328     __ blsrl($dst$$Register, $src$$Address);
 8329   %}
 8330 
 8331   ins_pipe(ialu_reg_mem);
 8332 %}
 8333 
 8334 // Or Instructions
 8335 // Or Register with Register
 8336 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8337   match(Set dst (OrI dst src));
 8338   effect(KILL cr);
 8339 
 8340   size(2);
 8341   format %{ "OR     $dst,$src" %}
 8342   opcode(0x0B);
 8343   ins_encode( OpcP, RegReg( dst, src) );
 8344   ins_pipe( ialu_reg_reg );
 8345 %}
 8346 
 8347 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8348   match(Set dst (OrI dst (CastP2X src)));
 8349   effect(KILL cr);
 8350 
 8351   size(2);
 8352   format %{ "OR     $dst,$src" %}
 8353   opcode(0x0B);
 8354   ins_encode( OpcP, RegReg( dst, src) );
 8355   ins_pipe( ialu_reg_reg );
 8356 %}
 8357 
 8358 
 8359 // Or Register with Immediate
 8360 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8361   match(Set dst (OrI dst src));
 8362   effect(KILL cr);
 8363 
 8364   format %{ "OR     $dst,$src" %}
 8365   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8366   // ins_encode( RegImm( dst, src) );
 8367   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8368   ins_pipe( ialu_reg );
 8369 %}
 8370 
 8371 // Or Register with Memory
 8372 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8373   match(Set dst (OrI dst (LoadI src)));
 8374   effect(KILL cr);
 8375 
 8376   ins_cost(150);
 8377   format %{ "OR     $dst,$src" %}
 8378   opcode(0x0B);
 8379   ins_encode( OpcP, RegMem( dst, src) );
 8380   ins_pipe( ialu_reg_mem );
 8381 %}
 8382 
 8383 // Or Memory with Register
 8384 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8385   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8386   effect(KILL cr);
 8387 
 8388   ins_cost(150);
 8389   format %{ "OR     $dst,$src" %}
 8390   opcode(0x09);  /* Opcode 09 /r */
 8391   ins_encode( OpcP, RegMem( src, dst ) );
 8392   ins_pipe( ialu_mem_reg );
 8393 %}
 8394 
 8395 // Or Memory with Immediate
 8396 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8397   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8398   effect(KILL cr);
 8399 
 8400   ins_cost(125);
 8401   format %{ "OR     $dst,$src" %}
 8402   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8403   // ins_encode( MemImm( dst, src) );
 8404   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8405   ins_pipe( ialu_mem_imm );
 8406 %}
 8407 
 8408 // ROL/ROR
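// The expand rules below let the matcher collapse the shift/or rotate idiom
// into a single ROL or ROR.  C++ sketch of the shape being recognized
// (reference only; assumes 0 < s < 32, and the imm8 forms additionally
// require the two shift counts to sum to 0 mod 32, see their predicates):
//
//   uint32_t rol32(uint32_t x, int s) { return (x << s) | (x >> (32 - s)); }
//   uint32_t ror32(uint32_t x, int s) { return (x >> s) | (x << (32 - s)); }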
 8409 // ROL expand
 8410 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8411   effect(USE_DEF dst, USE shift, KILL cr);
 8412 
 8413   format %{ "ROL    $dst, $shift" %}
 8414   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8415   ins_encode( OpcP, RegOpc( dst ));
 8416   ins_pipe( ialu_reg );
 8417 %}
 8418 
 8419 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8420   effect(USE_DEF dst, USE shift, KILL cr);
 8421 
 8422   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
 8424   ins_encode( RegOpcImm(dst, shift) );
 8425   ins_pipe(ialu_reg);
 8426 %}
 8427 
 8428 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8429   effect(USE_DEF dst, USE shift, KILL cr);
 8430 
 8431   format %{ "ROL    $dst, $shift" %}
 8432   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8433   ins_encode(OpcP, RegOpc(dst));
 8434   ins_pipe( ialu_reg_reg );
 8435 %}
 8436 // end of ROL expand
 8437 
 8438 // ROL 32bit by one once
 8439 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8440   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8441 
 8442   expand %{
 8443     rolI_eReg_imm1(dst, lshift, cr);
 8444   %}
 8445 %}
 8446 
 8447 // ROL 32bit var by imm8 once
 8448 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8449   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8450   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8451 
 8452   expand %{
 8453     rolI_eReg_imm8(dst, lshift, cr);
 8454   %}
 8455 %}
 8456 
 8457 // ROL 32bit var by var once
 8458 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8459   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8460 
 8461   expand %{
 8462     rolI_eReg_CL(dst, shift, cr);
 8463   %}
 8464 %}
 8465 
 8466 // ROL 32bit var by var once
 8467 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8468   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8469 
 8470   expand %{
 8471     rolI_eReg_CL(dst, shift, cr);
 8472   %}
 8473 %}
 8474 
 8475 // ROR expand
 8476 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8477   effect(USE_DEF dst, USE shift, KILL cr);
 8478 
 8479   format %{ "ROR    $dst, $shift" %}
 8480   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8481   ins_encode( OpcP, RegOpc( dst ) );
 8482   ins_pipe( ialu_reg );
 8483 %}
 8484 
 8485 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8486   effect (USE_DEF dst, USE shift, KILL cr);
 8487 
 8488   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
 8490   ins_encode( RegOpcImm(dst, shift) );
 8491   ins_pipe( ialu_reg );
 8492 %}
 8493 
 8494 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8495   effect(USE_DEF dst, USE shift, KILL cr);
 8496 
 8497   format %{ "ROR    $dst, $shift" %}
 8498   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8499   ins_encode(OpcP, RegOpc(dst));
 8500   ins_pipe( ialu_reg_reg );
 8501 %}
 8502 // end of ROR expand
 8503 
// ROR 32bit by one once
 8505 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8506   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8507 
 8508   expand %{
 8509     rorI_eReg_imm1(dst, rshift, cr);
 8510   %}
 8511 %}
 8512 
 8513 // ROR 32bit by immI8 once
 8514 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8515   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8516   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8517 
 8518   expand %{
 8519     rorI_eReg_imm8(dst, rshift, cr);
 8520   %}
 8521 %}
 8522 
 8523 // ROR 32bit var by var once
 8524 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8525   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8526 
 8527   expand %{
 8528     rorI_eReg_CL(dst, shift, cr);
 8529   %}
 8530 %}
 8531 
 8532 // ROR 32bit var by var once
 8533 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8534   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8535 
 8536   expand %{
 8537     rorI_eReg_CL(dst, shift, cr);
 8538   %}
 8539 %}
 8540 
 8541 // Xor Instructions
 8542 // Xor Register with Register
 8543 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8544   match(Set dst (XorI dst src));
 8545   effect(KILL cr);
 8546 
 8547   size(2);
 8548   format %{ "XOR    $dst,$src" %}
 8549   opcode(0x33);
 8550   ins_encode( OpcP, RegReg( dst, src) );
 8551   ins_pipe( ialu_reg_reg );
 8552 %}
 8553 
 8554 // Xor Register with Immediate -1
 8555 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8556   match(Set dst (XorI dst imm));
 8557 
 8558   size(2);
 8559   format %{ "NOT    $dst" %}
 8560   ins_encode %{
 8561      __ notl($dst$$Register);
 8562   %}
 8563   ins_pipe( ialu_reg );
 8564 %}
 8565 
 8566 // Xor Register with Immediate
 8567 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8568   match(Set dst (XorI dst src));
 8569   effect(KILL cr);
 8570 
 8571   format %{ "XOR    $dst,$src" %}
 8572   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8573   // ins_encode( RegImm( dst, src) );
 8574   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8575   ins_pipe( ialu_reg );
 8576 %}
 8577 
 8578 // Xor Register with Memory
 8579 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8580   match(Set dst (XorI dst (LoadI src)));
 8581   effect(KILL cr);
 8582 
 8583   ins_cost(150);
 8584   format %{ "XOR    $dst,$src" %}
 8585   opcode(0x33);
 8586   ins_encode( OpcP, RegMem(dst, src) );
 8587   ins_pipe( ialu_reg_mem );
 8588 %}
 8589 
 8590 // Xor Memory with Register
 8591 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8592   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8593   effect(KILL cr);
 8594 
 8595   ins_cost(150);
 8596   format %{ "XOR    $dst,$src" %}
 8597   opcode(0x31);  /* Opcode 31 /r */
 8598   ins_encode( OpcP, RegMem( src, dst ) );
 8599   ins_pipe( ialu_mem_reg );
 8600 %}
 8601 
 8602 // Xor Memory with Immediate
 8603 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8604   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8605   effect(KILL cr);
 8606 
 8607   ins_cost(125);
 8608   format %{ "XOR    $dst,$src" %}
 8609   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8610   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8611   ins_pipe( ialu_mem_imm );
 8612 %}
 8613 
 8614 //----------Convert Int to Boolean---------------------------------------------
 8615 
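// Conv2B yields 1 for any non-zero input and 0 for zero.  The NEG/ADC pair in
// the expansions below works because NEG sets CF exactly when its operand is
// non-zero: after "dst = src; NEG dst" we have dst == -src and CF == (src != 0),
// so "ADC dst,src" leaves -src + src + CF == CF in dst.  C++ sketch of the
// value being computed (reference only):
//
//   jint conv2b(jint src) { return src != 0 ? 1 : 0; }
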
 8616 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8617   effect( DEF dst, USE src );
 8618   format %{ "MOV    $dst,$src" %}
 8619   ins_encode( enc_Copy( dst, src) );
 8620   ins_pipe( ialu_reg_reg );
 8621 %}
 8622 
 8623 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8624   effect( USE_DEF dst, USE src, KILL cr );
 8625 
 8626   size(4);
 8627   format %{ "NEG    $dst\n\t"
 8628             "ADC    $dst,$src" %}
 8629   ins_encode( neg_reg(dst),
 8630               OpcRegReg(0x13,dst,src) );
 8631   ins_pipe( ialu_reg_reg_long );
 8632 %}
 8633 
 8634 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8635   match(Set dst (Conv2B src));
 8636 
 8637   expand %{
 8638     movI_nocopy(dst,src);
 8639     ci2b(dst,src,cr);
 8640   %}
 8641 %}
 8642 
 8643 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8644   effect( DEF dst, USE src );
 8645   format %{ "MOV    $dst,$src" %}
 8646   ins_encode( enc_Copy( dst, src) );
 8647   ins_pipe( ialu_reg_reg );
 8648 %}
 8649 
 8650 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8651   effect( USE_DEF dst, USE src, KILL cr );
 8652   format %{ "NEG    $dst\n\t"
 8653             "ADC    $dst,$src" %}
 8654   ins_encode( neg_reg(dst),
 8655               OpcRegReg(0x13,dst,src) );
 8656   ins_pipe( ialu_reg_reg_long );
 8657 %}
 8658 
 8659 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8660   match(Set dst (Conv2B src));
 8661 
 8662   expand %{
 8663     movP_nocopy(dst,src);
 8664     cp2b(dst,src,cr);
 8665   %}
 8666 %}
 8667 
 8668 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8669   match(Set dst (CmpLTMask p q));
 8670   effect(KILL cr);
 8671   ins_cost(400);
 8672 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8674   format %{ "XOR    $dst,$dst\n\t"
 8675             "CMP    $p,$q\n\t"
 8676             "SETlt  $dst\n\t"
 8677             "NEG    $dst" %}
 8678   ins_encode %{
 8679     Register Rp = $p$$Register;
 8680     Register Rq = $q$$Register;
 8681     Register Rd = $dst$$Register;
 8682     Label done;
 8683     __ xorl(Rd, Rd);
 8684     __ cmpl(Rp, Rq);
 8685     __ setb(Assembler::less, Rd);
 8686     __ negl(Rd);
 8687   %}
 8688 
 8689   ins_pipe(pipe_slow);
 8690 %}
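
// CmpLTMask produces an all-ones mask when p < q (signed) and zero otherwise.
// C++ sketch of what the XOR/CMP/SETlt/NEG sequence computes (reference only):
//
//   jint cmpLTMask(jint p, jint q) { return p < q ? -1 : 0; }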
 8691 
 8692 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8693   match(Set dst (CmpLTMask dst zero));
 8694   effect(DEF dst, KILL cr);
 8695   ins_cost(100);
 8696 
 8697   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8698   ins_encode %{
 8699   __ sarl($dst$$Register, 31);
 8700   %}
 8701   ins_pipe(ialu_reg);
 8702 %}
 8703 
 8704 /* better to save a register than avoid a branch */
 8705 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8706   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8707   effect(KILL cr);
 8708   ins_cost(400);
 8709   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8710             "JGE    done\n\t"
 8711             "ADD    $p,$y\n"
 8712             "done:  " %}
 8713   ins_encode %{
 8714     Register Rp = $p$$Register;
 8715     Register Rq = $q$$Register;
 8716     Register Ry = $y$$Register;
 8717     Label done;
 8718     __ subl(Rp, Rq);
 8719     __ jccb(Assembler::greaterEqual, done);
 8720     __ addl(Rp, Ry);
 8721     __ bind(done);
 8722   %}
 8723 
 8724   ins_pipe(pipe_cmplt);
 8725 %}
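
// The fused form above evaluates the masked add without materializing the
// mask: the result is (p - q) plus y exactly when p < q.  C++ sketch of the
// matched expression (reference only):
//
//   jint cadd_cmpLTMask(jint p, jint q, jint y) {
//     return ((p < q ? -1 : 0) & y) + (p - q);
//   }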
 8726 
 8727 /* better to save a register than avoid a branch */
 8728 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8729   match(Set y (AndI (CmpLTMask p q) y));
 8730   effect(KILL cr);
 8731 
 8732   ins_cost(300);
 8733 
 8734   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8735             "JLT      done\n\t"
 8736             "XORL     $y, $y\n"
 8737             "done:  " %}
 8738   ins_encode %{
 8739     Register Rp = $p$$Register;
 8740     Register Rq = $q$$Register;
 8741     Register Ry = $y$$Register;
 8742     Label done;
 8743     __ cmpl(Rp, Rq);
 8744     __ jccb(Assembler::less, done);
 8745     __ xorl(Ry, Ry);
 8746     __ bind(done);
 8747   %}
 8748 
 8749   ins_pipe(pipe_cmplt);
 8750 %}
 8751 
 8752 /* If I enable this, I encourage spilling in the inner loop of compress.
 8753 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8754   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8755 */
 8756 //----------Overflow Math Instructions-----------------------------------------
 8757 
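// The Overflow* nodes below are generated for the Math.*Exact intrinsics: the
// ADD/CMP/NEG/IMUL is emitted so that a following branch can test the overflow
// flag directly.  C++ sketch of the condition OF encodes for the add case
// (reference only; the helper name is just for exposition):
//
//   bool add_overflows(jint a, jint b) {
//     jint r = (jint)((juint)a + (juint)b);   // ADDL (wraps)
//     return ((a ^ r) & (b ^ r)) < 0;         // same sign in, different sign out
//   }
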
 8758 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8759 %{
 8760   match(Set cr (OverflowAddI op1 op2));
 8761   effect(DEF cr, USE_KILL op1, USE op2);
 8762 
 8763   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8764 
 8765   ins_encode %{
 8766     __ addl($op1$$Register, $op2$$Register);
 8767   %}
 8768   ins_pipe(ialu_reg_reg);
 8769 %}
 8770 
 8771 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8772 %{
 8773   match(Set cr (OverflowAddI op1 op2));
 8774   effect(DEF cr, USE_KILL op1, USE op2);
 8775 
 8776   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8777 
 8778   ins_encode %{
 8779     __ addl($op1$$Register, $op2$$constant);
 8780   %}
 8781   ins_pipe(ialu_reg_reg);
 8782 %}
 8783 
 8784 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8785 %{
 8786   match(Set cr (OverflowSubI op1 op2));
 8787 
 8788   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8789   ins_encode %{
 8790     __ cmpl($op1$$Register, $op2$$Register);
 8791   %}
 8792   ins_pipe(ialu_reg_reg);
 8793 %}
 8794 
 8795 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8796 %{
 8797   match(Set cr (OverflowSubI op1 op2));
 8798 
 8799   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8800   ins_encode %{
 8801     __ cmpl($op1$$Register, $op2$$constant);
 8802   %}
 8803   ins_pipe(ialu_reg_reg);
 8804 %}
 8805 
 8806 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8807 %{
 8808   match(Set cr (OverflowSubI zero op2));
 8809   effect(DEF cr, USE_KILL op2);
 8810 
 8811   format %{ "NEG    $op2\t# overflow check int" %}
 8812   ins_encode %{
 8813     __ negl($op2$$Register);
 8814   %}
 8815   ins_pipe(ialu_reg_reg);
 8816 %}
 8817 
 8818 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8819 %{
 8820   match(Set cr (OverflowMulI op1 op2));
 8821   effect(DEF cr, USE_KILL op1, USE op2);
 8822 
 8823   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8824   ins_encode %{
 8825     __ imull($op1$$Register, $op2$$Register);
 8826   %}
 8827   ins_pipe(ialu_reg_reg_alu0);
 8828 %}
 8829 
 8830 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8831 %{
 8832   match(Set cr (OverflowMulI op1 op2));
 8833   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8834 
 8835   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8836   ins_encode %{
 8837     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8838   %}
 8839   ins_pipe(ialu_reg_reg_alu0);
 8840 %}
 8841 
 8842 // Integer Absolute Instructions
 8843 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8844 %{
 8845   match(Set dst (AbsI src));
 8846   effect(TEMP dst, TEMP tmp, KILL cr);
 8847   format %{ "movl $tmp, $src\n\t"
 8848             "sarl $tmp, 31\n\t"
 8849             "movl $dst, $src\n\t"
 8850             "xorl $dst, $tmp\n\t"
 8851             "subl $dst, $tmp\n"
 8852           %}
 8853   ins_encode %{
 8854     __ movl($tmp$$Register, $src$$Register);
 8855     __ sarl($tmp$$Register, 31);
 8856     __ movl($dst$$Register, $src$$Register);
 8857     __ xorl($dst$$Register, $tmp$$Register);
 8858     __ subl($dst$$Register, $tmp$$Register);
 8859   %}
 8860 
 8861   ins_pipe(ialu_reg_reg);
 8862 %}
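
// Branch-free absolute value: t = x >> 31 is 0 or -1, and (x ^ t) - t leaves x
// unchanged when t == 0 and negates it when t == -1 (min_jint wraps, matching
// Java semantics).  C++ sketch of the sequence above (reference only):
//
//   jint abs32(jint x) {
//     jint t = x >> 31;     // arithmetic shift: 0 or -1
//     return (x ^ t) - t;
//   }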
 8863 
 8864 //----------Long Instructions------------------------------------------------
 8865 // Add Long Register with Register
 8866 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8867   match(Set dst (AddL dst src));
 8868   effect(KILL cr);
 8869   ins_cost(200);
 8870   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8871             "ADC    $dst.hi,$src.hi" %}
 8872   opcode(0x03, 0x13);
 8873   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8874   ins_pipe( ialu_reg_reg_long );
 8875 %}
 8876 
 8877 // Add Long Register with Immediate
 8878 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8879   match(Set dst (AddL dst src));
 8880   effect(KILL cr);
 8881   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8882             "ADC    $dst.hi,$src.hi" %}
 8883   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8884   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8885   ins_pipe( ialu_reg_long );
 8886 %}
 8887 
 8888 // Add Long Register with Memory
 8889 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8890   match(Set dst (AddL dst (LoadL mem)));
 8891   effect(KILL cr);
 8892   ins_cost(125);
 8893   format %{ "ADD    $dst.lo,$mem\n\t"
 8894             "ADC    $dst.hi,$mem+4" %}
 8895   opcode(0x03, 0x13);
 8896   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8897   ins_pipe( ialu_reg_long_mem );
 8898 %}
 8899 
 8900 // Subtract Long Register with Register.
 8901 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8902   match(Set dst (SubL dst src));
 8903   effect(KILL cr);
 8904   ins_cost(200);
 8905   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8906             "SBB    $dst.hi,$src.hi" %}
 8907   opcode(0x2B, 0x1B);
 8908   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8909   ins_pipe( ialu_reg_reg_long );
 8910 %}
 8911 
 8912 // Subtract Long Register with Immediate
 8913 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8914   match(Set dst (SubL dst src));
 8915   effect(KILL cr);
 8916   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8917             "SBB    $dst.hi,$src.hi" %}
 8918   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8919   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8920   ins_pipe( ialu_reg_long );
 8921 %}
 8922 
 8923 // Subtract Long Register with Memory
 8924 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8925   match(Set dst (SubL dst (LoadL mem)));
 8926   effect(KILL cr);
 8927   ins_cost(125);
 8928   format %{ "SUB    $dst.lo,$mem\n\t"
 8929             "SBB    $dst.hi,$mem+4" %}
 8930   opcode(0x2B, 0x1B);
 8931   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8932   ins_pipe( ialu_reg_long_mem );
 8933 %}
 8934 
 8935 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8936   match(Set dst (SubL zero dst));
 8937   effect(KILL cr);
 8938   ins_cost(300);
  format %{ "NEG    $dst.hi\n\t"
            "NEG    $dst.lo\n\t"
            "SBB    $dst.hi,0" %}
 8940   ins_encode( neg_long(dst) );
 8941   ins_pipe( ialu_reg_reg_long );
 8942 %}
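
// Two-word negation: negate both halves and subtract the borrow out of the low
// word from the high word.  C++ sketch of the NEG/NEG/SBB sequence above
// (reference only, not the generated code):
//
//   void neg64(juint& hi, juint& lo) {
//     hi = -hi;             // NEG $dst.hi
//     hi -= (lo != 0);      // NEG $dst.lo sets CF = (lo != 0); SBB $dst.hi,0
//     lo = -lo;
//   }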
 8943 
 8944 // And Long Register with Register
 8945 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8946   match(Set dst (AndL dst src));
 8947   effect(KILL cr);
 8948   format %{ "AND    $dst.lo,$src.lo\n\t"
 8949             "AND    $dst.hi,$src.hi" %}
 8950   opcode(0x23,0x23);
 8951   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8952   ins_pipe( ialu_reg_reg_long );
 8953 %}
 8954 
 8955 // And Long Register with Immediate
 8956 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8957   match(Set dst (AndL dst src));
 8958   effect(KILL cr);
 8959   format %{ "AND    $dst.lo,$src.lo\n\t"
 8960             "AND    $dst.hi,$src.hi" %}
 8961   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8962   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8963   ins_pipe( ialu_reg_long );
 8964 %}
 8965 
 8966 // And Long Register with Memory
 8967 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8968   match(Set dst (AndL dst (LoadL mem)));
 8969   effect(KILL cr);
 8970   ins_cost(125);
 8971   format %{ "AND    $dst.lo,$mem\n\t"
 8972             "AND    $dst.hi,$mem+4" %}
 8973   opcode(0x23, 0x23);
 8974   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8975   ins_pipe( ialu_reg_long_mem );
 8976 %}
 8977 
 8978 // BMI1 instructions
 8979 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8980   match(Set dst (AndL (XorL src1 minus_1) src2));
 8981   predicate(UseBMI1Instructions);
 8982   effect(KILL cr, TEMP dst);
 8983 
 8984   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8985             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8986          %}
 8987 
 8988   ins_encode %{
 8989     Register Rdst = $dst$$Register;
 8990     Register Rsrc1 = $src1$$Register;
 8991     Register Rsrc2 = $src2$$Register;
 8992     __ andnl(Rdst, Rsrc1, Rsrc2);
 8993     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8994   %}
 8995   ins_pipe(ialu_reg_reg_long);
 8996 %}
 8997 
 8998 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8999   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9000   predicate(UseBMI1Instructions);
 9001   effect(KILL cr, TEMP dst);
 9002 
 9003   ins_cost(125);
 9004   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9005             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9006          %}
 9007 
 9008   ins_encode %{
 9009     Register Rdst = $dst$$Register;
 9010     Register Rsrc1 = $src1$$Register;
 9011     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9012 
 9013     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9014     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9015   %}
 9016   ins_pipe(ialu_reg_mem);
 9017 %}
 9018 
 9019 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9020   match(Set dst (AndL (SubL imm_zero src) src));
 9021   predicate(UseBMI1Instructions);
 9022   effect(KILL cr, TEMP dst);
 9023 
 9024   format %{ "MOVL   $dst.hi, 0\n\t"
 9025             "BLSIL  $dst.lo, $src.lo\n\t"
 9026             "JNZ    done\n\t"
 9027             "BLSIL  $dst.hi, $src.hi\n"
 9028             "done:"
 9029          %}
 9030 
 9031   ins_encode %{
 9032     Label done;
 9033     Register Rdst = $dst$$Register;
 9034     Register Rsrc = $src$$Register;
 9035     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9036     __ blsil(Rdst, Rsrc);
 9037     __ jccb(Assembler::notZero, done);
 9038     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9039     __ bind(done);
 9040   %}
 9041   ins_pipe(ialu_reg);
 9042 %}
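
// The split-word scheme above: if the low word is non-zero, the lowest set bit
// lives there and the high word of the result is zero (BLSIL on the low word
// leaves ZF clear); otherwise take BLSI of the high word.  C++ sketch
// (reference only; the helper name is just for exposition):
//
//   uint64_t blsi64(uint64_t x) {          // x & -x
//     uint32_t lo = (uint32_t)x, hi = (uint32_t)(x >> 32);
//     if (lo != 0) return lo & -lo;                    // high word is 0
//     return (uint64_t)(hi & -hi) << 32;               // low word is 0
//   }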
 9043 
 9044 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9045   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9046   predicate(UseBMI1Instructions);
 9047   effect(KILL cr, TEMP dst);
 9048 
 9049   ins_cost(125);
 9050   format %{ "MOVL   $dst.hi, 0\n\t"
 9051             "BLSIL  $dst.lo, $src\n\t"
 9052             "JNZ    done\n\t"
 9053             "BLSIL  $dst.hi, $src+4\n"
 9054             "done:"
 9055          %}
 9056 
 9057   ins_encode %{
 9058     Label done;
 9059     Register Rdst = $dst$$Register;
 9060     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9061 
 9062     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9063     __ blsil(Rdst, $src$$Address);
 9064     __ jccb(Assembler::notZero, done);
 9065     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9066     __ bind(done);
 9067   %}
 9068   ins_pipe(ialu_reg_mem);
 9069 %}
 9070 
 9071 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9072 %{
 9073   match(Set dst (XorL (AddL src minus_1) src));
 9074   predicate(UseBMI1Instructions);
 9075   effect(KILL cr, TEMP dst);
 9076 
 9077   format %{ "MOVL    $dst.hi, 0\n\t"
 9078             "BLSMSKL $dst.lo, $src.lo\n\t"
 9079             "JNC     done\n\t"
 9080             "BLSMSKL $dst.hi, $src.hi\n"
 9081             "done:"
 9082          %}
 9083 
 9084   ins_encode %{
 9085     Label done;
 9086     Register Rdst = $dst$$Register;
 9087     Register Rsrc = $src$$Register;
 9088     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9089     __ blsmskl(Rdst, Rsrc);
 9090     __ jccb(Assembler::carryClear, done);
 9091     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9092     __ bind(done);
 9093   %}
 9094 
 9095   ins_pipe(ialu_reg);
 9096 %}
 9097 
 9098 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9099 %{
 9100   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9101   predicate(UseBMI1Instructions);
 9102   effect(KILL cr, TEMP dst);
 9103 
 9104   ins_cost(125);
 9105   format %{ "MOVL    $dst.hi, 0\n\t"
 9106             "BLSMSKL $dst.lo, $src\n\t"
 9107             "JNC     done\n\t"
 9108             "BLSMSKL $dst.hi, $src+4\n"
 9109             "done:"
 9110          %}
 9111 
 9112   ins_encode %{
 9113     Label done;
 9114     Register Rdst = $dst$$Register;
 9115     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9116 
 9117     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9118     __ blsmskl(Rdst, $src$$Address);
 9119     __ jccb(Assembler::carryClear, done);
 9120     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9121     __ bind(done);
 9122   %}
 9123 
 9124   ins_pipe(ialu_reg_mem);
 9125 %}
 9126 
 9127 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9128 %{
 9129   match(Set dst (AndL (AddL src minus_1) src) );
 9130   predicate(UseBMI1Instructions);
 9131   effect(KILL cr, TEMP dst);
 9132 
 9133   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9134             "BLSRL  $dst.lo, $src.lo\n\t"
 9135             "JNC    done\n\t"
 9136             "BLSRL  $dst.hi, $src.hi\n"
 9137             "done:"
 9138   %}
 9139 
 9140   ins_encode %{
 9141     Label done;
 9142     Register Rdst = $dst$$Register;
 9143     Register Rsrc = $src$$Register;
 9144     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9145     __ blsrl(Rdst, Rsrc);
 9146     __ jccb(Assembler::carryClear, done);
 9147     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9148     __ bind(done);
 9149   %}
 9150 
 9151   ins_pipe(ialu_reg);
 9152 %}
 9153 
 9154 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9155 %{
 9156   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9157   predicate(UseBMI1Instructions);
 9158   effect(KILL cr, TEMP dst);
 9159 
 9160   ins_cost(125);
 9161   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9162             "BLSRL  $dst.lo, $src\n\t"
 9163             "JNC    done\n\t"
 9164             "BLSRL  $dst.hi, $src+4\n"
 9165             "done:"
 9166   %}
 9167 
 9168   ins_encode %{
 9169     Label done;
 9170     Register Rdst = $dst$$Register;
 9171     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9172     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9173     __ blsrl(Rdst, $src$$Address);
 9174     __ jccb(Assembler::carryClear, done);
 9175     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9176     __ bind(done);
 9177   %}
 9178 
 9179   ins_pipe(ialu_reg_mem);
 9180 %}
 9181 
 9182 // Or Long Register with Register
 9183 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9184   match(Set dst (OrL dst src));
 9185   effect(KILL cr);
 9186   format %{ "OR     $dst.lo,$src.lo\n\t"
 9187             "OR     $dst.hi,$src.hi" %}
 9188   opcode(0x0B,0x0B);
 9189   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9190   ins_pipe( ialu_reg_reg_long );
 9191 %}
 9192 
 9193 // Or Long Register with Immediate
 9194 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9195   match(Set dst (OrL dst src));
 9196   effect(KILL cr);
 9197   format %{ "OR     $dst.lo,$src.lo\n\t"
 9198             "OR     $dst.hi,$src.hi" %}
 9199   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9200   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9201   ins_pipe( ialu_reg_long );
 9202 %}
 9203 
 9204 // Or Long Register with Memory
 9205 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9206   match(Set dst (OrL dst (LoadL mem)));
 9207   effect(KILL cr);
 9208   ins_cost(125);
 9209   format %{ "OR     $dst.lo,$mem\n\t"
 9210             "OR     $dst.hi,$mem+4" %}
 9211   opcode(0x0B,0x0B);
 9212   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9213   ins_pipe( ialu_reg_long_mem );
 9214 %}
 9215 
 9216 // Xor Long Register with Register
 9217 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9218   match(Set dst (XorL dst src));
 9219   effect(KILL cr);
 9220   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9221             "XOR    $dst.hi,$src.hi" %}
 9222   opcode(0x33,0x33);
 9223   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9224   ins_pipe( ialu_reg_reg_long );
 9225 %}
 9226 
 9227 // Xor Long Register with Immediate -1
 9228 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9229   match(Set dst (XorL dst imm));
 9230   format %{ "NOT    $dst.lo\n\t"
 9231             "NOT    $dst.hi" %}
 9232   ins_encode %{
 9233      __ notl($dst$$Register);
 9234      __ notl(HIGH_FROM_LOW($dst$$Register));
 9235   %}
 9236   ins_pipe( ialu_reg_long );
 9237 %}
 9238 
 9239 // Xor Long Register with Immediate
 9240 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9241   match(Set dst (XorL dst src));
 9242   effect(KILL cr);
 9243   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9244             "XOR    $dst.hi,$src.hi" %}
 9245   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9246   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9247   ins_pipe( ialu_reg_long );
 9248 %}
 9249 
 9250 // Xor Long Register with Memory
 9251 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9252   match(Set dst (XorL dst (LoadL mem)));
 9253   effect(KILL cr);
 9254   ins_cost(125);
 9255   format %{ "XOR    $dst.lo,$mem\n\t"
 9256             "XOR    $dst.hi,$mem+4" %}
 9257   opcode(0x33,0x33);
 9258   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9259   ins_pipe( ialu_reg_long_mem );
 9260 %}
 9261 
 9262 // Shift Left Long by 1
 9263 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9264   predicate(UseNewLongLShift);
 9265   match(Set dst (LShiftL dst cnt));
 9266   effect(KILL cr);
 9267   ins_cost(100);
 9268   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9269             "ADC    $dst.hi,$dst.hi" %}
 9270   ins_encode %{
 9271     __ addl($dst$$Register,$dst$$Register);
 9272     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9273   %}
 9274   ins_pipe( ialu_reg_long );
 9275 %}
 9276 
 9277 // Shift Left Long by 2
 9278 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9279   predicate(UseNewLongLShift);
 9280   match(Set dst (LShiftL dst cnt));
 9281   effect(KILL cr);
 9282   ins_cost(100);
 9283   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9284             "ADC    $dst.hi,$dst.hi\n\t"
 9285             "ADD    $dst.lo,$dst.lo\n\t"
 9286             "ADC    $dst.hi,$dst.hi" %}
 9287   ins_encode %{
 9288     __ addl($dst$$Register,$dst$$Register);
 9289     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9290     __ addl($dst$$Register,$dst$$Register);
 9291     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9292   %}
 9293   ins_pipe( ialu_reg_long );
 9294 %}
 9295 
 9296 // Shift Left Long by 3
 9297 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9298   predicate(UseNewLongLShift);
 9299   match(Set dst (LShiftL dst cnt));
 9300   effect(KILL cr);
 9301   ins_cost(100);
 9302   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi\n\t"
 9304             "ADD    $dst.lo,$dst.lo\n\t"
 9305             "ADC    $dst.hi,$dst.hi\n\t"
 9306             "ADD    $dst.lo,$dst.lo\n\t"
 9307             "ADC    $dst.hi,$dst.hi" %}
 9308   ins_encode %{
 9309     __ addl($dst$$Register,$dst$$Register);
 9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9311     __ addl($dst$$Register,$dst$$Register);
 9312     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9313     __ addl($dst$$Register,$dst$$Register);
 9314     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9315   %}
 9316   ins_pipe( ialu_reg_long );
 9317 %}
 9318 
 9319 // Shift Left Long by 1-31
 9320 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9321   match(Set dst (LShiftL dst cnt));
 9322   effect(KILL cr);
 9323   ins_cost(200);
 9324   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9325             "SHL    $dst.lo,$cnt" %}
 9326   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9327   ins_encode( move_long_small_shift(dst,cnt) );
 9328   ins_pipe( ialu_reg_long );
 9329 %}
 9330 
 9331 // Shift Left Long by 32-63
 9332 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9333   match(Set dst (LShiftL dst cnt));
 9334   effect(KILL cr);
 9335   ins_cost(300);
 9336   format %{ "MOV    $dst.hi,$dst.lo\n"
 9337           "\tSHL    $dst.hi,$cnt-32\n"
 9338           "\tXOR    $dst.lo,$dst.lo" %}
 9339   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9340   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9341   ins_pipe( ialu_reg_long );
 9342 %}
 9343 
 9344 // Shift Left Long by variable
 9345 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9346   match(Set dst (LShiftL dst shift));
 9347   effect(KILL cr);
 9348   ins_cost(500+200);
 9349   size(17);
 9350   format %{ "TEST   $shift,32\n\t"
 9351             "JEQ,s  small\n\t"
 9352             "MOV    $dst.hi,$dst.lo\n\t"
 9353             "XOR    $dst.lo,$dst.lo\n"
 9354     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9355             "SHL    $dst.lo,$shift" %}
 9356   ins_encode( shift_left_long( dst, shift ) );
 9357   ins_pipe( pipe_slow );
 9358 %}
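
// SHLD/SHL only see the low five bits of ECX, so shift counts of 32..63 are
// handled by first moving the low word into the high word.  C++ sketch of the
// left-shift case (reference only, not the generated code):
//
//   void shl64(juint& hi, juint& lo, int s) {      // 0 <= s <= 63
//     if (s & 32) { hi = lo; lo = 0; }             // TEST/JEQ + MOV/XOR
//     int k = s & 31;
//     hi = (hi << k) | (k ? lo >> (32 - k) : 0);   // SHLD $dst.hi,$dst.lo,$shift
//     lo = lo << k;                                // SHL  $dst.lo,$shift
//   }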
 9359 
 9360 // Shift Right Long by 1-31
 9361 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9362   match(Set dst (URShiftL dst cnt));
 9363   effect(KILL cr);
 9364   ins_cost(200);
 9365   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9366             "SHR    $dst.hi,$cnt" %}
 9367   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9368   ins_encode( move_long_small_shift(dst,cnt) );
 9369   ins_pipe( ialu_reg_long );
 9370 %}
 9371 
 9372 // Shift Right Long by 32-63
 9373 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9374   match(Set dst (URShiftL dst cnt));
 9375   effect(KILL cr);
 9376   ins_cost(300);
 9377   format %{ "MOV    $dst.lo,$dst.hi\n"
 9378           "\tSHR    $dst.lo,$cnt-32\n"
 9379           "\tXOR    $dst.hi,$dst.hi" %}
 9380   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9381   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9382   ins_pipe( ialu_reg_long );
 9383 %}
 9384 
 9385 // Shift Right Long by variable
 9386 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9387   match(Set dst (URShiftL dst shift));
 9388   effect(KILL cr);
 9389   ins_cost(600);
 9390   size(17);
 9391   format %{ "TEST   $shift,32\n\t"
 9392             "JEQ,s  small\n\t"
 9393             "MOV    $dst.lo,$dst.hi\n\t"
 9394             "XOR    $dst.hi,$dst.hi\n"
 9395     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9396             "SHR    $dst.hi,$shift" %}
 9397   ins_encode( shift_right_long( dst, shift ) );
 9398   ins_pipe( pipe_slow );
 9399 %}
 9400 
 9401 // Shift Right Long by 1-31
 9402 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9403   match(Set dst (RShiftL dst cnt));
 9404   effect(KILL cr);
 9405   ins_cost(200);
 9406   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9407             "SAR    $dst.hi,$cnt" %}
 9408   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9409   ins_encode( move_long_small_shift(dst,cnt) );
 9410   ins_pipe( ialu_reg_long );
 9411 %}
 9412 
 9413 // Shift Right Long by 32-63
 9414 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9415   match(Set dst (RShiftL dst cnt));
 9416   effect(KILL cr);
 9417   ins_cost(300);
 9418   format %{ "MOV    $dst.lo,$dst.hi\n"
 9419           "\tSAR    $dst.lo,$cnt-32\n"
 9420           "\tSAR    $dst.hi,31" %}
 9421   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9422   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9423   ins_pipe( ialu_reg_long );
 9424 %}
 9425 
 9426 // Shift Right arithmetic Long by variable
 9427 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9428   match(Set dst (RShiftL dst shift));
 9429   effect(KILL cr);
 9430   ins_cost(600);
 9431   size(18);
 9432   format %{ "TEST   $shift,32\n\t"
 9433             "JEQ,s  small\n\t"
 9434             "MOV    $dst.lo,$dst.hi\n\t"
 9435             "SAR    $dst.hi,31\n"
 9436     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9437             "SAR    $dst.hi,$shift" %}
 9438   ins_encode( shift_right_arith_long( dst, shift ) );
 9439   ins_pipe( pipe_slow );
 9440 %}
 9441 
 9442 
 9443 //----------Double Instructions------------------------------------------------
 9444 // Double Math
 9445 
 9446 // Compare & branch
 9447 
 9448 // P6 version of float compare, sets condition codes in EFLAGS
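// (If FUCOMIP reports unordered, PF is set; the fixup below forces AH=1 and
// executes SAHF, which sets CF and clears ZF/PF, so a NaN operand is treated
// as "less than" -- the same convention the non-P6 compare below uses.)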
 9449 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9450   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9451   match(Set cr (CmpD src1 src2));
 9452   effect(KILL rax);
 9453   ins_cost(150);
 9454   format %{ "FLD    $src1\n\t"
 9455             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9456             "JNP    exit\n\t"
 9457             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9458             "SAHF\n"
 9459      "exit:\tNOP               // avoid branch to branch" %}
 9460   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9461   ins_encode( Push_Reg_DPR(src1),
 9462               OpcP, RegOpc(src2),
 9463               cmpF_P6_fixup );
 9464   ins_pipe( pipe_slow );
 9465 %}
 9466 
 9467 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9468   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9469   match(Set cr (CmpD src1 src2));
 9470   ins_cost(150);
 9471   format %{ "FLD    $src1\n\t"
 9472             "FUCOMIP ST,$src2  // P6 instruction" %}
 9473   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9474   ins_encode( Push_Reg_DPR(src1),
 9475               OpcP, RegOpc(src2));
 9476   ins_pipe( pipe_slow );
 9477 %}
 9478 
 9479 // Compare & branch
 9480 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9481   predicate(UseSSE<=1);
 9482   match(Set cr (CmpD src1 src2));
 9483   effect(KILL rax);
 9484   ins_cost(200);
 9485   format %{ "FLD    $src1\n\t"
 9486             "FCOMp  $src2\n\t"
 9487             "FNSTSW AX\n\t"
 9488             "TEST   AX,0x400\n\t"
 9489             "JZ,s   flags\n\t"
 9490             "MOV    AH,1\t# unordered treat as LT\n"
 9491     "flags:\tSAHF" %}
 9492   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9493   ins_encode( Push_Reg_DPR(src1),
 9494               OpcP, RegOpc(src2),
 9495               fpu_flags);
 9496   ins_pipe( pipe_slow );
 9497 %}
 9498 
 9499 // Compare vs zero into -1,0,1
 9500 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9501   predicate(UseSSE<=1);
 9502   match(Set dst (CmpD3 src1 zero));
 9503   effect(KILL cr, KILL rax);
 9504   ins_cost(280);
 9505   format %{ "FTSTD  $dst,$src1" %}
 9506   opcode(0xE4, 0xD9);
 9507   ins_encode( Push_Reg_DPR(src1),
 9508               OpcS, OpcP, PopFPU,
 9509               CmpF_Result(dst));
 9510   ins_pipe( pipe_slow );
 9511 %}
 9512 
 9513 // Compare into -1,0,1
 9514 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9515   predicate(UseSSE<=1);
 9516   match(Set dst (CmpD3 src1 src2));
 9517   effect(KILL cr, KILL rax);
 9518   ins_cost(300);
 9519   format %{ "FCMPD  $dst,$src1,$src2" %}
 9520   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9521   ins_encode( Push_Reg_DPR(src1),
 9522               OpcP, RegOpc(src2),
 9523               CmpF_Result(dst));
 9524   ins_pipe( pipe_slow );
 9525 %}
 9526 
 9527 // float compare and set condition codes in EFLAGS by XMM regs
 9528 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9529   predicate(UseSSE>=2);
 9530   match(Set cr (CmpD src1 src2));
 9531   ins_cost(145);
 9532   format %{ "UCOMISD $src1,$src2\n\t"
 9533             "JNP,s   exit\n\t"
 9534             "PUSHF\t# saw NaN, set CF\n\t"
 9535             "AND     [rsp], #0xffffff2b\n\t"
 9536             "POPF\n"
 9537     "exit:" %}
 9538   ins_encode %{
 9539     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9540     emit_cmpfp_fixup(_masm);
 9541   %}
 9542   ins_pipe( pipe_slow );
 9543 %}
 9544 
 9545 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9546   predicate(UseSSE>=2);
 9547   match(Set cr (CmpD src1 src2));
 9548   ins_cost(100);
 9549   format %{ "UCOMISD $src1,$src2" %}
 9550   ins_encode %{
 9551     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9552   %}
 9553   ins_pipe( pipe_slow );
 9554 %}
 9555 
 9556 // float compare and set condition codes in EFLAGS by XMM regs
 9557 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9558   predicate(UseSSE>=2);
 9559   match(Set cr (CmpD src1 (LoadD src2)));
 9560   ins_cost(145);
 9561   format %{ "UCOMISD $src1,$src2\n\t"
 9562             "JNP,s   exit\n\t"
 9563             "PUSHF\t# saw NaN, set CF\n\t"
 9564             "AND     [rsp], #0xffffff2b\n\t"
 9565             "POPF\n"
 9566     "exit:" %}
 9567   ins_encode %{
 9568     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9569     emit_cmpfp_fixup(_masm);
 9570   %}
 9571   ins_pipe( pipe_slow );
 9572 %}
 9573 
 9574 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9575   predicate(UseSSE>=2);
 9576   match(Set cr (CmpD src1 (LoadD src2)));
 9577   ins_cost(100);
 9578   format %{ "UCOMISD $src1,$src2" %}
 9579   ins_encode %{
 9580     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9581   %}
 9582   ins_pipe( pipe_slow );
 9583 %}
 9584 
 9585 // Compare into -1,0,1 in XMM
 9586 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9587   predicate(UseSSE>=2);
 9588   match(Set dst (CmpD3 src1 src2));
 9589   effect(KILL cr);
 9590   ins_cost(255);
 9591   format %{ "UCOMISD $src1, $src2\n\t"
 9592             "MOV     $dst, #-1\n\t"
 9593             "JP,s    done\n\t"
 9594             "JB,s    done\n\t"
 9595             "SETNE   $dst\n\t"
 9596             "MOVZB   $dst, $dst\n"
 9597     "done:" %}
 9598   ins_encode %{
 9599     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9600     emit_cmpfp3(_masm, $dst$$Register);
 9601   %}
 9602   ins_pipe( pipe_slow );
 9603 %}
 9604 
 9605 // Compare into -1,0,1 in XMM and memory
 9606 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9607   predicate(UseSSE>=2);
 9608   match(Set dst (CmpD3 src1 (LoadD src2)));
 9609   effect(KILL cr);
 9610   ins_cost(275);
 9611   format %{ "UCOMISD $src1, $src2\n\t"
 9612             "MOV     $dst, #-1\n\t"
 9613             "JP,s    done\n\t"
 9614             "JB,s    done\n\t"
 9615             "SETNE   $dst\n\t"
 9616             "MOVZB   $dst, $dst\n"
 9617     "done:" %}
 9618   ins_encode %{
 9619     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9620     emit_cmpfp3(_masm, $dst$$Register);
 9621   %}
 9622   ins_pipe( pipe_slow );
 9623 %}
 9624 
 9625 
 9626 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9627   predicate (UseSSE <=1);
 9628   match(Set dst (SubD dst src));
 9629 
 9630   format %{ "FLD    $src\n\t"
 9631             "DSUBp  $dst,ST" %}
 9632   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9633   ins_cost(150);
 9634   ins_encode( Push_Reg_DPR(src),
 9635               OpcP, RegOpc(dst) );
 9636   ins_pipe( fpu_reg_reg );
 9637 %}
 9638 
 9639 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9640   predicate (UseSSE <=1);
 9641   match(Set dst (RoundDouble (SubD src1 src2)));
 9642   ins_cost(250);
 9643 
 9644   format %{ "FLD    $src2\n\t"
 9645             "DSUB   ST,$src1\n\t"
 9646             "FSTP_D $dst\t# D-round" %}
 9647   opcode(0xD8, 0x5);
 9648   ins_encode( Push_Reg_DPR(src2),
 9649               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9650   ins_pipe( fpu_mem_reg_reg );
 9651 %}
 9652 
 9653 
 9654 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9655   predicate (UseSSE <=1);
 9656   match(Set dst (SubD dst (LoadD src)));
 9657   ins_cost(150);
 9658 
 9659   format %{ "FLD    $src\n\t"
 9660             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9662   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9663               OpcP, RegOpc(dst) );
 9664   ins_pipe( fpu_reg_mem );
 9665 %}
 9666 
 9667 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9668   predicate (UseSSE<=1);
 9669   match(Set dst (AbsD src));
 9670   ins_cost(100);
 9671   format %{ "FABS" %}
 9672   opcode(0xE1, 0xD9);
 9673   ins_encode( OpcS, OpcP );
 9674   ins_pipe( fpu_reg_reg );
 9675 %}
 9676 
 9677 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9678   predicate(UseSSE<=1);
 9679   match(Set dst (NegD src));
 9680   ins_cost(100);
 9681   format %{ "FCHS" %}
 9682   opcode(0xE0, 0xD9);
 9683   ins_encode( OpcS, OpcP );
 9684   ins_pipe( fpu_reg_reg );
 9685 %}
 9686 
 9687 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9688   predicate(UseSSE<=1);
 9689   match(Set dst (AddD dst src));
 9690   format %{ "FLD    $src\n\t"
 9691             "DADD   $dst,ST" %}
 9692   size(4);
 9693   ins_cost(150);
 9694   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9695   ins_encode( Push_Reg_DPR(src),
 9696               OpcP, RegOpc(dst) );
 9697   ins_pipe( fpu_reg_reg );
 9698 %}
 9699 
 9700 
 9701 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9702   predicate(UseSSE<=1);
 9703   match(Set dst (RoundDouble (AddD src1 src2)));
 9704   ins_cost(250);
 9705 
 9706   format %{ "FLD    $src2\n\t"
 9707             "DADD   ST,$src1\n\t"
 9708             "FSTP_D $dst\t# D-round" %}
 9709   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9710   ins_encode( Push_Reg_DPR(src2),
 9711               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9712   ins_pipe( fpu_mem_reg_reg );
 9713 %}
 9714 
 9715 
 9716 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9717   predicate(UseSSE<=1);
 9718   match(Set dst (AddD dst (LoadD src)));
 9719   ins_cost(150);
 9720 
 9721   format %{ "FLD    $src\n\t"
 9722             "DADDp  $dst,ST" %}
 9723   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9724   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9725               OpcP, RegOpc(dst) );
 9726   ins_pipe( fpu_reg_mem );
 9727 %}
 9728 
 9729 // add-to-memory
 9730 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9731   predicate(UseSSE<=1);
 9732   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9733   ins_cost(150);
 9734 
 9735   format %{ "FLD_D  $dst\n\t"
 9736             "DADD   ST,$src\n\t"
 9737             "FST_D  $dst" %}
 9738   opcode(0xDD, 0x0);
 9739   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9740               Opcode(0xD8), RegOpc(src),
 9741               set_instruction_start,
 9742               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9743   ins_pipe( fpu_reg_mem );
 9744 %}
 9745 
 9746 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9747   predicate(UseSSE<=1);
 9748   match(Set dst (AddD dst con));
 9749   ins_cost(125);
 9750   format %{ "FLD1\n\t"
 9751             "DADDp  $dst,ST" %}
 9752   ins_encode %{
 9753     __ fld1();
 9754     __ faddp($dst$$reg);
 9755   %}
 9756   ins_pipe(fpu_reg);
 9757 %}
 9758 
 9759 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9760   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9761   match(Set dst (AddD dst con));
 9762   ins_cost(200);
 9763   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9764             "DADDp  $dst,ST" %}
 9765   ins_encode %{
 9766     __ fld_d($constantaddress($con));
 9767     __ faddp($dst$$reg);
 9768   %}
 9769   ins_pipe(fpu_reg_mem);
 9770 %}
 9771 
 9772 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9773   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9774   match(Set dst (RoundDouble (AddD src con)));
 9775   ins_cost(200);
 9776   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9777             "DADD   ST,$src\n\t"
 9778             "FSTP_D $dst\t# D-round" %}
 9779   ins_encode %{
 9780     __ fld_d($constantaddress($con));
 9781     __ fadd($src$$reg);
 9782     __ fstp_d(Address(rsp, $dst$$disp));
 9783   %}
 9784   ins_pipe(fpu_mem_reg_con);
 9785 %}
 9786 
 9787 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9788   predicate(UseSSE<=1);
 9789   match(Set dst (MulD dst src));
 9790   format %{ "FLD    $src\n\t"
 9791             "DMULp  $dst,ST" %}
 9792   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9793   ins_cost(150);
 9794   ins_encode( Push_Reg_DPR(src),
 9795               OpcP, RegOpc(dst) );
 9796   ins_pipe( fpu_reg_reg );
 9797 %}
 9798 
 9799 // Strict FP instruction biases argument before multiply then
 9800 // biases result to avoid double rounding of subnormals.
 9801 //
 9802 // scale arg1 by multiplying arg1 by 2^(-15360)
 9803 // load arg2
 9804 // multiply scaled arg1 by arg2
 9805 // rescale product by 2^(15360)
 9806 //
 9807 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9808   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9809   match(Set dst (MulD dst src));
 9810   ins_cost(1);   // Select this instruction for all FP double multiplies
 9811 
 9812   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9813             "DMULp  $dst,ST\n\t"
 9814             "FLD    $src\n\t"
 9815             "DMULp  $dst,ST\n\t"
 9816             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9817             "DMULp  $dst,ST\n\t" %}
 9818   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9819   ins_encode( strictfp_bias1(dst),
 9820               Push_Reg_DPR(src),
 9821               OpcP, RegOpc(dst),
 9822               strictfp_bias2(dst) );
 9823   ins_pipe( fpu_reg_reg );
 9824 %}
 9825 
 9826 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9827   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9828   match(Set dst (MulD dst con));
 9829   ins_cost(200);
 9830   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9831             "DMULp  $dst,ST" %}
 9832   ins_encode %{
 9833     __ fld_d($constantaddress($con));
 9834     __ fmulp($dst$$reg);
 9835   %}
 9836   ins_pipe(fpu_reg_mem);
 9837 %}
 9838 
 9839 
 9840 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9841   predicate( UseSSE<=1 );
 9842   match(Set dst (MulD dst (LoadD src)));
 9843   ins_cost(200);
 9844   format %{ "FLD_D  $src\n\t"
 9845             "DMULp  $dst,ST" %}
 9846   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9847   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9848               OpcP, RegOpc(dst) );
 9849   ins_pipe( fpu_reg_mem );
 9850 %}
 9851 
 9852 //
 9853 // Cisc-alternate to reg-reg multiply
 9854 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9855   predicate( UseSSE<=1 );
 9856   match(Set dst (MulD src (LoadD mem)));
 9857   ins_cost(250);
 9858   format %{ "FLD_D  $mem\n\t"
 9859             "DMUL   ST,$src\n\t"
 9860             "FSTP_D $dst" %}
 9861   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9862   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9863               OpcReg_FPR(src),
 9864               Pop_Reg_DPR(dst) );
 9865   ins_pipe( fpu_reg_reg_mem );
 9866 %}
 9867 
 9868 
 9869 // MACRO3 -- addDPR a mulDPR
 9870 // This instruction is a '2-address' instruction in that the result goes
 9871 // back to src2.  This eliminates a move from the macro; possibly the
 9872 // register allocator will have to add it back (and maybe not).
 9873 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9874   predicate( UseSSE<=1 );
 9875   match(Set src2 (AddD (MulD src0 src1) src2));
 9876   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9877             "DMUL   ST,$src1\n\t"
 9878             "DADDp  $src2,ST" %}
 9879   ins_cost(250);
 9880   opcode(0xDD); /* LoadD DD /0 */
 9881   ins_encode( Push_Reg_FPR(src0),
 9882               FMul_ST_reg(src1),
 9883               FAddP_reg_ST(src2) );
 9884   ins_pipe( fpu_reg_reg_reg );
 9885 %}
 9886 
 9887 
 9888 // MACRO3 -- subDPR a mulDPR
 9889 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9890   predicate( UseSSE<=1 );
 9891   match(Set src2 (SubD (MulD src0 src1) src2));
 9892   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9893             "DMUL   ST,$src1\n\t"
 9894             "DSUBRp $src2,ST" %}
 9895   ins_cost(250);
 9896   ins_encode( Push_Reg_FPR(src0),
 9897               FMul_ST_reg(src1),
 9898               Opcode(0xDE), Opc_plus(0xE0,src2));
 9899   ins_pipe( fpu_reg_reg_reg );
 9900 %}
 9901 
 9902 
 9903 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9904   predicate( UseSSE<=1 );
 9905   match(Set dst (DivD dst src));
 9906 
 9907   format %{ "FLD    $src\n\t"
 9908             "FDIVp  $dst,ST" %}
 9909   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9910   ins_cost(150);
 9911   ins_encode( Push_Reg_DPR(src),
 9912               OpcP, RegOpc(dst) );
 9913   ins_pipe( fpu_reg_reg );
 9914 %}
 9915 
 9916 // Strict FP instruction biases argument before division then
 9917 // biases result, to avoid double rounding of subnormals.
 9918 //
 9919 // scale dividend by multiplying dividend by 2^(-15360)
 9920 // load divisor
 9921 // divide scaled dividend by divisor
 9922 // rescale quotient by 2^(15360)
 9923 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9929 
 9930   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9931             "DMULp  $dst,ST\n\t"
 9932             "FLD    $src\n\t"
 9933             "FDIVp  $dst,ST\n\t"
 9934             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9935             "DMULp  $dst,ST\n\t" %}
 9936   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9937   ins_encode( strictfp_bias1(dst),
 9938               Push_Reg_DPR(src),
 9939               OpcP, RegOpc(dst),
 9940               strictfp_bias2(dst) );
 9941   ins_pipe( fpu_reg_reg );
 9942 %}
 9943 
 9944 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9945   predicate(UseSSE<=1);
 9946   match(Set dst (ModD dst src));
 9947   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9948 
 9949   format %{ "DMOD   $dst,$src" %}
 9950   ins_cost(250);
 9951   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9952               emitModDPR(),
 9953               Push_Result_Mod_DPR(src),
 9954               Pop_Reg_DPR(dst));
 9955   ins_pipe( pipe_slow );
 9956 %}
 9957 
 9958 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9959   predicate(UseSSE>=2);
 9960   match(Set dst (ModD src0 src1));
 9961   effect(KILL rax, KILL cr);
 9962 
 9963   format %{ "SUB    ESP,8\t # DMOD\n"
 9964           "\tMOVSD  [ESP+0],$src1\n"
 9965           "\tFLD_D  [ESP+0]\n"
 9966           "\tMOVSD  [ESP+0],$src0\n"
 9967           "\tFLD_D  [ESP+0]\n"
 9968      "loop:\tFPREM\n"
 9969           "\tFWAIT\n"
 9970           "\tFNSTSW AX\n"
 9971           "\tSAHF\n"
 9972           "\tJP     loop\n"
 9973           "\tFSTP_D [ESP+0]\n"
 9974           "\tMOVSD  $dst,[ESP+0]\n"
 9975           "\tADD    ESP,8\n"
 9976           "\tFSTP   ST0\t # Restore FPU Stack"
 9977     %}
 9978   ins_cost(250);
 9979   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9980   ins_pipe( pipe_slow );
 9981 %}
 9982 
 9983 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9984   predicate (UseSSE<=1);
 9985   match(Set dst(AtanD dst src));
 9986   format %{ "DATA   $dst,$src" %}
 9987   opcode(0xD9, 0xF3);
 9988   ins_encode( Push_Reg_DPR(src),
 9989               OpcP, OpcS, RegOpc(dst) );
 9990   ins_pipe( pipe_slow );
 9991 %}
 9992 
 9993 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9994   predicate (UseSSE>=2);
 9995   match(Set dst(AtanD dst src));
 9996   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9997   format %{ "DATA   $dst,$src" %}
 9998   opcode(0xD9, 0xF3);
 9999   ins_encode( Push_SrcD(src),
10000               OpcP, OpcS, Push_ResultD(dst) );
10001   ins_pipe( pipe_slow );
10002 %}
10003 
10004 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10005   predicate (UseSSE<=1);
10006   match(Set dst (SqrtD src));
10007   format %{ "DSQRT  $dst,$src" %}
10008   opcode(0xFA, 0xD9);
10009   ins_encode( Push_Reg_DPR(src),
10010               OpcS, OpcP, Pop_Reg_DPR(dst) );
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 //-------------Float Instructions-------------------------------
10015 // Float Math
10016 
10017 // Code for float compare:
10018 //     fcompp();
10019 //     fwait(); fnstsw_ax();
10020 //     sahf();
10021 //     movl(dst, unordered_result);
10022 //     jcc(Assembler::parity, exit);
10023 //     movl(dst, less_result);
10024 //     jcc(Assembler::below, exit);
10025 //     movl(dst, equal_result);
10026 //     jcc(Assembler::equal, exit);
10027 //     movl(dst, greater_result);
10028 //   exit:
10029 
10030 // P6 version of float compare, sets condition codes in EFLAGS
10031 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10032   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10033   match(Set cr (CmpF src1 src2));
10034   effect(KILL rax);
10035   ins_cost(150);
10036   format %{ "FLD    $src1\n\t"
10037             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10038             "JNP    exit\n\t"
10039             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10040             "SAHF\n"
10041      "exit:\tNOP               // avoid branch to branch" %}
10042   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10043   ins_encode( Push_Reg_DPR(src1),
10044               OpcP, RegOpc(src2),
10045               cmpF_P6_fixup );
10046   ins_pipe( pipe_slow );
10047 %}
10048 
10049 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10050   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10051   match(Set cr (CmpF src1 src2));
10052   ins_cost(100);
10053   format %{ "FLD    $src1\n\t"
10054             "FUCOMIP ST,$src2  // P6 instruction" %}
10055   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10056   ins_encode( Push_Reg_DPR(src1),
10057               OpcP, RegOpc(src2));
10058   ins_pipe( pipe_slow );
10059 %}
10060 
10061 
10062 // Compare & branch
10063 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10064   predicate(UseSSE == 0);
10065   match(Set cr (CmpF src1 src2));
10066   effect(KILL rax);
10067   ins_cost(200);
10068   format %{ "FLD    $src1\n\t"
10069             "FCOMp  $src2\n\t"
10070             "FNSTSW AX\n\t"
10071             "TEST   AX,0x400\n\t"
10072             "JZ,s   flags\n\t"
10073             "MOV    AH,1\t# unordered treat as LT\n"
10074     "flags:\tSAHF" %}
10075   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10076   ins_encode( Push_Reg_DPR(src1),
10077               OpcP, RegOpc(src2),
10078               fpu_flags);
10079   ins_pipe( pipe_slow );
10080 %}
10081 
10082 // Compare vs zero into -1,0,1
10083 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10084   predicate(UseSSE == 0);
10085   match(Set dst (CmpF3 src1 zero));
10086   effect(KILL cr, KILL rax);
10087   ins_cost(280);
10088   format %{ "FTSTF  $dst,$src1" %}
10089   opcode(0xE4, 0xD9);
10090   ins_encode( Push_Reg_DPR(src1),
10091               OpcS, OpcP, PopFPU,
10092               CmpF_Result(dst));
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 // Compare into -1,0,1
10097 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10098   predicate(UseSSE == 0);
10099   match(Set dst (CmpF3 src1 src2));
10100   effect(KILL cr, KILL rax);
10101   ins_cost(300);
10102   format %{ "FCMPF  $dst,$src1,$src2" %}
10103   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10104   ins_encode( Push_Reg_DPR(src1),
10105               OpcP, RegOpc(src2),
10106               CmpF_Result(dst));
10107   ins_pipe( pipe_slow );
10108 %}
10109 
10110 // float compare and set condition codes in EFLAGS by XMM regs
10111 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10112   predicate(UseSSE>=1);
10113   match(Set cr (CmpF src1 src2));
10114   ins_cost(145);
10115   format %{ "UCOMISS $src1,$src2\n\t"
10116             "JNP,s   exit\n\t"
10117             "PUSHF\t# saw NaN, set CF\n\t"
10118             "AND     [rsp], #0xffffff2b\n\t"
10119             "POPF\n"
10120     "exit:" %}
10121   ins_encode %{
10122     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10123     emit_cmpfp_fixup(_masm);
10124   %}
10125   ins_pipe( pipe_slow );
10126 %}
10127 
10128 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10129   predicate(UseSSE>=1);
10130   match(Set cr (CmpF src1 src2));
10131   ins_cost(100);
10132   format %{ "UCOMISS $src1,$src2" %}
10133   ins_encode %{
10134     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10135   %}
10136   ins_pipe( pipe_slow );
10137 %}
10138 
10139 // float compare and set condition codes in EFLAGS by XMM regs
10140 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10141   predicate(UseSSE>=1);
10142   match(Set cr (CmpF src1 (LoadF src2)));
10143   ins_cost(165);
10144   format %{ "UCOMISS $src1,$src2\n\t"
10145             "JNP,s   exit\n\t"
10146             "PUSHF\t# saw NaN, set CF\n\t"
10147             "AND     [rsp], #0xffffff2b\n\t"
10148             "POPF\n"
10149     "exit:" %}
10150   ins_encode %{
10151     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10152     emit_cmpfp_fixup(_masm);
10153   %}
10154   ins_pipe( pipe_slow );
10155 %}
10156 
10157 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10158   predicate(UseSSE>=1);
10159   match(Set cr (CmpF src1 (LoadF src2)));
10160   ins_cost(100);
10161   format %{ "UCOMISS $src1,$src2" %}
10162   ins_encode %{
10163     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10164   %}
10165   ins_pipe( pipe_slow );
10166 %}
10167 
10168 // Compare into -1,0,1 in XMM
10169 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10170   predicate(UseSSE>=1);
10171   match(Set dst (CmpF3 src1 src2));
10172   effect(KILL cr);
10173   ins_cost(255);
10174   format %{ "UCOMISS $src1, $src2\n\t"
10175             "MOV     $dst, #-1\n\t"
10176             "JP,s    done\n\t"
10177             "JB,s    done\n\t"
10178             "SETNE   $dst\n\t"
10179             "MOVZB   $dst, $dst\n"
10180     "done:" %}
10181   ins_encode %{
10182     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10183     emit_cmpfp3(_masm, $dst$$Register);
10184   %}
10185   ins_pipe( pipe_slow );
10186 %}
10187 
10188 // Compare into -1,0,1 in XMM and memory
10189 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10190   predicate(UseSSE>=1);
10191   match(Set dst (CmpF3 src1 (LoadF src2)));
10192   effect(KILL cr);
10193   ins_cost(275);
10194   format %{ "UCOMISS $src1, $src2\n\t"
10195             "MOV     $dst, #-1\n\t"
10196             "JP,s    done\n\t"
10197             "JB,s    done\n\t"
10198             "SETNE   $dst\n\t"
10199             "MOVZB   $dst, $dst\n"
10200     "done:" %}
10201   ins_encode %{
10202     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10203     emit_cmpfp3(_masm, $dst$$Register);
10204   %}
10205   ins_pipe( pipe_slow );
10206 %}
10207 
10208 // Spill to obtain 24-bit precision
10209 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10210   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10211   match(Set dst (SubF src1 src2));
10212 
10213   format %{ "FSUB   $dst,$src1 - $src2" %}
10214   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10215   ins_encode( Push_Reg_FPR(src1),
10216               OpcReg_FPR(src2),
10217               Pop_Mem_FPR(dst) );
10218   ins_pipe( fpu_mem_reg_reg );
10219 %}
10220 //
10221 // This instruction does not round to 24-bits
10222 instruct subFPR_reg(regFPR dst, regFPR src) %{
10223   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10224   match(Set dst (SubF dst src));
10225 
10226   format %{ "FSUB   $dst,$src" %}
10227   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10228   ins_encode( Push_Reg_FPR(src),
10229               OpcP, RegOpc(dst) );
10230   ins_pipe( fpu_reg_reg );
10231 %}
10232 
10233 // Spill to obtain 24-bit precision
10234 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10235   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10236   match(Set dst (AddF src1 src2));
10237 
10238   format %{ "FADD   $dst,$src1,$src2" %}
10239   opcode(0xD8, 0x0); /* D8 C0+i */
10240   ins_encode( Push_Reg_FPR(src2),
10241               OpcReg_FPR(src1),
10242               Pop_Mem_FPR(dst) );
10243   ins_pipe( fpu_mem_reg_reg );
10244 %}
10245 //
10246 // This instruction does not round to 24-bits
10247 instruct addFPR_reg(regFPR dst, regFPR src) %{
10248   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10249   match(Set dst (AddF dst src));
10250 
10251   format %{ "FLD    $src\n\t"
10252             "FADDp  $dst,ST" %}
10253   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10254   ins_encode( Push_Reg_FPR(src),
10255               OpcP, RegOpc(dst) );
10256   ins_pipe( fpu_reg_reg );
10257 %}
10258 
10259 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10260   predicate(UseSSE==0);
10261   match(Set dst (AbsF src));
10262   ins_cost(100);
10263   format %{ "FABS" %}
10264   opcode(0xE1, 0xD9);
10265   ins_encode( OpcS, OpcP );
10266   ins_pipe( fpu_reg_reg );
10267 %}
10268 
10269 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10270   predicate(UseSSE==0);
10271   match(Set dst (NegF src));
10272   ins_cost(100);
10273   format %{ "FCHS" %}
10274   opcode(0xE0, 0xD9);
10275   ins_encode( OpcS, OpcP );
10276   ins_pipe( fpu_reg_reg );
10277 %}
10278 
10279 // Cisc-alternate to addFPR_reg
10280 // Spill to obtain 24-bit precision
10281 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10282   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10283   match(Set dst (AddF src1 (LoadF src2)));
10284 
10285   format %{ "FLD    $src2\n\t"
10286             "FADD   ST,$src1\n\t"
10287             "FSTP_S $dst" %}
10288   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10289   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10290               OpcReg_FPR(src1),
10291               Pop_Mem_FPR(dst) );
10292   ins_pipe( fpu_mem_reg_mem );
10293 %}
10294 //
10295 // Cisc-alternate to addFPR_reg
10296 // This instruction does not round to 24-bits
10297 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10298   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10299   match(Set dst (AddF dst (LoadF src)));
10300 
10301   format %{ "FADD   $dst,$src" %}
10302   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10303   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10304               OpcP, RegOpc(dst) );
10305   ins_pipe( fpu_reg_mem );
10306 %}
10307 
// The following two instructions are for _222_mpegaudio
10309 // Spill to obtain 24-bit precision
10310 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10311   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10312   match(Set dst (AddF src1 src2));
10313 
10314   format %{ "FADD   $dst,$src1,$src2" %}
10315   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10316   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10317               OpcReg_FPR(src2),
10318               Pop_Mem_FPR(dst) );
10319   ins_pipe( fpu_mem_reg_mem );
10320 %}
10321 
10322 // Cisc-spill variant
10323 // Spill to obtain 24-bit precision
10324 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10325   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10326   match(Set dst (AddF src1 (LoadF src2)));
10327 
10328   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10329   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10330   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10331               set_instruction_start,
10332               OpcP, RMopc_Mem(secondary,src1),
10333               Pop_Mem_FPR(dst) );
10334   ins_pipe( fpu_mem_mem_mem );
10335 %}
10336 
10337 // Spill to obtain 24-bit precision
10338 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10339   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10340   match(Set dst (AddF src1 src2));
10341 
10342   format %{ "FADD   $dst,$src1,$src2" %}
10343   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10344   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10345               set_instruction_start,
10346               OpcP, RMopc_Mem(secondary,src1),
10347               Pop_Mem_FPR(dst) );
10348   ins_pipe( fpu_mem_mem_mem );
10349 %}
10350 
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (AddF src con));
10356   format %{ "FLD    $src\n\t"
10357             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10358             "FSTP_S $dst"  %}
10359   ins_encode %{
10360     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10361     __ fadd_s($constantaddress($con));
10362     __ fstp_s(Address(rsp, $dst$$disp));
10363   %}
10364   ins_pipe(fpu_mem_reg_con);
10365 %}
10366 //
10367 // This instruction does not round to 24-bits
10368 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10369   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10370   match(Set dst (AddF src con));
10371   format %{ "FLD    $src\n\t"
10372             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10373             "FSTP   $dst"  %}
10374   ins_encode %{
10375     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10376     __ fadd_s($constantaddress($con));
10377     __ fstp_d($dst$$reg);
10378   %}
10379   ins_pipe(fpu_reg_reg_con);
10380 %}
10381 
10382 // Spill to obtain 24-bit precision
10383 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (MulF src1 src2));
10386 
10387   format %{ "FLD    $src1\n\t"
10388             "FMUL   $src2\n\t"
10389             "FSTP_S $dst"  %}
10390   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10391   ins_encode( Push_Reg_FPR(src1),
10392               OpcReg_FPR(src2),
10393               Pop_Mem_FPR(dst) );
10394   ins_pipe( fpu_mem_reg_reg );
10395 %}
10396 //
10397 // This instruction does not round to 24-bits
10398 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10399   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10400   match(Set dst (MulF src1 src2));
10401 
10402   format %{ "FLD    $src1\n\t"
10403             "FMUL   $src2\n\t"
10404             "FSTP_S $dst"  %}
10405   opcode(0xD8, 0x1); /* D8 C8+i */
10406   ins_encode( Push_Reg_FPR(src2),
10407               OpcReg_FPR(src1),
10408               Pop_Reg_FPR(dst) );
10409   ins_pipe( fpu_reg_reg_reg );
10410 %}
10411 
10412 
10413 // Spill to obtain 24-bit precision
10414 // Cisc-alternate to reg-reg multiply
10415 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10416   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10417   match(Set dst (MulF src1 (LoadF src2)));
10418 
10419   format %{ "FLD_S  $src2\n\t"
10420             "FMUL   $src1\n\t"
10421             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10423   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10424               OpcReg_FPR(src1),
10425               Pop_Mem_FPR(dst) );
10426   ins_pipe( fpu_mem_reg_mem );
10427 %}
10428 //
10429 // This instruction does not round to 24-bits
10430 // Cisc-alternate to reg-reg multiply
10431 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10432   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10433   match(Set dst (MulF src1 (LoadF src2)));
10434 
10435   format %{ "FMUL   $dst,$src1,$src2" %}
10436   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               OpcReg_FPR(src1),
10439               Pop_Reg_FPR(dst) );
10440   ins_pipe( fpu_reg_reg_mem );
10441 %}
10442 
10443 // Spill to obtain 24-bit precision
10444 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10445   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10446   match(Set dst (MulF src1 src2));
10447 
10448   format %{ "FMUL   $dst,$src1,$src2" %}
10449   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10450   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10451               set_instruction_start,
10452               OpcP, RMopc_Mem(secondary,src1),
10453               Pop_Mem_FPR(dst) );
10454   ins_pipe( fpu_mem_mem_mem );
10455 %}
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10459   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (MulF src con));
10461 
10462   format %{ "FLD    $src\n\t"
10463             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10464             "FSTP_S $dst"  %}
10465   ins_encode %{
10466     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10467     __ fmul_s($constantaddress($con));
10468     __ fstp_s(Address(rsp, $dst$$disp));
10469   %}
10470   ins_pipe(fpu_mem_reg_con);
10471 %}
10472 //
10473 // This instruction does not round to 24-bits
10474 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10475   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10476   match(Set dst (MulF src con));
10477 
10478   format %{ "FLD    $src\n\t"
10479             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10480             "FSTP   $dst"  %}
10481   ins_encode %{
10482     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10483     __ fmul_s($constantaddress($con));
10484     __ fstp_d($dst$$reg);
10485   %}
10486   ins_pipe(fpu_reg_reg_con);
10487 %}
10488 
10489 
10490 //
10491 // MACRO1 -- subsume unshared load into mulFPR
10492 // This instruction does not round to 24-bits
10493 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10494   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10495   match(Set dst (MulF (LoadF mem1) src));
10496 
10497   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10498             "FMUL   ST,$src\n\t"
10499             "FSTP   $dst" %}
10500   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10501   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10502               OpcReg_FPR(src),
10503               Pop_Reg_FPR(dst) );
10504   ins_pipe( fpu_reg_reg_mem );
10505 %}
10506 //
10507 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10508 // This instruction does not round to 24-bits
10509 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10510   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10511   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10512   ins_cost(95);
10513 
10514   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10515             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10516             "FADD   ST,$src2\n\t"
10517             "FSTP   $dst" %}
10518   opcode(0xD9); /* LoadF D9 /0 */
10519   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10520               FMul_ST_reg(src1),
10521               FAdd_ST_reg(src2),
10522               Pop_Reg_FPR(dst) );
10523   ins_pipe( fpu_reg_mem_reg_reg );
10524 %}
10525 
10526 // MACRO3 -- addFPR a mulFPR
10527 // This instruction does not round to 24-bits.  It is a '2-address'
10528 // instruction in that the result goes back to src2.  This eliminates
10529 // a move from the macro; possibly the register allocator will have
10530 // to add it back (and maybe not).
10531 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10532   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533   match(Set src2 (AddF (MulF src0 src1) src2));
10534 
10535   format %{ "FLD    $src0     ===MACRO3===\n\t"
10536             "FMUL   ST,$src1\n\t"
10537             "FADDP  $src2,ST" %}
10538   opcode(0xD9); /* LoadF D9 /0 */
10539   ins_encode( Push_Reg_FPR(src0),
10540               FMul_ST_reg(src1),
10541               FAddP_reg_ST(src2) );
10542   ins_pipe( fpu_reg_reg_reg );
10543 %}
10544 
10545 // MACRO4 -- divFPR subFPR
10546 // This instruction does not round to 24-bits
10547 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10548   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10549   match(Set dst (DivF (SubF src2 src1) src3));
10550 
10551   format %{ "FLD    $src2   ===MACRO4===\n\t"
10552             "FSUB   ST,$src1\n\t"
10553             "FDIV   ST,$src3\n\t"
10554             "FSTP  $dst" %}
10555   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10556   ins_encode( Push_Reg_FPR(src2),
10557               subFPR_divFPR_encode(src1,src3),
10558               Pop_Reg_FPR(dst) );
10559   ins_pipe( fpu_reg_reg_reg_reg );
10560 %}
10561 
10562 // Spill to obtain 24-bit precision
10563 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10564   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10565   match(Set dst (DivF src1 src2));
10566 
10567   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10569   ins_encode( Push_Reg_FPR(src1),
10570               OpcReg_FPR(src2),
10571               Pop_Mem_FPR(dst) );
10572   ins_pipe( fpu_mem_reg_reg );
10573 %}
10574 //
10575 // This instruction does not round to 24-bits
10576 instruct divFPR_reg(regFPR dst, regFPR src) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set dst (DivF dst src));
10579 
10580   format %{ "FDIV   $dst,$src" %}
10581   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10582   ins_encode( Push_Reg_FPR(src),
10583               OpcP, RegOpc(dst) );
10584   ins_pipe( fpu_reg_reg );
10585 %}
10586 
10587 
10588 // Spill to obtain 24-bit precision
10589 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10590   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10591   match(Set dst (ModF src1 src2));
10592   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10593 
10594   format %{ "FMOD   $dst,$src1,$src2" %}
10595   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10596               emitModDPR(),
10597               Push_Result_Mod_DPR(src2),
10598               Pop_Mem_FPR(dst));
10599   ins_pipe( pipe_slow );
10600 %}
10601 //
10602 // This instruction does not round to 24-bits
10603 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10604   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10605   match(Set dst (ModF dst src));
10606   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10607 
10608   format %{ "FMOD   $dst,$src" %}
10609   ins_encode(Push_Reg_Mod_DPR(dst, src),
10610               emitModDPR(),
10611               Push_Result_Mod_DPR(src),
10612               Pop_Reg_FPR(dst));
10613   ins_pipe( pipe_slow );
10614 %}
10615 
10616 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10617   predicate(UseSSE>=1);
10618   match(Set dst (ModF src0 src1));
10619   effect(KILL rax, KILL cr);
10620   format %{ "SUB    ESP,4\t # FMOD\n"
10621           "\tMOVSS  [ESP+0],$src1\n"
10622           "\tFLD_S  [ESP+0]\n"
10623           "\tMOVSS  [ESP+0],$src0\n"
10624           "\tFLD_S  [ESP+0]\n"
10625      "loop:\tFPREM\n"
10626           "\tFWAIT\n"
10627           "\tFNSTSW AX\n"
10628           "\tSAHF\n"
10629           "\tJP     loop\n"
10630           "\tFSTP_S [ESP+0]\n"
10631           "\tMOVSS  $dst,[ESP+0]\n"
10632           "\tADD    ESP,4\n"
10633           "\tFSTP   ST0\t # Restore FPU Stack"
10634     %}
10635   ins_cost(250);
10636   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10637   ins_pipe( pipe_slow );
10638 %}
10639 
10640 
10641 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10643 
10644 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10645   predicate(UseSSE==0);
10646   match(Set dst (RoundFloat src));
10647   ins_cost(125);
10648   format %{ "FST_S  $dst,$src\t# F-round" %}
10649   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10650   ins_pipe( fpu_mem_reg );
10651 %}
10652 
10653 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10654   predicate(UseSSE<=1);
10655   match(Set dst (RoundDouble src));
10656   ins_cost(125);
10657   format %{ "FST_D  $dst,$src\t# D-round" %}
10658   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10659   ins_pipe( fpu_mem_reg );
10660 %}
10661 
// Force rounding to 24-bit precision and 8-bit exponent
10663 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10664   predicate(UseSSE==0);
10665   match(Set dst (ConvD2F src));
10666   format %{ "FST_S  $dst,$src\t# F-round" %}
10667   expand %{
10668     roundFloat_mem_reg(dst,src);
10669   %}
10670 %}
10671 
// Force rounding to 24-bit precision and 8-bit exponent
10673 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10674   predicate(UseSSE==1);
10675   match(Set dst (ConvD2F src));
10676   effect( KILL cr );
10677   format %{ "SUB    ESP,4\n\t"
10678             "FST_S  [ESP],$src\t# F-round\n\t"
10679             "MOVSS  $dst,[ESP]\n\t"
10680             "ADD ESP,4" %}
10681   ins_encode %{
10682     __ subptr(rsp, 4);
10683     if ($src$$reg != FPR1L_enc) {
10684       __ fld_s($src$$reg-1);
10685       __ fstp_s(Address(rsp, 0));
10686     } else {
10687       __ fst_s(Address(rsp, 0));
10688     }
10689     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10690     __ addptr(rsp, 4);
10691   %}
10692   ins_pipe( pipe_slow );
10693 %}
10694 
10695 // Force rounding double precision to single precision
10696 instruct convD2F_reg(regF dst, regD src) %{
10697   predicate(UseSSE>=2);
10698   match(Set dst (ConvD2F src));
10699   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10700   ins_encode %{
10701     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10702   %}
10703   ins_pipe( pipe_slow );
10704 %}
10705 
10706 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10707   predicate(UseSSE==0);
10708   match(Set dst (ConvF2D src));
10709   format %{ "FST_S  $dst,$src\t# D-round" %}
10710   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10711   ins_pipe( fpu_reg_reg );
10712 %}
10713 
10714 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10715   predicate(UseSSE==1);
10716   match(Set dst (ConvF2D src));
10717   format %{ "FST_D  $dst,$src\t# D-round" %}
10718   expand %{
10719     roundDouble_mem_reg(dst,src);
10720   %}
10721 %}
10722 
10723 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10724   predicate(UseSSE==1);
10725   match(Set dst (ConvF2D src));
10726   effect( KILL cr );
10727   format %{ "SUB    ESP,4\n\t"
10728             "MOVSS  [ESP] $src\n\t"
10729             "FLD_S  [ESP]\n\t"
10730             "ADD    ESP,4\n\t"
10731             "FSTP   $dst\t# D-round" %}
10732   ins_encode %{
10733     __ subptr(rsp, 4);
10734     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10735     __ fld_s(Address(rsp, 0));
10736     __ addptr(rsp, 4);
10737     __ fstp_d($dst$$reg);
10738   %}
10739   ins_pipe( pipe_slow );
10740 %}
10741 
10742 instruct convF2D_reg(regD dst, regF src) %{
10743   predicate(UseSSE>=2);
10744   match(Set dst (ConvF2D src));
10745   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10746   ins_encode %{
10747     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10748   %}
10749   ins_pipe( pipe_slow );
10750 %}
10751 
10752 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10753 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10754   predicate(UseSSE<=1);
10755   match(Set dst (ConvD2I src));
10756   effect( KILL tmp, KILL cr );
10757   format %{ "FLD    $src\t# Convert double to int \n\t"
10758             "FLDCW  trunc mode\n\t"
10759             "SUB    ESP,4\n\t"
10760             "FISTp  [ESP + #0]\n\t"
10761             "FLDCW  std/24-bit mode\n\t"
10762             "POP    EAX\n\t"
10763             "CMP    EAX,0x80000000\n\t"
10764             "JNE,s  fast\n\t"
10765             "FLD_D  $src\n\t"
10766             "CALL   d2i_wrapper\n"
10767       "fast:" %}
10768   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10769   ins_pipe( pipe_slow );
10770 %}
10771 
10772 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10773 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10774   predicate(UseSSE>=2);
10775   match(Set dst (ConvD2I src));
10776   effect( KILL tmp, KILL cr );
10777   format %{ "CVTTSD2SI $dst, $src\n\t"
10778             "CMP    $dst,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "SUB    ESP, 8\n\t"
10781             "MOVSD  [ESP], $src\n\t"
10782             "FLD_D  [ESP]\n\t"
10783             "ADD    ESP, 8\n\t"
10784             "CALL   d2i_wrapper\n"
10785       "fast:" %}
10786   ins_encode %{
10787     Label fast;
10788     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10789     __ cmpl($dst$$Register, 0x80000000);
10790     __ jccb(Assembler::notEqual, fast);
10791     __ subptr(rsp, 8);
10792     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10793     __ fld_d(Address(rsp, 0));
10794     __ addptr(rsp, 8);
10795     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10796     __ post_call_nop();
10797     __ bind(fast);
10798   %}
10799   ins_pipe( pipe_slow );
10800 %}
10801 
10802 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10803   predicate(UseSSE<=1);
10804   match(Set dst (ConvD2L src));
10805   effect( KILL cr );
10806   format %{ "FLD    $src\t# Convert double to long\n\t"
10807             "FLDCW  trunc mode\n\t"
10808             "SUB    ESP,8\n\t"
10809             "FISTp  [ESP + #0]\n\t"
10810             "FLDCW  std/24-bit mode\n\t"
10811             "POP    EAX\n\t"
10812             "POP    EDX\n\t"
10813             "CMP    EDX,0x80000000\n\t"
10814             "JNE,s  fast\n\t"
10815             "TEST   EAX,EAX\n\t"
10816             "JNE,s  fast\n\t"
10817             "FLD    $src\n\t"
10818             "CALL   d2l_wrapper\n"
10819       "fast:" %}
10820   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10821   ins_pipe( pipe_slow );
10822 %}
10823 
10824 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10825 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10826   predicate (UseSSE>=2);
10827   match(Set dst (ConvD2L src));
10828   effect( KILL cr );
10829   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10830             "MOVSD  [ESP],$src\n\t"
10831             "FLD_D  [ESP]\n\t"
10832             "FLDCW  trunc mode\n\t"
10833             "FISTp  [ESP + #0]\n\t"
10834             "FLDCW  std/24-bit mode\n\t"
10835             "POP    EAX\n\t"
10836             "POP    EDX\n\t"
10837             "CMP    EDX,0x80000000\n\t"
10838             "JNE,s  fast\n\t"
10839             "TEST   EAX,EAX\n\t"
10840             "JNE,s  fast\n\t"
10841             "SUB    ESP,8\n\t"
10842             "MOVSD  [ESP],$src\n\t"
10843             "FLD_D  [ESP]\n\t"
10844             "ADD    ESP,8\n\t"
10845             "CALL   d2l_wrapper\n"
10846       "fast:" %}
10847   ins_encode %{
10848     Label fast;
10849     __ subptr(rsp, 8);
10850     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10851     __ fld_d(Address(rsp, 0));
10852     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10853     __ fistp_d(Address(rsp, 0));
10854     // Restore the rounding mode, mask the exception
10855     if (Compile::current()->in_24_bit_fp_mode()) {
10856       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10857     } else {
10858       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10859     }
10860     // Load the converted long, adjust CPU stack
10861     __ pop(rax);
10862     __ pop(rdx);
10863     __ cmpl(rdx, 0x80000000);
10864     __ jccb(Assembler::notEqual, fast);
10865     __ testl(rax, rax);
10866     __ jccb(Assembler::notEqual, fast);
10867     __ subptr(rsp, 8);
10868     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10869     __ fld_d(Address(rsp, 0));
10870     __ addptr(rsp, 8);
10871     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10872     __ post_call_nop();
10873     __ bind(fast);
10874   %}
10875   ins_pipe( pipe_slow );
10876 %}
10877 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
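// In MacroAssembler terms, DPR2I_encoding expands to roughly the following
// (a sketch only -- the encoding class owns the exact emission and restores
// the 24-bit FPU control word when that mode is active):
//     fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
//     subptr(rsp, 4);
//     fistp_s(Address(rsp, 0));        // NaN or out-of-range stores 0x80000000
//     fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
//     pop(rax);                        // the converted int
//     cmpl(rax, 0x80000000);           // x87 "integer indefinite" sentinel
//     jcc(Assembler::notEqual, fast);  // common case: result is valid
//     ... otherwise reload the source and call d2i_wrapper for the Java corner cases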
10884 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10885   predicate(UseSSE==0);
10886   match(Set dst (ConvF2I src));
10887   effect( KILL tmp, KILL cr );
10888   format %{ "FLD    $src\t# Convert float to int \n\t"
10889             "FLDCW  trunc mode\n\t"
10890             "SUB    ESP,4\n\t"
10891             "FISTp  [ESP + #0]\n\t"
10892             "FLDCW  std/24-bit mode\n\t"
10893             "POP    EAX\n\t"
10894             "CMP    EAX,0x80000000\n\t"
10895             "JNE,s  fast\n\t"
10896             "FLD    $src\n\t"
10897             "CALL   d2i_wrapper\n"
10898       "fast:" %}
10899   // DPR2I_encoding works for FPR2I
10900   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10901   ins_pipe( pipe_slow );
10902 %}
10903 
10904 // Convert a float in xmm to an int reg.
10905 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10906   predicate(UseSSE>=1);
10907   match(Set dst (ConvF2I src));
10908   effect( KILL tmp, KILL cr );
10909   format %{ "CVTTSS2SI $dst, $src\n\t"
10910             "CMP    $dst,0x80000000\n\t"
10911             "JNE,s  fast\n\t"
10912             "SUB    ESP, 4\n\t"
10913             "MOVSS  [ESP], $src\n\t"
10914             "FLD    [ESP]\n\t"
10915             "ADD    ESP, 4\n\t"
10916             "CALL   d2i_wrapper\n"
10917       "fast:" %}
10918   ins_encode %{
10919     Label fast;
10920     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10921     __ cmpl($dst$$Register, 0x80000000);
10922     __ jccb(Assembler::notEqual, fast);
10923     __ subptr(rsp, 4);
10924     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10925     __ fld_s(Address(rsp, 0));
10926     __ addptr(rsp, 4);
10927     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10928     __ post_call_nop();
10929     __ bind(fast);
10930   %}
10931   ins_pipe( pipe_slow );
10932 %}
10933 
10934 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10935   predicate(UseSSE==0);
10936   match(Set dst (ConvF2L src));
10937   effect( KILL cr );
10938   format %{ "FLD    $src\t# Convert float to long\n\t"
10939             "FLDCW  trunc mode\n\t"
10940             "SUB    ESP,8\n\t"
10941             "FISTp  [ESP + #0]\n\t"
10942             "FLDCW  std/24-bit mode\n\t"
10943             "POP    EAX\n\t"
10944             "POP    EDX\n\t"
10945             "CMP    EDX,0x80000000\n\t"
10946             "JNE,s  fast\n\t"
10947             "TEST   EAX,EAX\n\t"
10948             "JNE,s  fast\n\t"
10949             "FLD    $src\n\t"
10950             "CALL   d2l_wrapper\n"
10951       "fast:" %}
10952   // DPR2L_encoding works for FPR2L
10953   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10954   ins_pipe( pipe_slow );
10955 %}
10956 
10957 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10958 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10959   predicate (UseSSE>=1);
10960   match(Set dst (ConvF2L src));
10961   effect( KILL cr );
10962   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10963             "MOVSS  [ESP],$src\n\t"
10964             "FLD_S  [ESP]\n\t"
10965             "FLDCW  trunc mode\n\t"
10966             "FISTp  [ESP + #0]\n\t"
10967             "FLDCW  std/24-bit mode\n\t"
10968             "POP    EAX\n\t"
10969             "POP    EDX\n\t"
10970             "CMP    EDX,0x80000000\n\t"
10971             "JNE,s  fast\n\t"
10972             "TEST   EAX,EAX\n\t"
10973             "JNE,s  fast\n\t"
            "SUB    ESP,4\n\t"
10975             "MOVSS  [ESP],$src\n\t"
10976             "FLD_S  [ESP]\n\t"
10977             "ADD    ESP,4\n\t"
10978             "CALL   d2l_wrapper\n"
10979       "fast:" %}
10980   ins_encode %{
10981     Label fast;
10982     __ subptr(rsp, 8);
10983     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10984     __ fld_s(Address(rsp, 0));
10985     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10986     __ fistp_d(Address(rsp, 0));
10987     // Restore the rounding mode, mask the exception
10988     if (Compile::current()->in_24_bit_fp_mode()) {
10989       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10990     } else {
10991       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10992     }
10993     // Load the converted long, adjust CPU stack
10994     __ pop(rax);
10995     __ pop(rdx);
10996     __ cmpl(rdx, 0x80000000);
10997     __ jccb(Assembler::notEqual, fast);
10998     __ testl(rax, rax);
10999     __ jccb(Assembler::notEqual, fast);
11000     __ subptr(rsp, 4);
11001     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11002     __ fld_s(Address(rsp, 0));
11003     __ addptr(rsp, 4);
11004     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11005     __ post_call_nop();
11006     __ bind(fast);
11007   %}
11008   ins_pipe( pipe_slow );
11009 %}
11010 
11011 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11012   predicate( UseSSE<=1 );
11013   match(Set dst (ConvI2D src));
11014   format %{ "FILD   $src\n\t"
11015             "FSTP   $dst" %}
11016   opcode(0xDB, 0x0);  /* DB /0 */
11017   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11018   ins_pipe( fpu_reg_mem );
11019 %}
11020 
11021 instruct convI2D_reg(regD dst, rRegI src) %{
11022   predicate( UseSSE>=2 && !UseXmmI2D );
11023   match(Set dst (ConvI2D src));
11024   format %{ "CVTSI2SD $dst,$src" %}
11025   ins_encode %{
11026     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11027   %}
11028   ins_pipe( pipe_slow );
11029 %}
11030 
11031 instruct convI2D_mem(regD dst, memory mem) %{
11032   predicate( UseSSE>=2 );
11033   match(Set dst (ConvI2D (LoadI mem)));
11034   format %{ "CVTSI2SD $dst,$mem" %}
11035   ins_encode %{
11036     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11037   %}
11038   ins_pipe( pipe_slow );
11039 %}
11040 
11041 instruct convXI2D_reg(regD dst, rRegI src)
11042 %{
11043   predicate( UseSSE>=2 && UseXmmI2D );
11044   match(Set dst (ConvI2D src));
11045 
11046   format %{ "MOVD  $dst,$src\n\t"
11047             "CVTDQ2PD $dst,$dst\t# i2d" %}
11048   ins_encode %{
11049     __ movdl($dst$$XMMRegister, $src$$Register);
11050     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11051   %}
11052   ins_pipe(pipe_slow); // XXX
11053 %}
11054 
11055 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11056   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11057   match(Set dst (ConvI2D (LoadI mem)));
11058   format %{ "FILD   $mem\n\t"
11059             "FSTP   $dst" %}
11060   opcode(0xDB);      /* DB /0 */
11061   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11062               Pop_Reg_DPR(dst));
11063   ins_pipe( fpu_reg_mem );
11064 %}
11065 
11066 // Convert a byte to a float; no rounding step needed.
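// (The predicate matches ConvI2F of (AndI x 255): every value in 0..255 is
// exact even at 24-bit precision, so the round-by-store used below for the
// general 24-bit case can be skipped.)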
11067 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11068   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11069   match(Set dst (ConvI2F src));
11070   format %{ "FILD   $src\n\t"
11071             "FSTP   $dst" %}
11072 
11073   opcode(0xDB, 0x0);  /* DB /0 */
11074   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11075   ins_pipe( fpu_reg_mem );
11076 %}
11077 
11078 // In 24-bit mode, force exponent rounding by storing back out
11079 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11080   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11081   match(Set dst (ConvI2F src));
11082   ins_cost(200);
11083   format %{ "FILD   $src\n\t"
11084             "FSTP_S $dst" %}
11085   opcode(0xDB, 0x0);  /* DB /0 */
11086   ins_encode( Push_Mem_I(src),
11087               Pop_Mem_FPR(dst));
11088   ins_pipe( fpu_mem_mem );
11089 %}
11090 
11091 // In 24-bit mode, force exponent rounding by storing back out
11092 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11093   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11094   match(Set dst (ConvI2F (LoadI mem)));
11095   ins_cost(200);
11096   format %{ "FILD   $mem\n\t"
11097             "FSTP_S $dst" %}
11098   opcode(0xDB);  /* DB /0 */
11099   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11100               Pop_Mem_FPR(dst));
11101   ins_pipe( fpu_mem_mem );
11102 %}
11103 
// This instruction does not round to 24 bits
11105 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11106   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11107   match(Set dst (ConvI2F src));
11108   format %{ "FILD   $src\n\t"
11109             "FSTP   $dst" %}
11110   opcode(0xDB, 0x0);  /* DB /0 */
11111   ins_encode( Push_Mem_I(src),
11112               Pop_Reg_FPR(dst));
11113   ins_pipe( fpu_reg_mem );
11114 %}
11115 
// This instruction does not round to 24 bits
11117 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11118   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11119   match(Set dst (ConvI2F (LoadI mem)));
11120   format %{ "FILD   $mem\n\t"
11121             "FSTP   $dst" %}
11122   opcode(0xDB);      /* DB /0 */
11123   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11124               Pop_Reg_FPR(dst));
11125   ins_pipe( fpu_reg_mem );
11126 %}
11127 
11128 // Convert an int to a float in xmm; no rounding step needed.
11129 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11131   match(Set dst (ConvI2F src));
11132   format %{ "CVTSI2SS $dst, $src" %}
11133   ins_encode %{
11134     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11135   %}
11136   ins_pipe( pipe_slow );
11137 %}
11138 
instruct convXI2F_reg(regF dst, rRegI src)
11140 %{
11141   predicate( UseSSE>=2 && UseXmmI2F );
11142   match(Set dst (ConvI2F src));
11143 
11144   format %{ "MOVD  $dst,$src\n\t"
11145             "CVTDQ2PS $dst,$dst\t# i2f" %}
11146   ins_encode %{
11147     __ movdl($dst$$XMMRegister, $src$$Register);
11148     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11149   %}
11150   ins_pipe(pipe_slow); // XXX
11151 %}
11152 
11153 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11154   match(Set dst (ConvI2L src));
11155   effect(KILL cr);
11156   ins_cost(375);
11157   format %{ "MOV    $dst.lo,$src\n\t"
11158             "MOV    $dst.hi,$src\n\t"
11159             "SAR    $dst.hi,31" %}
11160   ins_encode(convert_int_long(dst,src));
11161   ins_pipe( ialu_reg_reg_long );
11162 %}
11163 
11164 // Zero-extend convert int to long
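// (e.g. the graph shape produced by the Java idiom x & 0xFFFFFFFFL on an int x)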
11165 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11166   match(Set dst (AndL (ConvI2L src) mask) );
11167   effect( KILL flags );
11168   ins_cost(250);
11169   format %{ "MOV    $dst.lo,$src\n\t"
11170             "XOR    $dst.hi,$dst.hi" %}
11171   opcode(0x33); // XOR
11172   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11173   ins_pipe( ialu_reg_reg_long );
11174 %}
11175 
11176 // Zero-extend long
11177 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11178   match(Set dst (AndL src mask) );
11179   effect( KILL flags );
11180   ins_cost(250);
11181   format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
11183   opcode(0x33); // XOR
11184   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11185   ins_pipe( ialu_reg_reg_long );
11186 %}
11187 
11188 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11189   predicate (UseSSE<=1);
11190   match(Set dst (ConvL2D src));
11191   effect( KILL cr );
11192   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11193             "PUSH   $src.lo\n\t"
11194             "FILD   ST,[ESP + #0]\n\t"
11195             "ADD    ESP,8\n\t"
11196             "FSTP_D $dst\t# D-round" %}
11197   opcode(0xDF, 0x5);  /* DF /5 */
11198   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11199   ins_pipe( pipe_slow );
11200 %}
11201 
11202 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11203   predicate (UseSSE>=2);
11204   match(Set dst (ConvL2D src));
11205   effect( KILL cr );
11206   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11207             "PUSH   $src.lo\n\t"
11208             "FILD_D [ESP]\n\t"
11209             "FSTP_D [ESP]\n\t"
11210             "MOVSD  $dst,[ESP]\n\t"
11211             "ADD    ESP,8" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=1);
11219   match(Set dst (ConvL2F src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_S [ESP]\n\t"
11225             "MOVSS  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11233   match(Set dst (ConvL2F src));
11234   effect( KILL cr );
11235   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11236             "PUSH   $src.lo\n\t"
11237             "FILD   ST,[ESP + #0]\n\t"
11238             "ADD    ESP,8\n\t"
11239             "FSTP_S $dst\t# F-round" %}
11240   opcode(0xDF, 0x5);  /* DF /5 */
11241   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11242   ins_pipe( pipe_slow );
11243 %}
11244 
11245 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11246   match(Set dst (ConvL2I src));
11247   effect( DEF dst, USE src );
11248   format %{ "MOV    $dst,$src.lo" %}
11249   ins_encode(enc_CopyL_Lo(dst,src));
11250   ins_pipe( ialu_reg_reg );
11251 %}
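
// The MoveF2I/MoveI2F and MoveD2L/MoveL2D rules below are raw bit-pattern
// moves between the FP/XMM and integer register files (the shapes behind
// Float.floatToRawIntBits / Float.intBitsToFloat and their double/long
// analogues); no value conversion or rounding is performed.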
11252 
11253 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11254   match(Set dst (MoveF2I src));
11255   effect( DEF dst, USE src );
11256   ins_cost(100);
11257   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11258   ins_encode %{
11259     __ movl($dst$$Register, Address(rsp, $src$$disp));
11260   %}
11261   ins_pipe( ialu_reg_mem );
11262 %}
11263 
11264 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11265   predicate(UseSSE==0);
11266   match(Set dst (MoveF2I src));
11267   effect( DEF dst, USE src );
11268 
11269   ins_cost(125);
11270   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11271   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11272   ins_pipe( fpu_mem_reg );
11273 %}
11274 
11275 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11276   predicate(UseSSE>=1);
11277   match(Set dst (MoveF2I src));
11278   effect( DEF dst, USE src );
11279 
11280   ins_cost(95);
11281   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11282   ins_encode %{
11283     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11284   %}
11285   ins_pipe( pipe_slow );
11286 %}
11287 
11288 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11289   predicate(UseSSE>=2);
11290   match(Set dst (MoveF2I src));
11291   effect( DEF dst, USE src );
11292   ins_cost(85);
11293   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11294   ins_encode %{
11295     __ movdl($dst$$Register, $src$$XMMRegister);
11296   %}
11297   ins_pipe( pipe_slow );
11298 %}
11299 
11300 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11301   match(Set dst (MoveI2F src));
11302   effect( DEF dst, USE src );
11303 
11304   ins_cost(100);
11305   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11306   ins_encode %{
11307     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11308   %}
11309   ins_pipe( ialu_mem_reg );
11310 %}
11311 
11312 
11313 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11314   predicate(UseSSE==0);
11315   match(Set dst (MoveI2F src));
11316   effect(DEF dst, USE src);
11317 
11318   ins_cost(125);
11319   format %{ "FLD_S  $src\n\t"
11320             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11321   opcode(0xD9);               /* D9 /0, FLD m32real */
11322   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11323               Pop_Reg_FPR(dst) );
11324   ins_pipe( fpu_reg_mem );
11325 %}
11326 
11327 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11328   predicate(UseSSE>=1);
11329   match(Set dst (MoveI2F src));
11330   effect( DEF dst, USE src );
11331 
11332   ins_cost(95);
11333   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11334   ins_encode %{
11335     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11336   %}
11337   ins_pipe( pipe_slow );
11338 %}
11339 
11340 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11341   predicate(UseSSE>=2);
11342   match(Set dst (MoveI2F src));
11343   effect( DEF dst, USE src );
11344 
11345   ins_cost(85);
11346   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11347   ins_encode %{
11348     __ movdl($dst$$XMMRegister, $src$$Register);
11349   %}
11350   ins_pipe( pipe_slow );
11351 %}
11352 
11353 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11354   match(Set dst (MoveD2L src));
11355   effect(DEF dst, USE src);
11356 
11357   ins_cost(250);
11358   format %{ "MOV    $dst.lo,$src\n\t"
11359             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11360   opcode(0x8B, 0x8B);
11361   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11362   ins_pipe( ialu_mem_long_reg );
11363 %}
11364 
11365 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11366   predicate(UseSSE<=1);
11367   match(Set dst (MoveD2L src));
11368   effect(DEF dst, USE src);
11369 
11370   ins_cost(125);
11371   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11372   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11373   ins_pipe( fpu_mem_reg );
11374 %}
11375 
11376 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11377   predicate(UseSSE>=2);
11378   match(Set dst (MoveD2L src));
11379   effect(DEF dst, USE src);
11380   ins_cost(95);
11381   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11382   ins_encode %{
11383     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11384   %}
11385   ins_pipe( pipe_slow );
11386 %}
11387 
11388 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11389   predicate(UseSSE>=2);
11390   match(Set dst (MoveD2L src));
11391   effect(DEF dst, USE src, TEMP tmp);
11392   ins_cost(85);
11393   format %{ "MOVD   $dst.lo,$src\n\t"
11394             "PSHUFLW $tmp,$src,0x4E\n\t"
11395             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11396   ins_encode %{
11397     __ movdl($dst$$Register, $src$$XMMRegister);
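    // 0x4E swaps the two 32-bit halves of the low quadword, exposing the high
    // half of the double in the low dword for the following movdl.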
11398     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11399     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11400   %}
11401   ins_pipe( pipe_slow );
11402 %}
11403 
11404 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11405   match(Set dst (MoveL2D src));
11406   effect(DEF dst, USE src);
11407 
11408   ins_cost(200);
11409   format %{ "MOV    $dst,$src.lo\n\t"
11410             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11411   opcode(0x89, 0x89);
11412   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11413   ins_pipe( ialu_mem_long_reg );
11414 %}
11415 
11416 
11417 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11418   predicate(UseSSE<=1);
11419   match(Set dst (MoveL2D src));
11420   effect(DEF dst, USE src);
11421   ins_cost(125);
11422 
11423   format %{ "FLD_D  $src\n\t"
11424             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11425   opcode(0xDD);               /* DD /0, FLD m64real */
11426   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11427               Pop_Reg_DPR(dst) );
11428   ins_pipe( fpu_reg_mem );
11429 %}
11430 
11431 
11432 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11433   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436 
11437   ins_cost(95);
11438   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11439   ins_encode %{
11440     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11441   %}
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11446   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11447   match(Set dst (MoveL2D src));
11448   effect(DEF dst, USE src);
11449 
11450   ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11452   ins_encode %{
11453     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11454   %}
11455   ins_pipe( pipe_slow );
11456 %}
11457 
11458 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11459   predicate(UseSSE>=2);
11460   match(Set dst (MoveL2D src));
11461   effect(TEMP dst, USE src, TEMP tmp);
11462   ins_cost(85);
11463   format %{ "MOVD   $dst,$src.lo\n\t"
11464             "MOVD   $tmp,$src.hi\n\t"
11465             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11466   ins_encode %{
11467     __ movdl($dst$$XMMRegister, $src$$Register);
11468     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11469     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11470   %}
11471   ins_pipe( pipe_slow );
11472 %}
11473 
11474 //----------------------------- CompressBits/ExpandBits ------------------------
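// On a 32-bit target the 64-bit CompressBits/ExpandBits operations are split
// across a register pair.  Roughly, with k = popcount(mask.lo):
//   compress(src, mask) == pext(src.lo, mask.lo) | (pext(src.hi, mask.hi) << k)
// with the k == 0 and k == 32 cases handled separately below; expansion feeds
// the source bits through pdep against mask.lo and then mask.hi.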
11475 
11476 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11477   predicate(n->bottom_type()->isa_long());
11478   match(Set dst (CompressBits src mask));
11479   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11480   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11481   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination
    // register pair in parallel, then merge the two results so that the bits
    // extracted into the upper destination register are laid out contiguously
    // after the lower destination result.
11486     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11487     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11488     __ popcntl($rtmp$$Register, $mask$$Register);
11489     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11490     __ cmpl($rtmp$$Register, 32);
11491     __ jccb(Assembler::equal, exit);
11492     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11493     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11494     // Shift left the contents of upper destination register by true bit count of lower mask register
11495     // and merge with lower destination register.
11496     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11497     __ orl($dst$$Register, $rtmp$$Register);
11498     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11499     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11500     // since contents of upper destination have already been copied to lower destination
11501     // register.
11502     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11504     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11505     __ jmp(exit);
    __ bind(partial_result);
11507     // Perform right shift over upper destination register to move out bits already copied
11508     // to lower destination register.
11509     __ subl($rtmp$$Register, 32);
11510     __ negl($rtmp$$Register);
11511     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11512     __ bind(exit);
11513   %}
11514   ins_pipe( pipe_slow );
11515 %}
11516 
11517 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11518   predicate(n->bottom_type()->isa_long());
11519   match(Set dst (ExpandBits src mask));
11520   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11521   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11522   ins_encode %{
    // The expand (deposit) operation sequentially reads bits from the source
    // register starting at the LSB and places them in the destination register
    // at the bit positions of the set bits in the mask register.  Thus the
    // number of source bits consumed equals the combined set-bit count of the
    // mask register pair.
11527     Label exit, mask_clipping;
11528     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11529     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11530     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the set-bit count of the lower mask register is 32 then no bits of the
    // lower source register feed into the upper destination register.
11533     __ cmpl($rtmp$$Register, 32);
11534     __ jccb(Assembler::equal, exit);
11535     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11536     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11537     // Shift right the contents of lower source register to remove already consumed bits.
11538     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11539     // Extract the bits from lower source register starting from LSB under the influence
11540     // of upper mask register.
11541     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11542     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11543     __ subl($rtmp$$Register, 32);
11544     __ negl($rtmp$$Register);
11545     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11546     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11547     // Clear the set bits in upper mask register which have been used to extract the contents
11548     // from lower source register.
11549     __ bind(mask_clipping);
11550     __ blsrl($mask$$Register, $mask$$Register);
11551     __ decrementl($rtmp$$Register, 1);
11552     __ jccb(Assembler::greater, mask_clipping);
11553     // Starting from LSB extract the bits from upper source register under the influence of
11554     // remaining set bits in upper mask register.
11555     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11556     // Merge the partial results extracted from lower and upper source register bits.
11557     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11558     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11559     __ bind(exit);
11560   %}
11561   ins_pipe( pipe_slow );
11562 %}
11563 
11564 // =======================================================================
11565 // fast clearing of an array
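// The format templates below are disassembly commentary only; the actual code
// is emitted by MacroAssembler::clear_mem, which selects REP STOSB, an XMM/YMM
// zeroing loop, or plain REP STOS based on UseFastStosb and UseXMMForObjInit.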
11566 // Small ClearArray non-AVX512.
11567 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11568   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11569   match(Set dummy (ClearArray cnt base));
11570   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11571 
11572   format %{ $$template
11573     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11574     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11575     $$emit$$"JG     LARGE\n\t"
11576     $$emit$$"SHL    ECX, 1\n\t"
11577     $$emit$$"DEC    ECX\n\t"
11578     $$emit$$"JS     DONE\t# Zero length\n\t"
11579     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11580     $$emit$$"DEC    ECX\n\t"
11581     $$emit$$"JGE    LOOP\n\t"
11582     $$emit$$"JMP    DONE\n\t"
11583     $$emit$$"# LARGE:\n\t"
11584     if (UseFastStosb) {
11585        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11586        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11587     } else if (UseXMMForObjInit) {
11588        $$emit$$"MOV     RDI,RAX\n\t"
11589        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11590        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11591        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11592        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11593        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11594        $$emit$$"ADD     0x40,RAX\n\t"
11595        $$emit$$"# L_zero_64_bytes:\n\t"
11596        $$emit$$"SUB     0x8,RCX\n\t"
11597        $$emit$$"JGE     L_loop\n\t"
11598        $$emit$$"ADD     0x4,RCX\n\t"
11599        $$emit$$"JL      L_tail\n\t"
11600        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11601        $$emit$$"ADD     0x20,RAX\n\t"
11602        $$emit$$"SUB     0x4,RCX\n\t"
11603        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11604        $$emit$$"ADD     0x4,RCX\n\t"
11605        $$emit$$"JLE     L_end\n\t"
11606        $$emit$$"DEC     RCX\n\t"
11607        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11608        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11609        $$emit$$"ADD     0x8,RAX\n\t"
11610        $$emit$$"DEC     RCX\n\t"
11611        $$emit$$"JGE     L_sloop\n\t"
11612        $$emit$$"# L_end:\n\t"
11613     } else {
11614        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11615        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11616     }
11617     $$emit$$"# DONE"
11618   %}
11619   ins_encode %{
11620     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11621                  $tmp$$XMMRegister, false, knoreg);
11622   %}
11623   ins_pipe( pipe_slow );
11624 %}
11625 
11626 // Small ClearArray AVX512 non-constant length.
11627 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11628   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11629   match(Set dummy (ClearArray cnt base));
11630   ins_cost(125);
11631   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11632 
11633   format %{ $$template
11634     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11635     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11636     $$emit$$"JG     LARGE\n\t"
11637     $$emit$$"SHL    ECX, 1\n\t"
11638     $$emit$$"DEC    ECX\n\t"
11639     $$emit$$"JS     DONE\t# Zero length\n\t"
11640     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11641     $$emit$$"DEC    ECX\n\t"
11642     $$emit$$"JGE    LOOP\n\t"
11643     $$emit$$"JMP    DONE\n\t"
11644     $$emit$$"# LARGE:\n\t"
11645     if (UseFastStosb) {
11646        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11647        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11648     } else if (UseXMMForObjInit) {
11649        $$emit$$"MOV     RDI,RAX\n\t"
11650        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11651        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11652        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11653        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11654        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11655        $$emit$$"ADD     0x40,RAX\n\t"
11656        $$emit$$"# L_zero_64_bytes:\n\t"
11657        $$emit$$"SUB     0x8,RCX\n\t"
11658        $$emit$$"JGE     L_loop\n\t"
11659        $$emit$$"ADD     0x4,RCX\n\t"
11660        $$emit$$"JL      L_tail\n\t"
11661        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11662        $$emit$$"ADD     0x20,RAX\n\t"
11663        $$emit$$"SUB     0x4,RCX\n\t"
11664        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11665        $$emit$$"ADD     0x4,RCX\n\t"
11666        $$emit$$"JLE     L_end\n\t"
11667        $$emit$$"DEC     RCX\n\t"
11668        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11669        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11670        $$emit$$"ADD     0x8,RAX\n\t"
11671        $$emit$$"DEC     RCX\n\t"
11672        $$emit$$"JGE     L_sloop\n\t"
11673        $$emit$$"# L_end:\n\t"
11674     } else {
11675        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11676        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11677     }
11678     $$emit$$"# DONE"
11679   %}
11680   ins_encode %{
11681     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11682                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11683   %}
11684   ins_pipe( pipe_slow );
11685 %}
11686 
11687 // Large ClearArray non-AVX512.
11688 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11689   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11690   match(Set dummy (ClearArray cnt base));
11691   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11692   format %{ $$template
11693     if (UseFastStosb) {
11694        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11695        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11696        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11697     } else if (UseXMMForObjInit) {
11698        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11699        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11700        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11701        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11702        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11703        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11704        $$emit$$"ADD     0x40,RAX\n\t"
11705        $$emit$$"# L_zero_64_bytes:\n\t"
11706        $$emit$$"SUB     0x8,RCX\n\t"
11707        $$emit$$"JGE     L_loop\n\t"
11708        $$emit$$"ADD     0x4,RCX\n\t"
11709        $$emit$$"JL      L_tail\n\t"
11710        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11711        $$emit$$"ADD     0x20,RAX\n\t"
11712        $$emit$$"SUB     0x4,RCX\n\t"
11713        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11714        $$emit$$"ADD     0x4,RCX\n\t"
11715        $$emit$$"JLE     L_end\n\t"
11716        $$emit$$"DEC     RCX\n\t"
11717        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11718        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11719        $$emit$$"ADD     0x8,RAX\n\t"
11720        $$emit$$"DEC     RCX\n\t"
11721        $$emit$$"JGE     L_sloop\n\t"
11722        $$emit$$"# L_end:\n\t"
11723     } else {
11724        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11725        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11726        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11727     }
11728     $$emit$$"# DONE"
11729   %}
11730   ins_encode %{
11731     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11732                  $tmp$$XMMRegister, true, knoreg);
11733   %}
11734   ins_pipe( pipe_slow );
11735 %}
11736 
11737 // Large ClearArray AVX512.
11738 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11739   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11740   match(Set dummy (ClearArray cnt base));
11741   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11742   format %{ $$template
11743     if (UseFastStosb) {
11744        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11745        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11746        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11747     } else if (UseXMMForObjInit) {
11748        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11749        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11750        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11751        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11752        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11753        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11754        $$emit$$"ADD     0x40,RAX\n\t"
11755        $$emit$$"# L_zero_64_bytes:\n\t"
11756        $$emit$$"SUB     0x8,RCX\n\t"
11757        $$emit$$"JGE     L_loop\n\t"
11758        $$emit$$"ADD     0x4,RCX\n\t"
11759        $$emit$$"JL      L_tail\n\t"
11760        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11761        $$emit$$"ADD     0x20,RAX\n\t"
11762        $$emit$$"SUB     0x4,RCX\n\t"
11763        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11764        $$emit$$"ADD     0x4,RCX\n\t"
11765        $$emit$$"JLE     L_end\n\t"
11766        $$emit$$"DEC     RCX\n\t"
11767        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11768        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11769        $$emit$$"ADD     0x8,RAX\n\t"
11770        $$emit$$"DEC     RCX\n\t"
11771        $$emit$$"JGE     L_sloop\n\t"
11772        $$emit$$"# L_end:\n\t"
11773     } else {
11774        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11775        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11776        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11777     }
11778     $$emit$$"# DONE"
11779   %}
11780   ins_encode %{
11781     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11782                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11783   %}
11784   ins_pipe( pipe_slow );
11785 %}
11786 
11787 // Small ClearArray AVX512 constant length.
11788 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11789 %{
11790   predicate(!((ClearArrayNode*)n)->is_large() &&
11791                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11792   match(Set dummy (ClearArray cnt base));
11793   ins_cost(100);
11794   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
11796   ins_encode %{
11797    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11798   %}
11799   ins_pipe(pipe_slow);
11800 %}
11801 
11802 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11803                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11804   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11805   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11806   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11807 
11808   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11809   ins_encode %{
11810     __ string_compare($str1$$Register, $str2$$Register,
11811                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11812                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11813   %}
11814   ins_pipe( pipe_slow );
11815 %}
11816 
11817 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11818                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11819   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11820   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11821   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11822 
11823   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11824   ins_encode %{
11825     __ string_compare($str1$$Register, $str2$$Register,
11826                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11827                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11828   %}
11829   ins_pipe( pipe_slow );
11830 %}
11831 
11832 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11833                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11834   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11835   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11836   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11837 
11838   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11839   ins_encode %{
11840     __ string_compare($str1$$Register, $str2$$Register,
11841                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11842                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11848                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11849   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11850   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11851   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11852 
11853   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11854   ins_encode %{
11855     __ string_compare($str1$$Register, $str2$$Register,
11856                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11857                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11863                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11864   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11865   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11866   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11867 
11868   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11869   ins_encode %{
11870     __ string_compare($str1$$Register, $str2$$Register,
11871                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11872                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11873   %}
11874   ins_pipe( pipe_slow );
11875 %}
11876 
11877 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11878                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11879   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11880   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11881   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11882 
11883   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11884   ins_encode %{
11885     __ string_compare($str1$$Register, $str2$$Register,
11886                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11887                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11893                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11894   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11895   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11896   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11897 
11898   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11899   ins_encode %{
11900     __ string_compare($str2$$Register, $str1$$Register,
11901                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11902                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11903   %}
11904   ins_pipe( pipe_slow );
11905 %}
11906 
11907 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11908                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11909   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11910   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11911   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11912 
11913   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11914   ins_encode %{
11915     __ string_compare($str2$$Register, $str1$$Register,
11916                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11917                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
11922 // fast string equals
11923 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11924                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11925   predicate(!VM_Version::supports_avx512vlbw());
11926   match(Set result (StrEquals (Binary str1 str2) cnt));
11927   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11928 
11929   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11930   ins_encode %{
11931     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11932                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11933                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11934   %}
11935 
11936   ins_pipe( pipe_slow );
11937 %}
11938 
11939 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11940                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11941   predicate(VM_Version::supports_avx512vlbw());
11942   match(Set result (StrEquals (Binary str1 str2) cnt));
11943   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11944 
11945   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11946   ins_encode %{
11947     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11948                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11949                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11950   %}
11951 
11952   ins_pipe( pipe_slow );
11953 %}
11954 
11955 
11956 // fast search of substring with known size.
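// The constant-length rules below use string_indexofC8 when the constant
// needle is long enough to fill a 16-byte XMM register (see the icnt2
// thresholds below) and so never needs to be staged through the stack; shorter
// constant needles take the general string_indexof path, which copies them via
// the stack if they might cross a page boundary.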
11957 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11958                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11959   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11960   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11961   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11962 
11963   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11964   ins_encode %{
11965     int icnt2 = (int)$int_cnt2$$constant;
11966     if (icnt2 >= 16) {
11967       // IndexOf for constant substrings with size >= 16 elements
11968       // which don't need to be loaded through stack.
11969       __ string_indexofC8($str1$$Register, $str2$$Register,
11970                           $cnt1$$Register, $cnt2$$Register,
11971                           icnt2, $result$$Register,
11972                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11973     } else {
11974       // Small strings are loaded through stack if they cross page boundary.
11975       __ string_indexof($str1$$Register, $str2$$Register,
11976                         $cnt1$$Register, $cnt2$$Register,
11977                         icnt2, $result$$Register,
11978                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11979     }
11980   %}
11981   ins_pipe( pipe_slow );
11982 %}
11983 
11984 // fast search of substring with known size.
11985 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11986                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11987   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11988   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11989   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11990 
11991   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11992   ins_encode %{
11993     int icnt2 = (int)$int_cnt2$$constant;
11994     if (icnt2 >= 8) {
11995       // IndexOf for constant substrings with size >= 8 elements
11996       // which don't need to be loaded through stack.
11997       __ string_indexofC8($str1$$Register, $str2$$Register,
11998                           $cnt1$$Register, $cnt2$$Register,
11999                           icnt2, $result$$Register,
12000                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12001     } else {
12002       // Small strings are loaded through stack if they cross page boundary.
12003       __ string_indexof($str1$$Register, $str2$$Register,
12004                         $cnt1$$Register, $cnt2$$Register,
12005                         icnt2, $result$$Register,
12006                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12007     }
12008   %}
12009   ins_pipe( pipe_slow );
12010 %}
12011 
12012 // fast search of substring with known size.
12013 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12014                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12015   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12016   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12017   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12018 
12019   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12020   ins_encode %{
12021     int icnt2 = (int)$int_cnt2$$constant;
12022     if (icnt2 >= 8) {
12023       // IndexOf for constant substrings with size >= 8 elements
12024       // which don't need to be loaded through stack.
12025       __ string_indexofC8($str1$$Register, $str2$$Register,
12026                           $cnt1$$Register, $cnt2$$Register,
12027                           icnt2, $result$$Register,
12028                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12029     } else {
12030       // Small strings are loaded through stack if they cross page boundary.
12031       __ string_indexof($str1$$Register, $str2$$Register,
12032                         $cnt1$$Register, $cnt2$$Register,
12033                         icnt2, $result$$Register,
12034                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12035     }
12036   %}
12037   ins_pipe( pipe_slow );
12038 %}
12039 
12040 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12041                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12042   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12043   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12044   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12045 
12046   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12047   ins_encode %{
12048     __ string_indexof($str1$$Register, $str2$$Register,
12049                       $cnt1$$Register, $cnt2$$Register,
12050                       (-1), $result$$Register,
12051                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12052   %}
12053   ins_pipe( pipe_slow );
12054 %}
12055 
12056 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12057                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12058   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12059   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12060   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12061 
12062   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12063   ins_encode %{
12064     __ string_indexof($str1$$Register, $str2$$Register,
12065                       $cnt1$$Register, $cnt2$$Register,
12066                       (-1), $result$$Register,
12067                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12068   %}
12069   ins_pipe( pipe_slow );
12070 %}
12071 
12072 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12073                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12074   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12075   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12076   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12077 
12078   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12079   ins_encode %{
12080     __ string_indexof($str1$$Register, $str2$$Register,
12081                       $cnt1$$Register, $cnt2$$Register,
12082                       (-1), $result$$Register,
12083                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12084   %}
12085   ins_pipe( pipe_slow );
12086 %}
12087 
12088 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12089                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12090   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12091   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12092   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12093   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12094   ins_encode %{
12095     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12096                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12097   %}
12098   ins_pipe( pipe_slow );
12099 %}
12100 
12101 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12102                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12103   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12104   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12105   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12106   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12107   ins_encode %{
12108     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12109                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12110   %}
12111   ins_pipe( pipe_slow );
12112 %}
12113 
12114 
12115 // fast array equals
12116 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12117                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12118 %{
12119   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12120   match(Set result (AryEq ary1 ary2));
12121   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12122   //ins_cost(300);
12123 
12124   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12125   ins_encode %{
12126     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12127                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12128                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12129   %}
12130   ins_pipe( pipe_slow );
12131 %}
12132 
12133 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12134                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12135 %{
12136   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12137   match(Set result (AryEq ary1 ary2));
12138   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12139   //ins_cost(300);
12140 
12141   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12142   ins_encode %{
12143     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12144                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12145                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12146   %}
12147   ins_pipe( pipe_slow );
12148 %}
12149 
12150 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12151                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12152 %{
12153   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12154   match(Set result (AryEq ary1 ary2));
12155   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12156   //ins_cost(300);
12157 
12158   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12159   ins_encode %{
12160     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12161                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12162                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12163   %}
12164   ins_pipe( pipe_slow );
12165 %}
12166 
12167 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12168                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12169 %{
12170   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12171   match(Set result (AryEq ary1 ary2));
12172   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12173   //ins_cost(300);
12174 
12175   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12176   ins_encode %{
12177     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12178                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12179                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12180   %}
12181   ins_pipe( pipe_slow );
12182 %}
12183 
12184 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12185                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12186 %{
12187   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12188   match(Set result (CountPositives ary1 len));
12189   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12190 
12191   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12192   ins_encode %{
12193     __ count_positives($ary1$$Register, $len$$Register,
12194                        $result$$Register, $tmp3$$Register,
12195                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12196   %}
12197   ins_pipe( pipe_slow );
12198 %}
12199 
12200 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12201                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12202 %{
12203   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12204   match(Set result (CountPositives ary1 len));
12205   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12206 
12207   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12208   ins_encode %{
12209     __ count_positives($ary1$$Register, $len$$Register,
12210                        $result$$Register, $tmp3$$Register,
12211                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12212   %}
12213   ins_pipe( pipe_slow );
12214 %}
12215 
12216 
12217 // fast char[] to byte[] compression
12218 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12219                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12220   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12221   match(Set result (StrCompressedCopy src (Binary dst len)));
12222   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12223 
12224   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12225   ins_encode %{
12226     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12227                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12228                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12229                            knoreg, knoreg);
12230   %}
12231   ins_pipe( pipe_slow );
12232 %}
12233 
12234 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12235                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12236   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12237   match(Set result (StrCompressedCopy src (Binary dst len)));
12238   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12239 
12240   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12241   ins_encode %{
12242     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12243                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12244                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12245                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12246   %}
12247   ins_pipe( pipe_slow );
12248 %}
12249 
12250 // fast byte[] to char[] inflation
12251 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12252                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12253   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12254   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12255   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12256 
12257   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12258   ins_encode %{
12259     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12260                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12261   %}
12262   ins_pipe( pipe_slow );
12263 %}
12264 
12265 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12266                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12267   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12268   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12269   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12270 
12271   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12272   ins_encode %{
12273     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12274                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12275   %}
12276   ins_pipe( pipe_slow );
12277 %}
12278 
12279 // encode char[] to byte[] in ISO_8859_1
12280 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12281                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12282                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12283   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12284   match(Set result (EncodeISOArray src (Binary dst len)));
12285   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12286 
12287   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12288   ins_encode %{
12289     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12290                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12291                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12292   %}
12293   ins_pipe( pipe_slow );
12294 %}
12295 
12296 // encode char[] to byte[] in ASCII
12297 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12298                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12299                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12300   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12301   match(Set result (EncodeISOArray src (Binary dst len)));
12302   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12303 
12304   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12305   ins_encode %{
12306     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12307                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12308                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12309   %}
12310   ins_pipe( pipe_slow );
12311 %}
12312 
12313 //----------Control Flow Instructions------------------------------------------
12314 // Signed compare Instructions
12315 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12316   match(Set cr (CmpI op1 op2));
12317   effect( DEF cr, USE op1, USE op2 );
12318   format %{ "CMP    $op1,$op2" %}
12319   opcode(0x3B);  /* Opcode 3B /r */
12320   ins_encode( OpcP, RegReg( op1, op2) );
12321   ins_pipe( ialu_cr_reg_reg );
12322 %}
12323 
12324 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12325   match(Set cr (CmpI op1 op2));
12326   effect( DEF cr, USE op1 );
12327   format %{ "CMP    $op1,$op2" %}
12328   opcode(0x81,0x07);  /* Opcode 81 /7 */
12329   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12330   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12331   ins_pipe( ialu_cr_reg_imm );
12332 %}
12333 
12334 // Cisc-spilled version of cmpI_eReg
12335 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12336   match(Set cr (CmpI op1 (LoadI op2)));
12337 
12338   format %{ "CMP    $op1,$op2" %}
12339   ins_cost(500);
12340   opcode(0x3B);  /* Opcode 3B /r */
12341   ins_encode( OpcP, RegMem( op1, op2) );
12342   ins_pipe( ialu_cr_reg_mem );
12343 %}
12344 
12345 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12346   match(Set cr (CmpI src zero));
12347   effect( DEF cr, USE src );
12348 
12349   format %{ "TEST   $src,$src" %}
12350   opcode(0x85);
12351   ins_encode( OpcP, RegReg( src, src ) );
12352   ins_pipe( ialu_cr_reg_imm );
12353 %}
12354 
12355 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12356   match(Set cr (CmpI (AndI src con) zero));
12357 
12358   format %{ "TEST   $src,$con" %}
12359   opcode(0xF7,0x00);
12360   ins_encode( OpcP, RegOpc(src), Con32(con) );
12361   ins_pipe( ialu_cr_reg_imm );
12362 %}
12363 
12364 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12365   match(Set cr (CmpI (AndI src mem) zero));
12366 
12367   format %{ "TEST   $src,$mem" %}
12368   opcode(0x85);
12369   ins_encode( OpcP, RegMem( src, mem ) );
12370   ins_pipe( ialu_cr_reg_mem );
12371 %}
12372 
// Unsigned compare instructions; the same as the signed forms except they
// produce an eFlagsRegU instead of an eFlagsReg.
12375 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12376   match(Set cr (CmpU op1 op2));
12377 
12378   format %{ "CMPu   $op1,$op2" %}
12379   opcode(0x3B);  /* Opcode 3B /r */
12380   ins_encode( OpcP, RegReg( op1, op2) );
12381   ins_pipe( ialu_cr_reg_reg );
12382 %}
12383 
12384 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12385   match(Set cr (CmpU op1 op2));
12386 
12387   format %{ "CMPu   $op1,$op2" %}
12388   opcode(0x81,0x07);  /* Opcode 81 /7 */
12389   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12390   ins_pipe( ialu_cr_reg_imm );
12391 %}
12392 
// Cisc-spilled version of cmpU_eReg
12394 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12395   match(Set cr (CmpU op1 (LoadI op2)));
12396 
12397   format %{ "CMPu   $op1,$op2" %}
12398   ins_cost(500);
12399   opcode(0x3B);  /* Opcode 3B /r */
12400   ins_encode( OpcP, RegMem( op1, op2) );
12401   ins_pipe( ialu_cr_reg_mem );
12402 %}
12403 
12404 // // Cisc-spilled version of cmpU_eReg
12405 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12406 //  match(Set cr (CmpU (LoadI op1) op2));
12407 //
12408 //  format %{ "CMPu   $op1,$op2" %}
12409 //  ins_cost(500);
12410 //  opcode(0x39);  /* Opcode 39 /r */
12411 //  ins_encode( OpcP, RegMem( op1, op2) );
12412 //%}
12413 
12414 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12415   match(Set cr (CmpU src zero));
12416 
12417   format %{ "TESTu  $src,$src" %}
12418   opcode(0x85);
12419   ins_encode( OpcP, RegReg( src, src ) );
12420   ins_pipe( ialu_cr_reg_imm );
12421 %}
12422 
12423 // Unsigned pointer compare Instructions
12424 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12425   match(Set cr (CmpP op1 op2));
12426 
12427   format %{ "CMPu   $op1,$op2" %}
12428   opcode(0x3B);  /* Opcode 3B /r */
12429   ins_encode( OpcP, RegReg( op1, op2) );
12430   ins_pipe( ialu_cr_reg_reg );
12431 %}
12432 
12433 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12434   match(Set cr (CmpP op1 op2));
12435 
12436   format %{ "CMPu   $op1,$op2" %}
12437   opcode(0x81,0x07);  /* Opcode 81 /7 */
12438   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12439   ins_pipe( ialu_cr_reg_imm );
12440 %}
12441 
// Cisc-spilled version of cmpP_eReg
12443 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12444   match(Set cr (CmpP op1 (LoadP op2)));
12445 
12446   format %{ "CMPu   $op1,$op2" %}
12447   ins_cost(500);
12448   opcode(0x3B);  /* Opcode 3B /r */
12449   ins_encode( OpcP, RegMem( op1, op2) );
12450   ins_pipe( ialu_cr_reg_mem );
12451 %}
12452 
12453 // // Cisc-spilled version of cmpP_eReg
12454 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12455 //  match(Set cr (CmpP (LoadP op1) op2));
12456 //
12457 //  format %{ "CMPu   $op1,$op2" %}
12458 //  ins_cost(500);
12459 //  opcode(0x39);  /* Opcode 39 /r */
12460 //  ins_encode( OpcP, RegMem( op1, op2) );
12461 //%}
12462 
12463 // Compare raw pointer (used in out-of-heap check).
12464 // Only works because non-oop pointers must be raw pointers
12465 // and raw pointers have no anti-dependencies.
12466 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12467   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12468   match(Set cr (CmpP op1 (LoadP op2)));
12469 
12470   format %{ "CMPu   $op1,$op2" %}
12471   opcode(0x3B);  /* Opcode 3B /r */
12472   ins_encode( OpcP, RegMem( op1, op2) );
12473   ins_pipe( ialu_cr_reg_mem );
12474 %}
12475 
12476 //
12477 // This will generate a signed flags result. This should be ok
12478 // since any compare to a zero should be eq/neq.
12479 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12480   match(Set cr (CmpP src zero));
12481 
12482   format %{ "TEST   $src,$src" %}
12483   opcode(0x85);
12484   ins_encode( OpcP, RegReg( src, src ) );
12485   ins_pipe( ialu_cr_reg_imm );
12486 %}
12487 
12488 // Cisc-spilled version of testP_reg
12489 // This will generate a signed flags result. This should be ok
12490 // since any compare to a zero should be eq/neq.
12491 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12492   match(Set cr (CmpP (LoadP op) zero));
12493 
12494   format %{ "TEST   $op,0xFFFFFFFF" %}
12495   ins_cost(500);
12496   opcode(0xF7);               /* Opcode F7 /0 */
12497   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12498   ins_pipe( ialu_cr_reg_imm );
12499 %}
12500 
12501 // Yanked all unsigned pointer compare operations.
12502 // Pointer compares are done with CmpP which is already unsigned.
12503 
12504 //----------Max and Min--------------------------------------------------------
12505 // Min Instructions
12506 ////
12507 //   *** Min and Max using the conditional move are slower than the
12508 //   *** branch version on a Pentium III.
12509 // // Conditional move for min
12510 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12511 //  effect( USE_DEF op2, USE op1, USE cr );
12512 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12513 //  opcode(0x4C,0x0F);
12514 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12515 //  ins_pipe( pipe_cmov_reg );
12516 //%}
12517 //
12518 //// Min Register with Register (P6 version)
12519 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12520 //  predicate(VM_Version::supports_cmov() );
12521 //  match(Set op2 (MinI op1 op2));
12522 //  ins_cost(200);
12523 //  expand %{
12524 //    eFlagsReg cr;
12525 //    compI_eReg(cr,op1,op2);
12526 //    cmovI_reg_lt(op2,op1,cr);
12527 //  %}
12528 //%}
12529 
12530 // Min Register with Register (generic version)
12531 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12532   match(Set dst (MinI dst src));
12533   effect(KILL flags);
12534   ins_cost(300);
12535 
12536   format %{ "MIN    $dst,$src" %}
12537   opcode(0xCC);
12538   ins_encode( min_enc(dst,src) );
12539   ins_pipe( pipe_slow );
12540 %}
12541 
12542 // Max Register with Register
12543 //   *** Min and Max using the conditional move are slower than the
12544 //   *** branch version on a Pentium III.
12545 // // Conditional move for max
12546 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12547 //  effect( USE_DEF op2, USE op1, USE cr );
12548 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12549 //  opcode(0x4F,0x0F);
12550 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12551 //  ins_pipe( pipe_cmov_reg );
12552 //%}
12553 //
12554 // // Max Register with Register (P6 version)
12555 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12556 //  predicate(VM_Version::supports_cmov() );
12557 //  match(Set op2 (MaxI op1 op2));
12558 //  ins_cost(200);
12559 //  expand %{
12560 //    eFlagsReg cr;
12561 //    compI_eReg(cr,op1,op2);
12562 //    cmovI_reg_gt(op2,op1,cr);
12563 //  %}
12564 //%}
12565 
12566 // Max Register with Register (generic version)
12567 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12568   match(Set dst (MaxI dst src));
12569   effect(KILL flags);
12570   ins_cost(300);
12571 
12572   format %{ "MAX    $dst,$src" %}
12573   opcode(0xCC);
12574   ins_encode( max_enc(dst,src) );
12575   ins_pipe( pipe_slow );
12576 %}
12577 
12578 // ============================================================================
12579 // Counted Loop limit node which represents exact final iterator value.
12580 // Note: the resulting value should fit into integer range since
12581 // counted loops have limit check on overflow.
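// Illustrative example (not part of the original comment): with init = 0,
// limit = 10 and stride = 3, the exact final value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// i.e. the first induction-variable value that is >= the original limit.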
12582 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12583   match(Set limit (LoopLimit (Binary init limit) stride));
12584   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12585   ins_cost(300);
12586 
12587   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12588   ins_encode %{
12589     int strd = (int)$stride$$constant;
12590     assert(strd != 1 && strd != -1, "sanity");
12591     int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EDX:EAX)
    __ cdql();
    // Convert init to long (tmp:init)
12595     __ movl($tmp$$Register, $init$$Register);
12596     __ sarl($tmp$$Register, 31);
12597     // $limit - $init
12598     __ subl($limit$$Register, $init$$Register);
12599     __ sbbl($limit_hi$$Register, $tmp$$Register);
12600     // + ($stride - 1)
12601     if (strd > 0) {
12602       __ addl($limit$$Register, (strd - 1));
12603       __ adcl($limit_hi$$Register, 0);
12604       __ movl($tmp$$Register, strd);
12605     } else {
12606       __ addl($limit$$Register, (strd + 1));
12607       __ adcl($limit_hi$$Register, -1);
12608       __ lneg($limit_hi$$Register, $limit$$Register);
12609       __ movl($tmp$$Register, -strd);
12610     }
    // signed division: (EDX:EAX) / pos_stride
12612     __ idivl($tmp$$Register);
12613     if (strd < 0) {
12614       // restore sign
12615       __ negl($tmp$$Register);
12616     }
12617     // (EAX) * stride
12618     __ mull($tmp$$Register);
12619     // + init (ignore upper bits)
12620     __ addl($limit$$Register, $init$$Register);
12621   %}
12622   ins_pipe( pipe_slow );
12623 %}
12624 
12625 // ============================================================================
12626 // Branch Instructions
12627 // Jump Table
12628 instruct jumpXtnd(rRegI switch_val) %{
12629   match(Jump switch_val);
12630   ins_cost(350);
12631   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12632   ins_encode %{
12633     // Jump to Address(table_base + switch_reg)
12634     Address index(noreg, $switch_val$$Register, Address::times_1);
12635     __ jump(ArrayAddress($constantaddress, index), noreg);
12636   %}
12637   ins_pipe(pipe_jmp);
12638 %}
12639 
12640 // Jump Direct - Label defines a relative address from JMP+1
12641 instruct jmpDir(label labl) %{
12642   match(Goto);
12643   effect(USE labl);
12644 
12645   ins_cost(300);
12646   format %{ "JMP    $labl" %}
12647   size(5);
12648   ins_encode %{
12649     Label* L = $labl$$label;
12650     __ jmp(*L, false); // Always long jump
12651   %}
12652   ins_pipe( pipe_jmp );
12653 %}
12654 
12655 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12656 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12657   match(If cop cr);
12658   effect(USE labl);
12659 
12660   ins_cost(300);
12661   format %{ "J$cop    $labl" %}
12662   size(6);
12663   ins_encode %{
12664     Label* L = $labl$$label;
12665     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12666   %}
12667   ins_pipe( pipe_jcc );
12668 %}
12669 
12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12671 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12672   match(CountedLoopEnd cop cr);
12673   effect(USE labl);
12674 
12675   ins_cost(300);
12676   format %{ "J$cop    $labl\t# Loop end" %}
12677   size(6);
12678   ins_encode %{
12679     Label* L = $labl$$label;
12680     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12681   %}
12682   ins_pipe( pipe_jcc );
12683 %}
12684 
12685 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12686 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12687   match(CountedLoopEnd cop cmp);
12688   effect(USE labl);
12689 
12690   ins_cost(300);
12691   format %{ "J$cop,u  $labl\t# Loop end" %}
12692   size(6);
12693   ins_encode %{
12694     Label* L = $labl$$label;
12695     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12696   %}
12697   ins_pipe( pipe_jcc );
12698 %}
12699 
12700 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12701   match(CountedLoopEnd cop cmp);
12702   effect(USE labl);
12703 
12704   ins_cost(200);
12705   format %{ "J$cop,u  $labl\t# Loop end" %}
12706   size(6);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710   %}
12711   ins_pipe( pipe_jcc );
12712 %}
12713 
12714 // Jump Direct Conditional - using unsigned comparison
12715 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12716   match(If cop cmp);
12717   effect(USE labl);
12718 
12719   ins_cost(300);
12720   format %{ "J$cop,u  $labl" %}
12721   size(6);
12722   ins_encode %{
12723     Label* L = $labl$$label;
12724     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12725   %}
12726   ins_pipe(pipe_jcc);
12727 %}
12728 
12729 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12730   match(If cop cmp);
12731   effect(USE labl);
12732 
12733   ins_cost(200);
12734   format %{ "J$cop,u  $labl" %}
12735   size(6);
12736   ins_encode %{
12737     Label* L = $labl$$label;
12738     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12739   %}
12740   ins_pipe(pipe_jcc);
12741 %}
12742 
12743 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12744   match(If cop cmp);
12745   effect(USE labl);
12746 
12747   ins_cost(200);
12748   format %{ $$template
12749     if ($cop$$cmpcode == Assembler::notEqual) {
12750       $$emit$$"JP,u   $labl\n\t"
12751       $$emit$$"J$cop,u   $labl"
12752     } else {
12753       $$emit$$"JP,u   done\n\t"
12754       $$emit$$"J$cop,u   $labl\n\t"
12755       $$emit$$"done:"
12756     }
12757   %}
12758   ins_encode %{
12759     Label* l = $labl$$label;
12760     if ($cop$$cmpcode == Assembler::notEqual) {
12761       __ jcc(Assembler::parity, *l, false);
12762       __ jcc(Assembler::notEqual, *l, false);
12763     } else if ($cop$$cmpcode == Assembler::equal) {
12764       Label done;
12765       __ jccb(Assembler::parity, done);
12766       __ jcc(Assembler::equal, *l, false);
12767       __ bind(done);
12768     } else {
12769        ShouldNotReachHere();
12770     }
12771   %}
12772   ins_pipe(pipe_jcc);
12773 %}
12774 
12775 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
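// Roughly, as an illustrative sketch only (see the format string below for
// the actual code shape):
//   for (i = 0; i < sub->secondary_supers->length; i++)
//     if (sub->secondary_supers[i] == super) { update cache; return 0; }
//   return non-zero;  // miss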
12780 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12781   match(Set result (PartialSubtypeCheck sub super));
12782   effect( KILL rcx, KILL cr );
12783 
12784   ins_cost(1100);  // slightly larger than the next version
12785   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12786             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12787             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12788             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12789             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12790             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12791             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12792      "miss:\t" %}
12793 
12794   opcode(0x1); // Force a XOR of EDI
12795   ins_encode( enc_PartialSubtypeCheck() );
12796   ins_pipe( pipe_slow );
12797 %}
12798 
12799 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12800   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12801   effect( KILL rcx, KILL result );
12802 
12803   ins_cost(1000);
12804   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12805             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12806             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12807             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12808             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12809             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12810      "miss:\t" %}
12811 
12812   opcode(0x0);  // No need to XOR EDI
12813   ins_encode( enc_PartialSubtypeCheck() );
12814   ins_pipe( pipe_slow );
12815 %}
12816 
12817 // ============================================================================
12818 // Branch Instructions -- short offset versions
12819 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
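//
// For reference (standard IA-32 encodings, not stated in the original text):
// the long forms below use JMP rel32 (0xE9, 5 bytes) and Jcc rel32
// (0x0F 0x8x, 6 bytes), while the short forms use JMP rel8 (0xEB, 2 bytes)
// and Jcc rel8 (0x7x, 2 bytes); this is what the size() attributes reflect.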
12828 
12829 // Jump Direct - Label defines a relative address from JMP+1
12830 instruct jmpDir_short(label labl) %{
12831   match(Goto);
12832   effect(USE labl);
12833 
12834   ins_cost(300);
12835   format %{ "JMP,s  $labl" %}
12836   size(2);
12837   ins_encode %{
12838     Label* L = $labl$$label;
12839     __ jmpb(*L);
12840   %}
12841   ins_pipe( pipe_jmp );
12842   ins_short_branch(1);
12843 %}
12844 
12845 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12846 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12847   match(If cop cr);
12848   effect(USE labl);
12849 
12850   ins_cost(300);
12851   format %{ "J$cop,s  $labl" %}
12852   size(2);
12853   ins_encode %{
12854     Label* L = $labl$$label;
12855     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12856   %}
12857   ins_pipe( pipe_jcc );
12858   ins_short_branch(1);
12859 %}
12860 
12861 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12862 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12863   match(CountedLoopEnd cop cr);
12864   effect(USE labl);
12865 
12866   ins_cost(300);
12867   format %{ "J$cop,s  $labl\t# Loop end" %}
12868   size(2);
12869   ins_encode %{
12870     Label* L = $labl$$label;
12871     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12872   %}
12873   ins_pipe( pipe_jcc );
12874   ins_short_branch(1);
12875 %}
12876 
12877 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12878 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12879   match(CountedLoopEnd cop cmp);
12880   effect(USE labl);
12881 
12882   ins_cost(300);
12883   format %{ "J$cop,us $labl\t# Loop end" %}
12884   size(2);
12885   ins_encode %{
12886     Label* L = $labl$$label;
12887     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12888   %}
12889   ins_pipe( pipe_jcc );
12890   ins_short_branch(1);
12891 %}
12892 
12893 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12894   match(CountedLoopEnd cop cmp);
12895   effect(USE labl);
12896 
12897   ins_cost(300);
12898   format %{ "J$cop,us $labl\t# Loop end" %}
12899   size(2);
12900   ins_encode %{
12901     Label* L = $labl$$label;
12902     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12903   %}
12904   ins_pipe( pipe_jcc );
12905   ins_short_branch(1);
12906 %}
12907 
12908 // Jump Direct Conditional - using unsigned comparison
12909 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12910   match(If cop cmp);
12911   effect(USE labl);
12912 
12913   ins_cost(300);
12914   format %{ "J$cop,us $labl" %}
12915   size(2);
12916   ins_encode %{
12917     Label* L = $labl$$label;
12918     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12919   %}
12920   ins_pipe( pipe_jcc );
12921   ins_short_branch(1);
12922 %}
12923 
12924 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12925   match(If cop cmp);
12926   effect(USE labl);
12927 
12928   ins_cost(300);
12929   format %{ "J$cop,us $labl" %}
12930   size(2);
12931   ins_encode %{
12932     Label* L = $labl$$label;
12933     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12934   %}
12935   ins_pipe( pipe_jcc );
12936   ins_short_branch(1);
12937 %}
12938 
12939 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12940   match(If cop cmp);
12941   effect(USE labl);
12942 
12943   ins_cost(300);
12944   format %{ $$template
12945     if ($cop$$cmpcode == Assembler::notEqual) {
12946       $$emit$$"JP,u,s   $labl\n\t"
12947       $$emit$$"J$cop,u,s   $labl"
12948     } else {
12949       $$emit$$"JP,u,s   done\n\t"
12950       $$emit$$"J$cop,u,s  $labl\n\t"
12951       $$emit$$"done:"
12952     }
12953   %}
12954   size(4);
12955   ins_encode %{
12956     Label* l = $labl$$label;
12957     if ($cop$$cmpcode == Assembler::notEqual) {
12958       __ jccb(Assembler::parity, *l);
12959       __ jccb(Assembler::notEqual, *l);
12960     } else if ($cop$$cmpcode == Assembler::equal) {
12961       Label done;
12962       __ jccb(Assembler::parity, done);
12963       __ jccb(Assembler::equal, *l);
12964       __ bind(done);
12965     } else {
12966        ShouldNotReachHere();
12967     }
12968   %}
12969   ins_pipe(pipe_jcc);
12970   ins_short_branch(1);
12971 %}
12972 
12973 // ============================================================================
12974 // Long Compare
12975 //
12976 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12977 // is tricky.  The flavor of compare used depends on whether we are testing
12978 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12979 // The GE test is the negated LT test.  The LE test can be had by commuting
12980 // the operands (yielding a GE test) and then negating; negate again for the
12981 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12982 // NE test is negated from that.
12983 
12984 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12985 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12986 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12987 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12988 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12989 // foo match ends up with the wrong leaf.  One fix is to not match both
12990 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12991 // both forms beat the trinary form of long-compare and both are very useful
12992 // on Intel which has so few registers.
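//
// As an illustrative sketch (mirroring the encodings used further below), a
// signed LT/GE test of a register pair can be formed without branching:
//   CMP  lo1,lo2      // borrow out of the low halves
//   MOV  tmp,hi1
//   SBB  tmp,hi2      // hi1 - hi2 - borrow; flags now give LT/GE
// while an EQ/NE test of a pair against zero is just the OR of its halves.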
12993 
12994 // Manifest a CmpL result in an integer register.  Very painful.
12995 // This is the test to avoid.
12996 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12997   match(Set dst (CmpL3 src1 src2));
12998   effect( KILL flags );
12999   ins_cost(1000);
13000   format %{ "XOR    $dst,$dst\n\t"
13001             "CMP    $src1.hi,$src2.hi\n\t"
13002             "JLT,s  m_one\n\t"
13003             "JGT,s  p_one\n\t"
13004             "CMP    $src1.lo,$src2.lo\n\t"
13005             "JB,s   m_one\n\t"
13006             "JEQ,s  done\n"
13007     "p_one:\tINC    $dst\n\t"
13008             "JMP,s  done\n"
13009     "m_one:\tDEC    $dst\n"
13010      "done:" %}
13011   ins_encode %{
13012     Label p_one, m_one, done;
13013     __ xorptr($dst$$Register, $dst$$Register);
13014     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13015     __ jccb(Assembler::less,    m_one);
13016     __ jccb(Assembler::greater, p_one);
13017     __ cmpl($src1$$Register, $src2$$Register);
13018     __ jccb(Assembler::below,   m_one);
13019     __ jccb(Assembler::equal,   done);
13020     __ bind(p_one);
13021     __ incrementl($dst$$Register);
13022     __ jmpb(done);
13023     __ bind(m_one);
13024     __ decrementl($dst$$Register);
13025     __ bind(done);
13026   %}
13027   ins_pipe( pipe_slow );
13028 %}
13029 
13030 //======
13031 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13032 // compares.  Can be used for LE or GT compares by reversing arguments.
13033 // NOT GOOD FOR EQ/NE tests.
13034 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13035   match( Set flags (CmpL src zero ));
13036   ins_cost(100);
13037   format %{ "TEST   $src.hi,$src.hi" %}
13038   opcode(0x85);
13039   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13040   ins_pipe( ialu_cr_reg_reg );
13041 %}
13042 
13043 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13044 // compares.  Can be used for LE or GT compares by reversing arguments.
13045 // NOT GOOD FOR EQ/NE tests.
13046 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13047   match( Set flags (CmpL src1 src2 ));
13048   effect( TEMP tmp );
13049   ins_cost(300);
13050   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13051             "MOV    $tmp,$src1.hi\n\t"
13052             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13053   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13054   ins_pipe( ialu_cr_reg_reg );
13055 %}
13056 
// Long compares reg < zero/reg OR reg >= zero/reg.
13058 // Just a wrapper for a normal branch, plus the predicate test.
13059 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13060   match(If cmp flags);
13061   effect(USE labl);
13062   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13063   expand %{
13064     jmpCon(cmp,flags,labl);    // JLT or JGE...
13065   %}
13066 %}
13067 
13068 //======
13069 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13070 // compares.  Can be used for LE or GT compares by reversing arguments.
13071 // NOT GOOD FOR EQ/NE tests.
13072 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13073   match(Set flags (CmpUL src zero));
13074   ins_cost(100);
13075   format %{ "TEST   $src.hi,$src.hi" %}
13076   opcode(0x85);
13077   ins_encode(OpcP, RegReg_Hi2(src, src));
13078   ins_pipe(ialu_cr_reg_reg);
13079 %}
13080 
13081 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13082 // compares.  Can be used for LE or GT compares by reversing arguments.
13083 // NOT GOOD FOR EQ/NE tests.
13084 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13085   match(Set flags (CmpUL src1 src2));
13086   effect(TEMP tmp);
13087   ins_cost(300);
13088   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13089             "MOV    $tmp,$src1.hi\n\t"
13090             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13091   ins_encode(long_cmp_flags2(src1, src2, tmp));
13092   ins_pipe(ialu_cr_reg_reg);
13093 %}
13094 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13096 // Just a wrapper for a normal branch, plus the predicate test.
13097 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13098   match(If cmp flags);
13099   effect(USE labl);
13100   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13101   expand %{
13102     jmpCon(cmp, flags, labl);    // JLT or JGE...
13103   %}
13104 %}
13105 
13106 // Compare 2 longs and CMOVE longs.
13107 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13108   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13109   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13110   ins_cost(400);
13111   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13112             "CMOV$cmp $dst.hi,$src.hi" %}
13113   opcode(0x0F,0x40);
13114   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13115   ins_pipe( pipe_cmov_reg_long );
13116 %}
13117 
13118 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13119   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13120   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13121   ins_cost(500);
13122   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13123             "CMOV$cmp $dst.hi,$src.hi" %}
13124   opcode(0x0F,0x40);
13125   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13126   ins_pipe( pipe_cmov_reg_long );
13127 %}
13128 
13129 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13130   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13131   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13132   ins_cost(400);
13133   expand %{
13134     cmovLL_reg_LTGE(cmp, flags, dst, src);
13135   %}
13136 %}
13137 
13138 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13139   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13140   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13141   ins_cost(500);
13142   expand %{
13143     cmovLL_mem_LTGE(cmp, flags, dst, src);
13144   %}
13145 %}
13146 
13147 // Compare 2 longs and CMOVE ints.
13148 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13149   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13150   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13151   ins_cost(200);
13152   format %{ "CMOV$cmp $dst,$src" %}
13153   opcode(0x0F,0x40);
13154   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13155   ins_pipe( pipe_cmov_reg );
13156 %}
13157 
13158 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13159   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13160   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13161   ins_cost(250);
13162   format %{ "CMOV$cmp $dst,$src" %}
13163   opcode(0x0F,0x40);
13164   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13165   ins_pipe( pipe_cmov_mem );
13166 %}
13167 
13168 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13169   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13170   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13171   ins_cost(200);
13172   expand %{
13173     cmovII_reg_LTGE(cmp, flags, dst, src);
13174   %}
13175 %}
13176 
13177 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13178   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13179   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13180   ins_cost(250);
13181   expand %{
13182     cmovII_mem_LTGE(cmp, flags, dst, src);
13183   %}
13184 %}
13185 
13186 // Compare 2 longs and CMOVE ptrs.
13187 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13188   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13189   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13190   ins_cost(200);
13191   format %{ "CMOV$cmp $dst,$src" %}
13192   opcode(0x0F,0x40);
13193   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13194   ins_pipe( pipe_cmov_reg );
13195 %}
13196 
13197 // Compare 2 unsigned longs and CMOVE ptrs.
13198 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13199   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13200   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13201   ins_cost(200);
13202   expand %{
13203     cmovPP_reg_LTGE(cmp,flags,dst,src);
13204   %}
13205 %}
13206 
13207 // Compare 2 longs and CMOVE doubles
13208 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13210   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13211   ins_cost(200);
13212   expand %{
13213     fcmovDPR_regS(cmp,flags,dst,src);
13214   %}
13215 %}
13216 
13217 // Compare 2 longs and CMOVE doubles
13218 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13220   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13221   ins_cost(200);
13222   expand %{
13223     fcmovD_regS(cmp,flags,dst,src);
13224   %}
13225 %}
13226 
13227 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13229   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13230   ins_cost(200);
13231   expand %{
13232     fcmovFPR_regS(cmp,flags,dst,src);
13233   %}
13234 %}
13235 
13236 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13238   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13239   ins_cost(200);
13240   expand %{
13241     fcmovF_regS(cmp,flags,dst,src);
13242   %}
13243 %}
13244 
13245 //======
13246 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13247 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13248   match( Set flags (CmpL src zero ));
13249   effect(TEMP tmp);
13250   ins_cost(200);
13251   format %{ "MOV    $tmp,$src.lo\n\t"
13252             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13253   ins_encode( long_cmp_flags0( src, tmp ) );
13254   ins_pipe( ialu_reg_reg_long );
13255 %}
13256 
13257 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13258 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13259   match( Set flags (CmpL src1 src2 ));
13260   ins_cost(200+300);
13261   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13262             "JNE,s  skip\n\t"
13263             "CMP    $src1.hi,$src2.hi\n\t"
13264      "skip:\t" %}
13265   ins_encode( long_cmp_flags1( src1, src2 ) );
13266   ins_pipe( ialu_cr_reg_reg );
13267 %}
13268 
13269 // Long compare reg == zero/reg OR reg != zero/reg
13270 // Just a wrapper for a normal branch, plus the predicate test.
13271 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13272   match(If cmp flags);
13273   effect(USE labl);
13274   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13275   expand %{
13276     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13277   %}
13278 %}
13279 
13280 //======
13281 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13282 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13283   match(Set flags (CmpUL src zero));
13284   effect(TEMP tmp);
13285   ins_cost(200);
13286   format %{ "MOV    $tmp,$src.lo\n\t"
13287             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13288   ins_encode(long_cmp_flags0(src, tmp));
13289   ins_pipe(ialu_reg_reg_long);
13290 %}
13291 
13292 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13293 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13294   match(Set flags (CmpUL src1 src2));
13295   ins_cost(200+300);
13296   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13297             "JNE,s  skip\n\t"
13298             "CMP    $src1.hi,$src2.hi\n\t"
13299      "skip:\t" %}
13300   ins_encode(long_cmp_flags1(src1, src2));
13301   ins_pipe(ialu_cr_reg_reg);
13302 %}
13303 
13304 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13305 // Just a wrapper for a normal branch, plus the predicate test.
13306 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13307   match(If cmp flags);
13308   effect(USE labl);
13309   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13310   expand %{
13311     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13312   %}
13313 %}
13314 
13315 // Compare 2 longs and CMOVE longs.
13316 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13317   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13318   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13319   ins_cost(400);
13320   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13321             "CMOV$cmp $dst.hi,$src.hi" %}
13322   opcode(0x0F,0x40);
13323   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13324   ins_pipe( pipe_cmov_reg_long );
13325 %}
13326 
13327 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13328   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13329   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13330   ins_cost(500);
13331   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13332             "CMOV$cmp $dst.hi,$src.hi" %}
13333   opcode(0x0F,0x40);
13334   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13335   ins_pipe( pipe_cmov_reg_long );
13336 %}
13337 
13338 // Compare 2 longs and CMOVE ints.
13339 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13340   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13341   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13342   ins_cost(200);
13343   format %{ "CMOV$cmp $dst,$src" %}
13344   opcode(0x0F,0x40);
13345   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13346   ins_pipe( pipe_cmov_reg );
13347 %}
13348 
13349 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13350   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13351   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13352   ins_cost(250);
13353   format %{ "CMOV$cmp $dst,$src" %}
13354   opcode(0x0F,0x40);
13355   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13356   ins_pipe( pipe_cmov_mem );
13357 %}
13358 
13359 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13360   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13361   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13362   ins_cost(200);
13363   expand %{
13364     cmovII_reg_EQNE(cmp, flags, dst, src);
13365   %}
13366 %}
13367 
13368 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13369   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13370   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13371   ins_cost(250);
13372   expand %{
13373     cmovII_mem_EQNE(cmp, flags, dst, src);
13374   %}
13375 %}
13376 
13377 // Compare 2 longs and CMOVE ptrs.
13378 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13379   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13380   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13381   ins_cost(200);
13382   format %{ "CMOV$cmp $dst,$src" %}
13383   opcode(0x0F,0x40);
13384   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13385   ins_pipe( pipe_cmov_reg );
13386 %}
13387 
13388 // Compare 2 unsigned longs and CMOVE ptrs.
13389 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13390   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13391   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13392   ins_cost(200);
13393   expand %{
13394     cmovPP_reg_EQNE(cmp,flags,dst,src);
13395   %}
13396 %}
13397 
13398 // Compare 2 longs and CMOVE doubles
13399 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13401   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13402   ins_cost(200);
13403   expand %{
13404     fcmovDPR_regS(cmp,flags,dst,src);
13405   %}
13406 %}
13407 
13408 // Compare 2 longs and CMOVE doubles
13409 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13411   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13412   ins_cost(200);
13413   expand %{
13414     fcmovD_regS(cmp,flags,dst,src);
13415   %}
13416 %}
13417 
13418 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13420   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13421   ins_cost(200);
13422   expand %{
13423     fcmovFPR_regS(cmp,flags,dst,src);
13424   %}
13425 %}
13426 
13427 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13429   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13430   ins_cost(200);
13431   expand %{
13432     fcmovF_regS(cmp,flags,dst,src);
13433   %}
13434 %}
13435 
13436 //======
13437 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13438 // Same as cmpL_reg_flags_LEGT except must negate src
13439 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13440   match( Set flags (CmpL src zero ));
13441   effect( TEMP tmp );
13442   ins_cost(300);
13443   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13444             "CMP    $tmp,$src.lo\n\t"
13445             "SBB    $tmp,$src.hi\n\t" %}
13446   ins_encode( long_cmp_flags3(src, tmp) );
13447   ins_pipe( ialu_reg_reg_long );
13448 %}
13449 
13450 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13451 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13452 // requires a commuted test to get the same result.
13453 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13454   match( Set flags (CmpL src1 src2 ));
13455   effect( TEMP tmp );
13456   ins_cost(300);
13457   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13458             "MOV    $tmp,$src2.hi\n\t"
13459             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13460   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13461   ins_pipe( ialu_cr_reg_reg );
13462 %}
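// Illustrative sketch (comment only): the sequence above computes
// $src2 - $src1 in 32-bit halves, so the flags describe how $src2 relates to
// $src1.  The original test on ($src1 cmp $src2) must therefore be replaced
// by its commuted form, e.g. "src1 <= src2" is tested as "src2 >= src1":
//
//   unsigned borrow = (src2_lo < src1_lo);                  // CMP $src2.lo,$src1.lo
//   int32_t  hi = (int32_t)(src2_hi - src1_hi - borrow);    // MOV tmp,$src2.hi; SBB tmp,$src1.hi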
13463 
13464 // Long compares reg <= zero/reg OR reg > zero/reg.
13465 // Just a wrapper for a normal branch, plus the predicate test
13466 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13467   match(If cmp flags);
13468   effect(USE labl);
13469   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13470   ins_cost(300);
13471   expand %{
13472     jmpCon(cmp,flags,labl);    // JGT or JLE...
13473   %}
13474 %}
13475 
13476 //======
13477 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13478 // Same as cmpUL_reg_flags_LEGT except must negate src
13479 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13480   match(Set flags (CmpUL src zero));
13481   effect(TEMP tmp);
13482   ins_cost(300);
13483   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13484             "CMP    $tmp,$src.lo\n\t"
13485             "SBB    $tmp,$src.hi\n\t" %}
13486   ins_encode(long_cmp_flags3(src, tmp));
13487   ins_pipe(ialu_reg_reg_long);
13488 %}
13489 
13490 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13491 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13492 // requires a commuted test to get the same result.
13493 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13494   match(Set flags (CmpUL src1 src2));
13495   effect(TEMP tmp);
13496   ins_cost(300);
13497   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13498             "MOV    $tmp,$src2.hi\n\t"
13499             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13500   ins_encode(long_cmp_flags2( src2, src1, tmp));
13501   ins_pipe(ialu_cr_reg_reg);
13502 %}
13503 
13504 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13505 // Just a wrapper for a normal branch, plus the predicate test
13506 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13507   match(If cmp flags);
13508   effect(USE labl);
13509   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13510   ins_cost(300);
13511   expand %{
13512     jmpCon(cmp, flags, labl);    // JGT or JLE...
13513   %}
13514 %}
13515 
13516 // Compare 2 longs and CMOVE longs.
13517 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13518   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13519   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13520   ins_cost(400);
13521   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13522             "CMOV$cmp $dst.hi,$src.hi" %}
13523   opcode(0x0F,0x40);
13524   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13525   ins_pipe( pipe_cmov_reg_long );
13526 %}
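// Note: the 64-bit conditional move is synthesized from two 32-bit CMOVcc
// instructions, one per half.  CMOVcc does not modify the flags, so both
// halves observe the same condition and move (or stay) together:
//
//   CMOVcc dst.lo,src.lo   ; cc taken from the (commuted) $cmp operand
//   CMOVcc dst.hi,src.hi   ; same cc, flags unchanged by the first CMOV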
13527 
13528 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13529   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13530   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13531   ins_cost(500);
13532   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13533             "CMOV$cmp $dst.hi,$src.hi+4" %}
13534   opcode(0x0F,0x40);
13535   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13536   ins_pipe( pipe_cmov_reg_long );
13537 %}
13538 
13539 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13540   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13541   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13542   ins_cost(400);
13543   expand %{
13544     cmovLL_reg_LEGT(cmp, flags, dst, src);
13545   %}
13546 %}
13547 
13548 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13549   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13550   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13551   ins_cost(500);
13552   expand %{
13553     cmovLL_mem_LEGT(cmp, flags, dst, src);
13554   %}
13555 %}
13556 
13557 // Compare 2 longs and CMOVE ints.
13558 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13559   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13560   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13561   ins_cost(200);
13562   format %{ "CMOV$cmp $dst,$src" %}
13563   opcode(0x0F,0x40);
13564   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13565   ins_pipe( pipe_cmov_reg );
13566 %}
13567 
13568 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13569   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13570   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13571   ins_cost(250);
13572   format %{ "CMOV$cmp $dst,$src" %}
13573   opcode(0x0F,0x40);
13574   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13575   ins_pipe( pipe_cmov_mem );
13576 %}
13577 
13578 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13579   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13580   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13581   ins_cost(200);
13582   expand %{
13583     cmovII_reg_LEGT(cmp, flags, dst, src);
13584   %}
13585 %}
13586 
13587 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13588   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13589   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13590   ins_cost(250);
13591   expand %{
13592     cmovII_mem_LEGT(cmp, flags, dst, src);
13593   %}
13594 %}
13595 
13596 // Compare 2 longs and CMOVE ptrs.
13597 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13598   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13599   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13600   ins_cost(200);
13601   format %{ "CMOV$cmp $dst,$src" %}
13602   opcode(0x0F,0x40);
13603   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13604   ins_pipe( pipe_cmov_reg );
13605 %}
13606 
13607 // Compare 2 unsigned longs and CMOVE ptrs.
13608 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13609   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13610   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13611   ins_cost(200);
13612   expand %{
13613     cmovPP_reg_LEGT(cmp,flags,dst,src);
13614   %}
13615 %}
13616 
13617 // Compare 2 longs and CMOVE doubles
13618 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13619   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13620   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13621   ins_cost(200);
13622   expand %{
13623     fcmovDPR_regS(cmp,flags,dst,src);
13624   %}
13625 %}
13626 
13627 // Compare 2 longs and CMOVE doubles
13628 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13629   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13630   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13631   ins_cost(200);
13632   expand %{
13633     fcmovD_regS(cmp,flags,dst,src);
13634   %}
13635 %}
13636 
13637 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13638   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13639   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13640   ins_cost(200);
13641   expand %{
13642     fcmovFPR_regS(cmp,flags,dst,src);
13643   %}
13644 %}
13645 
13646 
13647 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13648   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13649   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13650   ins_cost(200);
13651   expand %{
13652     fcmovF_regS(cmp,flags,dst,src);
13653   %}
13654 %}
13655 
13656 
13657 // ============================================================================
13658 // Procedure Call/Return Instructions
13659 // Call Java Static Instruction
13660 // Note: If this code changes, the corresponding ret_addr_offset() and
13661 //       compute_padding() functions will have to be adjusted.
13662 instruct CallStaticJavaDirect(method meth) %{
13663   match(CallStaticJava);
13664   effect(USE meth);
13665 
13666   ins_cost(300);
13667   format %{ "CALL,static " %}
13668   opcode(0xE8); /* E8 cd */
13669   ins_encode( pre_call_resets,
13670               Java_Static_Call( meth ),
13671               call_epilog,
13672               post_call_FPU );
13673   ins_pipe( pipe_slow );
13674   ins_alignment(4);
13675 %}
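// Encoding note (applies to the E8-based call rules here): E8 cd is CALL
// rel32, where the 4-byte displacement is relative to the end of the
// instruction, i.e. target = next_ip + rel32.  ret_addr_offset() and
// compute_padding() hard-code the size/layout of this call sequence, which is
// why they must be kept in sync with any change made here.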
13676 
13677 // Call Java Dynamic Instruction
13678 // Note: If this code changes, the corresponding ret_addr_offset() and
13679 //       compute_padding() functions will have to be adjusted.
13680 instruct CallDynamicJavaDirect(method meth) %{
13681   match(CallDynamicJava);
13682   effect(USE meth);
13683 
13684   ins_cost(300);
13685   format %{ "MOV    EAX,(oop)-1\n\t"
13686             "CALL,dynamic" %}
13687   opcode(0xE8); /* E8 cd */
13688   ins_encode( pre_call_resets,
13689               Java_Dynamic_Call( meth ),
13690               call_epilog,
13691               post_call_FPU );
13692   ins_pipe( pipe_slow );
13693   ins_alignment(4);
13694 %}
13695 
13696 // Call Runtime Instruction
13697 instruct CallRuntimeDirect(method meth) %{
13698   match(CallRuntime );
13699   effect(USE meth);
13700 
13701   ins_cost(300);
13702   format %{ "CALL,runtime " %}
13703   opcode(0xE8); /* E8 cd */
13704   // Use FFREEs to clear entries in float stack
13705   ins_encode( pre_call_resets,
13706               FFree_Float_Stack_All,
13707               Java_To_Runtime( meth ),
13708               post_call_FPU );
13709   ins_pipe( pipe_slow );
13710 %}
13711 
13712 // Call runtime without safepoint
13713 instruct CallLeafDirect(method meth) %{
13714   match(CallLeaf);
13715   effect(USE meth);
13716 
13717   ins_cost(300);
13718   format %{ "CALL_LEAF,runtime " %}
13719   opcode(0xE8); /* E8 cd */
13720   ins_encode( pre_call_resets,
13721               FFree_Float_Stack_All,
13722               Java_To_Runtime( meth ),
13723               Verify_FPU_For_Leaf, post_call_FPU );
13724   ins_pipe( pipe_slow );
13725 %}
13726 
13727 instruct CallLeafNoFPDirect(method meth) %{
13728   match(CallLeafNoFP);
13729   effect(USE meth);
13730 
13731   ins_cost(300);
13732   format %{ "CALL_LEAF_NOFP,runtime " %}
13733   opcode(0xE8); /* E8 cd */
13734   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13735   ins_pipe( pipe_slow );
13736 %}
13737 
13738 
13739 // Return Instruction
13740 // Remove the return address & jump to it.
13741 instruct Ret() %{
13742   match(Return);
13743   format %{ "RET" %}
13744   opcode(0xC3);
13745   ins_encode(OpcP);
13746   ins_pipe( pipe_jmp );
13747 %}
13748 
13749 // Tail Call; Jump from runtime stub to Java code.
13750 // Also known as an 'interprocedural jump'.
13751 // Target of jump will eventually return to caller.
13752 // TailJump below removes the return address.
13753 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13754   match(TailCall jump_target method_ptr);
13755   ins_cost(300);
13756   format %{ "JMP    $jump_target \t# EBX holds method" %}
13757   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13758   ins_encode( OpcP, RegOpc(jump_target) );
13759   ins_pipe( pipe_jmp );
13760 %}
13761 
13762 
13763 // Tail Jump; remove the return address; jump to target.
13764 // TailCall above leaves the return address around.
13765 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13766   match( TailJump jump_target ex_oop );
13767   ins_cost(300);
13768   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13769             "JMP    $jump_target " %}
13770   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13771   ins_encode( enc_pop_rdx,
13772               OpcP, RegOpc(jump_target) );
13773   ins_pipe( pipe_jmp );
13774 %}
13775 
13776 // Create exception oop: created by stack-crawling runtime code.
13777 // Created exception is now available to this handler, and is setup
13778 // just prior to jumping to this handler.  No code emitted.
13779 instruct CreateException( eAXRegP ex_oop )
13780 %{
13781   match(Set ex_oop (CreateEx));
13782 
13783   size(0);
13784   // use the following format syntax
13785   format %{ "# exception oop is in EAX; no code emitted" %}
13786   ins_encode();
13787   ins_pipe( empty );
13788 %}
13789 
13790 
13791 // Rethrow exception:
13792 // The exception oop will come in the first argument position.
13793 // Then JUMP (not call) to the rethrow stub code.
13794 instruct RethrowException()
13795 %{
13796   match(Rethrow);
13797 
13798   // use the following format syntax
13799   format %{ "JMP    rethrow_stub" %}
13800   ins_encode(enc_rethrow);
13801   ins_pipe( pipe_jmp );
13802 %}
13803 
13804 // inlined locking and unlocking
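// Background note (hedged): FastLock/FastUnlock report their result in the
// condition flags; the MacroAssembler fast paths conventionally set ZF on
// success, so the branch consuming $cr falls into the slow-path runtime call
// only when the inline attempt fails.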
13805 
13806 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13807   predicate(Compile::current()->use_rtm());
13808   match(Set cr (FastLock object box));
13809   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13810   ins_cost(300);
13811   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13812   ins_encode %{
13813     __ get_thread($thread$$Register);
13814     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13815                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13816                  _rtm_counters, _stack_rtm_counters,
13817                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13818                  true, ra_->C->profile_rtm());
13819   %}
13820   ins_pipe(pipe_slow);
13821 %}
13822 
13823 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13824   predicate(!Compile::current()->use_rtm());
13825   match(Set cr (FastLock object box));
13826   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13827   ins_cost(300);
13828   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13829   ins_encode %{
13830     __ get_thread($thread$$Register);
13831     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13832                  $scr$$Register, noreg, noreg, $thread$$Register, NULL, NULL, NULL, false, false);
13833   %}
13834   ins_pipe(pipe_slow);
13835 %}
13836 
13837 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13838   match(Set cr (FastUnlock object box));
13839   effect(TEMP tmp, USE_KILL box);
13840   ins_cost(300);
13841   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13842   ins_encode %{
13843     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13844   %}
13845   ins_pipe(pipe_slow);
13846 %}
13847 
13848 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13849   predicate(Matcher::vector_length(n) <= 32);
13850   match(Set dst (MaskAll src));
13851   format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13852   ins_encode %{
13853     int mask_len = Matcher::vector_length(this);
13854     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13855   %}
13856   ins_pipe( pipe_slow );
13857 %}
13858 
13859 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13860   predicate(Matcher::vector_length(n) > 32);
13861   match(Set dst (MaskAll src));
13862   effect(TEMP ktmp);
13863   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13864   ins_encode %{
13865     int mask_len = Matcher::vector_length(this);
13866     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13867   %}
13868   ins_pipe( pipe_slow );
13869 %}
13870 
13871 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13872   predicate(Matcher::vector_length(n) > 32);
13873   match(Set dst (MaskAll src));
13874   effect(TEMP ktmp);
13875   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13876   ins_encode %{
13877     int mask_len = Matcher::vector_length(this);
13878     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13879   %}
13880   ins_pipe( pipe_slow );
13881 %}
13882 
13883 // ============================================================================
13884 // Safepoint Instruction
13885 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13886   match(SafePoint poll);
13887   effect(KILL cr, USE poll);
13888 
13889   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13890   ins_cost(125);
13891   // EBP would need size(3)
13892   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13893   ins_encode %{
13894     __ relocate(relocInfo::poll_type);
13895     address pre_pc = __ pc();
13896     __ testl(rax, Address($poll$$Register, 0));
13897     address post_pc = __ pc();
13898     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13899   %}
13900   ins_pipe(ialu_reg_mem);
13901 %}
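// Background note (hedged): $poll is expected to hold the thread's current
// polling page address, and the TESTL merely reads from that page.  When a
// safepoint or handshake is requested, the VM arms the thread's poll so this
// read hits a protected page and faults; the poll_type relocation emitted
// above lets the signal handler recognize the faulting PC as a safepoint poll.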
13902 
13903 
13904 // ============================================================================
13905 // This name is KNOWN by the ADLC and cannot be changed.
13906 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13907 // for this node.
13908 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13909   match(Set dst (ThreadLocal));
13910   effect(DEF dst, KILL cr);
13911 
13912   format %{ "MOV    $dst, Thread::current()" %}
13913   ins_encode %{
13914     Register dstReg = as_Register($dst$$reg);
13915     __ get_thread(dstReg);
13916   %}
13917   ins_pipe( ialu_reg_fat );
13918 %}
13919 
13920 
13921 
13922 //----------PEEPHOLE RULES-----------------------------------------------------
13923 // These must follow all instruction definitions as they use the names
13924 // defined in the instructions definitions.
13925 //
13926 // peepmatch ( root_instr_name [preceding_instruction]* );
13927 //
13928 // peepconstraint ( instruction_number.operand_name relational_op
13929 //                  instruction_number.operand_name
13930 //                  [, ...] );
13931 // // instruction numbers are zero-based using left to right order in peepmatch
13932 //
13933 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13934 // // provide an instruction_number.operand_name for each operand that appears
13935 // // in the replacement instruction's match rule
13936 //
13937 // ---------VM FLAGS---------------------------------------------------------
13938 //
13939 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13940 //
13941 // Each peephole rule is given an identifying number starting with zero and
13942 // increasing by one in the order seen by the parser.  An individual peephole
13943 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13944 // on the command-line.
13945 //
13946 // ---------CURRENT LIMITATIONS----------------------------------------------
13947 //
13948 // Only match adjacent instructions in same basic block
13949 // Only equality constraints
13950 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13951 // Only one replacement instruction
13952 //
13953 // ---------EXAMPLE----------------------------------------------------------
13954 //
13955 // // pertinent parts of existing instructions in architecture description
13956 // instruct movI(rRegI dst, rRegI src) %{
13957 //   match(Set dst (CopyI src));
13958 // %}
13959 //
13960 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13961 //   match(Set dst (AddI dst src));
13962 //   effect(KILL cr);
13963 // %}
13964 //
13965 // // Change (inc mov) to lea
13966 // peephole %{
13967 //   // increment preceded by register-register move
13968 //   peepmatch ( incI_eReg movI );
13969 //   // require that the destination register of the increment
13970 //   // match the destination register of the move
13971 //   peepconstraint ( 0.dst == 1.dst );
13972 //   // construct a replacement instruction that sets
13973 //   // the destination to ( move's source register + one )
13974 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13975 // %}
13976 //
13977 // Implementation no longer uses movX instructions since
13978 // machine-independent system no longer uses CopyX nodes.
13979 //
13980 // peephole %{
13981 //   peepmatch ( incI_eReg movI );
13982 //   peepconstraint ( 0.dst == 1.dst );
13983 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13984 // %}
13985 //
13986 // peephole %{
13987 //   peepmatch ( decI_eReg movI );
13988 //   peepconstraint ( 0.dst == 1.dst );
13989 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13990 // %}
13991 //
13992 // peephole %{
13993 //   peepmatch ( addI_eReg_imm movI );
13994 //   peepconstraint ( 0.dst == 1.dst );
13995 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13996 // %}
13997 //
13998 // peephole %{
13999 //   peepmatch ( addP_eReg_imm movP );
14000 //   peepconstraint ( 0.dst == 1.dst );
14001 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14002 // %}
14003 
14004 // // Change load of spilled value to only a spill
14005 // instruct storeI(memory mem, rRegI src) %{
14006 //   match(Set mem (StoreI mem src));
14007 // %}
14008 //
14009 // instruct loadI(rRegI dst, memory mem) %{
14010 //   match(Set dst (LoadI mem));
14011 // %}
14012 //
14013 peephole %{
14014   peepmatch ( loadI storeI );
14015   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14016   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14017 %}
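// In other words: a LoadI that immediately follows a StoreI of the same
// register to the same memory location is redundant, so the pair collapses to
// the original store alone.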
14018 
14019 //----------SMARTSPILL RULES---------------------------------------------------
14020 // These must follow all instruction definitions as they use the names
14021 // defined in the instructions definitions.