1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
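//
// For example, in the definitions below, "reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg())"
// reads: ECX's register save type and C convention save type are both save-on-call,
// it is spilled and reloaded as an int (Op_RegI), and its hardware encoding is 1
// (the bit-pattern placed into opcodes).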
   61 
   62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering forces the
// instruction encoding to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
// Class of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
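// For example, with the long pairs EDX:EAX, EBX:ECX, and EDI:EBP used by this port,
// HIGH_FROM_LOW(rax) yields rdx (encoding 0 + 2 == 2) and HIGH_FROM_LOW(rcx)
// yields rbx (encoding 1 + 2 == 3).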
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for the 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128 bits (data) + 128 bits (alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
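// For example, AbsF/AbsD are implemented by ANDing the value with
// float_signmask_pool/double_signmask_pool (clearing the sign bit), and NegF/NegD by
// XORing with float_signflip_pool/double_signflip_pool (flipping the sign bit).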
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
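// The 5 bytes of the static call are the CALL itself: opcode 0xE8 plus a 32-bit
// relative displacement. The dynamic call is additionally preceded by the 5-byte MOV
// of the inline-cache sequence (see compute_padding below), hence 10 bytes.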
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
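// For example, store_to_stackslot(cbuf, 0xDB, 0x0, 8) emits FILD dword [ESP+8]
// as the byte sequence DB 44 24 08 (opcode, ModRM, SIB, 8-bit displacement).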
  409 
// Encodes a (rRegI ereg, memory mem) addressing operand -- emit_reg_mem
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
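// For example, encode_RegMem(cbuf, 0 /*EAX*/, 1 /*ECX*/, 0x4, 0, 8, relocInfo::none)
// encodes the operand [ECX+8] for EAX: ModRM 0x41 followed by the 8-bit displacement
// 0x08 (index 0x4 means "no index", so no SIB byte is emitted).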
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
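// Intended to follow a (u)comiss/(u)comisd: dst becomes -1 if the first operand is
// below the second or the compare is unordered (NaN), 0 if equal, and 1 if above.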
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and saved EBP.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and saved EBP.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
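// Map an OptoReg::Name onto the coarse register class used by the spill-copy code
// below: general-purpose (rc_int), AVX-512 opmask (rc_kreg), x87 (rc_float),
// XMM (rc_xmm), or a stack slot (rc_stack).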
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single-byte displacement.
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  return (UseAVX > 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  return (UseAVX > 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
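  // EBX_num/EDX_num are borrowed only for their hardware encodings (3 and 2), which
  // become the ModRM reg-opcode field passed to impl_helper: /3 selects FSTP
  // (store and pop), /2 selects FST (store without popping).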
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
 1022       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     MacroAssembler _masm(cbuf);
 1247     int offset = ra_->reg2offset(src_first);
 1248     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1249     return 0;
 1250   }
 1251 
 1252   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1253     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1254     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1255     MacroAssembler _masm(cbuf);
 1256     int offset = ra_->reg2offset(dst_first);
 1257     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1258     return 0;
 1259   }
 1260 
 1261   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1262     Unimplemented();
 1263     return 0;
 1264   }
 1265 
 1266   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1267     Unimplemented();
 1268     return 0;
 1269   }
 1270 
 1271   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1272     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1273     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1274     MacroAssembler _masm(cbuf);
 1275     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1276     return 0;
 1277   }
 1278 
 1279   assert( size > 0, "missed a case" );
 1280 
 1281   // --------------------------------------------------------------------
 1282   // Check for second bits still needing moving.
 1283   if( src_second == dst_second )
 1284     return size;               // Self copy; no move
 1285   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1286 
 1287   // Check for second word int-int move
 1288   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1289     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1290 
 1291   // Check for second word integer store
 1292   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1293     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1294 
 1295   // Check for second word integer load
 1296   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1297     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1298 
 1299   Unimplemented();
 1300   return 0; // Mute compiler
 1301 }
 1302 
 1303 #ifndef PRODUCT
 1304 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1305   implementation( NULL, ra_, false, st );
 1306 }
 1307 #endif
 1308 
 1309 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1310   implementation( &cbuf, ra_, false, NULL );
 1311 }
 1312 
 1313 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1314   return MachNode::size(ra_);
 1315 }
 1316 
 1317 
 1318 //=============================================================================
 1319 #ifndef PRODUCT
 1320 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1321   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1322   int reg = ra_->get_reg_first(this);
 1323   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1324 }
 1325 #endif
 1326 
 1327 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1329   int reg = ra_->get_encode(this);
 1330   if( offset >= 128 ) {
 1331     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1332     emit_rm(cbuf, 0x2, reg, 0x04);
 1333     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1334     emit_d32(cbuf, offset);
 1335   }
 1336   else {
 1337     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1338     emit_rm(cbuf, 0x1, reg, 0x04);
 1339     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1340     emit_d8(cbuf, offset);
 1341   }
 1342 }
 1343 
 1344 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1345   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1346   if( offset >= 128 ) {
 1347     return 7;
 1348   }
 1349   else {
 1350     return 4;
 1351   }
 1352 }
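       // Size sketch (informal): the disp32 form of LEA reg,[ESP+offset] is
       // 1 opcode byte + ModRM + SIB + 4 displacement bytes = 7, while the
       // disp8 form replaces the 4-byte displacement with a single byte,
       // giving 4.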
 1353 
 1354 //=============================================================================
 1355 #ifndef PRODUCT
 1356 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1357   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1358   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1359   st->print_cr("\tNOP");
 1360   st->print_cr("\tNOP");
 1361   if( !OptoBreakpoint )
 1362     st->print_cr("\tNOP");
 1363 }
 1364 #endif
 1365 
 1366 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1367   MacroAssembler masm(&cbuf);
 1368 #ifdef ASSERT
 1369   uint insts_size = cbuf.insts_size();
 1370 #endif
 1371   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1372   masm.jump_cc(Assembler::notEqual,
 1373                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  1374   /* WARNING: these NOPs are critical so that the verified entry point is
  1375      properly aligned for patching by NativeJump::patch_verified_entry() */
 1376   int nops_cnt = 2;
 1377   if( !OptoBreakpoint ) // Leave space for int3
 1378      nops_cnt += 1;
 1379   masm.nop(nops_cnt);
 1380 
 1381   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1382 }
 1383 
 1384 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1385   return OptoBreakpoint ? 11 : 12;
 1386 }
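       // Rough size breakdown (a sketch, not normative): CMP EAX,[ECX+4]
       // encodes in 3 bytes and the JNE rel32 to the IC miss stub in 6, so the
       // 2 or 3 padding NOPs emitted above bring the total to 11 (with
       // OptoBreakpoint) or 12.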
 1387 
 1388 
 1389 //=============================================================================
 1390 
 1391 // Vector calling convention not supported.
 1392 const bool Matcher::supports_vector_calling_convention() {
 1393   return false;
 1394 }
 1395 
 1396 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1397   Unimplemented();
 1398   return OptoRegPair(0, 0);
 1399 }
 1400 
 1401 // Is this branch offset short enough that a short branch can be used?
 1402 //
 1403 // NOTE: If the platform does not provide any short branch variants, then
 1404 //       this method should return false for offset 0.
 1405 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  1406   // The passed offset is relative to the address of the branch.
  1407   // On x86 a branch displacement is calculated relative to the address
  1408   // of the next instruction.
 1409   offset -= br_size;
 1410 
  1411   // The short version of jmpConUCF2 contains multiple branches,
  1412   // making its reach slightly shorter.
 1413   if (rule == jmpConUCF2_rule)
 1414     return (-126 <= offset && offset <= 125);
 1415   return (-128 <= offset && offset <= 127);
 1416 }
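       // Worked example (illustrative only): for a 2-byte short branch whose
       // target lies 100 bytes past the branch instruction, offset becomes
       // 100 - 2 = 98, which is within [-128, 127], so the short form can be
       // used; a target 200 bytes ahead yields 198 and forces the long form.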
 1417 
 1418 // Return whether or not this register is ever used as an argument.  This
 1419 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1420 // Registers not mentioned will be killed by the VM call in the trampoline, and
  1421 // arguments in those registers will not be available to the callee.
 1422 bool Matcher::can_be_java_arg( int reg ) {
 1423   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1424   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1425   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1426   return false;
 1427 }
 1428 
 1429 bool Matcher::is_spillable_arg( int reg ) {
 1430   return can_be_java_arg(reg);
 1431 }
 1432 
 1433 uint Matcher::int_pressure_limit()
 1434 {
 1435   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1436 }
 1437 
 1438 uint Matcher::float_pressure_limit()
 1439 {
 1440   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1441 }
 1442 
 1443 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  1444   // Use the hardware integer DIV instruction when
  1445   // it is faster than code which uses multiply.
  1446   // Only when the constant divisor fits into 32 bits
  1447   // (min_jint is excluded so that its negation still yields
  1448   // a correct positive 32-bit value).
 1449   return VM_Version::has_fast_idiv() &&
 1450          (divisor == (int)divisor && divisor != min_jint);
 1451 }
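       // For example (illustrative): a constant divisor of 7 fits in 32 bits,
       // so on CPUs with fast IDIV the hardware instruction is used; a divisor
       // such as 0x100000000L (or min_jint) does not qualify and the
       // multiply-based transformation is used instead.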
 1452 
 1453 // Register for DIVI projection of divmodI
 1454 RegMask Matcher::divI_proj_mask() {
 1455   return EAX_REG_mask();
 1456 }
 1457 
 1458 // Register for MODI projection of divmodI
 1459 RegMask Matcher::modI_proj_mask() {
 1460   return EDX_REG_mask();
 1461 }
 1462 
 1463 // Register for DIVL projection of divmodL
 1464 RegMask Matcher::divL_proj_mask() {
 1465   ShouldNotReachHere();
 1466   return RegMask();
 1467 }
 1468 
 1469 // Register for MODL projection of divmodL
 1470 RegMask Matcher::modL_proj_mask() {
 1471   ShouldNotReachHere();
 1472   return RegMask();
 1473 }
 1474 
 1475 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1476   return NO_REG_mask();
 1477 }
 1478 
  1479 // Returns true if the high 32 bits of the value are known to be zero.
 1480 bool is_operand_hi32_zero(Node* n) {
 1481   int opc = n->Opcode();
 1482   if (opc == Op_AndL) {
 1483     Node* o2 = n->in(2);
 1484     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1485       return true;
 1486     }
 1487   }
 1488   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1489     return true;
 1490   }
 1491   return false;
 1492 }
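       // Examples (illustrative): (x AndL 0x00000000FFFFFFFFL) and ConL 5 both
       // report true, since their upper 32 bits are provably zero, while
       // ConL 0x100000000L reports false.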
 1493 
 1494 %}
 1495 
 1496 //----------ENCODING BLOCK-----------------------------------------------------
 1497 // This block specifies the encoding classes used by the compiler to output
 1498 // byte streams.  Encoding classes generate functions which are called by
 1499 // Machine Instruction Nodes in order to generate the bit encoding of the
 1500 // instruction.  Operands specify their base encoding interface with the
  1501 // interface keyword.  Four interfaces are currently supported:
 1502 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1503 // operand to generate a function which returns its register number when
 1504 // queried.   CONST_INTER causes an operand to generate a function which
 1505 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1506 // operand to generate four functions which return the Base Register, the
 1507 // Index Register, the Scale Value, and the Offset Value of the operand when
 1508 // queried.  COND_INTER causes an operand to generate six functions which
  1509 // return the encoding code (i.e. the encoding bits for the instruction)
 1510 // associated with each basic boolean condition for a conditional instruction.
 1511 // Instructions specify two basic values for encoding.  They use the
 1512 // ins_encode keyword to specify their encoding class (which must be one of
 1513 // the class names specified in the encoding block), and they use the
 1514 // opcode keyword to specify, in order, their primary, secondary, and
 1515 // tertiary opcode.  Only the opcode sections which a particular instruction
 1516 // needs for encoding need to be specified.
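       //
       // As an illustrative sketch only (not an actual rule in this file), an
       // ADD instruction could select the OpcP and RegReg encoding classes
       // defined below roughly as follows:
       //
       //   instruct addI_eReg_example(rRegI dst, rRegI src, eFlagsReg cr) %{
       //     match(Set dst (AddI dst src));
       //     effect(KILL cr);
       //     opcode(0x03);                        // primary opcode: ADD r32,r/m32
       //     ins_encode( OpcP, RegReg(dst,src) ); // emit opcode byte, then ModRM
       //     ins_pipe( ialu_reg_reg );
       //   %}
       //
       // Here the opcode keyword supplies $primary and ins_encode names encoding
       // classes from the block that follows.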
 1517 encode %{
 1518   // Build emit functions for each basic byte or larger field in the intel
 1519   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1520   // code in the enc_class source block.  Emit functions will live in the
  1521   // main source block for now.  In the future, we can generalize this by
  1522   // adding a syntax that specifies the sizes of fields in order,
  1523   // so that the adlc can build the emit functions automagically.
 1524 
 1525   // Emit primary opcode
 1526   enc_class OpcP %{
 1527     emit_opcode(cbuf, $primary);
 1528   %}
 1529 
 1530   // Emit secondary opcode
 1531   enc_class OpcS %{
 1532     emit_opcode(cbuf, $secondary);
 1533   %}
 1534 
 1535   // Emit opcode directly
 1536   enc_class Opcode(immI d8) %{
 1537     emit_opcode(cbuf, $d8$$constant);
 1538   %}
 1539 
 1540   enc_class SizePrefix %{
 1541     emit_opcode(cbuf,0x66);
 1542   %}
 1543 
 1544   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1545     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1546   %}
 1547 
 1548   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1549     emit_opcode(cbuf,$opcode$$constant);
 1550     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1551   %}
 1552 
 1553   enc_class mov_r32_imm0( rRegI dst ) %{
 1554     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1555     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1556   %}
 1557 
 1558   enc_class cdq_enc %{
 1559     // Full implementation of Java idiv and irem; checks for
 1560     // special case as described in JVM spec., p.243 & p.271.
 1561     //
 1562     //         normal case                           special case
 1563     //
  1564     // input : rax: dividend                          min_int
  1565     //         reg: divisor                          -1
  1566     //
  1567     // output: rax: quotient  (= rax idiv reg)         min_int
  1568     //         rdx: remainder (= rax irem reg)        0
  1569     //
  1570     //  Code sequence:
 1571     //
 1572     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1573     //  0F 85 0B 00 00 00    jne         normal_case
 1574     //  33 D2                xor         rdx,edx
 1575     //  83 F9 FF             cmp         rcx,0FFh
 1576     //  0F 84 03 00 00 00    je          done
 1577     //                  normal_case:
 1578     //  99                   cdq
 1579     //  F7 F9                idiv        rax,ecx
 1580     //                  done:
 1581     //
 1582     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1583     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1584     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1585     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1586     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1587     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1588     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1589     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1590     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1591     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1592     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1593     // normal_case:
 1594     emit_opcode(cbuf,0x99);                                         // cdq
 1595     // idiv (note: must be emitted by the user of this rule)
 1596     // normal:
 1597   %}
 1598 
 1599   // Dense encoding for older common ops
 1600   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1601     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1602   %}
 1603 
 1604 
  1605   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1606   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1607     // Check for 8-bit immediate, and set sign extend bit in opcode
 1608     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1609       emit_opcode(cbuf, $primary | 0x02);
 1610     }
 1611     else {                          // If 32-bit immediate
 1612       emit_opcode(cbuf, $primary);
 1613     }
 1614   %}
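         // For instance (illustrative): if $primary is 0x81 (the ALU r/m32,imm32
         // group), an immediate of 5 emits 0x83, selecting the sign-extended
         // 8-bit immediate form, while an immediate of 1000 emits the plain
         // 0x81 (imm32) form.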
 1615 
 1616   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1617     // Emit primary opcode and set sign-extend bit
 1618     // Check for 8-bit immediate, and set sign extend bit in opcode
 1619     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
  1620       emit_opcode(cbuf, $primary | 0x02);
  1621     } else {                        // If 32-bit immediate
 1622       emit_opcode(cbuf, $primary);
 1623     }
 1624     // Emit r/m byte with secondary opcode, after primary opcode.
 1625     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1626   %}
 1627 
 1628   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1629     // Check for 8-bit immediate, and set sign extend bit in opcode
 1630     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1631       $$$emit8$imm$$constant;
 1632     }
 1633     else {                          // If 32-bit immediate
 1634       // Output immediate
 1635       $$$emit32$imm$$constant;
 1636     }
 1637   %}
 1638 
 1639   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1640     // Emit primary opcode and set sign-extend bit
 1641     // Check for 8-bit immediate, and set sign extend bit in opcode
 1642     int con = (int)$imm$$constant; // Throw away top bits
 1643     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1644     // Emit r/m byte with secondary opcode, after primary opcode.
 1645     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1646     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1647     else                               emit_d32(cbuf,con);
 1648   %}
 1649 
 1650   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1651     // Emit primary opcode and set sign-extend bit
 1652     // Check for 8-bit immediate, and set sign extend bit in opcode
 1653     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1654     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1655     // Emit r/m byte with tertiary opcode, after primary opcode.
 1656     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1657     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1658     else                               emit_d32(cbuf,con);
 1659   %}
 1660 
 1661   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1662     emit_cc(cbuf, $secondary, $dst$$reg );
 1663   %}
 1664 
 1665   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1666     int destlo = $dst$$reg;
 1667     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1668     // bswap lo
 1669     emit_opcode(cbuf, 0x0F);
 1670     emit_cc(cbuf, 0xC8, destlo);
 1671     // bswap hi
 1672     emit_opcode(cbuf, 0x0F);
 1673     emit_cc(cbuf, 0xC8, desthi);
 1674     // xchg lo and hi
 1675     emit_opcode(cbuf, 0x87);
 1676     emit_rm(cbuf, 0x3, destlo, desthi);
 1677   %}
 1678 
 1679   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1680     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1681   %}
 1682 
 1683   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1684     $$$emit8$primary;
 1685     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1686   %}
 1687 
 1688   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1689     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1690     emit_d8(cbuf, op >> 8 );
 1691     emit_d8(cbuf, op & 255);
 1692   %}
 1693 
 1694   // emulate a CMOV with a conditional branch around a MOV
 1695   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1696     // Invert sense of branch from sense of CMOV
 1697     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1698     emit_d8( cbuf, $brOffs$$constant );
 1699   %}
 1700 
 1701   enc_class enc_PartialSubtypeCheck( ) %{
 1702     Register Redi = as_Register(EDI_enc); // result register
 1703     Register Reax = as_Register(EAX_enc); // super class
 1704     Register Recx = as_Register(ECX_enc); // killed
 1705     Register Resi = as_Register(ESI_enc); // sub class
 1706     Label miss;
 1707 
 1708     MacroAssembler _masm(&cbuf);
 1709     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1710                                      NULL, &miss,
 1711                                      /*set_cond_codes:*/ true);
 1712     if ($primary) {
 1713       __ xorptr(Redi, Redi);
 1714     }
 1715     __ bind(miss);
 1716   %}
 1717 
 1718   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1719     MacroAssembler masm(&cbuf);
 1720     int start = masm.offset();
 1721     if (UseSSE >= 2) {
 1722       if (VerifyFPU) {
 1723         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1724       }
 1725     } else {
 1726       // External c_calling_convention expects the FPU stack to be 'clean'.
 1727       // Compiled code leaves it dirty.  Do cleanup now.
 1728       masm.empty_FPU_stack();
 1729     }
 1730     if (sizeof_FFree_Float_Stack_All == -1) {
 1731       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1732     } else {
 1733       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1734     }
 1735   %}
 1736 
 1737   enc_class Verify_FPU_For_Leaf %{
 1738     if( VerifyFPU ) {
 1739       MacroAssembler masm(&cbuf);
 1740       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1741     }
 1742   %}
 1743 
 1744   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1745     // This is the instruction starting address for relocation info.
 1746     MacroAssembler _masm(&cbuf);
 1747     cbuf.set_insts_mark();
 1748     $$$emit8$primary;
 1749     // CALL directly to the runtime
 1750     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1751                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1752     __ post_call_nop();
 1753 
 1754     if (UseSSE >= 2) {
 1755       MacroAssembler _masm(&cbuf);
 1756       BasicType rt = tf()->return_type();
 1757 
 1758       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1759         // A C runtime call where the return value is unused.  In SSE2+
 1760         // mode the result needs to be removed from the FPU stack.  It's
 1761         // likely that this function call could be removed by the
 1762         // optimizer if the C function is a pure function.
 1763         __ ffree(0);
 1764       } else if (rt == T_FLOAT) {
 1765         __ lea(rsp, Address(rsp, -4));
 1766         __ fstp_s(Address(rsp, 0));
 1767         __ movflt(xmm0, Address(rsp, 0));
 1768         __ lea(rsp, Address(rsp,  4));
 1769       } else if (rt == T_DOUBLE) {
 1770         __ lea(rsp, Address(rsp, -8));
 1771         __ fstp_d(Address(rsp, 0));
 1772         __ movdbl(xmm0, Address(rsp, 0));
 1773         __ lea(rsp, Address(rsp,  8));
 1774       }
 1775     }
 1776   %}
 1777 
 1778   enc_class pre_call_resets %{
 1779     // If method sets FPU control word restore it here
 1780     debug_only(int off0 = cbuf.insts_size());
 1781     if (ra_->C->in_24_bit_fp_mode()) {
 1782       MacroAssembler _masm(&cbuf);
 1783       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1784     }
 1785     // Clear upper bits of YMM registers when current compiled code uses
 1786     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1787     MacroAssembler _masm(&cbuf);
 1788     __ vzeroupper();
 1789     debug_only(int off1 = cbuf.insts_size());
 1790     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1791   %}
 1792 
 1793   enc_class post_call_FPU %{
 1794     // If method sets FPU control word do it here also
 1795     if (Compile::current()->in_24_bit_fp_mode()) {
 1796       MacroAssembler masm(&cbuf);
 1797       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1798     }
 1799   %}
 1800 
 1801   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1802     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1803     // who we intended to call.
 1804     MacroAssembler _masm(&cbuf);
 1805     cbuf.set_insts_mark();
 1806     $$$emit8$primary;
 1807 
 1808     if (!_method) {
 1809       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1810                      runtime_call_Relocation::spec(),
 1811                      RELOC_IMM32);
 1812       __ post_call_nop();
 1813     } else {
 1814       int method_index = resolved_method_index(cbuf);
 1815       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1816                                                   : static_call_Relocation::spec(method_index);
 1817       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1818                      rspec, RELOC_DISP32);
 1819       __ post_call_nop();
 1820       address mark = cbuf.insts_mark();
 1821       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1822         // Calls of the same statically bound method can share
 1823         // a stub to the interpreter.
 1824         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1825       } else {
 1826         // Emit stubs for static call.
 1827         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1828         if (stub == NULL) {
 1829           ciEnv::current()->record_failure("CodeCache is full");
 1830           return;
 1831         }
 1832       }
 1833     }
 1834   %}
 1835 
 1836   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1837     MacroAssembler _masm(&cbuf);
 1838     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1839     __ post_call_nop();
 1840   %}
 1841 
 1842   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1843     int disp = in_bytes(Method::from_compiled_offset());
 1844     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1845 
 1846     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1847     MacroAssembler _masm(&cbuf);
 1848     cbuf.set_insts_mark();
 1849     $$$emit8$primary;
 1850     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1851     emit_d8(cbuf, disp);             // Displacement
 1852     __ post_call_nop();
 1853   %}
 1854 
  1855 //   The following encoding is no longer used, but may be restored if the calling
 1856 //   convention changes significantly.
 1857 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1858 //
 1859 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1860 //     // int ic_reg     = Matcher::inline_cache_reg();
 1861 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1862 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1863 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1864 //
 1865 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1866 //     // // so we load it immediately before the call
 1867 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1868 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1869 //
 1870 //     // xor rbp,ebp
 1871 //     emit_opcode(cbuf, 0x33);
 1872 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1873 //
 1874 //     // CALL to interpreter.
 1875 //     cbuf.set_insts_mark();
 1876 //     $$$emit8$primary;
 1877 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1878 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1879 //   %}
 1880 
 1881   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1882     $$$emit8$primary;
 1883     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1884     $$$emit8$shift$$constant;
 1885   %}
 1886 
 1887   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1888     // Load immediate does not have a zero or sign extended version
 1889     // for 8-bit immediates
 1890     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1891     $$$emit32$src$$constant;
 1892   %}
 1893 
 1894   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1895     // Load immediate does not have a zero or sign extended version
 1896     // for 8-bit immediates
 1897     emit_opcode(cbuf, $primary + $dst$$reg);
 1898     $$$emit32$src$$constant;
 1899   %}
 1900 
 1901   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1902     // Load immediate does not have a zero or sign extended version
 1903     // for 8-bit immediates
 1904     int dst_enc = $dst$$reg;
 1905     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1906     if (src_con == 0) {
 1907       // xor dst, dst
 1908       emit_opcode(cbuf, 0x33);
 1909       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1910     } else {
 1911       emit_opcode(cbuf, $primary + dst_enc);
 1912       emit_d32(cbuf, src_con);
 1913     }
 1914   %}
 1915 
 1916   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1917     // Load immediate does not have a zero or sign extended version
 1918     // for 8-bit immediates
 1919     int dst_enc = $dst$$reg + 2;
 1920     int src_con = ((julong)($src$$constant)) >> 32;
 1921     if (src_con == 0) {
 1922       // xor dst, dst
 1923       emit_opcode(cbuf, 0x33);
 1924       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1925     } else {
 1926       emit_opcode(cbuf, $primary + dst_enc);
 1927       emit_d32(cbuf, src_con);
 1928     }
 1929   %}
 1930 
 1931 
 1932   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1933   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1934     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1935   %}
 1936 
 1937   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1938     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1939   %}
 1940 
 1941   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1942     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1943   %}
 1944 
 1945   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1946     $$$emit8$primary;
 1947     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1948   %}
 1949 
 1950   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1951     $$$emit8$secondary;
 1952     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1953   %}
 1954 
 1955   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1956     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1957   %}
 1958 
 1959   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1960     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1961   %}
 1962 
 1963   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1964     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1965   %}
 1966 
 1967   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1968     // Output immediate
 1969     $$$emit32$src$$constant;
 1970   %}
 1971 
 1972   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1973     // Output Float immediate bits
 1974     jfloat jf = $src$$constant;
 1975     int    jf_as_bits = jint_cast( jf );
 1976     emit_d32(cbuf, jf_as_bits);
 1977   %}
 1978 
 1979   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1980     // Output Float immediate bits
 1981     jfloat jf = $src$$constant;
 1982     int    jf_as_bits = jint_cast( jf );
 1983     emit_d32(cbuf, jf_as_bits);
 1984   %}
 1985 
 1986   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1987     // Output immediate
 1988     $$$emit16$src$$constant;
 1989   %}
 1990 
 1991   enc_class Con_d32(immI src) %{
 1992     emit_d32(cbuf,$src$$constant);
 1993   %}
 1994 
 1995   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1996     // Output immediate memory reference
 1997     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1998     emit_d32(cbuf, 0x00);
 1999   %}
 2000 
 2001   enc_class lock_prefix( ) %{
 2002     emit_opcode(cbuf,0xF0);         // [Lock]
 2003   %}
 2004 
 2005   // Cmp-xchg long value.
  2006   // Note: we need to swap rbx and rcx before and after the
  2007   //       cmpxchg8 instruction because the instruction uses
  2008   //       rcx as the high-order word of the new value to store, but
  2009   //       our register encoding uses rbx.
 2010   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2011 
 2012     // XCHG  rbx,ecx
 2013     emit_opcode(cbuf,0x87);
 2014     emit_opcode(cbuf,0xD9);
 2015     // [Lock]
 2016     emit_opcode(cbuf,0xF0);
 2017     // CMPXCHG8 [Eptr]
 2018     emit_opcode(cbuf,0x0F);
 2019     emit_opcode(cbuf,0xC7);
 2020     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2021     // XCHG  rbx,ecx
 2022     emit_opcode(cbuf,0x87);
 2023     emit_opcode(cbuf,0xD9);
 2024   %}
 2025 
 2026   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2027     // [Lock]
 2028     emit_opcode(cbuf,0xF0);
 2029 
 2030     // CMPXCHG [Eptr]
 2031     emit_opcode(cbuf,0x0F);
 2032     emit_opcode(cbuf,0xB1);
 2033     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2034   %}
 2035 
 2036   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2037     // [Lock]
 2038     emit_opcode(cbuf,0xF0);
 2039 
 2040     // CMPXCHGB [Eptr]
 2041     emit_opcode(cbuf,0x0F);
 2042     emit_opcode(cbuf,0xB0);
 2043     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2044   %}
 2045 
 2046   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2047     // [Lock]
 2048     emit_opcode(cbuf,0xF0);
 2049 
  2050     // operand-size prefix (16-bit operand)
 2051     emit_opcode(cbuf, 0x66);
 2052 
 2053     // CMPXCHGW [Eptr]
 2054     emit_opcode(cbuf,0x0F);
 2055     emit_opcode(cbuf,0xB1);
 2056     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2057   %}
 2058 
 2059   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2060     int res_encoding = $res$$reg;
 2061 
 2062     // MOV  res,0
 2063     emit_opcode( cbuf, 0xB8 + res_encoding);
 2064     emit_d32( cbuf, 0 );
 2065     // JNE,s  fail
 2066     emit_opcode(cbuf,0x75);
 2067     emit_d8(cbuf, 5 );
 2068     // MOV  res,1
 2069     emit_opcode( cbuf, 0xB8 + res_encoding);
 2070     emit_d32( cbuf, 1 );
 2071     // fail:
 2072   %}
 2073 
 2074   enc_class set_instruction_start( ) %{
 2075     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2076   %}
 2077 
 2078   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2079     int reg_encoding = $ereg$$reg;
 2080     int base  = $mem$$base;
 2081     int index = $mem$$index;
 2082     int scale = $mem$$scale;
 2083     int displace = $mem$$disp;
 2084     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2085     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2086   %}
 2087 
 2088   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2089     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2090     int base  = $mem$$base;
 2091     int index = $mem$$index;
 2092     int scale = $mem$$scale;
 2093     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2094     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2095     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2096   %}
 2097 
 2098   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2099     int r1, r2;
 2100     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2101     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2102     emit_opcode(cbuf,0x0F);
 2103     emit_opcode(cbuf,$tertiary);
 2104     emit_rm(cbuf, 0x3, r1, r2);
 2105     emit_d8(cbuf,$cnt$$constant);
 2106     emit_d8(cbuf,$primary);
 2107     emit_rm(cbuf, 0x3, $secondary, r1);
 2108     emit_d8(cbuf,$cnt$$constant);
 2109   %}
 2110 
 2111   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2112     emit_opcode( cbuf, 0x8B ); // Move
 2113     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2114     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2115       emit_d8(cbuf,$primary);
 2116       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2117       emit_d8(cbuf,$cnt$$constant-32);
 2118     }
 2119     emit_d8(cbuf,$primary);
 2120     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2121     emit_d8(cbuf,31);
 2122   %}
 2123 
 2124   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2125     int r1, r2;
 2126     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2127     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2128 
 2129     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2130     emit_rm(cbuf, 0x3, r1, r2);
 2131     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2132       emit_opcode(cbuf,$primary);
 2133       emit_rm(cbuf, 0x3, $secondary, r1);
 2134       emit_d8(cbuf,$cnt$$constant-32);
 2135     }
 2136     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2137     emit_rm(cbuf, 0x3, r2, r2);
 2138   %}
 2139 
 2140   // Clone of RegMem but accepts an extra parameter to access each
 2141   // half of a double in memory; it never needs relocation info.
 2142   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2143     emit_opcode(cbuf,$opcode$$constant);
 2144     int reg_encoding = $rm_reg$$reg;
 2145     int base     = $mem$$base;
 2146     int index    = $mem$$index;
 2147     int scale    = $mem$$scale;
 2148     int displace = $mem$$disp + $disp_for_half$$constant;
 2149     relocInfo::relocType disp_reloc = relocInfo::none;
 2150     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2151   %}
 2152 
 2153   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2154   //
 2155   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2156   // and it never needs relocation information.
 2157   // Frequently used to move data between FPU's Stack Top and memory.
 2158   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2159     int rm_byte_opcode = $rm_opcode$$constant;
 2160     int base     = $mem$$base;
 2161     int index    = $mem$$index;
 2162     int scale    = $mem$$scale;
 2163     int displace = $mem$$disp;
 2164     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2165     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2166   %}
 2167 
 2168   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2169     int rm_byte_opcode = $rm_opcode$$constant;
 2170     int base     = $mem$$base;
 2171     int index    = $mem$$index;
 2172     int scale    = $mem$$scale;
 2173     int displace = $mem$$disp;
 2174     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2175     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2176   %}
 2177 
 2178   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2179     int reg_encoding = $dst$$reg;
 2180     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2181     int index        = 0x04;            // 0x04 indicates no index
 2182     int scale        = 0x00;            // 0x00 indicates no scale
 2183     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2184     relocInfo::relocType disp_reloc = relocInfo::none;
 2185     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2186   %}
 2187 
 2188   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2189     // Compare dst,src
 2190     emit_opcode(cbuf,0x3B);
 2191     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2192     // jmp dst < src around move
 2193     emit_opcode(cbuf,0x7C);
 2194     emit_d8(cbuf,2);
 2195     // move dst,src
 2196     emit_opcode(cbuf,0x8B);
 2197     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2198   %}
 2199 
 2200   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2201     // Compare dst,src
 2202     emit_opcode(cbuf,0x3B);
 2203     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2204     // jmp dst > src around move
 2205     emit_opcode(cbuf,0x7F);
 2206     emit_d8(cbuf,2);
 2207     // move dst,src
 2208     emit_opcode(cbuf,0x8B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210   %}
 2211 
 2212   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2213     // If src is FPR1, we can just FST to store it.
 2214     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2215     int reg_encoding = 0x2; // Just store
 2216     int base  = $mem$$base;
 2217     int index = $mem$$index;
 2218     int scale = $mem$$scale;
 2219     int displace = $mem$$disp;
 2220     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2221     if( $src$$reg != FPR1L_enc ) {
 2222       reg_encoding = 0x3;  // Store & pop
 2223       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2224       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2225     }
 2226     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2227     emit_opcode(cbuf,$primary);
 2228     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2229   %}
 2230 
 2231   enc_class neg_reg(rRegI dst) %{
 2232     // NEG $dst
 2233     emit_opcode(cbuf,0xF7);
 2234     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2235   %}
 2236 
 2237   enc_class setLT_reg(eCXRegI dst) %{
 2238     // SETLT $dst
 2239     emit_opcode(cbuf,0x0F);
 2240     emit_opcode(cbuf,0x9C);
 2241     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2242   %}
 2243 
 2244   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2245     int tmpReg = $tmp$$reg;
 2246 
 2247     // SUB $p,$q
 2248     emit_opcode(cbuf,0x2B);
 2249     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2250     // SBB $tmp,$tmp
 2251     emit_opcode(cbuf,0x1B);
 2252     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2253     // AND $tmp,$y
 2254     emit_opcode(cbuf,0x23);
 2255     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2256     // ADD $p,$tmp
 2257     emit_opcode(cbuf,0x03);
 2258     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2259   %}
 2260 
 2261   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2262     // TEST shift,32
 2263     emit_opcode(cbuf,0xF7);
 2264     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2265     emit_d32(cbuf,0x20);
 2266     // JEQ,s small
 2267     emit_opcode(cbuf, 0x74);
 2268     emit_d8(cbuf, 0x04);
 2269     // MOV    $dst.hi,$dst.lo
 2270     emit_opcode( cbuf, 0x8B );
 2271     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2272     // CLR    $dst.lo
 2273     emit_opcode(cbuf, 0x33);
 2274     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2275 // small:
 2276     // SHLD   $dst.hi,$dst.lo,$shift
 2277     emit_opcode(cbuf,0x0F);
 2278     emit_opcode(cbuf,0xA5);
 2279     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  2280     // SHL    $dst.lo,$shift
 2281     emit_opcode(cbuf,0xD3);
 2282     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2283   %}
 2284 
 2285   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2286     // TEST shift,32
 2287     emit_opcode(cbuf,0xF7);
 2288     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2289     emit_d32(cbuf,0x20);
 2290     // JEQ,s small
 2291     emit_opcode(cbuf, 0x74);
 2292     emit_d8(cbuf, 0x04);
 2293     // MOV    $dst.lo,$dst.hi
 2294     emit_opcode( cbuf, 0x8B );
 2295     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2296     // CLR    $dst.hi
 2297     emit_opcode(cbuf, 0x33);
 2298     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2299 // small:
 2300     // SHRD   $dst.lo,$dst.hi,$shift
 2301     emit_opcode(cbuf,0x0F);
 2302     emit_opcode(cbuf,0xAD);
 2303     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  2304     // SHR    $dst.hi,$shift
 2305     emit_opcode(cbuf,0xD3);
 2306     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2307   %}
 2308 
 2309   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2310     // TEST shift,32
 2311     emit_opcode(cbuf,0xF7);
 2312     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2313     emit_d32(cbuf,0x20);
 2314     // JEQ,s small
 2315     emit_opcode(cbuf, 0x74);
 2316     emit_d8(cbuf, 0x05);
 2317     // MOV    $dst.lo,$dst.hi
 2318     emit_opcode( cbuf, 0x8B );
 2319     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2320     // SAR    $dst.hi,31
 2321     emit_opcode(cbuf, 0xC1);
 2322     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2323     emit_d8(cbuf, 0x1F );
 2324 // small:
 2325     // SHRD   $dst.lo,$dst.hi,$shift
 2326     emit_opcode(cbuf,0x0F);
 2327     emit_opcode(cbuf,0xAD);
 2328     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  2329     // SAR    $dst.hi,$shift
 2330     emit_opcode(cbuf,0xD3);
 2331     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2332   %}
 2333 
 2334 
 2335   // ----------------- Encodings for floating point unit -----------------
 2336   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2337   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2338     $$$emit8$primary;
 2339     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2340   %}
 2341 
 2342   // Pop argument in FPR0 with FSTP ST(0)
 2343   enc_class PopFPU() %{
 2344     emit_opcode( cbuf, 0xDD );
 2345     emit_d8( cbuf, 0xD8 );
 2346   %}
 2347 
 2348   // !!!!! equivalent to Pop_Reg_F
 2349   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2350     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2351     emit_d8( cbuf, 0xD8+$dst$$reg );
 2352   %}
 2353 
 2354   enc_class Push_Reg_DPR( regDPR dst ) %{
 2355     emit_opcode( cbuf, 0xD9 );
 2356     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2357   %}
 2358 
 2359   enc_class strictfp_bias1( regDPR dst ) %{
 2360     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2361     emit_opcode( cbuf, 0x2D );
 2362     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2363     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2364     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2365   %}
 2366 
 2367   enc_class strictfp_bias2( regDPR dst ) %{
 2368     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2369     emit_opcode( cbuf, 0x2D );
 2370     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2371     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2372     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2373   %}
 2374 
 2375   // Special case for moving an integer register to a stack slot.
 2376   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2377     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2378   %}
 2379 
 2380   // Special case for moving a register to a stack slot.
 2381   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2382     // Opcode already emitted
 2383     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2384     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2385     emit_d32(cbuf, $dst$$disp);   // Displacement
 2386   %}
 2387 
 2388   // Push the integer in stackSlot 'src' onto FP-stack
 2389   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2390     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2391   %}
 2392 
 2393   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2394   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2395     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2396   %}
 2397 
 2398   // Same as Pop_Mem_F except for opcode
 2399   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2400   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2401     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2402   %}
 2403 
 2404   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2405     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2406     emit_d8( cbuf, 0xD8+$dst$$reg );
 2407   %}
 2408 
 2409   enc_class Push_Reg_FPR( regFPR dst ) %{
 2410     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2411     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2412   %}
 2413 
 2414   // Push FPU's float to a stack-slot, and pop FPU-stack
 2415   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2416     int pop = 0x02;
 2417     if ($src$$reg != FPR1L_enc) {
 2418       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2419       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2420       pop = 0x03;
 2421     }
 2422     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2423   %}
 2424 
 2425   // Push FPU's double to a stack-slot, and pop FPU-stack
 2426   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2427     int pop = 0x02;
 2428     if ($src$$reg != FPR1L_enc) {
 2429       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2430       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2431       pop = 0x03;
 2432     }
 2433     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2434   %}
 2435 
 2436   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2437   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2438     int pop = 0xD0 - 1; // -1 since we skip FLD
 2439     if ($src$$reg != FPR1L_enc) {
 2440       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2441       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2442       pop = 0xD8;
 2443     }
 2444     emit_opcode( cbuf, 0xDD );
 2445     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2446   %}
 2447 
 2448 
 2449   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2450     // load dst in FPR0
 2451     emit_opcode( cbuf, 0xD9 );
 2452     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2453     if ($src$$reg != FPR1L_enc) {
 2454       // fincstp
 2455       emit_opcode (cbuf, 0xD9);
 2456       emit_opcode (cbuf, 0xF7);
 2457       // swap src with FPR1:
 2458       // FXCH FPR1 with src
 2459       emit_opcode(cbuf, 0xD9);
 2460       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2461       // fdecstp
 2462       emit_opcode (cbuf, 0xD9);
 2463       emit_opcode (cbuf, 0xF6);
 2464     }
 2465   %}
 2466 
 2467   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2468     MacroAssembler _masm(&cbuf);
 2469     __ subptr(rsp, 8);
 2470     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2471     __ fld_d(Address(rsp, 0));
 2472     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2473     __ fld_d(Address(rsp, 0));
 2474   %}
 2475 
 2476   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2477     MacroAssembler _masm(&cbuf);
 2478     __ subptr(rsp, 4);
 2479     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2480     __ fld_s(Address(rsp, 0));
 2481     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2482     __ fld_s(Address(rsp, 0));
 2483   %}
 2484 
 2485   enc_class Push_ResultD(regD dst) %{
 2486     MacroAssembler _masm(&cbuf);
 2487     __ fstp_d(Address(rsp, 0));
 2488     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2489     __ addptr(rsp, 8);
 2490   %}
 2491 
 2492   enc_class Push_ResultF(regF dst, immI d8) %{
 2493     MacroAssembler _masm(&cbuf);
 2494     __ fstp_s(Address(rsp, 0));
 2495     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2496     __ addptr(rsp, $d8$$constant);
 2497   %}
 2498 
 2499   enc_class Push_SrcD(regD src) %{
 2500     MacroAssembler _masm(&cbuf);
 2501     __ subptr(rsp, 8);
 2502     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2503     __ fld_d(Address(rsp, 0));
 2504   %}
 2505 
 2506   enc_class push_stack_temp_qword() %{
 2507     MacroAssembler _masm(&cbuf);
 2508     __ subptr(rsp, 8);
 2509   %}
 2510 
 2511   enc_class pop_stack_temp_qword() %{
 2512     MacroAssembler _masm(&cbuf);
 2513     __ addptr(rsp, 8);
 2514   %}
 2515 
 2516   enc_class push_xmm_to_fpr1(regD src) %{
 2517     MacroAssembler _masm(&cbuf);
 2518     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2519     __ fld_d(Address(rsp, 0));
 2520   %}
 2521 
 2522   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2523     if ($src$$reg != FPR1L_enc) {
 2524       // fincstp
 2525       emit_opcode (cbuf, 0xD9);
 2526       emit_opcode (cbuf, 0xF7);
 2527       // FXCH FPR1 with src
 2528       emit_opcode(cbuf, 0xD9);
 2529       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2530       // fdecstp
 2531       emit_opcode (cbuf, 0xD9);
 2532       emit_opcode (cbuf, 0xF6);
 2533     }
 2534     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2535     // // FSTP   FPR$dst$$reg
 2536     // emit_opcode( cbuf, 0xDD );
 2537     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2538   %}
 2539 
 2540   enc_class fnstsw_sahf_skip_parity() %{
 2541     // fnstsw ax
 2542     emit_opcode( cbuf, 0xDF );
 2543     emit_opcode( cbuf, 0xE0 );
 2544     // sahf
 2545     emit_opcode( cbuf, 0x9E );
 2546     // jnp  ::skip
 2547     emit_opcode( cbuf, 0x7B );
 2548     emit_opcode( cbuf, 0x05 );
 2549   %}
 2550 
 2551   enc_class emitModDPR() %{
 2552     // fprem must be iterative
 2553     // :: loop
 2554     // fprem
 2555     emit_opcode( cbuf, 0xD9 );
 2556     emit_opcode( cbuf, 0xF8 );
 2557     // wait
 2558     emit_opcode( cbuf, 0x9b );
 2559     // fnstsw ax
 2560     emit_opcode( cbuf, 0xDF );
 2561     emit_opcode( cbuf, 0xE0 );
 2562     // sahf
 2563     emit_opcode( cbuf, 0x9E );
 2564     // jp  ::loop
 2565     emit_opcode( cbuf, 0x0F );
 2566     emit_opcode( cbuf, 0x8A );
 2567     emit_opcode( cbuf, 0xF4 );
 2568     emit_opcode( cbuf, 0xFF );
 2569     emit_opcode( cbuf, 0xFF );
 2570     emit_opcode( cbuf, 0xFF );
 2571   %}
 2572 
 2573   enc_class fpu_flags() %{
 2574     // fnstsw_ax
 2575     emit_opcode( cbuf, 0xDF);
 2576     emit_opcode( cbuf, 0xE0);
 2577     // test ax,0x0400
 2578     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2579     emit_opcode( cbuf, 0xA9 );
 2580     emit_d16   ( cbuf, 0x0400 );
 2581     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2582     // // test rax,0x0400
 2583     // emit_opcode( cbuf, 0xA9 );
 2584     // emit_d32   ( cbuf, 0x00000400 );
 2585     //
 2586     // jz exit (no unordered comparison)
 2587     emit_opcode( cbuf, 0x74 );
 2588     emit_d8    ( cbuf, 0x02 );
 2589     // mov ah,1 - treat as LT case (set carry flag)
 2590     emit_opcode( cbuf, 0xB4 );
 2591     emit_d8    ( cbuf, 0x01 );
 2592     // sahf
 2593     emit_opcode( cbuf, 0x9E);
 2594   %}
 2595 
 2596   enc_class cmpF_P6_fixup() %{
 2597     // Fixup the integer flags in case comparison involved a NaN
 2598     //
 2599     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2600     emit_opcode( cbuf, 0x7B );
 2601     emit_d8    ( cbuf, 0x03 );
 2602     // MOV AH,1 - treat as LT case (set carry flag)
 2603     emit_opcode( cbuf, 0xB4 );
 2604     emit_d8    ( cbuf, 0x01 );
 2605     // SAHF
 2606     emit_opcode( cbuf, 0x9E);
 2607     // NOP     // target for branch to avoid branch to branch
 2608     emit_opcode( cbuf, 0x90);
 2609   %}
 2610 
 2611 //     fnstsw_ax();
 2612 //     sahf();
 2613 //     movl(dst, nan_result);
 2614 //     jcc(Assembler::parity, exit);
 2615 //     movl(dst, less_result);
 2616 //     jcc(Assembler::below, exit);
 2617 //     movl(dst, equal_result);
 2618 //     jcc(Assembler::equal, exit);
 2619 //     movl(dst, greater_result);
 2620 
 2621 // less_result     =  1;
 2622 // greater_result  = -1;
 2623 // equal_result    = 0;
 2624 // nan_result      = -1;
 2625 
 2626   enc_class CmpF_Result(rRegI dst) %{
 2627     // fnstsw_ax();
 2628     emit_opcode( cbuf, 0xDF);
 2629     emit_opcode( cbuf, 0xE0);
 2630     // sahf
 2631     emit_opcode( cbuf, 0x9E);
 2632     // movl(dst, nan_result);
 2633     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2634     emit_d32( cbuf, -1 );
 2635     // jcc(Assembler::parity, exit);
 2636     emit_opcode( cbuf, 0x7A );
 2637     emit_d8    ( cbuf, 0x13 );
 2638     // movl(dst, less_result);
 2639     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2640     emit_d32( cbuf, -1 );
 2641     // jcc(Assembler::below, exit);
 2642     emit_opcode( cbuf, 0x72 );
 2643     emit_d8    ( cbuf, 0x0C );
 2644     // movl(dst, equal_result);
 2645     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2646     emit_d32( cbuf, 0 );
 2647     // jcc(Assembler::equal, exit);
 2648     emit_opcode( cbuf, 0x74 );
 2649     emit_d8    ( cbuf, 0x05 );
 2650     // movl(dst, greater_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, 1 );
 2653   %}
 2654 
 2655 
 2656   // Compare the longs and set flags
 2657   // BROKEN!  Do Not use as-is
 2658   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2659     // CMP    $src1.hi,$src2.hi
 2660     emit_opcode( cbuf, 0x3B );
 2661     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2662     // JNE,s  done
 2663     emit_opcode(cbuf,0x75);
 2664     emit_d8(cbuf, 2 );
 2665     // CMP    $src1.lo,$src2.lo
 2666     emit_opcode( cbuf, 0x3B );
 2667     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2668 // done:
 2669   %}
 2670 
 2671   enc_class convert_int_long( regL dst, rRegI src ) %{
 2672     // mov $dst.lo,$src
 2673     int dst_encoding = $dst$$reg;
 2674     int src_encoding = $src$$reg;
 2675     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2676     // mov $dst.hi,$src
 2677     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2678     // sar $dst.hi,31
 2679     emit_opcode( cbuf, 0xC1 );
 2680     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2681     emit_d8(cbuf, 0x1F );
 2682   %}
 2683 
 2684   enc_class convert_long_double( eRegL src ) %{
 2685     // push $src.hi
 2686     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2687     // push $src.lo
 2688     emit_opcode(cbuf, 0x50+$src$$reg  );
 2689     // fild 64-bits at [SP]
 2690     emit_opcode(cbuf,0xdf);
 2691     emit_d8(cbuf, 0x6C);
 2692     emit_d8(cbuf, 0x24);
 2693     emit_d8(cbuf, 0x00);
 2694     // pop stack
 2695     emit_opcode(cbuf, 0x83); // add  SP, #8
 2696     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2697     emit_d8(cbuf, 0x8);
 2698   %}
 2699 
 2700   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2701     // IMUL   EDX:EAX,$src1
 2702     emit_opcode( cbuf, 0xF7 );
 2703     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2704     // SAR    EDX,$cnt-32
 2705     int shift_count = ((int)$cnt$$constant) - 32;
 2706     if (shift_count > 0) {
 2707       emit_opcode(cbuf, 0xC1);
 2708       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2709       emit_d8(cbuf, shift_count);
 2710     }
 2711   %}
 2712 
  // This version doesn't have the trailing ADD ESP,8 to pop the stack
 2714   enc_class convert_long_double2( eRegL src ) %{
 2715     // push $src.hi
 2716     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2717     // push $src.lo
 2718     emit_opcode(cbuf, 0x50+$src$$reg  );
 2719     // fild 64-bits at [SP]
 2720     emit_opcode(cbuf,0xdf);
 2721     emit_d8(cbuf, 0x6C);
 2722     emit_d8(cbuf, 0x24);
 2723     emit_d8(cbuf, 0x00);
 2724   %}
 2725 
 2726   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2727     // Basic idea: long = (long)int * (long)int
 2728     // IMUL EDX:EAX, src
 2729     emit_opcode( cbuf, 0xF7 );
 2730     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2731   %}
 2732 
 2733   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2734     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2735     // MUL EDX:EAX, src
 2736     emit_opcode( cbuf, 0xF7 );
 2737     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2738   %}
 2739 
 2740   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2741     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2742     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2743     // MOV    $tmp,$src.lo
 2744     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2745     // IMUL   $tmp,EDX
 2746     emit_opcode( cbuf, 0x0F );
 2747     emit_opcode( cbuf, 0xAF );
 2748     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2749     // MOV    EDX,$src.hi
 2750     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2751     // IMUL   EDX,EAX
 2752     emit_opcode( cbuf, 0x0F );
 2753     emit_opcode( cbuf, 0xAF );
 2754     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2755     // ADD    $tmp,EDX
 2756     emit_opcode( cbuf, 0x03 );
 2757     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2758     // MUL   EDX:EAX,$src.lo
 2759     emit_opcode( cbuf, 0xF7 );
 2760     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2762     emit_opcode( cbuf, 0x03 );
 2763     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2764   %}
 2765 
 2766   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2767     // Basic idea: lo(result) = lo(src * y_lo)
 2768     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2769     // IMUL   $tmp,EDX,$src
 2770     emit_opcode( cbuf, 0x6B );
 2771     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2772     emit_d8( cbuf, (int)$src$$constant );
 2773     // MOV    EDX,$src
 2774     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2775     emit_d32( cbuf, (int)$src$$constant );
 2776     // MUL   EDX:EAX,EDX
 2777     emit_opcode( cbuf, 0xF7 );
 2778     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2780     emit_opcode( cbuf, 0x03 );
 2781     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2782   %}
 2783 
 2784   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2785     // PUSH src1.hi
 2786     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2787     // PUSH src1.lo
 2788     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2789     // PUSH src2.hi
 2790     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2791     // PUSH src2.lo
 2792     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2793     // CALL directly to the runtime
 2794     MacroAssembler _masm(&cbuf);
 2795     cbuf.set_insts_mark();
 2796     emit_opcode(cbuf,0xE8);       // Call into runtime
 2797     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2798     __ post_call_nop();
 2799     // Restore stack
 2800     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2801     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2802     emit_d8(cbuf, 4*4);
 2803   %}
 2804 
 2805   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2806     // PUSH src1.hi
 2807     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2808     // PUSH src1.lo
 2809     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2810     // PUSH src2.hi
 2811     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2812     // PUSH src2.lo
 2813     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2814     // CALL directly to the runtime
 2815     MacroAssembler _masm(&cbuf);
 2816     cbuf.set_insts_mark();
 2817     emit_opcode(cbuf,0xE8);       // Call into runtime
 2818     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2819     __ post_call_nop();
 2820     // Restore stack
 2821     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2822     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2823     emit_d8(cbuf, 4*4);
 2824   %}
 2825 
 2826   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2827     // MOV   $tmp,$src.lo
 2828     emit_opcode(cbuf, 0x8B);
 2829     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2830     // OR    $tmp,$src.hi
 2831     emit_opcode(cbuf, 0x0B);
 2832     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2833   %}
 2834 
 2835   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2836     // CMP    $src1.lo,$src2.lo
 2837     emit_opcode( cbuf, 0x3B );
 2838     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2839     // JNE,s  skip
 2840     emit_cc(cbuf, 0x70, 0x5);
 2841     emit_d8(cbuf,2);
 2842     // CMP    $src1.hi,$src2.hi
 2843     emit_opcode( cbuf, 0x3B );
 2844     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2845   %}
 2846 
 2847   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2848     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2849     emit_opcode( cbuf, 0x3B );
 2850     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2851     // MOV    $tmp,$src1.hi
 2852     emit_opcode( cbuf, 0x8B );
 2853     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2854     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2855     emit_opcode( cbuf, 0x1B );
 2856     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2857   %}
 2858 
 2859   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2860     // XOR    $tmp,$tmp
 2861     emit_opcode(cbuf,0x33);  // XOR
 2862     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2863     // CMP    $tmp,$src.lo
 2864     emit_opcode( cbuf, 0x3B );
 2865     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2866     // SBB    $tmp,$src.hi
 2867     emit_opcode( cbuf, 0x1B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2869   %}
 2870 
 2871  // Sniff, sniff... smells like Gnu Superoptimizer
 2872   enc_class neg_long( eRegL dst ) %{
 2873     emit_opcode(cbuf,0xF7);    // NEG hi
 2874     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2875     emit_opcode(cbuf,0xF7);    // NEG lo
 2876     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2877     emit_opcode(cbuf,0x83);    // SBB hi,0
 2878     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2879     emit_d8    (cbuf,0 );
 2880   %}
 2881 
 2882   enc_class enc_pop_rdx() %{
 2883     emit_opcode(cbuf,0x5A);
 2884   %}
 2885 
 2886   enc_class enc_rethrow() %{
 2887     MacroAssembler _masm(&cbuf);
 2888     cbuf.set_insts_mark();
 2889     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2890     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2891                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2892     __ post_call_nop();
 2893   %}
 2894 
 2895 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  For the corner cases the hardware stores
  // the integer indefinite value (0x80000000); the code below detects that
  // and calls a runtime stub to patch up the correct value.
 2901   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them, and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
 2909     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2910     emit_opcode(cbuf,0x2D);
 2911     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2912     // Allocate a word
 2913     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2914     emit_opcode(cbuf,0xEC);
 2915     emit_d8(cbuf,0x04);
 2916     // Encoding assumes a double has been pushed into FPR0.
 2917     // Store down the double as an int, popping the FPU stack
 2918     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2919     emit_opcode(cbuf,0x1C);
 2920     emit_d8(cbuf,0x24);
 2921     // Restore the rounding mode; mask the exception
 2922     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2923     emit_opcode(cbuf,0x2D);
 2924     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2925         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2926         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2927 
 2928     // Load the converted int; adjust CPU stack
 2929     emit_opcode(cbuf,0x58);       // POP EAX
 2930     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2931     emit_d32   (cbuf,0x80000000); //         0x80000000
 2932     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2933     emit_d8    (cbuf,0x07);       // Size of slow_call
 2934     // Push src onto stack slow-path
 2935     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2936     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2937     // CALL directly to the runtime
 2938     MacroAssembler _masm(&cbuf);
 2939     cbuf.set_insts_mark();
 2940     emit_opcode(cbuf,0xE8);       // Call into runtime
 2941     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2942     __ post_call_nop();
 2943     // Carry on here...
 2944   %}
 2945 
 2946   enc_class DPR2L_encoding( regDPR src ) %{
 2947     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2948     emit_opcode(cbuf,0x2D);
 2949     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
 2951     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2952     emit_opcode(cbuf,0xEC);
 2953     emit_d8(cbuf,0x08);
 2954     // Encoding assumes a double has been pushed into FPR0.
 2955     // Store down the double as a long, popping the FPU stack
 2956     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2957     emit_opcode(cbuf,0x3C);
 2958     emit_d8(cbuf,0x24);
 2959     // Restore the rounding mode; mask the exception
 2960     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2961     emit_opcode(cbuf,0x2D);
 2962     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2963         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2964         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2965 
    // Load the converted long; adjust CPU stack
 2967     emit_opcode(cbuf,0x58);       // POP EAX
 2968     emit_opcode(cbuf,0x5A);       // POP EDX
 2969     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2970     emit_d8    (cbuf,0xFA);       // rdx
 2971     emit_d32   (cbuf,0x80000000); //         0x80000000
 2972     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2973     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2974     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2975     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2976     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2977     emit_d8    (cbuf,0x07);       // Size of slow_call
 2978     // Push src onto stack slow-path
 2979     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2980     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2981     // CALL directly to the runtime
 2982     MacroAssembler _masm(&cbuf);
 2983     cbuf.set_insts_mark();
 2984     emit_opcode(cbuf,0xE8);       // Call into runtime
 2985     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2986     __ post_call_nop();
 2987     // Carry on here...
 2988   %}
 2989 
 2990   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2991     // Operand was loaded from memory into fp ST (stack top)
 2992     // FMUL   ST,$src  /* D8 C8+i */
 2993     emit_opcode(cbuf, 0xD8);
 2994     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2995   %}
 2996 
 2997   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 2999     emit_opcode(cbuf, 0xD8);
 3000     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3001     //could use FADDP  src2,fpST  /* DE C0+i */
 3002   %}
 3003 
 3004   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3005     // FADDP  src2,ST  /* DE C0+i */
 3006     emit_opcode(cbuf, 0xDE);
 3007     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3008   %}
 3009 
 3010   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3011     // Operand has been loaded into fp ST (stack top)
 3012       // FSUB   ST,$src1
 3013       emit_opcode(cbuf, 0xD8);
 3014       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3015 
 3016       // FDIV
 3017       emit_opcode(cbuf, 0xD8);
 3018       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3019   %}
 3020 
 3021   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3022     // Operand was loaded from memory into fp ST (stack top)
 3023     // FADD   ST,$src  /* D8 C0+i */
 3024     emit_opcode(cbuf, 0xD8);
 3025     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3026 
    // FMUL  ST,src2  /* D8 C8+i */
 3028     emit_opcode(cbuf, 0xD8);
 3029     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3030   %}
 3031 
 3032 
 3033   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3034     // Operand was loaded from memory into fp ST (stack top)
 3035     // FADD   ST,$src  /* D8 C0+i */
 3036     emit_opcode(cbuf, 0xD8);
 3037     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3038 
 3039     // FMULP  src2,ST  /* DE C8+i */
 3040     emit_opcode(cbuf, 0xDE);
 3041     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3042   %}
 3043 
 3044   // Atomically load the volatile long
 3045   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3046     emit_opcode(cbuf,0xDF);
 3047     int rm_byte_opcode = 0x05;
 3048     int base     = $mem$$base;
 3049     int index    = $mem$$index;
 3050     int scale    = $mem$$scale;
 3051     int displace = $mem$$disp;
 3052     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3053     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3054     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3055   %}
 3056 
 3057   // Volatile Store Long.  Must be atomic, so move it into
 3058   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3059   // target address before the store (for null-ptr checks)
 3060   // so the memory operand is used twice in the encoding.
 3061   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3062     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3063     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3064     emit_opcode(cbuf,0xDF);
 3065     int rm_byte_opcode = 0x07;
 3066     int base     = $mem$$base;
 3067     int index    = $mem$$index;
 3068     int scale    = $mem$$scale;
 3069     int displace = $mem$$disp;
 3070     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3071     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3072   %}
 3073 
 3074 %}
 3075 
 3076 
 3077 //----------FRAME--------------------------------------------------------------
 3078 // Definition of frame structure and management information.
 3079 //
 3080 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3081 //                             |   (to get allocators register number
 3082 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3083 //  r   CALLER     |        |
 3084 //  o     |        +--------+      pad to even-align allocators stack-slot
 3085 //  w     V        |  pad0  |        numbers; owned by CALLER
 3086 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3087 //  h     ^        |   in   |  5
 3088 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3089 //  |     |        |        |  3
 3090 //  |     |        +--------+
 3091 //  V     |        | old out|      Empty on Intel, window on Sparc
 3092 //        |    old |preserve|      Must be even aligned.
 3093 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3094 //        |        |   in   |  3   area for Intel ret address
 3095 //     Owned by    |preserve|      Empty on Sparc.
 3096 //       SELF      +--------+
 3097 //        |        |  pad2  |  2   pad to align old SP
 3098 //        |        +--------+  1
 3099 //        |        | locks  |  0
 3100 //        |        +--------+----> OptoReg::stack0(), even aligned
 3101 //        |        |  pad1  | 11   pad to align new SP
 3102 //        |        +--------+
 3103 //        |        |        | 10
 3104 //        |        | spills |  9   spills
 3105 //        V        |        |  8   (pad0 slot for callee)
 3106 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3107 //        ^        |  out   |  7
 3108 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3109 //     Owned by    +--------+
 3110 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3111 //        |    new |preserve|      Must be even-aligned.
 3112 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3113 //        |        |        |
 3114 //
 3115 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3116 //         known from SELF's arguments and the Java calling convention.
 3117 //         Region 6-7 is determined per call site.
 3118 // Note 2: If the calling convention leaves holes in the incoming argument
 3119 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3120 //         are owned by the CALLEE.  Holes should not be necessary in the
 3121 //         incoming area, as the Java calling convention is completely under
 3122 //         the control of the AD file.  Doubles can be sorted and packed to
 3123 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3124 //         varargs C calling conventions.
 3125 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3126 //         even aligned with pad0 as needed.
 3127 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3128 //         region 6-11 is even aligned; it may be padded out more so that
 3129 //         the region from SP to FP meets the minimum stack alignment.
 3130 
 3131 frame %{
 3132   // These three registers define part of the calling convention
 3133   // between compiled code and the interpreter.
 3134   inline_cache_reg(EAX);                // Inline Cache Register
 3135 
 3136   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3137   cisc_spilling_operand_name(indOffset32);
 3138 
 3139   // Number of stack slots consumed by locking an object
 3140   sync_stack_slots(1);
 3141 
 3142   // Compiled code's Frame Pointer
 3143   frame_pointer(ESP);
 3144   // Interpreter stores its frame pointer in a register which is
 3145   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3147   interpreter_frame_pointer(EBP);
 3148 
 3149   // Stack alignment requirement
 3150   // Alignment size in bytes (128-bit -> 16 bytes)
 3151   stack_alignment(StackAlignmentInBytes);
 3152 
 3153   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3154   // for calls to C.  Supports the var-args backing area for register parms.
 3155   varargs_C_out_slots_killed(0);
 3156 
 3157   // The after-PROLOG location of the return address.  Location of
 3158   // return address specifies a type (REG or STACK) and a number
 3159   // representing the register number (i.e. - use a register name) or
 3160   // stack slot.
 3161   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3162   // Otherwise, it is above the locks and verification slot and alignment word
 3163   return_addr(STACK - 1 +
 3164               align_up((Compile::current()->in_preserve_stack_slots() +
 3165                         Compile::current()->fixed_slots()),
 3166                        stack_alignment_in_slots()));
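  // Worked example with hypothetical values: 3 preserve slots, 1 fixed slot
  // and a 4-slot (16-byte) stack alignment give align_up(3 + 1, 4) == 4, so
  // the return address would land in stack slot 4 - 1 == 3 above stack0.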
 3167 
 3168   // Location of C & interpreter return values
 3169   c_return_value %{
 3170     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3171     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3172     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3173 
 3174     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3175     // that C functions return float and double results in XMM0.
 3176     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3177       return OptoRegPair(XMM0b_num,XMM0_num);
 3178     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3179       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3180 
 3181     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3182   %}
 3183 
 3184   // Location of return values
 3185   return_value %{
 3186     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3187     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3188     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3189     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3190       return OptoRegPair(XMM0b_num,XMM0_num);
 3191     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3192       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3193     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3194   %}
 3195 
 3196 %}
 3197 
 3198 //----------ATTRIBUTES---------------------------------------------------------
 3199 //----------Operand Attributes-------------------------------------------------
 3200 op_attrib op_cost(0);        // Required cost attribute
 3201 
 3202 //----------Instruction Attributes---------------------------------------------
 3203 ins_attrib ins_cost(100);       // Required cost attribute
 3204 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3205 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3206                                 // non-matching short branch variant of some
                                // long branch?
 3208 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3209                                 // specifies the alignment that some part of the instruction (not
 3210                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3211                                 // function must be provided for the instruction
 3212 
 3213 //----------OPERANDS-----------------------------------------------------------
 3214 // Operand definitions must precede instruction definitions for correct parsing
 3215 // in the ADLC because operands constitute user defined types which are used in
 3216 // instruction definitions.
 3217 
 3218 //----------Simple Operands----------------------------------------------------
 3219 // Immediate Operands
 3220 // Integer Immediate
 3221 operand immI() %{
 3222   match(ConI);
 3223 
 3224   op_cost(10);
 3225   format %{ %}
 3226   interface(CONST_INTER);
 3227 %}
 3228 
 3229 // Constant for test vs zero
 3230 operand immI_0() %{
 3231   predicate(n->get_int() == 0);
 3232   match(ConI);
 3233 
 3234   op_cost(0);
 3235   format %{ %}
 3236   interface(CONST_INTER);
 3237 %}
 3238 
 3239 // Constant for increment
 3240 operand immI_1() %{
 3241   predicate(n->get_int() == 1);
 3242   match(ConI);
 3243 
 3244   op_cost(0);
 3245   format %{ %}
 3246   interface(CONST_INTER);
 3247 %}
 3248 
 3249 // Constant for decrement
 3250 operand immI_M1() %{
 3251   predicate(n->get_int() == -1);
 3252   match(ConI);
 3253 
 3254   op_cost(0);
 3255   format %{ %}
 3256   interface(CONST_INTER);
 3257 %}
 3258 
 3259 // Valid scale values for addressing modes
 3260 operand immI2() %{
 3261   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3262   match(ConI);
 3263 
 3264   format %{ %}
 3265   interface(CONST_INTER);
 3266 %}
 3267 
 3268 operand immI8() %{
 3269   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3270   match(ConI);
 3271 
 3272   op_cost(5);
 3273   format %{ %}
 3274   interface(CONST_INTER);
 3275 %}
 3276 
 3277 operand immU8() %{
 3278   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3279   match(ConI);
 3280 
 3281   op_cost(5);
 3282   format %{ %}
 3283   interface(CONST_INTER);
 3284 %}
 3285 
 3286 operand immI16() %{
 3287   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3288   match(ConI);
 3289 
 3290   op_cost(10);
 3291   format %{ %}
 3292   interface(CONST_INTER);
 3293 %}
 3294 
 3295 // Int Immediate non-negative
 3296 operand immU31()
 3297 %{
 3298   predicate(n->get_int() >= 0);
 3299   match(ConI);
 3300 
 3301   op_cost(0);
 3302   format %{ %}
 3303   interface(CONST_INTER);
 3304 %}
 3305 
 3306 // Constant for long shifts
 3307 operand immI_32() %{
 3308   predicate( n->get_int() == 32 );
 3309   match(ConI);
 3310 
 3311   op_cost(0);
 3312   format %{ %}
 3313   interface(CONST_INTER);
 3314 %}
 3315 
 3316 operand immI_1_31() %{
 3317   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3318   match(ConI);
 3319 
 3320   op_cost(0);
 3321   format %{ %}
 3322   interface(CONST_INTER);
 3323 %}
 3324 
 3325 operand immI_32_63() %{
 3326   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3327   match(ConI);
 3328   op_cost(0);
 3329 
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 operand immI_2() %{
 3335   predicate( n->get_int() == 2 );
 3336   match(ConI);
 3337 
 3338   op_cost(0);
 3339   format %{ %}
 3340   interface(CONST_INTER);
 3341 %}
 3342 
 3343 operand immI_3() %{
 3344   predicate( n->get_int() == 3 );
 3345   match(ConI);
 3346 
 3347   op_cost(0);
 3348   format %{ %}
 3349   interface(CONST_INTER);
 3350 %}
 3351 
 3352 operand immI_4()
 3353 %{
 3354   predicate(n->get_int() == 4);
 3355   match(ConI);
 3356 
 3357   op_cost(0);
 3358   format %{ %}
 3359   interface(CONST_INTER);
 3360 %}
 3361 
 3362 operand immI_8()
 3363 %{
 3364   predicate(n->get_int() == 8);
 3365   match(ConI);
 3366 
 3367   op_cost(0);
 3368   format %{ %}
 3369   interface(CONST_INTER);
 3370 %}
 3371 
 3372 // Pointer Immediate
 3373 operand immP() %{
 3374   match(ConP);
 3375 
 3376   op_cost(10);
 3377   format %{ %}
 3378   interface(CONST_INTER);
 3379 %}
 3380 
 3381 // NULL Pointer Immediate
 3382 operand immP0() %{
 3383   predicate( n->get_ptr() == 0 );
 3384   match(ConP);
 3385   op_cost(0);
 3386 
 3387   format %{ %}
 3388   interface(CONST_INTER);
 3389 %}
 3390 
 3391 // Long Immediate
 3392 operand immL() %{
 3393   match(ConL);
 3394 
 3395   op_cost(20);
 3396   format %{ %}
 3397   interface(CONST_INTER);
 3398 %}
 3399 
 3400 // Long Immediate zero
 3401 operand immL0() %{
 3402   predicate( n->get_long() == 0L );
 3403   match(ConL);
 3404   op_cost(0);
 3405 
 3406   format %{ %}
 3407   interface(CONST_INTER);
 3408 %}
 3409 
// Long Immediate minus one
 3411 operand immL_M1() %{
 3412   predicate( n->get_long() == -1L );
 3413   match(ConL);
 3414   op_cost(0);
 3415 
 3416   format %{ %}
 3417   interface(CONST_INTER);
 3418 %}
 3419 
 3420 // Long immediate from 0 to 127.
 3421 // Used for a shorter form of long mul by 10.
 3422 operand immL_127() %{
 3423   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3424   match(ConL);
 3425   op_cost(0);
 3426 
 3427   format %{ %}
 3428   interface(CONST_INTER);
 3429 %}
 3430 
 3431 // Long Immediate: low 32-bit mask
 3432 operand immL_32bits() %{
 3433   predicate(n->get_long() == 0xFFFFFFFFL);
 3434   match(ConL);
 3435   op_cost(0);
 3436 
 3437   format %{ %}
 3438   interface(CONST_INTER);
 3439 %}
 3440 
// Long Immediate: 32-bit signed value
 3442 operand immL32() %{
 3443   predicate(n->get_long() == (int)(n->get_long()));
 3444   match(ConL);
 3445   op_cost(20);
 3446 
 3447   format %{ %}
 3448   interface(CONST_INTER);
 3449 %}
 3450 
// Double Immediate zero
 3452 operand immDPR0() %{
 3453   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3454   // bug that generates code such that NaNs compare equal to 0.0
 3455   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3456   match(ConD);
 3457 
 3458   op_cost(5);
 3459   format %{ %}
 3460   interface(CONST_INTER);
 3461 %}
 3462 
 3463 // Double Immediate one
 3464 operand immDPR1() %{
 3465   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3466   match(ConD);
 3467 
 3468   op_cost(5);
 3469   format %{ %}
 3470   interface(CONST_INTER);
 3471 %}
 3472 
 3473 // Double Immediate
 3474 operand immDPR() %{
 3475   predicate(UseSSE<=1);
 3476   match(ConD);
 3477 
 3478   op_cost(5);
 3479   format %{ %}
 3480   interface(CONST_INTER);
 3481 %}
 3482 
 3483 operand immD() %{
 3484   predicate(UseSSE>=2);
 3485   match(ConD);
 3486 
 3487   op_cost(5);
 3488   format %{ %}
 3489   interface(CONST_INTER);
 3490 %}
 3491 
 3492 // Double Immediate zero
 3493 operand immD0() %{
 3494   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3495   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3496   // compare equal to -0.0.
 3497   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3498   match(ConD);
 3499 
 3500   format %{ %}
 3501   interface(CONST_INTER);
 3502 %}
 3503 
 3504 // Float Immediate zero
 3505 operand immFPR0() %{
 3506   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3507   match(ConF);
 3508 
 3509   op_cost(5);
 3510   format %{ %}
 3511   interface(CONST_INTER);
 3512 %}
 3513 
 3514 // Float Immediate one
 3515 operand immFPR1() %{
 3516   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3517   match(ConF);
 3518 
 3519   op_cost(5);
 3520   format %{ %}
 3521   interface(CONST_INTER);
 3522 %}
 3523 
 3524 // Float Immediate
 3525 operand immFPR() %{
 3526   predicate( UseSSE == 0 );
 3527   match(ConF);
 3528 
 3529   op_cost(5);
 3530   format %{ %}
 3531   interface(CONST_INTER);
 3532 %}
 3533 
 3534 // Float Immediate
 3535 operand immF() %{
 3536   predicate(UseSSE >= 1);
 3537   match(ConF);
 3538 
 3539   op_cost(5);
 3540   format %{ %}
 3541   interface(CONST_INTER);
 3542 %}
 3543 
 3544 // Float Immediate zero.  Zero and not -0.0
 3545 operand immF0() %{
 3546   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3547   match(ConF);
 3548 
 3549   op_cost(5);
 3550   format %{ %}
 3551   interface(CONST_INTER);
 3552 %}
 3553 
 3554 // Immediates for special shifts (sign extend)
 3555 
// Constants for sign-extending shifts
 3557 operand immI_16() %{
 3558   predicate( n->get_int() == 16 );
 3559   match(ConI);
 3560 
 3561   format %{ %}
 3562   interface(CONST_INTER);
 3563 %}
 3564 
 3565 operand immI_24() %{
 3566   predicate( n->get_int() == 24 );
 3567   match(ConI);
 3568 
 3569   format %{ %}
 3570   interface(CONST_INTER);
 3571 %}
 3572 
 3573 // Constant for byte-wide masking
 3574 operand immI_255() %{
 3575   predicate( n->get_int() == 255 );
 3576   match(ConI);
 3577 
 3578   format %{ %}
 3579   interface(CONST_INTER);
 3580 %}
 3581 
 3582 // Constant for short-wide masking
 3583 operand immI_65535() %{
 3584   predicate(n->get_int() == 65535);
 3585   match(ConI);
 3586 
 3587   format %{ %}
 3588   interface(CONST_INTER);
 3589 %}
 3590 
 3591 operand kReg()
 3592 %{
 3593   constraint(ALLOC_IN_RC(vectmask_reg));
 3594   match(RegVectMask);
 3595   format %{%}
 3596   interface(REG_INTER);
 3597 %}
 3598 
 3599 operand kReg_K1()
 3600 %{
 3601   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3602   match(RegVectMask);
 3603   format %{%}
 3604   interface(REG_INTER);
 3605 %}
 3606 
 3607 operand kReg_K2()
 3608 %{
 3609   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3610   match(RegVectMask);
 3611   format %{%}
 3612   interface(REG_INTER);
 3613 %}
 3614 
 3615 // Special Registers
 3616 operand kReg_K3()
 3617 %{
 3618   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3619   match(RegVectMask);
 3620   format %{%}
 3621   interface(REG_INTER);
 3622 %}
 3623 
 3624 operand kReg_K4()
 3625 %{
 3626   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3627   match(RegVectMask);
 3628   format %{%}
 3629   interface(REG_INTER);
 3630 %}
 3631 
 3632 operand kReg_K5()
 3633 %{
 3634   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3635   match(RegVectMask);
 3636   format %{%}
 3637   interface(REG_INTER);
 3638 %}
 3639 
 3640 operand kReg_K6()
 3641 %{
 3642   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3643   match(RegVectMask);
 3644   format %{%}
 3645   interface(REG_INTER);
 3646 %}
 3647 
 3648 // Special Registers
 3649 operand kReg_K7()
 3650 %{
 3651   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3652   match(RegVectMask);
 3653   format %{%}
 3654   interface(REG_INTER);
 3655 %}
 3656 
 3657 // Register Operands
 3658 // Integer Register
 3659 operand rRegI() %{
 3660   constraint(ALLOC_IN_RC(int_reg));
 3661   match(RegI);
 3662   match(xRegI);
 3663   match(eAXRegI);
 3664   match(eBXRegI);
 3665   match(eCXRegI);
 3666   match(eDXRegI);
 3667   match(eDIRegI);
 3668   match(eSIRegI);
 3669 
 3670   format %{ %}
 3671   interface(REG_INTER);
 3672 %}
 3673 
 3674 // Subset of Integer Register
 3675 operand xRegI(rRegI reg) %{
 3676   constraint(ALLOC_IN_RC(int_x_reg));
 3677   match(reg);
 3678   match(eAXRegI);
 3679   match(eBXRegI);
 3680   match(eCXRegI);
 3681   match(eDXRegI);
 3682 
 3683   format %{ %}
 3684   interface(REG_INTER);
 3685 %}
 3686 
 3687 // Special Registers
 3688 operand eAXRegI(xRegI reg) %{
 3689   constraint(ALLOC_IN_RC(eax_reg));
 3690   match(reg);
 3691   match(rRegI);
 3692 
 3693   format %{ "EAX" %}
 3694   interface(REG_INTER);
 3695 %}
 3696 
 3697 // Special Registers
 3698 operand eBXRegI(xRegI reg) %{
 3699   constraint(ALLOC_IN_RC(ebx_reg));
 3700   match(reg);
 3701   match(rRegI);
 3702 
 3703   format %{ "EBX" %}
 3704   interface(REG_INTER);
 3705 %}
 3706 
 3707 operand eCXRegI(xRegI reg) %{
 3708   constraint(ALLOC_IN_RC(ecx_reg));
 3709   match(reg);
 3710   match(rRegI);
 3711 
 3712   format %{ "ECX" %}
 3713   interface(REG_INTER);
 3714 %}
 3715 
 3716 operand eDXRegI(xRegI reg) %{
 3717   constraint(ALLOC_IN_RC(edx_reg));
 3718   match(reg);
 3719   match(rRegI);
 3720 
 3721   format %{ "EDX" %}
 3722   interface(REG_INTER);
 3723 %}
 3724 
 3725 operand eDIRegI(xRegI reg) %{
 3726   constraint(ALLOC_IN_RC(edi_reg));
 3727   match(reg);
 3728   match(rRegI);
 3729 
 3730   format %{ "EDI" %}
 3731   interface(REG_INTER);
 3732 %}
 3733 
 3734 operand naxRegI() %{
 3735   constraint(ALLOC_IN_RC(nax_reg));
 3736   match(RegI);
 3737   match(eCXRegI);
 3738   match(eDXRegI);
 3739   match(eSIRegI);
 3740   match(eDIRegI);
 3741 
 3742   format %{ %}
 3743   interface(REG_INTER);
 3744 %}
 3745 
 3746 operand nadxRegI() %{
 3747   constraint(ALLOC_IN_RC(nadx_reg));
 3748   match(RegI);
 3749   match(eBXRegI);
 3750   match(eCXRegI);
 3751   match(eSIRegI);
 3752   match(eDIRegI);
 3753 
 3754   format %{ %}
 3755   interface(REG_INTER);
 3756 %}
 3757 
 3758 operand ncxRegI() %{
 3759   constraint(ALLOC_IN_RC(ncx_reg));
 3760   match(RegI);
 3761   match(eAXRegI);
 3762   match(eDXRegI);
 3763   match(eSIRegI);
 3764   match(eDIRegI);
 3765 
 3766   format %{ %}
 3767   interface(REG_INTER);
 3768 %}
 3769 
 3770 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3771 // //
 3772 operand eSIRegI(xRegI reg) %{
 3773    constraint(ALLOC_IN_RC(esi_reg));
 3774    match(reg);
 3775    match(rRegI);
 3776 
 3777    format %{ "ESI" %}
 3778    interface(REG_INTER);
 3779 %}
 3780 
 3781 // Pointer Register
 3782 operand anyRegP() %{
 3783   constraint(ALLOC_IN_RC(any_reg));
 3784   match(RegP);
 3785   match(eAXRegP);
 3786   match(eBXRegP);
 3787   match(eCXRegP);
 3788   match(eDIRegP);
 3789   match(eRegP);
 3790 
 3791   format %{ %}
 3792   interface(REG_INTER);
 3793 %}
 3794 
 3795 operand eRegP() %{
 3796   constraint(ALLOC_IN_RC(int_reg));
 3797   match(RegP);
 3798   match(eAXRegP);
 3799   match(eBXRegP);
 3800   match(eCXRegP);
 3801   match(eDIRegP);
 3802 
 3803   format %{ %}
 3804   interface(REG_INTER);
 3805 %}
 3806 
 3807 operand rRegP() %{
 3808   constraint(ALLOC_IN_RC(int_reg));
 3809   match(RegP);
 3810   match(eAXRegP);
 3811   match(eBXRegP);
 3812   match(eCXRegP);
 3813   match(eDIRegP);
 3814 
 3815   format %{ %}
 3816   interface(REG_INTER);
 3817 %}
 3818 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3820 operand eRegP_no_EBP() %{
 3821   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3822   match(RegP);
 3823   match(eAXRegP);
 3824   match(eBXRegP);
 3825   match(eCXRegP);
 3826   match(eDIRegP);
 3827 
 3828   op_cost(100);
 3829   format %{ %}
 3830   interface(REG_INTER);
 3831 %}
 3832 
 3833 operand naxRegP() %{
 3834   constraint(ALLOC_IN_RC(nax_reg));
 3835   match(RegP);
 3836   match(eBXRegP);
 3837   match(eDXRegP);
 3838   match(eCXRegP);
 3839   match(eSIRegP);
 3840   match(eDIRegP);
 3841 
 3842   format %{ %}
 3843   interface(REG_INTER);
 3844 %}
 3845 
 3846 operand nabxRegP() %{
 3847   constraint(ALLOC_IN_RC(nabx_reg));
 3848   match(RegP);
 3849   match(eCXRegP);
 3850   match(eDXRegP);
 3851   match(eSIRegP);
 3852   match(eDIRegP);
 3853 
 3854   format %{ %}
 3855   interface(REG_INTER);
 3856 %}
 3857 
 3858 operand pRegP() %{
 3859   constraint(ALLOC_IN_RC(p_reg));
 3860   match(RegP);
 3861   match(eBXRegP);
 3862   match(eDXRegP);
 3863   match(eSIRegP);
 3864   match(eDIRegP);
 3865 
 3866   format %{ %}
 3867   interface(REG_INTER);
 3868 %}
 3869 
 3870 // Special Registers
 3871 // Return a pointer value
 3872 operand eAXRegP(eRegP reg) %{
 3873   constraint(ALLOC_IN_RC(eax_reg));
 3874   match(reg);
 3875   format %{ "EAX" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 
 3879 // Used in AtomicAdd
 3880 operand eBXRegP(eRegP reg) %{
 3881   constraint(ALLOC_IN_RC(ebx_reg));
 3882   match(reg);
 3883   format %{ "EBX" %}
 3884   interface(REG_INTER);
 3885 %}
 3886 
 3887 // Tail-call (interprocedural jump) to interpreter
 3888 operand eCXRegP(eRegP reg) %{
 3889   constraint(ALLOC_IN_RC(ecx_reg));
 3890   match(reg);
 3891   format %{ "ECX" %}
 3892   interface(REG_INTER);
 3893 %}
 3894 
 3895 operand eDXRegP(eRegP reg) %{
 3896   constraint(ALLOC_IN_RC(edx_reg));
 3897   match(reg);
 3898   format %{ "EDX" %}
 3899   interface(REG_INTER);
 3900 %}
 3901 
 3902 operand eSIRegP(eRegP reg) %{
 3903   constraint(ALLOC_IN_RC(esi_reg));
 3904   match(reg);
 3905   format %{ "ESI" %}
 3906   interface(REG_INTER);
 3907 %}
 3908 
 3909 // Used in rep stosw
 3910 operand eDIRegP(eRegP reg) %{
 3911   constraint(ALLOC_IN_RC(edi_reg));
 3912   match(reg);
 3913   format %{ "EDI" %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 operand eRegL() %{
 3918   constraint(ALLOC_IN_RC(long_reg));
 3919   match(RegL);
 3920   match(eADXRegL);
 3921 
 3922   format %{ %}
 3923   interface(REG_INTER);
 3924 %}
 3925 
 3926 operand eADXRegL( eRegL reg ) %{
 3927   constraint(ALLOC_IN_RC(eadx_reg));
 3928   match(reg);
 3929 
 3930   format %{ "EDX:EAX" %}
 3931   interface(REG_INTER);
 3932 %}
 3933 
 3934 operand eBCXRegL( eRegL reg ) %{
 3935   constraint(ALLOC_IN_RC(ebcx_reg));
 3936   match(reg);
 3937 
 3938   format %{ "EBX:ECX" %}
 3939   interface(REG_INTER);
 3940 %}
 3941 
 3942 operand eBDPRegL( eRegL reg ) %{
 3943   constraint(ALLOC_IN_RC(ebpd_reg));
 3944   match(reg);
 3945 
 3946   format %{ "EBP:EDI" %}
 3947   interface(REG_INTER);
 3948 %}
 3949 // Special case for integer high multiply
 3950 operand eADXRegL_low_only() %{
 3951   constraint(ALLOC_IN_RC(eadx_reg));
 3952   match(RegL);
 3953 
 3954   format %{ "EAX" %}
 3955   interface(REG_INTER);
 3956 %}
 3957 
 3958 // Flags register, used as output of compare instructions
 3959 operand rFlagsReg() %{
 3960   constraint(ALLOC_IN_RC(int_flags));
 3961   match(RegFlags);
 3962 
 3963   format %{ "EFLAGS" %}
 3964   interface(REG_INTER);
 3965 %}
 3966 
 3967 // Flags register, used as output of compare instructions
 3968 operand eFlagsReg() %{
 3969   constraint(ALLOC_IN_RC(int_flags));
 3970   match(RegFlags);
 3971 
 3972   format %{ "EFLAGS" %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Flags register, used as output of FLOATING POINT compare instructions
 3977 operand eFlagsRegU() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980 
 3981   format %{ "EFLAGS_U" %}
 3982   interface(REG_INTER);
 3983 %}
 3984 
 3985 operand eFlagsRegUCF() %{
 3986   constraint(ALLOC_IN_RC(int_flags));
 3987   match(RegFlags);
 3988   predicate(false);
 3989 
 3990   format %{ "EFLAGS_U_CF" %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Condition Code Register used by long compare
 3995 operand flagsReg_long_LTGE() %{
 3996   constraint(ALLOC_IN_RC(int_flags));
 3997   match(RegFlags);
 3998   format %{ "FLAGS_LTGE" %}
 3999   interface(REG_INTER);
 4000 %}
 4001 operand flagsReg_long_EQNE() %{
 4002   constraint(ALLOC_IN_RC(int_flags));
 4003   match(RegFlags);
 4004   format %{ "FLAGS_EQNE" %}
 4005   interface(REG_INTER);
 4006 %}
 4007 operand flagsReg_long_LEGT() %{
 4008   constraint(ALLOC_IN_RC(int_flags));
 4009   match(RegFlags);
 4010   format %{ "FLAGS_LEGT" %}
 4011   interface(REG_INTER);
 4012 %}
 4013 
 4014 // Condition Code Register used by unsigned long compare
 4015 operand flagsReg_ulong_LTGE() %{
 4016   constraint(ALLOC_IN_RC(int_flags));
 4017   match(RegFlags);
 4018   format %{ "FLAGS_U_LTGE" %}
 4019   interface(REG_INTER);
 4020 %}
 4021 operand flagsReg_ulong_EQNE() %{
 4022   constraint(ALLOC_IN_RC(int_flags));
 4023   match(RegFlags);
 4024   format %{ "FLAGS_U_EQNE" %}
 4025   interface(REG_INTER);
 4026 %}
 4027 operand flagsReg_ulong_LEGT() %{
 4028   constraint(ALLOC_IN_RC(int_flags));
 4029   match(RegFlags);
 4030   format %{ "FLAGS_U_LEGT" %}
 4031   interface(REG_INTER);
 4032 %}
 4033 
 4034 // Float register operands
 4035 operand regDPR() %{
 4036   predicate( UseSSE < 2 );
 4037   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4038   match(RegD);
 4039   match(regDPR1);
 4040   match(regDPR2);
 4041   format %{ %}
 4042   interface(REG_INTER);
 4043 %}
 4044 
 4045 operand regDPR1(regDPR reg) %{
 4046   predicate( UseSSE < 2 );
 4047   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4048   match(reg);
 4049   format %{ "FPR1" %}
 4050   interface(REG_INTER);
 4051 %}
 4052 
 4053 operand regDPR2(regDPR reg) %{
 4054   predicate( UseSSE < 2 );
 4055   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4056   match(reg);
 4057   format %{ "FPR2" %}
 4058   interface(REG_INTER);
 4059 %}
 4060 
 4061 operand regnotDPR1(regDPR reg) %{
 4062   predicate( UseSSE < 2 );
 4063   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4064   match(reg);
 4065   format %{ %}
 4066   interface(REG_INTER);
 4067 %}
 4068 
 4069 // Float register operands
 4070 operand regFPR() %{
 4071   predicate( UseSSE < 2 );
 4072   constraint(ALLOC_IN_RC(fp_flt_reg));
 4073   match(RegF);
 4074   match(regFPR1);
 4075   format %{ %}
 4076   interface(REG_INTER);
 4077 %}
 4078 
 4079 // Float register operands
 4080 operand regFPR1(regFPR reg) %{
 4081   predicate( UseSSE < 2 );
 4082   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4083   match(reg);
 4084   format %{ "FPR1" %}
 4085   interface(REG_INTER);
 4086 %}
 4087 
 4088 // XMM Float register operands
 4089 operand regF() %{
 4090   predicate( UseSSE>=1 );
 4091   constraint(ALLOC_IN_RC(float_reg_legacy));
 4092   match(RegF);
 4093   format %{ %}
 4094   interface(REG_INTER);
 4095 %}
 4096 
 4097 operand legRegF() %{
 4098   predicate( UseSSE>=1 );
 4099   constraint(ALLOC_IN_RC(float_reg_legacy));
 4100   match(RegF);
 4101   format %{ %}
 4102   interface(REG_INTER);
 4103 %}
 4104 
 4105 // Float register operands
 4106 operand vlRegF() %{
 4107    constraint(ALLOC_IN_RC(float_reg_vl));
 4108    match(RegF);
 4109 
 4110    format %{ %}
 4111    interface(REG_INTER);
 4112 %}
 4113 
 4114 // XMM Double register operands
 4115 operand regD() %{
 4116   predicate( UseSSE>=2 );
 4117   constraint(ALLOC_IN_RC(double_reg_legacy));
 4118   match(RegD);
 4119   format %{ %}
 4120   interface(REG_INTER);
 4121 %}
 4122 
 4123 // Double register operands
 4124 operand legRegD() %{
 4125   predicate( UseSSE>=2 );
 4126   constraint(ALLOC_IN_RC(double_reg_legacy));
 4127   match(RegD);
 4128   format %{ %}
 4129   interface(REG_INTER);
 4130 %}
 4131 
 4132 operand vlRegD() %{
 4133    constraint(ALLOC_IN_RC(double_reg_vl));
 4134    match(RegD);
 4135 
 4136    format %{ %}
 4137    interface(REG_INTER);
 4138 %}
 4139 
 4140 //----------Memory Operands----------------------------------------------------
 4141 // Direct Memory Operand
 4142 operand direct(immP addr) %{
 4143   match(addr);
 4144 
 4145   format %{ "[$addr]" %}
 4146   interface(MEMORY_INTER) %{
 4147     base(0xFFFFFFFF);
 4148     index(0x4);
 4149     scale(0x0);
 4150     disp($addr);
 4151   %}
 4152 %}
 4153 
 4154 // Indirect Memory Operand
 4155 operand indirect(eRegP reg) %{
 4156   constraint(ALLOC_IN_RC(int_reg));
 4157   match(reg);
 4158 
 4159   format %{ "[$reg]" %}
 4160   interface(MEMORY_INTER) %{
 4161     base($reg);
 4162     index(0x4);
 4163     scale(0x0);
 4164     disp(0x0);
 4165   %}
 4166 %}
 4167 
 4168 // Indirect Memory Plus Short Offset Operand
 4169 operand indOffset8(eRegP reg, immI8 off) %{
 4170   match(AddP reg off);
 4171 
 4172   format %{ "[$reg + $off]" %}
 4173   interface(MEMORY_INTER) %{
 4174     base($reg);
 4175     index(0x4);
 4176     scale(0x0);
 4177     disp($off);
 4178   %}
 4179 %}
 4180 
 4181 // Indirect Memory Plus Long Offset Operand
 4182 operand indOffset32(eRegP reg, immI off) %{
 4183   match(AddP reg off);
 4184 
 4185   format %{ "[$reg + $off]" %}
 4186   interface(MEMORY_INTER) %{
 4187     base($reg);
 4188     index(0x4);
 4189     scale(0x0);
 4190     disp($off);
 4191   %}
 4192 %}
 4193 
 4194 // Indirect Memory Plus Long Offset Operand
 4195 operand indOffset32X(rRegI reg, immP off) %{
 4196   match(AddP off reg);
 4197 
 4198   format %{ "[$reg + $off]" %}
 4199   interface(MEMORY_INTER) %{
 4200     base($reg);
 4201     index(0x4);
 4202     scale(0x0);
 4203     disp($off);
 4204   %}
 4205 %}
 4206 
 4207 // Indirect Memory Plus Index Register Plus Offset Operand
 4208 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4209   match(AddP (AddP reg ireg) off);
 4210 
 4211   op_cost(10);
 4212   format %{"[$reg + $off + $ireg]" %}
 4213   interface(MEMORY_INTER) %{
 4214     base($reg);
 4215     index($ireg);
 4216     scale(0x0);
 4217     disp($off);
 4218   %}
 4219 %}
 4220 
 4221 // Indirect Memory Plus Index Register Plus Offset Operand
 4222 operand indIndex(eRegP reg, rRegI ireg) %{
 4223   match(AddP reg ireg);
 4224 
 4225   op_cost(10);
 4226   format %{"[$reg + $ireg]" %}
 4227   interface(MEMORY_INTER) %{
 4228     base($reg);
 4229     index($ireg);
 4230     scale(0x0);
 4231     disp(0x0);
 4232   %}
 4233 %}
 4234 
 4235 // // -------------------------------------------------------------------------
 4236 // // 486 architecture doesn't support "scale * index + offset" with out a base
 4237 // // -------------------------------------------------------------------------
 4238 // // Scaled Memory Operands
 4239 // // Indirect Memory Times Scale Plus Offset Operand
 4240 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4241 //   match(AddP off (LShiftI ireg scale));
 4242 //
 4243 //   op_cost(10);
 4244 //   format %{"[$off + $ireg << $scale]" %}
 4245 //   interface(MEMORY_INTER) %{
 4246 //     base(0x4);
 4247 //     index($ireg);
 4248 //     scale($scale);
 4249 //     disp($off);
 4250 //   %}
 4251 // %}
 4252 
 4253 // Indirect Memory Times Scale Plus Index Register
 4254 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4255   match(AddP reg (LShiftI ireg scale));
 4256 
 4257   op_cost(10);
 4258   format %{"[$reg + $ireg << $scale]" %}
 4259   interface(MEMORY_INTER) %{
 4260     base($reg);
 4261     index($ireg);
 4262     scale($scale);
 4263     disp(0x0);
 4264   %}
 4265 %}
 4266 
 4267 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4268 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4269   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4270 
 4271   op_cost(10);
 4272   format %{"[$reg + $off + $ireg << $scale]" %}
 4273   interface(MEMORY_INTER) %{
 4274     base($reg);
 4275     index($ireg);
 4276     scale($scale);
 4277     disp($off);
 4278   %}
 4279 %}
 4280 
 4281 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4283 // the first word of the long.  If the load-long destination overlaps with
 4284 // registers used in the addressing expression, the 2nd half will be loaded
 4285 // from a clobbered address.  Fix this by requiring that load-long use
 4286 // address registers that do not overlap with the load-long target.
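// For example (register choice purely illustrative): if a 64-bit load used
// [EAX + disp] as its address and EDX:EAX as its destination, loading the low
// word into EAX would clobber the base before the address of the second word
// could be formed.  Restricting the base to ESI, as below, avoids the overlap.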
 4287 
 4288 // load-long support
 4289 operand load_long_RegP() %{
 4290   constraint(ALLOC_IN_RC(esi_reg));
 4291   match(RegP);
 4292   match(eSIRegP);
 4293   op_cost(100);
 4294   format %{  %}
 4295   interface(REG_INTER);
 4296 %}
 4297 
 4298 // Indirect Memory Operand Long
 4299 operand load_long_indirect(load_long_RegP reg) %{
 4300   constraint(ALLOC_IN_RC(esi_reg));
 4301   match(reg);
 4302 
 4303   format %{ "[$reg]" %}
 4304   interface(MEMORY_INTER) %{
 4305     base($reg);
 4306     index(0x4);
 4307     scale(0x0);
 4308     disp(0x0);
 4309   %}
 4310 %}
 4311 
 4312 // Indirect Memory Plus Long Offset Operand
 4313 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4314   match(AddP reg off);
 4315 
 4316   format %{ "[$reg + $off]" %}
 4317   interface(MEMORY_INTER) %{
 4318     base($reg);
 4319     index(0x4);
 4320     scale(0x0);
 4321     disp($off);
 4322   %}
 4323 %}
 4324 
 4325 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4326 
 4327 
 4328 //----------Special Memory Operands--------------------------------------------
 4329 // Stack Slot Operand - This operand is used for loading and storing temporary
 4330 //                      values on the stack where a match requires a value to
 4331 //                      flow through memory.
 4332 operand stackSlotP(sRegP reg) %{
 4333   constraint(ALLOC_IN_RC(stack_slots));
 4334   // No match rule because this operand is only generated in matching
 4335   format %{ "[$reg]" %}
 4336   interface(MEMORY_INTER) %{
 4337     base(0x4);   // ESP
 4338     index(0x4);  // No Index
 4339     scale(0x0);  // No Scale
 4340     disp($reg);  // Stack Offset
 4341   %}
 4342 %}
 4343 
 4344 operand stackSlotI(sRegI reg) %{
 4345   constraint(ALLOC_IN_RC(stack_slots));
 4346   // No match rule because this operand is only generated in matching
 4347   format %{ "[$reg]" %}
 4348   interface(MEMORY_INTER) %{
 4349     base(0x4);   // ESP
 4350     index(0x4);  // No Index
 4351     scale(0x0);  // No Scale
 4352     disp($reg);  // Stack Offset
 4353   %}
 4354 %}
 4355 
 4356 operand stackSlotF(sRegF reg) %{
 4357   constraint(ALLOC_IN_RC(stack_slots));
 4358   // No match rule because this operand is only generated in matching
 4359   format %{ "[$reg]" %}
 4360   interface(MEMORY_INTER) %{
 4361     base(0x4);   // ESP
 4362     index(0x4);  // No Index
 4363     scale(0x0);  // No Scale
 4364     disp($reg);  // Stack Offset
 4365   %}
 4366 %}
 4367 
 4368 operand stackSlotD(sRegD reg) %{
 4369   constraint(ALLOC_IN_RC(stack_slots));
 4370   // No match rule because this operand is only generated in matching
 4371   format %{ "[$reg]" %}
 4372   interface(MEMORY_INTER) %{
 4373     base(0x4);   // ESP
 4374     index(0x4);  // No Index
 4375     scale(0x0);  // No Scale
 4376     disp($reg);  // Stack Offset
 4377   %}
 4378 %}
 4379 
 4380 operand stackSlotL(sRegL reg) %{
 4381   constraint(ALLOC_IN_RC(stack_slots));
 4382   // No match rule because this operand is only generated in matching
 4383   format %{ "[$reg]" %}
 4384   interface(MEMORY_INTER) %{
 4385     base(0x4);   // ESP
 4386     index(0x4);  // No Index
 4387     scale(0x0);  // No Scale
 4388     disp($reg);  // Stack Offset
 4389   %}
 4390 %}
 4391 
 4392 //----------Conditional Branch Operands----------------------------------------
 4393 // Comparison Op  - This is the operation of the comparison, and is limited to
 4394 //                  the following set of codes:
 4395 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4396 //
 4397 // Other attributes of the comparison, such as unsignedness, are specified
 4398 // by the comparison instruction that sets a condition code flags register.
 4399 // That result is represented by a flags operand whose subtype is appropriate
 4400 // to the unsignedness (etc.) of the comparison.
 4401 //
 4402 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4403 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4404 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4405 
 4406 // Comparison Code
 4407 operand cmpOp() %{
 4408   match(Bool);
 4409 
 4410   format %{ "" %}
 4411   interface(COND_INTER) %{
 4412     equal(0x4, "e");
 4413     not_equal(0x5, "ne");
 4414     less(0xC, "l");
 4415     greater_equal(0xD, "ge");
 4416     less_equal(0xE, "le");
 4417     greater(0xF, "g");
 4418     overflow(0x0, "o");
 4419     no_overflow(0x1, "no");
 4420   %}
 4421 %}
 4422 
 4423 // Comparison Code, unsigned compare.  Used by FP also, with
 4424 // C2 (unordered) turned into GT or LT already.  The other bits
 4425 // C0 and C3 are turned into Carry & Zero flags.
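// (After FNSTSW AX / SAHF, the FPU condition bits land as C0->CF, C2->PF and
// C3->ZF, which is why the unsigned branch encodings below also work for FP
// compares.)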
 4426 operand cmpOpU() %{
 4427   match(Bool);
 4428 
 4429   format %{ "" %}
 4430   interface(COND_INTER) %{
 4431     equal(0x4, "e");
 4432     not_equal(0x5, "ne");
 4433     less(0x2, "b");
 4434     greater_equal(0x3, "nb");
 4435     less_equal(0x6, "be");
 4436     greater(0x7, "nbe");
 4437     overflow(0x0, "o");
 4438     no_overflow(0x1, "no");
 4439   %}
 4440 %}
 4441 
 4442 // Floating comparisons that don't require any fixup for the unordered case
 4443 operand cmpOpUCF() %{
 4444   match(Bool);
 4445   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4446             n->as_Bool()->_test._test == BoolTest::ge ||
 4447             n->as_Bool()->_test._test == BoolTest::le ||
 4448             n->as_Bool()->_test._test == BoolTest::gt);
 4449   format %{ "" %}
 4450   interface(COND_INTER) %{
 4451     equal(0x4, "e");
 4452     not_equal(0x5, "ne");
 4453     less(0x2, "b");
 4454     greater_equal(0x3, "nb");
 4455     less_equal(0x6, "be");
 4456     greater(0x7, "nbe");
 4457     overflow(0x0, "o");
 4458     no_overflow(0x1, "no");
 4459   %}
 4460 %}
 4461 
 4462 
 4463 // Floating comparisons that can be fixed up with extra conditional jumps
 4464 operand cmpOpUCF2() %{
 4465   match(Bool);
 4466   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4467             n->as_Bool()->_test._test == BoolTest::eq);
 4468   format %{ "" %}
 4469   interface(COND_INTER) %{
 4470     equal(0x4, "e");
 4471     not_equal(0x5, "ne");
 4472     less(0x2, "b");
 4473     greater_equal(0x3, "nb");
 4474     less_equal(0x6, "be");
 4475     greater(0x7, "nbe");
 4476     overflow(0x0, "o");
 4477     no_overflow(0x1, "no");
 4478   %}
 4479 %}
 4480 
 4481 // Comparison Code for FP conditional move
 4482 operand cmpOp_fcmov() %{
 4483   match(Bool);
 4484 
 4485   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4486             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4487   format %{ "" %}
 4488   interface(COND_INTER) %{
 4489     equal        (0x0C8);
 4490     not_equal    (0x1C8);
 4491     less         (0x0C0);
 4492     greater_equal(0x1C0);
 4493     less_equal   (0x0D0);
 4494     greater      (0x1D0);
 4495     overflow(0x0, "o"); // not really supported by the instruction
 4496     no_overflow(0x1, "no"); // not really supported by the instruction
 4497   %}
 4498 %}
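// Note: in the fcmov encodings above the leading hex digit appears to select the
// FCMOVcc opcode byte (0 => 0xDA, 1 => 0xDB) and the low byte the register form,
// e.g. 0x0C8 corresponds to FCMOVE ST(0),ST(i) (DA C8+i).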
 4499 
 4500 // Comparison Code used in long compares
 4501 operand cmpOp_commute() %{
 4502   match(Bool);
 4503 
 4504   format %{ "" %}
 4505   interface(COND_INTER) %{
 4506     equal(0x4, "e");
 4507     not_equal(0x5, "ne");
 4508     less(0xF, "g");
 4509     greater_equal(0xE, "le");
 4510     less_equal(0xD, "ge");
 4511     greater(0xC, "l");
 4512     overflow(0x0, "o");
 4513     no_overflow(0x1, "no");
 4514   %}
 4515 %}
 4516 
 4517 // Comparison Code used in unsigned long compares
 4518 operand cmpOpU_commute() %{
 4519   match(Bool);
 4520 
 4521   format %{ "" %}
 4522   interface(COND_INTER) %{
 4523     equal(0x4, "e");
 4524     not_equal(0x5, "ne");
 4525     less(0x7, "nbe");
 4526     greater_equal(0x6, "be");
 4527     less_equal(0x3, "nb");
 4528     greater(0x2, "b");
 4529     overflow(0x0, "o");
 4530     no_overflow(0x1, "no");
 4531   %}
 4532 %}
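// In the two *_commute operands above, the less/greater encodings are swapped
// relative to cmpOp/cmpOpU because the long-compare sequences that use them test
// the operands in commuted (reversed) order, hence the name.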
 4533 
 4534 //----------OPERAND CLASSES----------------------------------------------------
 4535 // Operand Classes are groups of operands that are used to simplify
 4536 // instruction definitions by not requiring the AD writer to specify separate
 4537 // instructions for every form of operand when the instruction accepts
 4538 // multiple operand types with the same basic encoding and format.  The classic
 4539 // case of this is memory operands.
 4540 
 4541 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4542                indIndex, indIndexScale, indIndexScaleOffset);
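// Illustrative use: an instruct declared with a "memory mem" operand (for example
// loadI further below) accepts any of the addressing forms listed above for $mem.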
 4543 
 4544 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4545 // This means some kind of offset is always required and you cannot use
 4546 // an oop as the offset (as is done when working on static globals).
 4547 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4548                     indIndex, indIndexScale, indIndexScaleOffset);
 4549 
 4550 
 4551 //----------PIPELINE-----------------------------------------------------------
 4552 // Rules which define the behavior of the target architecture's pipeline.
 4553 pipeline %{
 4554 
 4555 //----------ATTRIBUTES---------------------------------------------------------
 4556 attributes %{
 4557   variable_size_instructions;        // Variable-sized instructions
 4558   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4559   instruction_unit_size = 1;         // An instruction is 1 byte long
 4560   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4561   instruction_fetch_units = 1;       // of 16 bytes
 4562 
 4563   // List of nop instructions
 4564   nops( MachNop );
 4565 %}
 4566 
 4567 //----------RESOURCES----------------------------------------------------------
 4568 // Resources are the functional units available to the machine
 4569 
 4570 // Generic P2/P3 pipeline
 4571 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4572 // 3 instructions decoded per cycle.
 4573 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4574 // 2 ALU ops per cycle, only ALU0 handles mul/div instructions.
 4575 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4576            MS0, MS1, MEM = MS0 | MS1,
 4577            BR, FPU,
 4578            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4579 
 4580 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4581 // Pipeline Description specifies the stages in the machine's pipeline
 4582 
 4583 // Generic P2/P3 pipeline
 4584 pipe_desc(S0, S1, S2, S3, S4, S5);
 4585 
 4586 //----------PIPELINE CLASSES---------------------------------------------------
 4587 // Pipeline Classes describe the stages in which input and output are
 4588 // referenced by the hardware pipeline.
 4589 
 4590 // Naming convention: ialu or fpu
 4591 // Then: _reg
 4592 // Then: _reg if there is a 2nd register
 4593 // Then: _long if it's a pair of instructions implementing a long operation
 4594 // Then: _fat if it requires the big decoder
 4595 //   Or: _mem if it requires the big decoder and a memory unit.
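//
// Worked example of the convention: "ialu_reg_mem" below is an integer ALU
// operation with a register destination and a memory source, so it needs the
// big decoder (D0) and a memory unit (MEM).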
 4596 
 4597 // Integer ALU reg operation
 4598 pipe_class ialu_reg(rRegI dst) %{
 4599     single_instruction;
 4600     dst    : S4(write);
 4601     dst    : S3(read);
 4602     DECODE : S0;        // any decoder
 4603     ALU    : S3;        // any alu
 4604 %}
 4605 
 4606 // Long ALU reg operation
 4607 pipe_class ialu_reg_long(eRegL dst) %{
 4608     instruction_count(2);
 4609     dst    : S4(write);
 4610     dst    : S3(read);
 4611     DECODE : S0(2);     // any 2 decoders
 4612     ALU    : S3(2);     // both alus
 4613 %}
 4614 
 4615 // Integer ALU reg operation using big decoder
 4616 pipe_class ialu_reg_fat(rRegI dst) %{
 4617     single_instruction;
 4618     dst    : S4(write);
 4619     dst    : S3(read);
 4620     D0     : S0;        // big decoder only
 4621     ALU    : S3;        // any alu
 4622 %}
 4623 
 4624 // Long ALU reg operation using big decoder
 4625 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4626     instruction_count(2);
 4627     dst    : S4(write);
 4628     dst    : S3(read);
 4629     D0     : S0(2);     // big decoder only; twice
 4630     ALU    : S3(2);     // any 2 alus
 4631 %}
 4632 
 4633 // Integer ALU reg-reg operation
 4634 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4635     single_instruction;
 4636     dst    : S4(write);
 4637     src    : S3(read);
 4638     DECODE : S0;        // any decoder
 4639     ALU    : S3;        // any alu
 4640 %}
 4641 
 4642 // Long ALU reg-reg operation
 4643 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4644     instruction_count(2);
 4645     dst    : S4(write);
 4646     src    : S3(read);
 4647     DECODE : S0(2);     // any 2 decoders
 4648     ALU    : S3(2);     // both alus
 4649 %}
 4650 
 4651 // Integer ALU reg-reg operation
 4652 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4653     single_instruction;
 4654     dst    : S4(write);
 4655     src    : S3(read);
 4656     D0     : S0;        // big decoder only
 4657     ALU    : S3;        // any alu
 4658 %}
 4659 
 4660 // Long ALU reg-reg operation
 4661 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4662     instruction_count(2);
 4663     dst    : S4(write);
 4664     src    : S3(read);
 4665     D0     : S0(2);     // big decoder only; twice
 4666     ALU    : S3(2);     // both alus
 4667 %}
 4668 
 4669 // Integer ALU reg-mem operation
 4670 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4671     single_instruction;
 4672     dst    : S5(write);
 4673     mem    : S3(read);
 4674     D0     : S0;        // big decoder only
 4675     ALU    : S4;        // any alu
 4676     MEM    : S3;        // any mem
 4677 %}
 4678 
 4679 // Long ALU reg-mem operation
 4680 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4681     instruction_count(2);
 4682     dst    : S5(write);
 4683     mem    : S3(read);
 4684     D0     : S0(2);     // big decoder only; twice
 4685     ALU    : S4(2);     // any 2 alus
 4686     MEM    : S3(2);     // both mems
 4687 %}
 4688 
 4689 // Integer mem operation (prefetch)
 4690 pipe_class ialu_mem(memory mem) %{
 4692     single_instruction;
 4693     mem    : S3(read);
 4694     D0     : S0;        // big decoder only
 4695     MEM    : S3;        // any mem
 4696 %}
 4697 
 4698 // Integer Store to Memory
 4699 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4700     single_instruction;
 4701     mem    : S3(read);
 4702     src    : S5(read);
 4703     D0     : S0;        // big decoder only
 4704     ALU    : S4;        // any alu
 4705     MEM    : S3;
 4706 %}
 4707 
 4708 // Long Store to Memory
 4709 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4710     instruction_count(2);
 4711     mem    : S3(read);
 4712     src    : S5(read);
 4713     D0     : S0(2);     // big decoder only; twice
 4714     ALU    : S4(2);     // any 2 alus
 4715     MEM    : S3(2);     // Both mems
 4716 %}
 4717 
 4718 // Integer Store to Memory
 4719 pipe_class ialu_mem_imm(memory mem) %{
 4720     single_instruction;
 4721     mem    : S3(read);
 4722     D0     : S0;        // big decoder only
 4723     ALU    : S4;        // any alu
 4724     MEM    : S3;
 4725 %}
 4726 
 4727 // Integer ALU0 reg-reg operation
 4728 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4729     single_instruction;
 4730     dst    : S4(write);
 4731     src    : S3(read);
 4732     D0     : S0;        // Big decoder only
 4733     ALU0   : S3;        // only alu0
 4734 %}
 4735 
 4736 // Integer ALU0 reg-mem operation
 4737 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4738     single_instruction;
 4739     dst    : S5(write);
 4740     mem    : S3(read);
 4741     D0     : S0;        // big decoder only
 4742     ALU0   : S4;        // ALU0 only
 4743     MEM    : S3;        // any mem
 4744 %}
 4745 
 4746 // Integer ALU reg-reg operation
 4747 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4748     single_instruction;
 4749     cr     : S4(write);
 4750     src1   : S3(read);
 4751     src2   : S3(read);
 4752     DECODE : S0;        // any decoder
 4753     ALU    : S3;        // any alu
 4754 %}
 4755 
 4756 // Integer ALU reg-imm operation
 4757 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4758     single_instruction;
 4759     cr     : S4(write);
 4760     src1   : S3(read);
 4761     DECODE : S0;        // any decoder
 4762     ALU    : S3;        // any alu
 4763 %}
 4764 
 4765 // Integer ALU reg-mem operation
 4766 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4767     single_instruction;
 4768     cr     : S4(write);
 4769     src1   : S3(read);
 4770     src2   : S3(read);
 4771     D0     : S0;        // big decoder only
 4772     ALU    : S4;        // any alu
 4773     MEM    : S3;
 4774 %}
 4775 
 4776 // Conditional move reg-reg
 4777 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4778     instruction_count(4);
 4779     y      : S4(read);
 4780     q      : S3(read);
 4781     p      : S3(read);
 4782     DECODE : S0(4);     // any decoder
 4783 %}
 4784 
 4785 // Conditional move reg-reg
 4786 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4787     single_instruction;
 4788     dst    : S4(write);
 4789     src    : S3(read);
 4790     cr     : S3(read);
 4791     DECODE : S0;        // any decoder
 4792 %}
 4793 
 4794 // Conditional move reg-mem
 4795 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4796     single_instruction;
 4797     dst    : S4(write);
 4798     src    : S3(read);
 4799     cr     : S3(read);
 4800     DECODE : S0;        // any decoder
 4801     MEM    : S3;
 4802 %}
 4803 
 4804 // Conditional move reg-reg long
 4805 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4806     single_instruction;
 4807     dst    : S4(write);
 4808     src    : S3(read);
 4809     cr     : S3(read);
 4810     DECODE : S0(2);     // any 2 decoders
 4811 %}
 4812 
 4813 // Conditional move double reg-reg
 4814 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4815     single_instruction;
 4816     dst    : S4(write);
 4817     src    : S3(read);
 4818     cr     : S3(read);
 4819     DECODE : S0;        // any decoder
 4820 %}
 4821 
 4822 // Float reg-reg operation
 4823 pipe_class fpu_reg(regDPR dst) %{
 4824     instruction_count(2);
 4825     dst    : S3(read);
 4826     DECODE : S0(2);     // any 2 decoders
 4827     FPU    : S3;
 4828 %}
 4829 
 4830 // Float reg-reg operation
 4831 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4832     instruction_count(2);
 4833     dst    : S4(write);
 4834     src    : S3(read);
 4835     DECODE : S0(2);     // any 2 decoders
 4836     FPU    : S3;
 4837 %}
 4838 
 4839 // Float reg-reg operation
 4840 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4841     instruction_count(3);
 4842     dst    : S4(write);
 4843     src1   : S3(read);
 4844     src2   : S3(read);
 4845     DECODE : S0(3);     // any 3 decoders
 4846     FPU    : S3(2);
 4847 %}
 4848 
 4849 // Float reg-reg operation
 4850 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4851     instruction_count(4);
 4852     dst    : S4(write);
 4853     src1   : S3(read);
 4854     src2   : S3(read);
 4855     src3   : S3(read);
 4856     DECODE : S0(4);     // any 4 decoders
 4857     FPU    : S3(2);
 4858 %}
 4859 
 4860 // Float reg-mem-reg-reg operation
 4861 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4862     instruction_count(4);
 4863     dst    : S4(write);
 4864     src1   : S3(read);
 4865     src2   : S3(read);
 4866     src3   : S3(read);
 4867     DECODE : S1(3);     // any 3 decoders
 4868     D0     : S0;        // Big decoder only
 4869     FPU    : S3(2);
 4870     MEM    : S3;
 4871 %}
 4872 
 4873 // Float reg-mem operation
 4874 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4875     instruction_count(2);
 4876     dst    : S5(write);
 4877     mem    : S3(read);
 4878     D0     : S0;        // big decoder only
 4879     DECODE : S1;        // any decoder for FPU POP
 4880     FPU    : S4;
 4881     MEM    : S3;        // any mem
 4882 %}
 4883 
 4884 // Float reg-mem operation
 4885 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4886     instruction_count(3);
 4887     dst    : S5(write);
 4888     src1   : S3(read);
 4889     mem    : S3(read);
 4890     D0     : S0;        // big decoder only
 4891     DECODE : S1(2);     // any decoder for FPU POP
 4892     FPU    : S4;
 4893     MEM    : S3;        // any mem
 4894 %}
 4895 
 4896 // Float mem-reg operation
 4897 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4898     instruction_count(2);
 4899     src    : S5(read);
 4900     mem    : S3(read);
 4901     DECODE : S0;        // any decoder for FPU PUSH
 4902     D0     : S1;        // big decoder only
 4903     FPU    : S4;
 4904     MEM    : S3;        // any mem
 4905 %}
 4906 
 4907 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4908     instruction_count(3);
 4909     src1   : S3(read);
 4910     src2   : S3(read);
 4911     mem    : S3(read);
 4912     DECODE : S0(2);     // any decoder for FPU PUSH
 4913     D0     : S1;        // big decoder only
 4914     FPU    : S4;
 4915     MEM    : S3;        // any mem
 4916 %}
 4917 
 4918 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4919     instruction_count(3);
 4920     src1   : S3(read);
 4921     src2   : S3(read);
 4922     mem    : S4(read);
 4923     DECODE : S0;        // any decoder for FPU PUSH
 4924     D0     : S0(2);     // big decoder only
 4925     FPU    : S4;
 4926     MEM    : S3(2);     // any mem
 4927 %}
 4928 
 4929 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4930     instruction_count(2);
 4931     src1   : S3(read);
 4932     dst    : S4(read);
 4933     D0     : S0(2);     // big decoder only
 4934     MEM    : S3(2);     // any mem
 4935 %}
 4936 
 4937 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4938     instruction_count(3);
 4939     src1   : S3(read);
 4940     src2   : S3(read);
 4941     dst    : S4(read);
 4942     D0     : S0(3);     // big decoder only
 4943     FPU    : S4;
 4944     MEM    : S3(3);     // any mem
 4945 %}
 4946 
 4947 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4948     instruction_count(3);
 4949     src1   : S4(read);
 4950     mem    : S4(read);
 4951     DECODE : S0;        // any decoder for FPU PUSH
 4952     D0     : S0(2);     // big decoder only
 4953     FPU    : S4;
 4954     MEM    : S3(2);     // any mem
 4955 %}
 4956 
 4957 // Float load constant
 4958 pipe_class fpu_reg_con(regDPR dst) %{
 4959     instruction_count(2);
 4960     dst    : S5(write);
 4961     D0     : S0;        // big decoder only for the load
 4962     DECODE : S1;        // any decoder for FPU POP
 4963     FPU    : S4;
 4964     MEM    : S3;        // any mem
 4965 %}
 4966 
 4967 // Float load constant
 4968 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4969     instruction_count(3);
 4970     dst    : S5(write);
 4971     src    : S3(read);
 4972     D0     : S0;        // big decoder only for the load
 4973     DECODE : S1(2);     // any decoder for FPU POP
 4974     FPU    : S4;
 4975     MEM    : S3;        // any mem
 4976 %}
 4977 
 4978 // Unconditional branch
 4979 pipe_class pipe_jmp( label labl ) %{
 4980     single_instruction;
 4981     BR   : S3;
 4982 %}
 4983 
 4984 // Conditional branch
 4985 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4986     single_instruction;
 4987     cr    : S1(read);
 4988     BR    : S3;
 4989 %}
 4990 
 4991 // Allocation idiom
 4992 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4993     instruction_count(1); force_serialization;
 4994     fixed_latency(6);
 4995     heap_ptr : S3(read);
 4996     DECODE   : S0(3);
 4997     D0       : S2;
 4998     MEM      : S3;
 4999     ALU      : S3(2);
 5000     dst      : S5(write);
 5001     BR       : S5;
 5002 %}
 5003 
 5004 // Generic big/slow expanded idiom
 5005 pipe_class pipe_slow(  ) %{
 5006     instruction_count(10); multiple_bundles; force_serialization;
 5007     fixed_latency(100);
 5008     D0  : S0(2);
 5009     MEM : S3(2);
 5010 %}
 5011 
 5012 // The real do-nothing guy
 5013 pipe_class empty( ) %{
 5014     instruction_count(0);
 5015 %}
 5016 
 5017 // Define the class for the Nop node
 5018 define %{
 5019    MachNop = empty;
 5020 %}
 5021 
 5022 %}
 5023 
 5024 //----------INSTRUCTIONS-------------------------------------------------------
 5025 //
 5026 // match      -- States which machine-independent subtree may be replaced
 5027 //               by this instruction.
 5028 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5029 //               selection to identify a minimum cost tree of machine
 5030 //               instructions that matches a tree of machine-independent
 5031 //               instructions.
 5032 // format     -- A string providing the disassembly for this instruction.
 5033 //               The value of an instruction's operand may be inserted
 5034 //               by referring to it with a '$' prefix.
 5035 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5036 //               to within an encode class as $primary, $secondary, and $tertiary
 5037 //               respectively.  The primary opcode is commonly used to
 5038 //               indicate the type of machine instruction, while secondary
 5039 //               and tertiary are often used for prefix options or addressing
 5040 //               modes.
 5041 // ins_encode -- A list of encode classes with parameters. The encode class
 5042 //               name must have been defined in an 'enc_class' specification
 5043 //               in the encode section of the architecture description.
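//
// A small worked example of these parts is bytes_reverse_int below: its match rule
// replaces a ReverseBytesI subtree, opcode(0x0F, 0xC8) supplies $primary and
// $secondary, and ins_encode emits them via the OpcP and OpcSReg encode classes.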
 5044 
 5045 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5046 // Load Float
 5047 instruct MoveF2LEG(legRegF dst, regF src) %{
 5048   match(Set dst src);
 5049   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5050   ins_encode %{
 5051     ShouldNotReachHere();
 5052   %}
 5053   ins_pipe( fpu_reg_reg );
 5054 %}
 5055 
 5056 // Load Float
 5057 instruct MoveLEG2F(regF dst, legRegF src) %{
 5058   match(Set dst src);
 5059   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5060   ins_encode %{
 5061     ShouldNotReachHere();
 5062   %}
 5063   ins_pipe( fpu_reg_reg );
 5064 %}
 5065 
 5066 // Load Float
 5067 instruct MoveF2VL(vlRegF dst, regF src) %{
 5068   match(Set dst src);
 5069   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5070   ins_encode %{
 5071     ShouldNotReachHere();
 5072   %}
 5073   ins_pipe( fpu_reg_reg );
 5074 %}
 5075 
 5076 // Load Float
 5077 instruct MoveVL2F(regF dst, vlRegF src) %{
 5078   match(Set dst src);
 5079   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5080   ins_encode %{
 5081     ShouldNotReachHere();
 5082   %}
 5083   ins_pipe( fpu_reg_reg );
 5084 %}
 5085 
 5086 
 5087 
 5088 // Load Double
 5089 instruct MoveD2LEG(legRegD dst, regD src) %{
 5090   match(Set dst src);
 5091   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5092   ins_encode %{
 5093     ShouldNotReachHere();
 5094   %}
 5095   ins_pipe( fpu_reg_reg );
 5096 %}
 5097 
 5098 // Load Double
 5099 instruct MoveLEG2D(regD dst, legRegD src) %{
 5100   match(Set dst src);
 5101   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5102   ins_encode %{
 5103     ShouldNotReachHere();
 5104   %}
 5105   ins_pipe( fpu_reg_reg );
 5106 %}
 5107 
 5108 // Load Double
 5109 instruct MoveD2VL(vlRegD dst, regD src) %{
 5110   match(Set dst src);
 5111   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5112   ins_encode %{
 5113     ShouldNotReachHere();
 5114   %}
 5115   ins_pipe( fpu_reg_reg );
 5116 %}
 5117 
 5118 // Load Double
 5119 instruct MoveVL2D(regD dst, vlRegD src) %{
 5120   match(Set dst src);
 5121   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5122   ins_encode %{
 5123     ShouldNotReachHere();
 5124   %}
 5125   ins_pipe( fpu_reg_reg );
 5126 %}
 5127 
 5128 //----------BSWAP-Instruction--------------------------------------------------
 5129 instruct bytes_reverse_int(rRegI dst) %{
 5130   match(Set dst (ReverseBytesI dst));
 5131 
 5132   format %{ "BSWAP  $dst" %}
 5133   opcode(0x0F, 0xC8);
 5134   ins_encode( OpcP, OpcSReg(dst) );
 5135   ins_pipe( ialu_reg );
 5136 %}
 5137 
 5138 instruct bytes_reverse_long(eRegL dst) %{
 5139   match(Set dst (ReverseBytesL dst));
 5140 
 5141   format %{ "BSWAP  $dst.lo\n\t"
 5142             "BSWAP  $dst.hi\n\t"
 5143             "XCHG   $dst.lo $dst.hi" %}
 5144 
 5145   ins_cost(125);
 5146   ins_encode( bswap_long_bytes(dst) );
 5147   ins_pipe( ialu_reg_reg);
 5148 %}
 5149 
 5150 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5151   match(Set dst (ReverseBytesUS dst));
 5152   effect(KILL cr);
 5153 
 5154   format %{ "BSWAP  $dst\n\t"
 5155             "SHR    $dst,16\n\t" %}
 5156   ins_encode %{
 5157     __ bswapl($dst$$Register);
 5158     __ shrl($dst$$Register, 16);
 5159   %}
 5160   ins_pipe( ialu_reg );
 5161 %}
 5162 
 5163 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5164   match(Set dst (ReverseBytesS dst));
 5165   effect(KILL cr);
 5166 
 5167   format %{ "BSWAP  $dst\n\t"
 5168             "SAR    $dst,16\n\t" %}
 5169   ins_encode %{
 5170     __ bswapl($dst$$Register);
 5171     __ sarl($dst$$Register, 16);
 5172   %}
 5173   ins_pipe( ialu_reg );
 5174 %}
 5175 
 5176 
 5177 //---------- Zeros Count Instructions ------------------------------------------
 5178 
 5179 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5180   predicate(UseCountLeadingZerosInstruction);
 5181   match(Set dst (CountLeadingZerosI src));
 5182   effect(KILL cr);
 5183 
 5184   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5185   ins_encode %{
 5186     __ lzcntl($dst$$Register, $src$$Register);
 5187   %}
 5188   ins_pipe(ialu_reg);
 5189 %}
 5190 
 5191 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5192   predicate(!UseCountLeadingZerosInstruction);
 5193   match(Set dst (CountLeadingZerosI src));
 5194   effect(KILL cr);
 5195 
 5196   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5197             "JNZ    skip\n\t"
 5198             "MOV    $dst, -1\n"
 5199       "skip:\n\t"
 5200             "NEG    $dst\n\t"
 5201             "ADD    $dst, 31" %}
 5202   ins_encode %{
 5203     Register Rdst = $dst$$Register;
 5204     Register Rsrc = $src$$Register;
 5205     Label skip;
 5206     __ bsrl(Rdst, Rsrc);
 5207     __ jccb(Assembler::notZero, skip);
 5208     __ movl(Rdst, -1);
 5209     __ bind(skip);
 5210     __ negl(Rdst);
 5211     __ addl(Rdst, BitsPerInt - 1);
 5212   %}
 5213   ins_pipe(ialu_reg);
 5214 %}
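// Worked example for the BSR fallback above (illustrative): src = 0x00000400
// (only bit 10 set) gives BSR -> 10, NEG -> -10, ADD 31 -> 21 leading zeros;
// src == 0 gives -1, and NEG/ADD then yield 32.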
 5215 
 5216 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5217   predicate(UseCountLeadingZerosInstruction);
 5218   match(Set dst (CountLeadingZerosL src));
 5219   effect(TEMP dst, KILL cr);
 5220 
 5221   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5222             "JNC    done\n\t"
 5223             "LZCNT  $dst, $src.lo\n\t"
 5224             "ADD    $dst, 32\n"
 5225       "done:" %}
 5226   ins_encode %{
 5227     Register Rdst = $dst$$Register;
 5228     Register Rsrc = $src$$Register;
 5229     Label done;
 5230     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5231     __ jccb(Assembler::carryClear, done);
 5232     __ lzcntl(Rdst, Rsrc);
 5233     __ addl(Rdst, BitsPerInt);
 5234     __ bind(done);
 5235   %}
 5236   ins_pipe(ialu_reg);
 5237 %}
 5238 
 5239 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5240   predicate(!UseCountLeadingZerosInstruction);
 5241   match(Set dst (CountLeadingZerosL src));
 5242   effect(TEMP dst, KILL cr);
 5243 
 5244   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5245             "JZ     msw_is_zero\n\t"
 5246             "ADD    $dst, 32\n\t"
 5247             "JMP    not_zero\n"
 5248       "msw_is_zero:\n\t"
 5249             "BSR    $dst, $src.lo\n\t"
 5250             "JNZ    not_zero\n\t"
 5251             "MOV    $dst, -1\n"
 5252       "not_zero:\n\t"
 5253             "NEG    $dst\n\t"
 5254             "ADD    $dst, 63\n" %}
 5255  ins_encode %{
 5256     Register Rdst = $dst$$Register;
 5257     Register Rsrc = $src$$Register;
 5258     Label msw_is_zero;
 5259     Label not_zero;
 5260     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5261     __ jccb(Assembler::zero, msw_is_zero);
 5262     __ addl(Rdst, BitsPerInt);
 5263     __ jmpb(not_zero);
 5264     __ bind(msw_is_zero);
 5265     __ bsrl(Rdst, Rsrc);
 5266     __ jccb(Assembler::notZero, not_zero);
 5267     __ movl(Rdst, -1);
 5268     __ bind(not_zero);
 5269     __ negl(Rdst);
 5270     __ addl(Rdst, BitsPerLong - 1);
 5271   %}
 5272   ins_pipe(ialu_reg);
 5273 %}
 5274 
 5275 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5276   predicate(UseCountTrailingZerosInstruction);
 5277   match(Set dst (CountTrailingZerosI src));
 5278   effect(KILL cr);
 5279 
 5280   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5281   ins_encode %{
 5282     __ tzcntl($dst$$Register, $src$$Register);
 5283   %}
 5284   ins_pipe(ialu_reg);
 5285 %}
 5286 
 5287 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5288   predicate(!UseCountTrailingZerosInstruction);
 5289   match(Set dst (CountTrailingZerosI src));
 5290   effect(KILL cr);
 5291 
 5292   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5293             "JNZ    done\n\t"
 5294             "MOV    $dst, 32\n"
 5295       "done:" %}
 5296   ins_encode %{
 5297     Register Rdst = $dst$$Register;
 5298     Label done;
 5299     __ bsfl(Rdst, $src$$Register);
 5300     __ jccb(Assembler::notZero, done);
 5301     __ movl(Rdst, BitsPerInt);
 5302     __ bind(done);
 5303   %}
 5304   ins_pipe(ialu_reg);
 5305 %}
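// Worked example for the BSF fallback above (illustrative): src = 0x00000400
// gives BSF -> 10 trailing zeros; for src == 0, BSF sets ZF and leaves the
// destination undefined, so the fallback MOV forces the result to 32.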
 5306 
 5307 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5308   predicate(UseCountTrailingZerosInstruction);
 5309   match(Set dst (CountTrailingZerosL src));
 5310   effect(TEMP dst, KILL cr);
 5311 
 5312   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5313             "JNC    done\n\t"
 5314             "TZCNT  $dst, $src.hi\n\t"
 5315             "ADD    $dst, 32\n"
 5316             "done:" %}
 5317   ins_encode %{
 5318     Register Rdst = $dst$$Register;
 5319     Register Rsrc = $src$$Register;
 5320     Label done;
 5321     __ tzcntl(Rdst, Rsrc);
 5322     __ jccb(Assembler::carryClear, done);
 5323     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5324     __ addl(Rdst, BitsPerInt);
 5325     __ bind(done);
 5326   %}
 5327   ins_pipe(ialu_reg);
 5328 %}
 5329 
 5330 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5331   predicate(!UseCountTrailingZerosInstruction);
 5332   match(Set dst (CountTrailingZerosL src));
 5333   effect(TEMP dst, KILL cr);
 5334 
 5335   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5336             "JNZ    done\n\t"
 5337             "BSF    $dst, $src.hi\n\t"
 5338             "JNZ    msw_not_zero\n\t"
 5339             "MOV    $dst, 32\n"
 5340       "msw_not_zero:\n\t"
 5341             "ADD    $dst, 32\n"
 5342       "done:" %}
 5343   ins_encode %{
 5344     Register Rdst = $dst$$Register;
 5345     Register Rsrc = $src$$Register;
 5346     Label msw_not_zero;
 5347     Label done;
 5348     __ bsfl(Rdst, Rsrc);
 5349     __ jccb(Assembler::notZero, done);
 5350     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5351     __ jccb(Assembler::notZero, msw_not_zero);
 5352     __ movl(Rdst, BitsPerInt);
 5353     __ bind(msw_not_zero);
 5354     __ addl(Rdst, BitsPerInt);
 5355     __ bind(done);
 5356   %}
 5357   ins_pipe(ialu_reg);
 5358 %}
 5359 
 5360 
 5361 //---------- Population Count Instructions -------------------------------------
 5362 
 5363 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5364   predicate(UsePopCountInstruction);
 5365   match(Set dst (PopCountI src));
 5366   effect(KILL cr);
 5367 
 5368   format %{ "POPCNT $dst, $src" %}
 5369   ins_encode %{
 5370     __ popcntl($dst$$Register, $src$$Register);
 5371   %}
 5372   ins_pipe(ialu_reg);
 5373 %}
 5374 
 5375 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5376   predicate(UsePopCountInstruction);
 5377   match(Set dst (PopCountI (LoadI mem)));
 5378   effect(KILL cr);
 5379 
 5380   format %{ "POPCNT $dst, $mem" %}
 5381   ins_encode %{
 5382     __ popcntl($dst$$Register, $mem$$Address);
 5383   %}
 5384   ins_pipe(ialu_reg);
 5385 %}
 5386 
 5387 // Note: Long.bitCount(long) returns an int.
 5388 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5389   predicate(UsePopCountInstruction);
 5390   match(Set dst (PopCountL src));
 5391   effect(KILL cr, TEMP tmp, TEMP dst);
 5392 
 5393   format %{ "POPCNT $dst, $src.lo\n\t"
 5394             "POPCNT $tmp, $src.hi\n\t"
 5395             "ADD    $dst, $tmp" %}
 5396   ins_encode %{
 5397     __ popcntl($dst$$Register, $src$$Register);
 5398     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5399     __ addl($dst$$Register, $tmp$$Register);
 5400   %}
 5401   ins_pipe(ialu_reg);
 5402 %}
 5403 
 5404 // Note: Long.bitCount(long) returns an int.
 5405 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5406   predicate(UsePopCountInstruction);
 5407   match(Set dst (PopCountL (LoadL mem)));
 5408   effect(KILL cr, TEMP tmp, TEMP dst);
 5409 
 5410   format %{ "POPCNT $dst, $mem\n\t"
 5411             "POPCNT $tmp, $mem+4\n\t"
 5412             "ADD    $dst, $tmp" %}
 5413   ins_encode %{
 5414     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5415     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5416     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5417     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5418     __ addl($dst$$Register, $tmp$$Register);
 5419   %}
 5420   ins_pipe(ialu_reg);
 5421 %}
 5422 
 5423 
 5424 //----------Load/Store/Move Instructions---------------------------------------
 5425 //----------Load Instructions--------------------------------------------------
 5426 // Load Byte (8bit signed)
 5427 instruct loadB(xRegI dst, memory mem) %{
 5428   match(Set dst (LoadB mem));
 5429 
 5430   ins_cost(125);
 5431   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5432 
 5433   ins_encode %{
 5434     __ movsbl($dst$$Register, $mem$$Address);
 5435   %}
 5436 
 5437   ins_pipe(ialu_reg_mem);
 5438 %}
 5439 
 5440 // Load Byte (8bit signed) into Long Register
 5441 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5442   match(Set dst (ConvI2L (LoadB mem)));
 5443   effect(KILL cr);
 5444 
 5445   ins_cost(375);
 5446   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5447             "MOV    $dst.hi,$dst.lo\n\t"
 5448             "SAR    $dst.hi,7" %}
 5449 
 5450   ins_encode %{
 5451     __ movsbl($dst$$Register, $mem$$Address);
 5452     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5453     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 most significant bits are already sign-extended.
 5454   %}
 5455 
 5456   ins_pipe(ialu_reg_mem);
 5457 %}
 5458 
 5459 // Load Unsigned Byte (8bit UNsigned)
 5460 instruct loadUB(xRegI dst, memory mem) %{
 5461   match(Set dst (LoadUB mem));
 5462 
 5463   ins_cost(125);
 5464   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5465 
 5466   ins_encode %{
 5467     __ movzbl($dst$$Register, $mem$$Address);
 5468   %}
 5469 
 5470   ins_pipe(ialu_reg_mem);
 5471 %}
 5472 
 5473 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5474 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5475   match(Set dst (ConvI2L (LoadUB mem)));
 5476   effect(KILL cr);
 5477 
 5478   ins_cost(250);
 5479   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5480             "XOR    $dst.hi,$dst.hi" %}
 5481 
 5482   ins_encode %{
 5483     Register Rdst = $dst$$Register;
 5484     __ movzbl(Rdst, $mem$$Address);
 5485     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5486   %}
 5487 
 5488   ins_pipe(ialu_reg_mem);
 5489 %}
 5490 
 5491 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5492 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5493   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5494   effect(KILL cr);
 5495 
 5496   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5497             "XOR    $dst.hi,$dst.hi\n\t"
 5498             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5499   ins_encode %{
 5500     Register Rdst = $dst$$Register;
 5501     __ movzbl(Rdst, $mem$$Address);
 5502     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5503     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5504   %}
 5505   ins_pipe(ialu_reg_mem);
 5506 %}
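// Only the low 8 bits of $mask matter above because MOVZX8 has already zeroed
// bits 8..31; the same reasoning applies to the 16-bit variant loadUS2L_immI
// further below.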
 5507 
 5508 // Load Short (16bit signed)
 5509 instruct loadS(rRegI dst, memory mem) %{
 5510   match(Set dst (LoadS mem));
 5511 
 5512   ins_cost(125);
 5513   format %{ "MOVSX  $dst,$mem\t# short" %}
 5514 
 5515   ins_encode %{
 5516     __ movswl($dst$$Register, $mem$$Address);
 5517   %}
 5518 
 5519   ins_pipe(ialu_reg_mem);
 5520 %}
 5521 
 5522 // Load Short (16 bit signed) to Byte (8 bit signed)
 5523 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5524   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5525 
 5526   ins_cost(125);
 5527   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5528   ins_encode %{
 5529     __ movsbl($dst$$Register, $mem$$Address);
 5530   %}
 5531   ins_pipe(ialu_reg_mem);
 5532 %}
 5533 
 5534 // Load Short (16bit signed) into Long Register
 5535 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5536   match(Set dst (ConvI2L (LoadS mem)));
 5537   effect(KILL cr);
 5538 
 5539   ins_cost(375);
 5540   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5541             "MOV    $dst.hi,$dst.lo\n\t"
 5542             "SAR    $dst.hi,15" %}
 5543 
 5544   ins_encode %{
 5545     __ movswl($dst$$Register, $mem$$Address);
 5546     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5547     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 most significant bits are already sign-extended.
 5548   %}
 5549 
 5550   ins_pipe(ialu_reg_mem);
 5551 %}
 5552 
 5553 // Load Unsigned Short/Char (16bit unsigned)
 5554 instruct loadUS(rRegI dst, memory mem) %{
 5555   match(Set dst (LoadUS mem));
 5556 
 5557   ins_cost(125);
 5558   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5559 
 5560   ins_encode %{
 5561     __ movzwl($dst$$Register, $mem$$Address);
 5562   %}
 5563 
 5564   ins_pipe(ialu_reg_mem);
 5565 %}
 5566 
 5567 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5568 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5569   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5570 
 5571   ins_cost(125);
 5572   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5573   ins_encode %{
 5574     __ movsbl($dst$$Register, $mem$$Address);
 5575   %}
 5576   ins_pipe(ialu_reg_mem);
 5577 %}
 5578 
 5579 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5580 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5581   match(Set dst (ConvI2L (LoadUS mem)));
 5582   effect(KILL cr);
 5583 
 5584   ins_cost(250);
 5585   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5586             "XOR    $dst.hi,$dst.hi" %}
 5587 
 5588   ins_encode %{
 5589     __ movzwl($dst$$Register, $mem$$Address);
 5590     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5591   %}
 5592 
 5593   ins_pipe(ialu_reg_mem);
 5594 %}
 5595 
 5596 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5597 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5598   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5599   effect(KILL cr);
 5600 
 5601   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5602             "XOR    $dst.hi,$dst.hi" %}
 5603   ins_encode %{
 5604     Register Rdst = $dst$$Register;
 5605     __ movzbl(Rdst, $mem$$Address);
 5606     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5607   %}
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5612 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi\n\t"
 5618             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5619   ins_encode %{
 5620     Register Rdst = $dst$$Register;
 5621     __ movzwl(Rdst, $mem$$Address);
 5622     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5623     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5624   %}
 5625   ins_pipe(ialu_reg_mem);
 5626 %}
 5627 
 5628 // Load Integer
 5629 instruct loadI(rRegI dst, memory mem) %{
 5630   match(Set dst (LoadI mem));
 5631 
 5632   ins_cost(125);
 5633   format %{ "MOV    $dst,$mem\t# int" %}
 5634 
 5635   ins_encode %{
 5636     __ movl($dst$$Register, $mem$$Address);
 5637   %}
 5638 
 5639   ins_pipe(ialu_reg_mem);
 5640 %}
 5641 
 5642 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5643 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5644   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5645 
 5646   ins_cost(125);
 5647   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5648   ins_encode %{
 5649     __ movsbl($dst$$Register, $mem$$Address);
 5650   %}
 5651   ins_pipe(ialu_reg_mem);
 5652 %}
 5653 
 5654 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5655 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5656   match(Set dst (AndI (LoadI mem) mask));
 5657 
 5658   ins_cost(125);
 5659   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5660   ins_encode %{
 5661     __ movzbl($dst$$Register, $mem$$Address);
 5662   %}
 5663   ins_pipe(ialu_reg_mem);
 5664 %}
 5665 
 5666 // Load Integer (32 bit signed) to Short (16 bit signed)
 5667 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5668   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5669 
 5670   ins_cost(125);
 5671   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5672   ins_encode %{
 5673     __ movswl($dst$$Register, $mem$$Address);
 5674   %}
 5675   ins_pipe(ialu_reg_mem);
 5676 %}
 5677 
 5678 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5679 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5680   match(Set dst (AndI (LoadI mem) mask));
 5681 
 5682   ins_cost(125);
 5683   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5684   ins_encode %{
 5685     __ movzwl($dst$$Register, $mem$$Address);
 5686   %}
 5687   ins_pipe(ialu_reg_mem);
 5688 %}
 5689 
 5690 // Load Integer into Long Register
 5691 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5692   match(Set dst (ConvI2L (LoadI mem)));
 5693   effect(KILL cr);
 5694 
 5695   ins_cost(375);
 5696   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5697             "MOV    $dst.hi,$dst.lo\n\t"
 5698             "SAR    $dst.hi,31" %}
 5699 
 5700   ins_encode %{
 5701     __ movl($dst$$Register, $mem$$Address);
 5702     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5703     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5704   %}
 5705 
 5706   ins_pipe(ialu_reg_mem);
 5707 %}
 5708 
 5709 // Load Integer with mask 0xFF into Long Register
 5710 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5711   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5712   effect(KILL cr);
 5713 
 5714   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5715             "XOR    $dst.hi,$dst.hi" %}
 5716   ins_encode %{
 5717     Register Rdst = $dst$$Register;
 5718     __ movzbl(Rdst, $mem$$Address);
 5719     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5720   %}
 5721   ins_pipe(ialu_reg_mem);
 5722 %}
 5723 
 5724 // Load Integer with mask 0xFFFF into Long Register
 5725 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5726   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5727   effect(KILL cr);
 5728 
 5729   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5730             "XOR    $dst.hi,$dst.hi" %}
 5731   ins_encode %{
 5732     Register Rdst = $dst$$Register;
 5733     __ movzwl(Rdst, $mem$$Address);
 5734     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5735   %}
 5736   ins_pipe(ialu_reg_mem);
 5737 %}
 5738 
 5739 // Load Integer with 31-bit mask into Long Register
 5740 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5741   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5742   effect(KILL cr);
 5743 
 5744   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5745             "XOR    $dst.hi,$dst.hi\n\t"
 5746             "AND    $dst.lo,$mask" %}
 5747   ins_encode %{
 5748     Register Rdst = $dst$$Register;
 5749     __ movl(Rdst, $mem$$Address);
 5750     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5751     __ andl(Rdst, $mask$$constant);
 5752   %}
 5753   ins_pipe(ialu_reg_mem);
 5754 %}
 5755 
 5756 // Load Unsigned Integer into Long Register
 5757 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5758   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5759   effect(KILL cr);
 5760 
 5761   ins_cost(250);
 5762   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5763             "XOR    $dst.hi,$dst.hi" %}
 5764 
 5765   ins_encode %{
 5766     __ movl($dst$$Register, $mem$$Address);
 5767     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5768   %}
 5769 
 5770   ins_pipe(ialu_reg_mem);
 5771 %}
 5772 
 5773 // Load Long.  Cannot clobber address while loading, so restrict address
 5774 // register to ESI
 5775 instruct loadL(eRegL dst, load_long_memory mem) %{
 5776   predicate(!((LoadLNode*)n)->require_atomic_access());
 5777   match(Set dst (LoadL mem));
 5778 
 5779   ins_cost(250);
 5780   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5781             "MOV    $dst.hi,$mem+4" %}
 5782 
 5783   ins_encode %{
 5784     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5785     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5786     __ movl($dst$$Register, Amemlo);
 5787     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5788   %}
 5789 
 5790   ins_pipe(ialu_reg_long_mem);
 5791 %}
 5792 
 5793 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5794 // then store it down to the stack and reload on the int
 5795 // side.
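// (The 64-bit x87 FILD/FISTP pair, like the 64-bit MOVSD used by the SSE2
// variants below, performs the 8-byte memory access as a single operation,
// which is what provides the required atomicity on 32-bit x86.)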
 5796 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5797   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5798   match(Set dst (LoadL mem));
 5799 
 5800   ins_cost(200);
 5801   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5802             "FISTp  $dst" %}
 5803   ins_encode(enc_loadL_volatile(mem,dst));
 5804   ins_pipe( fpu_reg_mem );
 5805 %}
 5806 
 5807 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5808   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5809   match(Set dst (LoadL mem));
 5810   effect(TEMP tmp);
 5811   ins_cost(180);
 5812   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5813             "MOVSD  $dst,$tmp" %}
 5814   ins_encode %{
 5815     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5816     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5817   %}
 5818   ins_pipe( pipe_slow );
 5819 %}
 5820 
 5821 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5822   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5823   match(Set dst (LoadL mem));
 5824   effect(TEMP tmp);
 5825   ins_cost(160);
 5826   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5827             "MOVD   $dst.lo,$tmp\n\t"
 5828             "PSRLQ  $tmp,32\n\t"
 5829             "MOVD   $dst.hi,$tmp" %}
 5830   ins_encode %{
 5831     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5832     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5833     __ psrlq($tmp$$XMMRegister, 32);
 5834     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5835   %}
 5836   ins_pipe( pipe_slow );
 5837 %}
 5838 
 5839 // Load Range
 5840 instruct loadRange(rRegI dst, memory mem) %{
 5841   match(Set dst (LoadRange mem));
 5842 
 5843   ins_cost(125);
 5844   format %{ "MOV    $dst,$mem" %}
 5845   opcode(0x8B);
 5846   ins_encode( OpcP, RegMem(dst,mem));
 5847   ins_pipe( ialu_reg_mem );
 5848 %}
 5849 
 5850 
 5851 // Load Pointer
 5852 instruct loadP(eRegP dst, memory mem) %{
 5853   match(Set dst (LoadP mem));
 5854 
 5855   ins_cost(125);
 5856   format %{ "MOV    $dst,$mem" %}
 5857   opcode(0x8B);
 5858   ins_encode( OpcP, RegMem(dst,mem));
 5859   ins_pipe( ialu_reg_mem );
 5860 %}
 5861 
 5862 // Load Klass Pointer
 5863 instruct loadKlass(eRegP dst, memory mem) %{
 5864   match(Set dst (LoadKlass mem));
 5865 
 5866   ins_cost(125);
 5867   format %{ "MOV    $dst,$mem" %}
 5868   opcode(0x8B);
 5869   ins_encode( OpcP, RegMem(dst,mem));
 5870   ins_pipe( ialu_reg_mem );
 5871 %}
 5872 
 5873 // Load Double
 5874 instruct loadDPR(regDPR dst, memory mem) %{
 5875   predicate(UseSSE<=1);
 5876   match(Set dst (LoadD mem));
 5877 
 5878   ins_cost(150);
 5879   format %{ "FLD_D  ST,$mem\n\t"
 5880             "FSTP   $dst" %}
 5881   opcode(0xDD);               /* DD /0 */
 5882   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5883               Pop_Reg_DPR(dst) );
 5884   ins_pipe( fpu_reg_mem );
 5885 %}
 5886 
 5887 // Load Double to XMM
 5888 instruct loadD(regD dst, memory mem) %{
 5889   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5890   match(Set dst (LoadD mem));
 5891   ins_cost(145);
 5892   format %{ "MOVSD  $dst,$mem" %}
 5893   ins_encode %{
 5894     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5895   %}
 5896   ins_pipe( pipe_slow );
 5897 %}
 5898 
 5899 instruct loadD_partial(regD dst, memory mem) %{
 5900   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5901   match(Set dst (LoadD mem));
 5902   ins_cost(145);
 5903   format %{ "MOVLPD $dst,$mem" %}
 5904   ins_encode %{
 5905     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5906   %}
 5907   ins_pipe( pipe_slow );
 5908 %}
 5909 
 5910 // Load to XMM register (single-precision floating point)
 5911 // MOVSS instruction
 5912 instruct loadF(regF dst, memory mem) %{
 5913   predicate(UseSSE>=1);
 5914   match(Set dst (LoadF mem));
 5915   ins_cost(145);
 5916   format %{ "MOVSS  $dst,$mem" %}
 5917   ins_encode %{
 5918     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5919   %}
 5920   ins_pipe( pipe_slow );
 5921 %}
 5922 
 5923 // Load Float
 5924 instruct loadFPR(regFPR dst, memory mem) %{
 5925   predicate(UseSSE==0);
 5926   match(Set dst (LoadF mem));
 5927 
 5928   ins_cost(150);
 5929   format %{ "FLD_S  ST,$mem\n\t"
 5930             "FSTP   $dst" %}
 5931   opcode(0xD9);               /* D9 /0 */
 5932   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5933               Pop_Reg_FPR(dst) );
 5934   ins_pipe( fpu_reg_mem );
 5935 %}
 5936 
 5937 // Load Effective Address
 5938 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5939   match(Set dst mem);
 5940 
 5941   ins_cost(110);
 5942   format %{ "LEA    $dst,$mem" %}
 5943   opcode(0x8D);
 5944   ins_encode( OpcP, RegMem(dst,mem));
 5945   ins_pipe( ialu_reg_reg_fat );
 5946 %}
 5947 
 5948 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5949   match(Set dst mem);
 5950 
 5951   ins_cost(110);
 5952   format %{ "LEA    $dst,$mem" %}
 5953   opcode(0x8D);
 5954   ins_encode( OpcP, RegMem(dst,mem));
 5955   ins_pipe( ialu_reg_reg_fat );
 5956 %}
 5957 
 5958 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5959   match(Set dst mem);
 5960 
 5961   ins_cost(110);
 5962   format %{ "LEA    $dst,$mem" %}
 5963   opcode(0x8D);
 5964   ins_encode( OpcP, RegMem(dst,mem));
 5965   ins_pipe( ialu_reg_reg_fat );
 5966 %}
 5967 
 5968 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5969   match(Set dst mem);
 5970 
 5971   ins_cost(110);
 5972   format %{ "LEA    $dst,$mem" %}
 5973   opcode(0x8D);
 5974   ins_encode( OpcP, RegMem(dst,mem));
 5975   ins_pipe( ialu_reg_reg_fat );
 5976 %}
 5977 
 5978 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5979   match(Set dst mem);
 5980 
 5981   ins_cost(110);
 5982   format %{ "LEA    $dst,$mem" %}
 5983   opcode(0x8D);
 5984   ins_encode( OpcP, RegMem(dst,mem));
 5985   ins_pipe( ialu_reg_reg_fat );
 5986 %}
 5987 
 5988 // Load Constant
 5989 instruct loadConI(rRegI dst, immI src) %{
 5990   match(Set dst src);
 5991 
 5992   format %{ "MOV    $dst,$src" %}
 5993   ins_encode( LdImmI(dst, src) );
 5994   ins_pipe( ialu_reg_fat );
 5995 %}
 5996 
 5997 // Load Constant zero
 5998 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5999   match(Set dst src);
 6000   effect(KILL cr);
 6001 
 6002   ins_cost(50);
 6003   format %{ "XOR    $dst,$dst" %}
 6004   opcode(0x33);  /* + rd */
 6005   ins_encode( OpcP, RegReg( dst, dst ) );
 6006   ins_pipe( ialu_reg );
 6007 %}
 6008 
 6009 instruct loadConP(eRegP dst, immP src) %{
 6010   match(Set dst src);
 6011 
 6012   format %{ "MOV    $dst,$src" %}
 6013   opcode(0xB8);  /* + rd */
 6014   ins_encode( LdImmP(dst, src) );
 6015   ins_pipe( ialu_reg_fat );
 6016 %}
 6017 
 6018 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6019   match(Set dst src);
 6020   effect(KILL cr);
 6021   ins_cost(200);
 6022   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6023             "MOV    $dst.hi,$src.hi" %}
 6024   opcode(0xB8);
 6025   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6026   ins_pipe( ialu_reg_long_fat );
 6027 %}
 6028 
 6029 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6030   match(Set dst src);
 6031   effect(KILL cr);
 6032   ins_cost(150);
 6033   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6034             "XOR    $dst.hi,$dst.hi" %}
 6035   opcode(0x33,0x33);
 6036   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6037   ins_pipe( ialu_reg_long );
 6038 %}
 6039 
 6040 // The instruction usage is guarded by predicate in operand immFPR().
 6041 instruct loadConFPR(regFPR dst, immFPR con) %{
 6042   match(Set dst con);
 6043   ins_cost(125);
 6044   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6045             "FSTP   $dst" %}
 6046   ins_encode %{
 6047     __ fld_s($constantaddress($con));
 6048     __ fstp_d($dst$$reg);
 6049   %}
 6050   ins_pipe(fpu_reg_con);
 6051 %}
 6052 
 6053 // The instruction usage is guarded by predicate in operand immFPR0().
 6054 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6055   match(Set dst con);
 6056   ins_cost(125);
 6057   format %{ "FLDZ   ST\n\t"
 6058             "FSTP   $dst" %}
 6059   ins_encode %{
 6060     __ fldz();
 6061     __ fstp_d($dst$$reg);
 6062   %}
 6063   ins_pipe(fpu_reg_con);
 6064 %}
 6065 
 6066 // The instruction usage is guarded by predicate in operand immFPR1().
 6067 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6068   match(Set dst con);
 6069   ins_cost(125);
 6070   format %{ "FLD1   ST\n\t"
 6071             "FSTP   $dst" %}
 6072   ins_encode %{
 6073     __ fld1();
 6074     __ fstp_d($dst$$reg);
 6075   %}
 6076   ins_pipe(fpu_reg_con);
 6077 %}
 6078 
 6079 // The instruction usage is guarded by predicate in operand immF().
 6080 instruct loadConF(regF dst, immF con) %{
 6081   match(Set dst con);
 6082   ins_cost(125);
 6083   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6084   ins_encode %{
 6085     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6086   %}
 6087   ins_pipe(pipe_slow);
 6088 %}
 6089 
 6090 // The instruction usage is guarded by predicate in operand immF0().
 6091 instruct loadConF0(regF dst, immF0 src) %{
 6092   match(Set dst src);
 6093   ins_cost(100);
 6094   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6095   ins_encode %{
 6096     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6097   %}
 6098   ins_pipe(pipe_slow);
 6099 %}
 6100 
 6101 // The instruction usage is guarded by predicate in operand immDPR().
 6102 instruct loadConDPR(regDPR dst, immDPR con) %{
 6103   match(Set dst con);
 6104   ins_cost(125);
 6105 
 6106   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6107             "FSTP   $dst" %}
 6108   ins_encode %{
 6109     __ fld_d($constantaddress($con));
 6110     __ fstp_d($dst$$reg);
 6111   %}
 6112   ins_pipe(fpu_reg_con);
 6113 %}
 6114 
 6115 // The instruction usage is guarded by predicate in operand immDPR0().
 6116 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6117   match(Set dst con);
 6118   ins_cost(125);
 6119 
 6120   format %{ "FLDZ   ST\n\t"
 6121             "FSTP   $dst" %}
 6122   ins_encode %{
 6123     __ fldz();
 6124     __ fstp_d($dst$$reg);
 6125   %}
 6126   ins_pipe(fpu_reg_con);
 6127 %}
 6128 
 6129 // The instruction usage is guarded by predicate in operand immDPR1().
 6130 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6131   match(Set dst con);
 6132   ins_cost(125);
 6133 
 6134   format %{ "FLD1   ST\n\t"
 6135             "FSTP   $dst" %}
 6136   ins_encode %{
 6137     __ fld1();
 6138     __ fstp_d($dst$$reg);
 6139   %}
 6140   ins_pipe(fpu_reg_con);
 6141 %}
 6142 
 6143 // The instruction usage is guarded by predicate in operand immD().
 6144 instruct loadConD(regD dst, immD con) %{
 6145   match(Set dst con);
 6146   ins_cost(125);
 6147   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6148   ins_encode %{
 6149     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6150   %}
 6151   ins_pipe(pipe_slow);
 6152 %}
 6153 
 6154 // The instruction usage is guarded by predicate in operand immD0().
 6155 instruct loadConD0(regD dst, immD0 src) %{
 6156   match(Set dst src);
 6157   ins_cost(100);
 6158   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6159   ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 6161   %}
 6162   ins_pipe( pipe_slow );
 6163 %}
 6164 
 6165 // Load Stack Slot
 6166 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6167   match(Set dst src);
 6168   ins_cost(125);
 6169 
 6170   format %{ "MOV    $dst,$src" %}
 6171   opcode(0x8B);
 6172   ins_encode( OpcP, RegMem(dst,src));
 6173   ins_pipe( ialu_reg_mem );
 6174 %}
 6175 
 6176 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6177   match(Set dst src);
 6178 
 6179   ins_cost(200);
 6180   format %{ "MOV    $dst,$src.lo\n\t"
 6181             "MOV    $dst+4,$src.hi" %}
 6182   opcode(0x8B, 0x8B);
 6183   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6184   ins_pipe( ialu_mem_long_reg );
 6185 %}
 6186 
 6187 // Load Stack Slot
 6188 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6189   match(Set dst src);
 6190   ins_cost(125);
 6191 
 6192   format %{ "MOV    $dst,$src" %}
 6193   opcode(0x8B);
 6194   ins_encode( OpcP, RegMem(dst,src));
 6195   ins_pipe( ialu_reg_mem );
 6196 %}
 6197 
 6198 // Load Stack Slot
 6199 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6200   match(Set dst src);
 6201   ins_cost(125);
 6202 
 6203   format %{ "FLD_S  $src\n\t"
 6204             "FSTP   $dst" %}
 6205   opcode(0xD9);               /* D9 /0, FLD m32real */
 6206   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6207               Pop_Reg_FPR(dst) );
 6208   ins_pipe( fpu_reg_mem );
 6209 %}
 6210 
 6211 // Load Stack Slot
 6212 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6213   match(Set dst src);
 6214   ins_cost(125);
 6215 
 6216   format %{ "FLD_D  $src\n\t"
 6217             "FSTP   $dst" %}
 6218   opcode(0xDD);               /* DD /0, FLD m64real */
 6219   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6220               Pop_Reg_DPR(dst) );
 6221   ins_pipe( fpu_reg_mem );
 6222 %}
 6223 
 6224 // Prefetch instructions for allocation.
 6225 // Must be safe to execute with invalid address (cannot fault).
 6226 
 6227 instruct prefetchAlloc0( memory mem ) %{
 6228   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6229   match(PrefetchAllocation mem);
 6230   ins_cost(0);
 6231   size(0);
 6232   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6233   ins_encode();
 6234   ins_pipe(empty);
 6235 %}
 6236 
 6237 instruct prefetchAlloc( memory mem ) %{
 6238   predicate(AllocatePrefetchInstr==3);
 6239   match( PrefetchAllocation mem );
 6240   ins_cost(100);
 6241 
 6242   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6243   ins_encode %{
 6244     __ prefetchw($mem$$Address);
 6245   %}
 6246   ins_pipe(ialu_mem);
 6247 %}
 6248 
 6249 instruct prefetchAllocNTA( memory mem ) %{
 6250   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6251   match(PrefetchAllocation mem);
 6252   ins_cost(100);
 6253 
 6254   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6255   ins_encode %{
 6256     __ prefetchnta($mem$$Address);
 6257   %}
 6258   ins_pipe(ialu_mem);
 6259 %}
 6260 
 6261 instruct prefetchAllocT0( memory mem ) %{
 6262   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6263   match(PrefetchAllocation mem);
 6264   ins_cost(100);
 6265 
 6266   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6267   ins_encode %{
 6268     __ prefetcht0($mem$$Address);
 6269   %}
 6270   ins_pipe(ialu_mem);
 6271 %}
 6272 
 6273 instruct prefetchAllocT2( memory mem ) %{
 6274   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6275   match(PrefetchAllocation mem);
 6276   ins_cost(100);
 6277 
 6278   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6279   ins_encode %{
 6280     __ prefetcht2($mem$$Address);
 6281   %}
 6282   ins_pipe(ialu_mem);
 6283 %}
 6284 
 6285 //----------Store Instructions-------------------------------------------------
 6286 
 6287 // Store Byte
 6288 instruct storeB(memory mem, xRegI src) %{
 6289   match(Set mem (StoreB mem src));
 6290 
 6291   ins_cost(125);
 6292   format %{ "MOV8   $mem,$src" %}
 6293   opcode(0x88);
 6294   ins_encode( OpcP, RegMem( src, mem ) );
 6295   ins_pipe( ialu_mem_reg );
 6296 %}
 6297 
 6298 // Store Char/Short
 6299 instruct storeC(memory mem, rRegI src) %{
 6300   match(Set mem (StoreC mem src));
 6301 
 6302   ins_cost(125);
 6303   format %{ "MOV16  $mem,$src" %}
 6304   opcode(0x89, 0x66);
 6305   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6306   ins_pipe( ialu_mem_reg );
 6307 %}
 6308 
 6309 // Store Integer
 6310 instruct storeI(memory mem, rRegI src) %{
 6311   match(Set mem (StoreI mem src));
 6312 
 6313   ins_cost(125);
 6314   format %{ "MOV    $mem,$src" %}
 6315   opcode(0x89);
 6316   ins_encode( OpcP, RegMem( src, mem ) );
 6317   ins_pipe( ialu_mem_reg );
 6318 %}
 6319 
 6320 // Store Long
 6321 instruct storeL(long_memory mem, eRegL src) %{
 6322   predicate(!((StoreLNode*)n)->require_atomic_access());
 6323   match(Set mem (StoreL mem src));
 6324 
 6325   ins_cost(200);
 6326   format %{ "MOV    $mem,$src.lo\n\t"
 6327             "MOV    $mem+4,$src.hi" %}
 6328   opcode(0x89, 0x89);
 6329   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6330   ins_pipe( ialu_mem_long_reg );
 6331 %}
 6332 
 6333 // Store Long to Integer
 6334 instruct storeL2I(memory mem, eRegL src) %{
 6335   match(Set mem (StoreI mem (ConvL2I src)));
 6336 
 6337   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6338   ins_encode %{
 6339     __ movl($mem$$Address, $src$$Register);
 6340   %}
 6341   ins_pipe(ialu_mem_reg);
 6342 %}
 6343 
 6344 // Volatile Store Long.  Must be atomic, so move it into
 6345 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6346 // target address before the store (for null-ptr checks)
 6347 // so the memory operand is used twice in the encoding.
 6348 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6349   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6350   match(Set mem (StoreL mem src));
 6351   effect( KILL cr );
 6352   ins_cost(400);
 6353   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6354             "FILD   $src\n\t"
 6355             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6356   opcode(0x3B);
 6357   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6358   ins_pipe( fpu_reg_mem );
 6359 %}
 6360 
 6361 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6362   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6363   match(Set mem (StoreL mem src));
 6364   effect( TEMP tmp, KILL cr );
 6365   ins_cost(380);
 6366   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6367             "MOVSD  $tmp,$src\n\t"
 6368             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6369   ins_encode %{
 6370     __ cmpl(rax, $mem$$Address);
 6371     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6372     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6373   %}
 6374   ins_pipe( pipe_slow );
 6375 %}
 6376 
 6377 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6378   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6379   match(Set mem (StoreL mem src));
 6380   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6381   ins_cost(360);
 6382   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6383             "MOVD   $tmp,$src.lo\n\t"
 6384             "MOVD   $tmp2,$src.hi\n\t"
 6385             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6386             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6387   ins_encode %{
 6388     __ cmpl(rax, $mem$$Address);
 6389     __ movdl($tmp$$XMMRegister, $src$$Register);
 6390     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6391     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6392     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6393   %}
 6394   ins_pipe( pipe_slow );
 6395 %}
 6396 
 6397 // Store Pointer; for storing unknown oops and raw pointers
 6398 instruct storeP(memory mem, anyRegP src) %{
 6399   match(Set mem (StoreP mem src));
 6400 
 6401   ins_cost(125);
 6402   format %{ "MOV    $mem,$src" %}
 6403   opcode(0x89);
 6404   ins_encode( OpcP, RegMem( src, mem ) );
 6405   ins_pipe( ialu_mem_reg );
 6406 %}
 6407 
 6408 // Store Integer Immediate
 6409 instruct storeImmI(memory mem, immI src) %{
 6410   match(Set mem (StoreI mem src));
 6411 
 6412   ins_cost(150);
 6413   format %{ "MOV    $mem,$src" %}
 6414   opcode(0xC7);               /* C7 /0 */
 6415   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6416   ins_pipe( ialu_mem_imm );
 6417 %}
 6418 
 6419 // Store Short/Char Immediate
 6420 instruct storeImmI16(memory mem, immI16 src) %{
 6421   predicate(UseStoreImmI16);
 6422   match(Set mem (StoreC mem src));
 6423 
 6424   ins_cost(150);
 6425   format %{ "MOV16  $mem,$src" %}
 6426   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6427   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6428   ins_pipe( ialu_mem_imm );
 6429 %}
 6430 
 6431 // Store Pointer Immediate; null pointers or constant oops that do not
 6432 // need card-mark barriers.
 6433 instruct storeImmP(memory mem, immP src) %{
 6434   match(Set mem (StoreP mem src));
 6435 
 6436   ins_cost(150);
 6437   format %{ "MOV    $mem,$src" %}
 6438   opcode(0xC7);               /* C7 /0 */
 6439   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6440   ins_pipe( ialu_mem_imm );
 6441 %}
 6442 
 6443 // Store Byte Immediate
 6444 instruct storeImmB(memory mem, immI8 src) %{
 6445   match(Set mem (StoreB mem src));
 6446 
 6447   ins_cost(150);
 6448   format %{ "MOV8   $mem,$src" %}
 6449   opcode(0xC6);               /* C6 /0 */
 6450   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6451   ins_pipe( ialu_mem_imm );
 6452 %}
 6453 
 6454 // Store CMS card-mark Immediate
 6455 instruct storeImmCM(memory mem, immI8 src) %{
 6456   match(Set mem (StoreCM mem src));
 6457 
 6458   ins_cost(150);
 6459   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6460   opcode(0xC6);               /* C6 /0 */
 6461   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6462   ins_pipe( ialu_mem_imm );
 6463 %}
 6464 
 6465 // Store Double
 6466 instruct storeDPR( memory mem, regDPR1 src) %{
 6467   predicate(UseSSE<=1);
 6468   match(Set mem (StoreD mem src));
 6469 
 6470   ins_cost(100);
 6471   format %{ "FST_D  $mem,$src" %}
 6472   opcode(0xDD);       /* DD /2 */
 6473   ins_encode( enc_FPR_store(mem,src) );
 6474   ins_pipe( fpu_mem_reg );
 6475 %}
 6476 
 6477 // Store double does rounding on x86
 6478 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6479   predicate(UseSSE<=1);
 6480   match(Set mem (StoreD mem (RoundDouble src)));
 6481 
 6482   ins_cost(100);
 6483   format %{ "FST_D  $mem,$src\t# round" %}
 6484   opcode(0xDD);       /* DD /2 */
 6485   ins_encode( enc_FPR_store(mem,src) );
 6486   ins_pipe( fpu_mem_reg );
 6487 %}
 6488 
// Store XMM register to memory (double-precision floating point)
 6490 // MOVSD instruction
 6491 instruct storeD(memory mem, regD src) %{
 6492   predicate(UseSSE>=2);
 6493   match(Set mem (StoreD mem src));
 6494   ins_cost(95);
 6495   format %{ "MOVSD  $mem,$src" %}
 6496   ins_encode %{
 6497     __ movdbl($mem$$Address, $src$$XMMRegister);
 6498   %}
 6499   ins_pipe( pipe_slow );
 6500 %}
 6501 
 6502 // Store XMM register to memory (single-precision floating point)
 6503 // MOVSS instruction
 6504 instruct storeF(memory mem, regF src) %{
 6505   predicate(UseSSE>=1);
 6506   match(Set mem (StoreF mem src));
 6507   ins_cost(95);
 6508   format %{ "MOVSS  $mem,$src" %}
 6509   ins_encode %{
 6510     __ movflt($mem$$Address, $src$$XMMRegister);
 6511   %}
 6512   ins_pipe( pipe_slow );
 6513 %}
 6514 
 6515 
 6516 // Store Float
 6517 instruct storeFPR( memory mem, regFPR1 src) %{
 6518   predicate(UseSSE==0);
 6519   match(Set mem (StoreF mem src));
 6520 
 6521   ins_cost(100);
 6522   format %{ "FST_S  $mem,$src" %}
 6523   opcode(0xD9);       /* D9 /2 */
 6524   ins_encode( enc_FPR_store(mem,src) );
 6525   ins_pipe( fpu_mem_reg );
 6526 %}
 6527 
 6528 // Store Float does rounding on x86
 6529 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6530   predicate(UseSSE==0);
 6531   match(Set mem (StoreF mem (RoundFloat src)));
 6532 
 6533   ins_cost(100);
 6534   format %{ "FST_S  $mem,$src\t# round" %}
 6535   opcode(0xD9);       /* D9 /2 */
 6536   ins_encode( enc_FPR_store(mem,src) );
 6537   ins_pipe( fpu_mem_reg );
 6538 %}
 6539 
// Store Float converted from Double does rounding on x86
 6541 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6542   predicate(UseSSE<=1);
 6543   match(Set mem (StoreF mem (ConvD2F src)));
 6544 
 6545   ins_cost(100);
 6546   format %{ "FST_S  $mem,$src\t# D-round" %}
 6547   opcode(0xD9);       /* D9 /2 */
 6548   ins_encode( enc_FPR_store(mem,src) );
 6549   ins_pipe( fpu_mem_reg );
 6550 %}
 6551 
// Store immediate Float value (faster than storing from an FPU register)
 6553 // The instruction usage is guarded by predicate in operand immFPR().
 6554 instruct storeFPR_imm( memory mem, immFPR src) %{
 6555   match(Set mem (StoreF mem src));
 6556 
 6557   ins_cost(50);
 6558   format %{ "MOV    $mem,$src\t# store float" %}
 6559   opcode(0xC7);               /* C7 /0 */
 6560   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6561   ins_pipe( ialu_mem_imm );
 6562 %}
 6563 
// Store immediate Float value (faster than storing from an XMM register)
 6565 // The instruction usage is guarded by predicate in operand immF().
 6566 instruct storeF_imm( memory mem, immF src) %{
 6567   match(Set mem (StoreF mem src));
 6568 
 6569   ins_cost(50);
 6570   format %{ "MOV    $mem,$src\t# store float" %}
 6571   opcode(0xC7);               /* C7 /0 */
 6572   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6573   ins_pipe( ialu_mem_imm );
 6574 %}
 6575 
 6576 // Store Integer to stack slot
 6577 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6578   match(Set dst src);
 6579 
 6580   ins_cost(100);
 6581   format %{ "MOV    $dst,$src" %}
 6582   opcode(0x89);
 6583   ins_encode( OpcPRegSS( dst, src ) );
 6584   ins_pipe( ialu_mem_reg );
 6585 %}
 6586 
// Store Pointer to stack slot
 6588 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6589   match(Set dst src);
 6590 
 6591   ins_cost(100);
 6592   format %{ "MOV    $dst,$src" %}
 6593   opcode(0x89);
 6594   ins_encode( OpcPRegSS( dst, src ) );
 6595   ins_pipe( ialu_mem_reg );
 6596 %}
 6597 
 6598 // Store Long to stack slot
 6599 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6600   match(Set dst src);
 6601 
 6602   ins_cost(200);
 6603   format %{ "MOV    $dst,$src.lo\n\t"
 6604             "MOV    $dst+4,$src.hi" %}
 6605   opcode(0x89, 0x89);
 6606   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6607   ins_pipe( ialu_mem_long_reg );
 6608 %}
 6609 
 6610 //----------MemBar Instructions-----------------------------------------------
 6611 // Memory barrier flavors
 6612 
 6613 instruct membar_acquire() %{
 6614   match(MemBarAcquire);
 6615   match(LoadFence);
 6616   ins_cost(400);
 6617 
 6618   size(0);
 6619   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6620   ins_encode();
 6621   ins_pipe(empty);
 6622 %}
 6623 
 6624 instruct membar_acquire_lock() %{
 6625   match(MemBarAcquireLock);
 6626   ins_cost(0);
 6627 
 6628   size(0);
 6629   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6630   ins_encode( );
 6631   ins_pipe(empty);
 6632 %}
 6633 
 6634 instruct membar_release() %{
 6635   match(MemBarRelease);
 6636   match(StoreFence);
 6637   ins_cost(400);
 6638 
 6639   size(0);
 6640   format %{ "MEMBAR-release ! (empty encoding)" %}
 6641   ins_encode( );
 6642   ins_pipe(empty);
 6643 %}
 6644 
 6645 instruct membar_release_lock() %{
 6646   match(MemBarReleaseLock);
 6647   ins_cost(0);
 6648 
 6649   size(0);
 6650   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6651   ins_encode( );
 6652   ins_pipe(empty);
 6653 %}
 6654 
 6655 instruct membar_volatile(eFlagsReg cr) %{
 6656   match(MemBarVolatile);
 6657   effect(KILL cr);
 6658   ins_cost(400);
 6659 
 6660   format %{
 6661     $$template
 6662     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6663   %}
 6664   ins_encode %{
 6665     __ membar(Assembler::StoreLoad);
 6666   %}
 6667   ins_pipe(pipe_slow);
 6668 %}
 6669 
 6670 instruct unnecessary_membar_volatile() %{
 6671   match(MemBarVolatile);
 6672   predicate(Matcher::post_store_load_barrier(n));
 6673   ins_cost(0);
 6674 
 6675   size(0);
 6676   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6677   ins_encode( );
 6678   ins_pipe(empty);
 6679 %}
 6680 
 6681 instruct membar_storestore() %{
 6682   match(MemBarStoreStore);
 6683   match(StoreStoreFence);
 6684   ins_cost(0);
 6685 
 6686   size(0);
 6687   format %{ "MEMBAR-storestore (empty encoding)" %}
 6688   ins_encode( );
 6689   ins_pipe(empty);
 6690 %}
 6691 
 6692 //----------Move Instructions--------------------------------------------------
 6693 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6694   match(Set dst (CastX2P src));
 6695   format %{ "# X2P  $dst, $src" %}
 6696   ins_encode( /*empty encoding*/ );
 6697   ins_cost(0);
 6698   ins_pipe(empty);
 6699 %}
 6700 
 6701 instruct castP2X(rRegI dst, eRegP src ) %{
 6702   match(Set dst (CastP2X src));
 6703   ins_cost(50);
 6704   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6705   ins_encode( enc_Copy( dst, src) );
 6706   ins_pipe( ialu_reg_reg );
 6707 %}
 6708 
 6709 //----------Conditional Move---------------------------------------------------
 6710 // Conditional move
 6711 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6712   predicate(!VM_Version::supports_cmov() );
 6713   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6714   ins_cost(200);
 6715   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6716             "MOV    $dst,$src\n"
 6717       "skip:" %}
 6718   ins_encode %{
 6719     Label Lskip;
 6720     // Invert sense of branch from sense of CMOV
 6721     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6722     __ movl($dst$$Register, $src$$Register);
 6723     __ bind(Lskip);
 6724   %}
 6725   ins_pipe( pipe_cmov_reg );
 6726 %}
 6727 
 6728 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6729   predicate(!VM_Version::supports_cmov() );
 6730   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6731   ins_cost(200);
 6732   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6733             "MOV    $dst,$src\n"
 6734       "skip:" %}
 6735   ins_encode %{
 6736     Label Lskip;
 6737     // Invert sense of branch from sense of CMOV
 6738     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6739     __ movl($dst$$Register, $src$$Register);
 6740     __ bind(Lskip);
 6741   %}
 6742   ins_pipe( pipe_cmov_reg );
 6743 %}
 6744 
 6745 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6746   predicate(VM_Version::supports_cmov() );
 6747   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6748   ins_cost(200);
 6749   format %{ "CMOV$cop $dst,$src" %}
 6750   opcode(0x0F,0x40);
 6751   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6752   ins_pipe( pipe_cmov_reg );
 6753 %}
 6754 
 6755 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6756   predicate(VM_Version::supports_cmov() );
 6757   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6758   ins_cost(200);
 6759   format %{ "CMOV$cop $dst,$src" %}
 6760   opcode(0x0F,0x40);
 6761   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6762   ins_pipe( pipe_cmov_reg );
 6763 %}
 6764 
 6765 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6766   predicate(VM_Version::supports_cmov() );
 6767   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6768   ins_cost(200);
 6769   expand %{
 6770     cmovI_regU(cop, cr, dst, src);
 6771   %}
 6772 %}
 6773 
 6774 // Conditional move
 6775 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6776   predicate(VM_Version::supports_cmov() );
 6777   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6778   ins_cost(250);
 6779   format %{ "CMOV$cop $dst,$src" %}
 6780   opcode(0x0F,0x40);
 6781   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6782   ins_pipe( pipe_cmov_mem );
 6783 %}
 6784 
 6785 // Conditional move
 6786 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6787   predicate(VM_Version::supports_cmov() );
 6788   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6789   ins_cost(250);
 6790   format %{ "CMOV$cop $dst,$src" %}
 6791   opcode(0x0F,0x40);
 6792   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6793   ins_pipe( pipe_cmov_mem );
 6794 %}
 6795 
 6796 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6797   predicate(VM_Version::supports_cmov() );
 6798   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6799   ins_cost(250);
 6800   expand %{
 6801     cmovI_memU(cop, cr, dst, src);
 6802   %}
 6803 %}
 6804 
 6805 // Conditional move
 6806 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6807   predicate(VM_Version::supports_cmov() );
 6808   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6809   ins_cost(200);
 6810   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6811   opcode(0x0F,0x40);
 6812   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6813   ins_pipe( pipe_cmov_reg );
 6814 %}
 6815 
 6816 // Conditional move (non-P6 version)
 6817 // Note:  a CMoveP is generated for  stubs and native wrappers
 6818 //        regardless of whether we are on a P6, so we
 6819 //        emulate a cmov here
 6820 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6821   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6822   ins_cost(300);
 6823   format %{ "Jn$cop   skip\n\t"
 6824           "MOV    $dst,$src\t# pointer\n"
 6825       "skip:" %}
 6826   opcode(0x8b);
 6827   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6828   ins_pipe( pipe_cmov_reg );
 6829 %}
 6830 
 6831 // Conditional move
 6832 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6833   predicate(VM_Version::supports_cmov() );
 6834   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6835   ins_cost(200);
 6836   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6837   opcode(0x0F,0x40);
 6838   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6839   ins_pipe( pipe_cmov_reg );
 6840 %}
 6841 
 6842 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6843   predicate(VM_Version::supports_cmov() );
 6844   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6845   ins_cost(200);
 6846   expand %{
 6847     cmovP_regU(cop, cr, dst, src);
 6848   %}
 6849 %}
 6850 
 6851 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6852 // correctly meets the two pointer arguments; one is an incoming
 6853 // register but the other is a memory operand.  ALSO appears to
 6854 // be buggy with implicit null checks.
 6855 //
 6856 //// Conditional move
 6857 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6858 //  predicate(VM_Version::supports_cmov() );
 6859 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6860 //  ins_cost(250);
 6861 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6862 //  opcode(0x0F,0x40);
 6863 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6864 //  ins_pipe( pipe_cmov_mem );
 6865 //%}
 6866 //
 6867 //// Conditional move
 6868 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6869 //  predicate(VM_Version::supports_cmov() );
 6870 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6871 //  ins_cost(250);
 6872 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6873 //  opcode(0x0F,0x40);
 6874 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6875 //  ins_pipe( pipe_cmov_mem );
 6876 //%}
 6877 
 6878 // Conditional move
 6879 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6880   predicate(UseSSE<=1);
 6881   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6882   ins_cost(200);
 6883   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6884   opcode(0xDA);
 6885   ins_encode( enc_cmov_dpr(cop,src) );
 6886   ins_pipe( pipe_cmovDPR_reg );
 6887 %}
 6888 
 6889 // Conditional move
 6890 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6891   predicate(UseSSE==0);
 6892   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6893   ins_cost(200);
 6894   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6895   opcode(0xDA);
 6896   ins_encode( enc_cmov_dpr(cop,src) );
 6897   ins_pipe( pipe_cmovDPR_reg );
 6898 %}
 6899 
 6900 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6901 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6902   predicate(UseSSE<=1);
 6903   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6904   ins_cost(200);
 6905   format %{ "Jn$cop   skip\n\t"
 6906             "MOV    $dst,$src\t# double\n"
 6907       "skip:" %}
 6908   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6909   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6910   ins_pipe( pipe_cmovDPR_reg );
 6911 %}
 6912 
 6913 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6914 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6915   predicate(UseSSE==0);
 6916   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6917   ins_cost(200);
 6918   format %{ "Jn$cop    skip\n\t"
 6919             "MOV    $dst,$src\t# float\n"
 6920       "skip:" %}
 6921   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6922   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6923   ins_pipe( pipe_cmovDPR_reg );
 6924 %}
 6925 
// There is no conditional move for SSE/SSE2 registers, so emulate it with a branch around the move.
 6927 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6928   predicate (UseSSE>=1);
 6929   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6930   ins_cost(200);
 6931   format %{ "Jn$cop   skip\n\t"
 6932             "MOVSS  $dst,$src\t# float\n"
 6933       "skip:" %}
 6934   ins_encode %{
 6935     Label skip;
 6936     // Invert sense of branch from sense of CMOV
 6937     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6938     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6939     __ bind(skip);
 6940   %}
 6941   ins_pipe( pipe_slow );
 6942 %}
 6943 
// There is no conditional move for SSE/SSE2 registers, so emulate it with a branch around the move.
 6945 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6946   predicate (UseSSE>=2);
 6947   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   format %{ "Jn$cop   skip\n\t"
 6950             "MOVSD  $dst,$src\t# float\n"
 6951       "skip:" %}
 6952   ins_encode %{
 6953     Label skip;
 6954     // Invert sense of branch from sense of CMOV
 6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6956     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6957     __ bind(skip);
 6958   %}
 6959   ins_pipe( pipe_slow );
 6960 %}
 6961 
 6962 // unsigned version
 6963 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6964   predicate (UseSSE>=1);
 6965   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6966   ins_cost(200);
 6967   format %{ "Jn$cop   skip\n\t"
 6968             "MOVSS  $dst,$src\t# float\n"
 6969       "skip:" %}
 6970   ins_encode %{
 6971     Label skip;
 6972     // Invert sense of branch from sense of CMOV
 6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6974     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6975     __ bind(skip);
 6976   %}
 6977   ins_pipe( pipe_slow );
 6978 %}
 6979 
 6980 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6981   predicate (UseSSE>=1);
 6982   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6983   ins_cost(200);
 6984   expand %{
 6985     fcmovF_regU(cop, cr, dst, src);
 6986   %}
 6987 %}
 6988 
 6989 // unsigned version
 6990 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6991   predicate (UseSSE>=2);
 6992   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6993   ins_cost(200);
 6994   format %{ "Jn$cop   skip\n\t"
 6995             "MOVSD  $dst,$src\t# float\n"
 6996       "skip:" %}
 6997   ins_encode %{
 6998     Label skip;
 6999     // Invert sense of branch from sense of CMOV
 7000     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7001     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7002     __ bind(skip);
 7003   %}
 7004   ins_pipe( pipe_slow );
 7005 %}
 7006 
 7007 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7008   predicate (UseSSE>=2);
 7009   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7010   ins_cost(200);
 7011   expand %{
 7012     fcmovD_regU(cop, cr, dst, src);
 7013   %}
 7014 %}
 7015 
 7016 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7017   predicate(VM_Version::supports_cmov() );
 7018   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7019   ins_cost(200);
 7020   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7021             "CMOV$cop $dst.hi,$src.hi" %}
 7022   opcode(0x0F,0x40);
 7023   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7024   ins_pipe( pipe_cmov_reg_long );
 7025 %}
 7026 
 7027 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7028   predicate(VM_Version::supports_cmov() );
 7029   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7030   ins_cost(200);
 7031   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7032             "CMOV$cop $dst.hi,$src.hi" %}
 7033   opcode(0x0F,0x40);
 7034   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7035   ins_pipe( pipe_cmov_reg_long );
 7036 %}
 7037 
 7038 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7039   predicate(VM_Version::supports_cmov() );
 7040   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7041   ins_cost(200);
 7042   expand %{
 7043     cmovL_regU(cop, cr, dst, src);
 7044   %}
 7045 %}
 7046 
 7047 //----------Arithmetic Instructions--------------------------------------------
 7048 //----------Addition Instructions----------------------------------------------
 7049 
 7050 // Integer Addition Instructions
 7051 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7052   match(Set dst (AddI dst src));
 7053   effect(KILL cr);
 7054 
 7055   size(2);
 7056   format %{ "ADD    $dst,$src" %}
 7057   opcode(0x03);
 7058   ins_encode( OpcP, RegReg( dst, src) );
 7059   ins_pipe( ialu_reg_reg );
 7060 %}
 7061 
 7062 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7063   match(Set dst (AddI dst src));
 7064   effect(KILL cr);
 7065 
 7066   format %{ "ADD    $dst,$src" %}
 7067   opcode(0x81, 0x00); /* /0 id */
 7068   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7069   ins_pipe( ialu_reg );
 7070 %}
 7071 
 7072 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7073   predicate(UseIncDec);
 7074   match(Set dst (AddI dst src));
 7075   effect(KILL cr);
 7076 
 7077   size(1);
 7078   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd => INC r32 */
 7080   ins_encode( Opc_plus( primary, dst ) );
 7081   ins_pipe( ialu_reg );
 7082 %}
 7083 
 7084 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7085   match(Set dst (AddI src0 src1));
 7086   ins_cost(110);
 7087 
 7088   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7089   opcode(0x8D); /* 0x8D /r */
 7090   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7091   ins_pipe( ialu_reg_reg );
 7092 %}
 7093 
 7094 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7095   match(Set dst (AddP src0 src1));
 7096   ins_cost(110);
 7097 
 7098   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7099   opcode(0x8D); /* 0x8D /r */
 7100   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7101   ins_pipe( ialu_reg_reg );
 7102 %}
 7103 
 7104 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7105   predicate(UseIncDec);
 7106   match(Set dst (AddI dst src));
 7107   effect(KILL cr);
 7108 
 7109   size(1);
 7110   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd => DEC r32 */
 7112   ins_encode( Opc_plus( primary, dst ) );
 7113   ins_pipe( ialu_reg );
 7114 %}
 7115 
 7116 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7117   match(Set dst (AddP dst src));
 7118   effect(KILL cr);
 7119 
 7120   size(2);
 7121   format %{ "ADD    $dst,$src" %}
 7122   opcode(0x03);
 7123   ins_encode( OpcP, RegReg( dst, src) );
 7124   ins_pipe( ialu_reg_reg );
 7125 %}
 7126 
 7127 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7128   match(Set dst (AddP dst src));
 7129   effect(KILL cr);
 7130 
 7131   format %{ "ADD    $dst,$src" %}
 7132   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7133   // ins_encode( RegImm( dst, src) );
 7134   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7135   ins_pipe( ialu_reg );
 7136 %}
 7137 
 7138 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7139   match(Set dst (AddI dst (LoadI src)));
 7140   effect(KILL cr);
 7141 
 7142   ins_cost(150);
 7143   format %{ "ADD    $dst,$src" %}
 7144   opcode(0x03);
 7145   ins_encode( OpcP, RegMem( dst, src) );
 7146   ins_pipe( ialu_reg_mem );
 7147 %}
 7148 
 7149 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7150   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7151   effect(KILL cr);
 7152 
 7153   ins_cost(150);
 7154   format %{ "ADD    $dst,$src" %}
 7155   opcode(0x01);  /* Opcode 01 /r */
 7156   ins_encode( OpcP, RegMem( src, dst ) );
 7157   ins_pipe( ialu_mem_reg );
 7158 %}
 7159 
 7160 // Add Memory with Immediate
 7161 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7162   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7163   effect(KILL cr);
 7164 
 7165   ins_cost(125);
 7166   format %{ "ADD    $dst,$src" %}
 7167   opcode(0x81);               /* Opcode 81 /0 id */
 7168   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7169   ins_pipe( ialu_mem_imm );
 7170 %}
 7171 
 7172 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7173   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7174   effect(KILL cr);
 7175 
 7176   ins_cost(125);
 7177   format %{ "INC    $dst" %}
 7178   opcode(0xFF);               /* Opcode FF /0 */
 7179   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7180   ins_pipe( ialu_mem_imm );
 7181 %}
 7182 
 7183 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7184   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7185   effect(KILL cr);
 7186 
 7187   ins_cost(125);
 7188   format %{ "DEC    $dst" %}
 7189   opcode(0xFF);               /* Opcode FF /1 */
 7190   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7191   ins_pipe( ialu_mem_imm );
 7192 %}
 7193 
 7194 
 7195 instruct checkCastPP( eRegP dst ) %{
 7196   match(Set dst (CheckCastPP dst));
 7197 
 7198   size(0);
 7199   format %{ "#checkcastPP of $dst" %}
 7200   ins_encode( /*empty encoding*/ );
 7201   ins_pipe( empty );
 7202 %}
 7203 
 7204 instruct castPP( eRegP dst ) %{
 7205   match(Set dst (CastPP dst));
 7206   format %{ "#castPP of $dst" %}
 7207   ins_encode( /*empty encoding*/ );
 7208   ins_pipe( empty );
 7209 %}
 7210 
 7211 instruct castII( rRegI dst ) %{
 7212   match(Set dst (CastII dst));
 7213   format %{ "#castII of $dst" %}
 7214   ins_encode( /*empty encoding*/ );
 7215   ins_cost(0);
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castLL( eRegL dst ) %{
 7220   match(Set dst (CastLL dst));
 7221   format %{ "#castLL of $dst" %}
 7222   ins_encode( /*empty encoding*/ );
 7223   ins_cost(0);
 7224   ins_pipe( empty );
 7225 %}
 7226 
 7227 instruct castFF( regF dst ) %{
 7228   predicate(UseSSE >= 1);
 7229   match(Set dst (CastFF dst));
 7230   format %{ "#castFF of $dst" %}
 7231   ins_encode( /*empty encoding*/ );
 7232   ins_cost(0);
 7233   ins_pipe( empty );
 7234 %}
 7235 
 7236 instruct castDD( regD dst ) %{
 7237   predicate(UseSSE >= 2);
 7238   match(Set dst (CastDD dst));
 7239   format %{ "#castDD of $dst" %}
 7240   ins_encode( /*empty encoding*/ );
 7241   ins_cost(0);
 7242   ins_pipe( empty );
 7243 %}
 7244 
 7245 instruct castFF_PR( regFPR dst ) %{
 7246   predicate(UseSSE < 1);
 7247   match(Set dst (CastFF dst));
 7248   format %{ "#castFF of $dst" %}
 7249   ins_encode( /*empty encoding*/ );
 7250   ins_cost(0);
 7251   ins_pipe( empty );
 7252 %}
 7253 
 7254 instruct castDD_PR( regDPR dst ) %{
 7255   predicate(UseSSE < 2);
 7256   match(Set dst (CastDD dst));
 7257   format %{ "#castDD of $dst" %}
 7258   ins_encode( /*empty encoding*/ );
 7259   ins_cost(0);
 7260   ins_pipe( empty );
 7261 %}
 7262 
 7263 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7264 
 7265 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7266   predicate(VM_Version::supports_cx8());
 7267   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7268   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7269   effect(KILL cr, KILL oldval);
 7270   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7271             "MOV    $res,0\n\t"
 7272             "JNE,s  fail\n\t"
 7273             "MOV    $res,1\n"
 7274           "fail:" %}
 7275   ins_encode( enc_cmpxchg8(mem_ptr),
 7276               enc_flags_ne_to_boolean(res) );
 7277   ins_pipe( pipe_cmpxchg );
 7278 %}
 7279 
 7280 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7281   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7282   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7283   effect(KILL cr, KILL oldval);
 7284   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7285             "MOV    $res,0\n\t"
 7286             "JNE,s  fail\n\t"
 7287             "MOV    $res,1\n"
 7288           "fail:" %}
 7289   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7290   ins_pipe( pipe_cmpxchg );
 7291 %}
 7292 
 7293 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7294   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7295   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7296   effect(KILL cr, KILL oldval);
 7297   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7298             "MOV    $res,0\n\t"
 7299             "JNE,s  fail\n\t"
 7300             "MOV    $res,1\n"
 7301           "fail:" %}
 7302   ins_encode( enc_cmpxchgb(mem_ptr),
 7303               enc_flags_ne_to_boolean(res) );
 7304   ins_pipe( pipe_cmpxchg );
 7305 %}
 7306 
 7307 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7308   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7309   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7310   effect(KILL cr, KILL oldval);
 7311   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7312             "MOV    $res,0\n\t"
 7313             "JNE,s  fail\n\t"
 7314             "MOV    $res,1\n"
 7315           "fail:" %}
 7316   ins_encode( enc_cmpxchgw(mem_ptr),
 7317               enc_flags_ne_to_boolean(res) );
 7318   ins_pipe( pipe_cmpxchg );
 7319 %}
 7320 
 7321 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7322   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7323   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7324   effect(KILL cr, KILL oldval);
 7325   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7326             "MOV    $res,0\n\t"
 7327             "JNE,s  fail\n\t"
 7328             "MOV    $res,1\n"
 7329           "fail:" %}
 7330   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7331   ins_pipe( pipe_cmpxchg );
 7332 %}
 7333 
 7334 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7335   predicate(VM_Version::supports_cx8());
 7336   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7337   effect(KILL cr);
 7338   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7339   ins_encode( enc_cmpxchg8(mem_ptr) );
 7340   ins_pipe( pipe_cmpxchg );
 7341 %}
 7342 
 7343 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7344   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7345   effect(KILL cr);
 7346   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7347   ins_encode( enc_cmpxchg(mem_ptr) );
 7348   ins_pipe( pipe_cmpxchg );
 7349 %}
 7350 
 7351 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7352   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7353   effect(KILL cr);
 7354   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7355   ins_encode( enc_cmpxchgb(mem_ptr) );
 7356   ins_pipe( pipe_cmpxchg );
 7357 %}
 7358 
 7359 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7360   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7361   effect(KILL cr);
 7362   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7363   ins_encode( enc_cmpxchgw(mem_ptr) );
 7364   ins_pipe( pipe_cmpxchg );
 7365 %}
 7366 
 7367 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7368   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7369   effect(KILL cr);
 7370   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7371   ins_encode( enc_cmpxchg(mem_ptr) );
 7372   ins_pipe( pipe_cmpxchg );
 7373 %}
 7374 
 7375 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7376   predicate(n->as_LoadStore()->result_not_used());
 7377   match(Set dummy (GetAndAddB mem add));
 7378   effect(KILL cr);
 7379   format %{ "ADDB  [$mem],$add" %}
 7380   ins_encode %{
 7381     __ lock();
 7382     __ addb($mem$$Address, $add$$constant);
 7383   %}
 7384   ins_pipe( pipe_cmpxchg );
 7385 %}
 7386 
 7387 // Important to match to xRegI: only 8-bit regs.
 7388 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7389   match(Set newval (GetAndAddB mem newval));
 7390   effect(KILL cr);
 7391   format %{ "XADDB  [$mem],$newval" %}
 7392   ins_encode %{
 7393     __ lock();
 7394     __ xaddb($mem$$Address, $newval$$Register);
 7395   %}
 7396   ins_pipe( pipe_cmpxchg );
 7397 %}
 7398 
 7399 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7400   predicate(n->as_LoadStore()->result_not_used());
 7401   match(Set dummy (GetAndAddS mem add));
 7402   effect(KILL cr);
 7403   format %{ "ADDS  [$mem],$add" %}
 7404   ins_encode %{
 7405     __ lock();
 7406     __ addw($mem$$Address, $add$$constant);
 7407   %}
 7408   ins_pipe( pipe_cmpxchg );
 7409 %}
 7410 
 7411 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7412   match(Set newval (GetAndAddS mem newval));
 7413   effect(KILL cr);
 7414   format %{ "XADDS  [$mem],$newval" %}
 7415   ins_encode %{
 7416     __ lock();
 7417     __ xaddw($mem$$Address, $newval$$Register);
 7418   %}
 7419   ins_pipe( pipe_cmpxchg );
 7420 %}
 7421 
 7422 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7423   predicate(n->as_LoadStore()->result_not_used());
 7424   match(Set dummy (GetAndAddI mem add));
 7425   effect(KILL cr);
 7426   format %{ "ADDL  [$mem],$add" %}
 7427   ins_encode %{
 7428     __ lock();
 7429     __ addl($mem$$Address, $add$$constant);
 7430   %}
 7431   ins_pipe( pipe_cmpxchg );
 7432 %}
 7433 
 7434 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7435   match(Set newval (GetAndAddI mem newval));
 7436   effect(KILL cr);
 7437   format %{ "XADDL  [$mem],$newval" %}
 7438   ins_encode %{
 7439     __ lock();
 7440     __ xaddl($mem$$Address, $newval$$Register);
 7441   %}
 7442   ins_pipe( pipe_cmpxchg );
 7443 %}
 7444 
 7445 // Important to match to xRegI: only 8-bit regs.
 7446 instruct xchgB( memory mem, xRegI newval) %{
 7447   match(Set newval (GetAndSetB mem newval));
 7448   format %{ "XCHGB  $newval,[$mem]" %}
 7449   ins_encode %{
 7450     __ xchgb($newval$$Register, $mem$$Address);
 7451   %}
 7452   ins_pipe( pipe_cmpxchg );
 7453 %}
 7454 
 7455 instruct xchgS( memory mem, rRegI newval) %{
 7456   match(Set newval (GetAndSetS mem newval));
 7457   format %{ "XCHGW  $newval,[$mem]" %}
 7458   ins_encode %{
 7459     __ xchgw($newval$$Register, $mem$$Address);
 7460   %}
 7461   ins_pipe( pipe_cmpxchg );
 7462 %}
 7463 
 7464 instruct xchgI( memory mem, rRegI newval) %{
 7465   match(Set newval (GetAndSetI mem newval));
 7466   format %{ "XCHGL  $newval,[$mem]" %}
 7467   ins_encode %{
 7468     __ xchgl($newval$$Register, $mem$$Address);
 7469   %}
 7470   ins_pipe( pipe_cmpxchg );
 7471 %}
 7472 
 7473 instruct xchgP( memory mem, pRegP newval) %{
 7474   match(Set newval (GetAndSetP mem newval));
 7475   format %{ "XCHGL  $newval,[$mem]" %}
 7476   ins_encode %{
 7477     __ xchgl($newval$$Register, $mem$$Address);
 7478   %}
 7479   ins_pipe( pipe_cmpxchg );
 7480 %}
 7481 
 7482 //----------Subtraction Instructions-------------------------------------------
 7483 
 7484 // Integer Subtraction Instructions
 7485 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7486   match(Set dst (SubI dst src));
 7487   effect(KILL cr);
 7488 
 7489   size(2);
 7490   format %{ "SUB    $dst,$src" %}
 7491   opcode(0x2B);
 7492   ins_encode( OpcP, RegReg( dst, src) );
 7493   ins_pipe( ialu_reg_reg );
 7494 %}
 7495 
 7496 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7497   match(Set dst (SubI dst src));
 7498   effect(KILL cr);
 7499 
 7500   format %{ "SUB    $dst,$src" %}
 7501   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7502   // ins_encode( RegImm( dst, src) );
 7503   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7504   ins_pipe( ialu_reg );
 7505 %}
 7506 
 7507 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7508   match(Set dst (SubI dst (LoadI src)));
 7509   effect(KILL cr);
 7510 
 7511   ins_cost(150);
 7512   format %{ "SUB    $dst,$src" %}
 7513   opcode(0x2B);
 7514   ins_encode( OpcP, RegMem( dst, src) );
 7515   ins_pipe( ialu_reg_mem );
 7516 %}
 7517 
 7518 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7519   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7520   effect(KILL cr);
 7521 
 7522   ins_cost(150);
 7523   format %{ "SUB    $dst,$src" %}
 7524   opcode(0x29);  /* Opcode 29 /r */
 7525   ins_encode( OpcP, RegMem( src, dst ) );
 7526   ins_pipe( ialu_mem_reg );
 7527 %}
 7528 
 7529 // Subtract from a pointer
 7530 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7531   match(Set dst (AddP dst (SubI zero src)));
 7532   effect(KILL cr);
 7533 
 7534   size(2);
 7535   format %{ "SUB    $dst,$src" %}
 7536   opcode(0x2B);
 7537   ins_encode( OpcP, RegReg( dst, src) );
 7538   ins_pipe( ialu_reg_reg );
 7539 %}
 7540 
 7541 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7542   match(Set dst (SubI zero dst));
 7543   effect(KILL cr);
 7544 
 7545   size(2);
 7546   format %{ "NEG    $dst" %}
 7547   opcode(0xF7,0x03);  // Opcode F7 /3
 7548   ins_encode( OpcP, RegOpc( dst ) );
 7549   ins_pipe( ialu_reg );
 7550 %}
 7551 
 7552 //----------Multiplication/Division Instructions-------------------------------
 7553 // Integer Multiplication Instructions
 7554 // Multiply Register
 7555 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7556   match(Set dst (MulI dst src));
 7557   effect(KILL cr);
 7558 
 7559   size(3);
 7560   ins_cost(300);
 7561   format %{ "IMUL   $dst,$src" %}
 7562   opcode(0xAF, 0x0F);
 7563   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7564   ins_pipe( ialu_reg_reg_alu0 );
 7565 %}
 7566 
 7567 // Multiply 32-bit Immediate
 7568 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7569   match(Set dst (MulI src imm));
 7570   effect(KILL cr);
 7571 
 7572   ins_cost(300);
 7573   format %{ "IMUL   $dst,$src,$imm" %}
 7574   opcode(0x69);  /* 69 /r id */
 7575   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7576   ins_pipe( ialu_reg_reg_alu0 );
 7577 %}
 7578 
 7579 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7580   match(Set dst src);
 7581   effect(KILL cr);
 7582 
 7583   // Note that this is artificially increased to make it more expensive than loadConL
 7584   ins_cost(250);
 7585   format %{ "MOV    EAX,$src\t// low word only" %}
 7586   opcode(0xB8);
 7587   ins_encode( LdImmL_Lo(dst, src) );
 7588   ins_pipe( ialu_reg_fat );
 7589 %}
 7590 
 7591 // Multiply by 32-bit Immediate, taking the shifted high order results
 7592 //  (special case for shift by 32)
 7593 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7594   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7595   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7596              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7597              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7598   effect(USE src1, KILL cr);
 7599 
 7600   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7601   ins_cost(0*100 + 1*400 - 150);
 7602   format %{ "IMUL   EDX:EAX,$src1" %}
 7603   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7604   ins_pipe( pipe_slow );
 7605 %}
 7606 
 7607 // Multiply by 32-bit Immediate, taking the shifted high order results
 7608 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7609   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7610   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7611              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7613   effect(USE src1, KILL cr);
 7614 
 7615   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7616   ins_cost(1*100 + 1*400 - 150);
 7617   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7618             "SAR    EDX,$cnt-32" %}
 7619   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7620   ins_pipe( pipe_slow );
 7621 %}
 7622 
 7623 // Multiply Memory 32-bit Immediate
 7624 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7625   match(Set dst (MulI (LoadI src) imm));
 7626   effect(KILL cr);
 7627 
 7628   ins_cost(300);
 7629   format %{ "IMUL   $dst,$src,$imm" %}
 7630   opcode(0x69);  /* 69 /r id */
 7631   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7632   ins_pipe( ialu_reg_mem_alu0 );
 7633 %}
 7634 
 7635 // Multiply Memory
 7636 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7637   match(Set dst (MulI dst (LoadI src)));
 7638   effect(KILL cr);
 7639 
 7640   ins_cost(350);
 7641   format %{ "IMUL   $dst,$src" %}
 7642   opcode(0xAF, 0x0F);
 7643   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7644   ins_pipe( ialu_reg_mem_alu0 );
 7645 %}
 7646 
 7647 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7648 %{
 7649   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7650   effect(KILL cr, KILL src2);
 7651 
  expand %{
    mulI_eReg(dst, src1, cr);
    mulI_eReg(src2, src3, cr);
    addI_eReg(dst, src2, cr);
  %}
 7655 %}
 7656 
 7657 // Multiply Register Int to Long
 7658 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7659   // Basic Idea: long = (long)int * (long)int
 7660   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7661   effect(DEF dst, USE src, USE src1, KILL flags);
 7662 
 7663   ins_cost(300);
 7664   format %{ "IMUL   $dst,$src1" %}
 7665 
 7666   ins_encode( long_int_multiply( dst, src1 ) );
 7667   ins_pipe( ialu_reg_reg_alu0 );
 7668 %}
 7669 
 7670 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7671   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7672   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7673   effect(KILL flags);
 7674 
 7675   ins_cost(300);
 7676   format %{ "MUL    $dst,$src1" %}
 7677 
 7678   ins_encode( long_uint_multiply(dst, src1) );
 7679   ins_pipe( ialu_reg_reg_alu0 );
 7680 %}
 7681 
 7682 // Multiply Register Long
 7683 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7684   match(Set dst (MulL dst src));
 7685   effect(KILL cr, TEMP tmp);
 7686   ins_cost(4*100+3*400);
 7687 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7688 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7689   format %{ "MOV    $tmp,$src.lo\n\t"
 7690             "IMUL   $tmp,EDX\n\t"
 7691             "MOV    EDX,$src.hi\n\t"
 7692             "IMUL   EDX,EAX\n\t"
 7693             "ADD    $tmp,EDX\n\t"
 7694             "MUL    EDX:EAX,$src.lo\n\t"
 7695             "ADD    EDX,$tmp" %}
 7696   ins_encode( long_multiply( dst, src, tmp ) );
 7697   ins_pipe( pipe_slow );
 7698 %}
 7699 
 7700 // Multiply Register Long where the left operand's high 32 bits are zero
 7701 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7702   predicate(is_operand_hi32_zero(n->in(1)));
 7703   match(Set dst (MulL dst src));
 7704   effect(KILL cr, TEMP tmp);
 7705   ins_cost(2*100+2*400);
 7706 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7707 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7708   format %{ "MOV    $tmp,$src.hi\n\t"
 7709             "IMUL   $tmp,EAX\n\t"
 7710             "MUL    EDX:EAX,$src.lo\n\t"
 7711             "ADD    EDX,$tmp" %}
 7712   ins_encode %{
 7713     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7714     __ imull($tmp$$Register, rax);
 7715     __ mull($src$$Register);
 7716     __ addl(rdx, $tmp$$Register);
 7717   %}
 7718   ins_pipe( pipe_slow );
 7719 %}
 7720 
 7721 // Multiply Register Long where the right operand's high 32 bits are zero
 7722 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7723   predicate(is_operand_hi32_zero(n->in(2)));
 7724   match(Set dst (MulL dst src));
 7725   effect(KILL cr, TEMP tmp);
 7726   ins_cost(2*100+2*400);
 7727 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7728 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7729   format %{ "MOV    $tmp,$src.lo\n\t"
 7730             "IMUL   $tmp,EDX\n\t"
 7731             "MUL    EDX:EAX,$src.lo\n\t"
 7732             "ADD    EDX,$tmp" %}
 7733   ins_encode %{
 7734     __ movl($tmp$$Register, $src$$Register);
 7735     __ imull($tmp$$Register, rdx);
 7736     __ mull($src$$Register);
 7737     __ addl(rdx, $tmp$$Register);
 7738   %}
 7739   ins_pipe( pipe_slow );
 7740 %}
 7741 
 7742 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7743 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7744   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7745   match(Set dst (MulL dst src));
 7746   effect(KILL cr);
 7747   ins_cost(1*400);
 7748 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7749 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL    EDX:EAX,$src.lo" %}
 7751   ins_encode %{
 7752     __ mull($src$$Register);
 7753   %}
 7754   ins_pipe( pipe_slow );
 7755 %}
 7756 
 7757 // Multiply Register Long by small constant
 7758 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7759   match(Set dst (MulL dst src));
 7760   effect(KILL cr, TEMP tmp);
 7761   ins_cost(2*100+2*400);
 7762   size(12);
 7763 // Basic idea: lo(result) = lo(src * EAX)
 7764 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7765   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7766             "MOV    EDX,$src\n\t"
 7767             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7768             "ADD    EDX,$tmp" %}
 7769   ins_encode( long_multiply_con( dst, src, tmp ) );
 7770   ins_pipe( pipe_slow );
 7771 %}
 7772 
 7773 // Integer DIV with Register
 7774 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7775   match(Set rax (DivI rax div));
 7776   effect(KILL rdx, KILL cr);
 7777   size(26);
 7778   ins_cost(30*100+10*100);
 7779   format %{ "CMP    EAX,0x80000000\n\t"
 7780             "JNE,s  normal\n\t"
 7781             "XOR    EDX,EDX\n\t"
 7782             "CMP    ECX,-1\n\t"
 7783             "JE,s   done\n"
 7784     "normal: CDQ\n\t"
 7785             "IDIV   $div\n\t"
 7786     "done:"        %}
 7787   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7788   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7789   ins_pipe( ialu_reg_reg_alu0 );
 7790 %}
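
// The CMP/JNE/JE prologue above guards the one case where IDIV would fault:
// dividing min_jint (0x80000000) by -1 overflows the 32-bit quotient and
// raises #DE.  Java requires Integer.MIN_VALUE / -1 == Integer.MIN_VALUE with
// remainder 0, so that case skips IDIV, leaving EAX unchanged and EDX zeroed.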
 7791 
 7792 // Divide Register Long
 7793 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7794   match(Set dst (DivL src1 src2));
 7795   effect(CALL);
 7796   ins_cost(10000);
 7797   format %{ "PUSH   $src1.hi\n\t"
 7798             "PUSH   $src1.lo\n\t"
 7799             "PUSH   $src2.hi\n\t"
 7800             "PUSH   $src2.lo\n\t"
 7801             "CALL   SharedRuntime::ldiv\n\t"
 7802             "ADD    ESP,16" %}
 7803   ins_encode( long_div(src1,src2) );
 7804   ins_pipe( pipe_slow );
 7805 %}
 7806 
 7807 // Integer DIVMOD with Register, both quotient and mod results
 7808 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7809   match(DivModI rax div);
 7810   effect(KILL cr);
 7811   size(26);
 7812   ins_cost(30*100+10*100);
 7813   format %{ "CMP    EAX,0x80000000\n\t"
 7814             "JNE,s  normal\n\t"
 7815             "XOR    EDX,EDX\n\t"
 7816             "CMP    ECX,-1\n\t"
 7817             "JE,s   done\n"
 7818     "normal: CDQ\n\t"
 7819             "IDIV   $div\n\t"
 7820     "done:"        %}
 7821   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7822   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7823   ins_pipe( pipe_slow );
 7824 %}
 7825 
 7826 // Integer MOD with Register
 7827 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7828   match(Set rdx (ModI rax div));
 7829   effect(KILL rax, KILL cr);
 7830 
 7831   size(26);
 7832   ins_cost(300);
 7833   format %{ "CDQ\n\t"
 7834             "IDIV   $div" %}
 7835   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7836   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7837   ins_pipe( ialu_reg_reg_alu0 );
 7838 %}
 7839 
 7840 // Remainder Register Long
 7841 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7842   match(Set dst (ModL src1 src2));
 7843   effect(CALL);
 7844   ins_cost(10000);
 7845   format %{ "PUSH   $src1.hi\n\t"
 7846             "PUSH   $src1.lo\n\t"
 7847             "PUSH   $src2.hi\n\t"
 7848             "PUSH   $src2.lo\n\t"
 7849             "CALL   SharedRuntime::lrem\n\t"
 7850             "ADD    ESP,16" %}
 7851   ins_encode( long_mod(src1,src2) );
 7852   ins_pipe( pipe_slow );
 7853 %}
 7854 
 7855 // Divide Register Long (no special case since divisor != -1)
 7856 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7857   match(Set dst (DivL dst imm));
 7858   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7859   ins_cost(1000);
 7860   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7861             "XOR    $tmp2,$tmp2\n\t"
 7862             "CMP    $tmp,EDX\n\t"
 7863             "JA,s   fast\n\t"
 7864             "MOV    $tmp2,EAX\n\t"
 7865             "MOV    EAX,EDX\n\t"
 7866             "MOV    EDX,0\n\t"
 7867             "JLE,s  pos\n\t"
 7868             "LNEG   EAX : $tmp2\n\t"
 7869             "DIV    $tmp # unsigned division\n\t"
 7870             "XCHG   EAX,$tmp2\n\t"
 7871             "DIV    $tmp\n\t"
 7872             "LNEG   $tmp2 : EAX\n\t"
 7873             "JMP,s  done\n"
 7874     "pos:\n\t"
 7875             "DIV    $tmp\n\t"
 7876             "XCHG   EAX,$tmp2\n"
 7877     "fast:\n\t"
 7878             "DIV    $tmp\n"
 7879     "done:\n\t"
 7880             "MOV    EDX,$tmp2\n\t"
 7881             "NEG    EDX:EAX # if $imm < 0" %}
 7882   ins_encode %{
 7883     int con = (int)$imm$$constant;
 7884     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7885     int pcon = (con > 0) ? con : -con;
 7886     Label Lfast, Lpos, Ldone;
 7887 
 7888     __ movl($tmp$$Register, pcon);
 7889     __ xorl($tmp2$$Register,$tmp2$$Register);
 7890     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7891     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7892 
 7893     __ movl($tmp2$$Register, $dst$$Register); // save
 7894     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7895     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7896     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7897 
 7898     // Negative dividend.
 7899     // convert value to positive to use unsigned division
 7900     __ lneg($dst$$Register, $tmp2$$Register);
 7901     __ divl($tmp$$Register);
 7902     __ xchgl($dst$$Register, $tmp2$$Register);
 7903     __ divl($tmp$$Register);
 7904     // revert result back to negative
 7905     __ lneg($tmp2$$Register, $dst$$Register);
 7906     __ jmpb(Ldone);
 7907 
 7908     __ bind(Lpos);
 7909     __ divl($tmp$$Register); // Use unsigned division
 7910     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result
 7912 
 7913     __ bind(Lfast);
 7914     // fast path: src is positive
 7915     __ divl($tmp$$Register); // Use unsigned division
 7916 
 7917     __ bind(Ldone);
 7918     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7919     if (con < 0) {
 7920       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7921     }
 7922   %}
 7923   ins_pipe( pipe_slow );
 7924 %}
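
// A minimal C-style sketch of the unsigned core used above (sign handling and
// the final negation omitted; illustrative only, not code from this file):
//
//   uint64_t udiv64_by_u32(uint64_t n, uint32_t d) {
//     uint32_t n_hi = (uint32_t)(n >> 32), n_lo = (uint32_t)n;
//     if (n_hi < d)                           // fast path: quotient fits in
//       return n / d;                         // 32 bits, one DIV suffices
//     uint32_t q_hi = n_hi / d;               // first DIV
//     uint32_t r    = n_hi % d;
//     uint64_t q_lo = (((uint64_t)r << 32) | n_lo) / d;   // second DIV
//     return ((uint64_t)q_hi << 32) | (uint32_t)q_lo;
//   }
//
// Because r < d, the second quotient always fits in 32 bits, which is the
// precondition for the hardware DIV of EDX:EAX by a 32-bit divisor.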
 7925 
// Remainder Register Long (remainder fits into 32 bits)
 7927 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7928   match(Set dst (ModL dst imm));
 7929   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7930   ins_cost(1000);
 7931   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7932             "CMP    $tmp,EDX\n\t"
 7933             "JA,s   fast\n\t"
 7934             "MOV    $tmp2,EAX\n\t"
 7935             "MOV    EAX,EDX\n\t"
 7936             "MOV    EDX,0\n\t"
 7937             "JLE,s  pos\n\t"
 7938             "LNEG   EAX : $tmp2\n\t"
 7939             "DIV    $tmp # unsigned division\n\t"
 7940             "MOV    EAX,$tmp2\n\t"
 7941             "DIV    $tmp\n\t"
 7942             "NEG    EDX\n\t"
 7943             "JMP,s  done\n"
 7944     "pos:\n\t"
 7945             "DIV    $tmp\n\t"
 7946             "MOV    EAX,$tmp2\n"
 7947     "fast:\n\t"
 7948             "DIV    $tmp\n"
 7949     "done:\n\t"
 7950             "MOV    EAX,EDX\n\t"
            "SAR    EDX,31" %}
 7952   ins_encode %{
 7953     int con = (int)$imm$$constant;
 7954     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7955     int pcon = (con > 0) ? con : -con;
 7956     Label  Lfast, Lpos, Ldone;
 7957 
 7958     __ movl($tmp$$Register, pcon);
 7959     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7960     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7961 
 7962     __ movl($tmp2$$Register, $dst$$Register); // save
 7963     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7964     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7965     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7966 
 7967     // Negative dividend.
 7968     // convert value to positive to use unsigned division
 7969     __ lneg($dst$$Register, $tmp2$$Register);
 7970     __ divl($tmp$$Register);
 7971     __ movl($dst$$Register, $tmp2$$Register);
 7972     __ divl($tmp$$Register);
 7973     // revert remainder back to negative
 7974     __ negl(HIGH_FROM_LOW($dst$$Register));
 7975     __ jmpb(Ldone);
 7976 
 7977     __ bind(Lpos);
 7978     __ divl($tmp$$Register);
 7979     __ movl($dst$$Register, $tmp2$$Register);
 7980 
 7981     __ bind(Lfast);
 7982     // fast path: src is positive
 7983     __ divl($tmp$$Register);
 7984 
 7985     __ bind(Ldone);
 7986     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7987     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7988 
 7989   %}
 7990   ins_pipe( pipe_slow );
 7991 %}
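
// Note: $imm is an immL32, so |remainder| < |$imm| < 2^31 and the remainder
// fits in 32 bits; the trailing MOV EAX,EDX / SAR EDX,31 pair simply
// sign-extends the 32-bit remainder into the EDX:EAX long result.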
 7992 
 7993 // Integer Shift Instructions
 7994 // Shift Left by one
 7995 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7996   match(Set dst (LShiftI dst shift));
 7997   effect(KILL cr);
 7998 
 7999   size(2);
 8000   format %{ "SHL    $dst,$shift" %}
 8001   opcode(0xD1, 0x4);  /* D1 /4 */
 8002   ins_encode( OpcP, RegOpc( dst ) );
 8003   ins_pipe( ialu_reg );
 8004 %}
 8005 
 8006 // Shift Left by 8-bit immediate
 8007 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8008   match(Set dst (LShiftI dst shift));
 8009   effect(KILL cr);
 8010 
 8011   size(3);
 8012   format %{ "SHL    $dst,$shift" %}
 8013   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8014   ins_encode( RegOpcImm( dst, shift) );
 8015   ins_pipe( ialu_reg );
 8016 %}
 8017 
 8018 // Shift Left by variable
 8019 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8020   match(Set dst (LShiftI dst shift));
 8021   effect(KILL cr);
 8022 
 8023   size(2);
 8024   format %{ "SHL    $dst,$shift" %}
 8025   opcode(0xD3, 0x4);  /* D3 /4 */
 8026   ins_encode( OpcP, RegOpc( dst ) );
 8027   ins_pipe( ialu_reg_reg );
 8028 %}
 8029 
 8030 // Arithmetic shift right by one
 8031 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8032   match(Set dst (RShiftI dst shift));
 8033   effect(KILL cr);
 8034 
 8035   size(2);
 8036   format %{ "SAR    $dst,$shift" %}
 8037   opcode(0xD1, 0x7);  /* D1 /7 */
 8038   ins_encode( OpcP, RegOpc( dst ) );
 8039   ins_pipe( ialu_reg );
 8040 %}
 8041 
// Arithmetic shift right by one (memory operand)
 8043 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8044   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8045   effect(KILL cr);
 8046   format %{ "SAR    $dst,$shift" %}
 8047   opcode(0xD1, 0x7);  /* D1 /7 */
 8048   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8049   ins_pipe( ialu_mem_imm );
 8050 %}
 8051 
 8052 // Arithmetic Shift Right by 8-bit immediate
 8053 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8054   match(Set dst (RShiftI dst shift));
 8055   effect(KILL cr);
 8056 
 8057   size(3);
 8058   format %{ "SAR    $dst,$shift" %}
 8059   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8060   ins_encode( RegOpcImm( dst, shift ) );
 8061   ins_pipe( ialu_mem_imm );
 8062 %}
 8063 
 8064 // Arithmetic Shift Right by 8-bit immediate
 8065 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8066   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8067   effect(KILL cr);
 8068 
 8069   format %{ "SAR    $dst,$shift" %}
 8070   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8071   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8072   ins_pipe( ialu_mem_imm );
 8073 %}
 8074 
 8075 // Arithmetic Shift Right by variable
 8076 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8077   match(Set dst (RShiftI dst shift));
 8078   effect(KILL cr);
 8079 
 8080   size(2);
 8081   format %{ "SAR    $dst,$shift" %}
 8082   opcode(0xD3, 0x7);  /* D3 /7 */
 8083   ins_encode( OpcP, RegOpc( dst ) );
 8084   ins_pipe( ialu_reg_reg );
 8085 %}
 8086 
 8087 // Logical shift right by one
 8088 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8089   match(Set dst (URShiftI dst shift));
 8090   effect(KILL cr);
 8091 
 8092   size(2);
 8093   format %{ "SHR    $dst,$shift" %}
 8094   opcode(0xD1, 0x5);  /* D1 /5 */
 8095   ins_encode( OpcP, RegOpc( dst ) );
 8096   ins_pipe( ialu_reg );
 8097 %}
 8098 
 8099 // Logical Shift Right by 8-bit immediate
 8100 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8101   match(Set dst (URShiftI dst shift));
 8102   effect(KILL cr);
 8103 
 8104   size(3);
 8105   format %{ "SHR    $dst,$shift" %}
 8106   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8107   ins_encode( RegOpcImm( dst, shift) );
 8108   ins_pipe( ialu_reg );
 8109 %}
 8110 
 8111 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8113 // This idiom is used by the compiler for the i2b bytecode.
 8114 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8115   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8116 
 8117   size(3);
 8118   format %{ "MOVSX  $dst,$src :8" %}
 8119   ins_encode %{
 8120     __ movsbl($dst$$Register, $src$$Register);
 8121   %}
 8122   ins_pipe(ialu_reg_reg);
 8123 %}
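
// For example, with $src = 0x000000F0: SHL 24 gives 0xF0000000 and SAR 24
// gives 0xFFFFFFF0, which is exactly MOVSX of the low byte (-16).  The i2s
// rule below is the analogous 16-bit case.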
 8124 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8127 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8128   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8129 
 8130   size(3);
 8131   format %{ "MOVSX  $dst,$src :16" %}
 8132   ins_encode %{
 8133     __ movswl($dst$$Register, $src$$Register);
 8134   %}
 8135   ins_pipe(ialu_reg_reg);
 8136 %}
 8137 
 8138 
 8139 // Logical Shift Right by variable
 8140 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8141   match(Set dst (URShiftI dst shift));
 8142   effect(KILL cr);
 8143 
 8144   size(2);
 8145   format %{ "SHR    $dst,$shift" %}
 8146   opcode(0xD3, 0x5);  /* D3 /5 */
 8147   ins_encode( OpcP, RegOpc( dst ) );
 8148   ins_pipe( ialu_reg_reg );
 8149 %}
 8150 
 8151 
 8152 //----------Logical Instructions-----------------------------------------------
 8153 //----------Integer Logical Instructions---------------------------------------
 8154 // And Instructions
 8155 // And Register with Register
 8156 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8157   match(Set dst (AndI dst src));
 8158   effect(KILL cr);
 8159 
 8160   size(2);
 8161   format %{ "AND    $dst,$src" %}
 8162   opcode(0x23);
 8163   ins_encode( OpcP, RegReg( dst, src) );
 8164   ins_pipe( ialu_reg_reg );
 8165 %}
 8166 
 8167 // And Register with Immediate
 8168 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8169   match(Set dst (AndI dst src));
 8170   effect(KILL cr);
 8171 
 8172   format %{ "AND    $dst,$src" %}
 8173   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8174   // ins_encode( RegImm( dst, src) );
 8175   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8176   ins_pipe( ialu_reg );
 8177 %}
 8178 
 8179 // And Register with Memory
 8180 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8181   match(Set dst (AndI dst (LoadI src)));
 8182   effect(KILL cr);
 8183 
 8184   ins_cost(150);
 8185   format %{ "AND    $dst,$src" %}
 8186   opcode(0x23);
 8187   ins_encode( OpcP, RegMem( dst, src) );
 8188   ins_pipe( ialu_reg_mem );
 8189 %}
 8190 
 8191 // And Memory with Register
 8192 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8193   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8194   effect(KILL cr);
 8195 
 8196   ins_cost(150);
 8197   format %{ "AND    $dst,$src" %}
 8198   opcode(0x21);  /* Opcode 21 /r */
 8199   ins_encode( OpcP, RegMem( src, dst ) );
 8200   ins_pipe( ialu_mem_reg );
 8201 %}
 8202 
 8203 // And Memory with Immediate
 8204 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8205   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8206   effect(KILL cr);
 8207 
 8208   ins_cost(125);
 8209   format %{ "AND    $dst,$src" %}
 8210   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8211   // ins_encode( MemImm( dst, src) );
 8212   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8213   ins_pipe( ialu_mem_imm );
 8214 %}
 8215 
 8216 // BMI1 instructions
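// In effect, the ideal trees matched below are the standard BMI1 identities:
//   ANDN:   ~x & y
//   BLSI:   (-x) & x       isolate lowest set bit
//   BLSMSK: (x - 1) ^ x    mask up to and including lowest set bit
//   BLSR:   (x - 1) & x    clear lowest set bit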
 8217 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8218   match(Set dst (AndI (XorI src1 minus_1) src2));
 8219   predicate(UseBMI1Instructions);
 8220   effect(KILL cr);
 8221 
 8222   format %{ "ANDNL  $dst, $src1, $src2" %}
 8223 
 8224   ins_encode %{
 8225     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8226   %}
 8227   ins_pipe(ialu_reg);
 8228 %}
 8229 
 8230 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8231   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8232   predicate(UseBMI1Instructions);
 8233   effect(KILL cr);
 8234 
 8235   ins_cost(125);
 8236   format %{ "ANDNL  $dst, $src1, $src2" %}
 8237 
 8238   ins_encode %{
 8239     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8240   %}
 8241   ins_pipe(ialu_reg_mem);
 8242 %}
 8243 
 8244 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8245   match(Set dst (AndI (SubI imm_zero src) src));
 8246   predicate(UseBMI1Instructions);
 8247   effect(KILL cr);
 8248 
 8249   format %{ "BLSIL  $dst, $src" %}
 8250 
 8251   ins_encode %{
 8252     __ blsil($dst$$Register, $src$$Register);
 8253   %}
 8254   ins_pipe(ialu_reg);
 8255 %}
 8256 
 8257 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8258   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8259   predicate(UseBMI1Instructions);
 8260   effect(KILL cr);
 8261 
 8262   ins_cost(125);
 8263   format %{ "BLSIL  $dst, $src" %}
 8264 
 8265   ins_encode %{
 8266     __ blsil($dst$$Register, $src$$Address);
 8267   %}
 8268   ins_pipe(ialu_reg_mem);
 8269 %}
 8270 
 8271 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8272 %{
 8273   match(Set dst (XorI (AddI src minus_1) src));
 8274   predicate(UseBMI1Instructions);
 8275   effect(KILL cr);
 8276 
 8277   format %{ "BLSMSKL $dst, $src" %}
 8278 
 8279   ins_encode %{
 8280     __ blsmskl($dst$$Register, $src$$Register);
 8281   %}
 8282 
 8283   ins_pipe(ialu_reg);
 8284 %}
 8285 
 8286 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8287 %{
 8288   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8289   predicate(UseBMI1Instructions);
 8290   effect(KILL cr);
 8291 
 8292   ins_cost(125);
 8293   format %{ "BLSMSKL $dst, $src" %}
 8294 
 8295   ins_encode %{
 8296     __ blsmskl($dst$$Register, $src$$Address);
 8297   %}
 8298 
 8299   ins_pipe(ialu_reg_mem);
 8300 %}
 8301 
 8302 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8303 %{
 8304   match(Set dst (AndI (AddI src minus_1) src) );
 8305   predicate(UseBMI1Instructions);
 8306   effect(KILL cr);
 8307 
 8308   format %{ "BLSRL  $dst, $src" %}
 8309 
 8310   ins_encode %{
 8311     __ blsrl($dst$$Register, $src$$Register);
 8312   %}
 8313 
 8314   ins_pipe(ialu_reg);
 8315 %}
 8316 
 8317 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8318 %{
 8319   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8320   predicate(UseBMI1Instructions);
 8321   effect(KILL cr);
 8322 
 8323   ins_cost(125);
 8324   format %{ "BLSRL  $dst, $src" %}
 8325 
 8326   ins_encode %{
 8327     __ blsrl($dst$$Register, $src$$Address);
 8328   %}
 8329 
 8330   ins_pipe(ialu_reg_mem);
 8331 %}
 8332 
 8333 // Or Instructions
 8334 // Or Register with Register
 8335 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8336   match(Set dst (OrI dst src));
 8337   effect(KILL cr);
 8338 
 8339   size(2);
 8340   format %{ "OR     $dst,$src" %}
 8341   opcode(0x0B);
 8342   ins_encode( OpcP, RegReg( dst, src) );
 8343   ins_pipe( ialu_reg_reg );
 8344 %}
 8345 
 8346 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8347   match(Set dst (OrI dst (CastP2X src)));
 8348   effect(KILL cr);
 8349 
 8350   size(2);
 8351   format %{ "OR     $dst,$src" %}
 8352   opcode(0x0B);
 8353   ins_encode( OpcP, RegReg( dst, src) );
 8354   ins_pipe( ialu_reg_reg );
 8355 %}
 8356 
 8357 
 8358 // Or Register with Immediate
 8359 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8360   match(Set dst (OrI dst src));
 8361   effect(KILL cr);
 8362 
 8363   format %{ "OR     $dst,$src" %}
 8364   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8365   // ins_encode( RegImm( dst, src) );
 8366   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8367   ins_pipe( ialu_reg );
 8368 %}
 8369 
 8370 // Or Register with Memory
 8371 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8372   match(Set dst (OrI dst (LoadI src)));
 8373   effect(KILL cr);
 8374 
 8375   ins_cost(150);
 8376   format %{ "OR     $dst,$src" %}
 8377   opcode(0x0B);
 8378   ins_encode( OpcP, RegMem( dst, src) );
 8379   ins_pipe( ialu_reg_mem );
 8380 %}
 8381 
 8382 // Or Memory with Register
 8383 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8384   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8385   effect(KILL cr);
 8386 
 8387   ins_cost(150);
 8388   format %{ "OR     $dst,$src" %}
 8389   opcode(0x09);  /* Opcode 09 /r */
 8390   ins_encode( OpcP, RegMem( src, dst ) );
 8391   ins_pipe( ialu_mem_reg );
 8392 %}
 8393 
 8394 // Or Memory with Immediate
 8395 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8396   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8397   effect(KILL cr);
 8398 
 8399   ins_cost(125);
 8400   format %{ "OR     $dst,$src" %}
 8401   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8402   // ins_encode( MemImm( dst, src) );
 8403   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8404   ins_pipe( ialu_mem_imm );
 8405 %}
 8406 
 8407 // ROL/ROR
 8408 // ROL expand
 8409 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8410   effect(USE_DEF dst, USE shift, KILL cr);
 8411 
 8412   format %{ "ROL    $dst, $shift" %}
 8413   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8414   ins_encode( OpcP, RegOpc( dst ));
 8415   ins_pipe( ialu_reg );
 8416 %}
 8417 
 8418 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8419   effect(USE_DEF dst, USE shift, KILL cr);
 8420 
 8421   format %{ "ROL    $dst, $shift" %}
 8422   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8423   ins_encode( RegOpcImm(dst, shift) );
 8424   ins_pipe(ialu_reg);
 8425 %}
 8426 
 8427 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8428   effect(USE_DEF dst, USE shift, KILL cr);
 8429 
 8430   format %{ "ROL    $dst, $shift" %}
 8431   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8432   ins_encode(OpcP, RegOpc(dst));
 8433   ins_pipe( ialu_reg_reg );
 8434 %}
 8435 // end of ROL expand
 8436 
 8437 // ROL 32bit by one once
 8438 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8439   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8440 
 8441   expand %{
 8442     rolI_eReg_imm1(dst, lshift, cr);
 8443   %}
 8444 %}
 8445 
 8446 // ROL 32bit var by imm8 once
 8447 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8448   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8449   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8450 
 8451   expand %{
 8452     rolI_eReg_imm8(dst, lshift, cr);
 8453   %}
 8454 %}
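
// The predicate above checks that the two shift counts sum to 32 (mod 32),
// since (x << s) | (x >>> (32 - s)) is precisely a rotate-left by s.  The
// ROR rules below rely on the mirror-image identity.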
 8455 
 8456 // ROL 32bit var by var once
 8457 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8458   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8459 
 8460   expand %{
 8461     rolI_eReg_CL(dst, shift, cr);
 8462   %}
 8463 %}
 8464 
 8465 // ROL 32bit var by var once
 8466 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8467   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8468 
 8469   expand %{
 8470     rolI_eReg_CL(dst, shift, cr);
 8471   %}
 8472 %}
 8473 
 8474 // ROR expand
 8475 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8476   effect(USE_DEF dst, USE shift, KILL cr);
 8477 
 8478   format %{ "ROR    $dst, $shift" %}
 8479   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8480   ins_encode( OpcP, RegOpc( dst ) );
 8481   ins_pipe( ialu_reg );
 8482 %}
 8483 
 8484 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
 8486 
 8487   format %{ "ROR    $dst, $shift" %}
 8488   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8489   ins_encode( RegOpcImm(dst, shift) );
 8490   ins_pipe( ialu_reg );
 8491 %}
 8492 
 8493 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8494   effect(USE_DEF dst, USE shift, KILL cr);
 8495 
 8496   format %{ "ROR    $dst, $shift" %}
 8497   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8498   ins_encode(OpcP, RegOpc(dst));
 8499   ins_pipe( ialu_reg_reg );
 8500 %}
 8501 // end of ROR expand
 8502 
// ROR 32bit by one once
 8504 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8505   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8506 
 8507   expand %{
 8508     rorI_eReg_imm1(dst, rshift, cr);
 8509   %}
 8510 %}
 8511 
 8512 // ROR 32bit by immI8 once
 8513 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8514   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8515   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8516 
 8517   expand %{
 8518     rorI_eReg_imm8(dst, rshift, cr);
 8519   %}
 8520 %}
 8521 
 8522 // ROR 32bit var by var once
 8523 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8524   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8525 
 8526   expand %{
 8527     rorI_eReg_CL(dst, shift, cr);
 8528   %}
 8529 %}
 8530 
 8531 // ROR 32bit var by var once
 8532 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8533   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8534 
 8535   expand %{
 8536     rorI_eReg_CL(dst, shift, cr);
 8537   %}
 8538 %}
 8539 
 8540 // Xor Instructions
 8541 // Xor Register with Register
 8542 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8543   match(Set dst (XorI dst src));
 8544   effect(KILL cr);
 8545 
 8546   size(2);
 8547   format %{ "XOR    $dst,$src" %}
 8548   opcode(0x33);
 8549   ins_encode( OpcP, RegReg( dst, src) );
 8550   ins_pipe( ialu_reg_reg );
 8551 %}
 8552 
 8553 // Xor Register with Immediate -1
 8554 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8555   match(Set dst (XorI dst imm));
 8556 
 8557   size(2);
 8558   format %{ "NOT    $dst" %}
 8559   ins_encode %{
 8560      __ notl($dst$$Register);
 8561   %}
 8562   ins_pipe( ialu_reg );
 8563 %}
 8564 
 8565 // Xor Register with Immediate
 8566 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8567   match(Set dst (XorI dst src));
 8568   effect(KILL cr);
 8569 
 8570   format %{ "XOR    $dst,$src" %}
 8571   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8572   // ins_encode( RegImm( dst, src) );
 8573   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8574   ins_pipe( ialu_reg );
 8575 %}
 8576 
 8577 // Xor Register with Memory
 8578 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8579   match(Set dst (XorI dst (LoadI src)));
 8580   effect(KILL cr);
 8581 
 8582   ins_cost(150);
 8583   format %{ "XOR    $dst,$src" %}
 8584   opcode(0x33);
 8585   ins_encode( OpcP, RegMem(dst, src) );
 8586   ins_pipe( ialu_reg_mem );
 8587 %}
 8588 
 8589 // Xor Memory with Register
 8590 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8591   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8592   effect(KILL cr);
 8593 
 8594   ins_cost(150);
 8595   format %{ "XOR    $dst,$src" %}
 8596   opcode(0x31);  /* Opcode 31 /r */
 8597   ins_encode( OpcP, RegMem( src, dst ) );
 8598   ins_pipe( ialu_mem_reg );
 8599 %}
 8600 
 8601 // Xor Memory with Immediate
 8602 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8603   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8604   effect(KILL cr);
 8605 
 8606   ins_cost(125);
 8607   format %{ "XOR    $dst,$src" %}
 8608   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8609   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8610   ins_pipe( ialu_mem_imm );
 8611 %}
 8612 
 8613 //----------Convert Int to Boolean---------------------------------------------
 8614 
 8615 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8616   effect( DEF dst, USE src );
 8617   format %{ "MOV    $dst,$src" %}
 8618   ins_encode( enc_Copy( dst, src) );
 8619   ins_pipe( ialu_reg_reg );
 8620 %}
 8621 
 8622 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8623   effect( USE_DEF dst, USE src, KILL cr );
 8624 
 8625   size(4);
 8626   format %{ "NEG    $dst\n\t"
 8627             "ADC    $dst,$src" %}
 8628   ins_encode( neg_reg(dst),
 8629               OpcRegReg(0x13,dst,src) );
 8630   ins_pipe( ialu_reg_reg_long );
 8631 %}
 8632 
 8633 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8634   match(Set dst (Conv2B src));
 8635 
 8636   expand %{
 8637     movI_nocopy(dst,src);
 8638     ci2b(dst,src,cr);
 8639   %}
 8640 %}
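
// The NEG/ADC pair operates on a copy of src: NEG leaves dst = -src and sets
// CF exactly when src != 0, and ADC dst,src then yields -src + src + CF = CF,
// i.e. 0 or 1, which is the Conv2B result.  convP2B below is the same trick
// applied to a pointer.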
 8641 
 8642 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8643   effect( DEF dst, USE src );
 8644   format %{ "MOV    $dst,$src" %}
 8645   ins_encode( enc_Copy( dst, src) );
 8646   ins_pipe( ialu_reg_reg );
 8647 %}
 8648 
 8649 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8650   effect( USE_DEF dst, USE src, KILL cr );
 8651   format %{ "NEG    $dst\n\t"
 8652             "ADC    $dst,$src" %}
 8653   ins_encode( neg_reg(dst),
 8654               OpcRegReg(0x13,dst,src) );
 8655   ins_pipe( ialu_reg_reg_long );
 8656 %}
 8657 
 8658 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8659   match(Set dst (Conv2B src));
 8660 
 8661   expand %{
 8662     movP_nocopy(dst,src);
 8663     cp2b(dst,src,cr);
 8664   %}
 8665 %}
 8666 
 8667 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8668   match(Set dst (CmpLTMask p q));
 8669   effect(KILL cr);
 8670   ins_cost(400);
 8671 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8673   format %{ "XOR    $dst,$dst\n\t"
 8674             "CMP    $p,$q\n\t"
 8675             "SETlt  $dst\n\t"
 8676             "NEG    $dst" %}
 8677   ins_encode %{
 8678     Register Rp = $p$$Register;
 8679     Register Rq = $q$$Register;
 8680     Register Rd = $dst$$Register;
 8681     Label done;
 8682     __ xorl(Rd, Rd);
 8683     __ cmpl(Rp, Rq);
 8684     __ setb(Assembler::less, Rd);
 8685     __ negl(Rd);
 8686   %}
 8687 
 8688   ins_pipe(pipe_slow);
 8689 %}
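
// SETlt writes (p < q) ? 1 : 0 into the low byte of the zeroed dst, and NEG
// then turns that 0/1 into the 0/-1 mask that CmpLTMask produces.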
 8690 
 8691 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8692   match(Set dst (CmpLTMask dst zero));
 8693   effect(DEF dst, KILL cr);
 8694   ins_cost(100);
 8695 
 8696   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8697   ins_encode %{
    __ sarl($dst$$Register, 31);
 8699   %}
 8700   ins_pipe(ialu_reg);
 8701 %}
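
// SAR by 31 replicates the sign bit: -1 when dst is negative (i.e. dst < zero)
// and 0 otherwise, which is CmpLTMask against zero.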
 8702 
 8703 /* better to save a register than avoid a branch */
 8704 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8705   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8706   effect(KILL cr);
 8707   ins_cost(400);
 8708   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8709             "JGE    done\n\t"
 8710             "ADD    $p,$y\n"
 8711             "done:  " %}
 8712   ins_encode %{
 8713     Register Rp = $p$$Register;
 8714     Register Rq = $q$$Register;
 8715     Register Ry = $y$$Register;
 8716     Label done;
 8717     __ subl(Rp, Rq);
 8718     __ jccb(Assembler::greaterEqual, done);
 8719     __ addl(Rp, Ry);
 8720     __ bind(done);
 8721   %}
 8722 
 8723   ins_pipe(pipe_cmplt);
 8724 %}
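
// I.e. p = (p < q) ? (p - q) + y : (p - q); the SUB both computes p - q and
// sets the flags tested by JGE, so the mask never has to be materialized.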
 8725 
 8726 /* better to save a register than avoid a branch */
 8727 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8728   match(Set y (AndI (CmpLTMask p q) y));
 8729   effect(KILL cr);
 8730 
 8731   ins_cost(300);
 8732 
 8733   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8734             "JLT      done\n\t"
 8735             "XORL     $y, $y\n"
 8736             "done:  " %}
 8737   ins_encode %{
 8738     Register Rp = $p$$Register;
 8739     Register Rq = $q$$Register;
 8740     Register Ry = $y$$Register;
 8741     Label done;
 8742     __ cmpl(Rp, Rq);
 8743     __ jccb(Assembler::less, done);
 8744     __ xorl(Ry, Ry);
 8745     __ bind(done);
 8746   %}
 8747 
 8748   ins_pipe(pipe_cmplt);
 8749 %}
 8750 
 8751 /* If I enable this, I encourage spilling in the inner loop of compress.
 8752 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8753   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8754 */
 8755 //----------Overflow Math Instructions-----------------------------------------
 8756 
 8757 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8758 %{
 8759   match(Set cr (OverflowAddI op1 op2));
 8760   effect(DEF cr, USE_KILL op1, USE op2);
 8761 
 8762   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8763 
 8764   ins_encode %{
 8765     __ addl($op1$$Register, $op2$$Register);
 8766   %}
 8767   ins_pipe(ialu_reg_reg);
 8768 %}
 8769 
 8770 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8771 %{
 8772   match(Set cr (OverflowAddI op1 op2));
 8773   effect(DEF cr, USE_KILL op1, USE op2);
 8774 
 8775   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8776 
 8777   ins_encode %{
 8778     __ addl($op1$$Register, $op2$$constant);
 8779   %}
 8780   ins_pipe(ialu_reg_reg);
 8781 %}
 8782 
 8783 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8784 %{
 8785   match(Set cr (OverflowSubI op1 op2));
 8786 
 8787   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8788   ins_encode %{
 8789     __ cmpl($op1$$Register, $op2$$Register);
 8790   %}
 8791   ins_pipe(ialu_reg_reg);
 8792 %}
 8793 
 8794 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8795 %{
 8796   match(Set cr (OverflowSubI op1 op2));
 8797 
 8798   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8799   ins_encode %{
 8800     __ cmpl($op1$$Register, $op2$$constant);
 8801   %}
 8802   ins_pipe(ialu_reg_reg);
 8803 %}
 8804 
 8805 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8806 %{
 8807   match(Set cr (OverflowSubI zero op2));
 8808   effect(DEF cr, USE_KILL op2);
 8809 
 8810   format %{ "NEG    $op2\t# overflow check int" %}
 8811   ins_encode %{
 8812     __ negl($op2$$Register);
 8813   %}
 8814   ins_pipe(ialu_reg_reg);
 8815 %}
 8816 
 8817 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8818 %{
 8819   match(Set cr (OverflowMulI op1 op2));
 8820   effect(DEF cr, USE_KILL op1, USE op2);
 8821 
 8822   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8823   ins_encode %{
 8824     __ imull($op1$$Register, $op2$$Register);
 8825   %}
 8826   ins_pipe(ialu_reg_reg_alu0);
 8827 %}
 8828 
 8829 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8830 %{
 8831   match(Set cr (OverflowMulI op1 op2));
 8832   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8833 
 8834   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8835   ins_encode %{
 8836     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8837   %}
 8838   ins_pipe(ialu_reg_reg_alu0);
 8839 %}
 8840 
 8841 // Integer Absolute Instructions
 8842 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8843 %{
 8844   match(Set dst (AbsI src));
 8845   effect(TEMP dst, TEMP tmp, KILL cr);
 8846   format %{ "movl $tmp, $src\n\t"
 8847             "sarl $tmp, 31\n\t"
 8848             "movl $dst, $src\n\t"
 8849             "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp" %}
 8852   ins_encode %{
 8853     __ movl($tmp$$Register, $src$$Register);
 8854     __ sarl($tmp$$Register, 31);
 8855     __ movl($dst$$Register, $src$$Register);
 8856     __ xorl($dst$$Register, $tmp$$Register);
 8857     __ subl($dst$$Register, $tmp$$Register);
 8858   %}
 8859 
 8860   ins_pipe(ialu_reg_reg);
 8861 %}
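
// Branch-free abs: $tmp = $src >> 31 is 0 or -1; ($src ^ $tmp) - $tmp is $src
// when $tmp == 0 and (~$src) + 1 == -$src when $tmp == -1.  For example,
// src = -5 (0xFFFFFFFB): tmp = 0xFFFFFFFF, src ^ tmp = 4, 4 - (-1) = 5.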
 8862 
 8863 //----------Long Instructions------------------------------------------------
 8864 // Add Long Register with Register
 8865 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8866   match(Set dst (AddL dst src));
 8867   effect(KILL cr);
 8868   ins_cost(200);
 8869   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8870             "ADC    $dst.hi,$src.hi" %}
 8871   opcode(0x03, 0x13);
 8872   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8873   ins_pipe( ialu_reg_reg_long );
 8874 %}
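
// Standard double-word arithmetic: ADD the low halves first, then ADC folds
// the carry into the high halves.  The SubL rules below use the matching
// SUB/SBB borrow chain.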
 8875 
 8876 // Add Long Register with Immediate
 8877 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8878   match(Set dst (AddL dst src));
 8879   effect(KILL cr);
 8880   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8881             "ADC    $dst.hi,$src.hi" %}
 8882   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8883   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8884   ins_pipe( ialu_reg_long );
 8885 %}
 8886 
 8887 // Add Long Register with Memory
 8888 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8889   match(Set dst (AddL dst (LoadL mem)));
 8890   effect(KILL cr);
 8891   ins_cost(125);
 8892   format %{ "ADD    $dst.lo,$mem\n\t"
 8893             "ADC    $dst.hi,$mem+4" %}
 8894   opcode(0x03, 0x13);
 8895   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8896   ins_pipe( ialu_reg_long_mem );
 8897 %}
 8898 
 8899 // Subtract Long Register with Register.
 8900 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8901   match(Set dst (SubL dst src));
 8902   effect(KILL cr);
 8903   ins_cost(200);
 8904   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8905             "SBB    $dst.hi,$src.hi" %}
 8906   opcode(0x2B, 0x1B);
 8907   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8908   ins_pipe( ialu_reg_reg_long );
 8909 %}
 8910 
 8911 // Subtract Long Register with Immediate
 8912 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8913   match(Set dst (SubL dst src));
 8914   effect(KILL cr);
 8915   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8916             "SBB    $dst.hi,$src.hi" %}
 8917   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8918   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8919   ins_pipe( ialu_reg_long );
 8920 %}
 8921 
 8922 // Subtract Long Register with Memory
 8923 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8924   match(Set dst (SubL dst (LoadL mem)));
 8925   effect(KILL cr);
 8926   ins_cost(125);
 8927   format %{ "SUB    $dst.lo,$mem\n\t"
 8928             "SBB    $dst.hi,$mem+4" %}
 8929   opcode(0x2B, 0x1B);
 8930   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8931   ins_pipe( ialu_reg_long_mem );
 8932 %}
 8933 
 8934 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8935   match(Set dst (SubL zero dst));
 8936   effect(KILL cr);
 8937   ins_cost(300);
 8938   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8939   ins_encode( neg_long(dst) );
 8940   ins_pipe( ialu_reg_reg_long );
 8941 %}
 8942 
 8943 // And Long Register with Register
 8944 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8945   match(Set dst (AndL dst src));
 8946   effect(KILL cr);
 8947   format %{ "AND    $dst.lo,$src.lo\n\t"
 8948             "AND    $dst.hi,$src.hi" %}
 8949   opcode(0x23,0x23);
 8950   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8951   ins_pipe( ialu_reg_reg_long );
 8952 %}
 8953 
 8954 // And Long Register with Immediate
 8955 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8956   match(Set dst (AndL dst src));
 8957   effect(KILL cr);
 8958   format %{ "AND    $dst.lo,$src.lo\n\t"
 8959             "AND    $dst.hi,$src.hi" %}
 8960   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8961   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8962   ins_pipe( ialu_reg_long );
 8963 %}
 8964 
 8965 // And Long Register with Memory
 8966 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8967   match(Set dst (AndL dst (LoadL mem)));
 8968   effect(KILL cr);
 8969   ins_cost(125);
 8970   format %{ "AND    $dst.lo,$mem\n\t"
 8971             "AND    $dst.hi,$mem+4" %}
 8972   opcode(0x23, 0x23);
 8973   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8974   ins_pipe( ialu_reg_long_mem );
 8975 %}
 8976 
 8977 // BMI1 instructions
 8978 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8979   match(Set dst (AndL (XorL src1 minus_1) src2));
 8980   predicate(UseBMI1Instructions);
 8981   effect(KILL cr, TEMP dst);
 8982 
 8983   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8984             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8985          %}
 8986 
 8987   ins_encode %{
 8988     Register Rdst = $dst$$Register;
 8989     Register Rsrc1 = $src1$$Register;
 8990     Register Rsrc2 = $src2$$Register;
 8991     __ andnl(Rdst, Rsrc1, Rsrc2);
 8992     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8993   %}
 8994   ins_pipe(ialu_reg_reg_long);
 8995 %}
 8996 
 8997 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8998   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8999   predicate(UseBMI1Instructions);
 9000   effect(KILL cr, TEMP dst);
 9001 
 9002   ins_cost(125);
 9003   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9004             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9005          %}
 9006 
 9007   ins_encode %{
 9008     Register Rdst = $dst$$Register;
 9009     Register Rsrc1 = $src1$$Register;
 9010     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9011 
 9012     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9013     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9014   %}
 9015   ins_pipe(ialu_reg_mem);
 9016 %}
 9017 
 9018 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9019   match(Set dst (AndL (SubL imm_zero src) src));
 9020   predicate(UseBMI1Instructions);
 9021   effect(KILL cr, TEMP dst);
 9022 
 9023   format %{ "MOVL   $dst.hi, 0\n\t"
 9024             "BLSIL  $dst.lo, $src.lo\n\t"
 9025             "JNZ    done\n\t"
 9026             "BLSIL  $dst.hi, $src.hi\n"
 9027             "done:"
 9028          %}
 9029 
 9030   ins_encode %{
 9031     Label done;
 9032     Register Rdst = $dst$$Register;
 9033     Register Rsrc = $src$$Register;
 9034     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9035     __ blsil(Rdst, Rsrc);
 9036     __ jccb(Assembler::notZero, done);
 9037     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9038     __ bind(done);
 9039   %}
 9040   ins_pipe(ialu_reg);
 9041 %}
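
// The 64-bit "isolate lowest set bit" is split into two 32-bit BLSIs: if the
// low word is non-zero its BLSI result is already the answer (JNZ taken,
// dst.hi stays 0); otherwise the bit is isolated in the high word instead.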
 9042 
 9043 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9044   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9045   predicate(UseBMI1Instructions);
 9046   effect(KILL cr, TEMP dst);
 9047 
 9048   ins_cost(125);
 9049   format %{ "MOVL   $dst.hi, 0\n\t"
 9050             "BLSIL  $dst.lo, $src\n\t"
 9051             "JNZ    done\n\t"
 9052             "BLSIL  $dst.hi, $src+4\n"
 9053             "done:"
 9054          %}
 9055 
 9056   ins_encode %{
 9057     Label done;
 9058     Register Rdst = $dst$$Register;
 9059     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9060 
 9061     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9062     __ blsil(Rdst, $src$$Address);
 9063     __ jccb(Assembler::notZero, done);
 9064     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9065     __ bind(done);
 9066   %}
 9067   ins_pipe(ialu_reg_mem);
 9068 %}
 9069 
 9070 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9071 %{
 9072   match(Set dst (XorL (AddL src minus_1) src));
 9073   predicate(UseBMI1Instructions);
 9074   effect(KILL cr, TEMP dst);
 9075 
 9076   format %{ "MOVL    $dst.hi, 0\n\t"
 9077             "BLSMSKL $dst.lo, $src.lo\n\t"
 9078             "JNC     done\n\t"
 9079             "BLSMSKL $dst.hi, $src.hi\n"
 9080             "done:"
 9081          %}
 9082 
 9083   ins_encode %{
 9084     Label done;
 9085     Register Rdst = $dst$$Register;
 9086     Register Rsrc = $src$$Register;
 9087     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9088     __ blsmskl(Rdst, Rsrc);
 9089     __ jccb(Assembler::carryClear, done);
 9090     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9091     __ bind(done);
 9092   %}
 9093 
 9094   ins_pipe(ialu_reg);
 9095 %}
 9096 
 9097 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9098 %{
 9099   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9100   predicate(UseBMI1Instructions);
 9101   effect(KILL cr, TEMP dst);
 9102 
 9103   ins_cost(125);
 9104   format %{ "MOVL    $dst.hi, 0\n\t"
 9105             "BLSMSKL $dst.lo, $src\n\t"
 9106             "JNC     done\n\t"
 9107             "BLSMSKL $dst.hi, $src+4\n"
 9108             "done:"
 9109          %}
 9110 
 9111   ins_encode %{
 9112     Label done;
 9113     Register Rdst = $dst$$Register;
 9114     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9115 
 9116     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9117     __ blsmskl(Rdst, $src$$Address);
 9118     __ jccb(Assembler::carryClear, done);
 9119     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9120     __ bind(done);
 9121   %}
 9122 
 9123   ins_pipe(ialu_reg_mem);
 9124 %}
 9125 
 9126 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9127 %{
 9128   match(Set dst (AndL (AddL src minus_1) src) );
 9129   predicate(UseBMI1Instructions);
 9130   effect(KILL cr, TEMP dst);
 9131 
 9132   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9133             "BLSRL  $dst.lo, $src.lo\n\t"
 9134             "JNC    done\n\t"
 9135             "BLSRL  $dst.hi, $src.hi\n"
 9136             "done:"
 9137   %}
 9138 
 9139   ins_encode %{
 9140     Label done;
 9141     Register Rdst = $dst$$Register;
 9142     Register Rsrc = $src$$Register;
 9143     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9144     __ blsrl(Rdst, Rsrc);
 9145     __ jccb(Assembler::carryClear, done);
 9146     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9147     __ bind(done);
 9148   %}
 9149 
 9150   ins_pipe(ialu_reg);
 9151 %}
 9152 
 9153 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9154 %{
 9155   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9156   predicate(UseBMI1Instructions);
 9157   effect(KILL cr, TEMP dst);
 9158 
 9159   ins_cost(125);
 9160   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9161             "BLSRL  $dst.lo, $src\n\t"
 9162             "JNC    done\n\t"
 9163             "BLSRL  $dst.hi, $src+4\n"
 9164             "done:"
 9165   %}
 9166 
 9167   ins_encode %{
 9168     Label done;
 9169     Register Rdst = $dst$$Register;
 9170     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9171     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9172     __ blsrl(Rdst, $src$$Address);
 9173     __ jccb(Assembler::carryClear, done);
 9174     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9175     __ bind(done);
 9176   %}
 9177 
 9178   ins_pipe(ialu_reg_mem);
 9179 %}
 9180 
 9181 // Or Long Register with Register
 9182 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9183   match(Set dst (OrL dst src));
 9184   effect(KILL cr);
 9185   format %{ "OR     $dst.lo,$src.lo\n\t"
 9186             "OR     $dst.hi,$src.hi" %}
 9187   opcode(0x0B,0x0B);
 9188   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9189   ins_pipe( ialu_reg_reg_long );
 9190 %}
 9191 
 9192 // Or Long Register with Immediate
 9193 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9194   match(Set dst (OrL dst src));
 9195   effect(KILL cr);
 9196   format %{ "OR     $dst.lo,$src.lo\n\t"
 9197             "OR     $dst.hi,$src.hi" %}
 9198   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9199   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9200   ins_pipe( ialu_reg_long );
 9201 %}
 9202 
 9203 // Or Long Register with Memory
 9204 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9205   match(Set dst (OrL dst (LoadL mem)));
 9206   effect(KILL cr);
 9207   ins_cost(125);
 9208   format %{ "OR     $dst.lo,$mem\n\t"
 9209             "OR     $dst.hi,$mem+4" %}
 9210   opcode(0x0B,0x0B);
 9211   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9212   ins_pipe( ialu_reg_long_mem );
 9213 %}
 9214 
 9215 // Xor Long Register with Register
 9216 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9217   match(Set dst (XorL dst src));
 9218   effect(KILL cr);
 9219   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9220             "XOR    $dst.hi,$src.hi" %}
 9221   opcode(0x33,0x33);
 9222   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9223   ins_pipe( ialu_reg_reg_long );
 9224 %}
 9225 
 9226 // Xor Long Register with Immediate -1
 9227 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9228   match(Set dst (XorL dst imm));
 9229   format %{ "NOT    $dst.lo\n\t"
 9230             "NOT    $dst.hi" %}
 9231   ins_encode %{
 9232      __ notl($dst$$Register);
 9233      __ notl(HIGH_FROM_LOW($dst$$Register));
 9234   %}
 9235   ins_pipe( ialu_reg_long );
 9236 %}
 9237 
 9238 // Xor Long Register with Immediate
 9239 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9240   match(Set dst (XorL dst src));
 9241   effect(KILL cr);
 9242   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9243             "XOR    $dst.hi,$src.hi" %}
 9244   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9245   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9246   ins_pipe( ialu_reg_long );
 9247 %}
 9248 
 9249 // Xor Long Register with Memory
 9250 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9251   match(Set dst (XorL dst (LoadL mem)));
 9252   effect(KILL cr);
 9253   ins_cost(125);
 9254   format %{ "XOR    $dst.lo,$mem\n\t"
 9255             "XOR    $dst.hi,$mem+4" %}
 9256   opcode(0x33,0x33);
 9257   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9258   ins_pipe( ialu_reg_long_mem );
 9259 %}
 9260 
 9261 // Shift Left Long by 1
 9262 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9263   predicate(UseNewLongLShift);
 9264   match(Set dst (LShiftL dst cnt));
 9265   effect(KILL cr);
 9266   ins_cost(100);
 9267   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9268             "ADC    $dst.hi,$dst.hi" %}
 9269   ins_encode %{
 9270     __ addl($dst$$Register,$dst$$Register);
 9271     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9272   %}
 9273   ins_pipe( ialu_reg_long );
 9274 %}
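
// A long left shift by one is just a double-word add of the value to itself:
// ADD doubles the low word and leaves the shifted-out bit in CF, which ADC
// then shifts into the high word.  The by-2 and by-3 rules below repeat the
// same pair.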
 9275 
 9276 // Shift Left Long by 2
 9277 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9278   predicate(UseNewLongLShift);
 9279   match(Set dst (LShiftL dst cnt));
 9280   effect(KILL cr);
 9281   ins_cost(100);
 9282   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9283             "ADC    $dst.hi,$dst.hi\n\t"
 9284             "ADD    $dst.lo,$dst.lo\n\t"
 9285             "ADC    $dst.hi,$dst.hi" %}
 9286   ins_encode %{
 9287     __ addl($dst$$Register,$dst$$Register);
 9288     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9289     __ addl($dst$$Register,$dst$$Register);
 9290     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9291   %}
 9292   ins_pipe( ialu_reg_long );
 9293 %}
 9294 
 9295 // Shift Left Long by 3
 9296 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9297   predicate(UseNewLongLShift);
 9298   match(Set dst (LShiftL dst cnt));
 9299   effect(KILL cr);
 9300   ins_cost(100);
 9301   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9302             "ADC    $dst.hi,$dst.hi\n\t"
 9303             "ADD    $dst.lo,$dst.lo\n\t"
 9304             "ADC    $dst.hi,$dst.hi\n\t"
 9305             "ADD    $dst.lo,$dst.lo\n\t"
 9306             "ADC    $dst.hi,$dst.hi" %}
 9307   ins_encode %{
 9308     __ addl($dst$$Register,$dst$$Register);
 9309     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9310     __ addl($dst$$Register,$dst$$Register);
 9311     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9312     __ addl($dst$$Register,$dst$$Register);
 9313     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9314   %}
 9315   ins_pipe( ialu_reg_long );
 9316 %}
 9317 
 9318 // Shift Left Long by 1-31
 9319 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9320   match(Set dst (LShiftL dst cnt));
 9321   effect(KILL cr);
 9322   ins_cost(200);
 9323   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9324             "SHL    $dst.lo,$cnt" %}
 9325   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9326   ins_encode( move_long_small_shift(dst,cnt) );
 9327   ins_pipe( ialu_reg_long );
 9328 %}
 9329 
 9330 // Shift Left Long by 32-63
 9331 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9332   match(Set dst (LShiftL dst cnt));
 9333   effect(KILL cr);
 9334   ins_cost(300);
 9335   format %{ "MOV    $dst.hi,$dst.lo\n"
 9336           "\tSHL    $dst.hi,$cnt-32\n"
 9337           "\tXOR    $dst.lo,$dst.lo" %}
 9338   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9339   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9340   ins_pipe( ialu_reg_long );
 9341 %}
 9342 
 9343 // Shift Left Long by variable
 9344 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9345   match(Set dst (LShiftL dst shift));
 9346   effect(KILL cr);
 9347   ins_cost(500+200);
 9348   size(17);
 9349   format %{ "TEST   $shift,32\n\t"
 9350             "JEQ,s  small\n\t"
 9351             "MOV    $dst.hi,$dst.lo\n\t"
 9352             "XOR    $dst.lo,$dst.lo\n"
 9353     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9354             "SHL    $dst.lo,$shift" %}
 9355   ins_encode( shift_left_long( dst, shift ) );
 9356   ins_pipe( pipe_slow );
 9357 %}
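
// SHLD $dst.hi,$dst.lo,$shift shifts the high word left while filling the
// vacated bits from the top of the low word, so the pair moves as one 64-bit
// value; SHL then finishes the low word.  Since the hardware masks the count
// to 5 bits, counts of 32-63 are handled by first moving lo into hi and
// clearing lo (the TEST $shift,32 branch above).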
 9358 
 9359 // Shift Right Long by 1-31
 9360 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9361   match(Set dst (URShiftL dst cnt));
 9362   effect(KILL cr);
 9363   ins_cost(200);
 9364   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9365             "SHR    $dst.hi,$cnt" %}
 9366   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9367   ins_encode( move_long_small_shift(dst,cnt) );
 9368   ins_pipe( ialu_reg_long );
 9369 %}
 9370 
 9371 // Shift Right Long by 32-63
 9372 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9373   match(Set dst (URShiftL dst cnt));
 9374   effect(KILL cr);
 9375   ins_cost(300);
 9376   format %{ "MOV    $dst.lo,$dst.hi\n"
 9377           "\tSHR    $dst.lo,$cnt-32\n"
 9378           "\tXOR    $dst.hi,$dst.hi" %}
 9379   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9380   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9381   ins_pipe( ialu_reg_long );
 9382 %}
 9383 
 9384 // Shift Right Long by variable
 9385 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9386   match(Set dst (URShiftL dst shift));
 9387   effect(KILL cr);
 9388   ins_cost(600);
 9389   size(17);
 9390   format %{ "TEST   $shift,32\n\t"
 9391             "JEQ,s  small\n\t"
 9392             "MOV    $dst.lo,$dst.hi\n\t"
 9393             "XOR    $dst.hi,$dst.hi\n"
 9394     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9395             "SHR    $dst.hi,$shift" %}
 9396   ins_encode( shift_right_long( dst, shift ) );
 9397   ins_pipe( pipe_slow );
 9398 %}
 9399 
 9400 // Shift Right Long by 1-31
 9401 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9402   match(Set dst (RShiftL dst cnt));
 9403   effect(KILL cr);
 9404   ins_cost(200);
 9405   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9406             "SAR    $dst.hi,$cnt" %}
 9407   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9408   ins_encode( move_long_small_shift(dst,cnt) );
 9409   ins_pipe( ialu_reg_long );
 9410 %}
 9411 
 9412 // Shift Right Long by 32-63
 9413 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9414   match(Set dst (RShiftL dst cnt));
 9415   effect(KILL cr);
 9416   ins_cost(300);
 9417   format %{ "MOV    $dst.lo,$dst.hi\n"
 9418           "\tSAR    $dst.lo,$cnt-32\n"
 9419           "\tSAR    $dst.hi,31" %}
 9420   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9421   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9422   ins_pipe( ialu_reg_long );
 9423 %}
 9424 
 9425 // Shift Right arithmetic Long by variable
 9426 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9427   match(Set dst (RShiftL dst shift));
 9428   effect(KILL cr);
 9429   ins_cost(600);
 9430   size(18);
 9431   format %{ "TEST   $shift,32\n\t"
 9432             "JEQ,s  small\n\t"
 9433             "MOV    $dst.lo,$dst.hi\n\t"
 9434             "SAR    $dst.hi,31\n"
 9435     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9436             "SAR    $dst.hi,$shift" %}
 9437   ins_encode( shift_right_arith_long( dst, shift ) );
 9438   ins_pipe( pipe_slow );
 9439 %}
 9440 
 9441 
 9442 //----------Double Instructions------------------------------------------------
 9443 // Double Math
 9444 
 9445 // Compare & branch
 9446 
 9447 // P6 version of float compare, sets condition codes in EFLAGS
 9448 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9449   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9450   match(Set cr (CmpD src1 src2));
 9451   effect(KILL rax);
 9452   ins_cost(150);
 9453   format %{ "FLD    $src1\n\t"
 9454             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9455             "JNP    exit\n\t"
 9456             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9457             "SAHF\n"
 9458      "exit:\tNOP               // avoid branch to branch" %}
 9459   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9460   ins_encode( Push_Reg_DPR(src1),
 9461               OpcP, RegOpc(src2),
 9462               cmpF_P6_fixup );
 9463   ins_pipe( pipe_slow );
 9464 %}
 9465 
 9466 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9467   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9468   match(Set cr (CmpD src1 src2));
 9469   ins_cost(150);
 9470   format %{ "FLD    $src1\n\t"
 9471             "FUCOMIP ST,$src2  // P6 instruction" %}
 9472   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9473   ins_encode( Push_Reg_DPR(src1),
 9474               OpcP, RegOpc(src2));
 9475   ins_pipe( pipe_slow );
 9476 %}
 9477 
 9478 // Compare & branch
 9479 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9480   predicate(UseSSE<=1);
 9481   match(Set cr (CmpD src1 src2));
 9482   effect(KILL rax);
 9483   ins_cost(200);
 9484   format %{ "FLD    $src1\n\t"
 9485             "FCOMp  $src2\n\t"
 9486             "FNSTSW AX\n\t"
 9487             "TEST   AX,0x400\n\t"
 9488             "JZ,s   flags\n\t"
 9489             "MOV    AH,1\t# unordered treat as LT\n"
 9490     "flags:\tSAHF" %}
 9491   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9492   ins_encode( Push_Reg_DPR(src1),
 9493               OpcP, RegOpc(src2),
 9494               fpu_flags);
 9495   ins_pipe( pipe_slow );
 9496 %}
 9497 
 9498 // Compare vs zero into -1,0,1
 9499 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9500   predicate(UseSSE<=1);
 9501   match(Set dst (CmpD3 src1 zero));
 9502   effect(KILL cr, KILL rax);
 9503   ins_cost(280);
 9504   format %{ "FTSTD  $dst,$src1" %}
 9505   opcode(0xE4, 0xD9);
 9506   ins_encode( Push_Reg_DPR(src1),
 9507               OpcS, OpcP, PopFPU,
 9508               CmpF_Result(dst));
 9509   ins_pipe( pipe_slow );
 9510 %}
 9511 
 9512 // Compare into -1,0,1
 9513 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9514   predicate(UseSSE<=1);
 9515   match(Set dst (CmpD3 src1 src2));
 9516   effect(KILL cr, KILL rax);
 9517   ins_cost(300);
 9518   format %{ "FCMPD  $dst,$src1,$src2" %}
 9519   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9520   ins_encode( Push_Reg_DPR(src1),
 9521               OpcP, RegOpc(src2),
 9522               CmpF_Result(dst));
 9523   ins_pipe( pipe_slow );
 9524 %}
 9525 
// double compare and set condition codes in EFLAGS by XMM regs
 9527 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9528   predicate(UseSSE>=2);
 9529   match(Set cr (CmpD src1 src2));
 9530   ins_cost(145);
 9531   format %{ "UCOMISD $src1,$src2\n\t"
 9532             "JNP,s   exit\n\t"
 9533             "PUSHF\t# saw NaN, set CF\n\t"
 9534             "AND     [rsp], #0xffffff2b\n\t"
 9535             "POPF\n"
 9536     "exit:" %}
 9537   ins_encode %{
 9538     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9539     emit_cmpfp_fixup(_masm);
 9540   %}
 9541   ins_pipe( pipe_slow );
 9542 %}
 9543 
 9544 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9545   predicate(UseSSE>=2);
 9546   match(Set cr (CmpD src1 src2));
 9547   ins_cost(100);
 9548   format %{ "UCOMISD $src1,$src2" %}
 9549   ins_encode %{
 9550     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9551   %}
 9552   ins_pipe( pipe_slow );
 9553 %}
 9554 
// double compare and set condition codes in EFLAGS by XMM regs
 9556 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9557   predicate(UseSSE>=2);
 9558   match(Set cr (CmpD src1 (LoadD src2)));
 9559   ins_cost(145);
 9560   format %{ "UCOMISD $src1,$src2\n\t"
 9561             "JNP,s   exit\n\t"
 9562             "PUSHF\t# saw NaN, set CF\n\t"
 9563             "AND     [rsp], #0xffffff2b\n\t"
 9564             "POPF\n"
 9565     "exit:" %}
 9566   ins_encode %{
 9567     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9568     emit_cmpfp_fixup(_masm);
 9569   %}
 9570   ins_pipe( pipe_slow );
 9571 %}
 9572 
 9573 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9574   predicate(UseSSE>=2);
 9575   match(Set cr (CmpD src1 (LoadD src2)));
 9576   ins_cost(100);
 9577   format %{ "UCOMISD $src1,$src2" %}
 9578   ins_encode %{
 9579     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9580   %}
 9581   ins_pipe( pipe_slow );
 9582 %}
 9583 
 9584 // Compare into -1,0,1 in XMM
 9585 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9586   predicate(UseSSE>=2);
 9587   match(Set dst (CmpD3 src1 src2));
 9588   effect(KILL cr);
 9589   ins_cost(255);
 9590   format %{ "UCOMISD $src1, $src2\n\t"
 9591             "MOV     $dst, #-1\n\t"
 9592             "JP,s    done\n\t"
 9593             "JB,s    done\n\t"
 9594             "SETNE   $dst\n\t"
 9595             "MOVZB   $dst, $dst\n"
 9596     "done:" %}
 9597   ins_encode %{
 9598     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9599     emit_cmpfp3(_masm, $dst$$Register);
 9600   %}
 9601   ins_pipe( pipe_slow );
 9602 %}
 9603 
 9604 // Compare into -1,0,1 in XMM and memory
 9605 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9606   predicate(UseSSE>=2);
 9607   match(Set dst (CmpD3 src1 (LoadD src2)));
 9608   effect(KILL cr);
 9609   ins_cost(275);
 9610   format %{ "UCOMISD $src1, $src2\n\t"
 9611             "MOV     $dst, #-1\n\t"
 9612             "JP,s    done\n\t"
 9613             "JB,s    done\n\t"
 9614             "SETNE   $dst\n\t"
 9615             "MOVZB   $dst, $dst\n"
 9616     "done:" %}
 9617   ins_encode %{
 9618     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9619     emit_cmpfp3(_masm, $dst$$Register);
 9620   %}
 9621   ins_pipe( pipe_slow );
 9622 %}
 9623 
 9624 
 9625 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9626   predicate (UseSSE <=1);
 9627   match(Set dst (SubD dst src));
 9628 
 9629   format %{ "FLD    $src\n\t"
 9630             "DSUBp  $dst,ST" %}
 9631   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9632   ins_cost(150);
 9633   ins_encode( Push_Reg_DPR(src),
 9634               OpcP, RegOpc(dst) );
 9635   ins_pipe( fpu_reg_reg );
 9636 %}
 9637 
 9638 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9639   predicate (UseSSE <=1);
 9640   match(Set dst (RoundDouble (SubD src1 src2)));
 9641   ins_cost(250);
 9642 
 9643   format %{ "FLD    $src2\n\t"
 9644             "DSUB   ST,$src1\n\t"
 9645             "FSTP_D $dst\t# D-round" %}
 9646   opcode(0xD8, 0x5);
 9647   ins_encode( Push_Reg_DPR(src2),
 9648               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9649   ins_pipe( fpu_mem_reg_reg );
 9650 %}
 9651 
 9652 
 9653 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9654   predicate (UseSSE <=1);
 9655   match(Set dst (SubD dst (LoadD src)));
 9656   ins_cost(150);
 9657 
 9658   format %{ "FLD    $src\n\t"
 9659             "DSUBp  $dst,ST" %}
 9660   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9661   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9662               OpcP, RegOpc(dst) );
 9663   ins_pipe( fpu_reg_mem );
 9664 %}
 9665 
 9666 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9667   predicate (UseSSE<=1);
 9668   match(Set dst (AbsD src));
 9669   ins_cost(100);
 9670   format %{ "FABS" %}
 9671   opcode(0xE1, 0xD9);
 9672   ins_encode( OpcS, OpcP );
 9673   ins_pipe( fpu_reg_reg );
 9674 %}
 9675 
 9676 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9677   predicate(UseSSE<=1);
 9678   match(Set dst (NegD src));
 9679   ins_cost(100);
 9680   format %{ "FCHS" %}
 9681   opcode(0xE0, 0xD9);
 9682   ins_encode( OpcS, OpcP );
 9683   ins_pipe( fpu_reg_reg );
 9684 %}
 9685 
 9686 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9687   predicate(UseSSE<=1);
 9688   match(Set dst (AddD dst src));
 9689   format %{ "FLD    $src\n\t"
 9690             "DADD   $dst,ST" %}
 9691   size(4);
 9692   ins_cost(150);
 9693   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9694   ins_encode( Push_Reg_DPR(src),
 9695               OpcP, RegOpc(dst) );
 9696   ins_pipe( fpu_reg_reg );
 9697 %}
 9698 
 9699 
 9700 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9701   predicate(UseSSE<=1);
 9702   match(Set dst (RoundDouble (AddD src1 src2)));
 9703   ins_cost(250);
 9704 
 9705   format %{ "FLD    $src2\n\t"
 9706             "DADD   ST,$src1\n\t"
 9707             "FSTP_D $dst\t# D-round" %}
 9708   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9709   ins_encode( Push_Reg_DPR(src2),
 9710               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9711   ins_pipe( fpu_mem_reg_reg );
 9712 %}
 9713 
 9714 
 9715 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9716   predicate(UseSSE<=1);
 9717   match(Set dst (AddD dst (LoadD src)));
 9718   ins_cost(150);
 9719 
 9720   format %{ "FLD    $src\n\t"
 9721             "DADDp  $dst,ST" %}
 9722   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9723   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9724               OpcP, RegOpc(dst) );
 9725   ins_pipe( fpu_reg_mem );
 9726 %}
 9727 
 9728 // add-to-memory
 9729 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9730   predicate(UseSSE<=1);
 9731   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9732   ins_cost(150);
 9733 
 9734   format %{ "FLD_D  $dst\n\t"
 9735             "DADD   ST,$src\n\t"
 9736             "FST_D  $dst" %}
 9737   opcode(0xDD, 0x0);
 9738   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9739               Opcode(0xD8), RegOpc(src),
 9740               set_instruction_start,
 9741               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9742   ins_pipe( fpu_reg_mem );
 9743 %}
 9744 
 9745 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9746   predicate(UseSSE<=1);
 9747   match(Set dst (AddD dst con));
 9748   ins_cost(125);
 9749   format %{ "FLD1\n\t"
 9750             "DADDp  $dst,ST" %}
 9751   ins_encode %{
 9752     __ fld1();
 9753     __ faddp($dst$$reg);
 9754   %}
 9755   ins_pipe(fpu_reg);
 9756 %}
 9757 
 9758 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9759   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9760   match(Set dst (AddD dst con));
 9761   ins_cost(200);
 9762   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9763             "DADDp  $dst,ST" %}
 9764   ins_encode %{
 9765     __ fld_d($constantaddress($con));
 9766     __ faddp($dst$$reg);
 9767   %}
 9768   ins_pipe(fpu_reg_mem);
 9769 %}
 9770 
 9771 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9772   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9773   match(Set dst (RoundDouble (AddD src con)));
 9774   ins_cost(200);
 9775   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9776             "DADD   ST,$src\n\t"
 9777             "FSTP_D $dst\t# D-round" %}
 9778   ins_encode %{
 9779     __ fld_d($constantaddress($con));
 9780     __ fadd($src$$reg);
 9781     __ fstp_d(Address(rsp, $dst$$disp));
 9782   %}
 9783   ins_pipe(fpu_mem_reg_con);
 9784 %}
 9785 
 9786 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9787   predicate(UseSSE<=1);
 9788   match(Set dst (MulD dst src));
 9789   format %{ "FLD    $src\n\t"
 9790             "DMULp  $dst,ST" %}
 9791   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9792   ins_cost(150);
 9793   ins_encode( Push_Reg_DPR(src),
 9794               OpcP, RegOpc(dst) );
 9795   ins_pipe( fpu_reg_reg );
 9796 %}
 9797 
 9798 // Strict FP instruction biases argument before multiply then
 9799 // biases result to avoid double rounding of subnormals.
 9800 //
 9801 // scale arg1 by multiplying arg1 by 2^(-15360)
 9802 // load arg2
 9803 // multiply scaled arg1 by arg2
 9804 // rescale product by 2^(15360)
 9805 //
 9806 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9807   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9808   match(Set dst (MulD dst src));
 9809   ins_cost(1);   // Select this instruction for all FP double multiplies
 9810 
 9811   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9812             "DMULp  $dst,ST\n\t"
 9813             "FLD    $src\n\t"
 9814             "DMULp  $dst,ST\n\t"
 9815             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9816             "DMULp  $dst,ST\n\t" %}
 9817   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9818   ins_encode( strictfp_bias1(dst),
 9819               Push_Reg_DPR(src),
 9820               OpcP, RegOpc(dst),
 9821               strictfp_bias2(dst) );
 9822   ins_pipe( fpu_reg_reg );
 9823 %}
 9824 
 9825 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9826   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9827   match(Set dst (MulD dst con));
 9828   ins_cost(200);
 9829   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9830             "DMULp  $dst,ST" %}
 9831   ins_encode %{
 9832     __ fld_d($constantaddress($con));
 9833     __ fmulp($dst$$reg);
 9834   %}
 9835   ins_pipe(fpu_reg_mem);
 9836 %}
 9837 
 9838 
 9839 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9840   predicate( UseSSE<=1 );
 9841   match(Set dst (MulD dst (LoadD src)));
 9842   ins_cost(200);
 9843   format %{ "FLD_D  $src\n\t"
 9844             "DMULp  $dst,ST" %}
 9845   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9846   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9847               OpcP, RegOpc(dst) );
 9848   ins_pipe( fpu_reg_mem );
 9849 %}
 9850 
 9851 //
 9852 // Cisc-alternate to reg-reg multiply
 9853 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9854   predicate( UseSSE<=1 );
 9855   match(Set dst (MulD src (LoadD mem)));
 9856   ins_cost(250);
 9857   format %{ "FLD_D  $mem\n\t"
 9858             "DMUL   ST,$src\n\t"
 9859             "FSTP_D $dst" %}
 9860   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9861   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9862               OpcReg_FPR(src),
 9863               Pop_Reg_DPR(dst) );
 9864   ins_pipe( fpu_reg_reg_mem );
 9865 %}
 9866 
 9867 
 9868 // MACRO3 -- addDPR a mulDPR
 9869 // This instruction is a '2-address' instruction in that the result goes
 9870 // back to src2.  This eliminates a move from the macro; possibly the
 9871 // register allocator will have to add it back (and maybe not).
 9872 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9873   predicate( UseSSE<=1 );
 9874   match(Set src2 (AddD (MulD src0 src1) src2));
 9875   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9876             "DMUL   ST,$src1\n\t"
 9877             "DADDp  $src2,ST" %}
 9878   ins_cost(250);
 9879   opcode(0xDD); /* LoadD DD /0 */
 9880   ins_encode( Push_Reg_FPR(src0),
 9881               FMul_ST_reg(src1),
 9882               FAddP_reg_ST(src2) );
 9883   ins_pipe( fpu_reg_reg_reg );
 9884 %}
 9885 
 9886 
 9887 // MACRO3 -- subDPR a mulDPR
 9888 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9889   predicate( UseSSE<=1 );
 9890   match(Set src2 (SubD (MulD src0 src1) src2));
 9891   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9892             "DMUL   ST,$src1\n\t"
 9893             "DSUBRp $src2,ST" %}
 9894   ins_cost(250);
 9895   ins_encode( Push_Reg_FPR(src0),
 9896               FMul_ST_reg(src1),
 9897               Opcode(0xDE), Opc_plus(0xE0,src2));
 9898   ins_pipe( fpu_reg_reg_reg );
 9899 %}
 9900 
 9901 
 9902 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9903   predicate( UseSSE<=1 );
 9904   match(Set dst (DivD dst src));
 9905 
 9906   format %{ "FLD    $src\n\t"
 9907             "FDIVp  $dst,ST" %}
 9908   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9909   ins_cost(150);
 9910   ins_encode( Push_Reg_DPR(src),
 9911               OpcP, RegOpc(dst) );
 9912   ins_pipe( fpu_reg_reg );
 9913 %}
 9914 
 9915 // Strict FP instruction biases argument before division then
 9916 // biases result, to avoid double rounding of subnormals.
 9917 //
 9918 // scale dividend by multiplying dividend by 2^(-15360)
 9919 // load divisor
 9920 // divide scaled dividend by divisor
 9921 // rescale quotient by 2^(15360)
 9922 //
 9923 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9928 
 9929   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9930             "DMULp  $dst,ST\n\t"
 9931             "FLD    $src\n\t"
 9932             "FDIVp  $dst,ST\n\t"
 9933             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9934             "DMULp  $dst,ST\n\t" %}
 9935   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9936   ins_encode( strictfp_bias1(dst),
 9937               Push_Reg_DPR(src),
 9938               OpcP, RegOpc(dst),
 9939               strictfp_bias2(dst) );
 9940   ins_pipe( fpu_reg_reg );
 9941 %}
 9942 
 9943 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9944   predicate(UseSSE<=1);
 9945   match(Set dst (ModD dst src));
 9946   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9947 
 9948   format %{ "DMOD   $dst,$src" %}
 9949   ins_cost(250);
 9950   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9951               emitModDPR(),
 9952               Push_Result_Mod_DPR(src),
 9953               Pop_Reg_DPR(dst));
 9954   ins_pipe( pipe_slow );
 9955 %}
 9956 
 9957 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9958   predicate(UseSSE>=2);
 9959   match(Set dst (ModD src0 src1));
 9960   effect(KILL rax, KILL cr);
 9961 
 9962   format %{ "SUB    ESP,8\t # DMOD\n"
 9963           "\tMOVSD  [ESP+0],$src1\n"
 9964           "\tFLD_D  [ESP+0]\n"
 9965           "\tMOVSD  [ESP+0],$src0\n"
 9966           "\tFLD_D  [ESP+0]\n"
 9967      "loop:\tFPREM\n"
 9968           "\tFWAIT\n"
 9969           "\tFNSTSW AX\n"
 9970           "\tSAHF\n"
 9971           "\tJP     loop\n"
 9972           "\tFSTP_D [ESP+0]\n"
 9973           "\tMOVSD  $dst,[ESP+0]\n"
 9974           "\tADD    ESP,8\n"
 9975           "\tFSTP   ST0\t # Restore FPU Stack"
 9976     %}
 9977   ins_cost(250);
 9978   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9979   ins_pipe( pipe_slow );
 9980 %}
 9981 
 9982 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9983   predicate (UseSSE<=1);
 9984   match(Set dst(AtanD dst src));
 9985   format %{ "DATA   $dst,$src" %}
 9986   opcode(0xD9, 0xF3);
 9987   ins_encode( Push_Reg_DPR(src),
 9988               OpcP, OpcS, RegOpc(dst) );
 9989   ins_pipe( pipe_slow );
 9990 %}
 9991 
 9992 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9993   predicate (UseSSE>=2);
 9994   match(Set dst(AtanD dst src));
 9995   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9996   format %{ "DATA   $dst,$src" %}
 9997   opcode(0xD9, 0xF3);
 9998   ins_encode( Push_SrcD(src),
 9999               OpcP, OpcS, Push_ResultD(dst) );
10000   ins_pipe( pipe_slow );
10001 %}
10002 
10003 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10004   predicate (UseSSE<=1);
10005   match(Set dst (SqrtD src));
10006   format %{ "DSQRT  $dst,$src" %}
10007   opcode(0xFA, 0xD9);
10008   ins_encode( Push_Reg_DPR(src),
10009               OpcS, OpcP, Pop_Reg_DPR(dst) );
10010   ins_pipe( pipe_slow );
10011 %}
10012 
10013 //-------------Float Instructions-------------------------------
10014 // Float Math
10015 
10016 // Code for float compare:
10017 //     fcompp();
10018 //     fwait(); fnstsw_ax();
10019 //     sahf();
10020 //     movl(dst, unordered_result);
10021 //     jcc(Assembler::parity, exit);
10022 //     movl(dst, less_result);
10023 //     jcc(Assembler::below, exit);
10024 //     movl(dst, equal_result);
10025 //     jcc(Assembler::equal, exit);
10026 //     movl(dst, greater_result);
10027 //   exit:
10028 
10029 // P6 version of float compare, sets condition codes in EFLAGS
10030 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10031   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10032   match(Set cr (CmpF src1 src2));
10033   effect(KILL rax);
10034   ins_cost(150);
10035   format %{ "FLD    $src1\n\t"
10036             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10037             "JNP    exit\n\t"
10038             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10039             "SAHF\n"
10040      "exit:\tNOP               // avoid branch to branch" %}
10041   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10042   ins_encode( Push_Reg_DPR(src1),
10043               OpcP, RegOpc(src2),
10044               cmpF_P6_fixup );
10045   ins_pipe( pipe_slow );
10046 %}
10047 
10048 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10049   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10050   match(Set cr (CmpF src1 src2));
10051   ins_cost(100);
10052   format %{ "FLD    $src1\n\t"
10053             "FUCOMIP ST,$src2  // P6 instruction" %}
10054   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10055   ins_encode( Push_Reg_DPR(src1),
10056               OpcP, RegOpc(src2));
10057   ins_pipe( pipe_slow );
10058 %}
10059 
10060 
10061 // Compare & branch
10062 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10063   predicate(UseSSE == 0);
10064   match(Set cr (CmpF src1 src2));
10065   effect(KILL rax);
10066   ins_cost(200);
10067   format %{ "FLD    $src1\n\t"
10068             "FCOMp  $src2\n\t"
10069             "FNSTSW AX\n\t"
10070             "TEST   AX,0x400\n\t"
10071             "JZ,s   flags\n\t"
10072             "MOV    AH,1\t# unordered treat as LT\n"
10073     "flags:\tSAHF" %}
10074   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10075   ins_encode( Push_Reg_DPR(src1),
10076               OpcP, RegOpc(src2),
10077               fpu_flags);
10078   ins_pipe( pipe_slow );
10079 %}
10080 
10081 // Compare vs zero into -1,0,1
10082 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10083   predicate(UseSSE == 0);
10084   match(Set dst (CmpF3 src1 zero));
10085   effect(KILL cr, KILL rax);
10086   ins_cost(280);
10087   format %{ "FTSTF  $dst,$src1" %}
10088   opcode(0xE4, 0xD9);
10089   ins_encode( Push_Reg_DPR(src1),
10090               OpcS, OpcP, PopFPU,
10091               CmpF_Result(dst));
10092   ins_pipe( pipe_slow );
10093 %}
10094 
10095 // Compare into -1,0,1
10096 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10097   predicate(UseSSE == 0);
10098   match(Set dst (CmpF3 src1 src2));
10099   effect(KILL cr, KILL rax);
10100   ins_cost(300);
10101   format %{ "FCMPF  $dst,$src1,$src2" %}
10102   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10103   ins_encode( Push_Reg_DPR(src1),
10104               OpcP, RegOpc(src2),
10105               CmpF_Result(dst));
10106   ins_pipe( pipe_slow );
10107 %}
10108 
10109 // float compare and set condition codes in EFLAGS by XMM regs
10110 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10111   predicate(UseSSE>=1);
10112   match(Set cr (CmpF src1 src2));
10113   ins_cost(145);
10114   format %{ "UCOMISS $src1,$src2\n\t"
10115             "JNP,s   exit\n\t"
10116             "PUSHF\t# saw NaN, set CF\n\t"
10117             "AND     [rsp], #0xffffff2b\n\t"
10118             "POPF\n"
10119     "exit:" %}
10120   ins_encode %{
10121     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10122     emit_cmpfp_fixup(_masm);
10123   %}
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10128   predicate(UseSSE>=1);
10129   match(Set cr (CmpF src1 src2));
10130   ins_cost(100);
10131   format %{ "UCOMISS $src1,$src2" %}
10132   ins_encode %{
10133     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10134   %}
10135   ins_pipe( pipe_slow );
10136 %}
10137 
10138 // float compare and set condition codes in EFLAGS by XMM regs
10139 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10140   predicate(UseSSE>=1);
10141   match(Set cr (CmpF src1 (LoadF src2)));
10142   ins_cost(165);
10143   format %{ "UCOMISS $src1,$src2\n\t"
10144             "JNP,s   exit\n\t"
10145             "PUSHF\t# saw NaN, set CF\n\t"
10146             "AND     [rsp], #0xffffff2b\n\t"
10147             "POPF\n"
10148     "exit:" %}
10149   ins_encode %{
10150     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10151     emit_cmpfp_fixup(_masm);
10152   %}
10153   ins_pipe( pipe_slow );
10154 %}
10155 
10156 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10157   predicate(UseSSE>=1);
10158   match(Set cr (CmpF src1 (LoadF src2)));
10159   ins_cost(100);
10160   format %{ "UCOMISS $src1,$src2" %}
10161   ins_encode %{
10162     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10163   %}
10164   ins_pipe( pipe_slow );
10165 %}
10166 
10167 // Compare into -1,0,1 in XMM
10168 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10169   predicate(UseSSE>=1);
10170   match(Set dst (CmpF3 src1 src2));
10171   effect(KILL cr);
10172   ins_cost(255);
10173   format %{ "UCOMISS $src1, $src2\n\t"
10174             "MOV     $dst, #-1\n\t"
10175             "JP,s    done\n\t"
10176             "JB,s    done\n\t"
10177             "SETNE   $dst\n\t"
10178             "MOVZB   $dst, $dst\n"
10179     "done:" %}
10180   ins_encode %{
10181     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10182     emit_cmpfp3(_masm, $dst$$Register);
10183   %}
10184   ins_pipe( pipe_slow );
10185 %}
10186 
10187 // Compare into -1,0,1 in XMM and memory
10188 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10189   predicate(UseSSE>=1);
10190   match(Set dst (CmpF3 src1 (LoadF src2)));
10191   effect(KILL cr);
10192   ins_cost(275);
10193   format %{ "UCOMISS $src1, $src2\n\t"
10194             "MOV     $dst, #-1\n\t"
10195             "JP,s    done\n\t"
10196             "JB,s    done\n\t"
10197             "SETNE   $dst\n\t"
10198             "MOVZB   $dst, $dst\n"
10199     "done:" %}
10200   ins_encode %{
10201     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10202     emit_cmpfp3(_masm, $dst$$Register);
10203   %}
10204   ins_pipe( pipe_slow );
10205 %}
10206 
10207 // Spill to obtain 24-bit precision
10208 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10209   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10210   match(Set dst (SubF src1 src2));
10211 
10212   format %{ "FSUB   $dst,$src1 - $src2" %}
10213   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10214   ins_encode( Push_Reg_FPR(src1),
10215               OpcReg_FPR(src2),
10216               Pop_Mem_FPR(dst) );
10217   ins_pipe( fpu_mem_reg_reg );
10218 %}
10219 //
10220 // This instruction does not round to 24-bits
10221 instruct subFPR_reg(regFPR dst, regFPR src) %{
10222   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10223   match(Set dst (SubF dst src));
10224 
10225   format %{ "FSUB   $dst,$src" %}
10226   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10227   ins_encode( Push_Reg_FPR(src),
10228               OpcP, RegOpc(dst) );
10229   ins_pipe( fpu_reg_reg );
10230 %}
10231 
10232 // Spill to obtain 24-bit precision
10233 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10234   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10235   match(Set dst (AddF src1 src2));
10236 
10237   format %{ "FADD   $dst,$src1,$src2" %}
10238   opcode(0xD8, 0x0); /* D8 C0+i */
10239   ins_encode( Push_Reg_FPR(src2),
10240               OpcReg_FPR(src1),
10241               Pop_Mem_FPR(dst) );
10242   ins_pipe( fpu_mem_reg_reg );
10243 %}
10244 //
10245 // This instruction does not round to 24-bits
10246 instruct addFPR_reg(regFPR dst, regFPR src) %{
10247   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10248   match(Set dst (AddF dst src));
10249 
10250   format %{ "FLD    $src\n\t"
10251             "FADDp  $dst,ST" %}
10252   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10253   ins_encode( Push_Reg_FPR(src),
10254               OpcP, RegOpc(dst) );
10255   ins_pipe( fpu_reg_reg );
10256 %}
10257 
10258 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10259   predicate(UseSSE==0);
10260   match(Set dst (AbsF src));
10261   ins_cost(100);
10262   format %{ "FABS" %}
10263   opcode(0xE1, 0xD9);
10264   ins_encode( OpcS, OpcP );
10265   ins_pipe( fpu_reg_reg );
10266 %}
10267 
10268 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10269   predicate(UseSSE==0);
10270   match(Set dst (NegF src));
10271   ins_cost(100);
10272   format %{ "FCHS" %}
10273   opcode(0xE0, 0xD9);
10274   ins_encode( OpcS, OpcP );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 // Cisc-alternate to addFPR_reg
10279 // Spill to obtain 24-bit precision
10280 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10281   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10282   match(Set dst (AddF src1 (LoadF src2)));
10283 
10284   format %{ "FLD    $src2\n\t"
10285             "FADD   ST,$src1\n\t"
10286             "FSTP_S $dst" %}
10287   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10288   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10289               OpcReg_FPR(src1),
10290               Pop_Mem_FPR(dst) );
10291   ins_pipe( fpu_mem_reg_mem );
10292 %}
10293 //
10294 // Cisc-alternate to addFPR_reg
10295 // This instruction does not round to 24-bits
10296 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10297   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10298   match(Set dst (AddF dst (LoadF src)));
10299 
10300   format %{ "FADD   $dst,$src" %}
10301   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10302   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10303               OpcP, RegOpc(dst) );
10304   ins_pipe( fpu_reg_mem );
10305 %}
10306 
// Following two instructions are for _222_mpegaudio
10308 // Spill to obtain 24-bit precision
10309 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10310   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10311   match(Set dst (AddF src1 src2));
10312 
10313   format %{ "FADD   $dst,$src1,$src2" %}
10314   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10315   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10316               OpcReg_FPR(src2),
10317               Pop_Mem_FPR(dst) );
10318   ins_pipe( fpu_mem_reg_mem );
10319 %}
10320 
10321 // Cisc-spill variant
10322 // Spill to obtain 24-bit precision
10323 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10324   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10325   match(Set dst (AddF src1 (LoadF src2)));
10326 
10327   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10328   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10329   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10330               set_instruction_start,
10331               OpcP, RMopc_Mem(secondary,src1),
10332               Pop_Mem_FPR(dst) );
10333   ins_pipe( fpu_mem_mem_mem );
10334 %}
10335 
10336 // Spill to obtain 24-bit precision
10337 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10338   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10339   match(Set dst (AddF src1 src2));
10340 
10341   format %{ "FADD   $dst,$src1,$src2" %}
10342   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10343   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10344               set_instruction_start,
10345               OpcP, RMopc_Mem(secondary,src1),
10346               Pop_Mem_FPR(dst) );
10347   ins_pipe( fpu_mem_mem_mem );
10348 %}
10349 
10350 
10351 // Spill to obtain 24-bit precision
10352 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10353   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10354   match(Set dst (AddF src con));
10355   format %{ "FLD    $src\n\t"
10356             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10357             "FSTP_S $dst"  %}
10358   ins_encode %{
10359     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10360     __ fadd_s($constantaddress($con));
10361     __ fstp_s(Address(rsp, $dst$$disp));
10362   %}
10363   ins_pipe(fpu_mem_reg_con);
10364 %}
10365 //
10366 // This instruction does not round to 24-bits
10367 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10368   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10369   match(Set dst (AddF src con));
10370   format %{ "FLD    $src\n\t"
10371             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10372             "FSTP   $dst"  %}
10373   ins_encode %{
10374     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10375     __ fadd_s($constantaddress($con));
10376     __ fstp_d($dst$$reg);
10377   %}
10378   ins_pipe(fpu_reg_reg_con);
10379 %}
10380 
10381 // Spill to obtain 24-bit precision
10382 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10383   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10384   match(Set dst (MulF src1 src2));
10385 
10386   format %{ "FLD    $src1\n\t"
10387             "FMUL   $src2\n\t"
10388             "FSTP_S $dst"  %}
10389   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10390   ins_encode( Push_Reg_FPR(src1),
10391               OpcReg_FPR(src2),
10392               Pop_Mem_FPR(dst) );
10393   ins_pipe( fpu_mem_reg_reg );
10394 %}
10395 //
10396 // This instruction does not round to 24-bits
10397 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10398   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10399   match(Set dst (MulF src1 src2));
10400 
10401   format %{ "FLD    $src1\n\t"
10402             "FMUL   $src2\n\t"
10403             "FSTP_S $dst"  %}
10404   opcode(0xD8, 0x1); /* D8 C8+i */
10405   ins_encode( Push_Reg_FPR(src2),
10406               OpcReg_FPR(src1),
10407               Pop_Reg_FPR(dst) );
10408   ins_pipe( fpu_reg_reg_reg );
10409 %}
10410 
10411 
10412 // Spill to obtain 24-bit precision
10413 // Cisc-alternate to reg-reg multiply
10414 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10415   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10416   match(Set dst (MulF src1 (LoadF src2)));
10417 
10418   format %{ "FLD_S  $src2\n\t"
10419             "FMUL   $src1\n\t"
10420             "FSTP_S $dst"  %}
10421   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10422   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10423               OpcReg_FPR(src1),
10424               Pop_Mem_FPR(dst) );
10425   ins_pipe( fpu_mem_reg_mem );
10426 %}
10427 //
10428 // This instruction does not round to 24-bits
10429 // Cisc-alternate to reg-reg multiply
10430 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10431   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10432   match(Set dst (MulF src1 (LoadF src2)));
10433 
10434   format %{ "FMUL   $dst,$src1,$src2" %}
10435   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10436   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10437               OpcReg_FPR(src1),
10438               Pop_Reg_FPR(dst) );
10439   ins_pipe( fpu_reg_reg_mem );
10440 %}
10441 
10442 // Spill to obtain 24-bit precision
10443 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10444   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10445   match(Set dst (MulF src1 src2));
10446 
10447   format %{ "FMUL   $dst,$src1,$src2" %}
10448   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10449   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10450               set_instruction_start,
10451               OpcP, RMopc_Mem(secondary,src1),
10452               Pop_Mem_FPR(dst) );
10453   ins_pipe( fpu_mem_mem_mem );
10454 %}
10455 
10456 // Spill to obtain 24-bit precision
10457 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10458   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10459   match(Set dst (MulF src con));
10460 
10461   format %{ "FLD    $src\n\t"
10462             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10463             "FSTP_S $dst"  %}
10464   ins_encode %{
10465     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10466     __ fmul_s($constantaddress($con));
10467     __ fstp_s(Address(rsp, $dst$$disp));
10468   %}
10469   ins_pipe(fpu_mem_reg_con);
10470 %}
10471 //
10472 // This instruction does not round to 24-bits
10473 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10474   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10475   match(Set dst (MulF src con));
10476 
10477   format %{ "FLD    $src\n\t"
10478             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10479             "FSTP   $dst"  %}
10480   ins_encode %{
10481     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10482     __ fmul_s($constantaddress($con));
10483     __ fstp_d($dst$$reg);
10484   %}
10485   ins_pipe(fpu_reg_reg_con);
10486 %}
10487 
10488 
10489 //
10490 // MACRO1 -- subsume unshared load into mulFPR
10491 // This instruction does not round to 24-bits
10492 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10493   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10494   match(Set dst (MulF (LoadF mem1) src));
10495 
10496   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10497             "FMUL   ST,$src\n\t"
10498             "FSTP   $dst" %}
10499   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10500   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10501               OpcReg_FPR(src),
10502               Pop_Reg_FPR(dst) );
10503   ins_pipe( fpu_reg_reg_mem );
10504 %}
10505 //
10506 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10507 // This instruction does not round to 24-bits
10508 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10509   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10510   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10511   ins_cost(95);
10512 
10513   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10514             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10515             "FADD   ST,$src2\n\t"
10516             "FSTP   $dst" %}
10517   opcode(0xD9); /* LoadF D9 /0 */
10518   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10519               FMul_ST_reg(src1),
10520               FAdd_ST_reg(src2),
10521               Pop_Reg_FPR(dst) );
10522   ins_pipe( fpu_reg_mem_reg_reg );
10523 %}
10524 
10525 // MACRO3 -- addFPR a mulFPR
10526 // This instruction does not round to 24-bits.  It is a '2-address'
10527 // instruction in that the result goes back to src2.  This eliminates
10528 // a move from the macro; possibly the register allocator will have
10529 // to add it back (and maybe not).
10530 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10531   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10532   match(Set src2 (AddF (MulF src0 src1) src2));
10533 
10534   format %{ "FLD    $src0     ===MACRO3===\n\t"
10535             "FMUL   ST,$src1\n\t"
10536             "FADDP  $src2,ST" %}
10537   opcode(0xD9); /* LoadF D9 /0 */
10538   ins_encode( Push_Reg_FPR(src0),
10539               FMul_ST_reg(src1),
10540               FAddP_reg_ST(src2) );
10541   ins_pipe( fpu_reg_reg_reg );
10542 %}
10543 
10544 // MACRO4 -- divFPR subFPR
10545 // This instruction does not round to 24-bits
10546 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10547   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10548   match(Set dst (DivF (SubF src2 src1) src3));
10549 
10550   format %{ "FLD    $src2   ===MACRO4===\n\t"
10551             "FSUB   ST,$src1\n\t"
10552             "FDIV   ST,$src3\n\t"
10553             "FSTP  $dst" %}
10554   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10555   ins_encode( Push_Reg_FPR(src2),
10556               subFPR_divFPR_encode(src1,src3),
10557               Pop_Reg_FPR(dst) );
10558   ins_pipe( fpu_reg_reg_reg_reg );
10559 %}
10560 
10561 // Spill to obtain 24-bit precision
10562 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10563   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10564   match(Set dst (DivF src1 src2));
10565 
10566   format %{ "FDIV   $dst,$src1,$src2" %}
10567   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10568   ins_encode( Push_Reg_FPR(src1),
10569               OpcReg_FPR(src2),
10570               Pop_Mem_FPR(dst) );
10571   ins_pipe( fpu_mem_reg_reg );
10572 %}
10573 //
10574 // This instruction does not round to 24-bits
10575 instruct divFPR_reg(regFPR dst, regFPR src) %{
10576   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10577   match(Set dst (DivF dst src));
10578 
10579   format %{ "FDIV   $dst,$src" %}
10580   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10581   ins_encode( Push_Reg_FPR(src),
10582               OpcP, RegOpc(dst) );
10583   ins_pipe( fpu_reg_reg );
10584 %}
10585 
10586 
10587 // Spill to obtain 24-bit precision
10588 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10589   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10590   match(Set dst (ModF src1 src2));
10591   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10592 
10593   format %{ "FMOD   $dst,$src1,$src2" %}
10594   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10595               emitModDPR(),
10596               Push_Result_Mod_DPR(src2),
10597               Pop_Mem_FPR(dst));
10598   ins_pipe( pipe_slow );
10599 %}
10600 //
10601 // This instruction does not round to 24-bits
10602 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10603   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10604   match(Set dst (ModF dst src));
10605   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10606 
10607   format %{ "FMOD   $dst,$src" %}
10608   ins_encode(Push_Reg_Mod_DPR(dst, src),
10609               emitModDPR(),
10610               Push_Result_Mod_DPR(src),
10611               Pop_Reg_FPR(dst));
10612   ins_pipe( pipe_slow );
10613 %}
10614 
10615 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10616   predicate(UseSSE>=1);
10617   match(Set dst (ModF src0 src1));
10618   effect(KILL rax, KILL cr);
10619   format %{ "SUB    ESP,4\t # FMOD\n"
10620           "\tMOVSS  [ESP+0],$src1\n"
10621           "\tFLD_S  [ESP+0]\n"
10622           "\tMOVSS  [ESP+0],$src0\n"
10623           "\tFLD_S  [ESP+0]\n"
10624      "loop:\tFPREM\n"
10625           "\tFWAIT\n"
10626           "\tFNSTSW AX\n"
10627           "\tSAHF\n"
10628           "\tJP     loop\n"
10629           "\tFSTP_S [ESP+0]\n"
10630           "\tMOVSS  $dst,[ESP+0]\n"
10631           "\tADD    ESP,4\n"
10632           "\tFSTP   ST0\t # Restore FPU Stack"
10633     %}
10634   ins_cost(250);
10635   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10636   ins_pipe( pipe_slow );
10637 %}
10638 
10639 
10640 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10642 
10643 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10644   predicate(UseSSE==0);
10645   match(Set dst (RoundFloat src));
10646   ins_cost(125);
10647   format %{ "FST_S  $dst,$src\t# F-round" %}
10648   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10649   ins_pipe( fpu_mem_reg );
10650 %}
10651 
10652 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10653   predicate(UseSSE<=1);
10654   match(Set dst (RoundDouble src));
10655   ins_cost(125);
10656   format %{ "FST_D  $dst,$src\t# D-round" %}
10657   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10658   ins_pipe( fpu_mem_reg );
10659 %}
10660 
// Force rounding to 24-bit precision and 8-bit exponent
10662 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10663   predicate(UseSSE==0);
10664   match(Set dst (ConvD2F src));
10665   format %{ "FST_S  $dst,$src\t# F-round" %}
10666   expand %{
10667     roundFloat_mem_reg(dst,src);
10668   %}
10669 %}
10670 
// Force rounding to 24-bit precision and 8-bit exponent
10672 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10673   predicate(UseSSE==1);
10674   match(Set dst (ConvD2F src));
10675   effect( KILL cr );
10676   format %{ "SUB    ESP,4\n\t"
10677             "FST_S  [ESP],$src\t# F-round\n\t"
10678             "MOVSS  $dst,[ESP]\n\t"
10679             "ADD ESP,4" %}
10680   ins_encode %{
10681     __ subptr(rsp, 4);
10682     if ($src$$reg != FPR1L_enc) {
10683       __ fld_s($src$$reg-1);
10684       __ fstp_s(Address(rsp, 0));
10685     } else {
10686       __ fst_s(Address(rsp, 0));
10687     }
10688     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10689     __ addptr(rsp, 4);
10690   %}
10691   ins_pipe( pipe_slow );
10692 %}
10693 
10694 // Force rounding double precision to single precision
10695 instruct convD2F_reg(regF dst, regD src) %{
10696   predicate(UseSSE>=2);
10697   match(Set dst (ConvD2F src));
10698   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10699   ins_encode %{
10700     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10701   %}
10702   ins_pipe( pipe_slow );
10703 %}
10704 
10705 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10706   predicate(UseSSE==0);
10707   match(Set dst (ConvF2D src));
10708   format %{ "FST_S  $dst,$src\t# D-round" %}
10709   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10710   ins_pipe( fpu_reg_reg );
10711 %}
10712 
10713 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10714   predicate(UseSSE==1);
10715   match(Set dst (ConvF2D src));
10716   format %{ "FST_D  $dst,$src\t# D-round" %}
10717   expand %{
10718     roundDouble_mem_reg(dst,src);
10719   %}
10720 %}
10721 
10722 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10723   predicate(UseSSE==1);
10724   match(Set dst (ConvF2D src));
10725   effect( KILL cr );
10726   format %{ "SUB    ESP,4\n\t"
10727             "MOVSS  [ESP] $src\n\t"
10728             "FLD_S  [ESP]\n\t"
10729             "ADD    ESP,4\n\t"
10730             "FSTP   $dst\t# D-round" %}
10731   ins_encode %{
10732     __ subptr(rsp, 4);
10733     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10734     __ fld_s(Address(rsp, 0));
10735     __ addptr(rsp, 4);
10736     __ fstp_d($dst$$reg);
10737   %}
10738   ins_pipe( pipe_slow );
10739 %}
10740 
10741 instruct convF2D_reg(regD dst, regF src) %{
10742   predicate(UseSSE>=2);
10743   match(Set dst (ConvF2D src));
10744   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10745   ins_encode %{
10746     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10747   %}
10748   ins_pipe( pipe_slow );
10749 %}
10750 
10751 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10752 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10753   predicate(UseSSE<=1);
10754   match(Set dst (ConvD2I src));
10755   effect( KILL tmp, KILL cr );
10756   format %{ "FLD    $src\t# Convert double to int \n\t"
10757             "FLDCW  trunc mode\n\t"
10758             "SUB    ESP,4\n\t"
10759             "FISTp  [ESP + #0]\n\t"
10760             "FLDCW  std/24-bit mode\n\t"
10761             "POP    EAX\n\t"
10762             "CMP    EAX,0x80000000\n\t"
10763             "JNE,s  fast\n\t"
10764             "FLD_D  $src\n\t"
10765             "CALL   d2i_wrapper\n"
10766       "fast:" %}
10767   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10772 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10773   predicate(UseSSE>=2);
10774   match(Set dst (ConvD2I src));
10775   effect( KILL tmp, KILL cr );
10776   format %{ "CVTTSD2SI $dst, $src\n\t"
10777             "CMP    $dst,0x80000000\n\t"
10778             "JNE,s  fast\n\t"
10779             "SUB    ESP, 8\n\t"
10780             "MOVSD  [ESP], $src\n\t"
10781             "FLD_D  [ESP]\n\t"
10782             "ADD    ESP, 8\n\t"
10783             "CALL   d2i_wrapper\n"
10784       "fast:" %}
10785   ins_encode %{
10786     Label fast;
10787     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10788     __ cmpl($dst$$Register, 0x80000000);
10789     __ jccb(Assembler::notEqual, fast);
10790     __ subptr(rsp, 8);
10791     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10792     __ fld_d(Address(rsp, 0));
10793     __ addptr(rsp, 8);
10794     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10795     __ post_call_nop();
10796     __ bind(fast);
10797   %}
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10802   predicate(UseSSE<=1);
10803   match(Set dst (ConvD2L src));
10804   effect( KILL cr );
10805   format %{ "FLD    $src\t# Convert double to long\n\t"
10806             "FLDCW  trunc mode\n\t"
10807             "SUB    ESP,8\n\t"
10808             "FISTp  [ESP + #0]\n\t"
10809             "FLDCW  std/24-bit mode\n\t"
10810             "POP    EAX\n\t"
10811             "POP    EDX\n\t"
10812             "CMP    EDX,0x80000000\n\t"
10813             "JNE,s  fast\n\t"
10814             "TEST   EAX,EAX\n\t"
10815             "JNE,s  fast\n\t"
10816             "FLD    $src\n\t"
10817             "CALL   d2l_wrapper\n"
10818       "fast:" %}
10819   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10820   ins_pipe( pipe_slow );
10821 %}
10822 
10823 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10824 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10825   predicate (UseSSE>=2);
10826   match(Set dst (ConvD2L src));
10827   effect( KILL cr );
10828   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10829             "MOVSD  [ESP],$src\n\t"
10830             "FLD_D  [ESP]\n\t"
10831             "FLDCW  trunc mode\n\t"
10832             "FISTp  [ESP + #0]\n\t"
10833             "FLDCW  std/24-bit mode\n\t"
10834             "POP    EAX\n\t"
10835             "POP    EDX\n\t"
10836             "CMP    EDX,0x80000000\n\t"
10837             "JNE,s  fast\n\t"
10838             "TEST   EAX,EAX\n\t"
10839             "JNE,s  fast\n\t"
10840             "SUB    ESP,8\n\t"
10841             "MOVSD  [ESP],$src\n\t"
10842             "FLD_D  [ESP]\n\t"
10843             "ADD    ESP,8\n\t"
10844             "CALL   d2l_wrapper\n"
10845       "fast:" %}
10846   ins_encode %{
10847     Label fast;
10848     __ subptr(rsp, 8);
10849     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10850     __ fld_d(Address(rsp, 0));
10851     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10852     __ fistp_d(Address(rsp, 0));
10853     // Restore the rounding mode, mask the exception
10854     if (Compile::current()->in_24_bit_fp_mode()) {
10855       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10856     } else {
10857       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10858     }
10859     // Load the converted long, adjust CPU stack
10860     __ pop(rax);
10861     __ pop(rdx);
10862     __ cmpl(rdx, 0x80000000);
10863     __ jccb(Assembler::notEqual, fast);
10864     __ testl(rax, rax);
10865     __ jccb(Assembler::notEqual, fast);
10866     __ subptr(rsp, 8);
10867     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10868     __ fld_d(Address(rsp, 0));
10869     __ addptr(rsp, 8);
10870     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10871     __ post_call_nop();
10872     __ bind(fast);
10873   %}
10874   ins_pipe( pipe_slow );
10875 %}
10876 
10877 // Convert a double to an int.  Java semantics require we do complex
10878 // manglations in the corner cases.  So we set the rounding mode to
10879 // 'zero', store the darned double down as an int, and reset the
10880 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if the conversion would overflow or the input was a NaN; we check for
// this and take the slow path if needed.
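// A C-level sketch of the fast/slow selection used by the conversions below
// (illustrative only; trunc_convert stands in for FIST/CVTTSS2SI/CVTTSD2SI,
// which all produce 0x80000000 on overflow or NaN):
//   int r = trunc_convert(src);
//   if (r != 0x80000000) return r;       // common fast path
//   return d2i_wrapper(src);             // NaN -> 0, saturate to int range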
10883 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10884   predicate(UseSSE==0);
10885   match(Set dst (ConvF2I src));
10886   effect( KILL tmp, KILL cr );
10887   format %{ "FLD    $src\t# Convert float to int \n\t"
10888             "FLDCW  trunc mode\n\t"
10889             "SUB    ESP,4\n\t"
10890             "FISTp  [ESP + #0]\n\t"
10891             "FLDCW  std/24-bit mode\n\t"
10892             "POP    EAX\n\t"
10893             "CMP    EAX,0x80000000\n\t"
10894             "JNE,s  fast\n\t"
10895             "FLD    $src\n\t"
10896             "CALL   d2i_wrapper\n"
10897       "fast:" %}
10898   // DPR2I_encoding works for FPR2I
10899   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10900   ins_pipe( pipe_slow );
10901 %}
10902 
10903 // Convert a float in xmm to an int reg.
10904 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10905   predicate(UseSSE>=1);
10906   match(Set dst (ConvF2I src));
10907   effect( KILL tmp, KILL cr );
10908   format %{ "CVTTSS2SI $dst, $src\n\t"
10909             "CMP    $dst,0x80000000\n\t"
10910             "JNE,s  fast\n\t"
10911             "SUB    ESP, 4\n\t"
10912             "MOVSS  [ESP], $src\n\t"
10913             "FLD    [ESP]\n\t"
10914             "ADD    ESP, 4\n\t"
10915             "CALL   d2i_wrapper\n"
10916       "fast:" %}
10917   ins_encode %{
10918     Label fast;
10919     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10920     __ cmpl($dst$$Register, 0x80000000);
10921     __ jccb(Assembler::notEqual, fast);
10922     __ subptr(rsp, 4);
10923     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10924     __ fld_s(Address(rsp, 0));
10925     __ addptr(rsp, 4);
10926     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10927     __ post_call_nop();
10928     __ bind(fast);
10929   %}
10930   ins_pipe( pipe_slow );
10931 %}
10932 
10933 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10934   predicate(UseSSE==0);
10935   match(Set dst (ConvF2L src));
10936   effect( KILL cr );
10937   format %{ "FLD    $src\t# Convert float to long\n\t"
10938             "FLDCW  trunc mode\n\t"
10939             "SUB    ESP,8\n\t"
10940             "FISTp  [ESP + #0]\n\t"
10941             "FLDCW  std/24-bit mode\n\t"
10942             "POP    EAX\n\t"
10943             "POP    EDX\n\t"
10944             "CMP    EDX,0x80000000\n\t"
10945             "JNE,s  fast\n\t"
10946             "TEST   EAX,EAX\n\t"
10947             "JNE,s  fast\n\t"
10948             "FLD    $src\n\t"
10949             "CALL   d2l_wrapper\n"
10950       "fast:" %}
10951   // DPR2L_encoding works for FPR2L
10952   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10953   ins_pipe( pipe_slow );
10954 %}
10955 
10956 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10957 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10958   predicate (UseSSE>=1);
10959   match(Set dst (ConvF2L src));
10960   effect( KILL cr );
10961   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10962             "MOVSS  [ESP],$src\n\t"
10963             "FLD_S  [ESP]\n\t"
10964             "FLDCW  trunc mode\n\t"
10965             "FISTp  [ESP + #0]\n\t"
10966             "FLDCW  std/24-bit mode\n\t"
10967             "POP    EAX\n\t"
10968             "POP    EDX\n\t"
10969             "CMP    EDX,0x80000000\n\t"
10970             "JNE,s  fast\n\t"
10971             "TEST   EAX,EAX\n\t"
10972             "JNE,s  fast\n\t"
10973             "SUB    ESP,4\t# Convert float to long\n\t"
10974             "MOVSS  [ESP],$src\n\t"
10975             "FLD_S  [ESP]\n\t"
10976             "ADD    ESP,4\n\t"
10977             "CALL   d2l_wrapper\n"
10978       "fast:" %}
10979   ins_encode %{
10980     Label fast;
10981     __ subptr(rsp, 8);
10982     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10983     __ fld_s(Address(rsp, 0));
10984     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10985     __ fistp_d(Address(rsp, 0));
10986     // Restore the rounding mode, mask the exception
10987     if (Compile::current()->in_24_bit_fp_mode()) {
10988       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10989     } else {
10990       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10991     }
10992     // Load the converted long, adjust CPU stack
10993     __ pop(rax);
10994     __ pop(rdx);
10995     __ cmpl(rdx, 0x80000000);
10996     __ jccb(Assembler::notEqual, fast);
10997     __ testl(rax, rax);
10998     __ jccb(Assembler::notEqual, fast);
10999     __ subptr(rsp, 4);
11000     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11001     __ fld_s(Address(rsp, 0));
11002     __ addptr(rsp, 4);
11003     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11004     __ post_call_nop();
11005     __ bind(fast);
11006   %}
11007   ins_pipe( pipe_slow );
11008 %}
11009 
11010 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11011   predicate( UseSSE<=1 );
11012   match(Set dst (ConvI2D src));
11013   format %{ "FILD   $src\n\t"
11014             "FSTP   $dst" %}
11015   opcode(0xDB, 0x0);  /* DB /0 */
11016   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11017   ins_pipe( fpu_reg_mem );
11018 %}
11019 
11020 instruct convI2D_reg(regD dst, rRegI src) %{
11021   predicate( UseSSE>=2 && !UseXmmI2D );
11022   match(Set dst (ConvI2D src));
11023   format %{ "CVTSI2SD $dst,$src" %}
11024   ins_encode %{
11025     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11026   %}
11027   ins_pipe( pipe_slow );
11028 %}
11029 
11030 instruct convI2D_mem(regD dst, memory mem) %{
11031   predicate( UseSSE>=2 );
11032   match(Set dst (ConvI2D (LoadI mem)));
11033   format %{ "CVTSI2SD $dst,$mem" %}
11034   ins_encode %{
11035     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11036   %}
11037   ins_pipe( pipe_slow );
11038 %}
11039 
11040 instruct convXI2D_reg(regD dst, rRegI src)
11041 %{
11042   predicate( UseSSE>=2 && UseXmmI2D );
11043   match(Set dst (ConvI2D src));
11044 
11045   format %{ "MOVD  $dst,$src\n\t"
11046             "CVTDQ2PD $dst,$dst\t# i2d" %}
11047   ins_encode %{
11048     __ movdl($dst$$XMMRegister, $src$$Register);
11049     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11050   %}
11051   ins_pipe(pipe_slow); // XXX
11052 %}
11053 
11054 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11055   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11056   match(Set dst (ConvI2D (LoadI mem)));
11057   format %{ "FILD   $mem\n\t"
11058             "FSTP   $dst" %}
11059   opcode(0xDB);      /* DB /0 */
11060   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11061               Pop_Reg_DPR(dst));
11062   ins_pipe( fpu_reg_mem );
11063 %}
11064 
11065 // Convert a byte to a float; no rounding step needed.
11066 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11067   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11068   match(Set dst (ConvI2F src));
11069   format %{ "FILD   $src\n\t"
11070             "FSTP   $dst" %}
11071 
11072   opcode(0xDB, 0x0);  /* DB /0 */
11073   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11074   ins_pipe( fpu_reg_mem );
11075 %}
11076 
11077 // In 24-bit mode, force exponent rounding by storing back out
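// (Worked example, for illustration: i2f must round to float precision. In Java,
// (float)16777217 == 16777216.0f because 2^24 + 1 has no exact float representation;
// spilling through a 32-bit stack slot forces that rounding instead of keeping the
// extra x87 precision.)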
11078 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11079   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11080   match(Set dst (ConvI2F src));
11081   ins_cost(200);
11082   format %{ "FILD   $src\n\t"
11083             "FSTP_S $dst" %}
11084   opcode(0xDB, 0x0);  /* DB /0 */
11085   ins_encode( Push_Mem_I(src),
11086               Pop_Mem_FPR(dst));
11087   ins_pipe( fpu_mem_mem );
11088 %}
11089 
11090 // In 24-bit mode, force exponent rounding by storing back out
11091 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11092   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11093   match(Set dst (ConvI2F (LoadI mem)));
11094   ins_cost(200);
11095   format %{ "FILD   $mem\n\t"
11096             "FSTP_S $dst" %}
11097   opcode(0xDB);  /* DB /0 */
11098   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11099               Pop_Mem_FPR(dst));
11100   ins_pipe( fpu_mem_mem );
11101 %}
11102 
11103 // This instruction does not round to 24 bits
11104 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11105   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11106   match(Set dst (ConvI2F src));
11107   format %{ "FILD   $src\n\t"
11108             "FSTP   $dst" %}
11109   opcode(0xDB, 0x0);  /* DB /0 */
11110   ins_encode( Push_Mem_I(src),
11111               Pop_Reg_FPR(dst));
11112   ins_pipe( fpu_reg_mem );
11113 %}
11114 
11115 // This instruction does not round to 24 bits
11116 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11117   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11118   match(Set dst (ConvI2F (LoadI mem)));
11119   format %{ "FILD   $mem\n\t"
11120             "FSTP   $dst" %}
11121   opcode(0xDB);      /* DB /0 */
11122   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11123               Pop_Reg_FPR(dst));
11124   ins_pipe( fpu_reg_mem );
11125 %}
11126 
11127 // Convert an int to a float in xmm; no rounding step needed.
11128 instruct convI2F_reg(regF dst, rRegI src) %{
11129   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11130   match(Set dst (ConvI2F src));
11131   format %{ "CVTSI2SS $dst, $src" %}
11132   ins_encode %{
11133     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11134   %}
11135   ins_pipe( pipe_slow );
11136 %}
11137 
11138 instruct convXI2F_reg(regF dst, rRegI src)
11139 %{
11140   predicate( UseSSE>=2 && UseXmmI2F );
11141   match(Set dst (ConvI2F src));
11142 
11143   format %{ "MOVD  $dst,$src\n\t"
11144             "CVTDQ2PS $dst,$dst\t# i2f" %}
11145   ins_encode %{
11146     __ movdl($dst$$XMMRegister, $src$$Register);
11147     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11148   %}
11149   ins_pipe(pipe_slow); // XXX
11150 %}
11151 
11152 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11153   match(Set dst (ConvI2L src));
11154   effect(KILL cr);
11155   ins_cost(375);
11156   format %{ "MOV    $dst.lo,$src\n\t"
11157             "MOV    $dst.hi,$src\n\t"
11158             "SAR    $dst.hi,31" %}
11159   ins_encode(convert_int_long(dst,src));
11160   ins_pipe( ialu_reg_reg_long );
11161 %}
11162 
11163 // Zero-extend convert int to long
11164 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11165   match(Set dst (AndL (ConvI2L src) mask) );
11166   effect( KILL flags );
11167   ins_cost(250);
11168   format %{ "MOV    $dst.lo,$src\n\t"
11169             "XOR    $dst.hi,$dst.hi" %}
11170   opcode(0x33); // XOR
11171   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11172   ins_pipe( ialu_reg_reg_long );
11173 %}
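// For illustration: this rule matches the zero-extending widening produced by the Java
// idiom  i & 0xFFFFFFFFL  (Integer.toUnsignedLong).  A C sketch of the same operation,
// with hypothetical names:
//
//   uint64_t u = (uint64_t)(uint32_t)i;   // zero-extend instead of sign-extend
//
// which is why the high half is simply XOR-cleared rather than SAR-filled.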
11174 
11175 // Zero-extend long
11176 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11177   match(Set dst (AndL src mask) );
11178   effect( KILL flags );
11179   ins_cost(250);
11180   format %{ "MOV    $dst.lo,$src.lo\n\t"
11181             "XOR    $dst.hi,$dst.hi\n\t" %}
11182   opcode(0x33); // XOR
11183   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11184   ins_pipe( ialu_reg_reg_long );
11185 %}
11186 
11187 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11188   predicate (UseSSE<=1);
11189   match(Set dst (ConvL2D src));
11190   effect( KILL cr );
11191   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11192             "PUSH   $src.lo\n\t"
11193             "FILD   ST,[ESP + #0]\n\t"
11194             "ADD    ESP,8\n\t"
11195             "FSTP_D $dst\t# D-round" %}
11196   opcode(0xDF, 0x5);  /* DF /5 */
11197   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11198   ins_pipe( pipe_slow );
11199 %}
11200 
11201 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11202   predicate (UseSSE>=2);
11203   match(Set dst (ConvL2D src));
11204   effect( KILL cr );
11205   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11206             "PUSH   $src.lo\n\t"
11207             "FILD_D [ESP]\n\t"
11208             "FSTP_D [ESP]\n\t"
11209             "MOVSD  $dst,[ESP]\n\t"
11210             "ADD    ESP,8" %}
11211   opcode(0xDF, 0x5);  /* DF /5 */
11212   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11213   ins_pipe( pipe_slow );
11214 %}
11215 
11216 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11217   predicate (UseSSE>=1);
11218   match(Set dst (ConvL2F src));
11219   effect( KILL cr );
11220   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11221             "PUSH   $src.lo\n\t"
11222             "FILD_D [ESP]\n\t"
11223             "FSTP_S [ESP]\n\t"
11224             "MOVSS  $dst,[ESP]\n\t"
11225             "ADD    ESP,8" %}
11226   opcode(0xDF, 0x5);  /* DF /5 */
11227   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11228   ins_pipe( pipe_slow );
11229 %}
11230 
11231 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11232   match(Set dst (ConvL2F src));
11233   effect( KILL cr );
11234   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11235             "PUSH   $src.lo\n\t"
11236             "FILD   ST,[ESP + #0]\n\t"
11237             "ADD    ESP,8\n\t"
11238             "FSTP_S $dst\t# F-round" %}
11239   opcode(0xDF, 0x5);  /* DF /5 */
11240   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11241   ins_pipe( pipe_slow );
11242 %}
11243 
11244 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11245   match(Set dst (ConvL2I src));
11246   effect( DEF dst, USE src );
11247   format %{ "MOV    $dst,$src.lo" %}
11248   ins_encode(enc_CopyL_Lo(dst,src));
11249   ins_pipe( ialu_reg_reg );
11250 %}
11251 
11252 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11253   match(Set dst (MoveF2I src));
11254   effect( DEF dst, USE src );
11255   ins_cost(100);
11256   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11257   ins_encode %{
11258     __ movl($dst$$Register, Address(rsp, $src$$disp));
11259   %}
11260   ins_pipe( ialu_reg_mem );
11261 %}
11262 
11263 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11264   predicate(UseSSE==0);
11265   match(Set dst (MoveF2I src));
11266   effect( DEF dst, USE src );
11267 
11268   ins_cost(125);
11269   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11270   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11271   ins_pipe( fpu_mem_reg );
11272 %}
11273 
11274 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11275   predicate(UseSSE>=1);
11276   match(Set dst (MoveF2I src));
11277   effect( DEF dst, USE src );
11278 
11279   ins_cost(95);
11280   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11281   ins_encode %{
11282     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11283   %}
11284   ins_pipe( pipe_slow );
11285 %}
11286 
11287 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11288   predicate(UseSSE>=2);
11289   match(Set dst (MoveF2I src));
11290   effect( DEF dst, USE src );
11291   ins_cost(85);
11292   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11293   ins_encode %{
11294     __ movdl($dst$$Register, $src$$XMMRegister);
11295   %}
11296   ins_pipe( pipe_slow );
11297 %}
11298 
11299 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11300   match(Set dst (MoveI2F src));
11301   effect( DEF dst, USE src );
11302 
11303   ins_cost(100);
11304   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11305   ins_encode %{
11306     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11307   %}
11308   ins_pipe( ialu_mem_reg );
11309 %}
11310 
11311 
11312 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11313   predicate(UseSSE==0);
11314   match(Set dst (MoveI2F src));
11315   effect(DEF dst, USE src);
11316 
11317   ins_cost(125);
11318   format %{ "FLD_S  $src\n\t"
11319             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11320   opcode(0xD9);               /* D9 /0, FLD m32real */
11321   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11322               Pop_Reg_FPR(dst) );
11323   ins_pipe( fpu_reg_mem );
11324 %}
11325 
11326 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11327   predicate(UseSSE>=1);
11328   match(Set dst (MoveI2F src));
11329   effect( DEF dst, USE src );
11330 
11331   ins_cost(95);
11332   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11333   ins_encode %{
11334     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11335   %}
11336   ins_pipe( pipe_slow );
11337 %}
11338 
11339 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11340   predicate(UseSSE>=2);
11341   match(Set dst (MoveI2F src));
11342   effect( DEF dst, USE src );
11343 
11344   ins_cost(85);
11345   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11346   ins_encode %{
11347     __ movdl($dst$$XMMRegister, $src$$Register);
11348   %}
11349   ins_pipe( pipe_slow );
11350 %}
11351 
11352 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11353   match(Set dst (MoveD2L src));
11354   effect(DEF dst, USE src);
11355 
11356   ins_cost(250);
11357   format %{ "MOV    $dst.lo,$src\n\t"
11358             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11359   opcode(0x8B, 0x8B);
11360   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11361   ins_pipe( ialu_mem_long_reg );
11362 %}
11363 
11364 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11365   predicate(UseSSE<=1);
11366   match(Set dst (MoveD2L src));
11367   effect(DEF dst, USE src);
11368 
11369   ins_cost(125);
11370   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11371   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11372   ins_pipe( fpu_mem_reg );
11373 %}
11374 
11375 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11376   predicate(UseSSE>=2);
11377   match(Set dst (MoveD2L src));
11378   effect(DEF dst, USE src);
11379   ins_cost(95);
11380   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11381   ins_encode %{
11382     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11383   %}
11384   ins_pipe( pipe_slow );
11385 %}
11386 
11387 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11388   predicate(UseSSE>=2);
11389   match(Set dst (MoveD2L src));
11390   effect(DEF dst, USE src, TEMP tmp);
11391   ins_cost(85);
11392   format %{ "MOVD   $dst.lo,$src\n\t"
11393             "PSHUFLW $tmp,$src,0x4E\n\t"
11394             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11395   ins_encode %{
11396     __ movdl($dst$$Register, $src$$XMMRegister);
11397     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11398     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11404   match(Set dst (MoveL2D src));
11405   effect(DEF dst, USE src);
11406 
11407   ins_cost(200);
11408   format %{ "MOV    $dst,$src.lo\n\t"
11409             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11410   opcode(0x89, 0x89);
11411   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11412   ins_pipe( ialu_mem_long_reg );
11413 %}
11414 
11415 
11416 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11417   predicate(UseSSE<=1);
11418   match(Set dst (MoveL2D src));
11419   effect(DEF dst, USE src);
11420   ins_cost(125);
11421 
11422   format %{ "FLD_D  $src\n\t"
11423             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11424   opcode(0xDD);               /* DD /0, FLD m64real */
11425   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11426               Pop_Reg_DPR(dst) );
11427   ins_pipe( fpu_reg_mem );
11428 %}
11429 
11430 
11431 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11432   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11433   match(Set dst (MoveL2D src));
11434   effect(DEF dst, USE src);
11435 
11436   ins_cost(95);
11437   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11438   ins_encode %{
11439     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11440   %}
11441   ins_pipe( pipe_slow );
11442 %}
11443 
11444 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11445   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11446   match(Set dst (MoveL2D src));
11447   effect(DEF dst, USE src);
11448 
11449   ins_cost(95);
11450   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11451   ins_encode %{
11452     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11453   %}
11454   ins_pipe( pipe_slow );
11455 %}
11456 
11457 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11458   predicate(UseSSE>=2);
11459   match(Set dst (MoveL2D src));
11460   effect(TEMP dst, USE src, TEMP tmp);
11461   ins_cost(85);
11462   format %{ "MOVD   $dst,$src.lo\n\t"
11463             "MOVD   $tmp,$src.hi\n\t"
11464             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11465   ins_encode %{
11466     __ movdl($dst$$XMMRegister, $src$$Register);
11467     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11468     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 //----------------------------- CompressBits/ExpandBits ------------------------
11474 
11475 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11476   predicate(n->bottom_type()->isa_long());
11477   match(Set dst (CompressBits src mask));
11478   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11479   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11480   ins_encode %{
11481     Label exit, partial_result;
11482     // In parallel, extract both the upper and lower 32 bits of the source into the
11483     // destination register pair. Merge the results so that the upper destination bits
11484     // are laid out contiguously after the lower destination bits.
11485     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11486     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11487     __ popcntl($rtmp$$Register, $mask$$Register);
11488     // Skip merging if the set-bit count of the lower mask register is 32 (the full register width).
11489     __ cmpl($rtmp$$Register, 32);
11490     __ jccb(Assembler::equal, exit);
11491     // Due to the limited number of GPRs on 32-bit targets, use an XMM register as a spill slot.
11492     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11493     // Shift the upper destination register left by the set-bit count of the lower mask register
11494     // and merge it into the lower destination register.
11495     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11496     __ orl($dst$$Register, $rtmp$$Register);
11497     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11498     // Zero out the upper destination register if the set-bit count of the lower 32-bit mask
11499     // is zero, since its contents have already been copied entirely into the lower
11500     // destination register.
11501     __ cmpl($rtmp$$Register, 0);
11502     __ jccb(Assembler::greater, partial_result);
11503     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11504     __ jmp(exit);
11505     __ bind(partial_result);
11506     // Shift the upper destination register right to drop the bits that were already copied
11507     // into the lower destination register.
11508     __ subl($rtmp$$Register, 32);
11509     __ negl($rtmp$$Register);
11510     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11511     __ bind(exit);
11512   %}
11513   ins_pipe( pipe_slow );
11514 %}
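// A rough C sketch (an illustrative assumption using GCC/Clang BMI2 intrinsics, not the
// emitted sequence) of how the 64-bit compress above is assembled from two 32-bit PEXTs:
//
//   #include <stdint.h>
//   #include <immintrin.h>
//   uint32_t n   = __builtin_popcount(mask_lo);   // result bits produced by the low half
//   uint32_t lo  = _pext_u32(src_lo, mask_lo);
//   uint32_t hi  = _pext_u32(src_hi, mask_hi);
//   uint64_t dst = ((uint64_t)hi << n) | lo;      // pack hi immediately after lo's n bits
//
// The emitted code special-cases n == 32 because a 32-bit SHLX by 32 would shift by zero.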
11515 
11516 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11517   predicate(n->bottom_type()->isa_long());
11518   match(Set dst (ExpandBits src mask));
11519   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11520   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11521   ins_encode %{
11522     // The expand (deposit) operation reads bits from the source register sequentially,
11523     // starting at the LSB, and places them in the destination register at the bit positions
11524     // corresponding to set bits in the mask register. Thus the number of source bits consumed
11525     // equals the combined set-bit count of the mask register pair.
11526     Label exit, mask_clipping;
11527     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11528     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11529     __ popcntl($rtmp$$Register, $mask$$Register);
11530     // If the set-bit count of the lower mask register is 32, then no bits of the lower
11531     // source register feed into the upper destination register.
11532     __ cmpl($rtmp$$Register, 32);
11533     __ jccb(Assembler::equal, exit);
11534     // Due to the limited number of GPRs on 32-bit targets, use an XMM register as a spill slot.
11535     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11536     // Shift the lower source register right to drop the bits that have already been consumed.
11537     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11538     // Deposit the remaining lower-source bits, starting at the LSB, at the positions
11539     // selected by the upper mask register.
11540     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11541     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11542     __ subl($rtmp$$Register, 32);
11543     __ negl($rtmp$$Register);
11544     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11545     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11546     // Clear the set bits in the upper mask register that have already consumed bits from
11547     // the lower source register.
11548     __ bind(mask_clipping);
11549     __ blsrl($mask$$Register, $mask$$Register);
11550     __ decrementl($rtmp$$Register, 1);
11551     __ jccb(Assembler::greater, mask_clipping);
11552     // Starting at the LSB, deposit the bits of the upper source register at the positions
11553     // selected by the remaining set bits of the upper mask register.
11554     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11555     // Merge the partial results produced from the lower and upper source register bits.
11556     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11557     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11558     __ bind(exit);
11559   %}
11560   ins_pipe( pipe_slow );
11561 %}
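// A rough C sketch (an illustrative assumption using GCC/Clang BMI2 intrinsics, not the
// emitted sequence) of the net effect of the 64-bit expand above:
//
//   #include <stdint.h>
//   #include <immintrin.h>
//   uint32_t n      = __builtin_popcount(mask_lo);              // source bits consumed by the low half
//   uint32_t dst_lo = _pdep_u32((uint32_t)src, mask_lo);        // draws source bits [0, n)
//   uint32_t dst_hi = _pdep_u32((uint32_t)(src >> n), mask_hi); // draws source bits starting at n
//   uint64_t dst    = ((uint64_t)dst_hi << 32) | dst_lo;
//
// The mask-clipping loop above exists because the second pass over the upper mask must skip
// the positions already filled from the shifted lower source.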
11562 
11563 // =======================================================================
11564 // fast clearing of an array
11565 // Small ClearArray non-AVX512.
11566 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11567   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11568   match(Set dummy (ClearArray cnt base));
11569   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11570 
11571   format %{ $$template
11572     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11573     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11574     $$emit$$"JG     LARGE\n\t"
11575     $$emit$$"SHL    ECX, 1\n\t"
11576     $$emit$$"DEC    ECX\n\t"
11577     $$emit$$"JS     DONE\t# Zero length\n\t"
11578     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11579     $$emit$$"DEC    ECX\n\t"
11580     $$emit$$"JGE    LOOP\n\t"
11581     $$emit$$"JMP    DONE\n\t"
11582     $$emit$$"# LARGE:\n\t"
11583     if (UseFastStosb) {
11584        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11585        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11586     } else if (UseXMMForObjInit) {
11587        $$emit$$"MOV     RDI,RAX\n\t"
11588        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11589        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11590        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11591        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11592        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11593        $$emit$$"ADD     0x40,RAX\n\t"
11594        $$emit$$"# L_zero_64_bytes:\n\t"
11595        $$emit$$"SUB     0x8,RCX\n\t"
11596        $$emit$$"JGE     L_loop\n\t"
11597        $$emit$$"ADD     0x4,RCX\n\t"
11598        $$emit$$"JL      L_tail\n\t"
11599        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11600        $$emit$$"ADD     0x20,RAX\n\t"
11601        $$emit$$"SUB     0x4,RCX\n\t"
11602        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11603        $$emit$$"ADD     0x4,RCX\n\t"
11604        $$emit$$"JLE     L_end\n\t"
11605        $$emit$$"DEC     RCX\n\t"
11606        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11607        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11608        $$emit$$"ADD     0x8,RAX\n\t"
11609        $$emit$$"DEC     RCX\n\t"
11610        $$emit$$"JGE     L_sloop\n\t"
11611        $$emit$$"# L_end:\n\t"
11612     } else {
11613        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11614        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11615     }
11616     $$emit$$"# DONE"
11617   %}
11618   ins_encode %{
11619     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11620                  $tmp$$XMMRegister, false, knoreg);
11621   %}
11622   ins_pipe( pipe_slow );
11623 %}
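// Shape of the clear as a simplified C sketch (an illustrative assumption, not the
// MacroAssembler::clear_mem code; clear_words and short_limit are hypothetical names,
// and cnt counts 8-byte words):
//
//   #include <stddef.h>
//   #include <stdint.h>
//   #include <string.h>
//   static void clear_words(uint32_t* base, size_t cnt) {
//     const size_t short_limit = 8;              // stand-in for the InitArrayShortSize cutoff
//     if (cnt <= short_limit) {
//       for (size_t i = 0; i < cnt * 2; i++)     // short arrays: inline dword store loop,
//         base[i] = 0;                           // avoids REP STOS startup cost
//     } else {
//       memset(base, 0, cnt * 8);                // large path: REP STOSB / REP STOS / 32-byte XMM stores
//     }
//   }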
11624 
11625 // Small ClearArray AVX512 non-constant length.
11626 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11627   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11628   match(Set dummy (ClearArray cnt base));
11629   ins_cost(125);
11630   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11631 
11632   format %{ $$template
11633     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11634     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11635     $$emit$$"JG     LARGE\n\t"
11636     $$emit$$"SHL    ECX, 1\n\t"
11637     $$emit$$"DEC    ECX\n\t"
11638     $$emit$$"JS     DONE\t# Zero length\n\t"
11639     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11640     $$emit$$"DEC    ECX\n\t"
11641     $$emit$$"JGE    LOOP\n\t"
11642     $$emit$$"JMP    DONE\n\t"
11643     $$emit$$"# LARGE:\n\t"
11644     if (UseFastStosb) {
11645        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11646        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11647     } else if (UseXMMForObjInit) {
11648        $$emit$$"MOV     RDI,RAX\n\t"
11649        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11650        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11651        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11652        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11653        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11654        $$emit$$"ADD     0x40,RAX\n\t"
11655        $$emit$$"# L_zero_64_bytes:\n\t"
11656        $$emit$$"SUB     0x8,RCX\n\t"
11657        $$emit$$"JGE     L_loop\n\t"
11658        $$emit$$"ADD     0x4,RCX\n\t"
11659        $$emit$$"JL      L_tail\n\t"
11660        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11661        $$emit$$"ADD     0x20,RAX\n\t"
11662        $$emit$$"SUB     0x4,RCX\n\t"
11663        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11664        $$emit$$"ADD     0x4,RCX\n\t"
11665        $$emit$$"JLE     L_end\n\t"
11666        $$emit$$"DEC     RCX\n\t"
11667        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11668        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11669        $$emit$$"ADD     0x8,RAX\n\t"
11670        $$emit$$"DEC     RCX\n\t"
11671        $$emit$$"JGE     L_sloop\n\t"
11672        $$emit$$"# L_end:\n\t"
11673     } else {
11674        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11675        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11676     }
11677     $$emit$$"# DONE"
11678   %}
11679   ins_encode %{
11680     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11681                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11682   %}
11683   ins_pipe( pipe_slow );
11684 %}
11685 
11686 // Large ClearArray non-AVX512.
11687 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11688   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11689   match(Set dummy (ClearArray cnt base));
11690   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11691   format %{ $$template
11692     if (UseFastStosb) {
11693        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11694        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11695        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11696     } else if (UseXMMForObjInit) {
11697        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11698        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11699        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11700        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11701        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11702        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11703        $$emit$$"ADD     0x40,RAX\n\t"
11704        $$emit$$"# L_zero_64_bytes:\n\t"
11705        $$emit$$"SUB     0x8,RCX\n\t"
11706        $$emit$$"JGE     L_loop\n\t"
11707        $$emit$$"ADD     0x4,RCX\n\t"
11708        $$emit$$"JL      L_tail\n\t"
11709        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11710        $$emit$$"ADD     0x20,RAX\n\t"
11711        $$emit$$"SUB     0x4,RCX\n\t"
11712        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11713        $$emit$$"ADD     0x4,RCX\n\t"
11714        $$emit$$"JLE     L_end\n\t"
11715        $$emit$$"DEC     RCX\n\t"
11716        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11717        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11718        $$emit$$"ADD     0x8,RAX\n\t"
11719        $$emit$$"DEC     RCX\n\t"
11720        $$emit$$"JGE     L_sloop\n\t"
11721        $$emit$$"# L_end:\n\t"
11722     } else {
11723        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11724        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11725        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11726     }
11727     $$emit$$"# DONE"
11728   %}
11729   ins_encode %{
11730     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11731                  $tmp$$XMMRegister, true, knoreg);
11732   %}
11733   ins_pipe( pipe_slow );
11734 %}
11735 
11736 // Large ClearArray AVX512.
11737 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11738   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11739   match(Set dummy (ClearArray cnt base));
11740   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11741   format %{ $$template
11742     if (UseFastStosb) {
11743        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11744        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11745        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11746     } else if (UseXMMForObjInit) {
11747        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11748        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11749        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11750        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11751        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11752        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11753        $$emit$$"ADD     0x40,RAX\n\t"
11754        $$emit$$"# L_zero_64_bytes:\n\t"
11755        $$emit$$"SUB     0x8,RCX\n\t"
11756        $$emit$$"JGE     L_loop\n\t"
11757        $$emit$$"ADD     0x4,RCX\n\t"
11758        $$emit$$"JL      L_tail\n\t"
11759        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11760        $$emit$$"ADD     0x20,RAX\n\t"
11761        $$emit$$"SUB     0x4,RCX\n\t"
11762        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11763        $$emit$$"ADD     0x4,RCX\n\t"
11764        $$emit$$"JLE     L_end\n\t"
11765        $$emit$$"DEC     RCX\n\t"
11766        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11767        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11768        $$emit$$"ADD     0x8,RAX\n\t"
11769        $$emit$$"DEC     RCX\n\t"
11770        $$emit$$"JGE     L_sloop\n\t"
11771        $$emit$$"# L_end:\n\t"
11772     } else {
11773        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11774        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11775        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11776     }
11777     $$emit$$"# DONE"
11778   %}
11779   ins_encode %{
11780     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11781                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11782   %}
11783   ins_pipe( pipe_slow );
11784 %}
11785 
11786 // Small ClearArray AVX512 constant length.
11787 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11788 %{
11789   predicate(!((ClearArrayNode*)n)->is_large() &&
11790                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11791   match(Set dummy (ClearArray cnt base));
11792   ins_cost(100);
11793   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11794   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11795   ins_encode %{
11796    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11797   %}
11798   ins_pipe(pipe_slow);
11799 %}
11800 
11801 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11802                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11803   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11804   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11805   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11806 
11807   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11808   ins_encode %{
11809     __ string_compare($str1$$Register, $str2$$Register,
11810                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11811                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11812   %}
11813   ins_pipe( pipe_slow );
11814 %}
11815 
11816 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11817                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11818   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11819   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11820   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11821 
11822   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11823   ins_encode %{
11824     __ string_compare($str1$$Register, $str2$$Register,
11825                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11826                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11827   %}
11828   ins_pipe( pipe_slow );
11829 %}
11830 
11831 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11832                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11833   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11834   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11835   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11836 
11837   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11838   ins_encode %{
11839     __ string_compare($str1$$Register, $str2$$Register,
11840                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11841                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11842   %}
11843   ins_pipe( pipe_slow );
11844 %}
11845 
11846 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11847                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11848   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11849   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11850   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11851 
11852   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11853   ins_encode %{
11854     __ string_compare($str1$$Register, $str2$$Register,
11855                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11856                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11857   %}
11858   ins_pipe( pipe_slow );
11859 %}
11860 
11861 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11862                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11863   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11864   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11865   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11866 
11867   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11868   ins_encode %{
11869     __ string_compare($str1$$Register, $str2$$Register,
11870                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11871                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11872   %}
11873   ins_pipe( pipe_slow );
11874 %}
11875 
11876 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11877                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11878   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11879   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11880   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11881 
11882   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11883   ins_encode %{
11884     __ string_compare($str1$$Register, $str2$$Register,
11885                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11886                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11887   %}
11888   ins_pipe( pipe_slow );
11889 %}
11890 
11891 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11892                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11893   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11894   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11895   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11896 
11897   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11898   ins_encode %{
11899     __ string_compare($str2$$Register, $str1$$Register,
11900                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11901                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11902   %}
11903   ins_pipe( pipe_slow );
11904 %}
11905 
11906 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11907                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11908   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11909   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11910   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11911 
11912   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11913   ins_encode %{
11914     __ string_compare($str2$$Register, $str1$$Register,
11915                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11916                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11917   %}
11918   ins_pipe( pipe_slow );
11919 %}
11920 
11921 // fast string equals
11922 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11923                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11924   predicate(!VM_Version::supports_avx512vlbw());
11925   match(Set result (StrEquals (Binary str1 str2) cnt));
11926   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11927 
11928   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11929   ins_encode %{
11930     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11931                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11932                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11933   %}
11934 
11935   ins_pipe( pipe_slow );
11936 %}
11937 
11938 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11939                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11940   predicate(VM_Version::supports_avx512vlbw());
11941   match(Set result (StrEquals (Binary str1 str2) cnt));
11942   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11943 
11944   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11945   ins_encode %{
11946     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11947                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11948                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11949   %}
11950 
11951   ins_pipe( pipe_slow );
11952 %}
11953 
11954 
11955 // fast search of substring with known size.
11956 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11957                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11958   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11959   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11960   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11961 
11962   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11963   ins_encode %{
11964     int icnt2 = (int)$int_cnt2$$constant;
11965     if (icnt2 >= 16) {
11966       // IndexOf for constant substrings with size >= 16 elements
11967       // which don't need to be loaded through the stack.
11968       __ string_indexofC8($str1$$Register, $str2$$Register,
11969                           $cnt1$$Register, $cnt2$$Register,
11970                           icnt2, $result$$Register,
11971                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11972     } else {
11973       // Small strings are loaded through the stack if they cross a page boundary.
11974       __ string_indexof($str1$$Register, $str2$$Register,
11975                         $cnt1$$Register, $cnt2$$Register,
11976                         icnt2, $result$$Register,
11977                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11978     }
11979   %}
11980   ins_pipe( pipe_slow );
11981 %}
11982 
11983 // fast search of substring with known size.
11984 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11985                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11986   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11987   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11988   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11989 
11990   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11991   ins_encode %{
11992     int icnt2 = (int)$int_cnt2$$constant;
11993     if (icnt2 >= 8) {
11994       // IndexOf for constant substrings with size >= 8 elements
11995       // which don't need to be loaded through the stack.
11996       __ string_indexofC8($str1$$Register, $str2$$Register,
11997                           $cnt1$$Register, $cnt2$$Register,
11998                           icnt2, $result$$Register,
11999                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12000     } else {
12001       // Small strings are loaded through the stack if they cross a page boundary.
12002       __ string_indexof($str1$$Register, $str2$$Register,
12003                         $cnt1$$Register, $cnt2$$Register,
12004                         icnt2, $result$$Register,
12005                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12006     }
12007   %}
12008   ins_pipe( pipe_slow );
12009 %}
12010 
12011 // fast search of substring with known size.
12012 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12013                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12014   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12015   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12016   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12017 
12018   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12019   ins_encode %{
12020     int icnt2 = (int)$int_cnt2$$constant;
12021     if (icnt2 >= 8) {
12022       // IndexOf for constant substrings with size >= 8 elements
12023       // which don't need to be loaded through the stack.
12024       __ string_indexofC8($str1$$Register, $str2$$Register,
12025                           $cnt1$$Register, $cnt2$$Register,
12026                           icnt2, $result$$Register,
12027                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12028     } else {
12029       // Small strings are loaded through the stack if they cross a page boundary.
12030       __ string_indexof($str1$$Register, $str2$$Register,
12031                         $cnt1$$Register, $cnt2$$Register,
12032                         icnt2, $result$$Register,
12033                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12034     }
12035   %}
12036   ins_pipe( pipe_slow );
12037 %}
12038 
12039 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12040                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12041   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12042   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12043   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12044 
12045   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12046   ins_encode %{
12047     __ string_indexof($str1$$Register, $str2$$Register,
12048                       $cnt1$$Register, $cnt2$$Register,
12049                       (-1), $result$$Register,
12050                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12051   %}
12052   ins_pipe( pipe_slow );
12053 %}
12054 
12055 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12056                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12057   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12058   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12059   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12060 
12061   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12062   ins_encode %{
12063     __ string_indexof($str1$$Register, $str2$$Register,
12064                       $cnt1$$Register, $cnt2$$Register,
12065                       (-1), $result$$Register,
12066                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12067   %}
12068   ins_pipe( pipe_slow );
12069 %}
12070 
12071 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12072                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12073   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12074   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12075   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12076 
12077   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12078   ins_encode %{
12079     __ string_indexof($str1$$Register, $str2$$Register,
12080                       $cnt1$$Register, $cnt2$$Register,
12081                       (-1), $result$$Register,
12082                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12083   %}
12084   ins_pipe( pipe_slow );
12085 %}
12086 
12087 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12088                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12089   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12090   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12091   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12092   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12093   ins_encode %{
12094     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12095                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12096   %}
12097   ins_pipe( pipe_slow );
12098 %}
12099 
12100 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12101                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12102   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12103   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12104   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12105   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12106   ins_encode %{
12107     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12108                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12109   %}
12110   ins_pipe( pipe_slow );
12111 %}
12112 
12113 
12114 // fast array equals
12115 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12116                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12117 %{
12118   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12119   match(Set result (AryEq ary1 ary2));
12120   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12121   //ins_cost(300);
12122 
12123   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12124   ins_encode %{
12125     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12126                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12127                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12128   %}
12129   ins_pipe( pipe_slow );
12130 %}
12131 
12132 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12133                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12134 %{
12135   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12136   match(Set result (AryEq ary1 ary2));
12137   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12138   //ins_cost(300);
12139 
12140   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12141   ins_encode %{
12142     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12143                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12144                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12145   %}
12146   ins_pipe( pipe_slow );
12147 %}
12148 
12149 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12150                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12151 %{
12152   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12153   match(Set result (AryEq ary1 ary2));
12154   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12155   //ins_cost(300);
12156 
12157   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12158   ins_encode %{
12159     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12160                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12161                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12162   %}
12163   ins_pipe( pipe_slow );
12164 %}
12165 
12166 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12167                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12168 %{
12169   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12170   match(Set result (AryEq ary1 ary2));
12171   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12172   //ins_cost(300);
12173 
12174   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12175   ins_encode %{
12176     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12177                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12178                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12179   %}
12180   ins_pipe( pipe_slow );
12181 %}
12182 
12183 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12184                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12185 %{
12186   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12187   match(Set result (CountPositives ary1 len));
12188   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12189 
12190   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12191   ins_encode %{
12192     __ count_positives($ary1$$Register, $len$$Register,
12193                        $result$$Register, $tmp3$$Register,
12194                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12195   %}
12196   ins_pipe( pipe_slow );
12197 %}
12198 
12199 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12200                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12201 %{
12202   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12203   match(Set result (CountPositives ary1 len));
12204   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12205 
12206   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12207   ins_encode %{
12208     __ count_positives($ary1$$Register, $len$$Register,
12209                        $result$$Register, $tmp3$$Register,
12210                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12211   %}
12212   ins_pipe( pipe_slow );
12213 %}
12214 
12215 
12216 // fast char[] to byte[] compression
12217 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12218                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12219   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12220   match(Set result (StrCompressedCopy src (Binary dst len)));
12221   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12222 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12224   ins_encode %{
12225     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12226                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12227                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12228                            knoreg, knoreg);
12229   %}
12230   ins_pipe( pipe_slow );
12231 %}
12232 
12233 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12234                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12235   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12236   match(Set result (StrCompressedCopy src (Binary dst len)));
12237   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12238 
  format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
12240   ins_encode %{
12241     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12242                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12243                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12244                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12245   %}
12246   ins_pipe( pipe_slow );
12247 %}
12248 
12249 // fast byte[] to char[] inflation
12250 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12251                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12252   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12253   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12254   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12255 
12256   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12257   ins_encode %{
12258     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12259                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12260   %}
12261   ins_pipe( pipe_slow );
12262 %}
12263 
12264 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12265                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12266   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12267   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12268   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12269 
12270   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12271   ins_encode %{
12272     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12273                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12274   %}
12275   ins_pipe( pipe_slow );
12276 %}
12277 
12278 // encode char[] to byte[] in ISO_8859_1
12279 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12280                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12281                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12282   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12283   match(Set result (EncodeISOArray src (Binary dst len)));
12284   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12285 
12286   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12287   ins_encode %{
12288     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12289                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12290                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12291   %}
12292   ins_pipe( pipe_slow );
12293 %}
12294 
12295 // encode char[] to byte[] in ASCII
12296 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12297                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12298                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12299   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12300   match(Set result (EncodeISOArray src (Binary dst len)));
12301   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12302 
12303   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12304   ins_encode %{
12305     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12306                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12307                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12308   %}
12309   ins_pipe( pipe_slow );
12310 %}
12311 
12312 //----------Control Flow Instructions------------------------------------------
12313 // Signed compare Instructions
12314 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12315   match(Set cr (CmpI op1 op2));
12316   effect( DEF cr, USE op1, USE op2 );
12317   format %{ "CMP    $op1,$op2" %}
12318   opcode(0x3B);  /* Opcode 3B /r */
12319   ins_encode( OpcP, RegReg( op1, op2) );
12320   ins_pipe( ialu_cr_reg_reg );
12321 %}
12322 
12323 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12324   match(Set cr (CmpI op1 op2));
12325   effect( DEF cr, USE op1 );
12326   format %{ "CMP    $op1,$op2" %}
12327   opcode(0x81,0x07);  /* Opcode 81 /7 */
12328   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12329   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12330   ins_pipe( ialu_cr_reg_imm );
12331 %}
12332 
12333 // Cisc-spilled version of cmpI_eReg
12334 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12335   match(Set cr (CmpI op1 (LoadI op2)));
12336 
12337   format %{ "CMP    $op1,$op2" %}
12338   ins_cost(500);
12339   opcode(0x3B);  /* Opcode 3B /r */
12340   ins_encode( OpcP, RegMem( op1, op2) );
12341   ins_pipe( ialu_cr_reg_mem );
12342 %}
12343 
12344 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12345   match(Set cr (CmpI src zero));
12346   effect( DEF cr, USE src );
12347 
12348   format %{ "TEST   $src,$src" %}
12349   opcode(0x85);
12350   ins_encode( OpcP, RegReg( src, src ) );
12351   ins_pipe( ialu_cr_reg_imm );
12352 %}
12353 
12354 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12355   match(Set cr (CmpI (AndI src con) zero));
12356 
12357   format %{ "TEST   $src,$con" %}
12358   opcode(0xF7,0x00);
12359   ins_encode( OpcP, RegOpc(src), Con32(con) );
12360   ins_pipe( ialu_cr_reg_imm );
12361 %}
12362 
12363 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12364   match(Set cr (CmpI (AndI src mem) zero));
12365 
12366   format %{ "TEST   $src,$mem" %}
12367   opcode(0x85);
12368   ins_encode( OpcP, RegMem( src, mem ) );
12369   ins_pipe( ialu_cr_reg_mem );
12370 %}
12371 
12372 // Unsigned compare Instructions; really, same as signed except they
12373 // produce an eFlagsRegU instead of eFlagsReg.
12374 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12375   match(Set cr (CmpU op1 op2));
12376 
12377   format %{ "CMPu   $op1,$op2" %}
12378   opcode(0x3B);  /* Opcode 3B /r */
12379   ins_encode( OpcP, RegReg( op1, op2) );
12380   ins_pipe( ialu_cr_reg_reg );
12381 %}
12382 
12383 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12384   match(Set cr (CmpU op1 op2));
12385 
12386   format %{ "CMPu   $op1,$op2" %}
12387   opcode(0x81,0x07);  /* Opcode 81 /7 */
12388   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12389   ins_pipe( ialu_cr_reg_imm );
12390 %}
12391 
// Cisc-spilled version of cmpU_eReg
12393 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12394   match(Set cr (CmpU op1 (LoadI op2)));
12395 
12396   format %{ "CMPu   $op1,$op2" %}
12397   ins_cost(500);
12398   opcode(0x3B);  /* Opcode 3B /r */
12399   ins_encode( OpcP, RegMem( op1, op2) );
12400   ins_pipe( ialu_cr_reg_mem );
12401 %}
12402 
12403 // // Cisc-spilled version of cmpU_eReg
12404 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12405 //  match(Set cr (CmpU (LoadI op1) op2));
12406 //
12407 //  format %{ "CMPu   $op1,$op2" %}
12408 //  ins_cost(500);
12409 //  opcode(0x39);  /* Opcode 39 /r */
12410 //  ins_encode( OpcP, RegMem( op1, op2) );
12411 //%}
12412 
12413 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12414   match(Set cr (CmpU src zero));
12415 
12416   format %{ "TESTu  $src,$src" %}
12417   opcode(0x85);
12418   ins_encode( OpcP, RegReg( src, src ) );
12419   ins_pipe( ialu_cr_reg_imm );
12420 %}
12421 
12422 // Unsigned pointer compare Instructions
12423 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12424   match(Set cr (CmpP op1 op2));
12425 
12426   format %{ "CMPu   $op1,$op2" %}
12427   opcode(0x3B);  /* Opcode 3B /r */
12428   ins_encode( OpcP, RegReg( op1, op2) );
12429   ins_pipe( ialu_cr_reg_reg );
12430 %}
12431 
12432 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12433   match(Set cr (CmpP op1 op2));
12434 
12435   format %{ "CMPu   $op1,$op2" %}
12436   opcode(0x81,0x07);  /* Opcode 81 /7 */
12437   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12438   ins_pipe( ialu_cr_reg_imm );
12439 %}
12440 
// Cisc-spilled version of cmpP_eReg
12442 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12443   match(Set cr (CmpP op1 (LoadP op2)));
12444 
12445   format %{ "CMPu   $op1,$op2" %}
12446   ins_cost(500);
12447   opcode(0x3B);  /* Opcode 3B /r */
12448   ins_encode( OpcP, RegMem( op1, op2) );
12449   ins_pipe( ialu_cr_reg_mem );
12450 %}
12451 
12452 // // Cisc-spilled version of cmpP_eReg
12453 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12454 //  match(Set cr (CmpP (LoadP op1) op2));
12455 //
12456 //  format %{ "CMPu   $op1,$op2" %}
12457 //  ins_cost(500);
12458 //  opcode(0x39);  /* Opcode 39 /r */
12459 //  ins_encode( OpcP, RegMem( op1, op2) );
12460 //%}
12461 
12462 // Compare raw pointer (used in out-of-heap check).
12463 // Only works because non-oop pointers must be raw pointers
12464 // and raw pointers have no anti-dependencies.
12465 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12466   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12467   match(Set cr (CmpP op1 (LoadP op2)));
12468 
12469   format %{ "CMPu   $op1,$op2" %}
12470   opcode(0x3B);  /* Opcode 3B /r */
12471   ins_encode( OpcP, RegMem( op1, op2) );
12472   ins_pipe( ialu_cr_reg_mem );
12473 %}
12474 
12475 //
12476 // This will generate a signed flags result. This should be ok
12477 // since any compare to a zero should be eq/neq.
12478 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12479   match(Set cr (CmpP src zero));
12480 
12481   format %{ "TEST   $src,$src" %}
12482   opcode(0x85);
12483   ins_encode( OpcP, RegReg( src, src ) );
12484   ins_pipe( ialu_cr_reg_imm );
12485 %}
12486 
12487 // Cisc-spilled version of testP_reg
12488 // This will generate a signed flags result. This should be ok
12489 // since any compare to a zero should be eq/neq.
12490 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12491   match(Set cr (CmpP (LoadP op) zero));
12492 
12493   format %{ "TEST   $op,0xFFFFFFFF" %}
12494   ins_cost(500);
12495   opcode(0xF7);               /* Opcode F7 /0 */
12496   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12497   ins_pipe( ialu_cr_reg_imm );
12498 %}
12499 
12500 // Yanked all unsigned pointer compare operations.
12501 // Pointer compares are done with CmpP which is already unsigned.
12502 
12503 //----------Max and Min--------------------------------------------------------
12504 // Min Instructions
12505 ////
12506 //   *** Min and Max using the conditional move are slower than the
12507 //   *** branch version on a Pentium III.
12508 // // Conditional move for min
12509 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12510 //  effect( USE_DEF op2, USE op1, USE cr );
12511 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12512 //  opcode(0x4C,0x0F);
12513 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12514 //  ins_pipe( pipe_cmov_reg );
12515 //%}
12516 //
12517 //// Min Register with Register (P6 version)
12518 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12519 //  predicate(VM_Version::supports_cmov() );
12520 //  match(Set op2 (MinI op1 op2));
12521 //  ins_cost(200);
12522 //  expand %{
12523 //    eFlagsReg cr;
12524 //    compI_eReg(cr,op1,op2);
12525 //    cmovI_reg_lt(op2,op1,cr);
12526 //  %}
12527 //%}
12528 
12529 // Min Register with Register (generic version)
12530 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12531   match(Set dst (MinI dst src));
12532   effect(KILL flags);
12533   ins_cost(300);
12534 
12535   format %{ "MIN    $dst,$src" %}
12536   opcode(0xCC);
12537   ins_encode( min_enc(dst,src) );
12538   ins_pipe( pipe_slow );
12539 %}
12540 
12541 // Max Register with Register
12542 //   *** Min and Max using the conditional move are slower than the
12543 //   *** branch version on a Pentium III.
12544 // // Conditional move for max
12545 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12546 //  effect( USE_DEF op2, USE op1, USE cr );
12547 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12548 //  opcode(0x4F,0x0F);
12549 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12550 //  ins_pipe( pipe_cmov_reg );
12551 //%}
12552 //
12553 // // Max Register with Register (P6 version)
12554 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12555 //  predicate(VM_Version::supports_cmov() );
12556 //  match(Set op2 (MaxI op1 op2));
12557 //  ins_cost(200);
12558 //  expand %{
12559 //    eFlagsReg cr;
12560 //    compI_eReg(cr,op1,op2);
12561 //    cmovI_reg_gt(op2,op1,cr);
12562 //  %}
12563 //%}
12564 
12565 // Max Register with Register (generic version)
12566 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12567   match(Set dst (MaxI dst src));
12568   effect(KILL flags);
12569   ins_cost(300);
12570 
12571   format %{ "MAX    $dst,$src" %}
12572   opcode(0xCC);
12573   ins_encode( max_enc(dst,src) );
12574   ins_pipe( pipe_slow );
12575 %}
12576 
12577 // ============================================================================
12578 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
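//
// A small worked example (illustrative arithmetic only, not emitted code):
// for init = 0, limit = 10, stride = 3 the exact trip limit is
//   limit = init + stride * ((limit - init + stride - 1) / stride)
//         = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// i.e. the first value reachable from init in steps of stride that is not
// less than the original limit.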
12581 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12582   match(Set limit (LoopLimit (Binary init limit) stride));
12583   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12584   ins_cost(300);
12585 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12587   ins_encode %{
12588     int strd = (int)$stride$$constant;
12589     assert(strd != 1 && strd != -1, "sanity");
12590     int m1 = (strd > 0) ? 1 : -1;
12591     // Convert limit to long (EAX:EDX)
12592     __ cdql();
12593     // Convert init to long (init:tmp)
12594     __ movl($tmp$$Register, $init$$Register);
12595     __ sarl($tmp$$Register, 31);
12596     // $limit - $init
12597     __ subl($limit$$Register, $init$$Register);
12598     __ sbbl($limit_hi$$Register, $tmp$$Register);
12599     // + ($stride - 1)
12600     if (strd > 0) {
12601       __ addl($limit$$Register, (strd - 1));
12602       __ adcl($limit_hi$$Register, 0);
12603       __ movl($tmp$$Register, strd);
12604     } else {
12605       __ addl($limit$$Register, (strd + 1));
12606       __ adcl($limit_hi$$Register, -1);
12607       __ lneg($limit_hi$$Register, $limit$$Register);
12608       __ movl($tmp$$Register, -strd);
12609     }
12610     // signed division: (EAX:EDX) / pos_stride
12611     __ idivl($tmp$$Register);
12612     if (strd < 0) {
12613       // restore sign
12614       __ negl($tmp$$Register);
12615     }
12616     // (EAX) * stride
12617     __ mull($tmp$$Register);
12618     // + init (ignore upper bits)
12619     __ addl($limit$$Register, $init$$Register);
12620   %}
12621   ins_pipe( pipe_slow );
12622 %}
12623 
12624 // ============================================================================
12625 // Branch Instructions
12626 // Jump Table
12627 instruct jumpXtnd(rRegI switch_val) %{
12628   match(Jump switch_val);
12629   ins_cost(350);
12630   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12631   ins_encode %{
12632     // Jump to Address(table_base + switch_reg)
12633     Address index(noreg, $switch_val$$Register, Address::times_1);
12634     __ jump(ArrayAddress($constantaddress, index), noreg);
12635   %}
12636   ins_pipe(pipe_jmp);
12637 %}
12638 
12639 // Jump Direct - Label defines a relative address from JMP+1
12640 instruct jmpDir(label labl) %{
12641   match(Goto);
12642   effect(USE labl);
12643 
12644   ins_cost(300);
12645   format %{ "JMP    $labl" %}
12646   size(5);
12647   ins_encode %{
12648     Label* L = $labl$$label;
12649     __ jmp(*L, false); // Always long jump
12650   %}
12651   ins_pipe( pipe_jmp );
12652 %}
12653 
12654 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12655 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12656   match(If cop cr);
12657   effect(USE labl);
12658 
12659   ins_cost(300);
12660   format %{ "J$cop    $labl" %}
12661   size(6);
12662   ins_encode %{
12663     Label* L = $labl$$label;
12664     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12665   %}
12666   ins_pipe( pipe_jcc );
12667 %}
12668 
12669 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12670 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12671   match(CountedLoopEnd cop cr);
12672   effect(USE labl);
12673 
12674   ins_cost(300);
12675   format %{ "J$cop    $labl\t# Loop end" %}
12676   size(6);
12677   ins_encode %{
12678     Label* L = $labl$$label;
12679     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12680   %}
12681   ins_pipe( pipe_jcc );
12682 %}
12683 
12684 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12685 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12686   match(CountedLoopEnd cop cmp);
12687   effect(USE labl);
12688 
12689   ins_cost(300);
12690   format %{ "J$cop,u  $labl\t# Loop end" %}
12691   size(6);
12692   ins_encode %{
12693     Label* L = $labl$$label;
12694     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12695   %}
12696   ins_pipe( pipe_jcc );
12697 %}
12698 
12699 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12700   match(CountedLoopEnd cop cmp);
12701   effect(USE labl);
12702 
12703   ins_cost(200);
12704   format %{ "J$cop,u  $labl\t# Loop end" %}
12705   size(6);
12706   ins_encode %{
12707     Label* L = $labl$$label;
12708     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12709   %}
12710   ins_pipe( pipe_jcc );
12711 %}
12712 
12713 // Jump Direct Conditional - using unsigned comparison
12714 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
12718   ins_cost(300);
12719   format %{ "J$cop,u  $labl" %}
12720   size(6);
12721   ins_encode %{
12722     Label* L = $labl$$label;
12723     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12724   %}
12725   ins_pipe(pipe_jcc);
12726 %}
12727 
12728 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12729   match(If cop cmp);
12730   effect(USE labl);
12731 
12732   ins_cost(200);
12733   format %{ "J$cop,u  $labl" %}
12734   size(6);
12735   ins_encode %{
12736     Label* L = $labl$$label;
12737     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12738   %}
12739   ins_pipe(pipe_jcc);
12740 %}
12741 
12742 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12743   match(If cop cmp);
12744   effect(USE labl);
12745 
12746   ins_cost(200);
12747   format %{ $$template
12748     if ($cop$$cmpcode == Assembler::notEqual) {
12749       $$emit$$"JP,u   $labl\n\t"
12750       $$emit$$"J$cop,u   $labl"
12751     } else {
12752       $$emit$$"JP,u   done\n\t"
12753       $$emit$$"J$cop,u   $labl\n\t"
12754       $$emit$$"done:"
12755     }
12756   %}
12757   ins_encode %{
12758     Label* l = $labl$$label;
12759     if ($cop$$cmpcode == Assembler::notEqual) {
12760       __ jcc(Assembler::parity, *l, false);
12761       __ jcc(Assembler::notEqual, *l, false);
12762     } else if ($cop$$cmpcode == Assembler::equal) {
12763       Label done;
12764       __ jccb(Assembler::parity, done);
12765       __ jcc(Assembler::equal, *l, false);
12766       __ bind(done);
12767     } else {
12768        ShouldNotReachHere();
12769     }
12770   %}
12771   ins_pipe(pipe_jcc);
12772 %}
12773 
12774 // ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
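//
// Rough C-style sketch of the scan (illustrative only; the accessor names are
// assumed, the emitted code lives in enc_PartialSubtypeCheck):
//   Klass** p = sub->secondary_supers->data();  int n = sub->secondary_supers->length();
//   while (n-- > 0) {
//     if (*p++ == super) { sub->secondary_super_cache = super; return 0; }  // hit: flags Z
//   }
//   return non_zero;                                                        // miss: flags NZ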
12779 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12780   match(Set result (PartialSubtypeCheck sub super));
12781   effect( KILL rcx, KILL cr );
12782 
12783   ins_cost(1100);  // slightly larger than the next version
12784   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12785             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12786             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12787             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12788             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12789             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12791      "miss:\t" %}
12792 
12793   opcode(0x1); // Force a XOR of EDI
12794   ins_encode( enc_PartialSubtypeCheck() );
12795   ins_pipe( pipe_slow );
12796 %}
12797 
12798 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12799   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12800   effect( KILL rcx, KILL result );
12801 
12802   ins_cost(1000);
12803   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12804             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12805             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12806             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12807             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12808             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12809      "miss:\t" %}
12810 
12811   opcode(0x0);  // No need to XOR EDI
12812   ins_encode( enc_PartialSubtypeCheck() );
12813   ins_pipe( pipe_slow );
12814 %}
12815 
12816 // ============================================================================
12817 // Branch Instructions -- short offset versions
12818 //
12819 // These instructions are used to replace jumps of a long offset (the default
12820 // match) with jumps of a shorter offset.  These instructions are all tagged
12821 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12822 // match rules in general matching.  Instead, the ADLC generates a conversion
12823 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// shorter variant can be used via the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
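//
// For example, the unconditional jmpDir above emits a 5-byte "JMP rel32"
// (size(5)), while its jmpDir_short replacement below emits a 2-byte "JMP rel8"
// (size(2)); the short form is only legal when the displacement fits in a
// signed 8-bit offset.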
12827 
12828 // Jump Direct - Label defines a relative address from JMP+1
12829 instruct jmpDir_short(label labl) %{
12830   match(Goto);
12831   effect(USE labl);
12832 
12833   ins_cost(300);
12834   format %{ "JMP,s  $labl" %}
12835   size(2);
12836   ins_encode %{
12837     Label* L = $labl$$label;
12838     __ jmpb(*L);
12839   %}
12840   ins_pipe( pipe_jmp );
12841   ins_short_branch(1);
12842 %}
12843 
12844 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12845 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12846   match(If cop cr);
12847   effect(USE labl);
12848 
12849   ins_cost(300);
12850   format %{ "J$cop,s  $labl" %}
12851   size(2);
12852   ins_encode %{
12853     Label* L = $labl$$label;
12854     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12855   %}
12856   ins_pipe( pipe_jcc );
12857   ins_short_branch(1);
12858 %}
12859 
12860 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12861 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12862   match(CountedLoopEnd cop cr);
12863   effect(USE labl);
12864 
12865   ins_cost(300);
12866   format %{ "J$cop,s  $labl\t# Loop end" %}
12867   size(2);
12868   ins_encode %{
12869     Label* L = $labl$$label;
12870     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12871   %}
12872   ins_pipe( pipe_jcc );
12873   ins_short_branch(1);
12874 %}
12875 
12876 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12877 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12878   match(CountedLoopEnd cop cmp);
12879   effect(USE labl);
12880 
12881   ins_cost(300);
12882   format %{ "J$cop,us $labl\t# Loop end" %}
12883   size(2);
12884   ins_encode %{
12885     Label* L = $labl$$label;
12886     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12887   %}
12888   ins_pipe( pipe_jcc );
12889   ins_short_branch(1);
12890 %}
12891 
12892 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12893   match(CountedLoopEnd cop cmp);
12894   effect(USE labl);
12895 
12896   ins_cost(300);
12897   format %{ "J$cop,us $labl\t# Loop end" %}
12898   size(2);
12899   ins_encode %{
12900     Label* L = $labl$$label;
12901     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12902   %}
12903   ins_pipe( pipe_jcc );
12904   ins_short_branch(1);
12905 %}
12906 
12907 // Jump Direct Conditional - using unsigned comparison
12908 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12909   match(If cop cmp);
12910   effect(USE labl);
12911 
12912   ins_cost(300);
12913   format %{ "J$cop,us $labl" %}
12914   size(2);
12915   ins_encode %{
12916     Label* L = $labl$$label;
12917     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12918   %}
12919   ins_pipe( pipe_jcc );
12920   ins_short_branch(1);
12921 %}
12922 
12923 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12924   match(If cop cmp);
12925   effect(USE labl);
12926 
12927   ins_cost(300);
12928   format %{ "J$cop,us $labl" %}
12929   size(2);
12930   ins_encode %{
12931     Label* L = $labl$$label;
12932     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12933   %}
12934   ins_pipe( pipe_jcc );
12935   ins_short_branch(1);
12936 %}
12937 
12938 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12939   match(If cop cmp);
12940   effect(USE labl);
12941 
12942   ins_cost(300);
12943   format %{ $$template
12944     if ($cop$$cmpcode == Assembler::notEqual) {
12945       $$emit$$"JP,u,s   $labl\n\t"
12946       $$emit$$"J$cop,u,s   $labl"
12947     } else {
12948       $$emit$$"JP,u,s   done\n\t"
12949       $$emit$$"J$cop,u,s  $labl\n\t"
12950       $$emit$$"done:"
12951     }
12952   %}
12953   size(4);
12954   ins_encode %{
12955     Label* l = $labl$$label;
12956     if ($cop$$cmpcode == Assembler::notEqual) {
12957       __ jccb(Assembler::parity, *l);
12958       __ jccb(Assembler::notEqual, *l);
12959     } else if ($cop$$cmpcode == Assembler::equal) {
12960       Label done;
12961       __ jccb(Assembler::parity, done);
12962       __ jccb(Assembler::equal, *l);
12963       __ bind(done);
12964     } else {
12965        ShouldNotReachHere();
12966     }
12967   %}
12968   ins_pipe(pipe_jcc);
12969   ins_short_branch(1);
12970 %}
12971 
12972 // ============================================================================
12973 // Long Compare
12974 //
12975 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12976 // is tricky.  The flavor of compare used depends on whether we are testing
12977 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12978 // The GE test is the negated LT test.  The LE test can be had by commuting
12979 // the operands (yielding a GE test) and then negating; negate again for the
12980 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12981 // NE test is negated from that.
12982 
12983 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12984 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12985 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12986 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12987 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12988 // foo match ends up with the wrong leaf.  One fix is to not match both
12989 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12990 // both forms beat the trinary form of long-compare and both are very useful
12991 // on Intel which has so few registers.
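//
// Illustrative sketch of the LT/GE flavor used below (see cmpL_reg_flags_LTGE):
// a signed 64-bit "src1 < src2" is reduced to 32-bit operations by subtracting
// with borrow and keeping only the flags from the high halves:
//   CMP src1.lo,src2.lo    // compare low halves, producing a borrow
//   MOV tmp,src1.hi
//   SBB tmp,src2.hi        // subtract high halves minus that borrow
// after which a signed JLT/JGE on the resulting flags decides the whole
// 64-bit "<" / ">=" test.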
12992 
12993 // Manifest a CmpL result in an integer register.  Very painful.
12994 // This is the test to avoid.
12995 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12996   match(Set dst (CmpL3 src1 src2));
12997   effect( KILL flags );
12998   ins_cost(1000);
12999   format %{ "XOR    $dst,$dst\n\t"
13000             "CMP    $src1.hi,$src2.hi\n\t"
13001             "JLT,s  m_one\n\t"
13002             "JGT,s  p_one\n\t"
13003             "CMP    $src1.lo,$src2.lo\n\t"
13004             "JB,s   m_one\n\t"
13005             "JEQ,s  done\n"
13006     "p_one:\tINC    $dst\n\t"
13007             "JMP,s  done\n"
13008     "m_one:\tDEC    $dst\n"
13009      "done:" %}
13010   ins_encode %{
13011     Label p_one, m_one, done;
13012     __ xorptr($dst$$Register, $dst$$Register);
13013     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13014     __ jccb(Assembler::less,    m_one);
13015     __ jccb(Assembler::greater, p_one);
13016     __ cmpl($src1$$Register, $src2$$Register);
13017     __ jccb(Assembler::below,   m_one);
13018     __ jccb(Assembler::equal,   done);
13019     __ bind(p_one);
13020     __ incrementl($dst$$Register);
13021     __ jmpb(done);
13022     __ bind(m_one);
13023     __ decrementl($dst$$Register);
13024     __ bind(done);
13025   %}
13026   ins_pipe( pipe_slow );
13027 %}
13028 
13029 //======
13030 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13031 // compares.  Can be used for LE or GT compares by reversing arguments.
13032 // NOT GOOD FOR EQ/NE tests.
13033 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13034   match( Set flags (CmpL src zero ));
13035   ins_cost(100);
13036   format %{ "TEST   $src.hi,$src.hi" %}
13037   opcode(0x85);
13038   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13039   ins_pipe( ialu_cr_reg_reg );
13040 %}
13041 
13042 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13043 // compares.  Can be used for LE or GT compares by reversing arguments.
13044 // NOT GOOD FOR EQ/NE tests.
13045 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13046   match( Set flags (CmpL src1 src2 ));
13047   effect( TEMP tmp );
13048   ins_cost(300);
13049   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13050             "MOV    $tmp,$src1.hi\n\t"
13051             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13052   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13053   ins_pipe( ialu_cr_reg_reg );
13054 %}
13055 
// Long compares reg < zero/reg OR reg >= zero/reg.
13057 // Just a wrapper for a normal branch, plus the predicate test.
13058 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13059   match(If cmp flags);
13060   effect(USE labl);
13061   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13062   expand %{
13063     jmpCon(cmp,flags,labl);    // JLT or JGE...
13064   %}
13065 %}
13066 
13067 //======
13068 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13069 // compares.  Can be used for LE or GT compares by reversing arguments.
13070 // NOT GOOD FOR EQ/NE tests.
13071 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13072   match(Set flags (CmpUL src zero));
13073   ins_cost(100);
13074   format %{ "TEST   $src.hi,$src.hi" %}
13075   opcode(0x85);
13076   ins_encode(OpcP, RegReg_Hi2(src, src));
13077   ins_pipe(ialu_cr_reg_reg);
13078 %}
13079 
13080 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13081 // compares.  Can be used for LE or GT compares by reversing arguments.
13082 // NOT GOOD FOR EQ/NE tests.
13083 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13084   match(Set flags (CmpUL src1 src2));
13085   effect(TEMP tmp);
13086   ins_cost(300);
13087   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13088             "MOV    $tmp,$src1.hi\n\t"
13089             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13090   ins_encode(long_cmp_flags2(src1, src2, tmp));
13091   ins_pipe(ialu_cr_reg_reg);
13092 %}
13093 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13095 // Just a wrapper for a normal branch, plus the predicate test.
13096 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13097   match(If cmp flags);
13098   effect(USE labl);
13099   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13100   expand %{
13101     jmpCon(cmp, flags, labl);    // JLT or JGE...
13102   %}
13103 %}
13104 
13105 // Compare 2 longs and CMOVE longs.
13106 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13107   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13108   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13109   ins_cost(400);
13110   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13111             "CMOV$cmp $dst.hi,$src.hi" %}
13112   opcode(0x0F,0x40);
13113   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13114   ins_pipe( pipe_cmov_reg_long );
13115 %}
13116 
13117 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13118   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13119   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13120   ins_cost(500);
13121   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13122             "CMOV$cmp $dst.hi,$src.hi" %}
13123   opcode(0x0F,0x40);
13124   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13125   ins_pipe( pipe_cmov_reg_long );
13126 %}
13127 
13128 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13129   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13130   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13131   ins_cost(400);
13132   expand %{
13133     cmovLL_reg_LTGE(cmp, flags, dst, src);
13134   %}
13135 %}
13136 
13137 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13138   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13139   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13140   ins_cost(500);
13141   expand %{
13142     cmovLL_mem_LTGE(cmp, flags, dst, src);
13143   %}
13144 %}
13145 
13146 // Compare 2 longs and CMOVE ints.
13147 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13148   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13149   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13150   ins_cost(200);
13151   format %{ "CMOV$cmp $dst,$src" %}
13152   opcode(0x0F,0x40);
13153   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13154   ins_pipe( pipe_cmov_reg );
13155 %}
13156 
13157 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13158   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13159   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13160   ins_cost(250);
13161   format %{ "CMOV$cmp $dst,$src" %}
13162   opcode(0x0F,0x40);
13163   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13164   ins_pipe( pipe_cmov_mem );
13165 %}
13166 
13167 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13168   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13169   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13170   ins_cost(200);
13171   expand %{
13172     cmovII_reg_LTGE(cmp, flags, dst, src);
13173   %}
13174 %}
13175 
13176 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13177   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13178   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13179   ins_cost(250);
13180   expand %{
13181     cmovII_mem_LTGE(cmp, flags, dst, src);
13182   %}
13183 %}
13184 
13185 // Compare 2 longs and CMOVE ptrs.
13186 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13187   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13188   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13189   ins_cost(200);
13190   format %{ "CMOV$cmp $dst,$src" %}
13191   opcode(0x0F,0x40);
13192   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13193   ins_pipe( pipe_cmov_reg );
13194 %}
13195 
13196 // Compare 2 unsigned longs and CMOVE ptrs.
13197 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13198   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13199   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13200   ins_cost(200);
13201   expand %{
13202     cmovPP_reg_LTGE(cmp,flags,dst,src);
13203   %}
13204 %}
13205 
13206 // Compare 2 longs and CMOVE doubles
13207 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13209   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13210   ins_cost(200);
13211   expand %{
13212     fcmovDPR_regS(cmp,flags,dst,src);
13213   %}
13214 %}
13215 
13216 // Compare 2 longs and CMOVE doubles
13217 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13219   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13220   ins_cost(200);
13221   expand %{
13222     fcmovD_regS(cmp,flags,dst,src);
13223   %}
13224 %}
13225 
13226 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13228   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13229   ins_cost(200);
13230   expand %{
13231     fcmovFPR_regS(cmp,flags,dst,src);
13232   %}
13233 %}
13234 
13235 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13237   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13238   ins_cost(200);
13239   expand %{
13240     fcmovF_regS(cmp,flags,dst,src);
13241   %}
13242 %}
13243 
13244 //======
13245 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13246 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13247   match( Set flags (CmpL src zero ));
13248   effect(TEMP tmp);
13249   ins_cost(200);
13250   format %{ "MOV    $tmp,$src.lo\n\t"
13251             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13252   ins_encode( long_cmp_flags0( src, tmp ) );
13253   ins_pipe( ialu_reg_reg_long );
13254 %}
13255 
13256 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13257 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13258   match( Set flags (CmpL src1 src2 ));
13259   ins_cost(200+300);
13260   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13261             "JNE,s  skip\n\t"
13262             "CMP    $src1.hi,$src2.hi\n\t"
13263      "skip:\t" %}
13264   ins_encode( long_cmp_flags1( src1, src2 ) );
13265   ins_pipe( ialu_cr_reg_reg );
13266 %}
13267 
13268 // Long compare reg == zero/reg OR reg != zero/reg
13269 // Just a wrapper for a normal branch, plus the predicate test.
13270 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13271   match(If cmp flags);
13272   effect(USE labl);
13273   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13274   expand %{
13275     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13276   %}
13277 %}
13278 
13279 //======
13280 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13281 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13282   match(Set flags (CmpUL src zero));
13283   effect(TEMP tmp);
13284   ins_cost(200);
13285   format %{ "MOV    $tmp,$src.lo\n\t"
13286             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13287   ins_encode(long_cmp_flags0(src, tmp));
13288   ins_pipe(ialu_reg_reg_long);
13289 %}
13290 
13291 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13292 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13293   match(Set flags (CmpUL src1 src2));
13294   ins_cost(200+300);
13295   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13296             "JNE,s  skip\n\t"
13297             "CMP    $src1.hi,$src2.hi\n\t"
13298      "skip:\t" %}
13299   ins_encode(long_cmp_flags1(src1, src2));
13300   ins_pipe(ialu_cr_reg_reg);
13301 %}
13302 
13303 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13304 // Just a wrapper for a normal branch, plus the predicate test.
13305 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13306   match(If cmp flags);
13307   effect(USE labl);
13308   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13309   expand %{
13310     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13311   %}
13312 %}
13313 
13314 // Compare 2 longs and CMOVE longs.
13315 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13316   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13317   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13318   ins_cost(400);
13319   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13320             "CMOV$cmp $dst.hi,$src.hi" %}
13321   opcode(0x0F,0x40);
13322   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13323   ins_pipe( pipe_cmov_reg_long );
13324 %}
13325 
13326 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13327   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13328   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13329   ins_cost(500);
13330   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13331             "CMOV$cmp $dst.hi,$src.hi" %}
13332   opcode(0x0F,0x40);
13333   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13334   ins_pipe( pipe_cmov_reg_long );
13335 %}
13336 
13337 // Compare 2 longs and CMOVE ints.
13338 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13339   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13340   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13341   ins_cost(200);
13342   format %{ "CMOV$cmp $dst,$src" %}
13343   opcode(0x0F,0x40);
13344   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13345   ins_pipe( pipe_cmov_reg );
13346 %}
13347 
13348 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13349   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13350   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13351   ins_cost(250);
13352   format %{ "CMOV$cmp $dst,$src" %}
13353   opcode(0x0F,0x40);
13354   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13355   ins_pipe( pipe_cmov_mem );
13356 %}
13357 
13358 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13359   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13360   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13361   ins_cost(200);
13362   expand %{
13363     cmovII_reg_EQNE(cmp, flags, dst, src);
13364   %}
13365 %}
13366 
13367 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13368   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13369   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13370   ins_cost(250);
13371   expand %{
13372     cmovII_mem_EQNE(cmp, flags, dst, src);
13373   %}
13374 %}
13375 
13376 // Compare 2 longs and CMOVE ptrs.
13377 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13378   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13379   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13380   ins_cost(200);
13381   format %{ "CMOV$cmp $dst,$src" %}
13382   opcode(0x0F,0x40);
13383   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13384   ins_pipe( pipe_cmov_reg );
13385 %}
13386 
13387 // Compare 2 unsigned longs and CMOVE ptrs.
13388 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13389   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13390   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13391   ins_cost(200);
13392   expand %{
13393     cmovPP_reg_EQNE(cmp,flags,dst,src);
13394   %}
13395 %}
13396 
13397 // Compare 2 longs and CMOVE doubles
13398 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13400   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13401   ins_cost(200);
13402   expand %{
13403     fcmovDPR_regS(cmp,flags,dst,src);
13404   %}
13405 %}
13406 
13407 // Compare 2 longs and CMOVE doubles
13408 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13410   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13411   ins_cost(200);
13412   expand %{
13413     fcmovD_regS(cmp,flags,dst,src);
13414   %}
13415 %}
13416 
13417 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13419   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13420   ins_cost(200);
13421   expand %{
13422     fcmovFPR_regS(cmp,flags,dst,src);
13423   %}
13424 %}
13425 
13426 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13428   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13429   ins_cost(200);
13430   expand %{
13431     fcmovF_regS(cmp,flags,dst,src);
13432   %}
13433 %}
13434 
13435 //======
13436 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13437 // Same as cmpL_reg_flags_LEGT except must negate src
13438 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13439   match( Set flags (CmpL src zero ));
13440   effect( TEMP tmp );
13441   ins_cost(300);
13442   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13443             "CMP    $tmp,$src.lo\n\t"
13444             "SBB    $tmp,$src.hi\n\t" %}
13445   ins_encode( long_cmp_flags3(src, tmp) );
13446   ins_pipe( ialu_reg_reg_long );
13447 %}
13448 
13449 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13450 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13451 // requires a commuted test to get the same result.
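//
// For example, a signed "src1 <= src2" is evaluated as "src2 >= src1": the
// operands are compared in swapped order and the branch condition is commuted
// by the cmpOp_commute operand, so the test that would have been JLE is taken
// as JGE on the swapped-operand flags.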
13452 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13453   match( Set flags (CmpL src1 src2 ));
13454   effect( TEMP tmp );
13455   ins_cost(300);
13456   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13457             "MOV    $tmp,$src2.hi\n\t"
13458             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13459   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13460   ins_pipe( ialu_cr_reg_reg );
13461 %}
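// Illustrative note: with the operands swapped, these flags describe
// "$src2 versus $src1".  Deciding "$src1 <= $src2", for example, uses a
// 'greater-or-equal' test on these flags; the cmpOp_commute operand provides
// that commuted condition in the branch and cmov rules below.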
13462 
13463 // Long compares reg <= zero/reg OR reg > zero/reg.
13464 // Just a wrapper for a normal branch, plus the predicate test
13465 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13466   match(If cmp flags);
13467   effect(USE labl);
13468   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13469   ins_cost(300);
13470   expand %{
13471     jmpCon(cmp,flags,labl);    // JGT or JLE...
13472   %}
13473 %}
13474 
13475 //======
13476 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13477 // Same as cmpUL_reg_flags_LEGT except that src must be negated.
13478 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13479   match(Set flags (CmpUL src zero));
13480   effect(TEMP tmp);
13481   ins_cost(300);
13482   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13483             "CMP    $tmp,$src.lo\n\t"
13484             "SBB    $tmp,$src.hi" %}
13485   ins_encode(long_cmp_flags3(src, tmp));
13486   ins_pipe(ialu_reg_reg_long);
13487 %}
13488 
13489 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13490 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13491 // requires a commuted test to get the same result.
13492 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13493   match(Set flags (CmpUL src1 src2));
13494   effect(TEMP tmp);
13495   ins_cost(300);
13496   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13497             "MOV    $tmp,$src2.hi\n\t"
13498             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13499   ins_encode(long_cmp_flags2( src2, src1, tmp));
13500   ins_pipe(ialu_cr_reg_reg);
13501 %}
13502 
13503 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13504 // Just a wrapper for a normal branch, plus the predicate test
13505 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13506   match(If cmp flags);
13507   effect(USE labl);
13508   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13509   ins_cost(300);
13510   expand %{
13511     jmpCon(cmp, flags, labl);    // JGT or JLE...
13512   %}
13513 %}
13514 
13515 // Compare 2 longs and CMOVE longs.
13516 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13517   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13518   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13519   ins_cost(400);
13520   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13521             "CMOV$cmp $dst.hi,$src.hi" %}
13522   opcode(0x0F,0x40);
13523   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13524   ins_pipe( pipe_cmov_reg_long );
13525 %}
13526 
13527 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13528   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13529   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13530   ins_cost(500);
13531   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13532             "CMOV$cmp $dst.hi,$src.hi+4" %}
13533   opcode(0x0F,0x40);
13534   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13535   ins_pipe( pipe_cmov_reg_long );
13536 %}
13537 
13538 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13539   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13540   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13541   ins_cost(400);
13542   expand %{
13543     cmovLL_reg_LEGT(cmp, flags, dst, src);
13544   %}
13545 %}
13546 
13547 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13548   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13549   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13550   ins_cost(500);
13551   expand %{
13552     cmovLL_mem_LEGT(cmp, flags, dst, src);
13553   %}
13554 %}
13555 
13556 // Compare 2 longs and CMOVE ints.
13557 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13558   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13559   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13560   ins_cost(200);
13561   format %{ "CMOV$cmp $dst,$src" %}
13562   opcode(0x0F,0x40);
13563   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13564   ins_pipe( pipe_cmov_reg );
13565 %}
13566 
13567 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13568   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13569   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13570   ins_cost(250);
13571   format %{ "CMOV$cmp $dst,$src" %}
13572   opcode(0x0F,0x40);
13573   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13574   ins_pipe( pipe_cmov_mem );
13575 %}
13576 
13577 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13578   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13579   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13580   ins_cost(200);
13581   expand %{
13582     cmovII_reg_LEGT(cmp, flags, dst, src);
13583   %}
13584 %}
13585 
13586 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13587   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13588   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13589   ins_cost(250);
13590   expand %{
13591     cmovII_mem_LEGT(cmp, flags, dst, src);
13592   %}
13593 %}
13594 
13595 // Compare 2 longs and CMOVE ptrs.
13596 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13597   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13598   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13599   ins_cost(200);
13600   format %{ "CMOV$cmp $dst,$src" %}
13601   opcode(0x0F,0x40);
13602   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13603   ins_pipe( pipe_cmov_reg );
13604 %}
13605 
13606 // Compare 2 unsigned longs and CMOVE ptrs.
13607 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13608   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13609   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13610   ins_cost(200);
13611   expand %{
13612     cmovPP_reg_LEGT(cmp,flags,dst,src);
13613   %}
13614 %}
13615 
13616 // Compare 2 longs and CMOVE doubles
13617 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13618   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13619   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13620   ins_cost(200);
13621   expand %{
13622     fcmovDPR_regS(cmp,flags,dst,src);
13623   %}
13624 %}
13625 
13626 // Compare 2 longs and CMOVE doubles
13627 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13628   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13629   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13630   ins_cost(200);
13631   expand %{
13632     fcmovD_regS(cmp,flags,dst,src);
13633   %}
13634 %}
13635 
13636 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13637   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13638   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13639   ins_cost(200);
13640   expand %{
13641     fcmovFPR_regS(cmp,flags,dst,src);
13642   %}
13643 %}
13644 
13645 
13646 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13647   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13648   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13649   ins_cost(200);
13650   expand %{
13651     fcmovF_regS(cmp,flags,dst,src);
13652   %}
13653 %}
13654 
13655 
13656 // ============================================================================
13657 // Procedure Call/Return Instructions
13658 // Call Java Static Instruction
13659 // Note: If this code changes, the corresponding ret_addr_offset() and
13660 //       compute_padding() functions will have to be adjusted.
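// Note: ins_alignment(4) below, together with compute_padding(), keeps the
//       32-bit call displacement 4-byte aligned so that the call site can be
//       patched MT-safely.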
13661 instruct CallStaticJavaDirect(method meth) %{
13662   match(CallStaticJava);
13663   effect(USE meth);
13664 
13665   ins_cost(300);
13666   format %{ "CALL,static " %}
13667   opcode(0xE8); /* E8 cd */
13668   ins_encode( pre_call_resets,
13669               Java_Static_Call( meth ),
13670               call_epilog,
13671               post_call_FPU );
13672   ins_pipe( pipe_slow );
13673   ins_alignment(4);
13674 %}
13675 
13676 // Call Java Dynamic Instruction
13677 // Note: If this code changes, the corresponding ret_addr_offset() and
13678 //       compute_padding() functions will have to be adjusted.
13679 instruct CallDynamicJavaDirect(method meth) %{
13680   match(CallDynamicJava);
13681   effect(USE meth);
13682 
13683   ins_cost(300);
13684   format %{ "MOV    EAX,(oop)-1\n\t"
13685             "CALL,dynamic" %}
13686   opcode(0xE8); /* E8 cd */
13687   ins_encode( pre_call_resets,
13688               Java_Dynamic_Call( meth ),
13689               call_epilog,
13690               post_call_FPU );
13691   ins_pipe( pipe_slow );
13692   ins_alignment(4);
13693 %}
13694 
13695 // Call Runtime Instruction
13696 instruct CallRuntimeDirect(method meth) %{
13697   match(CallRuntime );
13698   effect(USE meth);
13699 
13700   ins_cost(300);
13701   format %{ "CALL,runtime " %}
13702   opcode(0xE8); /* E8 cd */
13703   // Use FFREEs to clear entries in float stack
13704   ins_encode( pre_call_resets,
13705               FFree_Float_Stack_All,
13706               Java_To_Runtime( meth ),
13707               post_call_FPU );
13708   ins_pipe( pipe_slow );
13709 %}
13710 
13711 // Call runtime without safepoint
13712 instruct CallLeafDirect(method meth) %{
13713   match(CallLeaf);
13714   effect(USE meth);
13715 
13716   ins_cost(300);
13717   format %{ "CALL_LEAF,runtime " %}
13718   opcode(0xE8); /* E8 cd */
13719   ins_encode( pre_call_resets,
13720               FFree_Float_Stack_All,
13721               Java_To_Runtime( meth ),
13722               Verify_FPU_For_Leaf, post_call_FPU );
13723   ins_pipe( pipe_slow );
13724 %}
13725 
13726 instruct CallLeafNoFPDirect(method meth) %{
13727   match(CallLeafNoFP);
13728   effect(USE meth);
13729 
13730   ins_cost(300);
13731   format %{ "CALL_LEAF_NOFP,runtime " %}
13732   opcode(0xE8); /* E8 cd */
13733   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13734   ins_pipe( pipe_slow );
13735 %}
13736 
13737 
13738 // Return Instruction
13739 // Remove the return address & jump to it.
13740 instruct Ret() %{
13741   match(Return);
13742   format %{ "RET" %}
13743   opcode(0xC3);
13744   ins_encode(OpcP);
13745   ins_pipe( pipe_jmp );
13746 %}
13747 
13748 // Tail Call; Jump from runtime stub to Java code.
13749 // Also known as an 'interprocedural jump'.
13750 // Target of jump will eventually return to caller.
13751 // TailJump below removes the return address.
13752 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13753   match(TailCall jump_target method_ptr);
13754   ins_cost(300);
13755   format %{ "JMP    $jump_target \t# EBX holds method" %}
13756   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13757   ins_encode( OpcP, RegOpc(jump_target) );
13758   ins_pipe( pipe_jmp );
13759 %}
13760 
13761 
13762 // Tail Jump; remove the return address; jump to target.
13763 // TailCall above leaves the return address around.
13764 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13765   match( TailJump jump_target ex_oop );
13766   ins_cost(300);
13767   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13768             "JMP    $jump_target " %}
13769   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13770   ins_encode( enc_pop_rdx,
13771               OpcP, RegOpc(jump_target) );
13772   ins_pipe( pipe_jmp );
13773 %}
13774 
13775 // Create exception oop: created by stack-crawling runtime code.
13776 // Created exception is now available to this handler, and is setup
13777 // just prior to jumping to this handler.  No code emitted.
13778 instruct CreateException( eAXRegP ex_oop )
13779 %{
13780   match(Set ex_oop (CreateEx));
13781 
13782   size(0);
13783   // use the following format syntax
13784   format %{ "# exception oop is in EAX; no code emitted" %}
13785   ins_encode();
13786   ins_pipe( empty );
13787 %}
13788 
13789 
13790 // Rethrow exception:
13791 // The exception oop will come in the first argument position.
13792 // Then JUMP (not call) to the rethrow stub code.
13793 instruct RethrowException()
13794 %{
13795   match(Rethrow);
13796 
13797   // use the following format syntax
13798   format %{ "JMP    rethrow_stub" %}
13799   ins_encode(enc_rethrow);
13800   ins_pipe( pipe_jmp );
13801 %}
13802 
13803 // inlined locking and unlocking
13804 
13805 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13806   predicate(Compile::current()->use_rtm());
13807   match(Set cr (FastLock object box));
13808   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13809   ins_cost(300);
13810   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13811   ins_encode %{
13812     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13813                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13814                  _rtm_counters, _stack_rtm_counters,
13815                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13816                  true, ra_->C->profile_rtm());
13817   %}
13818   ins_pipe(pipe_slow);
13819 %}
13820 
13821 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13822   predicate(!Compile::current()->use_rtm());
13823   match(Set cr (FastLock object box));
13824   effect(TEMP tmp, TEMP scr, USE_KILL box);
13825   ins_cost(300);
13826   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13827   ins_encode %{
13828     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13829                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13830   %}
13831   ins_pipe(pipe_slow);
13832 %}
13833 
13834 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13835   match(Set cr (FastUnlock object box));
13836   effect(TEMP tmp, USE_KILL box);
13837   ins_cost(300);
13838   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13839   ins_encode %{
13840     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13841   %}
13842   ins_pipe(pipe_slow);
13843 %}
13844 
13845 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13846   predicate(Matcher::vector_length(n) <= 32);
13847   match(Set dst (MaskAll src));
13848   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13849   ins_encode %{
13850     int mask_len = Matcher::vector_length(this);
13851     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13852   %}
13853   ins_pipe( pipe_slow );
13854 %}
13855 
13856 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13857   predicate(Matcher::vector_length(n) > 32);
13858   match(Set dst (MaskAll src));
13859   effect(TEMP ktmp);
13860   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13861   ins_encode %{
13862     int mask_len = Matcher::vector_length(this);
13863     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13864   %}
13865   ins_pipe( pipe_slow );
13866 %}
13867 
13868 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13869   predicate(Matcher::vector_length(n) > 32);
13870   match(Set dst (MaskAll src));
13871   effect(TEMP ktmp);
13872   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13873   ins_encode %{
13874     int mask_len = Matcher::vector_length(this);
13875     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13876   %}
13877   ins_pipe( pipe_slow );
13878 %}
13879 
13880 // ============================================================================
13881 // Safepoint Instruction
13882 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13883   match(SafePoint poll);
13884   effect(KILL cr, USE poll);
13885 
13886   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13887   ins_cost(125);
13888   // EBP would need size(3)
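  // (TEST EAX,[reg] encodes as 0x85 /r with no displacement = 2 bytes; an EBP
  //  base would force a zero disp8, hence 3 bytes.)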
13889   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13890   ins_encode %{
13891     __ relocate(relocInfo::poll_type);
13892     address pre_pc = __ pc();
13893     __ testl(rax, Address($poll$$Register, 0));
13894     address post_pc = __ pc();
13895     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13896   %}
13897   ins_pipe(ialu_reg_mem);
13898 %}
13899 
13900 
13901 // ============================================================================
13902 // This name is KNOWN by the ADLC and cannot be changed.
13903 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13904 // for this node.
13905 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13906   match(Set dst (ThreadLocal));
13907   effect(DEF dst, KILL cr);
13908 
13909   format %{ "MOV    $dst, Thread::current()" %}
13910   ins_encode %{
13911     Register dstReg = as_Register($dst$$reg);
13912     __ get_thread(dstReg);
13913   %}
13914   ins_pipe( ialu_reg_fat );
13915 %}
13916 
13917 
13918 
13919 //----------PEEPHOLE RULES-----------------------------------------------------
13920 // These must follow all instruction definitions as they use the names
13921 // defined in the instructions definitions.
13922 //
13923 // peepmatch ( root_instr_name [preceding_instruction]* );
13924 //
13925 // peepconstraint %{
13926 // (instruction_number.operand_name relational_op instruction_number.operand_name
13927 //  [, ...] );
13928 // // instruction numbers are zero-based using left to right order in peepmatch
13929 //
13930 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13931 // // provide an instruction_number.operand_name for each operand that appears
13932 // // in the replacement instruction's match rule
13933 //
13934 // ---------VM FLAGS---------------------------------------------------------
13935 //
13936 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13937 //
13938 // Each peephole rule is given an identifying number starting with zero and
13939 // increasing by one in the order seen by the parser.  An individual peephole
13940 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13941 // on the command-line.
13942 //
13943 // ---------CURRENT LIMITATIONS----------------------------------------------
13944 //
13945 // Only match adjacent instructions in same basic block
13946 // Only equality constraints
13947 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13948 // Only one replacement instruction
13949 //
13950 // ---------EXAMPLE----------------------------------------------------------
13951 //
13952 // // pertinent parts of existing instructions in architecture description
13953 // instruct movI(rRegI dst, rRegI src) %{
13954 //   match(Set dst (CopyI src));
13955 // %}
13956 //
13957 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13958 //   match(Set dst (AddI dst src));
13959 //   effect(KILL cr);
13960 // %}
13961 //
13962 // // Change (inc mov) to lea
13963 // peephole %{
13964 //   // increment preceded by register-register move
13965 //   peepmatch ( incI_eReg movI );
13966 //   // require that the destination register of the increment
13967 //   // match the destination register of the move
13968 //   peepconstraint ( 0.dst == 1.dst );
13969 //   // construct a replacement instruction that sets
13970 //   // the destination to ( move's source register + one )
13971 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13972 // %}
13973 //
13974 // The implementation no longer uses movX instructions since the
13975 // machine-independent system no longer uses CopyX nodes.
13976 //
13977 // peephole %{
13978 //   peepmatch ( incI_eReg movI );
13979 //   peepconstraint ( 0.dst == 1.dst );
13980 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13981 // %}
13982 //
13983 // peephole %{
13984 //   peepmatch ( decI_eReg movI );
13985 //   peepconstraint ( 0.dst == 1.dst );
13986 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13987 // %}
13988 //
13989 // peephole %{
13990 //   peepmatch ( addI_eReg_imm movI );
13991 //   peepconstraint ( 0.dst == 1.dst );
13992 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13993 // %}
13994 //
13995 // peephole %{
13996 //   peepmatch ( addP_eReg_imm movP );
13997 //   peepconstraint ( 0.dst == 1.dst );
13998 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13999 // %}
14000 
14001 // // Change load of spilled value to only a spill
14002 // instruct storeI(memory mem, rRegI src) %{
14003 //   match(Set mem (StoreI mem src));
14004 // %}
14005 //
14006 // instruct loadI(rRegI dst, memory mem) %{
14007 //   match(Set dst (LoadI mem));
14008 // %}
14009 //
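// Note: 1.mem appears twice in the peepreplace below because the replacement
// instruction's match rule, (Set mem (StoreI mem src)), mentions mem twice,
// and peepreplace supplies one entry per operand occurrence in that rule.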
14010 peephole %{
14011   peepmatch ( loadI storeI );
14012   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14013   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14014 %}
14015 
14016 //----------SMARTSPILL RULES---------------------------------------------------
14017 // These must follow all instruction definitions as they use the names
14018 // defined in the instructions definitions.