1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
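// For example, the EBX entry below reads as: save-on-call (SOC) as far as the
// register allocator is concerned, save-on-entry (SOE) under the C calling
// convention, spilled and restored as an integer (Op_RegI, i.e. LoadI/StoreI),
// with hardware encoding 3 placed into opcodes.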
   61 
   62 // General Registers
// EBX, ESI, and EDI were previously save-on-entry (SOE) for Java code.
// SOE was then turned off in Java code because of the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encoding to adjust the register
// encode to correct for the 0/1 offset.  See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
// Class of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
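// For example, using the encodings from the reg_def entries above:
//   EAX (enc 0) -> EDX (enc 2),  ECX (enc 1) -> EBX (enc 3),  EBP (enc 5) -> EDI (enc 7),
// which matches the long pairs EDX:EAX, EBX:ECX, and EDI:EBP listed in the
// register definition section.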
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to the 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
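// Illustrative note: masking with ~0xF can only move the address down, by at
// most 15 bytes, to the previous 16-byte boundary.  The extra 128-bit slot
// reserved in fp_signmask_pool below guarantees that the rounded-down address
// still lies inside the buffer.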
  277 
// Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
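// These are the constants behind the fast AbsF/AbsD and NegF/NegD idioms:
// ANDing with a signmask clears the sign bit (for example 0xC0490FDB, -pi as a
// float, becomes 0x40490FDB), while XORing with a signflip constant toggles it.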
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
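// For example, a direct call is a 1-byte opcode followed by a 4-byte relative
// displacement, so the return address is 5 bytes past the start of the call.
// The dynamic (inline-cache) call is preceded by a 5-byte MOV of the cache
// data (see compute_padding below), giving 10 bytes in total.  Any fldcw /
// vzeroupper counted by pre_call_resets_size() comes first and shifts the
// return address by the same amount.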
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw/vzeroupper, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw/vzeroupper, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
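// Worked example: emit_rm(cbuf, 0x3, 0 /*EAX*/, 1 /*ECX*/) packs the fields as
// (mod<<6)|(reg<<3)|(r/m) = 0xC1, the register-direct ModRM byte used, for
// instance, by encode_Copy() below to form MOV EAX,ECX as 8B C1.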
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // 8-bit displacement
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // 32-bit displacement
  407   }
  408 }
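// Worked example: an ESP-based access always needs the SIB byte 0x24
// (scale=0, index=none, base=ESP).  For a disp8 of 8 and opcode 0xFF with
// rm_field 6 (PUSH r/m32), the bytes emitted above are FF 74 24 08,
// i.e. PUSH DWORD [ESP+8].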
  409 
// Encode a register/memory operand: ModRM byte, optional SIB byte, and
// optional displacement (used by the reg-mem encodings, e.g. "rRegI ereg, memory mem").
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
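// Worked example: encode_RegMem(cbuf, 0 /*EAX*/, 1 /*ECX*/, 0x4 /*no index*/, 0 /*scale*/, 8, relocInfo::none)
// takes the no-SIB, 8-bit-displacement path above and emits the bytes 41 08;
// preceded by an 8B opcode this forms MOV EAX, [ECX+8].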
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
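// For example, after ucomiss with a NaN operand the unordered result sets
// ZF=PF=CF=1.  ANDing the saved flags with 0x2B clears ZF and PF but keeps CF,
// so after the popf the comparison reads as 'below' (less than), which is the
// desired result for NaN.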
  506 
  507 static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
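// The resulting value in dst is the usual three-way compare result:
//   -1 if the compare was unordered (parity set) or 'below',
//    0 if equal   (setb(notEqual) writes 0 when ZF is set),
//   +1 if greater (setb(notEqual) writes 1, then movzbl clears the upper bits).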
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
  619     // NOTE: We set the table base offset here because users might be
  620     // emitted before MachConstantBaseNode.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
  694 
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
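  // Encoding notes: emit_rm(0x3, 0x00, ESP_enc) above yields the ModRM byte
  // 0xC4, so the two forms are 81 C4 imm32 (ADD ESP,imm32) and 83 C4 imm8
  // (ADD ESP,imm8, sign-extended), and 0x58|EBP_enc == 0x5D is POP EBP.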
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ relocate(relocInfo::poll_return_type);
  713     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  714   }
  715 }
  716 
  717 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  718   return MachNode::size(ra_); // too many variables; just compute it
  719                               // the hard way
  720 }
  721 
  722 int MachEpilogNode::reloc() const {
  723   return 0; // a large enough number
  724 }
  725 
  726 const Pipeline * MachEpilogNode::pipeline() const {
  727   return MachNode::pipeline_class();
  728 }
  729 
  730 //=============================================================================
  731 
  732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  733 static enum RC rc_class( OptoReg::Name reg ) {
  734 
  735   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  736   if (OptoReg::is_stack(reg)) return rc_stack;
  737 
  738   VMReg r = OptoReg::as_VMReg(reg);
  739   if (r->is_Register()) return rc_int;
  740   if (r->is_FloatRegister()) {
  741     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  742     return rc_float;
  743   }
  744   if (r->is_KRegister()) return rc_kreg;
  745   assert(r->is_XMMRegister(), "must be");
  746   return rc_xmm;
  747 }
  748 
  749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  750                         int opcode, const char *op_str, int size, outputStream* st ) {
  751   if( cbuf ) {
  752     emit_opcode  (*cbuf, opcode );
  753     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754 #ifndef PRODUCT
  755   } else if( !do_size ) {
  756     if( size != 0 ) st->print("\n\t");
  757     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  758       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  759       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  760     } else { // FLD, FST, PUSH, POP
  761       st->print("%s [ESP + #%d]",op_str,offset);
  762     }
  763 #endif
  764   }
  765   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  766   return size+3+offset_size;
  767 }
  768 
  769 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  770 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  771                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  772   int in_size_in_bits = Assembler::EVEX_32bit;
  773   int evex_encoding = 0;
  774   if (reg_lo+1 == reg_hi) {
  775     in_size_in_bits = Assembler::EVEX_64bit;
  776     evex_encoding = Assembler::VEX_W;
  777   }
  778   if (cbuf) {
  779     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single byte displacement.
  782     _masm.set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (cbuf) {
  832     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  834     _masm.set_managed();
  835     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  836       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  837                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  838     } else {
  839       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     }
  842 #ifndef PRODUCT
  843   } else if (!do_size) {
  844     if (size != 0) st->print("\n\t");
  845     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  846       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       } else {
  849         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       }
  851     } else {
  852       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  853         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       } else {
  855         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       }
  857     }
  858 #endif
  859   }
  860   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  861   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  862   int sz = (UseAVX > 2) ? 6 : 4;
  863   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  864       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  865   return size + sz;
  866 }
  867 
  868 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  869                             int src_hi, int dst_hi, int size, outputStream* st ) {
  870   // 32-bit
  871   if (cbuf) {
  872     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  874     _masm.set_managed();
  875     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  876              as_Register(Matcher::_regEncode[src_lo]));
  877 #ifndef PRODUCT
  878   } else if (!do_size) {
  879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  880 #endif
  881   }
  return (UseAVX > 2) ? 6 : 4;
  883 }
  884 
  885 
  886 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  887                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  888   // 32-bit
  889   if (cbuf) {
  890     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic differs between full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  892     _masm.set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  return (UseAVX > 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( cbuf ) {
  905     emit_opcode(*cbuf, 0x8B );
  906     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( cbuf ) {
  920       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (cbuf) {
  956     MacroAssembler _masm(cbuf);
  957     switch (ireg) {
  958     case Op_VecS:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       break;
  962     case Op_VecD:
  963       __ pushl(Address(rsp, src_offset));
  964       __ popl (Address(rsp, dst_offset));
  965       __ pushl(Address(rsp, src_offset+4));
  966       __ popl (Address(rsp, dst_offset+4));
  967       break;
  968     case Op_VecX:
  969       __ movdqu(Address(rsp, -16), xmm0);
  970       __ movdqu(xmm0, Address(rsp, src_offset));
  971       __ movdqu(Address(rsp, dst_offset), xmm0);
  972       __ movdqu(xmm0, Address(rsp, -16));
  973       break;
  974     case Op_VecY:
  975       __ vmovdqu(Address(rsp, -32), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, src_offset));
  977       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  978       __ vmovdqu(xmm0, Address(rsp, -32));
  979       break;
  980     case Op_VecZ:
  981       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  983       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  984       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  985       break;
  986     default:
  987       ShouldNotReachHere();
  988     }
  989 #ifndef PRODUCT
  990   } else {
  991     switch (ireg) {
  992     case Op_VecS:
  993       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  994                 "popl    [rsp + #%d]",
  995                 src_offset, dst_offset);
  996       break;
  997     case Op_VecD:
  998       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
 1000                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1002                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1003       break;
 1004      case Op_VecX:
 1005       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1006                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1007                 "movdqu  [rsp + #%d], xmm0\n\t"
 1008                 "movdqu  xmm0, [rsp - #16]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecY:
 1012       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1013                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1014                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1015                 "vmovdqu xmm0, [rsp - #32]",
 1016                 src_offset, dst_offset);
 1017       break;
 1018     case Op_VecZ:
 1019       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1020                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1021                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1022                 "vmovdqu xmm0, [rsp - #64]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     default:
 1026       ShouldNotReachHere();
 1027     }
 1028 #endif
 1029   }
 1030 }
 1031 
 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1033   // Get registers to move
 1034   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1035   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1036   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1037   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1038 
 1039   enum RC src_second_rc = rc_class(src_second);
 1040   enum RC src_first_rc = rc_class(src_first);
 1041   enum RC dst_second_rc = rc_class(dst_second);
 1042   enum RC dst_first_rc = rc_class(dst_first);
 1043 
 1044   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1045 
 1046   // Generate spill code!
 1047   int size = 0;
 1048 
 1049   if( src_first == dst_first && src_second == dst_second )
 1050     return size;            // Self copy, no move
 1051 
 1052   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1053     uint ireg = ideal_reg();
 1054     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1055     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1056     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1057     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1058       // mem -> mem
 1059       int src_offset = ra_->reg2offset(src_first);
 1060       int dst_offset = ra_->reg2offset(dst_first);
 1061       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1063       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1064     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1065       int stack_offset = ra_->reg2offset(dst_first);
 1066       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1067     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1068       int stack_offset = ra_->reg2offset(src_first);
 1069       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1070     } else {
 1071       ShouldNotReachHere();
 1072     }
 1073     return 0;
 1074   }
 1075 
 1076   // --------------------------------------
 1077   // Check for mem-mem move.  push/pop to move.
 1078   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1079     if( src_second == dst_first ) { // overlapping stack copy ranges
 1080       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1083       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1084     }
 1085     // move low bits
 1086     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1087     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1088     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1089       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1090       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1091     }
 1092     return size;
 1093   }
 1094 
 1095   // --------------------------------------
 1096   // Check for integer reg-reg copy
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1098     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1099 
 1100   // Check for integer store
 1101   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1102     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1103 
 1104   // Check for integer load
 1105   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1106     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1107 
 1108   // Check for integer reg-xmm reg copy
 1109   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1110     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1111             "no 64 bit integer-float reg moves" );
 1112     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1113   }
 1114   // --------------------------------------
 1115   // Check for float reg-reg copy
 1116   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1117     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1118             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1119     if( cbuf ) {
 1120 
 1121       // Note the mucking with the register encode to compensate for the 0/1
 1122       // indexing issue mentioned in a comment in the reg_def sections
 1123       // for FPR registers many lines above here.
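      // For example, copying FPR2 to FPR3 (encodings 2 and 3) emits
      //   D9 C1   FLD  ST(1)   -- push a copy of FPR2 (st(1) before the push)
      //   DD DB   FSTP ST(3)   -- store into FPR3, which the push moved to st(3), then pop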
 1124 
 1125       if( src_first != FPR1L_num ) {
 1126         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1127         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1128         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1129         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1130      } else {
 1131         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1132         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1133      }
 1134 #ifndef PRODUCT
 1135     } else if( !do_size ) {
 1136       if( size != 0 ) st->print("\n\t");
 1137       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1138       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1139 #endif
 1140     }
 1141     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1142   }
 1143 
 1144   // Check for float store
 1145   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1146     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1147   }
 1148 
 1149   // Check for float load
 1150   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1151     int offset = ra_->reg2offset(src_first);
 1152     const char *op_str;
 1153     int op;
 1154     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1155       op_str = "FLD_D";
 1156       op = 0xDD;
 1157     } else {                   // 32-bit load
 1158       op_str = "FLD_S";
 1159       op = 0xD9;
 1160       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1161     }
 1162     if( cbuf ) {
 1163       emit_opcode  (*cbuf, op );
 1164       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1167 #ifndef PRODUCT
 1168     } else if( !do_size ) {
 1169       if( size != 0 ) st->print("\n\t");
 1170       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1171 #endif
 1172     }
 1173     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1174     return size + 3+offset_size+2;
 1175   }
 1176 
 1177   // Check for xmm reg-reg copy
 1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1180             (src_first+1 == src_second && dst_first+1 == dst_second),
 1181             "no non-adjacent float-moves" );
 1182     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1183   }
 1184 
 1185   // Check for xmm reg-integer reg copy
 1186   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1187     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1188             "no 64 bit float-integer reg moves" );
 1189     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1190   }
 1191 
 1192   // Check for xmm store
 1193   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1194     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1195   }
 1196 
 1197   // Check for float xmm load
 1198   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1199     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1200   }
 1201 
 1202   // Copy from float reg to xmm reg
 1203   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1204     // copy to the top of stack from floating point reg
 1205     // and use LEA to preserve flags
 1206     if( cbuf ) {
 1207       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1208       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1209       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1210       emit_d8(*cbuf,0xF8);
 1211 #ifndef PRODUCT
 1212     } else if( !do_size ) {
 1213       if( size != 0 ) st->print("\n\t");
 1214       st->print("LEA    ESP,[ESP-8]");
 1215 #endif
 1216     }
 1217     size += 4;
 1218 
 1219     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1220 
 1221     // Copy from the temp memory to the xmm reg.
 1222     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1223 
 1224     if( cbuf ) {
 1225       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1226       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1227       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1228       emit_d8(*cbuf,0x08);
 1229 #ifndef PRODUCT
 1230     } else if( !do_size ) {
 1231       if( size != 0 ) st->print("\n\t");
 1232       st->print("LEA    ESP,[ESP+8]");
 1233 #endif
 1234     }
 1235     size += 4;
 1236     return size;
 1237   }
 1238 
 1239   // AVX-512 opmask specific spilling.
 1240   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1241     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1242     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1243     int offset = ra_->reg2offset(src_first);
 1244     if (cbuf != nullptr) {
 1245       MacroAssembler _masm(cbuf);
 1246       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247 #ifndef PRODUCT
 1248     } else {
 1249       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1250 #endif
 1251     }
 1252     return 0;
 1253   }
 1254 
 1255   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1256     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1257     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1258     int offset = ra_->reg2offset(dst_first);
 1259     if (cbuf != nullptr) {
 1260       MacroAssembler _masm(cbuf);
 1261       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1262 #ifndef PRODUCT
 1263     } else {
 1264       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1265 #endif
 1266     }
 1267     return 0;
 1268   }
 1269 
 1270   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1271     Unimplemented();
 1272     return 0;
 1273   }
 1274 
 1275   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1276     Unimplemented();
 1277     return 0;
 1278   }
 1279 
 1280   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1281     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1282     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1283     if (cbuf != nullptr) {
 1284       MacroAssembler _masm(cbuf);
 1285       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1286 #ifndef PRODUCT
 1287     } else {
 1288       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1289 #endif
 1290     }
 1291     return 0;
 1292   }
 1293 
 1294   assert( size > 0, "missed a case" );
 1295 
 1296   // --------------------------------------------------------------------
  // Check whether the second word still needs to be moved.
 1298   if( src_second == dst_second )
 1299     return size;               // Self copy; no move
 1300   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1301 
 1302   // Check for second word int-int move
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1304     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1305 
 1306   // Check for second word integer store
 1307   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1309 
 1310   // Check for second word integer load
 1311   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1312     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1313 
 1314   Unimplemented();
 1315   return 0; // Mute compiler
 1316 }
 1317 
 1318 #ifndef PRODUCT
 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1320   implementation( nullptr, ra_, false, st );
 1321 }
 1322 #endif
 1323 
 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1325   implementation( &cbuf, ra_, false, nullptr );
 1326 }
 1327 
 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1329   return MachNode::size(ra_);
 1330 }
 1331 
 1332 
 1333 //=============================================================================
 1334 #ifndef PRODUCT
 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1337   int reg = ra_->get_reg_first(this);
 1338   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1339 }
 1340 #endif
 1341 
 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   int reg = ra_->get_encode(this);
 1345   if( offset >= 128 ) {
 1346     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1347     emit_rm(cbuf, 0x2, reg, 0x04);
 1348     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1349     emit_d32(cbuf, offset);
 1350   }
 1351   else {
 1352     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1353     emit_rm(cbuf, 0x1, reg, 0x04);
 1354     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1355     emit_d8(cbuf, offset);
 1356   }
 1357 }
 1358 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;                 // LEA reg,[ESP+disp32]: opcode + ModRM + SIB + 4-byte displacement
  }
  else {
    return 4;                 // LEA reg,[ESP+disp8]:  opcode + ModRM + SIB + 1-byte displacement
  }
}
 1368 
 1369 //=============================================================================
 1370 #ifndef PRODUCT
 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1372   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1373   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1374   st->print_cr("\tNOP");
 1375   st->print_cr("\tNOP");
 1376   if( !OptoBreakpoint )
 1377     st->print_cr("\tNOP");
 1378 }
 1379 #endif
 1380 
 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1382   MacroAssembler masm(&cbuf);
 1383   masm.ic_check(CodeEntryAlignment);
 1384 }
 1385 
 1386 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1387   return MachNode::size(ra_); // too many variables; just compute it
 1388                               // the hard way
 1389 }
 1390 
 1391 
 1392 //=============================================================================
 1393 
 1394 // Vector calling convention not supported.
 1395 bool Matcher::supports_vector_calling_convention() {
 1396   return false;
 1397 }
 1398 
 1399 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1400   Unimplemented();
 1401   return OptoRegPair(0, 0);
 1402 }
 1403 
 1404 // Is this branch offset short enough that a short branch can be used?
 1405 //
 1406 // NOTE: If the platform does not provide any short branch variants, then
 1407 //       this method should return false for offset 0.
 1408 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1412   offset -= br_size;
 1413 
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
 1416   if (rule == jmpConUCF2_rule)
 1417     return (-126 <= offset && offset <= 125);
 1418   return (-128 <= offset && offset <= 127);
 1419 }
 1420 
 1421 // Return whether or not this register is ever used as an argument.  This
 1422 // function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1425 bool Matcher::can_be_java_arg( int reg ) {
 1426   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1427   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1428   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1429   return false;
 1430 }
 1431 
 1432 bool Matcher::is_spillable_arg( int reg ) {
 1433   return can_be_java_arg(reg);
 1434 }
 1435 
 1436 uint Matcher::int_pressure_limit()
 1437 {
 1438   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1439 }
 1440 
 1441 uint Matcher::float_pressure_limit()
 1442 {
 1443   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1444 }
 1445 
 1446 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not yield
  // a correct positive 32-bit value).
 1452   return VM_Version::has_fast_idiv() &&
 1453          (divisor == (int)divisor && divisor != min_jint);
 1454 }
 1455 
 1456 // Register for DIVI projection of divmodI
 1457 RegMask Matcher::divI_proj_mask() {
 1458   return EAX_REG_mask();
 1459 }
 1460 
 1461 // Register for MODI projection of divmodI
 1462 RegMask Matcher::modI_proj_mask() {
 1463   return EDX_REG_mask();
 1464 }
 1465 
 1466 // Register for DIVL projection of divmodL
 1467 RegMask Matcher::divL_proj_mask() {
 1468   ShouldNotReachHere();
 1469   return RegMask();
 1470 }
 1471 
 1472 // Register for MODL projection of divmodL
 1473 RegMask Matcher::modL_proj_mask() {
 1474   ShouldNotReachHere();
 1475   return RegMask();
 1476 }
 1477 
 1478 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1479   return NO_REG_mask();
 1480 }
 1481 
// Returns true if the high 32 bits of the value are known to be zero.
 1483 bool is_operand_hi32_zero(Node* n) {
 1484   int opc = n->Opcode();
 1485   if (opc == Op_AndL) {
 1486     Node* o2 = n->in(2);
 1487     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1488       return true;
 1489     }
 1490   }
 1491   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1492     return true;
 1493   }
 1494   return false;
 1495 }
 1496 
 1497 %}
 1498 
 1499 //----------ENCODING BLOCK-----------------------------------------------------
 1500 // This block specifies the encoding classes used by the compiler to output
 1501 // byte streams.  Encoding classes generate functions which are called by
 1502 // Machine Instruction Nodes in order to generate the bit encoding of the
 1503 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
 1514 // Instructions specify two basic values for encoding.  They use the
 1515 // ins_encode keyword to specify their encoding class (which must be one of
 1516 // the class names specified in the encoding block), and they use the
 1517 // opcode keyword to specify, in order, their primary, secondary, and
 1518 // tertiary opcode.  Only the opcode sections which a particular instruction
 1519 // needs for encoding need to be specified.
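//
// As an illustrative sketch (loosely modeled on the integer-add rules that
// appear later in this file), an instruction ties the two together like this:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                        // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit primary opcode, then ModRM
//     ins_pipe( ialu_reg_reg );
//   %}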
 1520 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically.
 1527 
 1528   // Emit primary opcode
 1529   enc_class OpcP %{
 1530     emit_opcode(cbuf, $primary);
 1531   %}
 1532 
 1533   // Emit secondary opcode
 1534   enc_class OpcS %{
 1535     emit_opcode(cbuf, $secondary);
 1536   %}
 1537 
 1538   // Emit opcode directly
 1539   enc_class Opcode(immI d8) %{
 1540     emit_opcode(cbuf, $d8$$constant);
 1541   %}
 1542 
 1543   enc_class SizePrefix %{
 1544     emit_opcode(cbuf,0x66);
 1545   %}
 1546 
 1547   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1548     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1549   %}
 1550 
 1551   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1552     emit_opcode(cbuf,$opcode$$constant);
 1553     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1554   %}
 1555 
 1556   enc_class mov_r32_imm0( rRegI dst ) %{
 1557     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1558     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1559   %}
 1560 
 1561   enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : eax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)       min_int
    //         edx: remainder (= eax irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
    //
 1585     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1586     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
 1588     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1589     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1590     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
 1593     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1594     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1595     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1596     // normal_case:
 1597     emit_opcode(cbuf,0x99);                                         // cdq
 1598     // idiv (note: must be emitted by the user of this rule)
    // done:
 1600   %}
 1601 
 1602   // Dense encoding for older common ops
 1603   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1604     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1605   %}
 1606 
 1607 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1609   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1610     // Check for 8-bit immediate, and set sign extend bit in opcode
 1611     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1612       emit_opcode(cbuf, $primary | 0x02);
 1613     }
 1614     else {                          // If 32-bit immediate
 1615       emit_opcode(cbuf, $primary);
 1616     }
 1617   %}
 1618 
 1619   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1620     // Emit primary opcode and set sign-extend bit
 1621     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
 1627     // Emit r/m byte with secondary opcode, after primary opcode.
 1628     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1629   %}
 1630 
 1631   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1632     // Check for 8-bit immediate, and set sign extend bit in opcode
 1633     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1634       $$$emit8$imm$$constant;
 1635     }
 1636     else {                          // If 32-bit immediate
 1637       // Output immediate
 1638       $$$emit32$imm$$constant;
 1639     }
 1640   %}
 1641 
 1642   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1643     // Emit primary opcode and set sign-extend bit
 1644     // Check for 8-bit immediate, and set sign extend bit in opcode
 1645     int con = (int)$imm$$constant; // Throw away top bits
 1646     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1647     // Emit r/m byte with secondary opcode, after primary opcode.
 1648     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1649     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1650     else                               emit_d32(cbuf,con);
 1651   %}
 1652 
 1653   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1654     // Emit primary opcode and set sign-extend bit
 1655     // Check for 8-bit immediate, and set sign extend bit in opcode
 1656     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1657     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1658     // Emit r/m byte with tertiary opcode, after primary opcode.
 1659     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1660     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1661     else                               emit_d32(cbuf,con);
 1662   %}
 1663 
 1664   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1665     emit_cc(cbuf, $secondary, $dst$$reg );
 1666   %}
 1667 
 1668   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1669     int destlo = $dst$$reg;
 1670     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1671     // bswap lo
 1672     emit_opcode(cbuf, 0x0F);
 1673     emit_cc(cbuf, 0xC8, destlo);
 1674     // bswap hi
 1675     emit_opcode(cbuf, 0x0F);
 1676     emit_cc(cbuf, 0xC8, desthi);
 1677     // xchg lo and hi
 1678     emit_opcode(cbuf, 0x87);
 1679     emit_rm(cbuf, 0x3, destlo, desthi);
 1680   %}
 1681 
 1682   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1683     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1684   %}
 1685 
 1686   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1687     $$$emit8$primary;
 1688     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1689   %}
 1690 
 1691   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1692     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1693     emit_d8(cbuf, op >> 8 );
 1694     emit_d8(cbuf, op & 255);
 1695   %}
 1696 
 1697   // emulate a CMOV with a conditional branch around a MOV
 1698   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1699     // Invert sense of branch from sense of CMOV
 1700     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1701     emit_d8( cbuf, $brOffs$$constant );
 1702   %}
 1703 
 1704   enc_class enc_PartialSubtypeCheck( ) %{
 1705     Register Redi = as_Register(EDI_enc); // result register
 1706     Register Reax = as_Register(EAX_enc); // super class
 1707     Register Recx = as_Register(ECX_enc); // killed
 1708     Register Resi = as_Register(ESI_enc); // sub class
 1709     Label miss;
 1710 
 1711     MacroAssembler _masm(&cbuf);
 1712     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1713                                      nullptr, &miss,
 1714                                      /*set_cond_codes:*/ true);
 1715     if ($primary) {
 1716       __ xorptr(Redi, Redi);
 1717     }
 1718     __ bind(miss);
 1719   %}
 1720 
 1721   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1722     MacroAssembler masm(&cbuf);
 1723     int start = masm.offset();
 1724     if (UseSSE >= 2) {
 1725       if (VerifyFPU) {
 1726         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1727       }
 1728     } else {
 1729       // External c_calling_convention expects the FPU stack to be 'clean'.
 1730       // Compiled code leaves it dirty.  Do cleanup now.
 1731       masm.empty_FPU_stack();
 1732     }
 1733     if (sizeof_FFree_Float_Stack_All == -1) {
 1734       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1735     } else {
 1736       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1737     }
 1738   %}
 1739 
 1740   enc_class Verify_FPU_For_Leaf %{
 1741     if( VerifyFPU ) {
 1742       MacroAssembler masm(&cbuf);
 1743       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1744     }
 1745   %}
 1746 
 1747   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1748     // This is the instruction starting address for relocation info.
 1749     MacroAssembler _masm(&cbuf);
 1750     cbuf.set_insts_mark();
 1751     $$$emit8$primary;
 1752     // CALL directly to the runtime
 1753     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1754                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1755     __ post_call_nop();
 1756 
 1757     if (UseSSE >= 2) {
 1758       MacroAssembler _masm(&cbuf);
 1759       BasicType rt = tf()->return_type();
 1760 
 1761       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1762         // A C runtime call where the return value is unused.  In SSE2+
 1763         // mode the result needs to be removed from the FPU stack.  It's
 1764         // likely that this function call could be removed by the
 1765         // optimizer if the C function is a pure function.
 1766         __ ffree(0);
 1767       } else if (rt == T_FLOAT) {
 1768         __ lea(rsp, Address(rsp, -4));
 1769         __ fstp_s(Address(rsp, 0));
 1770         __ movflt(xmm0, Address(rsp, 0));
 1771         __ lea(rsp, Address(rsp,  4));
 1772       } else if (rt == T_DOUBLE) {
 1773         __ lea(rsp, Address(rsp, -8));
 1774         __ fstp_d(Address(rsp, 0));
 1775         __ movdbl(xmm0, Address(rsp, 0));
 1776         __ lea(rsp, Address(rsp,  8));
 1777       }
 1778     }
 1779   %}
 1780 
 1781   enc_class pre_call_resets %{
 1782     // If method sets FPU control word restore it here
 1783     debug_only(int off0 = cbuf.insts_size());
 1784     if (ra_->C->in_24_bit_fp_mode()) {
 1785       MacroAssembler _masm(&cbuf);
 1786       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1787     }
 1788     // Clear upper bits of YMM registers when current compiled code uses
 1789     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1790     MacroAssembler _masm(&cbuf);
 1791     __ vzeroupper();
 1792     debug_only(int off1 = cbuf.insts_size());
 1793     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1794   %}
 1795 
 1796   enc_class post_call_FPU %{
 1797     // If method sets FPU control word do it here also
 1798     if (Compile::current()->in_24_bit_fp_mode()) {
 1799       MacroAssembler masm(&cbuf);
 1800       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1801     }
 1802   %}
 1803 
 1804   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1805     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1806     // who we intended to call.
 1807     MacroAssembler _masm(&cbuf);
 1808     cbuf.set_insts_mark();
 1809     $$$emit8$primary;
 1810 
 1811     if (!_method) {
 1812       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1813                      runtime_call_Relocation::spec(),
 1814                      RELOC_IMM32);
 1815       __ post_call_nop();
 1816     } else {
 1817       int method_index = resolved_method_index(cbuf);
 1818       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1819                                                   : static_call_Relocation::spec(method_index);
 1820       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1821                      rspec, RELOC_DISP32);
 1822       __ post_call_nop();
 1823       address mark = cbuf.insts_mark();
 1824       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1825         // Calls of the same statically bound method can share
 1826         // a stub to the interpreter.
 1827         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1828       } else {
 1829         // Emit stubs for static call.
 1830         address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark);
 1831         if (stub == nullptr) {
 1832           ciEnv::current()->record_failure("CodeCache is full");
 1833           return;
 1834         }
 1835       }
 1836     }
 1837   %}
 1838 
 1839   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1840     MacroAssembler _masm(&cbuf);
 1841     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1842     __ post_call_nop();
 1843   %}
 1844 
 1845   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1846     int disp = in_bytes(Method::from_compiled_offset());
 1847     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1848 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1850     MacroAssembler _masm(&cbuf);
 1851     cbuf.set_insts_mark();
 1852     $$$emit8$primary;
 1853     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1854     emit_d8(cbuf, disp);             // Displacement
 1855     __ post_call_nop();
 1856   %}
 1857 
 1858 //   Following encoding is no longer used, but may be restored if calling
 1859 //   convention changes significantly.
 1860 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1861 //
 1862 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1863 //     // int ic_reg     = Matcher::inline_cache_reg();
 1864 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1865 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1866 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1867 //
 1868 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1869 //     // // so we load it immediately before the call
 1870 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1871 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1872 //
 1873 //     // xor rbp,ebp
 1874 //     emit_opcode(cbuf, 0x33);
 1875 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1876 //
 1877 //     // CALL to interpreter.
 1878 //     cbuf.set_insts_mark();
 1879 //     $$$emit8$primary;
 1880 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1881 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1882 //   %}
 1883 
 1884   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1885     $$$emit8$primary;
 1886     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1887     $$$emit8$shift$$constant;
 1888   %}
 1889 
 1890   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1891     // Load immediate does not have a zero or sign extended version
 1892     // for 8-bit immediates
 1893     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1894     $$$emit32$src$$constant;
 1895   %}
 1896 
 1897   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1898     // Load immediate does not have a zero or sign extended version
 1899     // for 8-bit immediates
 1900     emit_opcode(cbuf, $primary + $dst$$reg);
 1901     $$$emit32$src$$constant;
 1902   %}
 1903 
 1904   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1905     // Load immediate does not have a zero or sign extended version
 1906     // for 8-bit immediates
 1907     int dst_enc = $dst$$reg;
 1908     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1909     if (src_con == 0) {
 1910       // xor dst, dst
 1911       emit_opcode(cbuf, 0x33);
 1912       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1913     } else {
 1914       emit_opcode(cbuf, $primary + dst_enc);
 1915       emit_d32(cbuf, src_con);
 1916     }
 1917   %}
 1918 
 1919   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1920     // Load immediate does not have a zero or sign extended version
 1921     // for 8-bit immediates
 1922     int dst_enc = $dst$$reg + 2;
 1923     int src_con = ((julong)($src$$constant)) >> 32;
 1924     if (src_con == 0) {
 1925       // xor dst, dst
 1926       emit_opcode(cbuf, 0x33);
 1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1928     } else {
 1929       emit_opcode(cbuf, $primary + dst_enc);
 1930       emit_d32(cbuf, src_con);
 1931     }
 1932   %}
 1933 
 1934 
 1935   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1936   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1937     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1938   %}
 1939 
 1940   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1941     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1942   %}
 1943 
 1944   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1945     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1946   %}
 1947 
 1948   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1949     $$$emit8$primary;
 1950     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1951   %}
 1952 
 1953   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1954     $$$emit8$secondary;
 1955     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1956   %}
 1957 
 1958   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1959     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1960   %}
 1961 
 1962   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1963     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1964   %}
 1965 
 1966   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1967     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1968   %}
 1969 
 1970   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1971     // Output immediate
 1972     $$$emit32$src$$constant;
 1973   %}
 1974 
 1975   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1976     // Output Float immediate bits
 1977     jfloat jf = $src$$constant;
 1978     int    jf_as_bits = jint_cast( jf );
 1979     emit_d32(cbuf, jf_as_bits);
 1980   %}
 1981 
 1982   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1983     // Output Float immediate bits
 1984     jfloat jf = $src$$constant;
 1985     int    jf_as_bits = jint_cast( jf );
 1986     emit_d32(cbuf, jf_as_bits);
 1987   %}
 1988 
 1989   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1990     // Output immediate
 1991     $$$emit16$src$$constant;
 1992   %}
 1993 
 1994   enc_class Con_d32(immI src) %{
 1995     emit_d32(cbuf,$src$$constant);
 1996   %}
 1997 
 1998   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1999     // Output immediate memory reference
 2000     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2001     emit_d32(cbuf, 0x00);
 2002   %}
 2003 
 2004   enc_class lock_prefix( ) %{
 2005     emit_opcode(cbuf,0xF0);         // [Lock]
 2006   %}
 2007 
 2008   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
 2013   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2014 
    // XCHG  ebx,ecx
 2016     emit_opcode(cbuf,0x87);
 2017     emit_opcode(cbuf,0xD9);
 2018     // [Lock]
 2019     emit_opcode(cbuf,0xF0);
    // CMPXCHG8B [Eptr]
 2021     emit_opcode(cbuf,0x0F);
 2022     emit_opcode(cbuf,0xC7);
 2023     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
 2025     emit_opcode(cbuf,0x87);
 2026     emit_opcode(cbuf,0xD9);
 2027   %}
 2028 
 2029   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032 
 2033     // CMPXCHG [Eptr]
 2034     emit_opcode(cbuf,0x0F);
 2035     emit_opcode(cbuf,0xB1);
 2036     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2037   %}
 2038 
 2039   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2040     // [Lock]
 2041     emit_opcode(cbuf,0xF0);
 2042 
 2043     // CMPXCHGB [Eptr]
 2044     emit_opcode(cbuf,0x0F);
 2045     emit_opcode(cbuf,0xB0);
 2046     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2047   %}
 2048 
 2049   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2050     // [Lock]
 2051     emit_opcode(cbuf,0xF0);
 2052 
    // operand-size prefix (16-bit operand)
 2054     emit_opcode(cbuf, 0x66);
 2055 
 2056     // CMPXCHGW [Eptr]
 2057     emit_opcode(cbuf,0x0F);
 2058     emit_opcode(cbuf,0xB1);
 2059     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2060   %}
 2061 
 2062   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2063     int res_encoding = $res$$reg;
 2064 
 2065     // MOV  res,0
 2066     emit_opcode( cbuf, 0xB8 + res_encoding);
 2067     emit_d32( cbuf, 0 );
 2068     // JNE,s  fail
 2069     emit_opcode(cbuf,0x75);
 2070     emit_d8(cbuf, 5 );
 2071     // MOV  res,1
 2072     emit_opcode( cbuf, 0xB8 + res_encoding);
 2073     emit_d32( cbuf, 1 );
 2074     // fail:
 2075   %}
 2076 
 2077   enc_class set_instruction_start( ) %{
 2078     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2079   %}
 2080 
 2081   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2082     int reg_encoding = $ereg$$reg;
 2083     int base  = $mem$$base;
 2084     int index = $mem$$index;
 2085     int scale = $mem$$scale;
 2086     int displace = $mem$$disp;
 2087     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2088     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2089   %}
 2090 
 2091   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2092     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2093     int base  = $mem$$base;
 2094     int index = $mem$$index;
 2095     int scale = $mem$$scale;
 2096     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2097     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2098     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2099   %}
 2100 
 2101   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2102     int r1, r2;
 2103     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2104     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2105     emit_opcode(cbuf,0x0F);
 2106     emit_opcode(cbuf,$tertiary);
 2107     emit_rm(cbuf, 0x3, r1, r2);
 2108     emit_d8(cbuf,$cnt$$constant);
 2109     emit_d8(cbuf,$primary);
 2110     emit_rm(cbuf, 0x3, $secondary, r1);
 2111     emit_d8(cbuf,$cnt$$constant);
 2112   %}
 2113 
 2114   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2115     emit_opcode( cbuf, 0x8B ); // Move
 2116     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2117     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2118       emit_d8(cbuf,$primary);
 2119       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2120       emit_d8(cbuf,$cnt$$constant-32);
 2121     }
 2122     emit_d8(cbuf,$primary);
 2123     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2124     emit_d8(cbuf,31);
 2125   %}
 2126 
 2127   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2128     int r1, r2;
 2129     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2130     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2131 
 2132     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2133     emit_rm(cbuf, 0x3, r1, r2);
 2134     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2135       emit_opcode(cbuf,$primary);
 2136       emit_rm(cbuf, 0x3, $secondary, r1);
 2137       emit_d8(cbuf,$cnt$$constant-32);
 2138     }
 2139     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2140     emit_rm(cbuf, 0x3, r2, r2);
 2141   %}
 2142 
 2143   // Clone of RegMem but accepts an extra parameter to access each
 2144   // half of a double in memory; it never needs relocation info.
 2145   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2146     emit_opcode(cbuf,$opcode$$constant);
 2147     int reg_encoding = $rm_reg$$reg;
 2148     int base     = $mem$$base;
 2149     int index    = $mem$$index;
 2150     int scale    = $mem$$scale;
 2151     int displace = $mem$$disp + $disp_for_half$$constant;
 2152     relocInfo::relocType disp_reloc = relocInfo::none;
 2153     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2154   %}
 2155 
 2156   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2157   //
 2158   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2159   // and it never needs relocation information.
 2160   // Frequently used to move data between FPU's Stack Top and memory.
 2161   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2162     int rm_byte_opcode = $rm_opcode$$constant;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp;
 2167     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2168     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2169   %}
 2170 
 2171   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2172     int rm_byte_opcode = $rm_opcode$$constant;
 2173     int base     = $mem$$base;
 2174     int index    = $mem$$index;
 2175     int scale    = $mem$$scale;
 2176     int displace = $mem$$disp;
 2177     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2178     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2179   %}
 2180 
 2181   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2182     int reg_encoding = $dst$$reg;
 2183     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2184     int index        = 0x04;            // 0x04 indicates no index
 2185     int scale        = 0x00;            // 0x00 indicates no scale
 2186     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2187     relocInfo::relocType disp_reloc = relocInfo::none;
 2188     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2189   %}
 2190 
 2191   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2192     // Compare dst,src
 2193     emit_opcode(cbuf,0x3B);
 2194     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2195     // jmp dst < src around move
 2196     emit_opcode(cbuf,0x7C);
 2197     emit_d8(cbuf,2);
 2198     // move dst,src
 2199     emit_opcode(cbuf,0x8B);
 2200     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2201   %}
 2202 
 2203   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2204     // Compare dst,src
 2205     emit_opcode(cbuf,0x3B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207     // jmp dst > src around move
 2208     emit_opcode(cbuf,0x7F);
 2209     emit_d8(cbuf,2);
 2210     // move dst,src
 2211     emit_opcode(cbuf,0x8B);
 2212     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2213   %}
 2214 
 2215   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2216     // If src is FPR1, we can just FST to store it.
 2217     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2218     int reg_encoding = 0x2; // Just store
 2219     int base  = $mem$$base;
 2220     int index = $mem$$index;
 2221     int scale = $mem$$scale;
 2222     int displace = $mem$$disp;
 2223     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2224     if( $src$$reg != FPR1L_enc ) {
 2225       reg_encoding = 0x3;  // Store & pop
 2226       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2227       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2228     }
 2229     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2230     emit_opcode(cbuf,$primary);
 2231     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2232   %}
 2233 
 2234   enc_class neg_reg(rRegI dst) %{
 2235     // NEG $dst
 2236     emit_opcode(cbuf,0xF7);
 2237     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2238   %}
 2239 
 2240   enc_class setLT_reg(eCXRegI dst) %{
 2241     // SETLT $dst
 2242     emit_opcode(cbuf,0x0F);
 2243     emit_opcode(cbuf,0x9C);
 2244     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2245   %}
 2246 
 2247   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2248     int tmpReg = $tmp$$reg;
 2249 
 2250     // SUB $p,$q
 2251     emit_opcode(cbuf,0x2B);
 2252     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2253     // SBB $tmp,$tmp
 2254     emit_opcode(cbuf,0x1B);
 2255     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2256     // AND $tmp,$y
 2257     emit_opcode(cbuf,0x23);
 2258     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2259     // ADD $p,$tmp
 2260     emit_opcode(cbuf,0x03);
 2261     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2262   %}
 2263 
 2264   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2265     // TEST shift,32
 2266     emit_opcode(cbuf,0xF7);
 2267     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2268     emit_d32(cbuf,0x20);
 2269     // JEQ,s small
 2270     emit_opcode(cbuf, 0x74);
 2271     emit_d8(cbuf, 0x04);
 2272     // MOV    $dst.hi,$dst.lo
 2273     emit_opcode( cbuf, 0x8B );
 2274     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2275     // CLR    $dst.lo
 2276     emit_opcode(cbuf, 0x33);
 2277     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2278 // small:
 2279     // SHLD   $dst.hi,$dst.lo,$shift
 2280     emit_opcode(cbuf,0x0F);
 2281     emit_opcode(cbuf,0xA5);
 2282     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2284     emit_opcode(cbuf,0xD3);
 2285     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2286   %}
 2287 
 2288   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2289     // TEST shift,32
 2290     emit_opcode(cbuf,0xF7);
 2291     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2292     emit_d32(cbuf,0x20);
 2293     // JEQ,s small
 2294     emit_opcode(cbuf, 0x74);
 2295     emit_d8(cbuf, 0x04);
 2296     // MOV    $dst.lo,$dst.hi
 2297     emit_opcode( cbuf, 0x8B );
 2298     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2299     // CLR    $dst.hi
 2300     emit_opcode(cbuf, 0x33);
 2301     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2302 // small:
 2303     // SHRD   $dst.lo,$dst.hi,$shift
 2304     emit_opcode(cbuf,0x0F);
 2305     emit_opcode(cbuf,0xAD);
 2306     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2308     emit_opcode(cbuf,0xD3);
 2309     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2310   %}
 2311 
 2312   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2313     // TEST shift,32
 2314     emit_opcode(cbuf,0xF7);
 2315     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2316     emit_d32(cbuf,0x20);
 2317     // JEQ,s small
 2318     emit_opcode(cbuf, 0x74);
 2319     emit_d8(cbuf, 0x05);
 2320     // MOV    $dst.lo,$dst.hi
 2321     emit_opcode( cbuf, 0x8B );
 2322     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2323     // SAR    $dst.hi,31
 2324     emit_opcode(cbuf, 0xC1);
 2325     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2326     emit_d8(cbuf, 0x1F );
 2327 // small:
 2328     // SHRD   $dst.lo,$dst.hi,$shift
 2329     emit_opcode(cbuf,0x0F);
 2330     emit_opcode(cbuf,0xAD);
 2331     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2333     emit_opcode(cbuf,0xD3);
 2334     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2335   %}
 2336 
 2337 
 2338   // ----------------- Encodings for floating point unit -----------------
 2339   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2340   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2341     $$$emit8$primary;
 2342     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2343   %}
 2344 
 2345   // Pop argument in FPR0 with FSTP ST(0)
 2346   enc_class PopFPU() %{
 2347     emit_opcode( cbuf, 0xDD );
 2348     emit_d8( cbuf, 0xD8 );
 2349   %}
 2350 
 2351   // !!!!! equivalent to Pop_Reg_F
 2352   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2353     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2354     emit_d8( cbuf, 0xD8+$dst$$reg );
 2355   %}
 2356 
 2357   enc_class Push_Reg_DPR( regDPR dst ) %{
 2358     emit_opcode( cbuf, 0xD9 );
 2359     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2360   %}
 2361 
 2362   enc_class strictfp_bias1( regDPR dst ) %{
 2363     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2364     emit_opcode( cbuf, 0x2D );
 2365     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2366     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2367     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2368   %}
 2369 
 2370   enc_class strictfp_bias2( regDPR dst ) %{
 2371     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2372     emit_opcode( cbuf, 0x2D );
 2373     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2374     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2375     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2376   %}
 2377 
 2378   // Special case for moving an integer register to a stack slot.
 2379   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2380     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2381   %}
 2382 
 2383   // Special case for moving a register to a stack slot.
 2384   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2385     // Opcode already emitted
 2386     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2387     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2388     emit_d32(cbuf, $dst$$disp);   // Displacement
 2389   %}
 2390 
 2391   // Push the integer in stackSlot 'src' onto FP-stack
 2392   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2393     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2394   %}
 2395 
 2396   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2397   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2398     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2399   %}
 2400 
 2401   // Same as Pop_Mem_F except for opcode
 2402   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2403   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2404     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2405   %}
 2406 
 2407   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2408     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2409     emit_d8( cbuf, 0xD8+$dst$$reg );
 2410   %}
 2411 
 2412   enc_class Push_Reg_FPR( regFPR dst ) %{
 2413     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2414     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2415   %}
 2416 
 2417   // Push FPU's float to a stack-slot, and pop FPU-stack
 2418   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2419     int pop = 0x02;
 2420     if ($src$$reg != FPR1L_enc) {
 2421       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2422       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2423       pop = 0x03;
 2424     }
 2425     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2426   %}
 2427 
 2428   // Push FPU's double to a stack-slot, and pop FPU-stack
 2429   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2430     int pop = 0x02;
 2431     if ($src$$reg != FPR1L_enc) {
 2432       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2433       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2434       pop = 0x03;
 2435     }
 2436     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2437   %}
 2438 
 2439   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2440   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2441     int pop = 0xD0 - 1; // -1 since we skip FLD
 2442     if ($src$$reg != FPR1L_enc) {
 2443       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2444       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2445       pop = 0xD8;
 2446     }
 2447     emit_opcode( cbuf, 0xDD );
 2448     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2449   %}
 2450 
 2451 
 2452   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2453     // load dst in FPR0
 2454     emit_opcode( cbuf, 0xD9 );
 2455     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2456     if ($src$$reg != FPR1L_enc) {
 2457       // fincstp
 2458       emit_opcode (cbuf, 0xD9);
 2459       emit_opcode (cbuf, 0xF7);
 2460       // swap src with FPR1:
 2461       // FXCH FPR1 with src
 2462       emit_opcode(cbuf, 0xD9);
 2463       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2464       // fdecstp
 2465       emit_opcode (cbuf, 0xD9);
 2466       emit_opcode (cbuf, 0xF6);
 2467     }
 2468   %}
 2469 
 2470   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2471     MacroAssembler _masm(&cbuf);
 2472     __ subptr(rsp, 8);
 2473     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2474     __ fld_d(Address(rsp, 0));
 2475     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2476     __ fld_d(Address(rsp, 0));
 2477   %}
 2478 
 2479   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2480     MacroAssembler _masm(&cbuf);
 2481     __ subptr(rsp, 4);
 2482     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2483     __ fld_s(Address(rsp, 0));
 2484     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2485     __ fld_s(Address(rsp, 0));
 2486   %}
 2487 
 2488   enc_class Push_ResultD(regD dst) %{
 2489     MacroAssembler _masm(&cbuf);
 2490     __ fstp_d(Address(rsp, 0));
 2491     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2492     __ addptr(rsp, 8);
 2493   %}
 2494 
 2495   enc_class Push_ResultF(regF dst, immI d8) %{
 2496     MacroAssembler _masm(&cbuf);
 2497     __ fstp_s(Address(rsp, 0));
 2498     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2499     __ addptr(rsp, $d8$$constant);
 2500   %}
 2501 
 2502   enc_class Push_SrcD(regD src) %{
 2503     MacroAssembler _masm(&cbuf);
 2504     __ subptr(rsp, 8);
 2505     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2506     __ fld_d(Address(rsp, 0));
 2507   %}
 2508 
 2509   enc_class push_stack_temp_qword() %{
 2510     MacroAssembler _masm(&cbuf);
 2511     __ subptr(rsp, 8);
 2512   %}
 2513 
 2514   enc_class pop_stack_temp_qword() %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ addptr(rsp, 8);
 2517   %}
 2518 
 2519   enc_class push_xmm_to_fpr1(regD src) %{
 2520     MacroAssembler _masm(&cbuf);
 2521     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2522     __ fld_d(Address(rsp, 0));
 2523   %}
 2524 
 2525   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2526     if ($src$$reg != FPR1L_enc) {
 2527       // fincstp
 2528       emit_opcode (cbuf, 0xD9);
 2529       emit_opcode (cbuf, 0xF7);
 2530       // FXCH FPR1 with src
 2531       emit_opcode(cbuf, 0xD9);
 2532       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2533       // fdecstp
 2534       emit_opcode (cbuf, 0xD9);
 2535       emit_opcode (cbuf, 0xF6);
 2536     }
 2537     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2538     // // FSTP   FPR$dst$$reg
 2539     // emit_opcode( cbuf, 0xDD );
 2540     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2541   %}
 2542 
 2543   enc_class fnstsw_sahf_skip_parity() %{
 2544     // fnstsw ax
 2545     emit_opcode( cbuf, 0xDF );
 2546     emit_opcode( cbuf, 0xE0 );
 2547     // sahf
 2548     emit_opcode( cbuf, 0x9E );
 2549     // jnp  ::skip
 2550     emit_opcode( cbuf, 0x7B );
 2551     emit_opcode( cbuf, 0x05 );
 2552   %}
 2553 
 2554   enc_class emitModDPR() %{
 2555     // fprem must be iterative
 2556     // :: loop
 2557     // fprem
 2558     emit_opcode( cbuf, 0xD9 );
 2559     emit_opcode( cbuf, 0xF8 );
 2560     // wait
 2561     emit_opcode( cbuf, 0x9b );
 2562     // fnstsw ax
 2563     emit_opcode( cbuf, 0xDF );
 2564     emit_opcode( cbuf, 0xE0 );
 2565     // sahf
 2566     emit_opcode( cbuf, 0x9E );
 2567     // jp  ::loop
 2568     emit_opcode( cbuf, 0x0F );
 2569     emit_opcode( cbuf, 0x8A );
 2570     emit_opcode( cbuf, 0xF4 );
 2571     emit_opcode( cbuf, 0xFF );
 2572     emit_opcode( cbuf, 0xFF );
 2573     emit_opcode( cbuf, 0xFF );
 2574   %}
 2575 
 2576   enc_class fpu_flags() %{
 2577     // fnstsw_ax
 2578     emit_opcode( cbuf, 0xDF);
 2579     emit_opcode( cbuf, 0xE0);
 2580     // test ax,0x0400
 2581     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2582     emit_opcode( cbuf, 0xA9 );
 2583     emit_d16   ( cbuf, 0x0400 );
 2584     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2585     // // test rax,0x0400
 2586     // emit_opcode( cbuf, 0xA9 );
 2587     // emit_d32   ( cbuf, 0x00000400 );
 2588     //
 2589     // jz exit (no unordered comparison)
 2590     emit_opcode( cbuf, 0x74 );
 2591     emit_d8    ( cbuf, 0x02 );
 2592     // mov ah,1 - treat as LT case (set carry flag)
 2593     emit_opcode( cbuf, 0xB4 );
 2594     emit_d8    ( cbuf, 0x01 );
 2595     // sahf
 2596     emit_opcode( cbuf, 0x9E);
 2597   %}
 2598 
 2599   enc_class cmpF_P6_fixup() %{
 2600     // Fixup the integer flags in case comparison involved a NaN
 2601     //
 2602     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2603     emit_opcode( cbuf, 0x7B );
 2604     emit_d8    ( cbuf, 0x03 );
 2605     // MOV AH,1 - treat as LT case (set carry flag)
 2606     emit_opcode( cbuf, 0xB4 );
 2607     emit_d8    ( cbuf, 0x01 );
 2608     // SAHF
 2609     emit_opcode( cbuf, 0x9E);
 2610     // NOP     // target for branch to avoid branch to branch
 2611     emit_opcode( cbuf, 0x90);
 2612   %}
 2613 
 2614 //     fnstsw_ax();
 2615 //     sahf();
 2616 //     movl(dst, nan_result);
 2617 //     jcc(Assembler::parity, exit);
 2618 //     movl(dst, less_result);
 2619 //     jcc(Assembler::below, exit);
 2620 //     movl(dst, equal_result);
 2621 //     jcc(Assembler::equal, exit);
 2622 //     movl(dst, greater_result);
 2623 
 2624 // less_result     =  1;
 2625 // greater_result  = -1;
 2626 // equal_result    = 0;
 2627 // nan_result      = -1;
 2628 
 2629   enc_class CmpF_Result(rRegI dst) %{
 2630     // fnstsw_ax();
 2631     emit_opcode( cbuf, 0xDF);
 2632     emit_opcode( cbuf, 0xE0);
 2633     // sahf
 2634     emit_opcode( cbuf, 0x9E);
 2635     // movl(dst, nan_result);
 2636     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2637     emit_d32( cbuf, -1 );
 2638     // jcc(Assembler::parity, exit);
 2639     emit_opcode( cbuf, 0x7A );
 2640     emit_d8    ( cbuf, 0x13 );
 2641     // movl(dst, less_result);
 2642     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2643     emit_d32( cbuf, -1 );
 2644     // jcc(Assembler::below, exit);
 2645     emit_opcode( cbuf, 0x72 );
 2646     emit_d8    ( cbuf, 0x0C );
 2647     // movl(dst, equal_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, 0 );
 2650     // jcc(Assembler::equal, exit);
 2651     emit_opcode( cbuf, 0x74 );
 2652     emit_d8    ( cbuf, 0x05 );
 2653     // movl(dst, greater_result);
 2654     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2655     emit_d32( cbuf, 1 );
 2656   %}
 2657 
 2658 
 2659   // Compare the longs and set flags
 2660   // BROKEN!  Do Not use as-is
 2661   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2662     // CMP    $src1.hi,$src2.hi
 2663     emit_opcode( cbuf, 0x3B );
 2664     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2665     // JNE,s  done
 2666     emit_opcode(cbuf,0x75);
 2667     emit_d8(cbuf, 2 );
 2668     // CMP    $src1.lo,$src2.lo
 2669     emit_opcode( cbuf, 0x3B );
 2670     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2671 // done:
 2672   %}
 2673 
 2674   enc_class convert_int_long( regL dst, rRegI src ) %{
 2675     // mov $dst.lo,$src
 2676     int dst_encoding = $dst$$reg;
 2677     int src_encoding = $src$$reg;
 2678     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2679     // mov $dst.hi,$src
 2680     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2681     // sar $dst.hi,31
 2682     emit_opcode( cbuf, 0xC1 );
 2683     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2684     emit_d8(cbuf, 0x1F );
 2685   %}
 2686 
 2687   enc_class convert_long_double( eRegL src ) %{
 2688     // push $src.hi
 2689     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2690     // push $src.lo
 2691     emit_opcode(cbuf, 0x50+$src$$reg  );
 2692     // fild 64-bits at [SP]
 2693     emit_opcode(cbuf,0xdf);
 2694     emit_d8(cbuf, 0x6C);
 2695     emit_d8(cbuf, 0x24);
 2696     emit_d8(cbuf, 0x00);
 2697     // pop stack
 2698     emit_opcode(cbuf, 0x83); // add  SP, #8
 2699     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2700     emit_d8(cbuf, 0x8);
 2701   %}
 2702 
 2703   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2704     // IMUL   EDX:EAX,$src1
 2705     emit_opcode( cbuf, 0xF7 );
 2706     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2707     // SAR    EDX,$cnt-32
 2708     int shift_count = ((int)$cnt$$constant) - 32;
 2709     if (shift_count > 0) {
 2710       emit_opcode(cbuf, 0xC1);
 2711       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2712       emit_d8(cbuf, shift_count);
 2713     }
 2714   %}
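
  // A hedged C++ sketch of the multiply-high-and-shift idiom above (commonly
  // generated for division by a constant; EAX is assumed to already hold the
  // constant): the one-operand IMUL leaves the full 64-bit product in EDX:EAX,
  // and since 32 <= cnt <= 63 only EDX needs shifting, by cnt-32.
  //
  //   int32_t mul_con_shift_high(int32_t src1, int32_t con, int cnt /* 32..63 */) {
  //     int64_t product = (int64_t)con * src1;  // IMUL EDX:EAX,$src1
  //     return (int32_t)(product >> cnt);       // SAR EDX,$cnt-32; result in EDX
  //   }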
 2715 
  // Same as convert_long_double, but without the trailing ADD ESP,8 to restore the stack
 2717   enc_class convert_long_double2( eRegL src ) %{
 2718     // push $src.hi
 2719     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2720     // push $src.lo
 2721     emit_opcode(cbuf, 0x50+$src$$reg  );
 2722     // fild 64-bits at [SP]
 2723     emit_opcode(cbuf,0xdf);
 2724     emit_d8(cbuf, 0x6C);
 2725     emit_d8(cbuf, 0x24);
 2726     emit_d8(cbuf, 0x00);
 2727   %}
 2728 
 2729   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2730     // Basic idea: long = (long)int * (long)int
 2731     // IMUL EDX:EAX, src
 2732     emit_opcode( cbuf, 0xF7 );
 2733     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2734   %}
 2735 
 2736   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2737     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2738     // MUL EDX:EAX, src
 2739     emit_opcode( cbuf, 0xF7 );
 2740     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2741   %}
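
  // A hedged C++ sketch of the two widening multiplies above: the one-operand
  // IMUL and MUL forms produce a full 64-bit product in EDX:EAX from two
  // 32-bit inputs, signed and unsigned respectively.
  //
  //   int64_t  mul_widening_signed  (int32_t a,  int32_t b)  { return (int64_t)a  * b; }  // IMUL
  //   uint64_t mul_widening_unsigned(uint32_t a, uint32_t b) { return (uint64_t)a * b; }  // MUL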
 2742 
 2743   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2744     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2745     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2746     // MOV    $tmp,$src.lo
 2747     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2748     // IMUL   $tmp,EDX
 2749     emit_opcode( cbuf, 0x0F );
 2750     emit_opcode( cbuf, 0xAF );
 2751     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2752     // MOV    EDX,$src.hi
 2753     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2754     // IMUL   EDX,EAX
 2755     emit_opcode( cbuf, 0x0F );
 2756     emit_opcode( cbuf, 0xAF );
 2757     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2758     // ADD    $tmp,EDX
 2759     emit_opcode( cbuf, 0x03 );
 2760     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2761     // MUL   EDX:EAX,$src.lo
 2762     emit_opcode( cbuf, 0xF7 );
 2763     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2765     emit_opcode( cbuf, 0x03 );
 2766     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2767   %}
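
  // A hedged C++ sketch of the 64x64->64 decomposition described above: only
  // the low halves need a widening multiply; each cross term contributes just
  // its low 32 bits to the high word of the result.
  //
  //   uint64_t long_multiply(uint64_t x, uint64_t y) {
  //     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
  //     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
  //     uint64_t lo_prod = (uint64_t)x_lo * y_lo;        // MUL EDX:EAX,$src.lo
  //     uint32_t hi = (uint32_t)(lo_prod >> 32)
  //                 + x_hi * y_lo + x_lo * y_hi;         // the two IMULs and ADDs
  //     return ((uint64_t)hi << 32) | (uint32_t)lo_prod;
  //   }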
 2768 
 2769   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2770     // Basic idea: lo(result) = lo(src * y_lo)
 2771     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2772     // IMUL   $tmp,EDX,$src
 2773     emit_opcode( cbuf, 0x6B );
 2774     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2775     emit_d8( cbuf, (int)$src$$constant );
 2776     // MOV    EDX,$src
 2777     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2778     emit_d32( cbuf, (int)$src$$constant );
 2779     // MUL   EDX:EAX,EDX
 2780     emit_opcode( cbuf, 0xF7 );
 2781     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2783     emit_opcode( cbuf, 0x03 );
 2784     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2785   %}
 2786 
 2787   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2788     // PUSH src1.hi
 2789     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2790     // PUSH src1.lo
 2791     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2792     // PUSH src2.hi
 2793     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2794     // PUSH src2.lo
 2795     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2796     // CALL directly to the runtime
 2797     MacroAssembler _masm(&cbuf);
 2798     cbuf.set_insts_mark();
 2799     emit_opcode(cbuf,0xE8);       // Call into runtime
 2800     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2801     __ post_call_nop();
 2802     // Restore stack
 2803     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2804     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2805     emit_d8(cbuf, 4*4);
 2806   %}
 2807 
 2808   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2809     // PUSH src1.hi
 2810     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2811     // PUSH src1.lo
 2812     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2813     // PUSH src2.hi
 2814     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2815     // PUSH src2.lo
 2816     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2817     // CALL directly to the runtime
 2818     MacroAssembler _masm(&cbuf);
 2819     cbuf.set_insts_mark();
 2820     emit_opcode(cbuf,0xE8);       // Call into runtime
 2821     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2822     __ post_call_nop();
 2823     // Restore stack
 2824     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2825     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2826     emit_d8(cbuf, 4*4);
 2827   %}
 2828 
 2829   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2830     // MOV   $tmp,$src.lo
 2831     emit_opcode(cbuf, 0x8B);
 2832     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2833     // OR    $tmp,$src.hi
 2834     emit_opcode(cbuf, 0x0B);
 2835     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2836   %}
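
  // A hedged C++ sketch of the zero test above: OR-ing the two halves together
  // sets the zero flag exactly when the whole 64-bit value is zero.
  //
  //   bool long_is_zero(uint32_t lo, uint32_t hi) {
  //     return (lo | hi) == 0;  // MOV $tmp,$src.lo; OR $tmp,$src.hi; test ZF
  //   }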
 2837 
 2838   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2839     // CMP    $src1.lo,$src2.lo
 2840     emit_opcode( cbuf, 0x3B );
 2841     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2842     // JNE,s  skip
 2843     emit_cc(cbuf, 0x70, 0x5);
 2844     emit_d8(cbuf,2);
 2845     // CMP    $src1.hi,$src2.hi
 2846     emit_opcode( cbuf, 0x3B );
 2847     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2848   %}
 2849 
 2850   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2851     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2852     emit_opcode( cbuf, 0x3B );
 2853     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2854     // MOV    $tmp,$src1.hi
 2855     emit_opcode( cbuf, 0x8B );
 2856     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2857     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2858     emit_opcode( cbuf, 0x1B );
 2859     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2860   %}
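
  // A hedged C++ sketch of the signed long compare above: comparing the low
  // words produces a borrow, and the SBB of the high words folds that borrow
  // in, so the resulting sign/overflow flags are those of the full 64-bit
  // subtraction (whose numeric result is discarded).
  //
  //   bool long_less_than(int64_t a, int64_t b) {
  //     uint32_t a_lo = (uint32_t)a,        b_lo = (uint32_t)b;
  //     int32_t  a_hi = (int32_t)(a >> 32), b_hi = (int32_t)(b >> 32);
  //     int borrow = (a_lo < b_lo) ? 1 : 0;               // CF from CMP $src1.lo,$src2.lo
  //     int64_t hi_diff = (int64_t)a_hi - b_hi - borrow;  // SBB $tmp,$src2.hi
  //     return hi_diff < 0;                               // i.e. a < b (branch on SF^OF)
  //   }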
 2861 
 2862   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2863     // XOR    $tmp,$tmp
 2864     emit_opcode(cbuf,0x33);  // XOR
 2865     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2866     // CMP    $tmp,$src.lo
 2867     emit_opcode( cbuf, 0x3B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2869     // SBB    $tmp,$src.hi
 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2872   %}
 2873 
  // Sniff, sniff... smells like Gnu Superoptimizer
 2875   enc_class neg_long( eRegL dst ) %{
 2876     emit_opcode(cbuf,0xF7);    // NEG hi
 2877     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2878     emit_opcode(cbuf,0xF7);    // NEG lo
 2879     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2880     emit_opcode(cbuf,0x83);    // SBB hi,0
 2881     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2882     emit_d8    (cbuf,0 );
 2883   %}
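
  // A hedged C++ sketch of the branch-free 64-bit negation above, operating on
  // the two 32-bit halves: negate both halves, then subtract from the high half
  // the borrow generated by negating a non-zero low half.
  //
  //   void neg_long(uint32_t& lo, uint32_t& hi) {
  //     hi = 0u - hi;               // NEG hi
  //     lo = 0u - lo;               // NEG lo (CF set iff lo was non-zero)
  //     hi -= (lo != 0) ? 1u : 0u;  // SBB hi,0
  //   }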
 2884 
 2885   enc_class enc_pop_rdx() %{
 2886     emit_opcode(cbuf,0x5A);
 2887   %}
 2888 
 2889   enc_class enc_rethrow() %{
 2890     MacroAssembler _masm(&cbuf);
 2891     cbuf.set_insts_mark();
 2892     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2893     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2894                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2895     __ post_call_nop();
 2896   %}
 2897 
 2898 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'round toward zero', store the darned double down as an int, and reset
  // the rounding mode to 'nearest'.  In the corner cases the hardware stores
  // the integer-indefinite value (0x80000000), which we detect below and
  // patch up with a call into the runtime.
 2904   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
 2908     // However, I2C adapters and other float-stack manglers leave pending
 2909     // invalid-op exceptions hanging.  We would have to clear them before
 2910     // enabling them and that is more expensive than just testing for the
 2911     // invalid value Intel stores down in the corner cases.
 2912     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2913     emit_opcode(cbuf,0x2D);
 2914     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2915     // Allocate a word
 2916     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2917     emit_opcode(cbuf,0xEC);
 2918     emit_d8(cbuf,0x04);
 2919     // Encoding assumes a double has been pushed into FPR0.
 2920     // Store down the double as an int, popping the FPU stack
 2921     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2922     emit_opcode(cbuf,0x1C);
 2923     emit_d8(cbuf,0x24);
 2924     // Restore the rounding mode; mask the exception
 2925     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2926     emit_opcode(cbuf,0x2D);
 2927     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2928         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2929         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2930 
 2931     // Load the converted int; adjust CPU stack
 2932     emit_opcode(cbuf,0x58);       // POP EAX
 2933     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2934     emit_d32   (cbuf,0x80000000); //         0x80000000
 2935     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2936     emit_d8    (cbuf,0x07);       // Size of slow_call
 2937     // Push src onto stack slow-path
 2938     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2939     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2940     // CALL directly to the runtime
 2941     MacroAssembler _masm(&cbuf);
 2942     cbuf.set_insts_mark();
 2943     emit_opcode(cbuf,0xE8);       // Call into runtime
 2944     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2945     __ post_call_nop();
 2946     // Carry on here...
 2947   %}
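
  // A hedged C++ sketch of the Java d2i semantics the encoding above implements
  // via the fast FISTP path plus the runtime slow path: truncate toward zero,
  // map NaN to 0, and saturate out-of-range values.  With exceptions masked,
  // FISTP stores the integer-indefinite value 0x80000000 for every corner case,
  // which is why only that value triggers the slow call.
  //
  //   #include <cstdint>   // int32_t, INT32_MAX, INT32_MIN
  //   int32_t java_d2i(double d) {
  //     if (d != d)              return 0;          // NaN maps to zero
  //     if (d >= 2147483647.0)   return INT32_MAX;  // saturate high
  //     if (d <= -2147483648.0)  return INT32_MIN;  // saturate low
  //     return (int32_t)d;                          // truncate toward zero
  //   }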
 2948 
 2949   enc_class DPR2L_encoding( regDPR src ) %{
 2950     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2951     emit_opcode(cbuf,0x2D);
 2952     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
 2954     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2955     emit_opcode(cbuf,0xEC);
 2956     emit_d8(cbuf,0x08);
 2957     // Encoding assumes a double has been pushed into FPR0.
 2958     // Store down the double as a long, popping the FPU stack
 2959     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2960     emit_opcode(cbuf,0x3C);
 2961     emit_d8(cbuf,0x24);
 2962     // Restore the rounding mode; mask the exception
 2963     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2964     emit_opcode(cbuf,0x2D);
 2965     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2966         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2967         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2968 
    // Load the converted long; adjust CPU stack
 2970     emit_opcode(cbuf,0x58);       // POP EAX
 2971     emit_opcode(cbuf,0x5A);       // POP EDX
 2972     emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // ModRM: /7, EDX
 2974     emit_d32   (cbuf,0x80000000); //         0x80000000
 2975     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2976     emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
 2979     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2980     emit_d8    (cbuf,0x07);       // Size of slow_call
 2981     // Push src onto stack slow-path
 2982     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2983     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2984     // CALL directly to the runtime
 2985     MacroAssembler _masm(&cbuf);
 2986     cbuf.set_insts_mark();
 2987     emit_opcode(cbuf,0xE8);       // Call into runtime
 2988     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2989     __ post_call_nop();
 2990     // Carry on here...
 2991   %}
 2992 
 2993   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2994     // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src1  /* D8 C8+i */
 2996     emit_opcode(cbuf, 0xD8);
 2997     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2998   %}
 2999 
 3000   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,$src2  /* D8 C0+i */
 3002     emit_opcode(cbuf, 0xD8);
 3003     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // Could use FADDP  $src2,ST instead  /* DE C0+i */
 3005   %}
 3006 
 3007   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3008     // FADDP  src2,ST  /* DE C0+i */
 3009     emit_opcode(cbuf, 0xDE);
 3010     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3011   %}
 3012 
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV   ST,$src2
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
 3023 
 3024   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3025     // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src1  /* D8 C0+i */
 3027     emit_opcode(cbuf, 0xD8);
 3028     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3029 
    // FMUL   ST,$src2  /* D8 C8+i */
 3031     emit_opcode(cbuf, 0xD8);
 3032     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3033   %}
 3034 
 3035 
 3036   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3037     // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src1  /* D8 C0+i */
 3039     emit_opcode(cbuf, 0xD8);
 3040     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3041 
 3042     // FMULP  src2,ST  /* DE C8+i */
 3043     emit_opcode(cbuf, 0xDE);
 3044     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3045   %}
 3046 
 3047   // Atomically load the volatile long
 3048   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3049     emit_opcode(cbuf,0xDF);
 3050     int rm_byte_opcode = 0x05;
 3051     int base     = $mem$$base;
 3052     int index    = $mem$$index;
 3053     int scale    = $mem$$scale;
 3054     int displace = $mem$$disp;
 3055     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3056     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3057     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3058   %}
 3059 
 3060   // Volatile Store Long.  Must be atomic, so move it into
 3061   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3062   // target address before the store (for null-ptr checks)
 3063   // so the memory operand is used twice in the encoding.
 3064   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3065     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3066     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3067     emit_opcode(cbuf,0xDF);
 3068     int rm_byte_opcode = 0x07;
 3069     int base     = $mem$$base;
 3070     int index    = $mem$$index;
 3071     int scale    = $mem$$scale;
 3072     int displace = $mem$$disp;
 3073     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3074     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3075   %}
 3076 
 3077 %}
 3078 
 3079 
 3080 //----------FRAME--------------------------------------------------------------
 3081 // Definition of frame structure and management information.
 3082 //
 3083 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3084 //                             |   (to get allocators register number
 3085 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3086 //  r   CALLER     |        |
 3087 //  o     |        +--------+      pad to even-align allocators stack-slot
 3088 //  w     V        |  pad0  |        numbers; owned by CALLER
 3089 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3090 //  h     ^        |   in   |  5
 3091 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3092 //  |     |        |        |  3
 3093 //  |     |        +--------+
 3094 //  V     |        | old out|      Empty on Intel, window on Sparc
 3095 //        |    old |preserve|      Must be even aligned.
 3096 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3097 //        |        |   in   |  3   area for Intel ret address
 3098 //     Owned by    |preserve|      Empty on Sparc.
 3099 //       SELF      +--------+
 3100 //        |        |  pad2  |  2   pad to align old SP
 3101 //        |        +--------+  1
 3102 //        |        | locks  |  0
 3103 //        |        +--------+----> OptoReg::stack0(), even aligned
 3104 //        |        |  pad1  | 11   pad to align new SP
 3105 //        |        +--------+
 3106 //        |        |        | 10
 3107 //        |        | spills |  9   spills
 3108 //        V        |        |  8   (pad0 slot for callee)
 3109 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3110 //        ^        |  out   |  7
 3111 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3112 //     Owned by    +--------+
 3113 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3114 //        |    new |preserve|      Must be even-aligned.
 3115 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3116 //        |        |        |
 3117 //
 3118 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3119 //         known from SELF's arguments and the Java calling convention.
 3120 //         Region 6-7 is determined per call site.
 3121 // Note 2: If the calling convention leaves holes in the incoming argument
 3122 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3123 //         are owned by the CALLEE.  Holes should not be necessary in the
 3124 //         incoming area, as the Java calling convention is completely under
 3125 //         the control of the AD file.  Doubles can be sorted and packed to
 3126 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3127 //         varargs C calling conventions.
 3128 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3129 //         even aligned with pad0 as needed.
 3130 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3131 //         region 6-11 is even aligned; it may be padded out more so that
 3132 //         the region from SP to FP meets the minimum stack alignment.
 3133 
 3134 frame %{
 3135   // These three registers define part of the calling convention
 3136   // between compiled code and the interpreter.
 3137   inline_cache_reg(EAX);                // Inline Cache Register
 3138 
 3139   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3140   cisc_spilling_operand_name(indOffset32);
 3141 
 3142   // Number of stack slots consumed by locking an object
 3143   sync_stack_slots(1);
 3144 
 3145   // Compiled code's Frame Pointer
 3146   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2C adapters.
  // I2C adapters convert from interpreted Java to compiled Java.
 3150   interpreter_frame_pointer(EBP);
 3151 
 3152   // Stack alignment requirement
 3153   // Alignment size in bytes (128-bit -> 16 bytes)
 3154   stack_alignment(StackAlignmentInBytes);
 3155 
 3156   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3157   // for calls to C.  Supports the var-args backing area for register parms.
 3158   varargs_C_out_slots_killed(0);
 3159 
 3160   // The after-PROLOG location of the return address.  Location of
 3161   // return address specifies a type (REG or STACK) and a number
 3162   // representing the register number (i.e. - use a register name) or
 3163   // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and the alignment word.
 3166   return_addr(STACK - 1 +
 3167               align_up((Compile::current()->in_preserve_stack_slots() +
 3168                         Compile::current()->fixed_slots()),
 3169                        stack_alignment_in_slots()));
 3170 
 3171   // Location of C & interpreter return values
 3172   c_return_value %{
 3173     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3174     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3175     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3176 
 3177     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3178     // that C functions return float and double results in XMM0.
 3179     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3180       return OptoRegPair(XMM0b_num,XMM0_num);
 3181     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3182       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3183 
 3184     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3185   %}
 3186 
 3187   // Location of return values
 3188   return_value %{
 3189     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3190     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3191     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3192     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3193       return OptoRegPair(XMM0b_num,XMM0_num);
 3194     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3195       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3196     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3197   %}
 3198 
 3199 %}
 3200 
 3201 //----------ATTRIBUTES---------------------------------------------------------
 3202 //----------Operand Attributes-------------------------------------------------
 3203 op_attrib op_cost(0);        // Required cost attribute
 3204 
 3205 //----------Instruction Attributes---------------------------------------------
 3206 ins_attrib ins_cost(100);       // Required cost attribute
 3207 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
 3211 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3212                                 // specifies the alignment that some part of the instruction (not
 3213                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3214                                 // function must be provided for the instruction
 3215 
 3216 //----------OPERANDS-----------------------------------------------------------
 3217 // Operand definitions must precede instruction definitions for correct parsing
 3218 // in the ADLC because operands constitute user defined types which are used in
 3219 // instruction definitions.
 3220 
 3221 //----------Simple Operands----------------------------------------------------
 3222 // Immediate Operands
 3223 // Integer Immediate
 3224 operand immI() %{
 3225   match(ConI);
 3226 
 3227   op_cost(10);
 3228   format %{ %}
 3229   interface(CONST_INTER);
 3230 %}
 3231 
 3232 // Constant for test vs zero
 3233 operand immI_0() %{
 3234   predicate(n->get_int() == 0);
 3235   match(ConI);
 3236 
 3237   op_cost(0);
 3238   format %{ %}
 3239   interface(CONST_INTER);
 3240 %}
 3241 
 3242 // Constant for increment
 3243 operand immI_1() %{
 3244   predicate(n->get_int() == 1);
 3245   match(ConI);
 3246 
 3247   op_cost(0);
 3248   format %{ %}
 3249   interface(CONST_INTER);
 3250 %}
 3251 
 3252 // Constant for decrement
 3253 operand immI_M1() %{
 3254   predicate(n->get_int() == -1);
 3255   match(ConI);
 3256 
 3257   op_cost(0);
 3258   format %{ %}
 3259   interface(CONST_INTER);
 3260 %}
 3261 
 3262 // Valid scale values for addressing modes
 3263 operand immI2() %{
 3264   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3265   match(ConI);
 3266 
 3267   format %{ %}
 3268   interface(CONST_INTER);
 3269 %}
 3270 
 3271 operand immI8() %{
 3272   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3273   match(ConI);
 3274 
 3275   op_cost(5);
 3276   format %{ %}
 3277   interface(CONST_INTER);
 3278 %}
 3279 
 3280 operand immU8() %{
 3281   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3282   match(ConI);
 3283 
 3284   op_cost(5);
 3285   format %{ %}
 3286   interface(CONST_INTER);
 3287 %}
 3288 
 3289 operand immI16() %{
 3290   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3291   match(ConI);
 3292 
 3293   op_cost(10);
 3294   format %{ %}
 3295   interface(CONST_INTER);
 3296 %}
 3297 
 3298 // Int Immediate non-negative
 3299 operand immU31()
 3300 %{
 3301   predicate(n->get_int() >= 0);
 3302   match(ConI);
 3303 
 3304   op_cost(0);
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 // Constant for long shifts
 3310 operand immI_32() %{
 3311   predicate( n->get_int() == 32 );
 3312   match(ConI);
 3313 
 3314   op_cost(0);
 3315   format %{ %}
 3316   interface(CONST_INTER);
 3317 %}
 3318 
 3319 operand immI_1_31() %{
 3320   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3321   match(ConI);
 3322 
 3323   op_cost(0);
 3324   format %{ %}
 3325   interface(CONST_INTER);
 3326 %}
 3327 
 3328 operand immI_32_63() %{
 3329   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3330   match(ConI);
 3331   op_cost(0);
 3332 
 3333   format %{ %}
 3334   interface(CONST_INTER);
 3335 %}
 3336 
 3337 operand immI_2() %{
 3338   predicate( n->get_int() == 2 );
 3339   match(ConI);
 3340 
 3341   op_cost(0);
 3342   format %{ %}
 3343   interface(CONST_INTER);
 3344 %}
 3345 
 3346 operand immI_3() %{
 3347   predicate( n->get_int() == 3 );
 3348   match(ConI);
 3349 
 3350   op_cost(0);
 3351   format %{ %}
 3352   interface(CONST_INTER);
 3353 %}
 3354 
 3355 operand immI_4()
 3356 %{
 3357   predicate(n->get_int() == 4);
 3358   match(ConI);
 3359 
 3360   op_cost(0);
 3361   format %{ %}
 3362   interface(CONST_INTER);
 3363 %}
 3364 
 3365 operand immI_8()
 3366 %{
 3367   predicate(n->get_int() == 8);
 3368   match(ConI);
 3369 
 3370   op_cost(0);
 3371   format %{ %}
 3372   interface(CONST_INTER);
 3373 %}
 3374 
 3375 // Pointer Immediate
 3376 operand immP() %{
 3377   match(ConP);
 3378 
 3379   op_cost(10);
 3380   format %{ %}
 3381   interface(CONST_INTER);
 3382 %}
 3383 
 3384 // Null Pointer Immediate
 3385 operand immP0() %{
 3386   predicate( n->get_ptr() == 0 );
 3387   match(ConP);
 3388   op_cost(0);
 3389 
 3390   format %{ %}
 3391   interface(CONST_INTER);
 3392 %}
 3393 
 3394 // Long Immediate
 3395 operand immL() %{
 3396   match(ConL);
 3397 
 3398   op_cost(20);
 3399   format %{ %}
 3400   interface(CONST_INTER);
 3401 %}
 3402 
 3403 // Long Immediate zero
 3404 operand immL0() %{
 3405   predicate( n->get_long() == 0L );
 3406   match(ConL);
 3407   op_cost(0);
 3408 
 3409   format %{ %}
 3410   interface(CONST_INTER);
 3411 %}
 3412 
// Long Immediate minus one
 3414 operand immL_M1() %{
 3415   predicate( n->get_long() == -1L );
 3416   match(ConL);
 3417   op_cost(0);
 3418 
 3419   format %{ %}
 3420   interface(CONST_INTER);
 3421 %}
 3422 
// Long immediate from 0 to 127.
// Used for a shorter form of long multiply by a small constant (e.g., 10).
 3425 operand immL_127() %{
 3426   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3427   match(ConL);
 3428   op_cost(0);
 3429 
 3430   format %{ %}
 3431   interface(CONST_INTER);
 3432 %}
 3433 
 3434 // Long Immediate: low 32-bit mask
 3435 operand immL_32bits() %{
 3436   predicate(n->get_long() == 0xFFFFFFFFL);
 3437   match(ConL);
 3438   op_cost(0);
 3439 
 3440   format %{ %}
 3441   interface(CONST_INTER);
 3442 %}
 3443 
// Long Immediate: 32-bit signed value
 3445 operand immL32() %{
 3446   predicate(n->get_long() == (int)(n->get_long()));
 3447   match(ConL);
 3448   op_cost(20);
 3449 
 3450   format %{ %}
 3451   interface(CONST_INTER);
 3452 %}
 3453 
 3454 //Double Immediate zero
 3455 operand immDPR0() %{
 3456   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3457   // bug that generates code such that NaNs compare equal to 0.0
 3458   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3459   match(ConD);
 3460 
 3461   op_cost(5);
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
 3466 // Double Immediate one
 3467 operand immDPR1() %{
 3468   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3469   match(ConD);
 3470 
 3471   op_cost(5);
 3472   format %{ %}
 3473   interface(CONST_INTER);
 3474 %}
 3475 
 3476 // Double Immediate
 3477 operand immDPR() %{
 3478   predicate(UseSSE<=1);
 3479   match(ConD);
 3480 
 3481   op_cost(5);
 3482   format %{ %}
 3483   interface(CONST_INTER);
 3484 %}
 3485 
 3486 operand immD() %{
 3487   predicate(UseSSE>=2);
 3488   match(ConD);
 3489 
 3490   op_cost(5);
 3491   format %{ %}
 3492   interface(CONST_INTER);
 3493 %}
 3494 
 3495 // Double Immediate zero
 3496 operand immD0() %{
 3497   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3498   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3499   // compare equal to -0.0.
 3500   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3501   match(ConD);
 3502 
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Float Immediate zero
 3508 operand immFPR0() %{
 3509   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3510   match(ConF);
 3511 
 3512   op_cost(5);
 3513   format %{ %}
 3514   interface(CONST_INTER);
 3515 %}
 3516 
 3517 // Float Immediate one
 3518 operand immFPR1() %{
 3519   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3520   match(ConF);
 3521 
 3522   op_cost(5);
 3523   format %{ %}
 3524   interface(CONST_INTER);
 3525 %}
 3526 
 3527 // Float Immediate
 3528 operand immFPR() %{
 3529   predicate( UseSSE == 0 );
 3530   match(ConF);
 3531 
 3532   op_cost(5);
 3533   format %{ %}
 3534   interface(CONST_INTER);
 3535 %}
 3536 
 3537 // Float Immediate
 3538 operand immF() %{
 3539   predicate(UseSSE >= 1);
 3540   match(ConF);
 3541 
 3542   op_cost(5);
 3543   format %{ %}
 3544   interface(CONST_INTER);
 3545 %}
 3546 
 3547 // Float Immediate zero.  Zero and not -0.0
 3548 operand immF0() %{
 3549   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3550   match(ConF);
 3551 
 3552   op_cost(5);
 3553   format %{ %}
 3554   interface(CONST_INTER);
 3555 %}
 3556 
 3557 // Immediates for special shifts (sign extend)
 3558 
 3559 // Constants for increment
 3560 operand immI_16() %{
 3561   predicate( n->get_int() == 16 );
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 operand immI_24() %{
 3569   predicate( n->get_int() == 24 );
 3570   match(ConI);
 3571 
 3572   format %{ %}
 3573   interface(CONST_INTER);
 3574 %}
 3575 
 3576 // Constant for byte-wide masking
 3577 operand immI_255() %{
 3578   predicate( n->get_int() == 255 );
 3579   match(ConI);
 3580 
 3581   format %{ %}
 3582   interface(CONST_INTER);
 3583 %}
 3584 
 3585 // Constant for short-wide masking
 3586 operand immI_65535() %{
 3587   predicate(n->get_int() == 65535);
 3588   match(ConI);
 3589 
 3590   format %{ %}
 3591   interface(CONST_INTER);
 3592 %}
 3593 
 3594 operand kReg()
 3595 %{
 3596   constraint(ALLOC_IN_RC(vectmask_reg));
 3597   match(RegVectMask);
 3598   format %{%}
 3599   interface(REG_INTER);
 3600 %}
 3601 
 3602 operand kReg_K1()
 3603 %{
 3604   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3605   match(RegVectMask);
 3606   format %{%}
 3607   interface(REG_INTER);
 3608 %}
 3609 
 3610 operand kReg_K2()
 3611 %{
 3612   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3613   match(RegVectMask);
 3614   format %{%}
 3615   interface(REG_INTER);
 3616 %}
 3617 
 3618 // Special Registers
 3619 operand kReg_K3()
 3620 %{
 3621   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3622   match(RegVectMask);
 3623   format %{%}
 3624   interface(REG_INTER);
 3625 %}
 3626 
 3627 operand kReg_K4()
 3628 %{
 3629   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3630   match(RegVectMask);
 3631   format %{%}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 operand kReg_K5()
 3636 %{
 3637   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3638   match(RegVectMask);
 3639   format %{%}
 3640   interface(REG_INTER);
 3641 %}
 3642 
 3643 operand kReg_K6()
 3644 %{
 3645   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3646   match(RegVectMask);
 3647   format %{%}
 3648   interface(REG_INTER);
 3649 %}
 3650 
 3651 // Special Registers
 3652 operand kReg_K7()
 3653 %{
 3654   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3655   match(RegVectMask);
 3656   format %{%}
 3657   interface(REG_INTER);
 3658 %}
 3659 
 3660 // Register Operands
 3661 // Integer Register
 3662 operand rRegI() %{
 3663   constraint(ALLOC_IN_RC(int_reg));
 3664   match(RegI);
 3665   match(xRegI);
 3666   match(eAXRegI);
 3667   match(eBXRegI);
 3668   match(eCXRegI);
 3669   match(eDXRegI);
 3670   match(eDIRegI);
 3671   match(eSIRegI);
 3672 
 3673   format %{ %}
 3674   interface(REG_INTER);
 3675 %}
 3676 
 3677 // Subset of Integer Register
 3678 operand xRegI(rRegI reg) %{
 3679   constraint(ALLOC_IN_RC(int_x_reg));
 3680   match(reg);
 3681   match(eAXRegI);
 3682   match(eBXRegI);
 3683   match(eCXRegI);
 3684   match(eDXRegI);
 3685 
 3686   format %{ %}
 3687   interface(REG_INTER);
 3688 %}
 3689 
 3690 // Special Registers
 3691 operand eAXRegI(xRegI reg) %{
 3692   constraint(ALLOC_IN_RC(eax_reg));
 3693   match(reg);
 3694   match(rRegI);
 3695 
 3696   format %{ "EAX" %}
 3697   interface(REG_INTER);
 3698 %}
 3699 
 3700 // Special Registers
 3701 operand eBXRegI(xRegI reg) %{
 3702   constraint(ALLOC_IN_RC(ebx_reg));
 3703   match(reg);
 3704   match(rRegI);
 3705 
 3706   format %{ "EBX" %}
 3707   interface(REG_INTER);
 3708 %}
 3709 
 3710 operand eCXRegI(xRegI reg) %{
 3711   constraint(ALLOC_IN_RC(ecx_reg));
 3712   match(reg);
 3713   match(rRegI);
 3714 
 3715   format %{ "ECX" %}
 3716   interface(REG_INTER);
 3717 %}
 3718 
 3719 operand eDXRegI(xRegI reg) %{
 3720   constraint(ALLOC_IN_RC(edx_reg));
 3721   match(reg);
 3722   match(rRegI);
 3723 
 3724   format %{ "EDX" %}
 3725   interface(REG_INTER);
 3726 %}
 3727 
 3728 operand eDIRegI(xRegI reg) %{
 3729   constraint(ALLOC_IN_RC(edi_reg));
 3730   match(reg);
 3731   match(rRegI);
 3732 
 3733   format %{ "EDI" %}
 3734   interface(REG_INTER);
 3735 %}
 3736 
 3737 operand naxRegI() %{
 3738   constraint(ALLOC_IN_RC(nax_reg));
 3739   match(RegI);
 3740   match(eCXRegI);
 3741   match(eDXRegI);
 3742   match(eSIRegI);
 3743   match(eDIRegI);
 3744 
 3745   format %{ %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 operand nadxRegI() %{
 3750   constraint(ALLOC_IN_RC(nadx_reg));
 3751   match(RegI);
 3752   match(eBXRegI);
 3753   match(eCXRegI);
 3754   match(eSIRegI);
 3755   match(eDIRegI);
 3756 
 3757   format %{ %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand ncxRegI() %{
 3762   constraint(ALLOC_IN_RC(ncx_reg));
 3763   match(RegI);
 3764   match(eAXRegI);
 3765   match(eDXRegI);
 3766   match(eSIRegI);
 3767   match(eDIRegI);
 3768 
 3769   format %{ %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3774 // //
 3775 operand eSIRegI(xRegI reg) %{
 3776    constraint(ALLOC_IN_RC(esi_reg));
 3777    match(reg);
 3778    match(rRegI);
 3779 
 3780    format %{ "ESI" %}
 3781    interface(REG_INTER);
 3782 %}
 3783 
 3784 // Pointer Register
 3785 operand anyRegP() %{
 3786   constraint(ALLOC_IN_RC(any_reg));
 3787   match(RegP);
 3788   match(eAXRegP);
 3789   match(eBXRegP);
 3790   match(eCXRegP);
 3791   match(eDIRegP);
 3792   match(eRegP);
 3793 
 3794   format %{ %}
 3795   interface(REG_INTER);
 3796 %}
 3797 
 3798 operand eRegP() %{
 3799   constraint(ALLOC_IN_RC(int_reg));
 3800   match(RegP);
 3801   match(eAXRegP);
 3802   match(eBXRegP);
 3803   match(eCXRegP);
 3804   match(eDIRegP);
 3805 
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand rRegP() %{
 3811   constraint(ALLOC_IN_RC(int_reg));
 3812   match(RegP);
 3813   match(eAXRegP);
 3814   match(eBXRegP);
 3815   match(eCXRegP);
 3816   match(eDIRegP);
 3817 
 3818   format %{ %}
 3819   interface(REG_INTER);
 3820 %}
 3821 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3823 operand eRegP_no_EBP() %{
 3824   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3825   match(RegP);
 3826   match(eAXRegP);
 3827   match(eBXRegP);
 3828   match(eCXRegP);
 3829   match(eDIRegP);
 3830 
 3831   op_cost(100);
 3832   format %{ %}
 3833   interface(REG_INTER);
 3834 %}
 3835 
 3836 operand naxRegP() %{
 3837   constraint(ALLOC_IN_RC(nax_reg));
 3838   match(RegP);
 3839   match(eBXRegP);
 3840   match(eDXRegP);
 3841   match(eCXRegP);
 3842   match(eSIRegP);
 3843   match(eDIRegP);
 3844 
 3845   format %{ %}
 3846   interface(REG_INTER);
 3847 %}
 3848 
 3849 operand nabxRegP() %{
 3850   constraint(ALLOC_IN_RC(nabx_reg));
 3851   match(RegP);
 3852   match(eCXRegP);
 3853   match(eDXRegP);
 3854   match(eSIRegP);
 3855   match(eDIRegP);
 3856 
 3857   format %{ %}
 3858   interface(REG_INTER);
 3859 %}
 3860 
 3861 operand pRegP() %{
 3862   constraint(ALLOC_IN_RC(p_reg));
 3863   match(RegP);
 3864   match(eBXRegP);
 3865   match(eDXRegP);
 3866   match(eSIRegP);
 3867   match(eDIRegP);
 3868 
 3869   format %{ %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 // Special Registers
 3874 // Return a pointer value
 3875 operand eAXRegP(eRegP reg) %{
 3876   constraint(ALLOC_IN_RC(eax_reg));
 3877   match(reg);
 3878   format %{ "EAX" %}
 3879   interface(REG_INTER);
 3880 %}
 3881 
 3882 // Used in AtomicAdd
 3883 operand eBXRegP(eRegP reg) %{
 3884   constraint(ALLOC_IN_RC(ebx_reg));
 3885   match(reg);
 3886   format %{ "EBX" %}
 3887   interface(REG_INTER);
 3888 %}
 3889 
 3890 // Tail-call (interprocedural jump) to interpreter
 3891 operand eCXRegP(eRegP reg) %{
 3892   constraint(ALLOC_IN_RC(ecx_reg));
 3893   match(reg);
 3894   format %{ "ECX" %}
 3895   interface(REG_INTER);
 3896 %}
 3897 
 3898 operand eDXRegP(eRegP reg) %{
 3899   constraint(ALLOC_IN_RC(edx_reg));
 3900   match(reg);
 3901   format %{ "EDX" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 operand eSIRegP(eRegP reg) %{
 3906   constraint(ALLOC_IN_RC(esi_reg));
 3907   match(reg);
 3908   format %{ "ESI" %}
 3909   interface(REG_INTER);
 3910 %}
 3911 
 3912 // Used in rep stosw
 3913 operand eDIRegP(eRegP reg) %{
 3914   constraint(ALLOC_IN_RC(edi_reg));
 3915   match(reg);
 3916   format %{ "EDI" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eRegL() %{
 3921   constraint(ALLOC_IN_RC(long_reg));
 3922   match(RegL);
 3923   match(eADXRegL);
 3924 
 3925   format %{ %}
 3926   interface(REG_INTER);
 3927 %}
 3928 
 3929 operand eADXRegL( eRegL reg ) %{
 3930   constraint(ALLOC_IN_RC(eadx_reg));
 3931   match(reg);
 3932 
 3933   format %{ "EDX:EAX" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 operand eBCXRegL( eRegL reg ) %{
 3938   constraint(ALLOC_IN_RC(ebcx_reg));
 3939   match(reg);
 3940 
 3941   format %{ "EBX:ECX" %}
 3942   interface(REG_INTER);
 3943 %}
 3944 
 3945 operand eBDPRegL( eRegL reg ) %{
 3946   constraint(ALLOC_IN_RC(ebpd_reg));
 3947   match(reg);
 3948 
 3949   format %{ "EBP:EDI" %}
 3950   interface(REG_INTER);
 3951 %}
 3952 // Special case for integer high multiply
 3953 operand eADXRegL_low_only() %{
 3954   constraint(ALLOC_IN_RC(eadx_reg));
 3955   match(RegL);
 3956 
 3957   format %{ "EAX" %}
 3958   interface(REG_INTER);
 3959 %}
 3960 
 3961 // Flags register, used as output of compare instructions
 3962 operand rFlagsReg() %{
 3963   constraint(ALLOC_IN_RC(int_flags));
 3964   match(RegFlags);
 3965 
 3966   format %{ "EFLAGS" %}
 3967   interface(REG_INTER);
 3968 %}
 3969 
 3970 // Flags register, used as output of compare instructions
 3971 operand eFlagsReg() %{
 3972   constraint(ALLOC_IN_RC(int_flags));
 3973   match(RegFlags);
 3974 
 3975   format %{ "EFLAGS" %}
 3976   interface(REG_INTER);
 3977 %}
 3978 
 3979 // Flags register, used as output of FLOATING POINT compare instructions
 3980 operand eFlagsRegU() %{
 3981   constraint(ALLOC_IN_RC(int_flags));
 3982   match(RegFlags);
 3983 
 3984   format %{ "EFLAGS_U" %}
 3985   interface(REG_INTER);
 3986 %}
 3987 
 3988 operand eFlagsRegUCF() %{
 3989   constraint(ALLOC_IN_RC(int_flags));
 3990   match(RegFlags);
 3991   predicate(false);
 3992 
 3993   format %{ "EFLAGS_U_CF" %}
 3994   interface(REG_INTER);
 3995 %}
 3996 
 3997 // Condition Code Register used by long compare
 3998 operand flagsReg_long_LTGE() %{
 3999   constraint(ALLOC_IN_RC(int_flags));
 4000   match(RegFlags);
 4001   format %{ "FLAGS_LTGE" %}
 4002   interface(REG_INTER);
 4003 %}
 4004 operand flagsReg_long_EQNE() %{
 4005   constraint(ALLOC_IN_RC(int_flags));
 4006   match(RegFlags);
 4007   format %{ "FLAGS_EQNE" %}
 4008   interface(REG_INTER);
 4009 %}
 4010 operand flagsReg_long_LEGT() %{
 4011   constraint(ALLOC_IN_RC(int_flags));
 4012   match(RegFlags);
 4013   format %{ "FLAGS_LEGT" %}
 4014   interface(REG_INTER);
 4015 %}
 4016 
 4017 // Condition Code Register used by unsigned long compare
 4018 operand flagsReg_ulong_LTGE() %{
 4019   constraint(ALLOC_IN_RC(int_flags));
 4020   match(RegFlags);
 4021   format %{ "FLAGS_U_LTGE" %}
 4022   interface(REG_INTER);
 4023 %}
 4024 operand flagsReg_ulong_EQNE() %{
 4025   constraint(ALLOC_IN_RC(int_flags));
 4026   match(RegFlags);
 4027   format %{ "FLAGS_U_EQNE" %}
 4028   interface(REG_INTER);
 4029 %}
 4030 operand flagsReg_ulong_LEGT() %{
 4031   constraint(ALLOC_IN_RC(int_flags));
 4032   match(RegFlags);
 4033   format %{ "FLAGS_U_LEGT" %}
 4034   interface(REG_INTER);
 4035 %}
 4036 
 4037 // Float register operands
 4038 operand regDPR() %{
 4039   predicate( UseSSE < 2 );
 4040   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4041   match(RegD);
 4042   match(regDPR1);
 4043   match(regDPR2);
 4044   format %{ %}
 4045   interface(REG_INTER);
 4046 %}
 4047 
 4048 operand regDPR1(regDPR reg) %{
 4049   predicate( UseSSE < 2 );
 4050   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4051   match(reg);
 4052   format %{ "FPR1" %}
 4053   interface(REG_INTER);
 4054 %}
 4055 
 4056 operand regDPR2(regDPR reg) %{
 4057   predicate( UseSSE < 2 );
 4058   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4059   match(reg);
 4060   format %{ "FPR2" %}
 4061   interface(REG_INTER);
 4062 %}
 4063 
 4064 operand regnotDPR1(regDPR reg) %{
 4065   predicate( UseSSE < 2 );
 4066   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4067   match(reg);
 4068   format %{ %}
 4069   interface(REG_INTER);
 4070 %}
 4071 
 4072 // Float register operands
 4073 operand regFPR() %{
 4074   predicate( UseSSE < 2 );
 4075   constraint(ALLOC_IN_RC(fp_flt_reg));
 4076   match(RegF);
 4077   match(regFPR1);
 4078   format %{ %}
 4079   interface(REG_INTER);
 4080 %}
 4081 
 4082 // Float register operands
 4083 operand regFPR1(regFPR reg) %{
 4084   predicate( UseSSE < 2 );
 4085   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4086   match(reg);
 4087   format %{ "FPR1" %}
 4088   interface(REG_INTER);
 4089 %}
 4090 
 4091 // XMM Float register operands
 4092 operand regF() %{
 4093   predicate( UseSSE>=1 );
 4094   constraint(ALLOC_IN_RC(float_reg_legacy));
 4095   match(RegF);
 4096   format %{ %}
 4097   interface(REG_INTER);
 4098 %}
 4099 
 4100 operand legRegF() %{
 4101   predicate( UseSSE>=1 );
 4102   constraint(ALLOC_IN_RC(float_reg_legacy));
 4103   match(RegF);
 4104   format %{ %}
 4105   interface(REG_INTER);
 4106 %}
 4107 
 4108 // Float register operands
 4109 operand vlRegF() %{
 4110    constraint(ALLOC_IN_RC(float_reg_vl));
 4111    match(RegF);
 4112 
 4113    format %{ %}
 4114    interface(REG_INTER);
 4115 %}
 4116 
 4117 // XMM Double register operands
 4118 operand regD() %{
 4119   predicate( UseSSE>=2 );
 4120   constraint(ALLOC_IN_RC(double_reg_legacy));
 4121   match(RegD);
 4122   format %{ %}
 4123   interface(REG_INTER);
 4124 %}
 4125 
 4126 // Double register operands
 4127 operand legRegD() %{
 4128   predicate( UseSSE>=2 );
 4129   constraint(ALLOC_IN_RC(double_reg_legacy));
 4130   match(RegD);
 4131   format %{ %}
 4132   interface(REG_INTER);
 4133 %}
 4134 
 4135 operand vlRegD() %{
 4136    constraint(ALLOC_IN_RC(double_reg_vl));
 4137    match(RegD);
 4138 
 4139    format %{ %}
 4140    interface(REG_INTER);
 4141 %}
 4142 
 4143 //----------Memory Operands----------------------------------------------------
 4144 // Direct Memory Operand
 4145 operand direct(immP addr) %{
 4146   match(addr);
 4147 
 4148   format %{ "[$addr]" %}
 4149   interface(MEMORY_INTER) %{
 4150     base(0xFFFFFFFF);
 4151     index(0x4);
 4152     scale(0x0);
 4153     disp($addr);
 4154   %}
 4155 %}
 4156 
 4157 // Indirect Memory Operand
 4158 operand indirect(eRegP reg) %{
 4159   constraint(ALLOC_IN_RC(int_reg));
 4160   match(reg);
 4161 
 4162   format %{ "[$reg]" %}
 4163   interface(MEMORY_INTER) %{
 4164     base($reg);
 4165     index(0x4);
 4166     scale(0x0);
 4167     disp(0x0);
 4168   %}
 4169 %}
 4170 
 4171 // Indirect Memory Plus Short Offset Operand
 4172 operand indOffset8(eRegP reg, immI8 off) %{
 4173   match(AddP reg off);
 4174 
 4175   format %{ "[$reg + $off]" %}
 4176   interface(MEMORY_INTER) %{
 4177     base($reg);
 4178     index(0x4);
 4179     scale(0x0);
 4180     disp($off);
 4181   %}
 4182 %}
 4183 
 4184 // Indirect Memory Plus Long Offset Operand
 4185 operand indOffset32(eRegP reg, immI off) %{
 4186   match(AddP reg off);
 4187 
 4188   format %{ "[$reg + $off]" %}
 4189   interface(MEMORY_INTER) %{
 4190     base($reg);
 4191     index(0x4);
 4192     scale(0x0);
 4193     disp($off);
 4194   %}
 4195 %}
 4196 
 4197 // Indirect Memory Plus Long Offset Operand
 4198 operand indOffset32X(rRegI reg, immP off) %{
 4199   match(AddP off reg);
 4200 
 4201   format %{ "[$reg + $off]" %}
 4202   interface(MEMORY_INTER) %{
 4203     base($reg);
 4204     index(0x4);
 4205     scale(0x0);
 4206     disp($off);
 4207   %}
 4208 %}
 4209 
 4210 // Indirect Memory Plus Index Register Plus Offset Operand
 4211 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4212   match(AddP (AddP reg ireg) off);
 4213 
 4214   op_cost(10);
 4215   format %{"[$reg + $off + $ireg]" %}
 4216   interface(MEMORY_INTER) %{
 4217     base($reg);
 4218     index($ireg);
 4219     scale(0x0);
 4220     disp($off);
 4221   %}
 4222 %}
 4223 
 4224 // Indirect Memory Plus Index Register Plus Offset Operand
 4225 operand indIndex(eRegP reg, rRegI ireg) %{
 4226   match(AddP reg ireg);
 4227 
 4228   op_cost(10);
 4229   format %{"[$reg + $ireg]" %}
 4230   interface(MEMORY_INTER) %{
 4231     base($reg);
 4232     index($ireg);
 4233     scale(0x0);
 4234     disp(0x0);
 4235   %}
 4236 %}
 4237 
 4238 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4240 // // -------------------------------------------------------------------------
 4241 // // Scaled Memory Operands
 4242 // // Indirect Memory Times Scale Plus Offset Operand
 4243 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4244 //   match(AddP off (LShiftI ireg scale));
 4245 //
 4246 //   op_cost(10);
 4247 //   format %{"[$off + $ireg << $scale]" %}
 4248 //   interface(MEMORY_INTER) %{
 4249 //     base(0x4);
 4250 //     index($ireg);
 4251 //     scale($scale);
 4252 //     disp($off);
 4253 //   %}
 4254 // %}
 4255 
 4256 // Indirect Memory Times Scale Plus Index Register
 4257 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4258   match(AddP reg (LShiftI ireg scale));
 4259 
 4260   op_cost(10);
 4261   format %{"[$reg + $ireg << $scale]" %}
 4262   interface(MEMORY_INTER) %{
 4263     base($reg);
 4264     index($ireg);
 4265     scale($scale);
 4266     disp(0x0);
 4267   %}
 4268 %}
 4269 
 4270 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4271 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4272   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4273 
 4274   op_cost(10);
 4275   format %{"[$reg + $off + $ireg << $scale]" %}
 4276   interface(MEMORY_INTER) %{
 4277     base($reg);
 4278     index($ireg);
 4279     scale($scale);
 4280     disp($off);
 4281   %}
 4282 %}
 4283 
 4284 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the second half will be loaded
 4288 // from a clobbered address.  Fix this by requiring that load-long use
 4289 // address registers that do not overlap with the load-long target.
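
// A hedged C++ sketch of the hazard described above.  Register allocation
// cannot be expressed in C++, so assume 'base' is a char* address whose
// register has also been chosen for the low destination word:
//
//   int32_t lo = *(int32_t*)(base + off);      // may overwrite the register holding 'base'
//   int32_t hi = *(int32_t*)(base + off + 4);  // would then read from a clobbered address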
 4290 
 4291 // load-long support
 4292 operand load_long_RegP() %{
 4293   constraint(ALLOC_IN_RC(esi_reg));
 4294   match(RegP);
 4295   match(eSIRegP);
 4296   op_cost(100);
 4297   format %{  %}
 4298   interface(REG_INTER);
 4299 %}
 4300 
 4301 // Indirect Memory Operand Long
 4302 operand load_long_indirect(load_long_RegP reg) %{
 4303   constraint(ALLOC_IN_RC(esi_reg));
 4304   match(reg);
 4305 
 4306   format %{ "[$reg]" %}
 4307   interface(MEMORY_INTER) %{
 4308     base($reg);
 4309     index(0x4);
 4310     scale(0x0);
 4311     disp(0x0);
 4312   %}
 4313 %}
 4314 
 4315 // Indirect Memory Plus Long Offset Operand
 4316 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4317   match(AddP reg off);
 4318 
 4319   format %{ "[$reg + $off]" %}
 4320   interface(MEMORY_INTER) %{
 4321     base($reg);
 4322     index(0x4);
 4323     scale(0x0);
 4324     disp($off);
 4325   %}
 4326 %}
 4327 
 4328 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4329 
 4330 
 4331 //----------Special Memory Operands--------------------------------------------
 4332 // Stack Slot Operand - This operand is used for loading and storing temporary
 4333 //                      values on the stack where a match requires a value to
 4334 //                      flow through memory.
 4335 operand stackSlotP(sRegP reg) %{
 4336   constraint(ALLOC_IN_RC(stack_slots));
 4337   // No match rule because this operand is only generated in matching
 4338   format %{ "[$reg]" %}
 4339   interface(MEMORY_INTER) %{
 4340     base(0x4);   // ESP
 4341     index(0x4);  // No Index
 4342     scale(0x0);  // No Scale
 4343     disp($reg);  // Stack Offset
 4344   %}
 4345 %}
 4346 
 4347 operand stackSlotI(sRegI reg) %{
 4348   constraint(ALLOC_IN_RC(stack_slots));
 4349   // No match rule because this operand is only generated in matching
 4350   format %{ "[$reg]" %}
 4351   interface(MEMORY_INTER) %{
 4352     base(0x4);   // ESP
 4353     index(0x4);  // No Index
 4354     scale(0x0);  // No Scale
 4355     disp($reg);  // Stack Offset
 4356   %}
 4357 %}
 4358 
 4359 operand stackSlotF(sRegF reg) %{
 4360   constraint(ALLOC_IN_RC(stack_slots));
 4361   // No match rule because this operand is only generated in matching
 4362   format %{ "[$reg]" %}
 4363   interface(MEMORY_INTER) %{
 4364     base(0x4);   // ESP
 4365     index(0x4);  // No Index
 4366     scale(0x0);  // No Scale
 4367     disp($reg);  // Stack Offset
 4368   %}
 4369 %}
 4370 
 4371 operand stackSlotD(sRegD reg) %{
 4372   constraint(ALLOC_IN_RC(stack_slots));
 4373   // No match rule because this operand is only generated in matching
 4374   format %{ "[$reg]" %}
 4375   interface(MEMORY_INTER) %{
 4376     base(0x4);   // ESP
 4377     index(0x4);  // No Index
 4378     scale(0x0);  // No Scale
 4379     disp($reg);  // Stack Offset
 4380   %}
 4381 %}
 4382 
 4383 operand stackSlotL(sRegL reg) %{
 4384   constraint(ALLOC_IN_RC(stack_slots));
 4385   // No match rule because this operand is only generated in matching
 4386   format %{ "[$reg]" %}
 4387   interface(MEMORY_INTER) %{
 4388     base(0x4);   // ESP
 4389     index(0x4);  // No Index
 4390     scale(0x0);  // No Scale
 4391     disp($reg);  // Stack Offset
 4392   %}
 4393 %}
 4394 
 4395 //----------Conditional Branch Operands----------------------------------------
 4396 // Comparison Op  - This is the operation of the comparison, and is limited to
 4397 //                  the following set of codes:
 4398 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4399 //
 4400 // Other attributes of the comparison, such as unsignedness, are specified
 4401 // by the comparison instruction that sets a condition code flags register.
 4402 // That result is represented by a flags operand whose subtype is appropriate
 4403 // to the unsignedness (etc.) of the comparison.
 4404 //
 4405 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4406 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4407 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4408 
 4409 // Comparison Code
 4410 operand cmpOp() %{
 4411   match(Bool);
 4412 
 4413   format %{ "" %}
 4414   interface(COND_INTER) %{
 4415     equal(0x4, "e");
 4416     not_equal(0x5, "ne");
 4417     less(0xC, "l");
 4418     greater_equal(0xD, "ge");
 4419     less_equal(0xE, "le");
 4420     greater(0xF, "g");
 4421     overflow(0x0, "o");
 4422     no_overflow(0x1, "no");
 4423   %}
 4424 %}
 4425 
 4426 // Comparison Code, unsigned compare.  Used by FP also, with
 4427 // C2 (unordered) turned into GT or LT already.  The other bits
 4428 // C0 and C3 are turned into Carry & Zero flags.
 4429 operand cmpOpU() %{
 4430   match(Bool);
 4431 
 4432   format %{ "" %}
 4433   interface(COND_INTER) %{
 4434     equal(0x4, "e");
 4435     not_equal(0x5, "ne");
 4436     less(0x2, "b");
 4437     greater_equal(0x3, "nb");
 4438     less_equal(0x6, "be");
 4439     greater(0x7, "nbe");
 4440     overflow(0x0, "o");
 4441     no_overflow(0x1, "no");
 4442   %}
 4443 %}
 4444 
 4445 // Floating comparisons that don't require any fixup for the unordered case
 4446 operand cmpOpUCF() %{
 4447   match(Bool);
 4448   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4449             n->as_Bool()->_test._test == BoolTest::ge ||
 4450             n->as_Bool()->_test._test == BoolTest::le ||
 4451             n->as_Bool()->_test._test == BoolTest::gt);
 4452   format %{ "" %}
 4453   interface(COND_INTER) %{
 4454     equal(0x4, "e");
 4455     not_equal(0x5, "ne");
 4456     less(0x2, "b");
 4457     greater_equal(0x3, "nb");
 4458     less_equal(0x6, "be");
 4459     greater(0x7, "nbe");
 4460     overflow(0x0, "o");
 4461     no_overflow(0x1, "no");
 4462   %}
 4463 %}
 4464 
 4465 
 4466 // Floating comparisons that can be fixed up with extra conditional jumps
 4467 operand cmpOpUCF2() %{
 4468   match(Bool);
 4469   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4470             n->as_Bool()->_test._test == BoolTest::eq);
 4471   format %{ "" %}
 4472   interface(COND_INTER) %{
 4473     equal(0x4, "e");
 4474     not_equal(0x5, "ne");
 4475     less(0x2, "b");
 4476     greater_equal(0x3, "nb");
 4477     less_equal(0x6, "be");
 4478     greater(0x7, "nbe");
 4479     overflow(0x0, "o");
 4480     no_overflow(0x1, "no");
 4481   %}
 4482 %}
 4483 
 4484 // Comparison Code for FP conditional move
 4485 operand cmpOp_fcmov() %{
 4486   match(Bool);
 4487 
 4488   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4489             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4490   format %{ "" %}
 4491   interface(COND_INTER) %{
 4492     equal        (0x0C8);
 4493     not_equal    (0x1C8);
 4494     less         (0x0C0);
 4495     greater_equal(0x1C0);
 4496     less_equal   (0x0D0);
 4497     greater      (0x1D0);
 4498     overflow(0x0, "o"); // not really supported by the instruction
 4499     no_overflow(0x1, "no"); // not really supported by the instruction
 4500   %}
 4501 %}
 4502 
 4503 // Comparison Code used in long compares
 4504 operand cmpOp_commute() %{
 4505   match(Bool);
 4506 
 4507   format %{ "" %}
 4508   interface(COND_INTER) %{
 4509     equal(0x4, "e");
 4510     not_equal(0x5, "ne");
 4511     less(0xF, "g");
 4512     greater_equal(0xE, "le");
 4513     less_equal(0xD, "ge");
 4514     greater(0xC, "l");
 4515     overflow(0x0, "o");
 4516     no_overflow(0x1, "no");
 4517   %}
 4518 %}
 4519 
 4520 // Comparison Code used in unsigned long compares
 4521 operand cmpOpU_commute() %{
 4522   match(Bool);
 4523 
 4524   format %{ "" %}
 4525   interface(COND_INTER) %{
 4526     equal(0x4, "e");
 4527     not_equal(0x5, "ne");
 4528     less(0x7, "nbe");
 4529     greater_equal(0x6, "be");
 4530     less_equal(0x3, "nb");
 4531     greater(0x2, "b");
 4532     overflow(0x0, "o");
 4533     no_overflow(0x1, "no");
 4534   %}
 4535 %}
 4536 
 4537 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4539 // instruction definitions by not requiring the AD writer to specify separate
 4540 // instructions for every form of operand when the instruction accepts
 4541 // multiple operand types with the same basic encoding and format.  The classic
 4542 // case of this is memory operands.
 4543 
 4544 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4545                indIndex, indIndexScale, indIndexScaleOffset);
 4546 
 4547 // Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (as is done when working on static globals).
 4550 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4551                     indIndex, indIndexScale, indIndexScaleOffset);
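
// For illustration only (a hypothetical sketch kept in a comment; the real
// load instructions appear in the instruction section below): one instruct
// written against the "memory" opclass matches every addressing form listed
// above, so a single rule covers [reg], [reg+off8], [reg+off32],
// [reg+index*scale+off], and so on:
//
//   instruct loadI_sketch(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ins_cost(125);
//     format %{ "MOV    $dst,$mem\t# int" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}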
 4552 
 4553 
 4554 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 4556 pipeline %{
 4557 
 4558 //----------ATTRIBUTES---------------------------------------------------------
 4559 attributes %{
  variable_size_instructions;        // Variable size instructions
 4561   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4563   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4564   instruction_fetch_units = 1;       // of 16 bytes
 4565 
 4566   // List of nop instructions
 4567   nops( MachNop );
 4568 %}
 4569 
 4570 //----------RESOURCES----------------------------------------------------------
 4571 // Resources are the functional units available to the machine
 4572 
 4573 // Generic P2/P3 pipeline
 4574 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4575 // 3 instructions decoded per cycle.
 4576 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
 4578 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4579            MS0, MS1, MEM = MS0 | MS1,
 4580            BR, FPU,
 4581            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4582 
 4583 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4584 // Pipeline Description specifies the stages in the machine's pipeline
 4585 
 4586 // Generic P2/P3 pipeline
 4587 pipe_desc(S0, S1, S2, S3, S4, S5);
 4588 
 4589 //----------PIPELINE CLASSES---------------------------------------------------
 4590 // Pipeline Classes describe the stages in which input and output are
 4591 // referenced by the hardware pipeline.
 4592 
 4593 // Naming convention: ialu or fpu
 4594 // Then: _reg
 4595 // Then: _reg if there is a 2nd register
 4596 // Then: _long if it's a pair of instructions implementing a long
 4597 // Then: _fat if it requires the big decoder
 4598 //   Or: _mem if it requires the big decoder and a memory unit.
 4599 
 4600 // Integer ALU reg operation
 4601 pipe_class ialu_reg(rRegI dst) %{
 4602     single_instruction;
 4603     dst    : S4(write);
 4604     dst    : S3(read);
 4605     DECODE : S0;        // any decoder
 4606     ALU    : S3;        // any alu
 4607 %}
 4608 
 4609 // Long ALU reg operation
 4610 pipe_class ialu_reg_long(eRegL dst) %{
 4611     instruction_count(2);
 4612     dst    : S4(write);
 4613     dst    : S3(read);
 4614     DECODE : S0(2);     // any 2 decoders
 4615     ALU    : S3(2);     // both alus
 4616 %}
 4617 
 4618 // Integer ALU reg operation using big decoder
 4619 pipe_class ialu_reg_fat(rRegI dst) %{
 4620     single_instruction;
 4621     dst    : S4(write);
 4622     dst    : S3(read);
 4623     D0     : S0;        // big decoder only
 4624     ALU    : S3;        // any alu
 4625 %}
 4626 
 4627 // Long ALU reg operation using big decoder
 4628 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4629     instruction_count(2);
 4630     dst    : S4(write);
 4631     dst    : S3(read);
 4632     D0     : S0(2);     // big decoder only; twice
 4633     ALU    : S3(2);     // any 2 alus
 4634 %}
 4635 
 4636 // Integer ALU reg-reg operation
 4637 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4638     single_instruction;
 4639     dst    : S4(write);
 4640     src    : S3(read);
 4641     DECODE : S0;        // any decoder
 4642     ALU    : S3;        // any alu
 4643 %}
 4644 
 4645 // Long ALU reg-reg operation
 4646 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4647     instruction_count(2);
 4648     dst    : S4(write);
 4649     src    : S3(read);
 4650     DECODE : S0(2);     // any 2 decoders
 4651     ALU    : S3(2);     // both alus
 4652 %}
 4653 
 4654 // Integer ALU reg-reg operation
 4655 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4656     single_instruction;
 4657     dst    : S4(write);
 4658     src    : S3(read);
 4659     D0     : S0;        // big decoder only
 4660     ALU    : S3;        // any alu
 4661 %}
 4662 
 4663 // Long ALU reg-reg operation
 4664 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4665     instruction_count(2);
 4666     dst    : S4(write);
 4667     src    : S3(read);
 4668     D0     : S0(2);     // big decoder only; twice
 4669     ALU    : S3(2);     // both alus
 4670 %}
 4671 
 4672 // Integer ALU reg-mem operation
 4673 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4674     single_instruction;
 4675     dst    : S5(write);
 4676     mem    : S3(read);
 4677     D0     : S0;        // big decoder only
 4678     ALU    : S4;        // any alu
 4679     MEM    : S3;        // any mem
 4680 %}
 4681 
 4682 // Long ALU reg-mem operation
 4683 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4684     instruction_count(2);
 4685     dst    : S5(write);
 4686     mem    : S3(read);
 4687     D0     : S0(2);     // big decoder only; twice
 4688     ALU    : S4(2);     // any 2 alus
 4689     MEM    : S3(2);     // both mems
 4690 %}
 4691 
 4692 // Integer mem operation (prefetch)
 4693 pipe_class ialu_mem(memory mem)
 4694 %{
 4695     single_instruction;
 4696     mem    : S3(read);
 4697     D0     : S0;        // big decoder only
 4698     MEM    : S3;        // any mem
 4699 %}
 4700 
 4701 // Integer Store to Memory
 4702 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4703     single_instruction;
 4704     mem    : S3(read);
 4705     src    : S5(read);
 4706     D0     : S0;        // big decoder only
 4707     ALU    : S4;        // any alu
 4708     MEM    : S3;
 4709 %}
 4710 
 4711 // Long Store to Memory
 4712 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4713     instruction_count(2);
 4714     mem    : S3(read);
 4715     src    : S5(read);
 4716     D0     : S0(2);     // big decoder only; twice
 4717     ALU    : S4(2);     // any 2 alus
 4718     MEM    : S3(2);     // Both mems
 4719 %}
 4720 
 4721 // Integer Store to Memory
 4722 pipe_class ialu_mem_imm(memory mem) %{
 4723     single_instruction;
 4724     mem    : S3(read);
 4725     D0     : S0;        // big decoder only
 4726     ALU    : S4;        // any alu
 4727     MEM    : S3;
 4728 %}
 4729 
 4730 // Integer ALU0 reg-reg operation
 4731 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4732     single_instruction;
 4733     dst    : S4(write);
 4734     src    : S3(read);
 4735     D0     : S0;        // Big decoder only
 4736     ALU0   : S3;        // only alu0
 4737 %}
 4738 
 4739 // Integer ALU0 reg-mem operation
 4740 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4741     single_instruction;
 4742     dst    : S5(write);
 4743     mem    : S3(read);
 4744     D0     : S0;        // big decoder only
 4745     ALU0   : S4;        // ALU0 only
 4746     MEM    : S3;        // any mem
 4747 %}
 4748 
 4749 // Integer ALU reg-reg operation
 4750 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4751     single_instruction;
 4752     cr     : S4(write);
 4753     src1   : S3(read);
 4754     src2   : S3(read);
 4755     DECODE : S0;        // any decoder
 4756     ALU    : S3;        // any alu
 4757 %}
 4758 
 4759 // Integer ALU reg-imm operation
 4760 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4761     single_instruction;
 4762     cr     : S4(write);
 4763     src1   : S3(read);
 4764     DECODE : S0;        // any decoder
 4765     ALU    : S3;        // any alu
 4766 %}
 4767 
 4768 // Integer ALU reg-mem operation
 4769 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4770     single_instruction;
 4771     cr     : S4(write);
 4772     src1   : S3(read);
 4773     src2   : S3(read);
 4774     D0     : S0;        // big decoder only
 4775     ALU    : S4;        // any alu
 4776     MEM    : S3;
 4777 %}
 4778 
 4779 // Conditional move reg-reg
 4780 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4781     instruction_count(4);
 4782     y      : S4(read);
 4783     q      : S3(read);
 4784     p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4786 %}
 4787 
 4788 // Conditional move reg-reg
 4789 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4790     single_instruction;
 4791     dst    : S4(write);
 4792     src    : S3(read);
 4793     cr     : S3(read);
 4794     DECODE : S0;        // any decoder
 4795 %}
 4796 
 4797 // Conditional move reg-mem
 4798 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4799     single_instruction;
 4800     dst    : S4(write);
 4801     src    : S3(read);
 4802     cr     : S3(read);
 4803     DECODE : S0;        // any decoder
 4804     MEM    : S3;
 4805 %}
 4806 
 4807 // Conditional move reg-reg long
 4808 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4809     single_instruction;
 4810     dst    : S4(write);
 4811     src    : S3(read);
 4812     cr     : S3(read);
 4813     DECODE : S0(2);     // any 2 decoders
 4814 %}
 4815 
 4816 // Conditional move double reg-reg
 4817 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4818     single_instruction;
 4819     dst    : S4(write);
 4820     src    : S3(read);
 4821     cr     : S3(read);
 4822     DECODE : S0;        // any decoder
 4823 %}
 4824 
 4825 // Float reg-reg operation
 4826 pipe_class fpu_reg(regDPR dst) %{
 4827     instruction_count(2);
 4828     dst    : S3(read);
 4829     DECODE : S0(2);     // any 2 decoders
 4830     FPU    : S3;
 4831 %}
 4832 
 4833 // Float reg-reg operation
 4834 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4835     instruction_count(2);
 4836     dst    : S4(write);
 4837     src    : S3(read);
 4838     DECODE : S0(2);     // any 2 decoders
 4839     FPU    : S3;
 4840 %}
 4841 
 4842 // Float reg-reg operation
 4843 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4844     instruction_count(3);
 4845     dst    : S4(write);
 4846     src1   : S3(read);
 4847     src2   : S3(read);
 4848     DECODE : S0(3);     // any 3 decoders
 4849     FPU    : S3(2);
 4850 %}
 4851 
 4852 // Float reg-reg operation
 4853 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4854     instruction_count(4);
 4855     dst    : S4(write);
 4856     src1   : S3(read);
 4857     src2   : S3(read);
 4858     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4860     FPU    : S3(2);
 4861 %}
 4862 
 4863 // Float reg-reg operation
 4864 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4865     instruction_count(4);
 4866     dst    : S4(write);
 4867     src1   : S3(read);
 4868     src2   : S3(read);
 4869     src3   : S3(read);
 4870     DECODE : S1(3);     // any 3 decoders
 4871     D0     : S0;        // Big decoder only
 4872     FPU    : S3(2);
 4873     MEM    : S3;
 4874 %}
 4875 
 4876 // Float reg-mem operation
 4877 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4878     instruction_count(2);
 4879     dst    : S5(write);
 4880     mem    : S3(read);
 4881     D0     : S0;        // big decoder only
 4882     DECODE : S1;        // any decoder for FPU POP
 4883     FPU    : S4;
 4884     MEM    : S3;        // any mem
 4885 %}
 4886 
 4887 // Float reg-mem operation
 4888 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4889     instruction_count(3);
 4890     dst    : S5(write);
 4891     src1   : S3(read);
 4892     mem    : S3(read);
 4893     D0     : S0;        // big decoder only
 4894     DECODE : S1(2);     // any decoder for FPU POP
 4895     FPU    : S4;
 4896     MEM    : S3;        // any mem
 4897 %}
 4898 
 4899 // Float mem-reg operation
 4900 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4901     instruction_count(2);
 4902     src    : S5(read);
 4903     mem    : S3(read);
 4904     DECODE : S0;        // any decoder for FPU PUSH
 4905     D0     : S1;        // big decoder only
 4906     FPU    : S4;
 4907     MEM    : S3;        // any mem
 4908 %}
 4909 
 4910 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4911     instruction_count(3);
 4912     src1   : S3(read);
 4913     src2   : S3(read);
 4914     mem    : S3(read);
 4915     DECODE : S0(2);     // any decoder for FPU PUSH
 4916     D0     : S1;        // big decoder only
 4917     FPU    : S4;
 4918     MEM    : S3;        // any mem
 4919 %}
 4920 
 4921 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4922     instruction_count(3);
 4923     src1   : S3(read);
 4924     src2   : S3(read);
 4925     mem    : S4(read);
 4926     DECODE : S0;        // any decoder for FPU PUSH
 4927     D0     : S0(2);     // big decoder only
 4928     FPU    : S4;
 4929     MEM    : S3(2);     // any mem
 4930 %}
 4931 
 4932 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4933     instruction_count(2);
 4934     src1   : S3(read);
 4935     dst    : S4(read);
 4936     D0     : S0(2);     // big decoder only
 4937     MEM    : S3(2);     // any mem
 4938 %}
 4939 
 4940 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4941     instruction_count(3);
 4942     src1   : S3(read);
 4943     src2   : S3(read);
 4944     dst    : S4(read);
 4945     D0     : S0(3);     // big decoder only
 4946     FPU    : S4;
 4947     MEM    : S3(3);     // any mem
 4948 %}
 4949 
 4950 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4951     instruction_count(3);
 4952     src1   : S4(read);
 4953     mem    : S4(read);
 4954     DECODE : S0;        // any decoder for FPU PUSH
 4955     D0     : S0(2);     // big decoder only
 4956     FPU    : S4;
 4957     MEM    : S3(2);     // any mem
 4958 %}
 4959 
 4960 // Float load constant
 4961 pipe_class fpu_reg_con(regDPR dst) %{
 4962     instruction_count(2);
 4963     dst    : S5(write);
 4964     D0     : S0;        // big decoder only for the load
 4965     DECODE : S1;        // any decoder for FPU POP
 4966     FPU    : S4;
 4967     MEM    : S3;        // any mem
 4968 %}
 4969 
 4970 // Float load constant
 4971 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4972     instruction_count(3);
 4973     dst    : S5(write);
 4974     src    : S3(read);
 4975     D0     : S0;        // big decoder only for the load
 4976     DECODE : S1(2);     // any decoder for FPU POP
 4977     FPU    : S4;
 4978     MEM    : S3;        // any mem
 4979 %}
 4980 
 4981 // UnConditional branch
 4982 pipe_class pipe_jmp( label labl ) %{
 4983     single_instruction;
 4984     BR   : S3;
 4985 %}
 4986 
 4987 // Conditional branch
 4988 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4989     single_instruction;
 4990     cr    : S1(read);
 4991     BR    : S3;
 4992 %}
 4993 
 4994 // Allocation idiom
 4995 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4996     instruction_count(1); force_serialization;
 4997     fixed_latency(6);
 4998     heap_ptr : S3(read);
 4999     DECODE   : S0(3);
 5000     D0       : S2;
 5001     MEM      : S3;
 5002     ALU      : S3(2);
 5003     dst      : S5(write);
 5004     BR       : S5;
 5005 %}
 5006 
 5007 // Generic big/slow expanded idiom
 5008 pipe_class pipe_slow(  ) %{
 5009     instruction_count(10); multiple_bundles; force_serialization;
 5010     fixed_latency(100);
 5011     D0  : S0(2);
 5012     MEM : S3(2);
 5013 %}
 5014 
 5015 // The real do-nothing guy
 5016 pipe_class empty( ) %{
 5017     instruction_count(0);
 5018 %}
 5019 
 5020 // Define the class for the Nop node
 5021 define %{
 5022    MachNop = empty;
 5023 %}
 5024 
 5025 %}
 5026 
 5027 //----------INSTRUCTIONS-------------------------------------------------------
 5028 //
 5029 // match      -- States which machine-independent subtree may be replaced
 5030 //               by this instruction.
 5031 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5032 //               selection to identify a minimum cost tree of machine
 5033 //               instructions that matches a tree of machine-independent
 5034 //               instructions.
 5035 // format     -- A string providing the disassembly for this instruction.
 5036 //               The value of an instruction's operand may be inserted
 5037 //               by referring to it with a '$' prefix.
 5038 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5039 //               to within an encode class as $primary, $secondary, and $tertiary
 5040 //               respectively.  The primary opcode is commonly used to
 5041 //               indicate the type of machine instruction, while secondary
 5042 //               and tertiary are often used for prefix options or addressing
 5043 //               modes.
 5044 // ins_encode -- A list of encode classes with parameters. The encode class
 5045 //               name must have been defined in an 'enc_class' specification
 5046 //               in the encode section of the architecture description.
 5047 
 5048 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5049 // Load Float
 5050 instruct MoveF2LEG(legRegF dst, regF src) %{
 5051   match(Set dst src);
 5052   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5053   ins_encode %{
 5054     ShouldNotReachHere();
 5055   %}
 5056   ins_pipe( fpu_reg_reg );
 5057 %}
 5058 
 5059 // Load Float
 5060 instruct MoveLEG2F(regF dst, legRegF src) %{
 5061   match(Set dst src);
 5062   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5063   ins_encode %{
 5064     ShouldNotReachHere();
 5065   %}
 5066   ins_pipe( fpu_reg_reg );
 5067 %}
 5068 
 5069 // Load Float
 5070 instruct MoveF2VL(vlRegF dst, regF src) %{
 5071   match(Set dst src);
 5072   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5073   ins_encode %{
 5074     ShouldNotReachHere();
 5075   %}
 5076   ins_pipe( fpu_reg_reg );
 5077 %}
 5078 
 5079 // Load Float
 5080 instruct MoveVL2F(regF dst, vlRegF src) %{
 5081   match(Set dst src);
 5082   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5083   ins_encode %{
 5084     ShouldNotReachHere();
 5085   %}
 5086   ins_pipe( fpu_reg_reg );
 5087 %}
 5088 
 5089 
 5090 
 5091 // Load Double
 5092 instruct MoveD2LEG(legRegD dst, regD src) %{
 5093   match(Set dst src);
 5094   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5095   ins_encode %{
 5096     ShouldNotReachHere();
 5097   %}
 5098   ins_pipe( fpu_reg_reg );
 5099 %}
 5100 
 5101 // Load Double
 5102 instruct MoveLEG2D(regD dst, legRegD src) %{
 5103   match(Set dst src);
 5104   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5105   ins_encode %{
 5106     ShouldNotReachHere();
 5107   %}
 5108   ins_pipe( fpu_reg_reg );
 5109 %}
 5110 
 5111 // Load Double
 5112 instruct MoveD2VL(vlRegD dst, regD src) %{
 5113   match(Set dst src);
 5114   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5115   ins_encode %{
 5116     ShouldNotReachHere();
 5117   %}
 5118   ins_pipe( fpu_reg_reg );
 5119 %}
 5120 
 5121 // Load Double
 5122 instruct MoveVL2D(regD dst, vlRegD src) %{
 5123   match(Set dst src);
 5124   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5125   ins_encode %{
 5126     ShouldNotReachHere();
 5127   %}
 5128   ins_pipe( fpu_reg_reg );
 5129 %}
 5130 
 5131 //----------BSWAP-Instruction--------------------------------------------------
 5132 instruct bytes_reverse_int(rRegI dst) %{
 5133   match(Set dst (ReverseBytesI dst));
 5134 
 5135   format %{ "BSWAP  $dst" %}
 5136   opcode(0x0F, 0xC8);
 5137   ins_encode( OpcP, OpcSReg(dst) );
 5138   ins_pipe( ialu_reg );
 5139 %}
 5140 
 5141 instruct bytes_reverse_long(eRegL dst) %{
 5142   match(Set dst (ReverseBytesL dst));
 5143 
 5144   format %{ "BSWAP  $dst.lo\n\t"
 5145             "BSWAP  $dst.hi\n\t"
 5146             "XCHG   $dst.lo $dst.hi" %}
 5147 
 5148   ins_cost(125);
 5149   ins_encode( bswap_long_bytes(dst) );
 5150   ins_pipe( ialu_reg_reg);
 5151 %}
 5152 
 5153 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5154   match(Set dst (ReverseBytesUS dst));
 5155   effect(KILL cr);
 5156 
 5157   format %{ "BSWAP  $dst\n\t"
 5158             "SHR    $dst,16\n\t" %}
 5159   ins_encode %{
 5160     __ bswapl($dst$$Register);
 5161     __ shrl($dst$$Register, 16);
 5162   %}
 5163   ins_pipe( ialu_reg );
 5164 %}
 5165 
 5166 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5167   match(Set dst (ReverseBytesS dst));
 5168   effect(KILL cr);
 5169 
 5170   format %{ "BSWAP  $dst\n\t"
 5171             "SAR    $dst,16\n\t" %}
 5172   ins_encode %{
 5173     __ bswapl($dst$$Register);
 5174     __ sarl($dst$$Register, 16);
 5175   %}
 5176   ins_pipe( ialu_reg );
 5177 %}
 5178 
 5179 
 5180 //---------- Zeros Count Instructions ------------------------------------------
 5181 
 5182 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5183   predicate(UseCountLeadingZerosInstruction);
 5184   match(Set dst (CountLeadingZerosI src));
 5185   effect(KILL cr);
 5186 
 5187   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5188   ins_encode %{
 5189     __ lzcntl($dst$$Register, $src$$Register);
 5190   %}
 5191   ins_pipe(ialu_reg);
 5192 %}
 5193 
 5194 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5195   predicate(!UseCountLeadingZerosInstruction);
 5196   match(Set dst (CountLeadingZerosI src));
 5197   effect(KILL cr);
 5198 
 5199   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5200             "JNZ    skip\n\t"
 5201             "MOV    $dst, -1\n"
 5202       "skip:\n\t"
 5203             "NEG    $dst\n\t"
 5204             "ADD    $dst, 31" %}
 5205   ins_encode %{
 5206     Register Rdst = $dst$$Register;
 5207     Register Rsrc = $src$$Register;
 5208     Label skip;
 5209     __ bsrl(Rdst, Rsrc);
 5210     __ jccb(Assembler::notZero, skip);
 5211     __ movl(Rdst, -1);
 5212     __ bind(skip);
 5213     __ negl(Rdst);
 5214     __ addl(Rdst, BitsPerInt - 1);
 5215   %}
 5216   ins_pipe(ialu_reg);
 5217 %}
 5218 
 5219 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5220   predicate(UseCountLeadingZerosInstruction);
 5221   match(Set dst (CountLeadingZerosL src));
 5222   effect(TEMP dst, KILL cr);
 5223 
 5224   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5225             "JNC    done\n\t"
 5226             "LZCNT  $dst, $src.lo\n\t"
 5227             "ADD    $dst, 32\n"
 5228       "done:" %}
 5229   ins_encode %{
 5230     Register Rdst = $dst$$Register;
 5231     Register Rsrc = $src$$Register;
 5232     Label done;
 5233     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5234     __ jccb(Assembler::carryClear, done);
 5235     __ lzcntl(Rdst, Rsrc);
 5236     __ addl(Rdst, BitsPerInt);
 5237     __ bind(done);
 5238   %}
 5239   ins_pipe(ialu_reg);
 5240 %}
 5241 
 5242 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5243   predicate(!UseCountLeadingZerosInstruction);
 5244   match(Set dst (CountLeadingZerosL src));
 5245   effect(TEMP dst, KILL cr);
 5246 
 5247   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5248             "JZ     msw_is_zero\n\t"
 5249             "ADD    $dst, 32\n\t"
 5250             "JMP    not_zero\n"
 5251       "msw_is_zero:\n\t"
 5252             "BSR    $dst, $src.lo\n\t"
 5253             "JNZ    not_zero\n\t"
 5254             "MOV    $dst, -1\n"
 5255       "not_zero:\n\t"
 5256             "NEG    $dst\n\t"
 5257             "ADD    $dst, 63\n" %}
  ins_encode %{
 5259     Register Rdst = $dst$$Register;
 5260     Register Rsrc = $src$$Register;
 5261     Label msw_is_zero;
 5262     Label not_zero;
 5263     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5264     __ jccb(Assembler::zero, msw_is_zero);
 5265     __ addl(Rdst, BitsPerInt);
 5266     __ jmpb(not_zero);
 5267     __ bind(msw_is_zero);
 5268     __ bsrl(Rdst, Rsrc);
 5269     __ jccb(Assembler::notZero, not_zero);
 5270     __ movl(Rdst, -1);
 5271     __ bind(not_zero);
 5272     __ negl(Rdst);
 5273     __ addl(Rdst, BitsPerLong - 1);
 5274   %}
 5275   ins_pipe(ialu_reg);
 5276 %}
 5277 
 5278 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5279   predicate(UseCountTrailingZerosInstruction);
 5280   match(Set dst (CountTrailingZerosI src));
 5281   effect(KILL cr);
 5282 
 5283   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5284   ins_encode %{
 5285     __ tzcntl($dst$$Register, $src$$Register);
 5286   %}
 5287   ins_pipe(ialu_reg);
 5288 %}
 5289 
 5290 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5291   predicate(!UseCountTrailingZerosInstruction);
 5292   match(Set dst (CountTrailingZerosI src));
 5293   effect(KILL cr);
 5294 
 5295   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5296             "JNZ    done\n\t"
 5297             "MOV    $dst, 32\n"
 5298       "done:" %}
 5299   ins_encode %{
 5300     Register Rdst = $dst$$Register;
 5301     Label done;
 5302     __ bsfl(Rdst, $src$$Register);
 5303     __ jccb(Assembler::notZero, done);
 5304     __ movl(Rdst, BitsPerInt);
 5305     __ bind(done);
 5306   %}
 5307   ins_pipe(ialu_reg);
 5308 %}
 5309 
 5310 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5311   predicate(UseCountTrailingZerosInstruction);
 5312   match(Set dst (CountTrailingZerosL src));
 5313   effect(TEMP dst, KILL cr);
 5314 
 5315   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5316             "JNC    done\n\t"
 5317             "TZCNT  $dst, $src.hi\n\t"
 5318             "ADD    $dst, 32\n"
 5319             "done:" %}
 5320   ins_encode %{
 5321     Register Rdst = $dst$$Register;
 5322     Register Rsrc = $src$$Register;
 5323     Label done;
 5324     __ tzcntl(Rdst, Rsrc);
 5325     __ jccb(Assembler::carryClear, done);
 5326     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5327     __ addl(Rdst, BitsPerInt);
 5328     __ bind(done);
 5329   %}
 5330   ins_pipe(ialu_reg);
 5331 %}
 5332 
 5333 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5334   predicate(!UseCountTrailingZerosInstruction);
 5335   match(Set dst (CountTrailingZerosL src));
 5336   effect(TEMP dst, KILL cr);
 5337 
 5338   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5339             "JNZ    done\n\t"
 5340             "BSF    $dst, $src.hi\n\t"
 5341             "JNZ    msw_not_zero\n\t"
 5342             "MOV    $dst, 32\n"
 5343       "msw_not_zero:\n\t"
 5344             "ADD    $dst, 32\n"
 5345       "done:" %}
 5346   ins_encode %{
 5347     Register Rdst = $dst$$Register;
 5348     Register Rsrc = $src$$Register;
 5349     Label msw_not_zero;
 5350     Label done;
 5351     __ bsfl(Rdst, Rsrc);
 5352     __ jccb(Assembler::notZero, done);
 5353     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5354     __ jccb(Assembler::notZero, msw_not_zero);
 5355     __ movl(Rdst, BitsPerInt);
 5356     __ bind(msw_not_zero);
 5357     __ addl(Rdst, BitsPerInt);
 5358     __ bind(done);
 5359   %}
 5360   ins_pipe(ialu_reg);
 5361 %}
 5362 
 5363 
 5364 //---------- Population Count Instructions -------------------------------------
 5365 
 5366 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5367   predicate(UsePopCountInstruction);
 5368   match(Set dst (PopCountI src));
 5369   effect(KILL cr);
 5370 
 5371   format %{ "POPCNT $dst, $src" %}
 5372   ins_encode %{
 5373     __ popcntl($dst$$Register, $src$$Register);
 5374   %}
 5375   ins_pipe(ialu_reg);
 5376 %}
 5377 
 5378 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5379   predicate(UsePopCountInstruction);
 5380   match(Set dst (PopCountI (LoadI mem)));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "POPCNT $dst, $mem" %}
 5384   ins_encode %{
 5385     __ popcntl($dst$$Register, $mem$$Address);
 5386   %}
 5387   ins_pipe(ialu_reg);
 5388 %}
 5389 
 5390 // Note: Long.bitCount(long) returns an int.
 5391 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5392   predicate(UsePopCountInstruction);
 5393   match(Set dst (PopCountL src));
 5394   effect(KILL cr, TEMP tmp, TEMP dst);
 5395 
 5396   format %{ "POPCNT $dst, $src.lo\n\t"
 5397             "POPCNT $tmp, $src.hi\n\t"
 5398             "ADD    $dst, $tmp" %}
 5399   ins_encode %{
 5400     __ popcntl($dst$$Register, $src$$Register);
 5401     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5402     __ addl($dst$$Register, $tmp$$Register);
 5403   %}
 5404   ins_pipe(ialu_reg);
 5405 %}
 5406 
 5407 // Note: Long.bitCount(long) returns an int.
 5408 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5409   predicate(UsePopCountInstruction);
 5410   match(Set dst (PopCountL (LoadL mem)));
 5411   effect(KILL cr, TEMP tmp, TEMP dst);
 5412 
 5413   format %{ "POPCNT $dst, $mem\n\t"
 5414             "POPCNT $tmp, $mem+4\n\t"
 5415             "ADD    $dst, $tmp" %}
 5416   ins_encode %{
 5417     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5418     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5419     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5420     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5421     __ addl($dst$$Register, $tmp$$Register);
 5422   %}
 5423   ins_pipe(ialu_reg);
 5424 %}
 5425 
 5426 
 5427 //----------Load/Store/Move Instructions---------------------------------------
 5428 //----------Load Instructions--------------------------------------------------
 5429 // Load Byte (8bit signed)
 5430 instruct loadB(xRegI dst, memory mem) %{
 5431   match(Set dst (LoadB mem));
 5432 
 5433   ins_cost(125);
 5434   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5435 
 5436   ins_encode %{
 5437     __ movsbl($dst$$Register, $mem$$Address);
 5438   %}
 5439 
 5440   ins_pipe(ialu_reg_mem);
 5441 %}
 5442 
 5443 // Load Byte (8bit signed) into Long Register
 5444 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5445   match(Set dst (ConvI2L (LoadB mem)));
 5446   effect(KILL cr);
 5447 
 5448   ins_cost(375);
 5449   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5450             "MOV    $dst.hi,$dst.lo\n\t"
 5451             "SAR    $dst.hi,7" %}
 5452 
 5453   ins_encode %{
 5454     __ movsbl($dst$$Register, $mem$$Address);
 5455     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5457   %}
 5458 
 5459   ins_pipe(ialu_reg_mem);
 5460 %}
 5461 
 5462 // Load Unsigned Byte (8bit UNsigned)
 5463 instruct loadUB(xRegI dst, memory mem) %{
 5464   match(Set dst (LoadUB mem));
 5465 
 5466   ins_cost(125);
 5467   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5468 
 5469   ins_encode %{
 5470     __ movzbl($dst$$Register, $mem$$Address);
 5471   %}
 5472 
 5473   ins_pipe(ialu_reg_mem);
 5474 %}
 5475 
 5476 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5477 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5478   match(Set dst (ConvI2L (LoadUB mem)));
 5479   effect(KILL cr);
 5480 
 5481   ins_cost(250);
 5482   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5483             "XOR    $dst.hi,$dst.hi" %}
 5484 
 5485   ins_encode %{
 5486     Register Rdst = $dst$$Register;
 5487     __ movzbl(Rdst, $mem$$Address);
 5488     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5489   %}
 5490 
 5491   ins_pipe(ialu_reg_mem);
 5492 %}
 5493 
 5494 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5495 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5496   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5497   effect(KILL cr);
 5498 
 5499   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5500             "XOR    $dst.hi,$dst.hi\n\t"
 5501             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5502   ins_encode %{
 5503     Register Rdst = $dst$$Register;
 5504     __ movzbl(Rdst, $mem$$Address);
 5505     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5506     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5507   %}
 5508   ins_pipe(ialu_reg_mem);
 5509 %}
 5510 
 5511 // Load Short (16bit signed)
 5512 instruct loadS(rRegI dst, memory mem) %{
 5513   match(Set dst (LoadS mem));
 5514 
 5515   ins_cost(125);
 5516   format %{ "MOVSX  $dst,$mem\t# short" %}
 5517 
 5518   ins_encode %{
 5519     __ movswl($dst$$Register, $mem$$Address);
 5520   %}
 5521 
 5522   ins_pipe(ialu_reg_mem);
 5523 %}
 5524 
 5525 // Load Short (16 bit signed) to Byte (8 bit signed)
 5526 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5527   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5528 
 5529   ins_cost(125);
 5530   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5531   ins_encode %{
 5532     __ movsbl($dst$$Register, $mem$$Address);
 5533   %}
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Short (16bit signed) into Long Register
 5538 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5539   match(Set dst (ConvI2L (LoadS mem)));
 5540   effect(KILL cr);
 5541 
 5542   ins_cost(375);
 5543   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5544             "MOV    $dst.hi,$dst.lo\n\t"
 5545             "SAR    $dst.hi,15" %}
 5546 
 5547   ins_encode %{
 5548     __ movswl($dst$$Register, $mem$$Address);
 5549     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5551   %}
 5552 
 5553   ins_pipe(ialu_reg_mem);
 5554 %}
 5555 
 5556 // Load Unsigned Short/Char (16bit unsigned)
 5557 instruct loadUS(rRegI dst, memory mem) %{
 5558   match(Set dst (LoadUS mem));
 5559 
 5560   ins_cost(125);
 5561   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5562 
 5563   ins_encode %{
 5564     __ movzwl($dst$$Register, $mem$$Address);
 5565   %}
 5566 
 5567   ins_pipe(ialu_reg_mem);
 5568 %}
 5569 
 5570 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5571 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5572   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5573 
 5574   ins_cost(125);
 5575   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5576   ins_encode %{
 5577     __ movsbl($dst$$Register, $mem$$Address);
 5578   %}
 5579   ins_pipe(ialu_reg_mem);
 5580 %}
 5581 
 5582 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5583 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5584   match(Set dst (ConvI2L (LoadUS mem)));
 5585   effect(KILL cr);
 5586 
 5587   ins_cost(250);
 5588   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5589             "XOR    $dst.hi,$dst.hi" %}
 5590 
 5591   ins_encode %{
 5592     __ movzwl($dst$$Register, $mem$$Address);
 5593     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5594   %}
 5595 
 5596   ins_pipe(ialu_reg_mem);
 5597 %}
 5598 
 5599 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5600 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5601   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5602   effect(KILL cr);
 5603 
 5604   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5605             "XOR    $dst.hi,$dst.hi" %}
 5606   ins_encode %{
 5607     Register Rdst = $dst$$Register;
 5608     __ movzbl(Rdst, $mem$$Address);
 5609     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5610   %}
 5611   ins_pipe(ialu_reg_mem);
 5612 %}
 5613 
 5614 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5615 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5616   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5617   effect(KILL cr);
 5618 
 5619   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5620             "XOR    $dst.hi,$dst.hi\n\t"
 5621             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5622   ins_encode %{
 5623     Register Rdst = $dst$$Register;
 5624     __ movzwl(Rdst, $mem$$Address);
 5625     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5626     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5627   %}
 5628   ins_pipe(ialu_reg_mem);
 5629 %}
 5630 
 5631 // Load Integer
 5632 instruct loadI(rRegI dst, memory mem) %{
 5633   match(Set dst (LoadI mem));
 5634 
 5635   ins_cost(125);
 5636   format %{ "MOV    $dst,$mem\t# int" %}
 5637 
 5638   ins_encode %{
 5639     __ movl($dst$$Register, $mem$$Address);
 5640   %}
 5641 
 5642   ins_pipe(ialu_reg_mem);
 5643 %}
 5644 
 5645 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5646 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5647   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5648 
 5649   ins_cost(125);
 5650   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5651   ins_encode %{
 5652     __ movsbl($dst$$Register, $mem$$Address);
 5653   %}
 5654   ins_pipe(ialu_reg_mem);
 5655 %}
 5656 
 5657 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5658 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5659   match(Set dst (AndI (LoadI mem) mask));
 5660 
 5661   ins_cost(125);
 5662   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5663   ins_encode %{
 5664     __ movzbl($dst$$Register, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_mem);
 5667 %}
 5668 
 5669 // Load Integer (32 bit signed) to Short (16 bit signed)
 5670 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5671   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5672 
 5673   ins_cost(125);
 5674   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5675   ins_encode %{
 5676     __ movswl($dst$$Register, $mem$$Address);
 5677   %}
 5678   ins_pipe(ialu_reg_mem);
 5679 %}
 5680 
 5681 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5682 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5683   match(Set dst (AndI (LoadI mem) mask));
 5684 
 5685   ins_cost(125);
 5686   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5687   ins_encode %{
 5688     __ movzwl($dst$$Register, $mem$$Address);
 5689   %}
 5690   ins_pipe(ialu_reg_mem);
 5691 %}
 5692 
 5693 // Load Integer into Long Register
 5694 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5695   match(Set dst (ConvI2L (LoadI mem)));
 5696   effect(KILL cr);
 5697 
 5698   ins_cost(375);
 5699   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5700             "MOV    $dst.hi,$dst.lo\n\t"
 5701             "SAR    $dst.hi,31" %}
 5702 
 5703   ins_encode %{
 5704     __ movl($dst$$Register, $mem$$Address);
 5705     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5706     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5707   %}
 5708 
 5709   ins_pipe(ialu_reg_mem);
 5710 %}
 5711 
 5712 // Load Integer with mask 0xFF into Long Register
 5713 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5714   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5715   effect(KILL cr);
 5716 
 5717   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5718             "XOR    $dst.hi,$dst.hi" %}
 5719   ins_encode %{
 5720     Register Rdst = $dst$$Register;
 5721     __ movzbl(Rdst, $mem$$Address);
 5722     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5723   %}
 5724   ins_pipe(ialu_reg_mem);
 5725 %}
 5726 
 5727 // Load Integer with mask 0xFFFF into Long Register
 5728 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5729   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5730   effect(KILL cr);
 5731 
 5732   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5733             "XOR    $dst.hi,$dst.hi" %}
 5734   ins_encode %{
 5735     Register Rdst = $dst$$Register;
 5736     __ movzwl(Rdst, $mem$$Address);
 5737     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5738   %}
 5739   ins_pipe(ialu_reg_mem);
 5740 %}
 5741 
 5742 // Load Integer with 31-bit mask into Long Register
 5743 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5744   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5745   effect(KILL cr);
 5746 
 5747   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5748             "XOR    $dst.hi,$dst.hi\n\t"
 5749             "AND    $dst.lo,$mask" %}
 5750   ins_encode %{
 5751     Register Rdst = $dst$$Register;
 5752     __ movl(Rdst, $mem$$Address);
 5753     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5754     __ andl(Rdst, $mask$$constant);
 5755   %}
 5756   ins_pipe(ialu_reg_mem);
 5757 %}
 5758 
 5759 // Load Unsigned Integer into Long Register
 5760 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5761   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5762   effect(KILL cr);
 5763 
 5764   ins_cost(250);
 5765   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5766             "XOR    $dst.hi,$dst.hi" %}
 5767 
 5768   ins_encode %{
 5769     __ movl($dst$$Register, $mem$$Address);
 5770     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5771   %}
 5772 
 5773   ins_pipe(ialu_reg_mem);
 5774 %}
 5775 
 5776 // Load Long.  Cannot clobber address while loading, so restrict address
 5777 // register to ESI
 5778 instruct loadL(eRegL dst, load_long_memory mem) %{
 5779   predicate(!((LoadLNode*)n)->require_atomic_access());
 5780   match(Set dst (LoadL mem));
 5781 
 5782   ins_cost(250);
 5783   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5784             "MOV    $dst.hi,$mem+4" %}
 5785 
 5786   ins_encode %{
 5787     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5788     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5789     __ movl($dst$$Register, Amemlo);
 5790     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5791   %}
 5792 
 5793   ins_pipe(ialu_reg_long_mem);
 5794 %}
 5795 
 5796 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5797 // then store it down to the stack and reload on the int
 5798 // side.
 5799 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5800   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5801   match(Set dst (LoadL mem));
 5802 
 5803   ins_cost(200);
 5804   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5805             "FISTp  $dst" %}
 5806   ins_encode(enc_loadL_volatile(mem,dst));
 5807   ins_pipe( fpu_reg_mem );
 5808 %}
 5809 
 5810 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5811   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5812   match(Set dst (LoadL mem));
 5813   effect(TEMP tmp);
 5814   ins_cost(180);
 5815   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5816             "MOVSD  $dst,$tmp" %}
 5817   ins_encode %{
 5818     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5819     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5820   %}
 5821   ins_pipe( pipe_slow );
 5822 %}
 5823 
 5824 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5825   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5826   match(Set dst (LoadL mem));
 5827   effect(TEMP tmp);
 5828   ins_cost(160);
 5829   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5830             "MOVD   $dst.lo,$tmp\n\t"
 5831             "PSRLQ  $tmp,32\n\t"
 5832             "MOVD   $dst.hi,$tmp" %}
 5833   ins_encode %{
 5834     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5835     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5836     __ psrlq($tmp$$XMMRegister, 32);
 5837     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5838   %}
 5839   ins_pipe( pipe_slow );
 5840 %}
 5841 
 5842 // Load Range
 5843 instruct loadRange(rRegI dst, memory mem) %{
 5844   match(Set dst (LoadRange mem));
 5845 
 5846   ins_cost(125);
 5847   format %{ "MOV    $dst,$mem" %}
 5848   opcode(0x8B);
 5849   ins_encode( OpcP, RegMem(dst,mem));
 5850   ins_pipe( ialu_reg_mem );
 5851 %}
 5852 
 5853 
 5854 // Load Pointer
 5855 instruct loadP(eRegP dst, memory mem) %{
 5856   match(Set dst (LoadP mem));
 5857 
 5858   ins_cost(125);
 5859   format %{ "MOV    $dst,$mem" %}
 5860   opcode(0x8B);
 5861   ins_encode( OpcP, RegMem(dst,mem));
 5862   ins_pipe( ialu_reg_mem );
 5863 %}
 5864 
 5865 // Load Klass Pointer
 5866 instruct loadKlass(eRegP dst, memory mem) %{
 5867   match(Set dst (LoadKlass mem));
 5868 
 5869   ins_cost(125);
 5870   format %{ "MOV    $dst,$mem" %}
 5871   opcode(0x8B);
 5872   ins_encode( OpcP, RegMem(dst,mem));
 5873   ins_pipe( ialu_reg_mem );
 5874 %}
 5875 
 5876 // Load Double
 5877 instruct loadDPR(regDPR dst, memory mem) %{
 5878   predicate(UseSSE<=1);
 5879   match(Set dst (LoadD mem));
 5880 
 5881   ins_cost(150);
 5882   format %{ "FLD_D  ST,$mem\n\t"
 5883             "FSTP   $dst" %}
 5884   opcode(0xDD);               /* DD /0 */
 5885   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5886               Pop_Reg_DPR(dst) );
 5887   ins_pipe( fpu_reg_mem );
 5888 %}
 5889 
 5890 // Load Double to XMM
 5891 instruct loadD(regD dst, memory mem) %{
 5892   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5893   match(Set dst (LoadD mem));
 5894   ins_cost(145);
 5895   format %{ "MOVSD  $dst,$mem" %}
 5896   ins_encode %{
 5897     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5898   %}
 5899   ins_pipe( pipe_slow );
 5900 %}
 5901 
 5902 instruct loadD_partial(regD dst, memory mem) %{
 5903   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5904   match(Set dst (LoadD mem));
 5905   ins_cost(145);
 5906   format %{ "MOVLPD $dst,$mem" %}
 5907   ins_encode %{
 5908     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5909   %}
 5910   ins_pipe( pipe_slow );
 5911 %}
 5912 
 5913 // Load to XMM register (single-precision floating point)
 5914 // MOVSS instruction
 5915 instruct loadF(regF dst, memory mem) %{
 5916   predicate(UseSSE>=1);
 5917   match(Set dst (LoadF mem));
 5918   ins_cost(145);
 5919   format %{ "MOVSS  $dst,$mem" %}
 5920   ins_encode %{
 5921     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5922   %}
 5923   ins_pipe( pipe_slow );
 5924 %}
 5925 
 5926 // Load Float
 5927 instruct loadFPR(regFPR dst, memory mem) %{
 5928   predicate(UseSSE==0);
 5929   match(Set dst (LoadF mem));
 5930 
 5931   ins_cost(150);
 5932   format %{ "FLD_S  ST,$mem\n\t"
 5933             "FSTP   $dst" %}
 5934   opcode(0xD9);               /* D9 /0 */
 5935   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5936               Pop_Reg_FPR(dst) );
 5937   ins_pipe( fpu_reg_mem );
 5938 %}
 5939 
 5940 // Load Effective Address
 5941 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5942   match(Set dst mem);
 5943 
 5944   ins_cost(110);
 5945   format %{ "LEA    $dst,$mem" %}
 5946   opcode(0x8D);
 5947   ins_encode( OpcP, RegMem(dst,mem));
 5948   ins_pipe( ialu_reg_reg_fat );
 5949 %}
 5950 
 5951 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5952   match(Set dst mem);
 5953 
 5954   ins_cost(110);
 5955   format %{ "LEA    $dst,$mem" %}
 5956   opcode(0x8D);
 5957   ins_encode( OpcP, RegMem(dst,mem));
 5958   ins_pipe( ialu_reg_reg_fat );
 5959 %}
 5960 
 5961 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5962   match(Set dst mem);
 5963 
 5964   ins_cost(110);
 5965   format %{ "LEA    $dst,$mem" %}
 5966   opcode(0x8D);
 5967   ins_encode( OpcP, RegMem(dst,mem));
 5968   ins_pipe( ialu_reg_reg_fat );
 5969 %}
 5970 
 5971 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5972   match(Set dst mem);
 5973 
 5974   ins_cost(110);
 5975   format %{ "LEA    $dst,$mem" %}
 5976   opcode(0x8D);
 5977   ins_encode( OpcP, RegMem(dst,mem));
 5978   ins_pipe( ialu_reg_reg_fat );
 5979 %}
 5980 
 5981 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5982   match(Set dst mem);
 5983 
 5984   ins_cost(110);
 5985   format %{ "LEA    $dst,$mem" %}
 5986   opcode(0x8D);
 5987   ins_encode( OpcP, RegMem(dst,mem));
 5988   ins_pipe( ialu_reg_reg_fat );
 5989 %}
 5990 
 5991 // Load Constant
 5992 instruct loadConI(rRegI dst, immI src) %{
 5993   match(Set dst src);
 5994 
 5995   format %{ "MOV    $dst,$src" %}
 5996   ins_encode( LdImmI(dst, src) );
 5997   ins_pipe( ialu_reg_fat );
 5998 %}
 5999 
 6000 // Load Constant zero
 6001 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6002   match(Set dst src);
 6003   effect(KILL cr);
 6004 
 6005   ins_cost(50);
 6006   format %{ "XOR    $dst,$dst" %}
 6007   opcode(0x33);  /* + rd */
 6008   ins_encode( OpcP, RegReg( dst, dst ) );
 6009   ins_pipe( ialu_reg );
 6010 %}
 6011 
 6012 instruct loadConP(eRegP dst, immP src) %{
 6013   match(Set dst src);
 6014 
 6015   format %{ "MOV    $dst,$src" %}
 6016   opcode(0xB8);  /* + rd */
 6017   ins_encode( LdImmP(dst, src) );
 6018   ins_pipe( ialu_reg_fat );
 6019 %}
 6020 
 6021 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6022   match(Set dst src);
 6023   effect(KILL cr);
 6024   ins_cost(200);
 6025   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6026             "MOV    $dst.hi,$src.hi" %}
 6027   opcode(0xB8);
 6028   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6029   ins_pipe( ialu_reg_long_fat );
 6030 %}
 6031 
 6032 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6033   match(Set dst src);
 6034   effect(KILL cr);
 6035   ins_cost(150);
 6036   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6037             "XOR    $dst.hi,$dst.hi" %}
 6038   opcode(0x33,0x33);
 6039   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6040   ins_pipe( ialu_reg_long );
 6041 %}
 6042 
 6043 // The instruction usage is guarded by predicate in operand immFPR().
 6044 instruct loadConFPR(regFPR dst, immFPR con) %{
 6045   match(Set dst con);
 6046   ins_cost(125);
 6047   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6048             "FSTP   $dst" %}
 6049   ins_encode %{
 6050     __ fld_s($constantaddress($con));
 6051     __ fstp_d($dst$$reg);
 6052   %}
 6053   ins_pipe(fpu_reg_con);
 6054 %}
 6055 
 6056 // The instruction usage is guarded by predicate in operand immFPR0().
 6057 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6058   match(Set dst con);
 6059   ins_cost(125);
 6060   format %{ "FLDZ   ST\n\t"
 6061             "FSTP   $dst" %}
 6062   ins_encode %{
 6063     __ fldz();
 6064     __ fstp_d($dst$$reg);
 6065   %}
 6066   ins_pipe(fpu_reg_con);
 6067 %}
 6068 
 6069 // The instruction usage is guarded by predicate in operand immFPR1().
 6070 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6071   match(Set dst con);
 6072   ins_cost(125);
 6073   format %{ "FLD1   ST\n\t"
 6074             "FSTP   $dst" %}
 6075   ins_encode %{
 6076     __ fld1();
 6077     __ fstp_d($dst$$reg);
 6078   %}
 6079   ins_pipe(fpu_reg_con);
 6080 %}
 6081 
 6082 // The instruction usage is guarded by predicate in operand immF().
 6083 instruct loadConF(regF dst, immF con) %{
 6084   match(Set dst con);
 6085   ins_cost(125);
 6086   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6087   ins_encode %{
 6088     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6089   %}
 6090   ins_pipe(pipe_slow);
 6091 %}
 6092 
 6093 // The instruction usage is guarded by predicate in operand immF0().
 6094 instruct loadConF0(regF dst, immF0 src) %{
 6095   match(Set dst src);
 6096   ins_cost(100);
 6097   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6098   ins_encode %{
 6099     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6100   %}
 6101   ins_pipe(pipe_slow);
 6102 %}
 6103 
 6104 // The instruction usage is guarded by predicate in operand immDPR().
 6105 instruct loadConDPR(regDPR dst, immDPR con) %{
 6106   match(Set dst con);
 6107   ins_cost(125);
 6108 
 6109   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6110             "FSTP   $dst" %}
 6111   ins_encode %{
 6112     __ fld_d($constantaddress($con));
 6113     __ fstp_d($dst$$reg);
 6114   %}
 6115   ins_pipe(fpu_reg_con);
 6116 %}
 6117 
 6118 // The instruction usage is guarded by predicate in operand immDPR0().
 6119 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6120   match(Set dst con);
 6121   ins_cost(125);
 6122 
 6123   format %{ "FLDZ   ST\n\t"
 6124             "FSTP   $dst" %}
 6125   ins_encode %{
 6126     __ fldz();
 6127     __ fstp_d($dst$$reg);
 6128   %}
 6129   ins_pipe(fpu_reg_con);
 6130 %}
 6131 
 6132 // The instruction usage is guarded by predicate in operand immDPR1().
 6133 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6134   match(Set dst con);
 6135   ins_cost(125);
 6136 
 6137   format %{ "FLD1   ST\n\t"
 6138             "FSTP   $dst" %}
 6139   ins_encode %{
 6140     __ fld1();
 6141     __ fstp_d($dst$$reg);
 6142   %}
 6143   ins_pipe(fpu_reg_con);
 6144 %}
 6145 
 6146 // The instruction usage is guarded by predicate in operand immD().
 6147 instruct loadConD(regD dst, immD con) %{
 6148   match(Set dst con);
 6149   ins_cost(125);
 6150   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6151   ins_encode %{
 6152     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6153   %}
 6154   ins_pipe(pipe_slow);
 6155 %}
 6156 
 6157 // The instruction usage is guarded by predicate in operand immD0().
 6158 instruct loadConD0(regD dst, immD0 src) %{
 6159   match(Set dst src);
 6160   ins_cost(100);
 6161   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6162   ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 6164   %}
 6165   ins_pipe( pipe_slow );
 6166 %}
 6167 
 6168 // Load Stack Slot
 6169 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6170   match(Set dst src);
 6171   ins_cost(125);
 6172 
 6173   format %{ "MOV    $dst,$src" %}
 6174   opcode(0x8B);
 6175   ins_encode( OpcP, RegMem(dst,src));
 6176   ins_pipe( ialu_reg_mem );
 6177 %}
 6178 
 6179 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6180   match(Set dst src);
 6181 
 6182   ins_cost(200);
 6183   format %{ "MOV    $dst,$src.lo\n\t"
 6184             "MOV    $dst+4,$src.hi" %}
 6185   opcode(0x8B, 0x8B);
 6186   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6187   ins_pipe( ialu_mem_long_reg );
 6188 %}
 6189 
 6190 // Load Stack Slot
 6191 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6192   match(Set dst src);
 6193   ins_cost(125);
 6194 
 6195   format %{ "MOV    $dst,$src" %}
 6196   opcode(0x8B);
 6197   ins_encode( OpcP, RegMem(dst,src));
 6198   ins_pipe( ialu_reg_mem );
 6199 %}
 6200 
 6201 // Load Stack Slot
 6202 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6203   match(Set dst src);
 6204   ins_cost(125);
 6205 
 6206   format %{ "FLD_S  $src\n\t"
 6207             "FSTP   $dst" %}
 6208   opcode(0xD9);               /* D9 /0, FLD m32real */
 6209   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6210               Pop_Reg_FPR(dst) );
 6211   ins_pipe( fpu_reg_mem );
 6212 %}
 6213 
 6214 // Load Stack Slot
 6215 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6216   match(Set dst src);
 6217   ins_cost(125);
 6218 
 6219   format %{ "FLD_D  $src\n\t"
 6220             "FSTP   $dst" %}
 6221   opcode(0xDD);               /* DD /0, FLD m64real */
 6222   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6223               Pop_Reg_DPR(dst) );
 6224   ins_pipe( fpu_reg_mem );
 6225 %}
 6226 
 6227 // Prefetch instructions for allocation.
 6228 // Must be safe to execute with invalid address (cannot fault).
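// AllocatePrefetchInstr selects the flavor matched below: 0 = PREFETCHNTA,
// 1 = PREFETCHT0, 2 = PREFETCHT2 (all requiring UseSSE >= 1), 3 = PREFETCHW;
// with UseSSE == 0 and no PREFETCHW selected there is nothing to emit, so the
// node matches an empty encoding.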
 6229 
 6230 instruct prefetchAlloc0( memory mem ) %{
 6231   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6232   match(PrefetchAllocation mem);
 6233   ins_cost(0);
 6234   size(0);
 6235   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6236   ins_encode();
 6237   ins_pipe(empty);
 6238 %}
 6239 
 6240 instruct prefetchAlloc( memory mem ) %{
 6241   predicate(AllocatePrefetchInstr==3);
 6242   match( PrefetchAllocation mem );
 6243   ins_cost(100);
 6244 
 6245   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6246   ins_encode %{
 6247     __ prefetchw($mem$$Address);
 6248   %}
 6249   ins_pipe(ialu_mem);
 6250 %}
 6251 
 6252 instruct prefetchAllocNTA( memory mem ) %{
 6253   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6254   match(PrefetchAllocation mem);
 6255   ins_cost(100);
 6256 
 6257   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6258   ins_encode %{
 6259     __ prefetchnta($mem$$Address);
 6260   %}
 6261   ins_pipe(ialu_mem);
 6262 %}
 6263 
 6264 instruct prefetchAllocT0( memory mem ) %{
 6265   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6266   match(PrefetchAllocation mem);
 6267   ins_cost(100);
 6268 
 6269   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6270   ins_encode %{
 6271     __ prefetcht0($mem$$Address);
 6272   %}
 6273   ins_pipe(ialu_mem);
 6274 %}
 6275 
 6276 instruct prefetchAllocT2( memory mem ) %{
 6277   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6278   match(PrefetchAllocation mem);
 6279   ins_cost(100);
 6280 
 6281   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6282   ins_encode %{
 6283     __ prefetcht2($mem$$Address);
 6284   %}
 6285   ins_pipe(ialu_mem);
 6286 %}
 6287 
 6288 //----------Store Instructions-------------------------------------------------
 6289 
 6290 // Store Byte
 6291 instruct storeB(memory mem, xRegI src) %{
 6292   match(Set mem (StoreB mem src));
 6293 
 6294   ins_cost(125);
 6295   format %{ "MOV8   $mem,$src" %}
 6296   opcode(0x88);
 6297   ins_encode( OpcP, RegMem( src, mem ) );
 6298   ins_pipe( ialu_mem_reg );
 6299 %}
 6300 
 6301 // Store Char/Short
 6302 instruct storeC(memory mem, rRegI src) %{
 6303   match(Set mem (StoreC mem src));
 6304 
 6305   ins_cost(125);
 6306   format %{ "MOV16  $mem,$src" %}
 6307   opcode(0x89, 0x66);
 6308   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6309   ins_pipe( ialu_mem_reg );
 6310 %}
 6311 
 6312 // Store Integer
 6313 instruct storeI(memory mem, rRegI src) %{
 6314   match(Set mem (StoreI mem src));
 6315 
 6316   ins_cost(125);
 6317   format %{ "MOV    $mem,$src" %}
 6318   opcode(0x89);
 6319   ins_encode( OpcP, RegMem( src, mem ) );
 6320   ins_pipe( ialu_mem_reg );
 6321 %}
 6322 
 6323 // Store Long
 6324 instruct storeL(long_memory mem, eRegL src) %{
 6325   predicate(!((StoreLNode*)n)->require_atomic_access());
 6326   match(Set mem (StoreL mem src));
 6327 
 6328   ins_cost(200);
 6329   format %{ "MOV    $mem,$src.lo\n\t"
 6330             "MOV    $mem+4,$src.hi" %}
 6331   opcode(0x89, 0x89);
 6332   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6333   ins_pipe( ialu_mem_long_reg );
 6334 %}
 6335 
 6336 // Store Long to Integer
 6337 instruct storeL2I(memory mem, eRegL src) %{
 6338   match(Set mem (StoreI mem (ConvL2I src)));
 6339 
 6340   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6341   ins_encode %{
 6342     __ movl($mem$$Address, $src$$Register);
 6343   %}
 6344   ins_pipe(ialu_mem_reg);
 6345 %}
 6346 
 6347 // Volatile Store Long.  Must be atomic, so move it into
 6348 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6349 // target address before the store (for null-ptr checks)
 6350 // so the memory operand is used twice in the encoding.
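// Roughly the emitted shape (a sketch, not the literal encoding):
//   CMP   EAX,[$mem]   ; touch the address so an implicit null check can fire
//   FILD  [$src]       ; one 64-bit load from the stack slot onto the FP stack
//   FISTP [$mem]       ; one 64-bit store back to memory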
 6351 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6352   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6353   match(Set mem (StoreL mem src));
 6354   effect( KILL cr );
 6355   ins_cost(400);
 6356   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6357             "FILD   $src\n\t"
 6358             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6359   opcode(0x3B);
 6360   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6361   ins_pipe( fpu_reg_mem );
 6362 %}
 6363 
 6364 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6365   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6366   match(Set mem (StoreL mem src));
 6367   effect( TEMP tmp, KILL cr );
 6368   ins_cost(380);
 6369   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6370             "MOVSD  $tmp,$src\n\t"
 6371             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6372   ins_encode %{
 6373     __ cmpl(rax, $mem$$Address);
 6374     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6375     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6376   %}
 6377   ins_pipe( pipe_slow );
 6378 %}
 6379 
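// When the long is already in a GPR pair (storeLX_reg_volatile below), the value
// is first assembled in an XMM register: MOVD moves each 32-bit half, PUNPCKLDQ
// interleaves the low dwords so $tmp ends up holding hi:lo, and a single MOVSD
// then writes all 64 bits atomically.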
 6380 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6381   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6382   match(Set mem (StoreL mem src));
 6383   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6384   ins_cost(360);
 6385   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6386             "MOVD   $tmp,$src.lo\n\t"
 6387             "MOVD   $tmp2,$src.hi\n\t"
 6388             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6389             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6390   ins_encode %{
 6391     __ cmpl(rax, $mem$$Address);
 6392     __ movdl($tmp$$XMMRegister, $src$$Register);
 6393     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6394     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6395     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6396   %}
 6397   ins_pipe( pipe_slow );
 6398 %}
 6399 
 6400 // Store Pointer; for storing unknown oops and raw pointers
 6401 instruct storeP(memory mem, anyRegP src) %{
 6402   match(Set mem (StoreP mem src));
 6403 
 6404   ins_cost(125);
 6405   format %{ "MOV    $mem,$src" %}
 6406   opcode(0x89);
 6407   ins_encode( OpcP, RegMem( src, mem ) );
 6408   ins_pipe( ialu_mem_reg );
 6409 %}
 6410 
 6411 // Store Integer Immediate
 6412 instruct storeImmI(memory mem, immI src) %{
 6413   match(Set mem (StoreI mem src));
 6414 
 6415   ins_cost(150);
 6416   format %{ "MOV    $mem,$src" %}
 6417   opcode(0xC7);               /* C7 /0 */
 6418   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6419   ins_pipe( ialu_mem_imm );
 6420 %}
 6421 
 6422 // Store Short/Char Immediate
 6423 instruct storeImmI16(memory mem, immI16 src) %{
 6424   predicate(UseStoreImmI16);
 6425   match(Set mem (StoreC mem src));
 6426 
 6427   ins_cost(150);
 6428   format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0, same as the 32-bit store immediate but with an operand-size prefix */
 6430   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6431   ins_pipe( ialu_mem_imm );
 6432 %}
 6433 
 6434 // Store Pointer Immediate; null pointers or constant oops that do not
 6435 // need card-mark barriers.
 6436 instruct storeImmP(memory mem, immP src) %{
 6437   match(Set mem (StoreP mem src));
 6438 
 6439   ins_cost(150);
 6440   format %{ "MOV    $mem,$src" %}
 6441   opcode(0xC7);               /* C7 /0 */
 6442   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6443   ins_pipe( ialu_mem_imm );
 6444 %}
 6445 
 6446 // Store Byte Immediate
 6447 instruct storeImmB(memory mem, immI8 src) %{
 6448   match(Set mem (StoreB mem src));
 6449 
 6450   ins_cost(150);
 6451   format %{ "MOV8   $mem,$src" %}
 6452   opcode(0xC6);               /* C6 /0 */
 6453   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6454   ins_pipe( ialu_mem_imm );
 6455 %}
 6456 
 6457 // Store CMS card-mark Immediate
 6458 instruct storeImmCM(memory mem, immI8 src) %{
 6459   match(Set mem (StoreCM mem src));
 6460 
 6461   ins_cost(150);
 6462   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6463   opcode(0xC6);               /* C6 /0 */
 6464   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6465   ins_pipe( ialu_mem_imm );
 6466 %}
 6467 
 6468 // Store Double
 6469 instruct storeDPR( memory mem, regDPR1 src) %{
 6470   predicate(UseSSE<=1);
 6471   match(Set mem (StoreD mem src));
 6472 
 6473   ins_cost(100);
 6474   format %{ "FST_D  $mem,$src" %}
 6475   opcode(0xDD);       /* DD /2 */
 6476   ins_encode( enc_FPR_store(mem,src) );
 6477   ins_pipe( fpu_mem_reg );
 6478 %}
 6479 
 6480 // Store double does rounding on x86
 6481 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6482   predicate(UseSSE<=1);
 6483   match(Set mem (StoreD mem (RoundDouble src)));
 6484 
 6485   ins_cost(100);
 6486   format %{ "FST_D  $mem,$src\t# round" %}
 6487   opcode(0xDD);       /* DD /2 */
 6488   ins_encode( enc_FPR_store(mem,src) );
 6489   ins_pipe( fpu_mem_reg );
 6490 %}
 6491 
// Store XMM register to memory (double-precision floating point)
 6493 // MOVSD instruction
 6494 instruct storeD(memory mem, regD src) %{
 6495   predicate(UseSSE>=2);
 6496   match(Set mem (StoreD mem src));
 6497   ins_cost(95);
 6498   format %{ "MOVSD  $mem,$src" %}
 6499   ins_encode %{
 6500     __ movdbl($mem$$Address, $src$$XMMRegister);
 6501   %}
 6502   ins_pipe( pipe_slow );
 6503 %}
 6504 
 6505 // Store XMM register to memory (single-precision floating point)
 6506 // MOVSS instruction
 6507 instruct storeF(memory mem, regF src) %{
 6508   predicate(UseSSE>=1);
 6509   match(Set mem (StoreF mem src));
 6510   ins_cost(95);
 6511   format %{ "MOVSS  $mem,$src" %}
 6512   ins_encode %{
 6513     __ movflt($mem$$Address, $src$$XMMRegister);
 6514   %}
 6515   ins_pipe( pipe_slow );
 6516 %}
 6517 
 6518 
 6519 // Store Float
 6520 instruct storeFPR( memory mem, regFPR1 src) %{
 6521   predicate(UseSSE==0);
 6522   match(Set mem (StoreF mem src));
 6523 
 6524   ins_cost(100);
 6525   format %{ "FST_S  $mem,$src" %}
 6526   opcode(0xD9);       /* D9 /2 */
 6527   ins_encode( enc_FPR_store(mem,src) );
 6528   ins_pipe( fpu_mem_reg );
 6529 %}
 6530 
 6531 // Store Float does rounding on x86
 6532 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6533   predicate(UseSSE==0);
 6534   match(Set mem (StoreF mem (RoundFloat src)));
 6535 
 6536   ins_cost(100);
 6537   format %{ "FST_S  $mem,$src\t# round" %}
 6538   opcode(0xD9);       /* D9 /2 */
 6539   ins_encode( enc_FPR_store(mem,src) );
 6540   ins_pipe( fpu_mem_reg );
 6541 %}
 6542 
// Store Float from a double register; FST_S performs the D2F rounding on x86
 6544 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6545   predicate(UseSSE<=1);
 6546   match(Set mem (StoreF mem (ConvD2F src)));
 6547 
 6548   ins_cost(100);
 6549   format %{ "FST_S  $mem,$src\t# D-round" %}
 6550   opcode(0xD9);       /* D9 /2 */
 6551   ins_encode( enc_FPR_store(mem,src) );
 6552   ins_pipe( fpu_mem_reg );
 6553 %}
 6554 
// Store immediate Float value (faster than storing from an FPU register)
 6556 // The instruction usage is guarded by predicate in operand immFPR().
 6557 instruct storeFPR_imm( memory mem, immFPR src) %{
 6558   match(Set mem (StoreF mem src));
 6559 
 6560   ins_cost(50);
 6561   format %{ "MOV    $mem,$src\t# store float" %}
 6562   opcode(0xC7);               /* C7 /0 */
 6563   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6564   ins_pipe( ialu_mem_imm );
 6565 %}
 6566 
// Store immediate Float value (faster than storing from an XMM register)
 6568 // The instruction usage is guarded by predicate in operand immF().
 6569 instruct storeF_imm( memory mem, immF src) %{
 6570   match(Set mem (StoreF mem src));
 6571 
 6572   ins_cost(50);
 6573   format %{ "MOV    $mem,$src\t# store float" %}
 6574   opcode(0xC7);               /* C7 /0 */
 6575   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6576   ins_pipe( ialu_mem_imm );
 6577 %}
 6578 
 6579 // Store Integer to stack slot
 6580 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6581   match(Set dst src);
 6582 
 6583   ins_cost(100);
 6584   format %{ "MOV    $dst,$src" %}
 6585   opcode(0x89);
 6586   ins_encode( OpcPRegSS( dst, src ) );
 6587   ins_pipe( ialu_mem_reg );
 6588 %}
 6589 
// Store Pointer to stack slot
 6591 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6592   match(Set dst src);
 6593 
 6594   ins_cost(100);
 6595   format %{ "MOV    $dst,$src" %}
 6596   opcode(0x89);
 6597   ins_encode( OpcPRegSS( dst, src ) );
 6598   ins_pipe( ialu_mem_reg );
 6599 %}
 6600 
 6601 // Store Long to stack slot
 6602 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6603   match(Set dst src);
 6604 
 6605   ins_cost(200);
 6606   format %{ "MOV    $dst,$src.lo\n\t"
 6607             "MOV    $dst+4,$src.hi" %}
 6608   opcode(0x89, 0x89);
 6609   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6610   ins_pipe( ialu_mem_long_reg );
 6611 %}
 6612 
 6613 //----------MemBar Instructions-----------------------------------------------
 6614 // Memory barrier flavors
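// x86's TSO memory model already orders all accesses except store-load, so the
// acquire, release and storestore flavors below match to empty encodings; only
// MemBarVolatile emits code (LOCK ADDL [ESP+0],0, acting as a StoreLoad fence),
// and even that is elided when post_store_load_barrier() proves it redundant.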
 6615 
 6616 instruct membar_acquire() %{
 6617   match(MemBarAcquire);
 6618   match(LoadFence);
 6619   ins_cost(400);
 6620 
 6621   size(0);
 6622   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6623   ins_encode();
 6624   ins_pipe(empty);
 6625 %}
 6626 
 6627 instruct membar_acquire_lock() %{
 6628   match(MemBarAcquireLock);
 6629   ins_cost(0);
 6630 
 6631   size(0);
 6632   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6633   ins_encode( );
 6634   ins_pipe(empty);
 6635 %}
 6636 
 6637 instruct membar_release() %{
 6638   match(MemBarRelease);
 6639   match(StoreFence);
 6640   ins_cost(400);
 6641 
 6642   size(0);
 6643   format %{ "MEMBAR-release ! (empty encoding)" %}
 6644   ins_encode( );
 6645   ins_pipe(empty);
 6646 %}
 6647 
 6648 instruct membar_release_lock() %{
 6649   match(MemBarReleaseLock);
 6650   ins_cost(0);
 6651 
 6652   size(0);
 6653   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6654   ins_encode( );
 6655   ins_pipe(empty);
 6656 %}
 6657 
 6658 instruct membar_volatile(eFlagsReg cr) %{
 6659   match(MemBarVolatile);
 6660   effect(KILL cr);
 6661   ins_cost(400);
 6662 
 6663   format %{
 6664     $$template
 6665     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6666   %}
 6667   ins_encode %{
 6668     __ membar(Assembler::StoreLoad);
 6669   %}
 6670   ins_pipe(pipe_slow);
 6671 %}
 6672 
 6673 instruct unnecessary_membar_volatile() %{
 6674   match(MemBarVolatile);
 6675   predicate(Matcher::post_store_load_barrier(n));
 6676   ins_cost(0);
 6677 
 6678   size(0);
 6679   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6680   ins_encode( );
 6681   ins_pipe(empty);
 6682 %}
 6683 
 6684 instruct membar_storestore() %{
 6685   match(MemBarStoreStore);
 6686   match(StoreStoreFence);
 6687   ins_cost(0);
 6688 
 6689   size(0);
 6690   format %{ "MEMBAR-storestore (empty encoding)" %}
 6691   ins_encode( );
 6692   ins_pipe(empty);
 6693 %}
 6694 
 6695 //----------Move Instructions--------------------------------------------------
 6696 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6697   match(Set dst (CastX2P src));
 6698   format %{ "# X2P  $dst, $src" %}
 6699   ins_encode( /*empty encoding*/ );
 6700   ins_cost(0);
 6701   ins_pipe(empty);
 6702 %}
 6703 
 6704 instruct castP2X(rRegI dst, eRegP src ) %{
 6705   match(Set dst (CastP2X src));
 6706   ins_cost(50);
 6707   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6708   ins_encode( enc_Copy( dst, src) );
 6709   ins_pipe( ialu_reg_reg );
 6710 %}
 6711 
 6712 //----------Conditional Move---------------------------------------------------
 6713 // Conditional move
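// Without CMOV support (predicate !VM_Version::supports_cmov()) the move is
// emulated with a short branch that jumps over the MOV when the condition is
// false; note that the emitted branch condition is the CMOV condition inverted.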
 6714 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6715   predicate(!VM_Version::supports_cmov() );
 6716   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6717   ins_cost(200);
 6718   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6719             "MOV    $dst,$src\n"
 6720       "skip:" %}
 6721   ins_encode %{
 6722     Label Lskip;
 6723     // Invert sense of branch from sense of CMOV
 6724     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6725     __ movl($dst$$Register, $src$$Register);
 6726     __ bind(Lskip);
 6727   %}
 6728   ins_pipe( pipe_cmov_reg );
 6729 %}
 6730 
 6731 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6732   predicate(!VM_Version::supports_cmov() );
 6733   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6734   ins_cost(200);
 6735   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6736             "MOV    $dst,$src\n"
 6737       "skip:" %}
 6738   ins_encode %{
 6739     Label Lskip;
 6740     // Invert sense of branch from sense of CMOV
 6741     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6742     __ movl($dst$$Register, $src$$Register);
 6743     __ bind(Lskip);
 6744   %}
 6745   ins_pipe( pipe_cmov_reg );
 6746 %}
 6747 
 6748 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6749   predicate(VM_Version::supports_cmov() );
 6750   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6751   ins_cost(200);
 6752   format %{ "CMOV$cop $dst,$src" %}
 6753   opcode(0x0F,0x40);
 6754   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6755   ins_pipe( pipe_cmov_reg );
 6756 %}
 6757 
 6758 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6759   predicate(VM_Version::supports_cmov() );
 6760   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6761   ins_cost(200);
 6762   format %{ "CMOV$cop $dst,$src" %}
 6763   opcode(0x0F,0x40);
 6764   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6765   ins_pipe( pipe_cmov_reg );
 6766 %}
 6767 
 6768 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6769   predicate(VM_Version::supports_cmov() );
 6770   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6771   ins_cost(200);
 6772   expand %{
 6773     cmovI_regU(cop, cr, dst, src);
 6774   %}
 6775 %}
 6776 
 6777 // Conditional move
 6778 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6779   predicate(VM_Version::supports_cmov() );
 6780   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6781   ins_cost(250);
 6782   format %{ "CMOV$cop $dst,$src" %}
 6783   opcode(0x0F,0x40);
 6784   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6785   ins_pipe( pipe_cmov_mem );
 6786 %}
 6787 
 6788 // Conditional move
 6789 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6790   predicate(VM_Version::supports_cmov() );
 6791   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6792   ins_cost(250);
 6793   format %{ "CMOV$cop $dst,$src" %}
 6794   opcode(0x0F,0x40);
 6795   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6796   ins_pipe( pipe_cmov_mem );
 6797 %}
 6798 
 6799 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6800   predicate(VM_Version::supports_cmov() );
 6801   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6802   ins_cost(250);
 6803   expand %{
 6804     cmovI_memU(cop, cr, dst, src);
 6805   %}
 6806 %}
 6807 
 6808 // Conditional move
 6809 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6810   predicate(VM_Version::supports_cmov() );
 6811   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6812   ins_cost(200);
 6813   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6814   opcode(0x0F,0x40);
 6815   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6816   ins_pipe( pipe_cmov_reg );
 6817 %}
 6818 
 6819 // Conditional move (non-P6 version)
 6820 // Note:  a CMoveP is generated for  stubs and native wrappers
 6821 //        regardless of whether we are on a P6, so we
 6822 //        emulate a cmov here
 6823 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6824   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6825   ins_cost(300);
 6826   format %{ "Jn$cop   skip\n\t"
 6827           "MOV    $dst,$src\t# pointer\n"
 6828       "skip:" %}
 6829   opcode(0x8b);
 6830   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6831   ins_pipe( pipe_cmov_reg );
 6832 %}
 6833 
 6834 // Conditional move
 6835 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6836   predicate(VM_Version::supports_cmov() );
 6837   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6838   ins_cost(200);
 6839   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6840   opcode(0x0F,0x40);
 6841   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6842   ins_pipe( pipe_cmov_reg );
 6843 %}
 6844 
 6845 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6846   predicate(VM_Version::supports_cmov() );
 6847   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6848   ins_cost(200);
 6849   expand %{
 6850     cmovP_regU(cop, cr, dst, src);
 6851   %}
 6852 %}
 6853 
 6854 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6855 // correctly meets the two pointer arguments; one is an incoming
 6856 // register but the other is a memory operand.  ALSO appears to
 6857 // be buggy with implicit null checks.
 6858 //
 6859 //// Conditional move
 6860 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6861 //  predicate(VM_Version::supports_cmov() );
 6862 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6863 //  ins_cost(250);
 6864 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6865 //  opcode(0x0F,0x40);
 6866 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6867 //  ins_pipe( pipe_cmov_mem );
 6868 //%}
 6869 //
 6870 //// Conditional move
 6871 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6872 //  predicate(VM_Version::supports_cmov() );
 6873 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6874 //  ins_cost(250);
 6875 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6876 //  opcode(0x0F,0x40);
 6877 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6878 //  ins_pipe( pipe_cmov_mem );
 6879 //%}
 6880 
 6881 // Conditional move
 6882 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6883   predicate(UseSSE<=1);
 6884   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6885   ins_cost(200);
 6886   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6887   opcode(0xDA);
 6888   ins_encode( enc_cmov_dpr(cop,src) );
 6889   ins_pipe( pipe_cmovDPR_reg );
 6890 %}
 6891 
 6892 // Conditional move
 6893 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6894   predicate(UseSSE==0);
 6895   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6896   ins_cost(200);
 6897   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6898   opcode(0xDA);
 6899   ins_encode( enc_cmov_dpr(cop,src) );
 6900   ins_pipe( pipe_cmovDPR_reg );
 6901 %}
 6902 
 6903 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
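// FCMOVcc can only test CF/ZF/PF (the flags an FP compare produces), so for a
// signed integer cmpOp the code falls back to a short branch around a plain
// FP register-to-register copy.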
 6904 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6905   predicate(UseSSE<=1);
 6906   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6907   ins_cost(200);
 6908   format %{ "Jn$cop   skip\n\t"
 6909             "MOV    $dst,$src\t# double\n"
 6910       "skip:" %}
 6911   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6912   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6913   ins_pipe( pipe_cmovDPR_reg );
 6914 %}
 6915 
 6916 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6917 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6918   predicate(UseSSE==0);
 6919   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6920   ins_cost(200);
 6921   format %{ "Jn$cop    skip\n\t"
 6922             "MOV    $dst,$src\t# float\n"
 6923       "skip:" %}
 6924   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6925   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6926   ins_pipe( pipe_cmovDPR_reg );
 6927 %}
 6928 
// There is no FP CMOV with SSE/SSE2, so branch around a MOVSS/MOVSD instead
 6930 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6931   predicate (UseSSE>=1);
 6932   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6933   ins_cost(200);
 6934   format %{ "Jn$cop   skip\n\t"
 6935             "MOVSS  $dst,$src\t# float\n"
 6936       "skip:" %}
 6937   ins_encode %{
 6938     Label skip;
 6939     // Invert sense of branch from sense of CMOV
 6940     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6941     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6942     __ bind(skip);
 6943   %}
 6944   ins_pipe( pipe_slow );
 6945 %}
 6946 
// There is no FP CMOV with SSE/SSE2, so branch around a MOVSS/MOVSD instead
 6948 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6949   predicate (UseSSE>=2);
 6950   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6951   ins_cost(200);
 6952   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6954       "skip:" %}
 6955   ins_encode %{
 6956     Label skip;
 6957     // Invert sense of branch from sense of CMOV
 6958     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6959     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6960     __ bind(skip);
 6961   %}
 6962   ins_pipe( pipe_slow );
 6963 %}
 6964 
 6965 // unsigned version
 6966 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6967   predicate (UseSSE>=1);
 6968   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6969   ins_cost(200);
 6970   format %{ "Jn$cop   skip\n\t"
 6971             "MOVSS  $dst,$src\t# float\n"
 6972       "skip:" %}
 6973   ins_encode %{
 6974     Label skip;
 6975     // Invert sense of branch from sense of CMOV
 6976     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6977     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6978     __ bind(skip);
 6979   %}
 6980   ins_pipe( pipe_slow );
 6981 %}
 6982 
 6983 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6984   predicate (UseSSE>=1);
 6985   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6986   ins_cost(200);
 6987   expand %{
 6988     fcmovF_regU(cop, cr, dst, src);
 6989   %}
 6990 %}
 6991 
 6992 // unsigned version
 6993 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6994   predicate (UseSSE>=2);
 6995   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6996   ins_cost(200);
 6997   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6999       "skip:" %}
 7000   ins_encode %{
 7001     Label skip;
 7002     // Invert sense of branch from sense of CMOV
 7003     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7004     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7005     __ bind(skip);
 7006   %}
 7007   ins_pipe( pipe_slow );
 7008 %}
 7009 
 7010 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7011   predicate (UseSSE>=2);
 7012   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7013   ins_cost(200);
 7014   expand %{
 7015     fcmovD_regU(cop, cr, dst, src);
 7016   %}
 7017 %}
 7018 
 7019 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7020   predicate(VM_Version::supports_cmov() );
 7021   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7022   ins_cost(200);
 7023   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7024             "CMOV$cop $dst.hi,$src.hi" %}
 7025   opcode(0x0F,0x40);
 7026   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7027   ins_pipe( pipe_cmov_reg_long );
 7028 %}
 7029 
 7030 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7031   predicate(VM_Version::supports_cmov() );
 7032   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7033   ins_cost(200);
 7034   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7035             "CMOV$cop $dst.hi,$src.hi" %}
 7036   opcode(0x0F,0x40);
 7037   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7038   ins_pipe( pipe_cmov_reg_long );
 7039 %}
 7040 
 7041 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7042   predicate(VM_Version::supports_cmov() );
 7043   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7044   ins_cost(200);
 7045   expand %{
 7046     cmovL_regU(cop, cr, dst, src);
 7047   %}
 7048 %}
 7049 
 7050 //----------Arithmetic Instructions--------------------------------------------
 7051 //----------Addition Instructions----------------------------------------------
 7052 
 7053 // Integer Addition Instructions
 7054 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7055   match(Set dst (AddI dst src));
 7056   effect(KILL cr);
 7057 
 7058   size(2);
 7059   format %{ "ADD    $dst,$src" %}
 7060   opcode(0x03);
 7061   ins_encode( OpcP, RegReg( dst, src) );
 7062   ins_pipe( ialu_reg_reg );
 7063 %}
 7064 
 7065 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7066   match(Set dst (AddI dst src));
 7067   effect(KILL cr);
 7068 
 7069   format %{ "ADD    $dst,$src" %}
 7070   opcode(0x81, 0x00); /* /0 id */
 7071   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7072   ins_pipe( ialu_reg );
 7073 %}
 7074 
 7075 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7076   predicate(UseIncDec);
 7077   match(Set dst (AddI dst src));
 7078   effect(KILL cr);
 7079 
 7080   size(1);
 7081   format %{ "INC    $dst" %}
  opcode(0x40); /* 40+rd : INC r32 */
 7083   ins_encode( Opc_plus( primary, dst ) );
 7084   ins_pipe( ialu_reg );
 7085 %}
 7086 
 7087 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7088   match(Set dst (AddI src0 src1));
 7089   ins_cost(110);
 7090 
 7091   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7092   opcode(0x8D); /* 0x8D /r */
 7093   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7094   ins_pipe( ialu_reg_reg );
 7095 %}
 7096 
 7097 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7098   match(Set dst (AddP src0 src1));
 7099   ins_cost(110);
 7100 
 7101   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7102   opcode(0x8D); /* 0x8D /r */
 7103   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7104   ins_pipe( ialu_reg_reg );
 7105 %}
 7106 
 7107 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7108   predicate(UseIncDec);
 7109   match(Set dst (AddI dst src));
 7110   effect(KILL cr);
 7111 
 7112   size(1);
 7113   format %{ "DEC    $dst" %}
  opcode(0x48); /* 48+rd : DEC r32 */
 7115   ins_encode( Opc_plus( primary, dst ) );
 7116   ins_pipe( ialu_reg );
 7117 %}
 7118 
 7119 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7120   match(Set dst (AddP dst src));
 7121   effect(KILL cr);
 7122 
 7123   size(2);
 7124   format %{ "ADD    $dst,$src" %}
 7125   opcode(0x03);
 7126   ins_encode( OpcP, RegReg( dst, src) );
 7127   ins_pipe( ialu_reg_reg );
 7128 %}
 7129 
 7130 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7131   match(Set dst (AddP dst src));
 7132   effect(KILL cr);
 7133 
 7134   format %{ "ADD    $dst,$src" %}
 7135   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7136   // ins_encode( RegImm( dst, src) );
 7137   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7138   ins_pipe( ialu_reg );
 7139 %}
 7140 
 7141 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7142   match(Set dst (AddI dst (LoadI src)));
 7143   effect(KILL cr);
 7144 
 7145   ins_cost(150);
 7146   format %{ "ADD    $dst,$src" %}
 7147   opcode(0x03);
 7148   ins_encode( OpcP, RegMem( dst, src) );
 7149   ins_pipe( ialu_reg_mem );
 7150 %}
 7151 
 7152 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7153   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7154   effect(KILL cr);
 7155 
 7156   ins_cost(150);
 7157   format %{ "ADD    $dst,$src" %}
 7158   opcode(0x01);  /* Opcode 01 /r */
 7159   ins_encode( OpcP, RegMem( src, dst ) );
 7160   ins_pipe( ialu_mem_reg );
 7161 %}
 7162 
// Add Immediate to Memory
 7164 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7165   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7166   effect(KILL cr);
 7167 
 7168   ins_cost(125);
 7169   format %{ "ADD    $dst,$src" %}
 7170   opcode(0x81);               /* Opcode 81 /0 id */
 7171   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7172   ins_pipe( ialu_mem_imm );
 7173 %}
 7174 
 7175 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7176   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7177   effect(KILL cr);
 7178 
 7179   ins_cost(125);
 7180   format %{ "INC    $dst" %}
 7181   opcode(0xFF);               /* Opcode FF /0 */
 7182   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7183   ins_pipe( ialu_mem_imm );
 7184 %}
 7185 
 7186 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7187   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7188   effect(KILL cr);
 7189 
 7190   ins_cost(125);
 7191   format %{ "DEC    $dst" %}
 7192   opcode(0xFF);               /* Opcode FF /1 */
 7193   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7194   ins_pipe( ialu_mem_imm );
 7195 %}
 7196 
 7197 
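// The cast nodes below (CheckCastPP, CastPP, CastII, CastLL, CastFF, CastDD)
// only carry a sharper compile-time type; they all use empty encodings and
// emit no code.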
 7198 instruct checkCastPP( eRegP dst ) %{
 7199   match(Set dst (CheckCastPP dst));
 7200 
 7201   size(0);
 7202   format %{ "#checkcastPP of $dst" %}
 7203   ins_encode( /*empty encoding*/ );
 7204   ins_pipe( empty );
 7205 %}
 7206 
 7207 instruct castPP( eRegP dst ) %{
 7208   match(Set dst (CastPP dst));
 7209   format %{ "#castPP of $dst" %}
 7210   ins_encode( /*empty encoding*/ );
 7211   ins_pipe( empty );
 7212 %}
 7213 
 7214 instruct castII( rRegI dst ) %{
 7215   match(Set dst (CastII dst));
 7216   format %{ "#castII of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castLL( eRegL dst ) %{
 7223   match(Set dst (CastLL dst));
 7224   format %{ "#castLL of $dst" %}
 7225   ins_encode( /*empty encoding*/ );
 7226   ins_cost(0);
 7227   ins_pipe( empty );
 7228 %}
 7229 
 7230 instruct castFF( regF dst ) %{
 7231   predicate(UseSSE >= 1);
 7232   match(Set dst (CastFF dst));
 7233   format %{ "#castFF of $dst" %}
 7234   ins_encode( /*empty encoding*/ );
 7235   ins_cost(0);
 7236   ins_pipe( empty );
 7237 %}
 7238 
 7239 instruct castDD( regD dst ) %{
 7240   predicate(UseSSE >= 2);
 7241   match(Set dst (CastDD dst));
 7242   format %{ "#castDD of $dst" %}
 7243   ins_encode( /*empty encoding*/ );
 7244   ins_cost(0);
 7245   ins_pipe( empty );
 7246 %}
 7247 
 7248 instruct castFF_PR( regFPR dst ) %{
 7249   predicate(UseSSE < 1);
 7250   match(Set dst (CastFF dst));
 7251   format %{ "#castFF of $dst" %}
 7252   ins_encode( /*empty encoding*/ );
 7253   ins_cost(0);
 7254   ins_pipe( empty );
 7255 %}
 7256 
 7257 instruct castDD_PR( regDPR dst ) %{
 7258   predicate(UseSSE < 2);
 7259   match(Set dst (CastDD dst));
 7260   format %{ "#castDD of $dst" %}
 7261   ins_encode( /*empty encoding*/ );
 7262   ins_cost(0);
 7263   ins_pipe( empty );
 7264 %}
 7265 
 7266 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7267 
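// CMPXCHG8B compares EDX:EAX against the 64-bit memory operand and, on a match,
// stores ECX:EBX, hence the eADXRegL/eBCXRegL operand classes below; the 32-bit
// CMPXCHG forms likewise require the expected value in EAX.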
 7268 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7269   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7270   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7271   effect(KILL cr, KILL oldval);
 7272   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7273             "MOV    $res,0\n\t"
 7274             "JNE,s  fail\n\t"
 7275             "MOV    $res,1\n"
 7276           "fail:" %}
 7277   ins_encode( enc_cmpxchg8(mem_ptr),
 7278               enc_flags_ne_to_boolean(res) );
 7279   ins_pipe( pipe_cmpxchg );
 7280 %}
 7281 
 7282 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7283   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7284   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7285   effect(KILL cr, KILL oldval);
 7286   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7287             "MOV    $res,0\n\t"
 7288             "JNE,s  fail\n\t"
 7289             "MOV    $res,1\n"
 7290           "fail:" %}
 7291   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7292   ins_pipe( pipe_cmpxchg );
 7293 %}
 7294 
 7295 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7296   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7297   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7298   effect(KILL cr, KILL oldval);
 7299   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7300             "MOV    $res,0\n\t"
 7301             "JNE,s  fail\n\t"
 7302             "MOV    $res,1\n"
 7303           "fail:" %}
 7304   ins_encode( enc_cmpxchgb(mem_ptr),
 7305               enc_flags_ne_to_boolean(res) );
 7306   ins_pipe( pipe_cmpxchg );
 7307 %}
 7308 
 7309 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7310   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7311   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7312   effect(KILL cr, KILL oldval);
 7313   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7314             "MOV    $res,0\n\t"
 7315             "JNE,s  fail\n\t"
 7316             "MOV    $res,1\n"
 7317           "fail:" %}
 7318   ins_encode( enc_cmpxchgw(mem_ptr),
 7319               enc_flags_ne_to_boolean(res) );
 7320   ins_pipe( pipe_cmpxchg );
 7321 %}
 7322 
 7323 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7324   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7325   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7326   effect(KILL cr, KILL oldval);
 7327   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7328             "MOV    $res,0\n\t"
 7329             "JNE,s  fail\n\t"
 7330             "MOV    $res,1\n"
 7331           "fail:" %}
 7332   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7333   ins_pipe( pipe_cmpxchg );
 7334 %}
 7335 
 7336 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7337   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7338   effect(KILL cr);
 7339   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7340   ins_encode( enc_cmpxchg8(mem_ptr) );
 7341   ins_pipe( pipe_cmpxchg );
 7342 %}
 7343 
 7344 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7345   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7346   effect(KILL cr);
 7347   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7348   ins_encode( enc_cmpxchg(mem_ptr) );
 7349   ins_pipe( pipe_cmpxchg );
 7350 %}
 7351 
 7352 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7353   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7354   effect(KILL cr);
 7355   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7356   ins_encode( enc_cmpxchgb(mem_ptr) );
 7357   ins_pipe( pipe_cmpxchg );
 7358 %}
 7359 
 7360 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7361   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7362   effect(KILL cr);
 7363   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7364   ins_encode( enc_cmpxchgw(mem_ptr) );
 7365   ins_pipe( pipe_cmpxchg );
 7366 %}
 7367 
 7368 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7369   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7370   effect(KILL cr);
 7371   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7372   ins_encode( enc_cmpxchg(mem_ptr) );
 7373   ins_pipe( pipe_cmpxchg );
 7374 %}
 7375 
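// For GetAndAdd, when the fetched value is unused (result_not_used()) a plain
// LOCK ADD is emitted instead of LOCK XADD, since the old value never needs to
// be returned in a register.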
 7376 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7377   predicate(n->as_LoadStore()->result_not_used());
 7378   match(Set dummy (GetAndAddB mem add));
 7379   effect(KILL cr);
 7380   format %{ "ADDB  [$mem],$add" %}
 7381   ins_encode %{
 7382     __ lock();
 7383     __ addb($mem$$Address, $add$$constant);
 7384   %}
 7385   ins_pipe( pipe_cmpxchg );
 7386 %}
 7387 
 7388 // Important to match to xRegI: only 8-bit regs.
 7389 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7390   match(Set newval (GetAndAddB mem newval));
 7391   effect(KILL cr);
 7392   format %{ "XADDB  [$mem],$newval" %}
 7393   ins_encode %{
 7394     __ lock();
 7395     __ xaddb($mem$$Address, $newval$$Register);
 7396   %}
 7397   ins_pipe( pipe_cmpxchg );
 7398 %}
 7399 
 7400 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7401   predicate(n->as_LoadStore()->result_not_used());
 7402   match(Set dummy (GetAndAddS mem add));
 7403   effect(KILL cr);
 7404   format %{ "ADDS  [$mem],$add" %}
 7405   ins_encode %{
 7406     __ lock();
 7407     __ addw($mem$$Address, $add$$constant);
 7408   %}
 7409   ins_pipe( pipe_cmpxchg );
 7410 %}
 7411 
 7412 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7413   match(Set newval (GetAndAddS mem newval));
 7414   effect(KILL cr);
 7415   format %{ "XADDS  [$mem],$newval" %}
 7416   ins_encode %{
 7417     __ lock();
 7418     __ xaddw($mem$$Address, $newval$$Register);
 7419   %}
 7420   ins_pipe( pipe_cmpxchg );
 7421 %}
 7422 
 7423 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7424   predicate(n->as_LoadStore()->result_not_used());
 7425   match(Set dummy (GetAndAddI mem add));
 7426   effect(KILL cr);
 7427   format %{ "ADDL  [$mem],$add" %}
 7428   ins_encode %{
 7429     __ lock();
 7430     __ addl($mem$$Address, $add$$constant);
 7431   %}
 7432   ins_pipe( pipe_cmpxchg );
 7433 %}
 7434 
 7435 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7436   match(Set newval (GetAndAddI mem newval));
 7437   effect(KILL cr);
 7438   format %{ "XADDL  [$mem],$newval" %}
 7439   ins_encode %{
 7440     __ lock();
 7441     __ xaddl($mem$$Address, $newval$$Register);
 7442   %}
 7443   ins_pipe( pipe_cmpxchg );
 7444 %}
 7445 
 7446 // Important to match to xRegI: only 8-bit regs.
 7447 instruct xchgB( memory mem, xRegI newval) %{
 7448   match(Set newval (GetAndSetB mem newval));
 7449   format %{ "XCHGB  $newval,[$mem]" %}
 7450   ins_encode %{
 7451     __ xchgb($newval$$Register, $mem$$Address);
 7452   %}
 7453   ins_pipe( pipe_cmpxchg );
 7454 %}
 7455 
 7456 instruct xchgS( memory mem, rRegI newval) %{
 7457   match(Set newval (GetAndSetS mem newval));
 7458   format %{ "XCHGW  $newval,[$mem]" %}
 7459   ins_encode %{
 7460     __ xchgw($newval$$Register, $mem$$Address);
 7461   %}
 7462   ins_pipe( pipe_cmpxchg );
 7463 %}
 7464 
 7465 instruct xchgI( memory mem, rRegI newval) %{
 7466   match(Set newval (GetAndSetI mem newval));
 7467   format %{ "XCHGL  $newval,[$mem]" %}
 7468   ins_encode %{
 7469     __ xchgl($newval$$Register, $mem$$Address);
 7470   %}
 7471   ins_pipe( pipe_cmpxchg );
 7472 %}
 7473 
 7474 instruct xchgP( memory mem, pRegP newval) %{
 7475   match(Set newval (GetAndSetP mem newval));
 7476   format %{ "XCHGL  $newval,[$mem]" %}
 7477   ins_encode %{
 7478     __ xchgl($newval$$Register, $mem$$Address);
 7479   %}
 7480   ins_pipe( pipe_cmpxchg );
 7481 %}
 7482 
 7483 //----------Subtraction Instructions-------------------------------------------
 7484 
 7485 // Integer Subtraction Instructions
 7486 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7487   match(Set dst (SubI dst src));
 7488   effect(KILL cr);
 7489 
 7490   size(2);
 7491   format %{ "SUB    $dst,$src" %}
 7492   opcode(0x2B);
 7493   ins_encode( OpcP, RegReg( dst, src) );
 7494   ins_pipe( ialu_reg_reg );
 7495 %}
 7496 
 7497 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7498   match(Set dst (SubI dst src));
 7499   effect(KILL cr);
 7500 
 7501   format %{ "SUB    $dst,$src" %}
 7502   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7503   // ins_encode( RegImm( dst, src) );
 7504   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7505   ins_pipe( ialu_reg );
 7506 %}
 7507 
 7508 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7509   match(Set dst (SubI dst (LoadI src)));
 7510   effect(KILL cr);
 7511 
 7512   ins_cost(150);
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x2B);
 7515   ins_encode( OpcP, RegMem( dst, src) );
 7516   ins_pipe( ialu_reg_mem );
 7517 %}
 7518 
 7519 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7520   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7521   effect(KILL cr);
 7522 
 7523   ins_cost(150);
 7524   format %{ "SUB    $dst,$src" %}
 7525   opcode(0x29);  /* Opcode 29 /r */
 7526   ins_encode( OpcP, RegMem( src, dst ) );
 7527   ins_pipe( ialu_mem_reg );
 7528 %}
 7529 
 7530 // Subtract from a pointer
 7531 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7532   match(Set dst (AddP dst (SubI zero src)));
 7533   effect(KILL cr);
 7534 
 7535   size(2);
 7536   format %{ "SUB    $dst,$src" %}
 7537   opcode(0x2B);
 7538   ins_encode( OpcP, RegReg( dst, src) );
 7539   ins_pipe( ialu_reg_reg );
 7540 %}
 7541 
 7542 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7543   match(Set dst (SubI zero dst));
 7544   effect(KILL cr);
 7545 
 7546   size(2);
 7547   format %{ "NEG    $dst" %}
 7548   opcode(0xF7,0x03);  // Opcode F7 /3
 7549   ins_encode( OpcP, RegOpc( dst ) );
 7550   ins_pipe( ialu_reg );
 7551 %}
 7552 
 7553 //----------Multiplication/Division Instructions-------------------------------
 7554 // Integer Multiplication Instructions
 7555 // Multiply Register
 7556 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7557   match(Set dst (MulI dst src));
 7558   effect(KILL cr);
 7559 
 7560   size(3);
 7561   ins_cost(300);
 7562   format %{ "IMUL   $dst,$src" %}
 7563   opcode(0xAF, 0x0F);
 7564   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7565   ins_pipe( ialu_reg_reg_alu0 );
 7566 %}
 7567 
 7568 // Multiply 32-bit Immediate
 7569 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7570   match(Set dst (MulI src imm));
 7571   effect(KILL cr);
 7572 
 7573   ins_cost(300);
 7574   format %{ "IMUL   $dst,$src,$imm" %}
 7575   opcode(0x69);  /* 69 /r id */
 7576   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7577   ins_pipe( ialu_reg_reg_alu0 );
 7578 %}
 7579 
 7580 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7581   match(Set dst src);
 7582   effect(KILL cr);
 7583 
 7584   // Note that this is artificially increased to make it more expensive than loadConL
 7585   ins_cost(250);
 7586   format %{ "MOV    EAX,$src\t// low word only" %}
 7587   opcode(0xB8);
 7588   ins_encode( LdImmL_Lo(dst, src) );
 7589   ins_pipe( ialu_reg_fat );
 7590 %}
 7591 
 7592 // Multiply by 32-bit Immediate, taking the shifted high order results
 7593 //  (special case for shift by 32)
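// The one-operand IMUL leaves the full 64-bit product in EDX:EAX, so the high
// half of (long)x * con is simply EDX; for shift counts larger than 32 the
// mulI_imm_RShift_high variant below finishes with a SAR of EDX by $cnt-32.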
 7594 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7595   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7596   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7597              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7598              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7599   effect(USE src1, KILL cr);
 7600 
 7601   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7602   ins_cost(0*100 + 1*400 - 150);
 7603   format %{ "IMUL   EDX:EAX,$src1" %}
 7604   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7605   ins_pipe( pipe_slow );
 7606 %}
 7607 
 7608 // Multiply by 32-bit Immediate, taking the shifted high order results
 7609 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7610   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7611   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7613              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7614   effect(USE src1, KILL cr);
 7615 
 7616   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7617   ins_cost(1*100 + 1*400 - 150);
 7618   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7619             "SAR    EDX,$cnt-32" %}
 7620   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7621   ins_pipe( pipe_slow );
 7622 %}
 7623 
 7624 // Multiply Memory 32-bit Immediate
 7625 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7626   match(Set dst (MulI (LoadI src) imm));
 7627   effect(KILL cr);
 7628 
 7629   ins_cost(300);
 7630   format %{ "IMUL   $dst,$src,$imm" %}
 7631   opcode(0x69);  /* 69 /r id */
 7632   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7633   ins_pipe( ialu_reg_mem_alu0 );
 7634 %}
 7635 
 7636 // Multiply Memory
 7637 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7638   match(Set dst (MulI dst (LoadI src)));
 7639   effect(KILL cr);
 7640 
 7641   ins_cost(350);
 7642   format %{ "IMUL   $dst,$src" %}
 7643   opcode(0xAF, 0x0F);
 7644   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7645   ins_pipe( ialu_reg_mem_alu0 );
 7646 %}
 7647 
 7648 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7649 %{
 7650   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7651   effect(KILL cr, KILL src2);
 7652 
 7653   expand %{ mulI_eReg(dst, src1, cr);
 7654            mulI_eReg(src2, src3, cr);
 7655            addI_eReg(dst, src2, cr); %}
 7656 %}
 7657 
 7658 // Multiply Register Int to Long
 7659 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7660   // Basic Idea: long = (long)int * (long)int
 7661   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7662   effect(DEF dst, USE src, USE src1, KILL flags);
 7663 
 7664   ins_cost(300);
 7665   format %{ "IMUL   $dst,$src1" %}
 7666 
 7667   ins_encode( long_int_multiply( dst, src1 ) );
 7668   ins_pipe( ialu_reg_reg_alu0 );
 7669 %}
 7670 
 7671 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7672   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7673   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7674   effect(KILL flags);
 7675 
 7676   ins_cost(300);
 7677   format %{ "MUL    $dst,$src1" %}
 7678 
 7679   ins_encode( long_uint_multiply(dst, src1) );
 7680   ins_pipe( ialu_reg_reg_alu0 );
 7681 %}
 7682 
 7683 // Multiply Register Long
 7684 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7685   match(Set dst (MulL dst src));
 7686   effect(KILL cr, TEMP tmp);
 7687   ins_cost(4*100+3*400);
 7688 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7689 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
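// A rough C sketch of the same decomposition (x_hi/x_lo/y_hi/y_lo are the
// uint32_t halves; all 32-bit arithmetic wraps mod 2^32):
//   uint64_t p  = (uint64_t)x_lo * y_lo;
//   uint32_t lo = (uint32_t)p;
//   uint32_t hi = (uint32_t)(p >> 32) + x_hi * y_lo + x_lo * y_hi;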
 7690   format %{ "MOV    $tmp,$src.lo\n\t"
 7691             "IMUL   $tmp,EDX\n\t"
 7692             "MOV    EDX,$src.hi\n\t"
 7693             "IMUL   EDX,EAX\n\t"
 7694             "ADD    $tmp,EDX\n\t"
 7695             "MUL    EDX:EAX,$src.lo\n\t"
 7696             "ADD    EDX,$tmp" %}
 7697   ins_encode( long_multiply( dst, src, tmp ) );
 7698   ins_pipe( pipe_slow );
 7699 %}
 7700 
 7701 // Multiply Register Long where the left operand's high 32 bits are zero
 7702 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7703   predicate(is_operand_hi32_zero(n->in(1)));
 7704   match(Set dst (MulL dst src));
 7705   effect(KILL cr, TEMP tmp);
 7706   ins_cost(2*100+2*400);
 7707 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7708 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7709   format %{ "MOV    $tmp,$src.hi\n\t"
 7710             "IMUL   $tmp,EAX\n\t"
 7711             "MUL    EDX:EAX,$src.lo\n\t"
 7712             "ADD    EDX,$tmp" %}
 7713   ins_encode %{
 7714     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7715     __ imull($tmp$$Register, rax);
 7716     __ mull($src$$Register);
 7717     __ addl(rdx, $tmp$$Register);
 7718   %}
 7719   ins_pipe( pipe_slow );
 7720 %}
 7721 
 7722 // Multiply Register Long where the right operand's high 32 bits are zero
 7723 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7724   predicate(is_operand_hi32_zero(n->in(2)));
 7725   match(Set dst (MulL dst src));
 7726   effect(KILL cr, TEMP tmp);
 7727   ins_cost(2*100+2*400);
 7728 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7729 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7730   format %{ "MOV    $tmp,$src.lo\n\t"
 7731             "IMUL   $tmp,EDX\n\t"
 7732             "MUL    EDX:EAX,$src.lo\n\t"
 7733             "ADD    EDX,$tmp" %}
 7734   ins_encode %{
 7735     __ movl($tmp$$Register, $src$$Register);
 7736     __ imull($tmp$$Register, rdx);
 7737     __ mull($src$$Register);
 7738     __ addl(rdx, $tmp$$Register);
 7739   %}
 7740   ins_pipe( pipe_slow );
 7741 %}
 7742 
 7743 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7744 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7745   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7746   match(Set dst (MulL dst src));
 7747   effect(KILL cr);
 7748   ins_cost(1*400);
 7749 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7750 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7751   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7752   ins_encode %{
 7753     __ mull($src$$Register);
 7754   %}
 7755   ins_pipe( pipe_slow );
 7756 %}
 7757 
 7758 // Multiply Register Long by small constant
 7759 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7760   match(Set dst (MulL dst src));
 7761   effect(KILL cr, TEMP tmp);
 7762   ins_cost(2*100+2*400);
 7763   size(12);
 7764 // Basic idea: lo(result) = lo(src * EAX)
 7765 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7766   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7767             "MOV    EDX,$src\n\t"
 7768             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7769             "ADD    EDX,$tmp" %}
 7770   ins_encode( long_multiply_con( dst, src, tmp ) );
 7771   ins_pipe( pipe_slow );
 7772 %}
 7773 
 7774 // Integer DIV with Register
 7775 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7776   match(Set rax (DivI rax div));
 7777   effect(KILL rdx, KILL cr);
 7778   size(26);
 7779   ins_cost(30*100+10*100);
 7780   format %{ "CMP    EAX,0x80000000\n\t"
 7781             "JNE,s  normal\n\t"
 7782             "XOR    EDX,EDX\n\t"
 7783             "CMP    ECX,-1\n\t"
 7784             "JE,s   done\n"
 7785     "normal: CDQ\n\t"
 7786             "IDIV   $div\n\t"
 7787     "done:"        %}
 7788   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7789   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7790   ins_pipe( ialu_reg_reg_alu0 );
 7791 %}
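
// The CMP/JE sequence above guards the one case where IDIV would fault:
// min_jint / -1 overflows, so the Java result is min_jint with EDX cleared.
// A rough C model of the intended semantics (the helper name and the
// <stdint.h> types are assumptions for illustration only):
//
//   int32_t divI_model(int32_t num, int32_t den) {
//     if (num == INT32_MIN && den == -1) return num;  // IDIV would raise #DE here
//     return num / den;                               // CDQ ; IDIV den
//   }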
 7792 
 7793 // Divide Register Long
 7794 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7795   match(Set dst (DivL src1 src2));
 7796   effect(CALL);
 7797   ins_cost(10000);
 7798   format %{ "PUSH   $src1.hi\n\t"
 7799             "PUSH   $src1.lo\n\t"
 7800             "PUSH   $src2.hi\n\t"
 7801             "PUSH   $src2.lo\n\t"
 7802             "CALL   SharedRuntime::ldiv\n\t"
 7803             "ADD    ESP,16" %}
 7804   ins_encode( long_div(src1,src2) );
 7805   ins_pipe( pipe_slow );
 7806 %}
 7807 
 7808 // Integer DIVMOD with Register, both quotient and mod results
 7809 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7810   match(DivModI rax div);
 7811   effect(KILL cr);
 7812   size(26);
 7813   ins_cost(30*100+10*100);
 7814   format %{ "CMP    EAX,0x80000000\n\t"
 7815             "JNE,s  normal\n\t"
 7816             "XOR    EDX,EDX\n\t"
 7817             "CMP    ECX,-1\n\t"
 7818             "JE,s   done\n"
 7819     "normal: CDQ\n\t"
 7820             "IDIV   $div\n\t"
 7821     "done:"        %}
 7822   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7823   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7824   ins_pipe( pipe_slow );
 7825 %}
 7826 
 7827 // Integer MOD with Register
 7828 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7829   match(Set rdx (ModI rax div));
 7830   effect(KILL rax, KILL cr);
 7831 
 7832   size(26);
 7833   ins_cost(300);
 7834   format %{ "CDQ\n\t"
 7835             "IDIV   $div" %}
 7836   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7837   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7838   ins_pipe( ialu_reg_reg_alu0 );
 7839 %}
 7840 
 7841 // Remainder Register Long
 7842 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7843   match(Set dst (ModL src1 src2));
 7844   effect(CALL);
 7845   ins_cost(10000);
 7846   format %{ "PUSH   $src1.hi\n\t"
 7847             "PUSH   $src1.lo\n\t"
 7848             "PUSH   $src2.hi\n\t"
 7849             "PUSH   $src2.lo\n\t"
 7850             "CALL   SharedRuntime::lrem\n\t"
 7851             "ADD    ESP,16" %}
 7852   ins_encode( long_mod(src1,src2) );
 7853   ins_pipe( pipe_slow );
 7854 %}
 7855 
 7856 // Divide Register Long (no special case since divisor != -1)
 7857 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7858   match(Set dst (DivL dst imm));
 7859   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7860   ins_cost(1000);
 7861   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7862             "XOR    $tmp2,$tmp2\n\t"
 7863             "CMP    $tmp,EDX\n\t"
 7864             "JA,s   fast\n\t"
 7865             "MOV    $tmp2,EAX\n\t"
 7866             "MOV    EAX,EDX\n\t"
 7867             "MOV    EDX,0\n\t"
 7868             "JLE,s  pos\n\t"
 7869             "LNEG   EAX : $tmp2\n\t"
 7870             "DIV    $tmp # unsigned division\n\t"
 7871             "XCHG   EAX,$tmp2\n\t"
 7872             "DIV    $tmp\n\t"
 7873             "LNEG   $tmp2 : EAX\n\t"
 7874             "JMP,s  done\n"
 7875     "pos:\n\t"
 7876             "DIV    $tmp\n\t"
 7877             "XCHG   EAX,$tmp2\n"
 7878     "fast:\n\t"
 7879             "DIV    $tmp\n"
 7880     "done:\n\t"
 7881             "MOV    EDX,$tmp2\n\t"
 7882             "NEG    EDX:EAX # if $imm < 0" %}
 7883   ins_encode %{
 7884     int con = (int)$imm$$constant;
 7885     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7886     int pcon = (con > 0) ? con : -con;
 7887     Label Lfast, Lpos, Ldone;
 7888 
 7889     __ movl($tmp$$Register, pcon);
 7890     __ xorl($tmp2$$Register,$tmp2$$Register);
 7891     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
 7893 
 7894     __ movl($tmp2$$Register, $dst$$Register); // save
 7895     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7896     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7897     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7898 
 7899     // Negative dividend.
 7900     // convert value to positive to use unsigned division
 7901     __ lneg($dst$$Register, $tmp2$$Register);
 7902     __ divl($tmp$$Register);
 7903     __ xchgl($dst$$Register, $tmp2$$Register);
 7904     __ divl($tmp$$Register);
 7905     // revert result back to negative
 7906     __ lneg($tmp2$$Register, $dst$$Register);
 7907     __ jmpb(Ldone);
 7908 
 7909     __ bind(Lpos);
 7910     __ divl($tmp$$Register); // Use unsigned division
 7911     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7913 
 7914     __ bind(Lfast);
 7915     // fast path: src is positive
 7916     __ divl($tmp$$Register); // Use unsigned division
 7917 
 7918     __ bind(Ldone);
 7919     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7920     if (con < 0) {
 7921       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7922     }
 7923   %}
 7924   ins_pipe( pipe_slow );
 7925 %}
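
// The slow path above is schoolbook division of a 64-bit dividend by a 32-bit
// divisor using two 32-bit DIVs: divide the high word first, then divide the
// remainder paired with the low word (that quotient always fits in 32 bits
// because the remainder is smaller than the divisor). A hedged C sketch of
// the unsigned core, with an assumed helper name and <stdint.h> types:
//
//   uint64_t udivL_by_u32(uint64_t n, uint32_t d) {   // d != 0
//     uint32_t n_hi = (uint32_t)(n >> 32), n_lo = (uint32_t)n;
//     uint32_t q_hi = n_hi / d;                       // first DIV (skipped on the fast path)
//     uint32_t r    = n_hi % d;
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | n_lo) / d);  // second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;
//   }
//
// The signed form negates the dividend before and the quotient after, which
// is what the LNEG pairs do.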
 7926 
// Remainder Register Long (remainder fits into 32 bits)
 7928 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7929   match(Set dst (ModL dst imm));
 7930   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7931   ins_cost(1000);
 7932   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7933             "CMP    $tmp,EDX\n\t"
 7934             "JA,s   fast\n\t"
 7935             "MOV    $tmp2,EAX\n\t"
 7936             "MOV    EAX,EDX\n\t"
 7937             "MOV    EDX,0\n\t"
 7938             "JLE,s  pos\n\t"
 7939             "LNEG   EAX : $tmp2\n\t"
 7940             "DIV    $tmp # unsigned division\n\t"
 7941             "MOV    EAX,$tmp2\n\t"
 7942             "DIV    $tmp\n\t"
 7943             "NEG    EDX\n\t"
 7944             "JMP,s  done\n"
 7945     "pos:\n\t"
 7946             "DIV    $tmp\n\t"
 7947             "MOV    EAX,$tmp2\n"
 7948     "fast:\n\t"
 7949             "DIV    $tmp\n"
 7950     "done:\n\t"
 7951             "MOV    EAX,EDX\n\t"
 7952             "SAR    EDX,31\n\t" %}
 7953   ins_encode %{
 7954     int con = (int)$imm$$constant;
 7955     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7956     int pcon = (con > 0) ? con : -con;
 7957     Label  Lfast, Lpos, Ldone;
 7958 
 7959     __ movl($tmp$$Register, pcon);
 7960     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
 7962 
 7963     __ movl($tmp2$$Register, $dst$$Register); // save
 7964     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7965     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7966     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7967 
 7968     // Negative dividend.
 7969     // convert value to positive to use unsigned division
 7970     __ lneg($dst$$Register, $tmp2$$Register);
 7971     __ divl($tmp$$Register);
 7972     __ movl($dst$$Register, $tmp2$$Register);
 7973     __ divl($tmp$$Register);
 7974     // revert remainder back to negative
 7975     __ negl(HIGH_FROM_LOW($dst$$Register));
 7976     __ jmpb(Ldone);
 7977 
 7978     __ bind(Lpos);
 7979     __ divl($tmp$$Register);
 7980     __ movl($dst$$Register, $tmp2$$Register);
 7981 
 7982     __ bind(Lfast);
 7983     // fast path: src is positive
 7984     __ divl($tmp$$Register);
 7985 
 7986     __ bind(Ldone);
 7987     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7988     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7989 
 7990   %}
 7991   ins_pipe( pipe_slow );
 7992 %}
 7993 
 7994 // Integer Shift Instructions
 7995 // Shift Left by one
 7996 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7997   match(Set dst (LShiftI dst shift));
 7998   effect(KILL cr);
 7999 
 8000   size(2);
 8001   format %{ "SHL    $dst,$shift" %}
 8002   opcode(0xD1, 0x4);  /* D1 /4 */
 8003   ins_encode( OpcP, RegOpc( dst ) );
 8004   ins_pipe( ialu_reg );
 8005 %}
 8006 
 8007 // Shift Left by 8-bit immediate
 8008 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8009   match(Set dst (LShiftI dst shift));
 8010   effect(KILL cr);
 8011 
 8012   size(3);
 8013   format %{ "SHL    $dst,$shift" %}
 8014   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8015   ins_encode( RegOpcImm( dst, shift) );
 8016   ins_pipe( ialu_reg );
 8017 %}
 8018 
 8019 // Shift Left by variable
 8020 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8021   match(Set dst (LShiftI dst shift));
 8022   effect(KILL cr);
 8023 
 8024   size(2);
 8025   format %{ "SHL    $dst,$shift" %}
 8026   opcode(0xD3, 0x4);  /* D3 /4 */
 8027   ins_encode( OpcP, RegOpc( dst ) );
 8028   ins_pipe( ialu_reg_reg );
 8029 %}
 8030 
 8031 // Arithmetic shift right by one
 8032 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8033   match(Set dst (RShiftI dst shift));
 8034   effect(KILL cr);
 8035 
 8036   size(2);
 8037   format %{ "SAR    $dst,$shift" %}
 8038   opcode(0xD1, 0x7);  /* D1 /7 */
 8039   ins_encode( OpcP, RegOpc( dst ) );
 8040   ins_pipe( ialu_reg );
 8041 %}
 8042 
// Arithmetic shift right memory operand by one
 8044 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8045   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8046   effect(KILL cr);
 8047   format %{ "SAR    $dst,$shift" %}
 8048   opcode(0xD1, 0x7);  /* D1 /7 */
 8049   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8050   ins_pipe( ialu_mem_imm );
 8051 %}
 8052 
 8053 // Arithmetic Shift Right by 8-bit immediate
 8054 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8055   match(Set dst (RShiftI dst shift));
 8056   effect(KILL cr);
 8057 
 8058   size(3);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8061   ins_encode( RegOpcImm( dst, shift ) );
 8062   ins_pipe( ialu_mem_imm );
 8063 %}
 8064 
 8065 // Arithmetic Shift Right by 8-bit immediate
 8066 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8067   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8068   effect(KILL cr);
 8069 
 8070   format %{ "SAR    $dst,$shift" %}
 8071   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8072   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8073   ins_pipe( ialu_mem_imm );
 8074 %}
 8075 
 8076 // Arithmetic Shift Right by variable
 8077 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8078   match(Set dst (RShiftI dst shift));
 8079   effect(KILL cr);
 8080 
 8081   size(2);
 8082   format %{ "SAR    $dst,$shift" %}
 8083   opcode(0xD3, 0x7);  /* D3 /7 */
 8084   ins_encode( OpcP, RegOpc( dst ) );
 8085   ins_pipe( ialu_reg_reg );
 8086 %}
 8087 
 8088 // Logical shift right by one
 8089 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8090   match(Set dst (URShiftI dst shift));
 8091   effect(KILL cr);
 8092 
 8093   size(2);
 8094   format %{ "SHR    $dst,$shift" %}
 8095   opcode(0xD1, 0x5);  /* D1 /5 */
 8096   ins_encode( OpcP, RegOpc( dst ) );
 8097   ins_pipe( ialu_reg );
 8098 %}
 8099 
 8100 // Logical Shift Right by 8-bit immediate
 8101 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8102   match(Set dst (URShiftI dst shift));
 8103   effect(KILL cr);
 8104 
 8105   size(3);
 8106   format %{ "SHR    $dst,$shift" %}
 8107   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8108   ins_encode( RegOpcImm( dst, shift) );
 8109   ins_pipe( ialu_reg );
 8110 %}
 8111 
 8112 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8114 // This idiom is used by the compiler for the i2b bytecode.
 8115 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8116   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8117 
 8118   size(3);
 8119   format %{ "MOVSX  $dst,$src :8" %}
 8120   ins_encode %{
 8121     __ movsbl($dst$$Register, $src$$Register);
 8122   %}
 8123   ins_pipe(ialu_reg_reg);
 8124 %}
 8125 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8128 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8129   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8130 
 8131   size(3);
 8132   format %{ "MOVSX  $dst,$src :16" %}
 8133   ins_encode %{
 8134     __ movswl($dst$$Register, $src$$Register);
 8135   %}
 8136   ins_pipe(ialu_reg_reg);
 8137 %}
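
// For reference, the shift pairs matched above compute exactly what MOVSX
// does; a tiny C model (assumed names, <stdint.h> types):
//
//   int32_t i2b_model(int32_t x) { return (int32_t)(int8_t)x;  }  // == (x << 24) >> 24
//   int32_t i2s_model(int32_t x) { return (int32_t)(int16_t)x; }  // == (x << 16) >> 16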
 8138 
 8139 
 8140 // Logical Shift Right by variable
 8141 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8142   match(Set dst (URShiftI dst shift));
 8143   effect(KILL cr);
 8144 
 8145   size(2);
 8146   format %{ "SHR    $dst,$shift" %}
 8147   opcode(0xD3, 0x5);  /* D3 /5 */
 8148   ins_encode( OpcP, RegOpc( dst ) );
 8149   ins_pipe( ialu_reg_reg );
 8150 %}
 8151 
 8152 
 8153 //----------Logical Instructions-----------------------------------------------
 8154 //----------Integer Logical Instructions---------------------------------------
 8155 // And Instructions
 8156 // And Register with Register
 8157 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8158   match(Set dst (AndI dst src));
 8159   effect(KILL cr);
 8160 
 8161   size(2);
 8162   format %{ "AND    $dst,$src" %}
 8163   opcode(0x23);
 8164   ins_encode( OpcP, RegReg( dst, src) );
 8165   ins_pipe( ialu_reg_reg );
 8166 %}
 8167 
 8168 // And Register with Immediate
 8169 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8170   match(Set dst (AndI dst src));
 8171   effect(KILL cr);
 8172 
 8173   format %{ "AND    $dst,$src" %}
 8174   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8175   // ins_encode( RegImm( dst, src) );
 8176   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8177   ins_pipe( ialu_reg );
 8178 %}
 8179 
 8180 // And Register with Memory
 8181 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8182   match(Set dst (AndI dst (LoadI src)));
 8183   effect(KILL cr);
 8184 
 8185   ins_cost(150);
 8186   format %{ "AND    $dst,$src" %}
 8187   opcode(0x23);
 8188   ins_encode( OpcP, RegMem( dst, src) );
 8189   ins_pipe( ialu_reg_mem );
 8190 %}
 8191 
 8192 // And Memory with Register
 8193 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8194   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8195   effect(KILL cr);
 8196 
 8197   ins_cost(150);
 8198   format %{ "AND    $dst,$src" %}
 8199   opcode(0x21);  /* Opcode 21 /r */
 8200   ins_encode( OpcP, RegMem( src, dst ) );
 8201   ins_pipe( ialu_mem_reg );
 8202 %}
 8203 
 8204 // And Memory with Immediate
 8205 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8206   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8207   effect(KILL cr);
 8208 
 8209   ins_cost(125);
 8210   format %{ "AND    $dst,$src" %}
 8211   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8212   // ins_encode( MemImm( dst, src) );
 8213   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8214   ins_pipe( ialu_mem_imm );
 8215 %}
 8216 
 8217 // BMI1 instructions
 8218 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8219   match(Set dst (AndI (XorI src1 minus_1) src2));
 8220   predicate(UseBMI1Instructions);
 8221   effect(KILL cr);
 8222 
 8223   format %{ "ANDNL  $dst, $src1, $src2" %}
 8224 
 8225   ins_encode %{
 8226     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8227   %}
 8228   ins_pipe(ialu_reg);
 8229 %}
 8230 
 8231 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8232   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8233   predicate(UseBMI1Instructions);
 8234   effect(KILL cr);
 8235 
 8236   ins_cost(125);
 8237   format %{ "ANDNL  $dst, $src1, $src2" %}
 8238 
 8239   ins_encode %{
 8240     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8241   %}
 8242   ins_pipe(ialu_reg_mem);
 8243 %}
 8244 
 8245 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8246   match(Set dst (AndI (SubI imm_zero src) src));
 8247   predicate(UseBMI1Instructions);
 8248   effect(KILL cr);
 8249 
 8250   format %{ "BLSIL  $dst, $src" %}
 8251 
 8252   ins_encode %{
 8253     __ blsil($dst$$Register, $src$$Register);
 8254   %}
 8255   ins_pipe(ialu_reg);
 8256 %}
 8257 
 8258 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8259   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8260   predicate(UseBMI1Instructions);
 8261   effect(KILL cr);
 8262 
 8263   ins_cost(125);
 8264   format %{ "BLSIL  $dst, $src" %}
 8265 
 8266   ins_encode %{
 8267     __ blsil($dst$$Register, $src$$Address);
 8268   %}
 8269   ins_pipe(ialu_reg_mem);
 8270 %}
 8271 
 8272 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8273 %{
 8274   match(Set dst (XorI (AddI src minus_1) src));
 8275   predicate(UseBMI1Instructions);
 8276   effect(KILL cr);
 8277 
 8278   format %{ "BLSMSKL $dst, $src" %}
 8279 
 8280   ins_encode %{
 8281     __ blsmskl($dst$$Register, $src$$Register);
 8282   %}
 8283 
 8284   ins_pipe(ialu_reg);
 8285 %}
 8286 
 8287 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8288 %{
 8289   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8290   predicate(UseBMI1Instructions);
 8291   effect(KILL cr);
 8292 
 8293   ins_cost(125);
 8294   format %{ "BLSMSKL $dst, $src" %}
 8295 
 8296   ins_encode %{
 8297     __ blsmskl($dst$$Register, $src$$Address);
 8298   %}
 8299 
 8300   ins_pipe(ialu_reg_mem);
 8301 %}
 8302 
 8303 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8304 %{
 8305   match(Set dst (AndI (AddI src minus_1) src) );
 8306   predicate(UseBMI1Instructions);
 8307   effect(KILL cr);
 8308 
 8309   format %{ "BLSRL  $dst, $src" %}
 8310 
 8311   ins_encode %{
 8312     __ blsrl($dst$$Register, $src$$Register);
 8313   %}
 8314 
 8315   ins_pipe(ialu_reg);
 8316 %}
 8317 
 8318 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8319 %{
 8320   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8321   predicate(UseBMI1Instructions);
 8322   effect(KILL cr);
 8323 
 8324   ins_cost(125);
 8325   format %{ "BLSRL  $dst, $src" %}
 8326 
 8327   ins_encode %{
 8328     __ blsrl($dst$$Register, $src$$Address);
 8329   %}
 8330 
 8331   ins_pipe(ialu_reg_mem);
 8332 %}
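
// The ideal-graph shapes matched by the BMI1 rules above correspond to the
// usual bit tricks; a C restatement for reference (the function names are
// just illustrative, types from <stdint.h>):
//
//   uint32_t andn_model  (uint32_t a, uint32_t b) { return ~a & b;       }  // ANDN
//   uint32_t blsi_model  (uint32_t x)             { return x & (0u - x); }  // isolate lowest set bit
//   uint32_t blsmsk_model(uint32_t x)             { return x ^ (x - 1);  }  // mask through lowest set bit
//   uint32_t blsr_model  (uint32_t x)             { return x & (x - 1);  }  // clear lowest set bit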
 8333 
 8334 // Or Instructions
 8335 // Or Register with Register
 8336 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8337   match(Set dst (OrI dst src));
 8338   effect(KILL cr);
 8339 
 8340   size(2);
 8341   format %{ "OR     $dst,$src" %}
 8342   opcode(0x0B);
 8343   ins_encode( OpcP, RegReg( dst, src) );
 8344   ins_pipe( ialu_reg_reg );
 8345 %}
 8346 
 8347 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8348   match(Set dst (OrI dst (CastP2X src)));
 8349   effect(KILL cr);
 8350 
 8351   size(2);
 8352   format %{ "OR     $dst,$src" %}
 8353   opcode(0x0B);
 8354   ins_encode( OpcP, RegReg( dst, src) );
 8355   ins_pipe( ialu_reg_reg );
 8356 %}
 8357 
 8358 
 8359 // Or Register with Immediate
 8360 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8361   match(Set dst (OrI dst src));
 8362   effect(KILL cr);
 8363 
 8364   format %{ "OR     $dst,$src" %}
 8365   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8366   // ins_encode( RegImm( dst, src) );
 8367   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8368   ins_pipe( ialu_reg );
 8369 %}
 8370 
 8371 // Or Register with Memory
 8372 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8373   match(Set dst (OrI dst (LoadI src)));
 8374   effect(KILL cr);
 8375 
 8376   ins_cost(150);
 8377   format %{ "OR     $dst,$src" %}
 8378   opcode(0x0B);
 8379   ins_encode( OpcP, RegMem( dst, src) );
 8380   ins_pipe( ialu_reg_mem );
 8381 %}
 8382 
 8383 // Or Memory with Register
 8384 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8385   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8386   effect(KILL cr);
 8387 
 8388   ins_cost(150);
 8389   format %{ "OR     $dst,$src" %}
 8390   opcode(0x09);  /* Opcode 09 /r */
 8391   ins_encode( OpcP, RegMem( src, dst ) );
 8392   ins_pipe( ialu_mem_reg );
 8393 %}
 8394 
 8395 // Or Memory with Immediate
 8396 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8397   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8398   effect(KILL cr);
 8399 
 8400   ins_cost(125);
 8401   format %{ "OR     $dst,$src" %}
 8402   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8403   // ins_encode( MemImm( dst, src) );
 8404   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8405   ins_pipe( ialu_mem_imm );
 8406 %}
 8407 
 8408 // ROL/ROR
 8409 // ROL expand
 8410 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8411   effect(USE_DEF dst, USE shift, KILL cr);
 8412 
 8413   format %{ "ROL    $dst, $shift" %}
 8414   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8415   ins_encode( OpcP, RegOpc( dst ));
 8416   ins_pipe( ialu_reg );
 8417 %}
 8418 
 8419 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8420   effect(USE_DEF dst, USE shift, KILL cr);
 8421 
 8422   format %{ "ROL    $dst, $shift" %}
 8423   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8424   ins_encode( RegOpcImm(dst, shift) );
 8425   ins_pipe(ialu_reg);
 8426 %}
 8427 
 8428 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8429   effect(USE_DEF dst, USE shift, KILL cr);
 8430 
 8431   format %{ "ROL    $dst, $shift" %}
 8432   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8433   ins_encode(OpcP, RegOpc(dst));
 8434   ins_pipe( ialu_reg_reg );
 8435 %}
 8436 // end of ROL expand
 8437 
 8438 // ROL 32bit by one once
 8439 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8440   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8441 
 8442   expand %{
 8443     rolI_eReg_imm1(dst, lshift, cr);
 8444   %}
 8445 %}
 8446 
// ROL 32bit by imm8 once
 8448 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8449   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8450   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8451 
 8452   expand %{
 8453     rolI_eReg_imm8(dst, lshift, cr);
 8454   %}
 8455 %}
 8456 
 8457 // ROL 32bit var by var once
 8458 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8459   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8460 
 8461   expand %{
 8462     rolI_eReg_CL(dst, shift, cr);
 8463   %}
 8464 %}
 8465 
 8466 // ROL 32bit var by var once
 8467 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8468   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8469 
 8470   expand %{
 8471     rolI_eReg_CL(dst, shift, cr);
 8472   %}
 8473 %}
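
// The OrI-of-opposite-shifts shapes above are the standard rotate idiom; the
// predicate on the immediate form checks that the two shift counts sum to a
// multiple of 32, which is exactly when the pair equals a single ROL. A small
// C sketch (assumed name, <stdint.h> types), written to avoid a shift by 32:
//
//   uint32_t rotl32_model(uint32_t x, unsigned n) {
//     n &= 31;
//     return (x << n) | (x >> ((32 - n) & 31));   // one ROL on x86
//   }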
 8474 
 8475 // ROR expand
 8476 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8477   effect(USE_DEF dst, USE shift, KILL cr);
 8478 
 8479   format %{ "ROR    $dst, $shift" %}
 8480   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8481   ins_encode( OpcP, RegOpc( dst ) );
 8482   ins_pipe( ialu_reg );
 8483 %}
 8484 
 8485 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8486   effect (USE_DEF dst, USE shift, KILL cr);
 8487 
 8488   format %{ "ROR    $dst, $shift" %}
 8489   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8490   ins_encode( RegOpcImm(dst, shift) );
 8491   ins_pipe( ialu_reg );
 8492 %}
 8493 
 8494 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8495   effect(USE_DEF dst, USE shift, KILL cr);
 8496 
 8497   format %{ "ROR    $dst, $shift" %}
 8498   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8499   ins_encode(OpcP, RegOpc(dst));
 8500   ins_pipe( ialu_reg_reg );
 8501 %}
 8502 // end of ROR expand
 8503 
// ROR 32bit by one once
 8505 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8506   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8507 
 8508   expand %{
 8509     rorI_eReg_imm1(dst, rshift, cr);
 8510   %}
 8511 %}
 8512 
 8513 // ROR 32bit by immI8 once
 8514 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8515   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8516   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8517 
 8518   expand %{
 8519     rorI_eReg_imm8(dst, rshift, cr);
 8520   %}
 8521 %}
 8522 
 8523 // ROR 32bit var by var once
 8524 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8525   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8526 
 8527   expand %{
 8528     rorI_eReg_CL(dst, shift, cr);
 8529   %}
 8530 %}
 8531 
 8532 // ROR 32bit var by var once
 8533 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8534   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8535 
 8536   expand %{
 8537     rorI_eReg_CL(dst, shift, cr);
 8538   %}
 8539 %}
 8540 
 8541 // Xor Instructions
 8542 // Xor Register with Register
 8543 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8544   match(Set dst (XorI dst src));
 8545   effect(KILL cr);
 8546 
 8547   size(2);
 8548   format %{ "XOR    $dst,$src" %}
 8549   opcode(0x33);
 8550   ins_encode( OpcP, RegReg( dst, src) );
 8551   ins_pipe( ialu_reg_reg );
 8552 %}
 8553 
 8554 // Xor Register with Immediate -1
 8555 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8556   match(Set dst (XorI dst imm));
 8557 
 8558   size(2);
 8559   format %{ "NOT    $dst" %}
 8560   ins_encode %{
 8561      __ notl($dst$$Register);
 8562   %}
 8563   ins_pipe( ialu_reg );
 8564 %}
 8565 
 8566 // Xor Register with Immediate
 8567 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8568   match(Set dst (XorI dst src));
 8569   effect(KILL cr);
 8570 
 8571   format %{ "XOR    $dst,$src" %}
 8572   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8573   // ins_encode( RegImm( dst, src) );
 8574   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8575   ins_pipe( ialu_reg );
 8576 %}
 8577 
 8578 // Xor Register with Memory
 8579 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8580   match(Set dst (XorI dst (LoadI src)));
 8581   effect(KILL cr);
 8582 
 8583   ins_cost(150);
 8584   format %{ "XOR    $dst,$src" %}
 8585   opcode(0x33);
 8586   ins_encode( OpcP, RegMem(dst, src) );
 8587   ins_pipe( ialu_reg_mem );
 8588 %}
 8589 
 8590 // Xor Memory with Register
 8591 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8592   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8593   effect(KILL cr);
 8594 
 8595   ins_cost(150);
 8596   format %{ "XOR    $dst,$src" %}
 8597   opcode(0x31);  /* Opcode 31 /r */
 8598   ins_encode( OpcP, RegMem( src, dst ) );
 8599   ins_pipe( ialu_mem_reg );
 8600 %}
 8601 
 8602 // Xor Memory with Immediate
 8603 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8604   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8605   effect(KILL cr);
 8606 
 8607   ins_cost(125);
 8608   format %{ "XOR    $dst,$src" %}
 8609   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8610   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8611   ins_pipe( ialu_mem_imm );
 8612 %}
 8613 
 8614 //----------Convert Int to Boolean---------------------------------------------
 8615 
 8616 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8617   effect( DEF dst, USE src );
 8618   format %{ "MOV    $dst,$src" %}
 8619   ins_encode( enc_Copy( dst, src) );
 8620   ins_pipe( ialu_reg_reg );
 8621 %}
 8622 
 8623 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8624   effect( USE_DEF dst, USE src, KILL cr );
 8625 
 8626   size(4);
 8627   format %{ "NEG    $dst\n\t"
 8628             "ADC    $dst,$src" %}
 8629   ins_encode( neg_reg(dst),
 8630               OpcRegReg(0x13,dst,src) );
 8631   ins_pipe( ialu_reg_reg_long );
 8632 %}
 8633 
 8634 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8635   match(Set dst (Conv2B src));
 8636 
 8637   expand %{
 8638     movI_nocopy(dst,src);
 8639     ci2b(dst,src,cr);
 8640   %}
 8641 %}
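
// Conv2B produces (src != 0) ? 1 : 0. The NEG/ADC pair does this without a
// branch: with dst == src, NEG dst leaves -src and sets CF = (src != 0), and
// ADC dst,src then computes -src + src + CF == CF. Equivalent C, for
// reference (assumed name):
//
//   int32_t conv2b_model(int32_t x) { return x != 0; }   // 0 or 1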
 8642 
 8643 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8644   effect( DEF dst, USE src );
 8645   format %{ "MOV    $dst,$src" %}
 8646   ins_encode( enc_Copy( dst, src) );
 8647   ins_pipe( ialu_reg_reg );
 8648 %}
 8649 
 8650 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8651   effect( USE_DEF dst, USE src, KILL cr );
 8652   format %{ "NEG    $dst\n\t"
 8653             "ADC    $dst,$src" %}
 8654   ins_encode( neg_reg(dst),
 8655               OpcRegReg(0x13,dst,src) );
 8656   ins_pipe( ialu_reg_reg_long );
 8657 %}
 8658 
 8659 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8660   match(Set dst (Conv2B src));
 8661 
 8662   expand %{
 8663     movP_nocopy(dst,src);
 8664     cp2b(dst,src,cr);
 8665   %}
 8666 %}
 8667 
 8668 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8669   match(Set dst (CmpLTMask p q));
 8670   effect(KILL cr);
 8671   ins_cost(400);
 8672 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8674   format %{ "XOR    $dst,$dst\n\t"
 8675             "CMP    $p,$q\n\t"
 8676             "SETlt  $dst\n\t"
 8677             "NEG    $dst" %}
 8678   ins_encode %{
 8679     Register Rp = $p$$Register;
 8680     Register Rq = $q$$Register;
 8681     Register Rd = $dst$$Register;
 8682     Label done;
 8683     __ xorl(Rd, Rd);
 8684     __ cmpl(Rp, Rq);
 8685     __ setb(Assembler::less, Rd);
 8686     __ negl(Rd);
 8687   %}
 8688 
 8689   ins_pipe(pipe_slow);
 8690 %}
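
// CmpLTMask yields all ones when p < q (signed) and zero otherwise; SETlt
// materializes the 0/1 and NEG turns 1 into -1. In C terms (assumed name):
//
//   int32_t cmpLTMask_model(int32_t p, int32_t q) { return -(int32_t)(p < q); }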
 8691 
 8692 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8693   match(Set dst (CmpLTMask dst zero));
 8694   effect(DEF dst, KILL cr);
 8695   ins_cost(100);
 8696 
 8697   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8698   ins_encode %{
    __ sarl($dst$$Register, 31);
 8700   %}
 8701   ins_pipe(ialu_reg);
 8702 %}
 8703 
 8704 /* better to save a register than avoid a branch */
 8705 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8706   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8707   effect(KILL cr);
 8708   ins_cost(400);
 8709   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8710             "JGE    done\n\t"
 8711             "ADD    $p,$y\n"
 8712             "done:  " %}
 8713   ins_encode %{
 8714     Register Rp = $p$$Register;
 8715     Register Rq = $q$$Register;
 8716     Register Ry = $y$$Register;
 8717     Label done;
 8718     __ subl(Rp, Rq);
 8719     __ jccb(Assembler::greaterEqual, done);
 8720     __ addl(Rp, Ry);
 8721     __ bind(done);
 8722   %}
 8723 
 8724   ins_pipe(pipe_cmplt);
 8725 %}
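
// The rule above folds the mask back into an add: the matched ideal shape is
// (p - q) + (cmpLTMask(p, q) & y), which the branchy encoding realizes as
// "p -= q; if the result is negative, p += y". A C restatement of the matched
// shape (assumed name):
//
//   int32_t cadd_cmpLTMask_model(int32_t p, int32_t q, int32_t y) {
//     int32_t mask = -(int32_t)(p < q);
//     return (p - q) + (mask & y);
//   }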
 8726 
 8727 /* better to save a register than avoid a branch */
 8728 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8729   match(Set y (AndI (CmpLTMask p q) y));
 8730   effect(KILL cr);
 8731 
 8732   ins_cost(300);
 8733 
 8734   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8735             "JLT      done\n\t"
 8736             "XORL     $y, $y\n"
 8737             "done:  " %}
 8738   ins_encode %{
 8739     Register Rp = $p$$Register;
 8740     Register Rq = $q$$Register;
 8741     Register Ry = $y$$Register;
 8742     Label done;
 8743     __ cmpl(Rp, Rq);
 8744     __ jccb(Assembler::less, done);
 8745     __ xorl(Ry, Ry);
 8746     __ bind(done);
 8747   %}
 8748 
 8749   ins_pipe(pipe_cmplt);
 8750 %}
 8751 
 8752 /* If I enable this, I encourage spilling in the inner loop of compress.
 8753 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8754   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8755 */
 8756 //----------Overflow Math Instructions-----------------------------------------
 8757 
 8758 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8759 %{
 8760   match(Set cr (OverflowAddI op1 op2));
 8761   effect(DEF cr, USE_KILL op1, USE op2);
 8762 
 8763   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8764 
 8765   ins_encode %{
 8766     __ addl($op1$$Register, $op2$$Register);
 8767   %}
 8768   ins_pipe(ialu_reg_reg);
 8769 %}
 8770 
 8771 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8772 %{
 8773   match(Set cr (OverflowAddI op1 op2));
 8774   effect(DEF cr, USE_KILL op1, USE op2);
 8775 
 8776   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8777 
 8778   ins_encode %{
 8779     __ addl($op1$$Register, $op2$$constant);
 8780   %}
 8781   ins_pipe(ialu_reg_reg);
 8782 %}
 8783 
 8784 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8785 %{
 8786   match(Set cr (OverflowSubI op1 op2));
 8787 
 8788   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8789   ins_encode %{
 8790     __ cmpl($op1$$Register, $op2$$Register);
 8791   %}
 8792   ins_pipe(ialu_reg_reg);
 8793 %}
 8794 
 8795 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8796 %{
 8797   match(Set cr (OverflowSubI op1 op2));
 8798 
 8799   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8800   ins_encode %{
 8801     __ cmpl($op1$$Register, $op2$$constant);
 8802   %}
 8803   ins_pipe(ialu_reg_reg);
 8804 %}
 8805 
 8806 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8807 %{
 8808   match(Set cr (OverflowSubI zero op2));
 8809   effect(DEF cr, USE_KILL op2);
 8810 
 8811   format %{ "NEG    $op2\t# overflow check int" %}
 8812   ins_encode %{
 8813     __ negl($op2$$Register);
 8814   %}
 8815   ins_pipe(ialu_reg_reg);
 8816 %}
 8817 
 8818 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8819 %{
 8820   match(Set cr (OverflowMulI op1 op2));
 8821   effect(DEF cr, USE_KILL op1, USE op2);
 8822 
 8823   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8824   ins_encode %{
 8825     __ imull($op1$$Register, $op2$$Register);
 8826   %}
 8827   ins_pipe(ialu_reg_reg_alu0);
 8828 %}
 8829 
 8830 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8831 %{
 8832   match(Set cr (OverflowMulI op1 op2));
 8833   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8834 
 8835   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8836   ins_encode %{
 8837     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8838   %}
 8839   ins_pipe(ialu_reg_reg_alu0);
 8840 %}
 8841 
 8842 // Integer Absolute Instructions
 8843 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8844 %{
 8845   match(Set dst (AbsI src));
 8846   effect(TEMP dst, TEMP tmp, KILL cr);
 8847   format %{ "movl $tmp, $src\n\t"
 8848             "sarl $tmp, 31\n\t"
 8849             "movl $dst, $src\n\t"
 8850             "xorl $dst, $tmp\n\t"
 8851             "subl $dst, $tmp\n"
 8852           %}
 8853   ins_encode %{
 8854     __ movl($tmp$$Register, $src$$Register);
 8855     __ sarl($tmp$$Register, 31);
 8856     __ movl($dst$$Register, $src$$Register);
 8857     __ xorl($dst$$Register, $tmp$$Register);
 8858     __ subl($dst$$Register, $tmp$$Register);
 8859   %}
 8860 
 8861   ins_pipe(ialu_reg_reg);
 8862 %}
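
// The four-instruction sequence above is the classic sign-mask absolute
// value; a C model (assumed name, <stdint.h> types):
//
//   int32_t absI_model(int32_t x) {
//     int32_t m = x >> 31;        // arithmetic shift: 0 when x >= 0, -1 when x < 0
//     return (x ^ m) - m;         // flips the bits and adds one only for negative x
//   }
//
// Note that, as in Java, min_jint maps to itself.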
 8863 
 8864 //----------Long Instructions------------------------------------------------
 8865 // Add Long Register with Register
 8866 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8867   match(Set dst (AddL dst src));
 8868   effect(KILL cr);
 8869   ins_cost(200);
 8870   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8871             "ADC    $dst.hi,$src.hi" %}
 8872   opcode(0x03, 0x13);
 8873   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8874   ins_pipe( ialu_reg_reg_long );
 8875 %}
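
// 64-bit adds on this port are an ADD of the low halves followed by an ADC of
// the high halves; a C model of the carry propagation (assumed names,
// <stdint.h> types):
//
//   void addL_model(uint32_t* d_lo, uint32_t* d_hi, uint32_t s_lo, uint32_t s_hi) {
//     uint32_t old_lo = *d_lo;
//     *d_lo = old_lo + s_lo;                       // ADD dst.lo,src.lo
//     *d_hi = *d_hi + s_hi + (*d_lo < old_lo);     // ADC dst.hi,src.hi (carry in)
//   }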
 8876 
 8877 // Add Long Register with Immediate
 8878 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8879   match(Set dst (AddL dst src));
 8880   effect(KILL cr);
 8881   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8882             "ADC    $dst.hi,$src.hi" %}
 8883   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8884   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8885   ins_pipe( ialu_reg_long );
 8886 %}
 8887 
 8888 // Add Long Register with Memory
 8889 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8890   match(Set dst (AddL dst (LoadL mem)));
 8891   effect(KILL cr);
 8892   ins_cost(125);
 8893   format %{ "ADD    $dst.lo,$mem\n\t"
 8894             "ADC    $dst.hi,$mem+4" %}
 8895   opcode(0x03, 0x13);
 8896   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8897   ins_pipe( ialu_reg_long_mem );
 8898 %}
 8899 
 8900 // Subtract Long Register with Register.
 8901 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8902   match(Set dst (SubL dst src));
 8903   effect(KILL cr);
 8904   ins_cost(200);
 8905   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8906             "SBB    $dst.hi,$src.hi" %}
 8907   opcode(0x2B, 0x1B);
 8908   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8909   ins_pipe( ialu_reg_reg_long );
 8910 %}
 8911 
 8912 // Subtract Long Register with Immediate
 8913 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8914   match(Set dst (SubL dst src));
 8915   effect(KILL cr);
 8916   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8917             "SBB    $dst.hi,$src.hi" %}
 8918   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8919   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8920   ins_pipe( ialu_reg_long );
 8921 %}
 8922 
 8923 // Subtract Long Register with Memory
 8924 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8925   match(Set dst (SubL dst (LoadL mem)));
 8926   effect(KILL cr);
 8927   ins_cost(125);
 8928   format %{ "SUB    $dst.lo,$mem\n\t"
 8929             "SBB    $dst.hi,$mem+4" %}
 8930   opcode(0x2B, 0x1B);
 8931   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8932   ins_pipe( ialu_reg_long_mem );
 8933 %}
 8934 
 8935 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8936   match(Set dst (SubL zero dst));
 8937   effect(KILL cr);
 8938   ins_cost(300);
 8939   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8940   ins_encode( neg_long(dst) );
 8941   ins_pipe( ialu_reg_reg_long );
 8942 %}
 8943 
 8944 // And Long Register with Register
 8945 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8946   match(Set dst (AndL dst src));
 8947   effect(KILL cr);
 8948   format %{ "AND    $dst.lo,$src.lo\n\t"
 8949             "AND    $dst.hi,$src.hi" %}
 8950   opcode(0x23,0x23);
 8951   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8952   ins_pipe( ialu_reg_reg_long );
 8953 %}
 8954 
 8955 // And Long Register with Immediate
 8956 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8957   match(Set dst (AndL dst src));
 8958   effect(KILL cr);
 8959   format %{ "AND    $dst.lo,$src.lo\n\t"
 8960             "AND    $dst.hi,$src.hi" %}
 8961   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8962   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8963   ins_pipe( ialu_reg_long );
 8964 %}
 8965 
 8966 // And Long Register with Memory
 8967 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8968   match(Set dst (AndL dst (LoadL mem)));
 8969   effect(KILL cr);
 8970   ins_cost(125);
 8971   format %{ "AND    $dst.lo,$mem\n\t"
 8972             "AND    $dst.hi,$mem+4" %}
 8973   opcode(0x23, 0x23);
 8974   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8975   ins_pipe( ialu_reg_long_mem );
 8976 %}
 8977 
 8978 // BMI1 instructions
 8979 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8980   match(Set dst (AndL (XorL src1 minus_1) src2));
 8981   predicate(UseBMI1Instructions);
 8982   effect(KILL cr, TEMP dst);
 8983 
 8984   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8985             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8986          %}
 8987 
 8988   ins_encode %{
 8989     Register Rdst = $dst$$Register;
 8990     Register Rsrc1 = $src1$$Register;
 8991     Register Rsrc2 = $src2$$Register;
 8992     __ andnl(Rdst, Rsrc1, Rsrc2);
 8993     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8994   %}
 8995   ins_pipe(ialu_reg_reg_long);
 8996 %}
 8997 
 8998 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8999   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9000   predicate(UseBMI1Instructions);
 9001   effect(KILL cr, TEMP dst);
 9002 
 9003   ins_cost(125);
 9004   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9005             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9006          %}
 9007 
 9008   ins_encode %{
 9009     Register Rdst = $dst$$Register;
 9010     Register Rsrc1 = $src1$$Register;
 9011     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9012 
 9013     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9014     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9015   %}
 9016   ins_pipe(ialu_reg_mem);
 9017 %}
 9018 
 9019 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9020   match(Set dst (AndL (SubL imm_zero src) src));
 9021   predicate(UseBMI1Instructions);
 9022   effect(KILL cr, TEMP dst);
 9023 
 9024   format %{ "MOVL   $dst.hi, 0\n\t"
 9025             "BLSIL  $dst.lo, $src.lo\n\t"
 9026             "JNZ    done\n\t"
 9027             "BLSIL  $dst.hi, $src.hi\n"
 9028             "done:"
 9029          %}
 9030 
 9031   ins_encode %{
 9032     Label done;
 9033     Register Rdst = $dst$$Register;
 9034     Register Rsrc = $src$$Register;
 9035     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9036     __ blsil(Rdst, Rsrc);
 9037     __ jccb(Assembler::notZero, done);
 9038     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9039     __ bind(done);
 9040   %}
 9041   ins_pipe(ialu_reg);
 9042 %}
 9043 
 9044 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9045   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9046   predicate(UseBMI1Instructions);
 9047   effect(KILL cr, TEMP dst);
 9048 
 9049   ins_cost(125);
 9050   format %{ "MOVL   $dst.hi, 0\n\t"
 9051             "BLSIL  $dst.lo, $src\n\t"
 9052             "JNZ    done\n\t"
 9053             "BLSIL  $dst.hi, $src+4\n"
 9054             "done:"
 9055          %}
 9056 
 9057   ins_encode %{
 9058     Label done;
 9059     Register Rdst = $dst$$Register;
 9060     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9061 
 9062     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9063     __ blsil(Rdst, $src$$Address);
 9064     __ jccb(Assembler::notZero, done);
 9065     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9066     __ bind(done);
 9067   %}
 9068   ins_pipe(ialu_reg_mem);
 9069 %}
 9070 
 9071 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9072 %{
 9073   match(Set dst (XorL (AddL src minus_1) src));
 9074   predicate(UseBMI1Instructions);
 9075   effect(KILL cr, TEMP dst);
 9076 
 9077   format %{ "MOVL    $dst.hi, 0\n\t"
 9078             "BLSMSKL $dst.lo, $src.lo\n\t"
 9079             "JNC     done\n\t"
 9080             "BLSMSKL $dst.hi, $src.hi\n"
 9081             "done:"
 9082          %}
 9083 
 9084   ins_encode %{
 9085     Label done;
 9086     Register Rdst = $dst$$Register;
 9087     Register Rsrc = $src$$Register;
 9088     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9089     __ blsmskl(Rdst, Rsrc);
 9090     __ jccb(Assembler::carryClear, done);
 9091     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9092     __ bind(done);
 9093   %}
 9094 
 9095   ins_pipe(ialu_reg);
 9096 %}
 9097 
 9098 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9099 %{
 9100   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9101   predicate(UseBMI1Instructions);
 9102   effect(KILL cr, TEMP dst);
 9103 
 9104   ins_cost(125);
 9105   format %{ "MOVL    $dst.hi, 0\n\t"
 9106             "BLSMSKL $dst.lo, $src\n\t"
 9107             "JNC     done\n\t"
 9108             "BLSMSKL $dst.hi, $src+4\n"
 9109             "done:"
 9110          %}
 9111 
 9112   ins_encode %{
 9113     Label done;
 9114     Register Rdst = $dst$$Register;
 9115     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9116 
 9117     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9118     __ blsmskl(Rdst, $src$$Address);
 9119     __ jccb(Assembler::carryClear, done);
 9120     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9121     __ bind(done);
 9122   %}
 9123 
 9124   ins_pipe(ialu_reg_mem);
 9125 %}
 9126 
 9127 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9128 %{
 9129   match(Set dst (AndL (AddL src minus_1) src) );
 9130   predicate(UseBMI1Instructions);
 9131   effect(KILL cr, TEMP dst);
 9132 
 9133   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9134             "BLSRL  $dst.lo, $src.lo\n\t"
 9135             "JNC    done\n\t"
 9136             "BLSRL  $dst.hi, $src.hi\n"
 9137             "done:"
 9138   %}
 9139 
 9140   ins_encode %{
 9141     Label done;
 9142     Register Rdst = $dst$$Register;
 9143     Register Rsrc = $src$$Register;
 9144     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9145     __ blsrl(Rdst, Rsrc);
 9146     __ jccb(Assembler::carryClear, done);
 9147     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9148     __ bind(done);
 9149   %}
 9150 
 9151   ins_pipe(ialu_reg);
 9152 %}
 9153 
 9154 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9155 %{
 9156   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9157   predicate(UseBMI1Instructions);
 9158   effect(KILL cr, TEMP dst);
 9159 
 9160   ins_cost(125);
 9161   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9162             "BLSRL  $dst.lo, $src\n\t"
 9163             "JNC    done\n\t"
 9164             "BLSRL  $dst.hi, $src+4\n"
 9165             "done:"
 9166   %}
 9167 
 9168   ins_encode %{
 9169     Label done;
 9170     Register Rdst = $dst$$Register;
 9171     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9172     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9173     __ blsrl(Rdst, $src$$Address);
 9174     __ jccb(Assembler::carryClear, done);
 9175     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9176     __ bind(done);
 9177   %}
 9178 
 9179   ins_pipe(ialu_reg_mem);
 9180 %}
 9181 
 9182 // Or Long Register with Register
 9183 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9184   match(Set dst (OrL dst src));
 9185   effect(KILL cr);
 9186   format %{ "OR     $dst.lo,$src.lo\n\t"
 9187             "OR     $dst.hi,$src.hi" %}
 9188   opcode(0x0B,0x0B);
 9189   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9190   ins_pipe( ialu_reg_reg_long );
 9191 %}
 9192 
 9193 // Or Long Register with Immediate
 9194 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9195   match(Set dst (OrL dst src));
 9196   effect(KILL cr);
 9197   format %{ "OR     $dst.lo,$src.lo\n\t"
 9198             "OR     $dst.hi,$src.hi" %}
 9199   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9200   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9201   ins_pipe( ialu_reg_long );
 9202 %}
 9203 
 9204 // Or Long Register with Memory
 9205 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9206   match(Set dst (OrL dst (LoadL mem)));
 9207   effect(KILL cr);
 9208   ins_cost(125);
 9209   format %{ "OR     $dst.lo,$mem\n\t"
 9210             "OR     $dst.hi,$mem+4" %}
 9211   opcode(0x0B,0x0B);
 9212   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9213   ins_pipe( ialu_reg_long_mem );
 9214 %}
 9215 
 9216 // Xor Long Register with Register
 9217 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9218   match(Set dst (XorL dst src));
 9219   effect(KILL cr);
 9220   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9221             "XOR    $dst.hi,$src.hi" %}
 9222   opcode(0x33,0x33);
 9223   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9224   ins_pipe( ialu_reg_reg_long );
 9225 %}
 9226 
 9227 // Xor Long Register with Immediate -1
 9228 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9229   match(Set dst (XorL dst imm));
 9230   format %{ "NOT    $dst.lo\n\t"
 9231             "NOT    $dst.hi" %}
 9232   ins_encode %{
 9233      __ notl($dst$$Register);
 9234      __ notl(HIGH_FROM_LOW($dst$$Register));
 9235   %}
 9236   ins_pipe( ialu_reg_long );
 9237 %}
 9238 
 9239 // Xor Long Register with Immediate
 9240 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9241   match(Set dst (XorL dst src));
 9242   effect(KILL cr);
 9243   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9244             "XOR    $dst.hi,$src.hi" %}
 9245   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9246   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9247   ins_pipe( ialu_reg_long );
 9248 %}
 9249 
 9250 // Xor Long Register with Memory
 9251 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9252   match(Set dst (XorL dst (LoadL mem)));
 9253   effect(KILL cr);
 9254   ins_cost(125);
 9255   format %{ "XOR    $dst.lo,$mem\n\t"
 9256             "XOR    $dst.hi,$mem+4" %}
 9257   opcode(0x33,0x33);
 9258   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9259   ins_pipe( ialu_reg_long_mem );
 9260 %}
 9261 
 9262 // Shift Left Long by 1
 9263 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9264   predicate(UseNewLongLShift);
 9265   match(Set dst (LShiftL dst cnt));
 9266   effect(KILL cr);
 9267   ins_cost(100);
 9268   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9269             "ADC    $dst.hi,$dst.hi" %}
 9270   ins_encode %{
 9271     __ addl($dst$$Register,$dst$$Register);
 9272     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9273   %}
 9274   ins_pipe( ialu_reg_long );
 9275 %}
 9276 
 9277 // Shift Left Long by 2
 9278 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9279   predicate(UseNewLongLShift);
 9280   match(Set dst (LShiftL dst cnt));
 9281   effect(KILL cr);
 9282   ins_cost(100);
 9283   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9284             "ADC    $dst.hi,$dst.hi\n\t"
 9285             "ADD    $dst.lo,$dst.lo\n\t"
 9286             "ADC    $dst.hi,$dst.hi" %}
 9287   ins_encode %{
 9288     __ addl($dst$$Register,$dst$$Register);
 9289     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9290     __ addl($dst$$Register,$dst$$Register);
 9291     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9292   %}
 9293   ins_pipe( ialu_reg_long );
 9294 %}
 9295 
 9296 // Shift Left Long by 3
 9297 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9298   predicate(UseNewLongLShift);
 9299   match(Set dst (LShiftL dst cnt));
 9300   effect(KILL cr);
 9301   ins_cost(100);
 9302   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi\n\t"
 9304             "ADD    $dst.lo,$dst.lo\n\t"
 9305             "ADC    $dst.hi,$dst.hi\n\t"
 9306             "ADD    $dst.lo,$dst.lo\n\t"
 9307             "ADC    $dst.hi,$dst.hi" %}
 9308   ins_encode %{
 9309     __ addl($dst$$Register,$dst$$Register);
 9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9311     __ addl($dst$$Register,$dst$$Register);
 9312     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9313     __ addl($dst$$Register,$dst$$Register);
 9314     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9315   %}
 9316   ins_pipe( ialu_reg_long );
 9317 %}
 9318 
 9319 // Shift Left Long by 1-31
 9320 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9321   match(Set dst (LShiftL dst cnt));
 9322   effect(KILL cr);
 9323   ins_cost(200);
 9324   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9325             "SHL    $dst.lo,$cnt" %}
 9326   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9327   ins_encode( move_long_small_shift(dst,cnt) );
 9328   ins_pipe( ialu_reg_long );
 9329 %}
 9330 
 9331 // Shift Left Long by 32-63
 9332 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9333   match(Set dst (LShiftL dst cnt));
 9334   effect(KILL cr);
 9335   ins_cost(300);
 9336   format %{ "MOV    $dst.hi,$dst.lo\n"
 9337           "\tSHL    $dst.hi,$cnt-32\n"
 9338           "\tXOR    $dst.lo,$dst.lo" %}
 9339   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9340   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9341   ins_pipe( ialu_reg_long );
 9342 %}
 9343 
 9344 // Shift Left Long by variable
 9345 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9346   match(Set dst (LShiftL dst shift));
 9347   effect(KILL cr);
 9348   ins_cost(500+200);
 9349   size(17);
 9350   format %{ "TEST   $shift,32\n\t"
 9351             "JEQ,s  small\n\t"
 9352             "MOV    $dst.hi,$dst.lo\n\t"
 9353             "XOR    $dst.lo,$dst.lo\n"
 9354     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9355             "SHL    $dst.lo,$shift" %}
 9356   ins_encode( shift_left_long( dst, shift ) );
 9357   ins_pipe( pipe_slow );
 9358 %}
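
// The variable 64-bit left shift tests bit 5 of the count first (a shift by 32
// or more moves the low word into the high word and clears the low word) and
// then lets SHLD/SHL handle the remaining 0-31 bits. A C sketch of the same
// steps (assumed name, <stdint.h> types; the guard avoids a shift by 32):
//
//   uint64_t shlL_model(uint64_t x, unsigned cnt) {   // cnt is used mod 64, as on x86
//     uint32_t lo = (uint32_t)x, hi = (uint32_t)(x >> 32);
//     if (cnt & 32) { hi = lo; lo = 0; }              // TEST shift,32 path
//     cnt &= 31;
//     if (cnt != 0) {
//       hi = (hi << cnt) | (lo >> (32 - cnt));        // SHLD dst.hi,dst.lo,shift
//       lo = lo << cnt;                               // SHL  dst.lo,shift
//     }
//     return ((uint64_t)hi << 32) | lo;
//   }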
 9359 
 9360 // Shift Right Long by 1-31
 9361 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9362   match(Set dst (URShiftL dst cnt));
 9363   effect(KILL cr);
 9364   ins_cost(200);
 9365   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9366             "SHR    $dst.hi,$cnt" %}
 9367   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9368   ins_encode( move_long_small_shift(dst,cnt) );
 9369   ins_pipe( ialu_reg_long );
 9370 %}
 9371 
 9372 // Shift Right Long by 32-63
 9373 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9374   match(Set dst (URShiftL dst cnt));
 9375   effect(KILL cr);
 9376   ins_cost(300);
 9377   format %{ "MOV    $dst.lo,$dst.hi\n"
 9378           "\tSHR    $dst.lo,$cnt-32\n"
 9379           "\tXOR    $dst.hi,$dst.hi" %}
 9380   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9381   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9382   ins_pipe( ialu_reg_long );
 9383 %}
 9384 
 9385 // Shift Right Long by variable
 9386 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9387   match(Set dst (URShiftL dst shift));
 9388   effect(KILL cr);
 9389   ins_cost(600);
 9390   size(17);
 9391   format %{ "TEST   $shift,32\n\t"
 9392             "JEQ,s  small\n\t"
 9393             "MOV    $dst.lo,$dst.hi\n\t"
 9394             "XOR    $dst.hi,$dst.hi\n"
 9395     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9396             "SHR    $dst.hi,$shift" %}
 9397   ins_encode( shift_right_long( dst, shift ) );
 9398   ins_pipe( pipe_slow );
 9399 %}
 9400 
 9401 // Shift Right Long by 1-31
 9402 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9403   match(Set dst (RShiftL dst cnt));
 9404   effect(KILL cr);
 9405   ins_cost(200);
 9406   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9407             "SAR    $dst.hi,$cnt" %}
 9408   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9409   ins_encode( move_long_small_shift(dst,cnt) );
 9410   ins_pipe( ialu_reg_long );
 9411 %}
 9412 
 9413 // Shift Right Long by 32-63
 9414 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9415   match(Set dst (RShiftL dst cnt));
 9416   effect(KILL cr);
 9417   ins_cost(300);
 9418   format %{ "MOV    $dst.lo,$dst.hi\n"
 9419           "\tSAR    $dst.lo,$cnt-32\n"
 9420           "\tSAR    $dst.hi,31" %}
 9421   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9422   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9423   ins_pipe( ialu_reg_long );
 9424 %}
 9425 
 9426 // Shift Right arithmetic Long by variable
 9427 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9428   match(Set dst (RShiftL dst shift));
 9429   effect(KILL cr);
 9430   ins_cost(600);
 9431   size(18);
 9432   format %{ "TEST   $shift,32\n\t"
 9433             "JEQ,s  small\n\t"
 9434             "MOV    $dst.lo,$dst.hi\n\t"
 9435             "SAR    $dst.hi,31\n"
 9436     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9437             "SAR    $dst.hi,$shift" %}
 9438   ins_encode( shift_right_arith_long( dst, shift ) );
 9439   ins_pipe( pipe_slow );
 9440 %}
 9441 
 9442 
 9443 //----------Double Instructions------------------------------------------------
 9444 // Double Math
 9445 
 9446 // Compare & branch
 9447 
// P6 version of double compare, sets condition codes in EFLAGS
 9449 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9450   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9451   match(Set cr (CmpD src1 src2));
 9452   effect(KILL rax);
 9453   ins_cost(150);
 9454   format %{ "FLD    $src1\n\t"
 9455             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9456             "JNP    exit\n\t"
 9457             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9458             "SAHF\n"
 9459      "exit:\tNOP               // avoid branch to branch" %}
 9460   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9461   ins_encode( Push_Reg_DPR(src1),
 9462               OpcP, RegOpc(src2),
 9463               cmpF_P6_fixup );
 9464   ins_pipe( pipe_slow );
 9465 %}
 9466 
 9467 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9468   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9469   match(Set cr (CmpD src1 src2));
 9470   ins_cost(150);
 9471   format %{ "FLD    $src1\n\t"
 9472             "FUCOMIP ST,$src2  // P6 instruction" %}
 9473   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9474   ins_encode( Push_Reg_DPR(src1),
 9475               OpcP, RegOpc(src2));
 9476   ins_pipe( pipe_slow );
 9477 %}
 9478 
 9479 // Compare & branch
 9480 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9481   predicate(UseSSE<=1);
 9482   match(Set cr (CmpD src1 src2));
 9483   effect(KILL rax);
 9484   ins_cost(200);
 9485   format %{ "FLD    $src1\n\t"
 9486             "FCOMp  $src2\n\t"
 9487             "FNSTSW AX\n\t"
 9488             "TEST   AX,0x400\n\t"
 9489             "JZ,s   flags\n\t"
 9490             "MOV    AH,1\t# unordered treat as LT\n"
 9491     "flags:\tSAHF" %}
 9492   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9493   ins_encode( Push_Reg_DPR(src1),
 9494               OpcP, RegOpc(src2),
 9495               fpu_flags);
 9496   ins_pipe( pipe_slow );
 9497 %}
 9498 
 9499 // Compare vs zero into -1,0,1
 9500 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9501   predicate(UseSSE<=1);
 9502   match(Set dst (CmpD3 src1 zero));
 9503   effect(KILL cr, KILL rax);
 9504   ins_cost(280);
 9505   format %{ "FTSTD  $dst,$src1" %}
 9506   opcode(0xE4, 0xD9);
 9507   ins_encode( Push_Reg_DPR(src1),
 9508               OpcS, OpcP, PopFPU,
 9509               CmpF_Result(dst));
 9510   ins_pipe( pipe_slow );
 9511 %}
 9512 
 9513 // Compare into -1,0,1
 9514 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9515   predicate(UseSSE<=1);
 9516   match(Set dst (CmpD3 src1 src2));
 9517   effect(KILL cr, KILL rax);
 9518   ins_cost(300);
 9519   format %{ "FCMPD  $dst,$src1,$src2" %}
 9520   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9521   ins_encode( Push_Reg_DPR(src1),
 9522               OpcP, RegOpc(src2),
 9523               CmpF_Result(dst));
 9524   ins_pipe( pipe_slow );
 9525 %}
 9526 
// double compare and set condition codes in EFLAGS by XMM regs
 9528 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9529   predicate(UseSSE>=2);
 9530   match(Set cr (CmpD src1 src2));
 9531   ins_cost(145);
 9532   format %{ "UCOMISD $src1,$src2\n\t"
 9533             "JNP,s   exit\n\t"
 9534             "PUSHF\t# saw NaN, set CF\n\t"
 9535             "AND     [rsp], #0xffffff2b\n\t"
 9536             "POPF\n"
 9537     "exit:" %}
 9538   ins_encode %{
 9539     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9540     emit_cmpfp_fixup(_masm);
 9541   %}
 9542   ins_pipe( pipe_slow );
 9543 %}
 9544 
 9545 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9546   predicate(UseSSE>=2);
 9547   match(Set cr (CmpD src1 src2));
 9548   ins_cost(100);
 9549   format %{ "UCOMISD $src1,$src2" %}
 9550   ins_encode %{
 9551     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9552   %}
 9553   ins_pipe( pipe_slow );
 9554 %}
 9555 
// double compare and set condition codes in EFLAGS by XMM regs
 9557 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9558   predicate(UseSSE>=2);
 9559   match(Set cr (CmpD src1 (LoadD src2)));
 9560   ins_cost(145);
 9561   format %{ "UCOMISD $src1,$src2\n\t"
 9562             "JNP,s   exit\n\t"
 9563             "PUSHF\t# saw NaN, set CF\n\t"
 9564             "AND     [rsp], #0xffffff2b\n\t"
 9565             "POPF\n"
 9566     "exit:" %}
 9567   ins_encode %{
 9568     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9569     emit_cmpfp_fixup(_masm);
 9570   %}
 9571   ins_pipe( pipe_slow );
 9572 %}
 9573 
 9574 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9575   predicate(UseSSE>=2);
 9576   match(Set cr (CmpD src1 (LoadD src2)));
 9577   ins_cost(100);
 9578   format %{ "UCOMISD $src1,$src2" %}
 9579   ins_encode %{
 9580     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9581   %}
 9582   ins_pipe( pipe_slow );
 9583 %}
 9584 
 9585 // Compare into -1,0,1 in XMM
 9586 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9587   predicate(UseSSE>=2);
 9588   match(Set dst (CmpD3 src1 src2));
 9589   effect(KILL cr);
 9590   ins_cost(255);
 9591   format %{ "UCOMISD $src1, $src2\n\t"
 9592             "MOV     $dst, #-1\n\t"
 9593             "JP,s    done\n\t"
 9594             "JB,s    done\n\t"
 9595             "SETNE   $dst\n\t"
 9596             "MOVZB   $dst, $dst\n"
 9597     "done:" %}
 9598   ins_encode %{
 9599     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9600     emit_cmpfp3(_masm, $dst$$Register);
 9601   %}
 9602   ins_pipe( pipe_slow );
 9603 %}
 9604 
 9605 // Compare into -1,0,1 in XMM and memory
 9606 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9607   predicate(UseSSE>=2);
 9608   match(Set dst (CmpD3 src1 (LoadD src2)));
 9609   effect(KILL cr);
 9610   ins_cost(275);
 9611   format %{ "UCOMISD $src1, $src2\n\t"
 9612             "MOV     $dst, #-1\n\t"
 9613             "JP,s    done\n\t"
 9614             "JB,s    done\n\t"
 9615             "SETNE   $dst\n\t"
 9616             "MOVZB   $dst, $dst\n"
 9617     "done:" %}
 9618   ins_encode %{
 9619     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9620     emit_cmpfp3(_masm, $dst$$Register);
 9621   %}
 9622   ins_pipe( pipe_slow );
 9623 %}
 9624 
 9625 
 9626 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9627   predicate (UseSSE <=1);
 9628   match(Set dst (SubD dst src));
 9629 
 9630   format %{ "FLD    $src\n\t"
 9631             "DSUBp  $dst,ST" %}
 9632   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9633   ins_cost(150);
 9634   ins_encode( Push_Reg_DPR(src),
 9635               OpcP, RegOpc(dst) );
 9636   ins_pipe( fpu_reg_reg );
 9637 %}
 9638 
 9639 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9640   predicate (UseSSE <=1);
 9641   match(Set dst (RoundDouble (SubD src1 src2)));
 9642   ins_cost(250);
 9643 
 9644   format %{ "FLD    $src2\n\t"
 9645             "DSUB   ST,$src1\n\t"
 9646             "FSTP_D $dst\t# D-round" %}
 9647   opcode(0xD8, 0x5);
 9648   ins_encode( Push_Reg_DPR(src2),
 9649               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9650   ins_pipe( fpu_mem_reg_reg );
 9651 %}
 9652 
 9653 
 9654 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9655   predicate (UseSSE <=1);
 9656   match(Set dst (SubD dst (LoadD src)));
 9657   ins_cost(150);
 9658 
 9659   format %{ "FLD    $src\n\t"
 9660             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9662   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9663               OpcP, RegOpc(dst) );
 9664   ins_pipe( fpu_reg_mem );
 9665 %}
 9666 
 9667 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9668   predicate (UseSSE<=1);
 9669   match(Set dst (AbsD src));
 9670   ins_cost(100);
 9671   format %{ "FABS" %}
 9672   opcode(0xE1, 0xD9);
 9673   ins_encode( OpcS, OpcP );
 9674   ins_pipe( fpu_reg_reg );
 9675 %}
 9676 
 9677 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9678   predicate(UseSSE<=1);
 9679   match(Set dst (NegD src));
 9680   ins_cost(100);
 9681   format %{ "FCHS" %}
 9682   opcode(0xE0, 0xD9);
 9683   ins_encode( OpcS, OpcP );
 9684   ins_pipe( fpu_reg_reg );
 9685 %}
 9686 
 9687 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9688   predicate(UseSSE<=1);
 9689   match(Set dst (AddD dst src));
 9690   format %{ "FLD    $src\n\t"
 9691             "DADD   $dst,ST" %}
 9692   size(4);
 9693   ins_cost(150);
 9694   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9695   ins_encode( Push_Reg_DPR(src),
 9696               OpcP, RegOpc(dst) );
 9697   ins_pipe( fpu_reg_reg );
 9698 %}
 9699 
 9700 
 9701 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9702   predicate(UseSSE<=1);
 9703   match(Set dst (RoundDouble (AddD src1 src2)));
 9704   ins_cost(250);
 9705 
 9706   format %{ "FLD    $src2\n\t"
 9707             "DADD   ST,$src1\n\t"
 9708             "FSTP_D $dst\t# D-round" %}
 9709   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9710   ins_encode( Push_Reg_DPR(src2),
 9711               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9712   ins_pipe( fpu_mem_reg_reg );
 9713 %}
 9714 
 9715 
 9716 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9717   predicate(UseSSE<=1);
 9718   match(Set dst (AddD dst (LoadD src)));
 9719   ins_cost(150);
 9720 
 9721   format %{ "FLD    $src\n\t"
 9722             "DADDp  $dst,ST" %}
 9723   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9724   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9725               OpcP, RegOpc(dst) );
 9726   ins_pipe( fpu_reg_mem );
 9727 %}
 9728 
 9729 // add-to-memory
 9730 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9731   predicate(UseSSE<=1);
 9732   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9733   ins_cost(150);
 9734 
 9735   format %{ "FLD_D  $dst\n\t"
 9736             "DADD   ST,$src\n\t"
 9737             "FST_D  $dst" %}
 9738   opcode(0xDD, 0x0);
 9739   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9740               Opcode(0xD8), RegOpc(src),
 9741               set_instruction_start,
 9742               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9743   ins_pipe( fpu_reg_mem );
 9744 %}
 9745 
 9746 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9747   predicate(UseSSE<=1);
 9748   match(Set dst (AddD dst con));
 9749   ins_cost(125);
 9750   format %{ "FLD1\n\t"
 9751             "DADDp  $dst,ST" %}
 9752   ins_encode %{
 9753     __ fld1();
 9754     __ faddp($dst$$reg);
 9755   %}
 9756   ins_pipe(fpu_reg);
 9757 %}
 9758 
 9759 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9760   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9761   match(Set dst (AddD dst con));
 9762   ins_cost(200);
 9763   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9764             "DADDp  $dst,ST" %}
 9765   ins_encode %{
 9766     __ fld_d($constantaddress($con));
 9767     __ faddp($dst$$reg);
 9768   %}
 9769   ins_pipe(fpu_reg_mem);
 9770 %}
 9771 
 9772 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9773   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9774   match(Set dst (RoundDouble (AddD src con)));
 9775   ins_cost(200);
 9776   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9777             "DADD   ST,$src\n\t"
 9778             "FSTP_D $dst\t# D-round" %}
 9779   ins_encode %{
 9780     __ fld_d($constantaddress($con));
 9781     __ fadd($src$$reg);
 9782     __ fstp_d(Address(rsp, $dst$$disp));
 9783   %}
 9784   ins_pipe(fpu_mem_reg_con);
 9785 %}
 9786 
 9787 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9788   predicate(UseSSE<=1);
 9789   match(Set dst (MulD dst src));
 9790   format %{ "FLD    $src\n\t"
 9791             "DMULp  $dst,ST" %}
 9792   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9793   ins_cost(150);
 9794   ins_encode( Push_Reg_DPR(src),
 9795               OpcP, RegOpc(dst) );
 9796   ins_pipe( fpu_reg_reg );
 9797 %}
 9798 
 9799 // Strict FP instruction biases argument before multiply then
 9800 // biases result to avoid double rounding of subnormals.
 9801 //
 9802 // scale arg1 by multiplying arg1 by 2^(-15360)
 9803 // load arg2
 9804 // multiply scaled arg1 by arg2
 9805 // rescale product by 2^(15360)
 9806 //
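// A sketch of why the two biases cancel (illustration only, not the emitted
// code; the constants are StubRoutines::x86::_fpu_subnormal_bias1/2):
//
//   biased  = arg1 * 2^(-15360)      // FLD bias1; DMULp
//   product = biased * arg2          // FLD arg2;  DMULp
//   result  = product * 2^(+15360)   // FLD bias2; DMULp
//           = arg1 * arg2            // the scale factors cancel exactly
//
// The down-scale pushes a would-be double subnormal into the x87 subnormal
// range so it is rounded only once there; the final up-scale by a power of
// two then restores the magnitude without a second rounding.
//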
 9807 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9808   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9809   match(Set dst (MulD dst src));
 9810   ins_cost(1);   // Select this instruction for all FP double multiplies
 9811 
 9812   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9813             "DMULp  $dst,ST\n\t"
 9814             "FLD    $src\n\t"
 9815             "DMULp  $dst,ST\n\t"
 9816             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST" %}
 9818   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9819   ins_encode( strictfp_bias1(dst),
 9820               Push_Reg_DPR(src),
 9821               OpcP, RegOpc(dst),
 9822               strictfp_bias2(dst) );
 9823   ins_pipe( fpu_reg_reg );
 9824 %}
 9825 
 9826 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9827   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9828   match(Set dst (MulD dst con));
 9829   ins_cost(200);
 9830   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9831             "DMULp  $dst,ST" %}
 9832   ins_encode %{
 9833     __ fld_d($constantaddress($con));
 9834     __ fmulp($dst$$reg);
 9835   %}
 9836   ins_pipe(fpu_reg_mem);
 9837 %}
 9838 
 9839 
 9840 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9841   predicate( UseSSE<=1 );
 9842   match(Set dst (MulD dst (LoadD src)));
 9843   ins_cost(200);
 9844   format %{ "FLD_D  $src\n\t"
 9845             "DMULp  $dst,ST" %}
 9846   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9847   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9848               OpcP, RegOpc(dst) );
 9849   ins_pipe( fpu_reg_mem );
 9850 %}
 9851 
 9852 //
 9853 // Cisc-alternate to reg-reg multiply
 9854 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9855   predicate( UseSSE<=1 );
 9856   match(Set dst (MulD src (LoadD mem)));
 9857   ins_cost(250);
 9858   format %{ "FLD_D  $mem\n\t"
 9859             "DMUL   ST,$src\n\t"
 9860             "FSTP_D $dst" %}
 9861   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9862   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9863               OpcReg_FPR(src),
 9864               Pop_Reg_DPR(dst) );
 9865   ins_pipe( fpu_reg_reg_mem );
 9866 %}
 9867 
 9868 
 9869 // MACRO3 -- addDPR a mulDPR
 9870 // This instruction is a '2-address' instruction in that the result goes
 9871 // back to src2.  This eliminates a move from the macro; possibly the
 9872 // register allocator will have to add it back (and maybe not).
 9873 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9874   predicate( UseSSE<=1 );
 9875   match(Set src2 (AddD (MulD src0 src1) src2));
 9876   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9877             "DMUL   ST,$src1\n\t"
 9878             "DADDp  $src2,ST" %}
 9879   ins_cost(250);
 9880   opcode(0xDD); /* LoadD DD /0 */
 9881   ins_encode( Push_Reg_FPR(src0),
 9882               FMul_ST_reg(src1),
 9883               FAddP_reg_ST(src2) );
 9884   ins_pipe( fpu_reg_reg_reg );
 9885 %}
 9886 
 9887 
 9888 // MACRO3 -- subDPR a mulDPR
 9889 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9890   predicate( UseSSE<=1 );
 9891   match(Set src2 (SubD (MulD src0 src1) src2));
 9892   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9893             "DMUL   ST,$src1\n\t"
 9894             "DSUBRp $src2,ST" %}
 9895   ins_cost(250);
 9896   ins_encode( Push_Reg_FPR(src0),
 9897               FMul_ST_reg(src1),
 9898               Opcode(0xDE), Opc_plus(0xE0,src2));
 9899   ins_pipe( fpu_reg_reg_reg );
 9900 %}
 9901 
 9902 
 9903 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9904   predicate( UseSSE<=1 );
 9905   match(Set dst (DivD dst src));
 9906 
 9907   format %{ "FLD    $src\n\t"
 9908             "FDIVp  $dst,ST" %}
 9909   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9910   ins_cost(150);
 9911   ins_encode( Push_Reg_DPR(src),
 9912               OpcP, RegOpc(dst) );
 9913   ins_pipe( fpu_reg_reg );
 9914 %}
 9915 
 9916 // Strict FP instruction biases argument before division then
 9917 // biases result, to avoid double rounding of subnormals.
 9918 //
 9919 // scale dividend by multiplying dividend by 2^(-15360)
 9920 // load divisor
 9921 // divide scaled dividend by divisor
 9922 // rescale quotient by 2^(15360)
 9923 //
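// The same identity as for the strictfp multiply above, but note that only
// the dividend is scaled (a sketch, not the emitted code):
//
//   (dividend * 2^(-15360)) / divisor  ==  (dividend / divisor) * 2^(-15360)
//
// so the trailing multiply by _fpu_subnormal_bias2 recovers the true
// quotient.  Scaling the divisor instead would scale the quotient up rather
// than down, defeating the bias.
//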
 9924 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9929 
 9930   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9931             "DMULp  $dst,ST\n\t"
 9932             "FLD    $src\n\t"
 9933             "FDIVp  $dst,ST\n\t"
 9934             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST" %}
 9936   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9937   ins_encode( strictfp_bias1(dst),
 9938               Push_Reg_DPR(src),
 9939               OpcP, RegOpc(dst),
 9940               strictfp_bias2(dst) );
 9941   ins_pipe( fpu_reg_reg );
 9942 %}
 9943 
 9944 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9945   predicate(UseSSE<=1);
 9946   match(Set dst (ModD dst src));
 9947   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9948 
 9949   format %{ "DMOD   $dst,$src" %}
 9950   ins_cost(250);
 9951   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9952               emitModDPR(),
 9953               Push_Result_Mod_DPR(src),
 9954               Pop_Reg_DPR(dst));
 9955   ins_pipe( pipe_slow );
 9956 %}
 9957 
 9958 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9959   predicate(UseSSE>=2);
 9960   match(Set dst (ModD src0 src1));
 9961   effect(KILL rax, KILL cr);
 9962 
 9963   format %{ "SUB    ESP,8\t # DMOD\n"
 9964           "\tMOVSD  [ESP+0],$src1\n"
 9965           "\tFLD_D  [ESP+0]\n"
 9966           "\tMOVSD  [ESP+0],$src0\n"
 9967           "\tFLD_D  [ESP+0]\n"
 9968      "loop:\tFPREM\n"
 9969           "\tFWAIT\n"
 9970           "\tFNSTSW AX\n"
 9971           "\tSAHF\n"
 9972           "\tJP     loop\n"
 9973           "\tFSTP_D [ESP+0]\n"
 9974           "\tMOVSD  $dst,[ESP+0]\n"
 9975           "\tADD    ESP,8\n"
 9976           "\tFSTP   ST0\t # Restore FPU Stack"
 9977     %}
 9978   ins_cost(250);
 9979   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9980   ins_pipe( pipe_slow );
 9981 %}
 9982 
 9983 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9984   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
  format %{ "DATAN  $dst,$src" %}
 9987   opcode(0xD9, 0xF3);
 9988   ins_encode( Push_Reg_DPR(src),
 9989               OpcP, OpcS, RegOpc(dst) );
 9990   ins_pipe( pipe_slow );
 9991 %}
 9992 
 9993 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9994   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATAN  $dst,$src" %}
 9998   opcode(0xD9, 0xF3);
 9999   ins_encode( Push_SrcD(src),
10000               OpcP, OpcS, Push_ResultD(dst) );
10001   ins_pipe( pipe_slow );
10002 %}
10003 
10004 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10005   predicate (UseSSE<=1);
10006   match(Set dst (SqrtD src));
10007   format %{ "DSQRT  $dst,$src" %}
10008   opcode(0xFA, 0xD9);
10009   ins_encode( Push_Reg_DPR(src),
10010               OpcS, OpcP, Pop_Reg_DPR(dst) );
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 //-------------Float Instructions-------------------------------
10015 // Float Math
10016 
10017 // Code for float compare:
10018 //     fcompp();
10019 //     fwait(); fnstsw_ax();
10020 //     sahf();
10021 //     movl(dst, unordered_result);
10022 //     jcc(Assembler::parity, exit);
10023 //     movl(dst, less_result);
10024 //     jcc(Assembler::below, exit);
10025 //     movl(dst, equal_result);
10026 //     jcc(Assembler::equal, exit);
10027 //     movl(dst, greater_result);
10028 //   exit:
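//
// Roughly, that sequence yields the three-way CmpF3/CmpD3 result as follows
// (a sketch of the intended mapping, not the emitted code):
//
//     if (src1 or src2 is NaN)  dst = unordered_result;
//     else if (src1 <  src2)    dst = less_result;     // -1
//     else if (src1 == src2)    dst = equal_result;    //  0
//     else                      dst = greater_result;  // +1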
10029 
10030 // P6 version of float compare, sets condition codes in EFLAGS
10031 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10032   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10033   match(Set cr (CmpF src1 src2));
10034   effect(KILL rax);
10035   ins_cost(150);
10036   format %{ "FLD    $src1\n\t"
10037             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10038             "JNP    exit\n\t"
10039             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10040             "SAHF\n"
10041      "exit:\tNOP               // avoid branch to branch" %}
10042   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10043   ins_encode( Push_Reg_DPR(src1),
10044               OpcP, RegOpc(src2),
10045               cmpF_P6_fixup );
10046   ins_pipe( pipe_slow );
10047 %}
10048 
10049 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10050   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10051   match(Set cr (CmpF src1 src2));
10052   ins_cost(100);
10053   format %{ "FLD    $src1\n\t"
10054             "FUCOMIP ST,$src2  // P6 instruction" %}
10055   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10056   ins_encode( Push_Reg_DPR(src1),
10057               OpcP, RegOpc(src2));
10058   ins_pipe( pipe_slow );
10059 %}
10060 
10061 
10062 // Compare & branch
10063 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10064   predicate(UseSSE == 0);
10065   match(Set cr (CmpF src1 src2));
10066   effect(KILL rax);
10067   ins_cost(200);
10068   format %{ "FLD    $src1\n\t"
10069             "FCOMp  $src2\n\t"
10070             "FNSTSW AX\n\t"
10071             "TEST   AX,0x400\n\t"
10072             "JZ,s   flags\n\t"
10073             "MOV    AH,1\t# unordered treat as LT\n"
10074     "flags:\tSAHF" %}
10075   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10076   ins_encode( Push_Reg_DPR(src1),
10077               OpcP, RegOpc(src2),
10078               fpu_flags);
10079   ins_pipe( pipe_slow );
10080 %}
10081 
10082 // Compare vs zero into -1,0,1
10083 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10084   predicate(UseSSE == 0);
10085   match(Set dst (CmpF3 src1 zero));
10086   effect(KILL cr, KILL rax);
10087   ins_cost(280);
10088   format %{ "FTSTF  $dst,$src1" %}
10089   opcode(0xE4, 0xD9);
10090   ins_encode( Push_Reg_DPR(src1),
10091               OpcS, OpcP, PopFPU,
10092               CmpF_Result(dst));
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 // Compare into -1,0,1
10097 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10098   predicate(UseSSE == 0);
10099   match(Set dst (CmpF3 src1 src2));
10100   effect(KILL cr, KILL rax);
10101   ins_cost(300);
10102   format %{ "FCMPF  $dst,$src1,$src2" %}
10103   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10104   ins_encode( Push_Reg_DPR(src1),
10105               OpcP, RegOpc(src2),
10106               CmpF_Result(dst));
10107   ins_pipe( pipe_slow );
10108 %}
10109 
10110 // float compare and set condition codes in EFLAGS by XMM regs
10111 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10112   predicate(UseSSE>=1);
10113   match(Set cr (CmpF src1 src2));
10114   ins_cost(145);
10115   format %{ "UCOMISS $src1,$src2\n\t"
10116             "JNP,s   exit\n\t"
10117             "PUSHF\t# saw NaN, set CF\n\t"
10118             "AND     [rsp], #0xffffff2b\n\t"
10119             "POPF\n"
10120     "exit:" %}
10121   ins_encode %{
10122     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10123     emit_cmpfp_fixup(_masm);
10124   %}
10125   ins_pipe( pipe_slow );
10126 %}
10127 
10128 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10129   predicate(UseSSE>=1);
10130   match(Set cr (CmpF src1 src2));
10131   ins_cost(100);
10132   format %{ "UCOMISS $src1,$src2" %}
10133   ins_encode %{
10134     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10135   %}
10136   ins_pipe( pipe_slow );
10137 %}
10138 
10139 // float compare and set condition codes in EFLAGS by XMM regs
10140 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10141   predicate(UseSSE>=1);
10142   match(Set cr (CmpF src1 (LoadF src2)));
10143   ins_cost(165);
10144   format %{ "UCOMISS $src1,$src2\n\t"
10145             "JNP,s   exit\n\t"
10146             "PUSHF\t# saw NaN, set CF\n\t"
10147             "AND     [rsp], #0xffffff2b\n\t"
10148             "POPF\n"
10149     "exit:" %}
10150   ins_encode %{
10151     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10152     emit_cmpfp_fixup(_masm);
10153   %}
10154   ins_pipe( pipe_slow );
10155 %}
10156 
10157 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10158   predicate(UseSSE>=1);
10159   match(Set cr (CmpF src1 (LoadF src2)));
10160   ins_cost(100);
10161   format %{ "UCOMISS $src1,$src2" %}
10162   ins_encode %{
10163     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10164   %}
10165   ins_pipe( pipe_slow );
10166 %}
10167 
10168 // Compare into -1,0,1 in XMM
10169 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10170   predicate(UseSSE>=1);
10171   match(Set dst (CmpF3 src1 src2));
10172   effect(KILL cr);
10173   ins_cost(255);
10174   format %{ "UCOMISS $src1, $src2\n\t"
10175             "MOV     $dst, #-1\n\t"
10176             "JP,s    done\n\t"
10177             "JB,s    done\n\t"
10178             "SETNE   $dst\n\t"
10179             "MOVZB   $dst, $dst\n"
10180     "done:" %}
10181   ins_encode %{
10182     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10183     emit_cmpfp3(_masm, $dst$$Register);
10184   %}
10185   ins_pipe( pipe_slow );
10186 %}
10187 
10188 // Compare into -1,0,1 in XMM and memory
10189 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10190   predicate(UseSSE>=1);
10191   match(Set dst (CmpF3 src1 (LoadF src2)));
10192   effect(KILL cr);
10193   ins_cost(275);
10194   format %{ "UCOMISS $src1, $src2\n\t"
10195             "MOV     $dst, #-1\n\t"
10196             "JP,s    done\n\t"
10197             "JB,s    done\n\t"
10198             "SETNE   $dst\n\t"
10199             "MOVZB   $dst, $dst\n"
10200     "done:" %}
10201   ins_encode %{
10202     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10203     emit_cmpfp3(_masm, $dst$$Register);
10204   %}
10205   ins_pipe( pipe_slow );
10206 %}
10207 
10208 // Spill to obtain 24-bit precision
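//
// The 24-bit rounding comes from the spill itself: FSTP_S stores the x87
// result as a 32-bit float, rounding the significand to 24 bits.  Roughly
// the shape emitted by the instruct below (illustration only):
//
//   FLD    src1        ; operands live on the FPU stack at full precision
//   FSUB   src2
//   FSTP_S [dst]       ; 32-bit store => result rounded to a 24-bit significand
//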
10209 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10210   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10211   match(Set dst (SubF src1 src2));
10212 
10213   format %{ "FSUB   $dst,$src1 - $src2" %}
10214   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10215   ins_encode( Push_Reg_FPR(src1),
10216               OpcReg_FPR(src2),
10217               Pop_Mem_FPR(dst) );
10218   ins_pipe( fpu_mem_reg_reg );
10219 %}
10220 //
10221 // This instruction does not round to 24-bits
10222 instruct subFPR_reg(regFPR dst, regFPR src) %{
10223   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10224   match(Set dst (SubF dst src));
10225 
10226   format %{ "FSUB   $dst,$src" %}
10227   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10228   ins_encode( Push_Reg_FPR(src),
10229               OpcP, RegOpc(dst) );
10230   ins_pipe( fpu_reg_reg );
10231 %}
10232 
10233 // Spill to obtain 24-bit precision
10234 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10235   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10236   match(Set dst (AddF src1 src2));
10237 
10238   format %{ "FADD   $dst,$src1,$src2" %}
10239   opcode(0xD8, 0x0); /* D8 C0+i */
10240   ins_encode( Push_Reg_FPR(src2),
10241               OpcReg_FPR(src1),
10242               Pop_Mem_FPR(dst) );
10243   ins_pipe( fpu_mem_reg_reg );
10244 %}
10245 //
10246 // This instruction does not round to 24-bits
10247 instruct addFPR_reg(regFPR dst, regFPR src) %{
10248   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10249   match(Set dst (AddF dst src));
10250 
10251   format %{ "FLD    $src\n\t"
10252             "FADDp  $dst,ST" %}
10253   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10254   ins_encode( Push_Reg_FPR(src),
10255               OpcP, RegOpc(dst) );
10256   ins_pipe( fpu_reg_reg );
10257 %}
10258 
10259 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10260   predicate(UseSSE==0);
10261   match(Set dst (AbsF src));
10262   ins_cost(100);
10263   format %{ "FABS" %}
10264   opcode(0xE1, 0xD9);
10265   ins_encode( OpcS, OpcP );
10266   ins_pipe( fpu_reg_reg );
10267 %}
10268 
10269 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10270   predicate(UseSSE==0);
10271   match(Set dst (NegF src));
10272   ins_cost(100);
10273   format %{ "FCHS" %}
10274   opcode(0xE0, 0xD9);
10275   ins_encode( OpcS, OpcP );
10276   ins_pipe( fpu_reg_reg );
10277 %}
10278 
10279 // Cisc-alternate to addFPR_reg
10280 // Spill to obtain 24-bit precision
10281 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10282   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10283   match(Set dst (AddF src1 (LoadF src2)));
10284 
10285   format %{ "FLD    $src2\n\t"
10286             "FADD   ST,$src1\n\t"
10287             "FSTP_S $dst" %}
10288   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10289   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10290               OpcReg_FPR(src1),
10291               Pop_Mem_FPR(dst) );
10292   ins_pipe( fpu_mem_reg_mem );
10293 %}
10294 //
10295 // Cisc-alternate to addFPR_reg
10296 // This instruction does not round to 24-bits
10297 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10298   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10299   match(Set dst (AddF dst (LoadF src)));
10300 
10301   format %{ "FADD   $dst,$src" %}
10302   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10303   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10304               OpcP, RegOpc(dst) );
10305   ins_pipe( fpu_reg_mem );
10306 %}
10307 
// Following two instructions for _222_mpegaudio
10309 // Spill to obtain 24-bit precision
10310 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10311   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10312   match(Set dst (AddF src1 src2));
10313 
10314   format %{ "FADD   $dst,$src1,$src2" %}
10315   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10316   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10317               OpcReg_FPR(src2),
10318               Pop_Mem_FPR(dst) );
10319   ins_pipe( fpu_mem_reg_mem );
10320 %}
10321 
10322 // Cisc-spill variant
10323 // Spill to obtain 24-bit precision
10324 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10325   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10326   match(Set dst (AddF src1 (LoadF src2)));
10327 
10328   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10329   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10330   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10331               set_instruction_start,
10332               OpcP, RMopc_Mem(secondary,src1),
10333               Pop_Mem_FPR(dst) );
10334   ins_pipe( fpu_mem_mem_mem );
10335 %}
10336 
10337 // Spill to obtain 24-bit precision
10338 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10339   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10340   match(Set dst (AddF src1 src2));
10341 
10342   format %{ "FADD   $dst,$src1,$src2" %}
10343   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10344   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10345               set_instruction_start,
10346               OpcP, RMopc_Mem(secondary,src1),
10347               Pop_Mem_FPR(dst) );
10348   ins_pipe( fpu_mem_mem_mem );
10349 %}
10350 
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (AddF src con));
10356   format %{ "FLD    $src\n\t"
10357             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10358             "FSTP_S $dst"  %}
10359   ins_encode %{
10360     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10361     __ fadd_s($constantaddress($con));
10362     __ fstp_s(Address(rsp, $dst$$disp));
10363   %}
10364   ins_pipe(fpu_mem_reg_con);
10365 %}
10366 //
10367 // This instruction does not round to 24-bits
10368 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10369   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10370   match(Set dst (AddF src con));
10371   format %{ "FLD    $src\n\t"
10372             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10373             "FSTP   $dst"  %}
10374   ins_encode %{
10375     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10376     __ fadd_s($constantaddress($con));
10377     __ fstp_d($dst$$reg);
10378   %}
10379   ins_pipe(fpu_reg_reg_con);
10380 %}
10381 
10382 // Spill to obtain 24-bit precision
10383 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (MulF src1 src2));
10386 
10387   format %{ "FLD    $src1\n\t"
10388             "FMUL   $src2\n\t"
10389             "FSTP_S $dst"  %}
10390   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10391   ins_encode( Push_Reg_FPR(src1),
10392               OpcReg_FPR(src2),
10393               Pop_Mem_FPR(dst) );
10394   ins_pipe( fpu_mem_reg_reg );
10395 %}
10396 //
10397 // This instruction does not round to 24-bits
10398 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10399   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10400   match(Set dst (MulF src1 src2));
10401 
10402   format %{ "FLD    $src1\n\t"
10403             "FMUL   $src2\n\t"
10404             "FSTP_S $dst"  %}
10405   opcode(0xD8, 0x1); /* D8 C8+i */
10406   ins_encode( Push_Reg_FPR(src2),
10407               OpcReg_FPR(src1),
10408               Pop_Reg_FPR(dst) );
10409   ins_pipe( fpu_reg_reg_reg );
10410 %}
10411 
10412 
10413 // Spill to obtain 24-bit precision
10414 // Cisc-alternate to reg-reg multiply
10415 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10416   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10417   match(Set dst (MulF src1 (LoadF src2)));
10418 
10419   format %{ "FLD_S  $src2\n\t"
10420             "FMUL   $src1\n\t"
10421             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10423   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10424               OpcReg_FPR(src1),
10425               Pop_Mem_FPR(dst) );
10426   ins_pipe( fpu_mem_reg_mem );
10427 %}
10428 //
10429 // This instruction does not round to 24-bits
10430 // Cisc-alternate to reg-reg multiply
10431 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10432   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10433   match(Set dst (MulF src1 (LoadF src2)));
10434 
10435   format %{ "FMUL   $dst,$src1,$src2" %}
10436   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               OpcReg_FPR(src1),
10439               Pop_Reg_FPR(dst) );
10440   ins_pipe( fpu_reg_reg_mem );
10441 %}
10442 
10443 // Spill to obtain 24-bit precision
10444 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10445   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10446   match(Set dst (MulF src1 src2));
10447 
10448   format %{ "FMUL   $dst,$src1,$src2" %}
10449   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10450   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10451               set_instruction_start,
10452               OpcP, RMopc_Mem(secondary,src1),
10453               Pop_Mem_FPR(dst) );
10454   ins_pipe( fpu_mem_mem_mem );
10455 %}
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10459   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (MulF src con));
10461 
10462   format %{ "FLD    $src\n\t"
10463             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10464             "FSTP_S $dst"  %}
10465   ins_encode %{
10466     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10467     __ fmul_s($constantaddress($con));
10468     __ fstp_s(Address(rsp, $dst$$disp));
10469   %}
10470   ins_pipe(fpu_mem_reg_con);
10471 %}
10472 //
10473 // This instruction does not round to 24-bits
10474 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10475   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10476   match(Set dst (MulF src con));
10477 
10478   format %{ "FLD    $src\n\t"
10479             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10480             "FSTP   $dst"  %}
10481   ins_encode %{
10482     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10483     __ fmul_s($constantaddress($con));
10484     __ fstp_d($dst$$reg);
10485   %}
10486   ins_pipe(fpu_reg_reg_con);
10487 %}
10488 
10489 
10490 //
10491 // MACRO1 -- subsume unshared load into mulFPR
10492 // This instruction does not round to 24-bits
10493 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10494   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10495   match(Set dst (MulF (LoadF mem1) src));
10496 
10497   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10498             "FMUL   ST,$src\n\t"
10499             "FSTP   $dst" %}
10500   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10501   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10502               OpcReg_FPR(src),
10503               Pop_Reg_FPR(dst) );
10504   ins_pipe( fpu_reg_reg_mem );
10505 %}
10506 //
10507 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10508 // This instruction does not round to 24-bits
10509 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10510   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10511   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10512   ins_cost(95);
10513 
10514   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10515             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10516             "FADD   ST,$src2\n\t"
10517             "FSTP   $dst" %}
10518   opcode(0xD9); /* LoadF D9 /0 */
10519   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10520               FMul_ST_reg(src1),
10521               FAdd_ST_reg(src2),
10522               Pop_Reg_FPR(dst) );
10523   ins_pipe( fpu_reg_mem_reg_reg );
10524 %}
10525 
10526 // MACRO3 -- addFPR a mulFPR
10527 // This instruction does not round to 24-bits.  It is a '2-address'
10528 // instruction in that the result goes back to src2.  This eliminates
10529 // a move from the macro; possibly the register allocator will have
10530 // to add it back (and maybe not).
10531 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10532   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533   match(Set src2 (AddF (MulF src0 src1) src2));
10534 
10535   format %{ "FLD    $src0     ===MACRO3===\n\t"
10536             "FMUL   ST,$src1\n\t"
10537             "FADDP  $src2,ST" %}
10538   opcode(0xD9); /* LoadF D9 /0 */
10539   ins_encode( Push_Reg_FPR(src0),
10540               FMul_ST_reg(src1),
10541               FAddP_reg_ST(src2) );
10542   ins_pipe( fpu_reg_reg_reg );
10543 %}
10544 
10545 // MACRO4 -- divFPR subFPR
10546 // This instruction does not round to 24-bits
10547 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10548   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10549   match(Set dst (DivF (SubF src2 src1) src3));
10550 
10551   format %{ "FLD    $src2   ===MACRO4===\n\t"
10552             "FSUB   ST,$src1\n\t"
10553             "FDIV   ST,$src3\n\t"
10554             "FSTP  $dst" %}
10555   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10556   ins_encode( Push_Reg_FPR(src2),
10557               subFPR_divFPR_encode(src1,src3),
10558               Pop_Reg_FPR(dst) );
10559   ins_pipe( fpu_reg_reg_reg_reg );
10560 %}
10561 
10562 // Spill to obtain 24-bit precision
10563 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10564   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10565   match(Set dst (DivF src1 src2));
10566 
10567   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10569   ins_encode( Push_Reg_FPR(src1),
10570               OpcReg_FPR(src2),
10571               Pop_Mem_FPR(dst) );
10572   ins_pipe( fpu_mem_reg_reg );
10573 %}
10574 //
10575 // This instruction does not round to 24-bits
10576 instruct divFPR_reg(regFPR dst, regFPR src) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set dst (DivF dst src));
10579 
10580   format %{ "FDIV   $dst,$src" %}
10581   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10582   ins_encode( Push_Reg_FPR(src),
10583               OpcP, RegOpc(dst) );
10584   ins_pipe( fpu_reg_reg );
10585 %}
10586 
10587 
10588 // Spill to obtain 24-bit precision
10589 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10590   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10591   match(Set dst (ModF src1 src2));
10592   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10593 
10594   format %{ "FMOD   $dst,$src1,$src2" %}
10595   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10596               emitModDPR(),
10597               Push_Result_Mod_DPR(src2),
10598               Pop_Mem_FPR(dst));
10599   ins_pipe( pipe_slow );
10600 %}
10601 //
10602 // This instruction does not round to 24-bits
10603 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10604   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10605   match(Set dst (ModF dst src));
10606   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10607 
10608   format %{ "FMOD   $dst,$src" %}
10609   ins_encode(Push_Reg_Mod_DPR(dst, src),
10610               emitModDPR(),
10611               Push_Result_Mod_DPR(src),
10612               Pop_Reg_FPR(dst));
10613   ins_pipe( pipe_slow );
10614 %}
10615 
10616 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10617   predicate(UseSSE>=1);
10618   match(Set dst (ModF src0 src1));
10619   effect(KILL rax, KILL cr);
10620   format %{ "SUB    ESP,4\t # FMOD\n"
10621           "\tMOVSS  [ESP+0],$src1\n"
10622           "\tFLD_S  [ESP+0]\n"
10623           "\tMOVSS  [ESP+0],$src0\n"
10624           "\tFLD_S  [ESP+0]\n"
10625      "loop:\tFPREM\n"
10626           "\tFWAIT\n"
10627           "\tFNSTSW AX\n"
10628           "\tSAHF\n"
10629           "\tJP     loop\n"
10630           "\tFSTP_S [ESP+0]\n"
10631           "\tMOVSS  $dst,[ESP+0]\n"
10632           "\tADD    ESP,4\n"
10633           "\tFSTP   ST0\t # Restore FPU Stack"
10634     %}
10635   ins_cost(250);
10636   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10637   ins_pipe( pipe_slow );
10638 %}
10639 
10640 
10641 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10643 
10644 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10645   predicate(UseSSE==0);
10646   match(Set dst (RoundFloat src));
10647   ins_cost(125);
10648   format %{ "FST_S  $dst,$src\t# F-round" %}
10649   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10650   ins_pipe( fpu_mem_reg );
10651 %}
10652 
10653 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10654   predicate(UseSSE<=1);
10655   match(Set dst (RoundDouble src));
10656   ins_cost(125);
10657   format %{ "FST_D  $dst,$src\t# D-round" %}
10658   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10659   ins_pipe( fpu_mem_reg );
10660 %}
10661 
// Force rounding to 24-bit precision and 8-bit exponent
10663 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10664   predicate(UseSSE==0);
10665   match(Set dst (ConvD2F src));
10666   format %{ "FST_S  $dst,$src\t# F-round" %}
10667   expand %{
10668     roundFloat_mem_reg(dst,src);
10669   %}
10670 %}
10671 
// Force rounding to 24-bit precision and 8-bit exponent
10673 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10674   predicate(UseSSE==1);
10675   match(Set dst (ConvD2F src));
10676   effect( KILL cr );
10677   format %{ "SUB    ESP,4\n\t"
10678             "FST_S  [ESP],$src\t# F-round\n\t"
10679             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10681   ins_encode %{
10682     __ subptr(rsp, 4);
10683     if ($src$$reg != FPR1L_enc) {
10684       __ fld_s($src$$reg-1);
10685       __ fstp_s(Address(rsp, 0));
10686     } else {
10687       __ fst_s(Address(rsp, 0));
10688     }
10689     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10690     __ addptr(rsp, 4);
10691   %}
10692   ins_pipe( pipe_slow );
10693 %}
10694 
10695 // Force rounding double precision to single precision
10696 instruct convD2F_reg(regF dst, regD src) %{
10697   predicate(UseSSE>=2);
10698   match(Set dst (ConvD2F src));
10699   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10700   ins_encode %{
10701     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10702   %}
10703   ins_pipe( pipe_slow );
10704 %}
10705 
10706 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10707   predicate(UseSSE==0);
10708   match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
10710   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10711   ins_pipe( fpu_reg_reg );
10712 %}
10713 
10714 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10715   predicate(UseSSE==1);
10716   match(Set dst (ConvF2D src));
10717   format %{ "FST_D  $dst,$src\t# D-round" %}
10718   expand %{
10719     roundDouble_mem_reg(dst,src);
10720   %}
10721 %}
10722 
10723 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10724   predicate(UseSSE==1);
10725   match(Set dst (ConvF2D src));
10726   effect( KILL cr );
10727   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10729             "FLD_S  [ESP]\n\t"
10730             "ADD    ESP,4\n\t"
10731             "FSTP   $dst\t# D-round" %}
10732   ins_encode %{
10733     __ subptr(rsp, 4);
10734     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10735     __ fld_s(Address(rsp, 0));
10736     __ addptr(rsp, 4);
10737     __ fstp_d($dst$$reg);
10738   %}
10739   ins_pipe( pipe_slow );
10740 %}
10741 
10742 instruct convF2D_reg(regD dst, regF src) %{
10743   predicate(UseSSE>=2);
10744   match(Set dst (ConvF2D src));
10745   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10746   ins_encode %{
10747     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10748   %}
10749   ins_pipe( pipe_slow );
10750 %}
10751 
10752 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10753 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10754   predicate(UseSSE<=1);
10755   match(Set dst (ConvD2I src));
10756   effect( KILL tmp, KILL cr );
10757   format %{ "FLD    $src\t# Convert double to int \n\t"
10758             "FLDCW  trunc mode\n\t"
10759             "SUB    ESP,4\n\t"
10760             "FISTp  [ESP + #0]\n\t"
10761             "FLDCW  std/24-bit mode\n\t"
10762             "POP    EAX\n\t"
10763             "CMP    EAX,0x80000000\n\t"
10764             "JNE,s  fast\n\t"
10765             "FLD_D  $src\n\t"
10766             "CALL   d2i_wrapper\n"
10767       "fast:" %}
10768   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10769   ins_pipe( pipe_slow );
10770 %}
10771 
10772 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10773 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10774   predicate(UseSSE>=2);
10775   match(Set dst (ConvD2I src));
10776   effect( KILL tmp, KILL cr );
10777   format %{ "CVTTSD2SI $dst, $src\n\t"
10778             "CMP    $dst,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "SUB    ESP, 8\n\t"
10781             "MOVSD  [ESP], $src\n\t"
10782             "FLD_D  [ESP]\n\t"
10783             "ADD    ESP, 8\n\t"
10784             "CALL   d2i_wrapper\n"
10785       "fast:" %}
10786   ins_encode %{
10787     Label fast;
10788     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10789     __ cmpl($dst$$Register, 0x80000000);
10790     __ jccb(Assembler::notEqual, fast);
10791     __ subptr(rsp, 8);
10792     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10793     __ fld_d(Address(rsp, 0));
10794     __ addptr(rsp, 8);
10795     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10796     __ post_call_nop();
10797     __ bind(fast);
10798   %}
10799   ins_pipe( pipe_slow );
10800 %}
10801 
10802 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10803   predicate(UseSSE<=1);
10804   match(Set dst (ConvD2L src));
10805   effect( KILL cr );
10806   format %{ "FLD    $src\t# Convert double to long\n\t"
10807             "FLDCW  trunc mode\n\t"
10808             "SUB    ESP,8\n\t"
10809             "FISTp  [ESP + #0]\n\t"
10810             "FLDCW  std/24-bit mode\n\t"
10811             "POP    EAX\n\t"
10812             "POP    EDX\n\t"
10813             "CMP    EDX,0x80000000\n\t"
10814             "JNE,s  fast\n\t"
10815             "TEST   EAX,EAX\n\t"
10816             "JNE,s  fast\n\t"
10817             "FLD    $src\n\t"
10818             "CALL   d2l_wrapper\n"
10819       "fast:" %}
10820   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10821   ins_pipe( pipe_slow );
10822 %}
10823 
10824 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10825 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10826   predicate (UseSSE>=2);
10827   match(Set dst (ConvD2L src));
10828   effect( KILL cr );
10829   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10830             "MOVSD  [ESP],$src\n\t"
10831             "FLD_D  [ESP]\n\t"
10832             "FLDCW  trunc mode\n\t"
10833             "FISTp  [ESP + #0]\n\t"
10834             "FLDCW  std/24-bit mode\n\t"
10835             "POP    EAX\n\t"
10836             "POP    EDX\n\t"
10837             "CMP    EDX,0x80000000\n\t"
10838             "JNE,s  fast\n\t"
10839             "TEST   EAX,EAX\n\t"
10840             "JNE,s  fast\n\t"
10841             "SUB    ESP,8\n\t"
10842             "MOVSD  [ESP],$src\n\t"
10843             "FLD_D  [ESP]\n\t"
10844             "ADD    ESP,8\n\t"
10845             "CALL   d2l_wrapper\n"
10846       "fast:" %}
10847   ins_encode %{
10848     Label fast;
10849     __ subptr(rsp, 8);
10850     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10851     __ fld_d(Address(rsp, 0));
10852     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10853     __ fistp_d(Address(rsp, 0));
10854     // Restore the rounding mode, mask the exception
10855     if (Compile::current()->in_24_bit_fp_mode()) {
10856       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10857     } else {
10858       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10859     }
10860     // Load the converted long, adjust CPU stack
10861     __ pop(rax);
10862     __ pop(rdx);
10863     __ cmpl(rdx, 0x80000000);
10864     __ jccb(Assembler::notEqual, fast);
10865     __ testl(rax, rax);
10866     __ jccb(Assembler::notEqual, fast);
10867     __ subptr(rsp, 8);
10868     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10869     __ fld_d(Address(rsp, 0));
10870     __ addptr(rsp, 8);
10871     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10872     __ post_call_nop();
10873     __ bind(fast);
10874   %}
10875   ins_pipe( pipe_slow );
10876 %}
10877 
// Convert a float to an int.  Java semantics require careful handling
// of the corner cases.  So we set the rounding mode to 'zero', store
// the value down as an int, and reset the rounding mode to 'nearest'.
// The hardware stores a sentinel value if the conversion overflowed or
// the input was a NaN; we check for this and go to the slow path if
// needed.
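//
// A sketch of the check (illustration only; fist_truncate is a made-up name
// for the FLDCW/FISTP sequence, not a real helper):
//
//   int v = fist_truncate(src);      // rounding mode forced to 'zero'
//   if (v != 0x80000000) return v;   // common case
//   return d2i_wrapper(src);         // NaN -> 0, overflow -> MIN/MAX_VALUE
//
// 0x80000000 is both the FPU's sentinel for NaN/out-of-range inputs and the
// encoding of Integer.MIN_VALUE, so that value always forces the slow path.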
10884 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10885   predicate(UseSSE==0);
10886   match(Set dst (ConvF2I src));
10887   effect( KILL tmp, KILL cr );
10888   format %{ "FLD    $src\t# Convert float to int \n\t"
10889             "FLDCW  trunc mode\n\t"
10890             "SUB    ESP,4\n\t"
10891             "FISTp  [ESP + #0]\n\t"
10892             "FLDCW  std/24-bit mode\n\t"
10893             "POP    EAX\n\t"
10894             "CMP    EAX,0x80000000\n\t"
10895             "JNE,s  fast\n\t"
10896             "FLD    $src\n\t"
10897             "CALL   d2i_wrapper\n"
10898       "fast:" %}
10899   // DPR2I_encoding works for FPR2I
10900   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10901   ins_pipe( pipe_slow );
10902 %}
10903 
10904 // Convert a float in xmm to an int reg.
10905 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10906   predicate(UseSSE>=1);
10907   match(Set dst (ConvF2I src));
10908   effect( KILL tmp, KILL cr );
10909   format %{ "CVTTSS2SI $dst, $src\n\t"
10910             "CMP    $dst,0x80000000\n\t"
10911             "JNE,s  fast\n\t"
10912             "SUB    ESP, 4\n\t"
10913             "MOVSS  [ESP], $src\n\t"
10914             "FLD    [ESP]\n\t"
10915             "ADD    ESP, 4\n\t"
10916             "CALL   d2i_wrapper\n"
10917       "fast:" %}
10918   ins_encode %{
10919     Label fast;
10920     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10921     __ cmpl($dst$$Register, 0x80000000);
10922     __ jccb(Assembler::notEqual, fast);
10923     __ subptr(rsp, 4);
10924     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10925     __ fld_s(Address(rsp, 0));
10926     __ addptr(rsp, 4);
10927     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10928     __ post_call_nop();
10929     __ bind(fast);
10930   %}
10931   ins_pipe( pipe_slow );
10932 %}
10933 
10934 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10935   predicate(UseSSE==0);
10936   match(Set dst (ConvF2L src));
10937   effect( KILL cr );
10938   format %{ "FLD    $src\t# Convert float to long\n\t"
10939             "FLDCW  trunc mode\n\t"
10940             "SUB    ESP,8\n\t"
10941             "FISTp  [ESP + #0]\n\t"
10942             "FLDCW  std/24-bit mode\n\t"
10943             "POP    EAX\n\t"
10944             "POP    EDX\n\t"
10945             "CMP    EDX,0x80000000\n\t"
10946             "JNE,s  fast\n\t"
10947             "TEST   EAX,EAX\n\t"
10948             "JNE,s  fast\n\t"
10949             "FLD    $src\n\t"
10950             "CALL   d2l_wrapper\n"
10951       "fast:" %}
10952   // DPR2L_encoding works for FPR2L
10953   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10954   ins_pipe( pipe_slow );
10955 %}
10956 
10957 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10958 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10959   predicate (UseSSE>=1);
10960   match(Set dst (ConvF2L src));
10961   effect( KILL cr );
10962   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10963             "MOVSS  [ESP],$src\n\t"
10964             "FLD_S  [ESP]\n\t"
10965             "FLDCW  trunc mode\n\t"
10966             "FISTp  [ESP + #0]\n\t"
10967             "FLDCW  std/24-bit mode\n\t"
10968             "POP    EAX\n\t"
10969             "POP    EDX\n\t"
10970             "CMP    EDX,0x80000000\n\t"
10971             "JNE,s  fast\n\t"
10972             "TEST   EAX,EAX\n\t"
10973             "JNE,s  fast\n\t"
10974             "SUB    ESP,4\t# Convert float to long\n\t"
10975             "MOVSS  [ESP],$src\n\t"
10976             "FLD_S  [ESP]\n\t"
10977             "ADD    ESP,4\n\t"
10978             "CALL   d2l_wrapper\n"
10979       "fast:" %}
10980   ins_encode %{
10981     Label fast;
10982     __ subptr(rsp, 8);
10983     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10984     __ fld_s(Address(rsp, 0));
10985     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10986     __ fistp_d(Address(rsp, 0));
10987     // Restore the rounding mode, mask the exception
10988     if (Compile::current()->in_24_bit_fp_mode()) {
10989       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10990     } else {
10991       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10992     }
10993     // Load the converted long, adjust CPU stack
10994     __ pop(rax);
10995     __ pop(rdx);
10996     __ cmpl(rdx, 0x80000000);
10997     __ jccb(Assembler::notEqual, fast);
10998     __ testl(rax, rax);
10999     __ jccb(Assembler::notEqual, fast);
11000     __ subptr(rsp, 4);
11001     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11002     __ fld_s(Address(rsp, 0));
11003     __ addptr(rsp, 4);
11004     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11005     __ post_call_nop();
11006     __ bind(fast);
11007   %}
11008   ins_pipe( pipe_slow );
11009 %}
11010 
11011 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11012   predicate( UseSSE<=1 );
11013   match(Set dst (ConvI2D src));
11014   format %{ "FILD   $src\n\t"
11015             "FSTP   $dst" %}
11016   opcode(0xDB, 0x0);  /* DB /0 */
11017   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11018   ins_pipe( fpu_reg_mem );
11019 %}
11020 
11021 instruct convI2D_reg(regD dst, rRegI src) %{
11022   predicate( UseSSE>=2 && !UseXmmI2D );
11023   match(Set dst (ConvI2D src));
11024   format %{ "CVTSI2SD $dst,$src" %}
11025   ins_encode %{
11026     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11027   %}
11028   ins_pipe( pipe_slow );
11029 %}
11030 
11031 instruct convI2D_mem(regD dst, memory mem) %{
11032   predicate( UseSSE>=2 );
11033   match(Set dst (ConvI2D (LoadI mem)));
11034   format %{ "CVTSI2SD $dst,$mem" %}
11035   ins_encode %{
11036     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11037   %}
11038   ins_pipe( pipe_slow );
11039 %}
11040 
11041 instruct convXI2D_reg(regD dst, rRegI src)
11042 %{
11043   predicate( UseSSE>=2 && UseXmmI2D );
11044   match(Set dst (ConvI2D src));
11045 
11046   format %{ "MOVD  $dst,$src\n\t"
11047             "CVTDQ2PD $dst,$dst\t# i2d" %}
11048   ins_encode %{
11049     __ movdl($dst$$XMMRegister, $src$$Register);
11050     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11051   %}
11052   ins_pipe(pipe_slow); // XXX
11053 %}
11054 
11055 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11056   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11057   match(Set dst (ConvI2D (LoadI mem)));
11058   format %{ "FILD   $mem\n\t"
11059             "FSTP   $dst" %}
11060   opcode(0xDB);      /* DB /0 */
11061   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11062               Pop_Reg_DPR(dst));
11063   ins_pipe( fpu_reg_mem );
11064 %}
11065 
11066 // Convert a byte to a float; no rounding step needed.
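// The predicate only accepts (ConvI2F (AndI src 255)), i.e. a value already
// masked to byte range.  Such values fit exactly in the 24-bit significand of
// a float, so no extra rounding store is required even in 24-bit mode.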
11067 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11068   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11069   match(Set dst (ConvI2F src));
11070   format %{ "FILD   $src\n\t"
11071             "FSTP   $dst" %}
11072 
11073   opcode(0xDB, 0x0);  /* DB /0 */
11074   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11075   ins_pipe( fpu_reg_mem );
11076 %}
11077 
11078 // In 24-bit mode, force exponent rounding by storing back out
11079 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11080   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11081   match(Set dst (ConvI2F src));
11082   ins_cost(200);
11083   format %{ "FILD   $src\n\t"
11084             "FSTP_S $dst" %}
11085   opcode(0xDB, 0x0);  /* DB /0 */
11086   ins_encode( Push_Mem_I(src),
11087               Pop_Mem_FPR(dst));
11088   ins_pipe( fpu_mem_mem );
11089 %}
11090 
11091 // In 24-bit mode, force exponent rounding by storing back out
11092 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11093   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11094   match(Set dst (ConvI2F (LoadI mem)));
11095   ins_cost(200);
11096   format %{ "FILD   $mem\n\t"
11097             "FSTP_S $dst" %}
11098   opcode(0xDB);  /* DB /0 */
11099   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11100               Pop_Mem_FPR(dst));
11101   ins_pipe( fpu_mem_mem );
11102 %}
11103 
// This instruction does not round to 24 bits
11105 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11106   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11107   match(Set dst (ConvI2F src));
11108   format %{ "FILD   $src\n\t"
11109             "FSTP   $dst" %}
11110   opcode(0xDB, 0x0);  /* DB /0 */
11111   ins_encode( Push_Mem_I(src),
11112               Pop_Reg_FPR(dst));
11113   ins_pipe( fpu_reg_mem );
11114 %}
11115 
// This instruction does not round to 24 bits
11117 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11118   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11119   match(Set dst (ConvI2F (LoadI mem)));
11120   format %{ "FILD   $mem\n\t"
11121             "FSTP   $dst" %}
11122   opcode(0xDB);      /* DB /0 */
11123   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11124               Pop_Reg_FPR(dst));
11125   ins_pipe( fpu_reg_mem );
11126 %}
11127 
11128 // Convert an int to a float in xmm; no rounding step needed.
11129 instruct convI2F_reg(regF dst, rRegI src) %{
11130   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
11131   match(Set dst (ConvI2F src));
11132   format %{ "CVTSI2SS $dst, $src" %}
11133   ins_encode %{
11134     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11135   %}
11136   ins_pipe( pipe_slow );
11137 %}
11138 
instruct convXI2F_reg(regF dst, rRegI src)
11140 %{
11141   predicate( UseSSE>=2 && UseXmmI2F );
11142   match(Set dst (ConvI2F src));
11143 
11144   format %{ "MOVD  $dst,$src\n\t"
11145             "CVTDQ2PS $dst,$dst\t# i2f" %}
11146   ins_encode %{
11147     __ movdl($dst$$XMMRegister, $src$$Register);
11148     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11149   %}
11150   ins_pipe(pipe_slow); // XXX
11151 %}
11152 
11153 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11154   match(Set dst (ConvI2L src));
11155   effect(KILL cr);
11156   ins_cost(375);
11157   format %{ "MOV    $dst.lo,$src\n\t"
11158             "MOV    $dst.hi,$src\n\t"
11159             "SAR    $dst.hi,31" %}
11160   ins_encode(convert_int_long(dst,src));
11161   ins_pipe( ialu_reg_reg_long );
11162 %}
11163 
11164 // Zero-extend convert int to long
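// Matches (AndL (ConvI2L src) 0xFFFFFFFF) via the immL_32bits operand: the
// mask discards the sign extension, so the high word is simply cleared with
// XOR instead of being copied and arithmetically shifted as in the
// sign-extending rule above.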
11165 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11166   match(Set dst (AndL (ConvI2L src) mask) );
11167   effect( KILL flags );
11168   ins_cost(250);
11169   format %{ "MOV    $dst.lo,$src\n\t"
11170             "XOR    $dst.hi,$dst.hi" %}
11171   opcode(0x33); // XOR
11172   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11173   ins_pipe( ialu_reg_reg_long );
11174 %}
11175 
11176 // Zero-extend long
11177 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11178   match(Set dst (AndL src mask) );
11179   effect( KILL flags );
11180   ins_cost(250);
11181   format %{ "MOV    $dst.lo,$src.lo\n\t"
11182             "XOR    $dst.hi,$dst.hi\n\t" %}
11183   opcode(0x33); // XOR
11184   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11185   ins_pipe( ialu_reg_reg_long );
11186 %}
11187 
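// Long-to-float/double conversions push the two 32-bit halves of the long,
// load the 64-bit integer with FILD from [ESP], and perform the rounding by
// storing in the target precision: the x87 forms pop straight into the
// destination stack slot, while the SSE forms store back to [ESP] and re-load
// the rounded result into the XMM destination.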
11188 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11189   predicate (UseSSE<=1);
11190   match(Set dst (ConvL2D src));
11191   effect( KILL cr );
11192   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11193             "PUSH   $src.lo\n\t"
11194             "FILD   ST,[ESP + #0]\n\t"
11195             "ADD    ESP,8\n\t"
11196             "FSTP_D $dst\t# D-round" %}
11197   opcode(0xDF, 0x5);  /* DF /5 */
11198   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11199   ins_pipe( pipe_slow );
11200 %}
11201 
11202 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11203   predicate (UseSSE>=2);
11204   match(Set dst (ConvL2D src));
11205   effect( KILL cr );
11206   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11207             "PUSH   $src.lo\n\t"
11208             "FILD_D [ESP]\n\t"
11209             "FSTP_D [ESP]\n\t"
11210             "MOVSD  $dst,[ESP]\n\t"
11211             "ADD    ESP,8" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=1);
11219   match(Set dst (ConvL2F src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_S [ESP]\n\t"
11225             "MOVSS  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11233   match(Set dst (ConvL2F src));
11234   effect( KILL cr );
11235   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11236             "PUSH   $src.lo\n\t"
11237             "FILD   ST,[ESP + #0]\n\t"
11238             "ADD    ESP,8\n\t"
11239             "FSTP_S $dst\t# F-round" %}
11240   opcode(0xDF, 0x5);  /* DF /5 */
11241   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11242   ins_pipe( pipe_slow );
11243 %}
11244 
11245 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11246   match(Set dst (ConvL2I src));
11247   effect( DEF dst, USE src );
11248   format %{ "MOV    $dst,$src.lo" %}
11249   ins_encode(enc_CopyL_Lo(dst,src));
11250   ins_pipe( ialu_reg_reg );
11251 %}
11252 
11253 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11254   match(Set dst (MoveF2I src));
11255   effect( DEF dst, USE src );
11256   ins_cost(100);
11257   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11258   ins_encode %{
11259     __ movl($dst$$Register, Address(rsp, $src$$disp));
11260   %}
11261   ins_pipe( ialu_reg_mem );
11262 %}
11263 
11264 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11265   predicate(UseSSE==0);
11266   match(Set dst (MoveF2I src));
11267   effect( DEF dst, USE src );
11268 
11269   ins_cost(125);
11270   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11271   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11272   ins_pipe( fpu_mem_reg );
11273 %}
11274 
11275 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11276   predicate(UseSSE>=1);
11277   match(Set dst (MoveF2I src));
11278   effect( DEF dst, USE src );
11279 
11280   ins_cost(95);
11281   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11282   ins_encode %{
11283     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11284   %}
11285   ins_pipe( pipe_slow );
11286 %}
11287 
11288 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11289   predicate(UseSSE>=2);
11290   match(Set dst (MoveF2I src));
11291   effect( DEF dst, USE src );
11292   ins_cost(85);
11293   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11294   ins_encode %{
11295     __ movdl($dst$$Register, $src$$XMMRegister);
11296   %}
11297   ins_pipe( pipe_slow );
11298 %}
11299 
11300 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11301   match(Set dst (MoveI2F src));
11302   effect( DEF dst, USE src );
11303 
11304   ins_cost(100);
11305   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11306   ins_encode %{
11307     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11308   %}
11309   ins_pipe( ialu_mem_reg );
11310 %}
11311 
11312 
11313 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11314   predicate(UseSSE==0);
11315   match(Set dst (MoveI2F src));
11316   effect(DEF dst, USE src);
11317 
11318   ins_cost(125);
11319   format %{ "FLD_S  $src\n\t"
11320             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11321   opcode(0xD9);               /* D9 /0, FLD m32real */
11322   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11323               Pop_Reg_FPR(dst) );
11324   ins_pipe( fpu_reg_mem );
11325 %}
11326 
11327 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11328   predicate(UseSSE>=1);
11329   match(Set dst (MoveI2F src));
11330   effect( DEF dst, USE src );
11331 
11332   ins_cost(95);
11333   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11334   ins_encode %{
11335     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11336   %}
11337   ins_pipe( pipe_slow );
11338 %}
11339 
11340 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11341   predicate(UseSSE>=2);
11342   match(Set dst (MoveI2F src));
11343   effect( DEF dst, USE src );
11344 
11345   ins_cost(85);
11346   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11347   ins_encode %{
11348     __ movdl($dst$$XMMRegister, $src$$Register);
11349   %}
11350   ins_pipe( pipe_slow );
11351 %}
11352 
11353 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11354   match(Set dst (MoveD2L src));
11355   effect(DEF dst, USE src);
11356 
11357   ins_cost(250);
11358   format %{ "MOV    $dst.lo,$src\n\t"
11359             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11360   opcode(0x8B, 0x8B);
11361   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11362   ins_pipe( ialu_mem_long_reg );
11363 %}
11364 
11365 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11366   predicate(UseSSE<=1);
11367   match(Set dst (MoveD2L src));
11368   effect(DEF dst, USE src);
11369 
11370   ins_cost(125);
11371   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11372   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11373   ins_pipe( fpu_mem_reg );
11374 %}
11375 
11376 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11377   predicate(UseSSE>=2);
11378   match(Set dst (MoveD2L src));
11379   effect(DEF dst, USE src);
11380   ins_cost(95);
11381   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11382   ins_encode %{
11383     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11384   %}
11385   ins_pipe( pipe_slow );
11386 %}
11387 
11388 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11389   predicate(UseSSE>=2);
11390   match(Set dst (MoveD2L src));
11391   effect(DEF dst, USE src, TEMP tmp);
11392   ins_cost(85);
11393   format %{ "MOVD   $dst.lo,$src\n\t"
11394             "PSHUFLW $tmp,$src,0x4E\n\t"
11395             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11396   ins_encode %{
11397     __ movdl($dst$$Register, $src$$XMMRegister);
11398     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11399     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11400   %}
11401   ins_pipe( pipe_slow );
11402 %}
11403 
11404 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11405   match(Set dst (MoveL2D src));
11406   effect(DEF dst, USE src);
11407 
11408   ins_cost(200);
11409   format %{ "MOV    $dst,$src.lo\n\t"
11410             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11411   opcode(0x89, 0x89);
11412   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11413   ins_pipe( ialu_mem_long_reg );
11414 %}
11415 
11416 
11417 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11418   predicate(UseSSE<=1);
11419   match(Set dst (MoveL2D src));
11420   effect(DEF dst, USE src);
11421   ins_cost(125);
11422 
11423   format %{ "FLD_D  $src\n\t"
11424             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11425   opcode(0xDD);               /* DD /0, FLD m64real */
11426   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11427               Pop_Reg_DPR(dst) );
11428   ins_pipe( fpu_reg_mem );
11429 %}
11430 
11431 
11432 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11433   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436 
11437   ins_cost(95);
11438   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11439   ins_encode %{
11440     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11441   %}
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11446   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11447   match(Set dst (MoveL2D src));
11448   effect(DEF dst, USE src);
11449 
11450   ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11452   ins_encode %{
11453     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11454   %}
11455   ins_pipe( pipe_slow );
11456 %}
11457 
11458 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11459   predicate(UseSSE>=2);
11460   match(Set dst (MoveL2D src));
11461   effect(TEMP dst, USE src, TEMP tmp);
11462   ins_cost(85);
11463   format %{ "MOVD   $dst,$src.lo\n\t"
11464             "MOVD   $tmp,$src.hi\n\t"
11465             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11466   ins_encode %{
11467     __ movdl($dst$$XMMRegister, $src$$Register);
11468     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11469     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11470   %}
11471   ins_pipe( pipe_slow );
11472 %}
11473 
11474 //----------------------------- CompressBits/ExpandBits ------------------------
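// IA-32 has no 64-bit PEXT/PDEP, so CompressBits/ExpandBits on longs are
// emulated with pairs of 32-bit pextl/pdepl (BMI2) plus a merge step driven
// by POPCNT of the low mask word.  For compress, with k = popcnt(mask.lo),
// the 64-bit result is (pext(src.hi, mask.hi) << k) | pext(src.lo, mask.lo);
// on the register pair this becomes dst.lo |= dst.hi << k followed by
// dst.hi >>= (32 - k), where dst.hi is cleared when k == 0 and left as is
// when k == 32.  For example, mask = 0x000000FF_000000FF gives k = 8, so the
// 8 bits extracted from the high word land in bits 15:8 of dst.lo and dst.hi
// ends up zero.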
11475 
11476 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11477   predicate(n->bottom_type()->isa_long());
11478   match(Set dst (CompressBits src mask));
11479   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11480   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11481   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source in parallel into the
    // destination register pair, then merge the results so that the bits
    // extracted from the upper word are laid out contiguously after the lower
    // result.
11486     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11487     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11488     __ popcntl($rtmp$$Register, $mask$$Register);
11489     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11490     __ cmpl($rtmp$$Register, 32);
11491     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift the contents of the upper destination register left by the true bit count of the lower
    // mask register and merge it into the lower destination register.
11496     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11497     __ orl($dst$$Register, $rtmp$$Register);
11498     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out the upper destination register if the true bit count of the lower 32-bit mask
    // is zero, since the contents of the upper destination have already been copied into the
    // lower destination register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Shift the upper destination register right to drop the bits already copied into the
    // lower destination register.
11509     __ subl($rtmp$$Register, 32);
11510     __ negl($rtmp$$Register);
11511     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11512     __ bind(exit);
11513   %}
11514   ins_pipe( pipe_slow );
11515 %}
11516 
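// Expand is the inverse: the first k = popcnt(mask.lo) source bits are
// deposited under mask.lo, the remaining 32-k low-word source bits are
// shifted down and deposited under mask.hi, and the lowest 32-k set bits of
// mask.hi (already consumed) are cleared with repeated BLSR so that bits of
// src.hi can be deposited under the leftover mask bits; the two partial
// high-word results are then OR'ed together.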
11517 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11518   predicate(n->bottom_type()->isa_long());
11519   match(Set dst (ExpandBits src mask));
11520   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11521   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11522   ins_encode %{
    // The expand operation reads bits from the source register sequentially, starting at the
    // LSB, and places them in the destination register at the bit positions corresponding to
    // the true bits of the mask register.  Thus the number of source bits consumed equals the
    // combined true bit count of the mask register pair.
11527     Label exit, mask_clipping;
11528     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11529     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11530     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, no bits of the lower source
    // register feed into the upper destination register.
11533     __ cmpl($rtmp$$Register, 32);
11534     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift the lower source register right to remove the bits already consumed.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Deposit the remaining lower-source bits, starting from the LSB, under the set bits of
    // the upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11542     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11543     __ subl($rtmp$$Register, 32);
11544     __ negl($rtmp$$Register);
11545     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11546     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in the upper mask register that were already used to place bits taken
    // from the lower source register.
11549     __ bind(mask_clipping);
11550     __ blsrl($mask$$Register, $mask$$Register);
11551     __ decrementl($rtmp$$Register, 1);
11552     __ jccb(Assembler::greater, mask_clipping);
11553     // Starting from LSB extract the bits from upper source register under the influence of
11554     // remaining set bits in upper mask register.
11555     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11556     // Merge the partial results extracted from lower and upper source register bits.
11557     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11558     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11559     __ bind(exit);
11560   %}
11561   ins_pipe( pipe_slow );
11562 %}
11563 
11564 // =======================================================================
11565 // fast clearing of an array
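// Four flavors follow: small vs. large (ClearArrayNode::is_large()) crossed
// with non-AVX512 vs. AVX-512 forms (the latter carry a kReg temporary), plus
// a constant-length AVX-512 form.  The format text is only a human-readable
// sketch; the code itself comes from the clear_mem macro-assembler helper,
// whose choice of REP STOSB (UseFastStosb), a 64-byte XMM zeroing loop
// (UseXMMForObjInit) or plain REP STOS the format mirrors.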
11566 // Small ClearArray non-AVX512.
11567 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11568   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11569   match(Set dummy (ClearArray cnt base));
11570   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11571 
11572   format %{ $$template
11573     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11574     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11575     $$emit$$"JG     LARGE\n\t"
11576     $$emit$$"SHL    ECX, 1\n\t"
11577     $$emit$$"DEC    ECX\n\t"
11578     $$emit$$"JS     DONE\t# Zero length\n\t"
11579     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11580     $$emit$$"DEC    ECX\n\t"
11581     $$emit$$"JGE    LOOP\n\t"
11582     $$emit$$"JMP    DONE\n\t"
11583     $$emit$$"# LARGE:\n\t"
11584     if (UseFastStosb) {
11585        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11586        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11587     } else if (UseXMMForObjInit) {
11588        $$emit$$"MOV     RDI,RAX\n\t"
11589        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11590        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11591        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11592        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11593        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11594        $$emit$$"ADD     0x40,RAX\n\t"
11595        $$emit$$"# L_zero_64_bytes:\n\t"
11596        $$emit$$"SUB     0x8,RCX\n\t"
11597        $$emit$$"JGE     L_loop\n\t"
11598        $$emit$$"ADD     0x4,RCX\n\t"
11599        $$emit$$"JL      L_tail\n\t"
11600        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11601        $$emit$$"ADD     0x20,RAX\n\t"
11602        $$emit$$"SUB     0x4,RCX\n\t"
11603        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11604        $$emit$$"ADD     0x4,RCX\n\t"
11605        $$emit$$"JLE     L_end\n\t"
11606        $$emit$$"DEC     RCX\n\t"
11607        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11608        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11609        $$emit$$"ADD     0x8,RAX\n\t"
11610        $$emit$$"DEC     RCX\n\t"
11611        $$emit$$"JGE     L_sloop\n\t"
11612        $$emit$$"# L_end:\n\t"
11613     } else {
11614        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11615        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11616     }
11617     $$emit$$"# DONE"
11618   %}
11619   ins_encode %{
11620     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11621                  $tmp$$XMMRegister, false, knoreg);
11622   %}
11623   ins_pipe( pipe_slow );
11624 %}
11625 
11626 // Small ClearArray AVX512 non-constant length.
11627 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11628   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11629   match(Set dummy (ClearArray cnt base));
11630   ins_cost(125);
11631   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11632 
11633   format %{ $$template
11634     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11635     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11636     $$emit$$"JG     LARGE\n\t"
11637     $$emit$$"SHL    ECX, 1\n\t"
11638     $$emit$$"DEC    ECX\n\t"
11639     $$emit$$"JS     DONE\t# Zero length\n\t"
11640     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11641     $$emit$$"DEC    ECX\n\t"
11642     $$emit$$"JGE    LOOP\n\t"
11643     $$emit$$"JMP    DONE\n\t"
11644     $$emit$$"# LARGE:\n\t"
11645     if (UseFastStosb) {
11646        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11647        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11648     } else if (UseXMMForObjInit) {
11649        $$emit$$"MOV     RDI,RAX\n\t"
11650        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11651        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11652        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11653        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11654        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11655        $$emit$$"ADD     0x40,RAX\n\t"
11656        $$emit$$"# L_zero_64_bytes:\n\t"
11657        $$emit$$"SUB     0x8,RCX\n\t"
11658        $$emit$$"JGE     L_loop\n\t"
11659        $$emit$$"ADD     0x4,RCX\n\t"
11660        $$emit$$"JL      L_tail\n\t"
11661        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11662        $$emit$$"ADD     0x20,RAX\n\t"
11663        $$emit$$"SUB     0x4,RCX\n\t"
11664        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11665        $$emit$$"ADD     0x4,RCX\n\t"
11666        $$emit$$"JLE     L_end\n\t"
11667        $$emit$$"DEC     RCX\n\t"
11668        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11669        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11670        $$emit$$"ADD     0x8,RAX\n\t"
11671        $$emit$$"DEC     RCX\n\t"
11672        $$emit$$"JGE     L_sloop\n\t"
11673        $$emit$$"# L_end:\n\t"
11674     } else {
11675        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11676        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11677     }
11678     $$emit$$"# DONE"
11679   %}
11680   ins_encode %{
11681     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11682                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11683   %}
11684   ins_pipe( pipe_slow );
11685 %}
11686 
11687 // Large ClearArray non-AVX512.
11688 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11689   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11690   match(Set dummy (ClearArray cnt base));
11691   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11692   format %{ $$template
11693     if (UseFastStosb) {
11694        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11695        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11696        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11697     } else if (UseXMMForObjInit) {
11698        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11699        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11700        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11701        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11702        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11703        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11704        $$emit$$"ADD     0x40,RAX\n\t"
11705        $$emit$$"# L_zero_64_bytes:\n\t"
11706        $$emit$$"SUB     0x8,RCX\n\t"
11707        $$emit$$"JGE     L_loop\n\t"
11708        $$emit$$"ADD     0x4,RCX\n\t"
11709        $$emit$$"JL      L_tail\n\t"
11710        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11711        $$emit$$"ADD     0x20,RAX\n\t"
11712        $$emit$$"SUB     0x4,RCX\n\t"
11713        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11714        $$emit$$"ADD     0x4,RCX\n\t"
11715        $$emit$$"JLE     L_end\n\t"
11716        $$emit$$"DEC     RCX\n\t"
11717        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11718        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11719        $$emit$$"ADD     0x8,RAX\n\t"
11720        $$emit$$"DEC     RCX\n\t"
11721        $$emit$$"JGE     L_sloop\n\t"
11722        $$emit$$"# L_end:\n\t"
11723     } else {
11724        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11725        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11726        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11727     }
11728     $$emit$$"# DONE"
11729   %}
11730   ins_encode %{
11731     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11732                  $tmp$$XMMRegister, true, knoreg);
11733   %}
11734   ins_pipe( pipe_slow );
11735 %}
11736 
11737 // Large ClearArray AVX512.
11738 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11739   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11740   match(Set dummy (ClearArray cnt base));
11741   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11742   format %{ $$template
11743     if (UseFastStosb) {
11744        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11745        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11746        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11747     } else if (UseXMMForObjInit) {
11748        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11749        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11750        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11751        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11752        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11753        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11754        $$emit$$"ADD     0x40,RAX\n\t"
11755        $$emit$$"# L_zero_64_bytes:\n\t"
11756        $$emit$$"SUB     0x8,RCX\n\t"
11757        $$emit$$"JGE     L_loop\n\t"
11758        $$emit$$"ADD     0x4,RCX\n\t"
11759        $$emit$$"JL      L_tail\n\t"
11760        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11761        $$emit$$"ADD     0x20,RAX\n\t"
11762        $$emit$$"SUB     0x4,RCX\n\t"
11763        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11764        $$emit$$"ADD     0x4,RCX\n\t"
11765        $$emit$$"JLE     L_end\n\t"
11766        $$emit$$"DEC     RCX\n\t"
11767        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11768        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11769        $$emit$$"ADD     0x8,RAX\n\t"
11770        $$emit$$"DEC     RCX\n\t"
11771        $$emit$$"JGE     L_sloop\n\t"
11772        $$emit$$"# L_end:\n\t"
11773     } else {
11774        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11775        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11776        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11777     }
11778     $$emit$$"# DONE"
11779   %}
11780   ins_encode %{
11781     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11782                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11783   %}
11784   ins_pipe( pipe_slow );
11785 %}
11786 
11787 // Small ClearArray AVX512 constant length.
11788 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11789 %{
11790   predicate(!((ClearArrayNode*)n)->is_large() &&
11791                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11792   match(Set dummy (ClearArray cnt base));
11793   ins_cost(100);
11794   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base,$cnt" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11798   %}
11799   ins_pipe(pipe_slow);
11800 %}
11801 
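// Each string/array intrinsic below comes in two flavors selected on
// VM_Version::supports_avx512vlbw(): a baseline form that passes knoreg and
// an _evex form that hands AVX-512 mask-register (kReg) temporaries to the
// same macro-assembler routine.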
11802 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11803                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11804   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11805   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11806   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11807 
11808   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11809   ins_encode %{
11810     __ string_compare($str1$$Register, $str2$$Register,
11811                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11812                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11813   %}
11814   ins_pipe( pipe_slow );
11815 %}
11816 
11817 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11818                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11819   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11820   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11821   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11822 
11823   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11824   ins_encode %{
11825     __ string_compare($str1$$Register, $str2$$Register,
11826                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11827                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11828   %}
11829   ins_pipe( pipe_slow );
11830 %}
11831 
11832 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11833                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11834   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11835   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11836   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11837 
11838   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11839   ins_encode %{
11840     __ string_compare($str1$$Register, $str2$$Register,
11841                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11842                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11848                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11849   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11850   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11851   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11852 
11853   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11854   ins_encode %{
11855     __ string_compare($str1$$Register, $str2$$Register,
11856                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11857                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11863                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11864   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11865   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11866   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11867 
11868   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11869   ins_encode %{
11870     __ string_compare($str1$$Register, $str2$$Register,
11871                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11872                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11873   %}
11874   ins_pipe( pipe_slow );
11875 %}
11876 
11877 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11878                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11879   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11880   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11881   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11882 
11883   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11884   ins_encode %{
11885     __ string_compare($str1$$Register, $str2$$Register,
11886                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11887                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11893                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11894   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11895   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11896   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11897 
11898   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11899   ins_encode %{
11900     __ string_compare($str2$$Register, $str1$$Register,
11901                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11902                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11903   %}
11904   ins_pipe( pipe_slow );
11905 %}
11906 
11907 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11908                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11909   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11910   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11911   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11912 
11913   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11914   ins_encode %{
11915     __ string_compare($str2$$Register, $str1$$Register,
11916                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11917                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
11922 // fast string equals
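// The same arrays_equals helper serves both StrEquals here (leading argument
// false, length in $cnt) and the AryEq rules further down (leading argument
// true); the boolean ahead of the mask register selects char[] versus byte[]
// element width.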
11923 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11924                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11925   predicate(!VM_Version::supports_avx512vlbw());
11926   match(Set result (StrEquals (Binary str1 str2) cnt));
11927   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11928 
11929   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11930   ins_encode %{
11931     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11932                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11933                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11934   %}
11935 
11936   ins_pipe( pipe_slow );
11937 %}
11938 
11939 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11940                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11941   predicate(VM_Version::supports_avx512vlbw());
11942   match(Set result (StrEquals (Binary str1 str2) cnt));
11943   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11944 
11945   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11946   ins_encode %{
11947     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11948                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11949                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11950   %}
11951 
11952   ins_pipe( pipe_slow );
11953 %}
11954 
11955 
11956 // fast search of substring with known size.
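// For a constant substring length the threshold (16 Latin-1 bytes or 8 UTF-16
// chars, i.e. one 16-byte XMM chunk) picks between string_indexofC8, which as
// noted below never needs to load the pattern through the stack, and the
// general string_indexof path, which loads short patterns through the stack
// when they would cross a page boundary.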
11957 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11958                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11959   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11960   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11961   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11962 
11963   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11964   ins_encode %{
11965     int icnt2 = (int)$int_cnt2$$constant;
11966     if (icnt2 >= 16) {
      // IndexOf for constant substrings of size >= 16 elements,
      // which don't need to be loaded through the stack.
11969       __ string_indexofC8($str1$$Register, $str2$$Register,
11970                           $cnt1$$Register, $cnt2$$Register,
11971                           icnt2, $result$$Register,
11972                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11973     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11975       __ string_indexof($str1$$Register, $str2$$Register,
11976                         $cnt1$$Register, $cnt2$$Register,
11977                         icnt2, $result$$Register,
11978                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11979     }
11980   %}
11981   ins_pipe( pipe_slow );
11982 %}
11983 
11984 // fast search of substring with known size.
11985 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11986                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11987   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11988   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11989   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11990 
11991   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11992   ins_encode %{
11993     int icnt2 = (int)$int_cnt2$$constant;
11994     if (icnt2 >= 8) {
      // IndexOf for constant substrings of size >= 8 elements,
      // which don't need to be loaded through the stack.
11997       __ string_indexofC8($str1$$Register, $str2$$Register,
11998                           $cnt1$$Register, $cnt2$$Register,
11999                           icnt2, $result$$Register,
12000                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12001     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
12003       __ string_indexof($str1$$Register, $str2$$Register,
12004                         $cnt1$$Register, $cnt2$$Register,
12005                         icnt2, $result$$Register,
12006                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12007     }
12008   %}
12009   ins_pipe( pipe_slow );
12010 %}
12011 
12012 // fast search of substring with known size.
12013 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12014                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12015   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12016   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12017   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12018 
12019   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12020   ins_encode %{
12021     int icnt2 = (int)$int_cnt2$$constant;
12022     if (icnt2 >= 8) {
      // IndexOf for constant substrings of size >= 8 elements,
      // which don't need to be loaded through the stack.
12025       __ string_indexofC8($str1$$Register, $str2$$Register,
12026                           $cnt1$$Register, $cnt2$$Register,
12027                           icnt2, $result$$Register,
12028                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12029     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
12031       __ string_indexof($str1$$Register, $str2$$Register,
12032                         $cnt1$$Register, $cnt2$$Register,
12033                         icnt2, $result$$Register,
12034                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12035     }
12036   %}
12037   ins_pipe( pipe_slow );
12038 %}
12039 
12040 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12041                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12042   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12043   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12044   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12045 
12046   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12047   ins_encode %{
12048     __ string_indexof($str1$$Register, $str2$$Register,
12049                       $cnt1$$Register, $cnt2$$Register,
12050                       (-1), $result$$Register,
12051                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12052   %}
12053   ins_pipe( pipe_slow );
12054 %}
12055 
12056 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12057                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12058   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12059   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12060   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12061 
12062   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12063   ins_encode %{
12064     __ string_indexof($str1$$Register, $str2$$Register,
12065                       $cnt1$$Register, $cnt2$$Register,
12066                       (-1), $result$$Register,
12067                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12068   %}
12069   ins_pipe( pipe_slow );
12070 %}
12071 
12072 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12073                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12074   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12075   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12076   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12077 
12078   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12079   ins_encode %{
12080     __ string_indexof($str1$$Register, $str2$$Register,
12081                       $cnt1$$Register, $cnt2$$Register,
12082                       (-1), $result$$Register,
12083                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12084   %}
12085   ins_pipe( pipe_slow );
12086 %}
12087 
12088 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12089                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12090   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12091   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12092   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12093   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12094   ins_encode %{
12095     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12096                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12097   %}
12098   ins_pipe( pipe_slow );
12099 %}
12100 
12101 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12102                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12103   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12104   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12105   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12106   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12107   ins_encode %{
12108     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12109                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12110   %}
12111   ins_pipe( pipe_slow );
12112 %}
12113 
12114 
12115 // fast array equals
12116 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12117                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12118 %{
12119   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12120   match(Set result (AryEq ary1 ary2));
12121   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12122   //ins_cost(300);
12123 
12124   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12125   ins_encode %{
12126     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12127                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12128                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12129   %}
12130   ins_pipe( pipe_slow );
12131 %}
12132 
12133 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12134                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12135 %{
12136   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12137   match(Set result (AryEq ary1 ary2));
12138   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12139   //ins_cost(300);
12140 
12141   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12142   ins_encode %{
12143     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12144                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12145                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12146   %}
12147   ins_pipe( pipe_slow );
12148 %}
12149 
12150 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12151                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12152 %{
12153   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12154   match(Set result (AryEq ary1 ary2));
12155   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12156   //ins_cost(300);
12157 
12158   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12159   ins_encode %{
12160     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12161                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12162                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12163   %}
12164   ins_pipe( pipe_slow );
12165 %}
12166 
12167 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12168                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12169 %{
12170   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12171   match(Set result (AryEq ary1 ary2));
12172   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12173   //ins_cost(300);
12174 
12175   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12176   ins_encode %{
12177     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12178                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12179                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12180   %}
12181   ins_pipe( pipe_slow );
12182 %}
12183 
12184 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12185                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12186 %{
12187   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12188   match(Set result (CountPositives ary1 len));
12189   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12190 
12191   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12192   ins_encode %{
12193     __ count_positives($ary1$$Register, $len$$Register,
12194                        $result$$Register, $tmp3$$Register,
12195                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12196   %}
12197   ins_pipe( pipe_slow );
12198 %}
12199 
12200 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12201                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12202 %{
12203   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12204   match(Set result (CountPositives ary1 len));
12205   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12206 
12207   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12208   ins_encode %{
12209     __ count_positives($ary1$$Register, $len$$Register,
12210                        $result$$Register, $tmp3$$Register,
12211                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12212   %}
12213   ins_pipe( pipe_slow );
12214 %}
12215 
12216 
12217 // fast char[] to byte[] compression
12218 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12219                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12220   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12221   match(Set result (StrCompressedCopy src (Binary dst len)));
12222   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12223 
12224   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12225   ins_encode %{
12226     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12227                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12228                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12229                            knoreg, knoreg);
12230   %}
12231   ins_pipe( pipe_slow );
12232 %}
12233 
12234 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12235                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12236   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12237   match(Set result (StrCompressedCopy src (Binary dst len)));
12238   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12239 
12240   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12241   ins_encode %{
12242     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12243                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12244                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12245                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12246   %}
12247   ins_pipe( pipe_slow );
12248 %}
12249 
12250 // fast byte[] to char[] inflation
12251 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12252                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12253   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12254   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12255   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12256 
12257   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12258   ins_encode %{
12259     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12260                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12261   %}
12262   ins_pipe( pipe_slow );
12263 %}
12264 
12265 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12266                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12267   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12268   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12269   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12270 
12271   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12272   ins_encode %{
12273     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12274                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12275   %}
12276   ins_pipe( pipe_slow );
12277 %}
12278 
12279 // encode char[] to byte[] in ISO_8859_1
12280 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12281                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12282                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12283   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12284   match(Set result (EncodeISOArray src (Binary dst len)));
12285   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12286 
12287   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12288   ins_encode %{
12289     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12290                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12291                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12292   %}
12293   ins_pipe( pipe_slow );
12294 %}
12295 
12296 // encode char[] to byte[] in ASCII
12297 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12298                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12299                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12300   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12301   match(Set result (EncodeISOArray src (Binary dst len)));
12302   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12303 
12304   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12305   ins_encode %{
12306     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12307                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12308                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12309   %}
12310   ins_pipe( pipe_slow );
12311 %}
12312 
12313 //----------Control Flow Instructions------------------------------------------
12314 // Signed compare Instructions
12315 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12316   match(Set cr (CmpI op1 op2));
12317   effect( DEF cr, USE op1, USE op2 );
12318   format %{ "CMP    $op1,$op2" %}
12319   opcode(0x3B);  /* Opcode 3B /r */
12320   ins_encode( OpcP, RegReg( op1, op2) );
12321   ins_pipe( ialu_cr_reg_reg );
12322 %}
12323 
12324 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12325   match(Set cr (CmpI op1 op2));
12326   effect( DEF cr, USE op1 );
12327   format %{ "CMP    $op1,$op2" %}
12328   opcode(0x81,0x07);  /* Opcode 81 /7 */
12329   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12330   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12331   ins_pipe( ialu_cr_reg_imm );
12332 %}
12333 
12334 // Cisc-spilled version of cmpI_eReg
12335 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12336   match(Set cr (CmpI op1 (LoadI op2)));
12337 
12338   format %{ "CMP    $op1,$op2" %}
12339   ins_cost(500);
12340   opcode(0x3B);  /* Opcode 3B /r */
12341   ins_encode( OpcP, RegMem( op1, op2) );
12342   ins_pipe( ialu_cr_reg_mem );
12343 %}
12344 
12345 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12346   match(Set cr (CmpI src zero));
12347   effect( DEF cr, USE src );
12348 
12349   format %{ "TEST   $src,$src" %}
12350   opcode(0x85);
12351   ins_encode( OpcP, RegReg( src, src ) );
12352   ins_pipe( ialu_cr_reg_imm );
12353 %}
12354 
12355 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12356   match(Set cr (CmpI (AndI src con) zero));
12357 
12358   format %{ "TEST   $src,$con" %}
12359   opcode(0xF7,0x00);
12360   ins_encode( OpcP, RegOpc(src), Con32(con) );
12361   ins_pipe( ialu_cr_reg_imm );
12362 %}
12363 
12364 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12365   match(Set cr (CmpI (AndI src mem) zero));
12366 
12367   format %{ "TEST   $src,$mem" %}
12368   opcode(0x85);
12369   ins_encode( OpcP, RegMem( src, mem ) );
12370   ins_pipe( ialu_cr_reg_mem );
12371 %}
12372 
12373 // Unsigned compare Instructions; really, same as signed except they
12374 // produce an eFlagsRegU instead of eFlagsReg.
12375 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12376   match(Set cr (CmpU op1 op2));
12377 
12378   format %{ "CMPu   $op1,$op2" %}
12379   opcode(0x3B);  /* Opcode 3B /r */
12380   ins_encode( OpcP, RegReg( op1, op2) );
12381   ins_pipe( ialu_cr_reg_reg );
12382 %}
12383 
12384 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12385   match(Set cr (CmpU op1 op2));
12386 
12387   format %{ "CMPu   $op1,$op2" %}
12388   opcode(0x81,0x07);  /* Opcode 81 /7 */
12389   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12390   ins_pipe( ialu_cr_reg_imm );
12391 %}
12392 
// Cisc-spilled version of cmpU_eReg
12394 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12395   match(Set cr (CmpU op1 (LoadI op2)));
12396 
12397   format %{ "CMPu   $op1,$op2" %}
12398   ins_cost(500);
12399   opcode(0x3B);  /* Opcode 3B /r */
12400   ins_encode( OpcP, RegMem( op1, op2) );
12401   ins_pipe( ialu_cr_reg_mem );
12402 %}
12403 
12404 // // Cisc-spilled version of cmpU_eReg
12405 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12406 //  match(Set cr (CmpU (LoadI op1) op2));
12407 //
12408 //  format %{ "CMPu   $op1,$op2" %}
12409 //  ins_cost(500);
12410 //  opcode(0x39);  /* Opcode 39 /r */
12411 //  ins_encode( OpcP, RegMem( op1, op2) );
12412 //%}
12413 
12414 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12415   match(Set cr (CmpU src zero));
12416 
12417   format %{ "TESTu  $src,$src" %}
12418   opcode(0x85);
12419   ins_encode( OpcP, RegReg( src, src ) );
12420   ins_pipe( ialu_cr_reg_imm );
12421 %}
12422 
12423 // Unsigned pointer compare Instructions
12424 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12425   match(Set cr (CmpP op1 op2));
12426 
12427   format %{ "CMPu   $op1,$op2" %}
12428   opcode(0x3B);  /* Opcode 3B /r */
12429   ins_encode( OpcP, RegReg( op1, op2) );
12430   ins_pipe( ialu_cr_reg_reg );
12431 %}
12432 
12433 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12434   match(Set cr (CmpP op1 op2));
12435 
12436   format %{ "CMPu   $op1,$op2" %}
12437   opcode(0x81,0x07);  /* Opcode 81 /7 */
12438   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12439   ins_pipe( ialu_cr_reg_imm );
12440 %}
12441 
// Cisc-spilled version of cmpP_eReg
12443 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12444   match(Set cr (CmpP op1 (LoadP op2)));
12445 
12446   format %{ "CMPu   $op1,$op2" %}
12447   ins_cost(500);
12448   opcode(0x3B);  /* Opcode 3B /r */
12449   ins_encode( OpcP, RegMem( op1, op2) );
12450   ins_pipe( ialu_cr_reg_mem );
12451 %}
12452 
12453 // // Cisc-spilled version of cmpP_eReg
12454 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12455 //  match(Set cr (CmpP (LoadP op1) op2));
12456 //
12457 //  format %{ "CMPu   $op1,$op2" %}
12458 //  ins_cost(500);
12459 //  opcode(0x39);  /* Opcode 39 /r */
12460 //  ins_encode( OpcP, RegMem( op1, op2) );
12461 //%}
12462 
12463 // Compare raw pointer (used in out-of-heap check).
12464 // Only works because non-oop pointers must be raw pointers
12465 // and raw pointers have no anti-dependencies.
12466 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12467   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12468   match(Set cr (CmpP op1 (LoadP op2)));
12469 
12470   format %{ "CMPu   $op1,$op2" %}
12471   opcode(0x3B);  /* Opcode 3B /r */
12472   ins_encode( OpcP, RegMem( op1, op2) );
12473   ins_pipe( ialu_cr_reg_mem );
12474 %}
12475 
12476 //
12477 // This will generate a signed flags result. This should be ok
12478 // since any compare to a zero should be eq/neq.
12479 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12480   match(Set cr (CmpP src zero));
12481 
12482   format %{ "TEST   $src,$src" %}
12483   opcode(0x85);
12484   ins_encode( OpcP, RegReg( src, src ) );
12485   ins_pipe( ialu_cr_reg_imm );
12486 %}
12487 
12488 // Cisc-spilled version of testP_reg
12489 // This will generate a signed flags result. This should be ok
12490 // since any compare to a zero should be eq/neq.
12491 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12492   match(Set cr (CmpP (LoadP op) zero));
12493 
12494   format %{ "TEST   $op,0xFFFFFFFF" %}
12495   ins_cost(500);
12496   opcode(0xF7);               /* Opcode F7 /0 */
12497   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12498   ins_pipe( ialu_cr_reg_imm );
12499 %}
12500 
12501 // Yanked all unsigned pointer compare operations.
12502 // Pointer compares are done with CmpP which is already unsigned.
12503 
12504 //----------Max and Min--------------------------------------------------------
12505 // Min Instructions
12506 ////
12507 //   *** Min and Max using the conditional move are slower than the
12508 //   *** branch version on a Pentium III.
12509 // // Conditional move for min
12510 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12511 //  effect( USE_DEF op2, USE op1, USE cr );
12512 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12513 //  opcode(0x4C,0x0F);
12514 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12515 //  ins_pipe( pipe_cmov_reg );
12516 //%}
12517 //
12518 //// Min Register with Register (P6 version)
12519 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12520 //  predicate(VM_Version::supports_cmov() );
12521 //  match(Set op2 (MinI op1 op2));
12522 //  ins_cost(200);
12523 //  expand %{
12524 //    eFlagsReg cr;
12525 //    compI_eReg(cr,op1,op2);
12526 //    cmovI_reg_lt(op2,op1,cr);
12527 //  %}
12528 //%}
12529 
12530 // Min Register with Register (generic version)
12531 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12532   match(Set dst (MinI dst src));
12533   effect(KILL flags);
12534   ins_cost(300);
12535 
12536   format %{ "MIN    $dst,$src" %}
12537   opcode(0xCC);
12538   ins_encode( min_enc(dst,src) );
12539   ins_pipe( pipe_slow );
12540 %}
12541 
12542 // Max Register with Register
12543 //   *** Min and Max using the conditional move are slower than the
12544 //   *** branch version on a Pentium III.
12545 // // Conditional move for max
12546 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12547 //  effect( USE_DEF op2, USE op1, USE cr );
12548 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12549 //  opcode(0x4F,0x0F);
12550 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12551 //  ins_pipe( pipe_cmov_reg );
12552 //%}
12553 //
12554 // // Max Register with Register (P6 version)
12555 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12556 //  predicate(VM_Version::supports_cmov() );
12557 //  match(Set op2 (MaxI op1 op2));
12558 //  ins_cost(200);
12559 //  expand %{
12560 //    eFlagsReg cr;
12561 //    compI_eReg(cr,op1,op2);
12562 //    cmovI_reg_gt(op2,op1,cr);
12563 //  %}
12564 //%}
12565 
12566 // Max Register with Register (generic version)
12567 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12568   match(Set dst (MaxI dst src));
12569   effect(KILL flags);
12570   ins_cost(300);
12571 
12572   format %{ "MAX    $dst,$src" %}
12573   opcode(0xCC);
12574   ins_encode( max_enc(dst,src) );
12575   ins_pipe( pipe_slow );
12576 %}
12577 
12578 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
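//
// Illustrative arithmetic only (not part of the generated code): with
// init = 0, limit = 10 and stride = 3 the expression below yields
// 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12, the first iterator value
// at or beyond the original limit.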
12582 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12583   match(Set limit (LoopLimit (Binary init limit) stride));
12584   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12585   ins_cost(300);
12586 
12587   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12588   ins_encode %{
12589     int strd = (int)$stride$$constant;
12590     assert(strd != 1 && strd != -1, "sanity");
12591     int m1 = (strd > 0) ? 1 : -1;
12592     // Convert limit to long (EAX:EDX)
12593     __ cdql();
12594     // Convert init to long (init:tmp)
12595     __ movl($tmp$$Register, $init$$Register);
12596     __ sarl($tmp$$Register, 31);
12597     // $limit - $init
12598     __ subl($limit$$Register, $init$$Register);
12599     __ sbbl($limit_hi$$Register, $tmp$$Register);
12600     // + ($stride - 1)
12601     if (strd > 0) {
12602       __ addl($limit$$Register, (strd - 1));
12603       __ adcl($limit_hi$$Register, 0);
12604       __ movl($tmp$$Register, strd);
12605     } else {
12606       __ addl($limit$$Register, (strd + 1));
12607       __ adcl($limit_hi$$Register, -1);
12608       __ lneg($limit_hi$$Register, $limit$$Register);
12609       __ movl($tmp$$Register, -strd);
12610     }
12611     // signed division: (EAX:EDX) / pos_stride
12612     __ idivl($tmp$$Register);
12613     if (strd < 0) {
12614       // restore sign
12615       __ negl($tmp$$Register);
12616     }
12617     // (EAX) * stride
12618     __ mull($tmp$$Register);
12619     // + init (ignore upper bits)
12620     __ addl($limit$$Register, $init$$Register);
12621   %}
12622   ins_pipe( pipe_slow );
12623 %}
12624 
12625 // ============================================================================
12626 // Branch Instructions
12627 // Jump Table
12628 instruct jumpXtnd(rRegI switch_val) %{
12629   match(Jump switch_val);
12630   ins_cost(350);
12631   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12632   ins_encode %{
12633     // Jump to Address(table_base + switch_reg)
12634     Address index(noreg, $switch_val$$Register, Address::times_1);
12635     __ jump(ArrayAddress($constantaddress, index), noreg);
12636   %}
12637   ins_pipe(pipe_jmp);
12638 %}
12639 
12640 // Jump Direct - Label defines a relative address from JMP+1
12641 instruct jmpDir(label labl) %{
12642   match(Goto);
12643   effect(USE labl);
12644 
12645   ins_cost(300);
12646   format %{ "JMP    $labl" %}
12647   size(5);
12648   ins_encode %{
12649     Label* L = $labl$$label;
12650     __ jmp(*L, false); // Always long jump
12651   %}
12652   ins_pipe( pipe_jmp );
12653 %}
12654 
12655 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12656 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12657   match(If cop cr);
12658   effect(USE labl);
12659 
12660   ins_cost(300);
12661   format %{ "J$cop    $labl" %}
12662   size(6);
12663   ins_encode %{
12664     Label* L = $labl$$label;
12665     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12666   %}
12667   ins_pipe( pipe_jcc );
12668 %}
12669 
12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12671 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12672   match(CountedLoopEnd cop cr);
12673   effect(USE labl);
12674 
12675   ins_cost(300);
12676   format %{ "J$cop    $labl\t# Loop end" %}
12677   size(6);
12678   ins_encode %{
12679     Label* L = $labl$$label;
12680     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12681   %}
12682   ins_pipe( pipe_jcc );
12683 %}
12684 
12685 // Jump Direct Conditional - using unsigned comparison
12686 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12687   match(If cop cmp);
12688   effect(USE labl);
12689 
12690   ins_cost(300);
12691   format %{ "J$cop,u  $labl" %}
12692   size(6);
12693   ins_encode %{
12694     Label* L = $labl$$label;
12695     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12696   %}
12697   ins_pipe(pipe_jcc);
12698 %}
12699 
12700 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12701   match(If cop cmp);
12702   effect(USE labl);
12703 
12704   ins_cost(200);
12705   format %{ "J$cop,u  $labl" %}
12706   size(6);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710   %}
12711   ins_pipe(pipe_jcc);
12712 %}
12713 
12714 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
12718   ins_cost(200);
12719   format %{ $$template
12720     if ($cop$$cmpcode == Assembler::notEqual) {
12721       $$emit$$"JP,u   $labl\n\t"
12722       $$emit$$"J$cop,u   $labl"
12723     } else {
12724       $$emit$$"JP,u   done\n\t"
12725       $$emit$$"J$cop,u   $labl\n\t"
12726       $$emit$$"done:"
12727     }
12728   %}
12729   ins_encode %{
12730     Label* l = $labl$$label;
12731     if ($cop$$cmpcode == Assembler::notEqual) {
12732       __ jcc(Assembler::parity, *l, false);
12733       __ jcc(Assembler::notEqual, *l, false);
12734     } else if ($cop$$cmpcode == Assembler::equal) {
12735       Label done;
12736       __ jccb(Assembler::parity, done);
12737       __ jcc(Assembler::equal, *l, false);
12738       __ bind(done);
12739     } else {
12740        ShouldNotReachHere();
12741     }
12742   %}
12743   ins_pipe(pipe_jcc);
12744 %}
12745 
12746 // ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.
// The encoding ALSO sets flags.
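//
// Roughly, the scan performed here is (illustrative sketch only; the field
// names are the ones that appear in the format string below):
//
//   for (each s in sub->secondary_supers)
//     if (s == super) { sub->secondary_super_cache = super; return 0; }  // hit
//   return non-zero;                                                     // miss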
12751 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12752   match(Set result (PartialSubtypeCheck sub super));
12753   effect( KILL rcx, KILL cr );
12754 
12755   ins_cost(1100);  // slightly larger than the next version
12756   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12757             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12758             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12759             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12760             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12761             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12762             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12763      "miss:\t" %}
12764 
12765   opcode(0x1); // Force a XOR of EDI
12766   ins_encode( enc_PartialSubtypeCheck() );
12767   ins_pipe( pipe_slow );
12768 %}
12769 
12770 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12771   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12772   effect( KILL rcx, KILL result );
12773 
12774   ins_cost(1000);
12775   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12776             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12777             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12778             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12779             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12780             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12781      "miss:\t" %}
12782 
12783   opcode(0x0);  // No need to XOR EDI
12784   ins_encode( enc_PartialSubtypeCheck() );
12785   ins_pipe( pipe_slow );
12786 %}
12787 
12788 // ============================================================================
12789 // Branch Instructions -- short offset versions
12790 //
// These instructions are used to replace jumps with a long offset (the default
// match) with jumps using a shorter offset.  They are all tagged with the
// ins_short_branch attribute, which causes the ADLC to suppress their match
// rules in general matching.  Instead, the ADLC generates a conversion method
// in the MachNode which can be used to do in-place replacement of the long
// variant with the shorter variant.  The compiler decides whether a branch can
// use the short form via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
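//
// For reference: the long form of an unconditional jump above is 5 bytes
// (size(5): opcode 0xE9 plus a 32-bit displacement) while the short form
// below is 2 bytes (size(2): opcode 0xEB plus an 8-bit displacement); the
// conditional jumps shrink from 6 bytes (0x0F 0x8x + rel32) to 2 (0x7x + rel8).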
12799 
12800 // Jump Direct - Label defines a relative address from JMP+1
12801 instruct jmpDir_short(label labl) %{
12802   match(Goto);
12803   effect(USE labl);
12804 
12805   ins_cost(300);
12806   format %{ "JMP,s  $labl" %}
12807   size(2);
12808   ins_encode %{
12809     Label* L = $labl$$label;
12810     __ jmpb(*L);
12811   %}
12812   ins_pipe( pipe_jmp );
12813   ins_short_branch(1);
12814 %}
12815 
12816 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12817 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12818   match(If cop cr);
12819   effect(USE labl);
12820 
12821   ins_cost(300);
12822   format %{ "J$cop,s  $labl" %}
12823   size(2);
12824   ins_encode %{
12825     Label* L = $labl$$label;
12826     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12827   %}
12828   ins_pipe( pipe_jcc );
12829   ins_short_branch(1);
12830 %}
12831 
12832 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12833 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12834   match(CountedLoopEnd cop cr);
12835   effect(USE labl);
12836 
12837   ins_cost(300);
12838   format %{ "J$cop,s  $labl\t# Loop end" %}
12839   size(2);
12840   ins_encode %{
12841     Label* L = $labl$$label;
12842     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12843   %}
12844   ins_pipe( pipe_jcc );
12845   ins_short_branch(1);
12846 %}
12847 
12848 // Jump Direct Conditional - using unsigned comparison
12849 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12850   match(If cop cmp);
12851   effect(USE labl);
12852 
12853   ins_cost(300);
12854   format %{ "J$cop,us $labl" %}
12855   size(2);
12856   ins_encode %{
12857     Label* L = $labl$$label;
12858     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12859   %}
12860   ins_pipe( pipe_jcc );
12861   ins_short_branch(1);
12862 %}
12863 
12864 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12865   match(If cop cmp);
12866   effect(USE labl);
12867 
12868   ins_cost(300);
12869   format %{ "J$cop,us $labl" %}
12870   size(2);
12871   ins_encode %{
12872     Label* L = $labl$$label;
12873     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12874   %}
12875   ins_pipe( pipe_jcc );
12876   ins_short_branch(1);
12877 %}
12878 
12879 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12880   match(If cop cmp);
12881   effect(USE labl);
12882 
12883   ins_cost(300);
12884   format %{ $$template
12885     if ($cop$$cmpcode == Assembler::notEqual) {
12886       $$emit$$"JP,u,s   $labl\n\t"
12887       $$emit$$"J$cop,u,s   $labl"
12888     } else {
12889       $$emit$$"JP,u,s   done\n\t"
12890       $$emit$$"J$cop,u,s  $labl\n\t"
12891       $$emit$$"done:"
12892     }
12893   %}
12894   size(4);
12895   ins_encode %{
12896     Label* l = $labl$$label;
12897     if ($cop$$cmpcode == Assembler::notEqual) {
12898       __ jccb(Assembler::parity, *l);
12899       __ jccb(Assembler::notEqual, *l);
12900     } else if ($cop$$cmpcode == Assembler::equal) {
12901       Label done;
12902       __ jccb(Assembler::parity, done);
12903       __ jccb(Assembler::equal, *l);
12904       __ bind(done);
12905     } else {
12906        ShouldNotReachHere();
12907     }
12908   %}
12909   ins_pipe(pipe_jcc);
12910   ins_short_branch(1);
12911 %}
12912 
12913 // ============================================================================
12914 // Long Compare
12915 //
12916 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12917 // is tricky.  The flavor of compare used depends on whether we are testing
12918 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands and using the GE test; negating that gives the GT test.  The
// EQ test is done by OR'ing the high and low halves and testing the result,
// and the NE test is negated from that.
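//
// Illustrative sketch only (the real encodings are given by the instructs
// below): for a long x held as (x.hi:x.lo),
//   x <  0   is just the sign bit of x.hi               (TEST x.hi,x.hi)
//   x == 0   is (x.lo | x.hi) == 0                      (MOV/OR, then JEQ/JNE)
//   a <  b   follows from CMP a.lo,b.lo and then an SBB of the high halves,
//            which sets the flags as if the full 64-bit subtract had been
//            done, so the signed LT/GE conditions can be tested directly.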
12923 
12924 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12925 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12926 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12927 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12928 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12929 // foo match ends up with the wrong leaf.  One fix is to not match both
12930 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12931 // both forms beat the trinary form of long-compare and both are very useful
12932 // on Intel which has so few registers.
12933 
12934 // Manifest a CmpL result in an integer register.  Very painful.
12935 // This is the test to avoid.
12936 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12937   match(Set dst (CmpL3 src1 src2));
12938   effect( KILL flags );
12939   ins_cost(1000);
12940   format %{ "XOR    $dst,$dst\n\t"
12941             "CMP    $src1.hi,$src2.hi\n\t"
12942             "JLT,s  m_one\n\t"
12943             "JGT,s  p_one\n\t"
12944             "CMP    $src1.lo,$src2.lo\n\t"
12945             "JB,s   m_one\n\t"
12946             "JEQ,s  done\n"
12947     "p_one:\tINC    $dst\n\t"
12948             "JMP,s  done\n"
12949     "m_one:\tDEC    $dst\n"
12950      "done:" %}
12951   ins_encode %{
12952     Label p_one, m_one, done;
12953     __ xorptr($dst$$Register, $dst$$Register);
12954     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12955     __ jccb(Assembler::less,    m_one);
12956     __ jccb(Assembler::greater, p_one);
12957     __ cmpl($src1$$Register, $src2$$Register);
12958     __ jccb(Assembler::below,   m_one);
12959     __ jccb(Assembler::equal,   done);
12960     __ bind(p_one);
12961     __ incrementl($dst$$Register);
12962     __ jmpb(done);
12963     __ bind(m_one);
12964     __ decrementl($dst$$Register);
12965     __ bind(done);
12966   %}
12967   ins_pipe( pipe_slow );
12968 %}
12969 
12970 //======
12971 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12972 // compares.  Can be used for LE or GT compares by reversing arguments.
12973 // NOT GOOD FOR EQ/NE tests.
12974 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12975   match( Set flags (CmpL src zero ));
12976   ins_cost(100);
12977   format %{ "TEST   $src.hi,$src.hi" %}
12978   opcode(0x85);
12979   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12980   ins_pipe( ialu_cr_reg_reg );
12981 %}
12982 
12983 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12984 // compares.  Can be used for LE or GT compares by reversing arguments.
12985 // NOT GOOD FOR EQ/NE tests.
12986 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12987   match( Set flags (CmpL src1 src2 ));
12988   effect( TEMP tmp );
12989   ins_cost(300);
12990   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12991             "MOV    $tmp,$src1.hi\n\t"
12992             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12993   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12994   ins_pipe( ialu_cr_reg_reg );
12995 %}
12996 
// Long compares reg < zero/reg OR reg >= zero/reg.
12998 // Just a wrapper for a normal branch, plus the predicate test.
12999 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13000   match(If cmp flags);
13001   effect(USE labl);
13002   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13003   expand %{
13004     jmpCon(cmp,flags,labl);    // JLT or JGE...
13005   %}
13006 %}
13007 
13008 //======
13009 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13010 // compares.  Can be used for LE or GT compares by reversing arguments.
13011 // NOT GOOD FOR EQ/NE tests.
13012 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13013   match(Set flags (CmpUL src zero));
13014   ins_cost(100);
13015   format %{ "TEST   $src.hi,$src.hi" %}
13016   opcode(0x85);
13017   ins_encode(OpcP, RegReg_Hi2(src, src));
13018   ins_pipe(ialu_cr_reg_reg);
13019 %}
13020 
13021 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13022 // compares.  Can be used for LE or GT compares by reversing arguments.
13023 // NOT GOOD FOR EQ/NE tests.
13024 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13025   match(Set flags (CmpUL src1 src2));
13026   effect(TEMP tmp);
13027   ins_cost(300);
13028   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13029             "MOV    $tmp,$src1.hi\n\t"
13030             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13031   ins_encode(long_cmp_flags2(src1, src2, tmp));
13032   ins_pipe(ialu_cr_reg_reg);
13033 %}
13034 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13036 // Just a wrapper for a normal branch, plus the predicate test.
13037 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13038   match(If cmp flags);
13039   effect(USE labl);
13040   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13041   expand %{
13042     jmpCon(cmp, flags, labl);    // JLT or JGE...
13043   %}
13044 %}
13045 
13046 // Compare 2 longs and CMOVE longs.
13047 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13048   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13049   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13050   ins_cost(400);
13051   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13052             "CMOV$cmp $dst.hi,$src.hi" %}
13053   opcode(0x0F,0x40);
13054   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13055   ins_pipe( pipe_cmov_reg_long );
13056 %}
13057 
13058 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13059   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13060   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13061   ins_cost(500);
13062   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13063             "CMOV$cmp $dst.hi,$src.hi" %}
13064   opcode(0x0F,0x40);
13065   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13066   ins_pipe( pipe_cmov_reg_long );
13067 %}
13068 
13069 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13070   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13071   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13072   ins_cost(400);
13073   expand %{
13074     cmovLL_reg_LTGE(cmp, flags, dst, src);
13075   %}
13076 %}
13077 
13078 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13079   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13080   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13081   ins_cost(500);
13082   expand %{
13083     cmovLL_mem_LTGE(cmp, flags, dst, src);
13084   %}
13085 %}
13086 
13087 // Compare 2 longs and CMOVE ints.
13088 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13089   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13090   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13091   ins_cost(200);
13092   format %{ "CMOV$cmp $dst,$src" %}
13093   opcode(0x0F,0x40);
13094   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13095   ins_pipe( pipe_cmov_reg );
13096 %}
13097 
13098 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13099   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13100   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13101   ins_cost(250);
13102   format %{ "CMOV$cmp $dst,$src" %}
13103   opcode(0x0F,0x40);
13104   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13105   ins_pipe( pipe_cmov_mem );
13106 %}
13107 
13108 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13109   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13110   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13111   ins_cost(200);
13112   expand %{
13113     cmovII_reg_LTGE(cmp, flags, dst, src);
13114   %}
13115 %}
13116 
13117 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13118   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13119   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13120   ins_cost(250);
13121   expand %{
13122     cmovII_mem_LTGE(cmp, flags, dst, src);
13123   %}
13124 %}
13125 
13126 // Compare 2 longs and CMOVE ptrs.
13127 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13128   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13129   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13130   ins_cost(200);
13131   format %{ "CMOV$cmp $dst,$src" %}
13132   opcode(0x0F,0x40);
13133   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13134   ins_pipe( pipe_cmov_reg );
13135 %}
13136 
13137 // Compare 2 unsigned longs and CMOVE ptrs.
13138 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13139   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13140   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13141   ins_cost(200);
13142   expand %{
13143     cmovPP_reg_LTGE(cmp,flags,dst,src);
13144   %}
13145 %}
13146 
13147 // Compare 2 longs and CMOVE doubles
13148 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13149   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13150   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13151   ins_cost(200);
13152   expand %{
13153     fcmovDPR_regS(cmp,flags,dst,src);
13154   %}
13155 %}
13156 
13157 // Compare 2 longs and CMOVE doubles
13158 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13159   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13160   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13161   ins_cost(200);
13162   expand %{
13163     fcmovD_regS(cmp,flags,dst,src);
13164   %}
13165 %}
13166 
13167 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13168   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13169   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13170   ins_cost(200);
13171   expand %{
13172     fcmovFPR_regS(cmp,flags,dst,src);
13173   %}
13174 %}
13175 
13176 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13177   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13178   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13179   ins_cost(200);
13180   expand %{
13181     fcmovF_regS(cmp,flags,dst,src);
13182   %}
13183 %}
13184 
13185 //======
13186 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13187 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13188   match( Set flags (CmpL src zero ));
13189   effect(TEMP tmp);
13190   ins_cost(200);
13191   format %{ "MOV    $tmp,$src.lo\n\t"
13192             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13193   ins_encode( long_cmp_flags0( src, tmp ) );
13194   ins_pipe( ialu_reg_reg_long );
13195 %}
13196 
13197 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13198 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13199   match( Set flags (CmpL src1 src2 ));
13200   ins_cost(200+300);
13201   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13202             "JNE,s  skip\n\t"
13203             "CMP    $src1.hi,$src2.hi\n\t"
13204      "skip:\t" %}
13205   ins_encode( long_cmp_flags1( src1, src2 ) );
13206   ins_pipe( ialu_cr_reg_reg );
13207 %}
13208 
13209 // Long compare reg == zero/reg OR reg != zero/reg
13210 // Just a wrapper for a normal branch, plus the predicate test.
13211 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13212   match(If cmp flags);
13213   effect(USE labl);
13214   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13215   expand %{
13216     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13217   %}
13218 %}
13219 
13220 //======
13221 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13222 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13223   match(Set flags (CmpUL src zero));
13224   effect(TEMP tmp);
13225   ins_cost(200);
13226   format %{ "MOV    $tmp,$src.lo\n\t"
13227             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13228   ins_encode(long_cmp_flags0(src, tmp));
13229   ins_pipe(ialu_reg_reg_long);
13230 %}
13231 
13232 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13233 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13234   match(Set flags (CmpUL src1 src2));
13235   ins_cost(200+300);
13236   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13237             "JNE,s  skip\n\t"
13238             "CMP    $src1.hi,$src2.hi\n\t"
13239      "skip:\t" %}
13240   ins_encode(long_cmp_flags1(src1, src2));
13241   ins_pipe(ialu_cr_reg_reg);
13242 %}
13243 
13244 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13245 // Just a wrapper for a normal branch, plus the predicate test.
13246 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13247   match(If cmp flags);
13248   effect(USE labl);
13249   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13250   expand %{
13251     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13252   %}
13253 %}
13254 
13255 // Compare 2 longs and CMOVE longs.
13256 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13257   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13258   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13259   ins_cost(400);
13260   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13261             "CMOV$cmp $dst.hi,$src.hi" %}
13262   opcode(0x0F,0x40);
13263   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13264   ins_pipe( pipe_cmov_reg_long );
13265 %}
13266 
13267 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13268   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13269   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13270   ins_cost(500);
13271   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13272             "CMOV$cmp $dst.hi,$src.hi" %}
13273   opcode(0x0F,0x40);
13274   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13275   ins_pipe( pipe_cmov_reg_long );
13276 %}
13277 
13278 // Compare 2 longs and CMOVE ints.
13279 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13280   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13281   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13282   ins_cost(200);
13283   format %{ "CMOV$cmp $dst,$src" %}
13284   opcode(0x0F,0x40);
13285   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13286   ins_pipe( pipe_cmov_reg );
13287 %}
13288 
13289 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13290   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13291   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13292   ins_cost(250);
13293   format %{ "CMOV$cmp $dst,$src" %}
13294   opcode(0x0F,0x40);
13295   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13296   ins_pipe( pipe_cmov_mem );
13297 %}
13298 
13299 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13300   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13301   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13302   ins_cost(200);
13303   expand %{
13304     cmovII_reg_EQNE(cmp, flags, dst, src);
13305   %}
13306 %}
13307 
13308 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13309   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13310   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13311   ins_cost(250);
13312   expand %{
13313     cmovII_mem_EQNE(cmp, flags, dst, src);
13314   %}
13315 %}
13316 
13317 // Compare 2 longs and CMOVE ptrs.
13318 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13319   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13320   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13321   ins_cost(200);
13322   format %{ "CMOV$cmp $dst,$src" %}
13323   opcode(0x0F,0x40);
13324   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13325   ins_pipe( pipe_cmov_reg );
13326 %}
13327 
13328 // Compare 2 unsigned longs and CMOVE ptrs.
13329 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13330   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13331   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13332   ins_cost(200);
13333   expand %{
13334     cmovPP_reg_EQNE(cmp,flags,dst,src);
13335   %}
13336 %}
13337 
13338 // Compare 2 longs and CMOVE doubles
13339 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13340   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13341   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13342   ins_cost(200);
13343   expand %{
13344     fcmovDPR_regS(cmp,flags,dst,src);
13345   %}
13346 %}
13347 
13348 // Compare 2 longs and CMOVE doubles
13349 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13350   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13351   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13352   ins_cost(200);
13353   expand %{
13354     fcmovD_regS(cmp,flags,dst,src);
13355   %}
13356 %}
13357 
13358 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13359   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13360   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13361   ins_cost(200);
13362   expand %{
13363     fcmovFPR_regS(cmp,flags,dst,src);
13364   %}
13365 %}
13366 
13367 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13368   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13369   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13370   ins_cost(200);
13371   expand %{
13372     fcmovF_regS(cmp,flags,dst,src);
13373   %}
13374 %}
13375 
13376 //======
13377 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13378 // Same as cmpL_reg_flags_LEGT except must negate src
13379 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13380   match( Set flags (CmpL src zero ));
13381   effect( TEMP tmp );
13382   ins_cost(300);
13383   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13384             "CMP    $tmp,$src.lo\n\t"
13385             "SBB    $tmp,$src.hi\n\t" %}
13386   ins_encode( long_cmp_flags3(src, tmp) );
13387   ins_pipe( ialu_reg_reg_long );
13388 %}
13389 
13390 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13391 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13392 // requires a commuted test to get the same result.
13393 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13394   match( Set flags (CmpL src1 src2 ));
13395   effect( TEMP tmp );
13396   ins_cost(300);
13397   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13398             "MOV    $tmp,$src2.hi\n\t"
13399             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13400   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13401   ins_pipe( ialu_cr_reg_reg );
13402 %}
13403 
// Long compares reg <= zero/reg OR reg > zero/reg.
13405 // Just a wrapper for a normal branch, plus the predicate test
13406 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13407   match(If cmp flags);
13408   effect(USE labl);
13409   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13410   ins_cost(300);
13411   expand %{
13412     jmpCon(cmp,flags,labl);    // JGT or JLE...
13413   %}
13414 %}
13415 
13416 //======
13417 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13418 // Same as cmpUL_reg_flags_LEGT except must negate src
13419 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13420   match(Set flags (CmpUL src zero));
13421   effect(TEMP tmp);
13422   ins_cost(300);
13423   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13424             "CMP    $tmp,$src.lo\n\t"
13425             "SBB    $tmp,$src.hi\n\t" %}
13426   ins_encode(long_cmp_flags3(src, tmp));
13427   ins_pipe(ialu_reg_reg_long);
13428 %}
13429 
13430 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13431 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13432 // requires a commuted test to get the same result.
13433 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13434   match(Set flags (CmpUL src1 src2));
13435   effect(TEMP tmp);
13436   ins_cost(300);
13437   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13438             "MOV    $tmp,$src2.hi\n\t"
13439             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13440   ins_encode(long_cmp_flags2( src2, src1, tmp));
13441   ins_pipe(ialu_cr_reg_reg);
13442 %}
13443 
13444 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13445 // Just a wrapper for a normal branch, plus the predicate test
13446 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13447   match(If cmp flags);
13448   effect(USE labl);
13449   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13450   ins_cost(300);
13451   expand %{
13452     jmpCon(cmp, flags, labl);    // JGT or JLE...
13453   %}
13454 %}
13455 
13456 // Compare 2 longs and CMOVE longs.
13457 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13458   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13459   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13460   ins_cost(400);
13461   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13462             "CMOV$cmp $dst.hi,$src.hi" %}
13463   opcode(0x0F,0x40);
13464   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13465   ins_pipe( pipe_cmov_reg_long );
13466 %}
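
// The 64-bit conditional move above is simply two 32-bit CMOVs on the same
// condition.  A minimal C sketch of the data flow (illustrative only; the
// helper below is not part of this file):
//
//   static void cmov_long(bool cond, unsigned int* dst_lo, unsigned int* dst_hi,
//                         unsigned int src_lo, unsigned int src_hi) {
//     if (cond) { *dst_lo = src_lo; }   // CMOVcc dst.lo,src.lo
//     if (cond) { *dst_hi = src_hi; }   // CMOVcc dst.hi,src.hi
//   }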
13467 
13468 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13469   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13470   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13471   ins_cost(500);
13472   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13473             "CMOV$cmp $dst.hi,$src.hi+4" %}
13474   opcode(0x0F,0x40);
13475   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13476   ins_pipe( pipe_cmov_reg_long );
13477 %}
13478 
13479 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13480   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13481   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13482   ins_cost(400);
13483   expand %{
13484     cmovLL_reg_LEGT(cmp, flags, dst, src);
13485   %}
13486 %}
13487 
13488 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13489   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13490   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13491   ins_cost(500);
13492   expand %{
13493     cmovLL_mem_LEGT(cmp, flags, dst, src);
13494   %}
13495 %}
13496 
13497 // Compare 2 longs and CMOVE ints.
13498 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13499   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13500   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13501   ins_cost(200);
13502   format %{ "CMOV$cmp $dst,$src" %}
13503   opcode(0x0F,0x40);
13504   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13505   ins_pipe( pipe_cmov_reg );
13506 %}
13507 
13508 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13509   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13510   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13511   ins_cost(250);
13512   format %{ "CMOV$cmp $dst,$src" %}
13513   opcode(0x0F,0x40);
13514   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13515   ins_pipe( pipe_cmov_mem );
13516 %}
13517 
13518 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13519   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13520   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13521   ins_cost(200);
13522   expand %{
13523     cmovII_reg_LEGT(cmp, flags, dst, src);
13524   %}
13525 %}
13526 
13527 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13528   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13529   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13530   ins_cost(250);
13531   expand %{
13532     cmovII_mem_LEGT(cmp, flags, dst, src);
13533   %}
13534 %}
13535 
13536 // Compare 2 longs and CMOVE ptrs.
13537 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13538   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13539   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13540   ins_cost(200);
13541   format %{ "CMOV$cmp $dst,$src" %}
13542   opcode(0x0F,0x40);
13543   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13544   ins_pipe( pipe_cmov_reg );
13545 %}
13546 
13547 // Compare 2 unsigned longs and CMOVE ptrs.
13548 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13549   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13550   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13551   ins_cost(200);
13552   expand %{
13553     cmovPP_reg_LEGT(cmp,flags,dst,src);
13554   %}
13555 %}
13556 
13557 // Compare 2 longs and CMOVE doubles
13558 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13559   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13560   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13561   ins_cost(200);
13562   expand %{
13563     fcmovDPR_regS(cmp,flags,dst,src);
13564   %}
13565 %}
13566 
13567 // Compare 2 longs and CMOVE doubles
13568 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13569   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13570   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13571   ins_cost(200);
13572   expand %{
13573     fcmovD_regS(cmp,flags,dst,src);
13574   %}
13575 %}
13576 
13577 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13578   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13579   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13580   ins_cost(200);
13581   expand %{
13582     fcmovFPR_regS(cmp,flags,dst,src);
13583   %}
13584 %}
13585 
13586 
13587 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13588   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13589   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13590   ins_cost(200);
13591   expand %{
13592     fcmovF_regS(cmp,flags,dst,src);
13593   %}
13594 %}
13595 
13596 
13597 // ============================================================================
13598 // Procedure Call/Return Instructions
13599 // Call Java Static Instruction
13600 // Note: If this code changes, the corresponding ret_addr_offset() and
13601 //       compute_padding() functions will have to be adjusted.
13602 instruct CallStaticJavaDirect(method meth) %{
13603   match(CallStaticJava);
13604   effect(USE meth);
13605 
13606   ins_cost(300);
13607   format %{ "CALL,static " %}
13608   opcode(0xE8); /* E8 cd */
13609   ins_encode( pre_call_resets,
13610               Java_Static_Call( meth ),
13611               call_epilog,
13612               post_call_FPU );
13613   ins_pipe( pipe_slow );
13614   ins_alignment(4);
13615 %}
13616 
13617 // Call Java Dynamic Instruction
13618 // Note: If this code changes, the corresponding ret_addr_offset() and
13619 //       compute_padding() functions will have to be adjusted.
13620 instruct CallDynamicJavaDirect(method meth) %{
13621   match(CallDynamicJava);
13622   effect(USE meth);
13623 
13624   ins_cost(300);
13625   format %{ "MOV    EAX,(oop)-1\n\t"
13626             "CALL,dynamic" %}
13627   opcode(0xE8); /* E8 cd */
13628   ins_encode( pre_call_resets,
13629               Java_Dynamic_Call( meth ),
13630               call_epilog,
13631               post_call_FPU );
13632   ins_pipe( pipe_slow );
13633   ins_alignment(4);
13634 %}
13635 
13636 // Call Runtime Instruction
13637 instruct CallRuntimeDirect(method meth) %{
13638   match(CallRuntime);
13639   effect(USE meth);
13640 
13641   ins_cost(300);
13642   format %{ "CALL,runtime " %}
13643   opcode(0xE8); /* E8 cd */
13644   // Use FFREEs to clear entries in float stack
13645   ins_encode( pre_call_resets,
13646               FFree_Float_Stack_All,
13647               Java_To_Runtime( meth ),
13648               post_call_FPU );
13649   ins_pipe( pipe_slow );
13650 %}
13651 
13652 // Call runtime without safepoint
13653 instruct CallLeafDirect(method meth) %{
13654   match(CallLeaf);
13655   effect(USE meth);
13656 
13657   ins_cost(300);
13658   format %{ "CALL_LEAF,runtime " %}
13659   opcode(0xE8); /* E8 cd */
13660   ins_encode( pre_call_resets,
13661               FFree_Float_Stack_All,
13662               Java_To_Runtime( meth ),
13663               Verify_FPU_For_Leaf, post_call_FPU );
13664   ins_pipe( pipe_slow );
13665 %}
13666 
13667 instruct CallLeafNoFPDirect(method meth) %{
13668   match(CallLeafNoFP);
13669   effect(USE meth);
13670 
13671   ins_cost(300);
13672   format %{ "CALL_LEAF_NOFP,runtime " %}
13673   opcode(0xE8); /* E8 cd */
13674   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13675   ins_pipe( pipe_slow );
13676 %}
13677 
13678 
13679 // Return Instruction
13680 // Remove the return address & jump to it.
13681 instruct Ret() %{
13682   match(Return);
13683   format %{ "RET" %}
13684   opcode(0xC3);
13685   ins_encode(OpcP);
13686   ins_pipe( pipe_jmp );
13687 %}
13688 
13689 // Tail Call; Jump from runtime stub to Java code.
13690 // Also known as an 'interprocedural jump'.
13691 // Target of jump will eventually return to caller.
13692 // TailJump below removes the return address.
13693 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13694   match(TailCall jump_target method_ptr);
13695   ins_cost(300);
13696   format %{ "JMP    $jump_target \t# EBX holds method" %}
13697   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13698   ins_encode( OpcP, RegOpc(jump_target) );
13699   ins_pipe( pipe_jmp );
13700 %}
13701 
13702 
13703 // Tail Jump; remove the return address; jump to target.
13704 // TailCall above leaves the return address on the stack.
13705 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13706   match( TailJump jump_target ex_oop );
13707   ins_cost(300);
13708   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13709             "JMP    $jump_target " %}
13710   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13711   ins_encode( enc_pop_rdx,
13712               OpcP, RegOpc(jump_target) );
13713   ins_pipe( pipe_jmp );
13714 %}
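
// The difference between the two interprocedural jumps above, in rough
// pseudo-code (illustrative only; these are not emitted sequences):
//
//   TailCalljmpInd:  goto *jump_target;       // return address stays on the
//                                             // stack, so the target eventually
//                                             // returns to this method's caller
//
//   tailjmpInd:      pop_return_address();    // POP EDX (value discarded)
//                    goto *jump_target;       // JMP jump_target; the target
//                                             // never returns here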
13715 
13716 // Create exception oop: created by stack-crawling runtime code.
13717 // The created exception is now available to this handler, and is set up
13718 // just prior to jumping to this handler.  No code is emitted.
13719 instruct CreateException( eAXRegP ex_oop )
13720 %{
13721   match(Set ex_oop (CreateEx));
13722 
13723   size(0);
13724   // use the following format syntax
13725   format %{ "# exception oop is in EAX; no code emitted" %}
13726   ins_encode();
13727   ins_pipe( empty );
13728 %}
13729 
13730 
13731 // Rethrow exception:
13732 // The exception oop will come in the first argument position.
13733 // Then JUMP (not call) to the rethrow stub code.
13734 instruct RethrowException()
13735 %{
13736   match(Rethrow);
13737 
13738   // use the following format syntax
13739   format %{ "JMP    rethrow_stub" %}
13740   ins_encode(enc_rethrow);
13741   ins_pipe( pipe_jmp );
13742 %}
13743 
13744 // inlined locking and unlocking
13745 
13746 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13747   predicate(Compile::current()->use_rtm());
13748   match(Set cr (FastLock object box));
13749   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13750   ins_cost(300);
13751   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13752   ins_encode %{
13753     __ get_thread($thread$$Register);
13754     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13755                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13756                  _rtm_counters, _stack_rtm_counters,
13757                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13758                  true, ra_->C->profile_rtm());
13759   %}
13760   ins_pipe(pipe_slow);
13761 %}
13762 
13763 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13764   predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
13765   match(Set cr (FastLock object box));
13766   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13767   ins_cost(300);
13768   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13769   ins_encode %{
13770     __ get_thread($thread$$Register);
13771     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13772                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13773   %}
13774   ins_pipe(pipe_slow);
13775 %}
13776 
13777 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13778   predicate(LockingMode != LM_LIGHTWEIGHT);
13779   match(Set cr (FastUnlock object box));
13780   effect(TEMP tmp, USE_KILL box);
13781   ins_cost(300);
13782   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13783   ins_encode %{
13784     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13785   %}
13786   ins_pipe(pipe_slow);
13787 %}
13788 
13789 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13790   predicate(LockingMode == LM_LIGHTWEIGHT);
13791   match(Set cr (FastLock object box));
13792   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13793   ins_cost(300);
13794   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13795   ins_encode %{
13796     __ get_thread($thread$$Register);
13797     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13798   %}
13799   ins_pipe(pipe_slow);
13800 %}
13801 
13802 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
13803   predicate(LockingMode == LM_LIGHTWEIGHT);
13804   match(Set cr (FastUnlock object eax_reg));
13805   effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
13806   ins_cost(300);
13807   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13808   ins_encode %{
13809     __ get_thread($thread$$Register);
13810     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13811   %}
13812   ins_pipe(pipe_slow);
13813 %}
13814 
13815 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13816   predicate(Matcher::vector_length(n) <= 32);
13817   match(Set dst (MaskAll src));
13818   format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13819   ins_encode %{
13820     int mask_len = Matcher::vector_length(this);
13821     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13822   %}
13823   ins_pipe( pipe_slow );
13824 %}
13825 
13826 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13827   predicate(Matcher::vector_length(n) > 32);
13828   match(Set dst (MaskAll src));
13829   effect(TEMP ktmp);
13830   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13831   ins_encode %{
13832     int mask_len = Matcher::vector_length(this);
13833     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13834   %}
13835   ins_pipe( pipe_slow );
13836 %}
13837 
13838 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13839   predicate(Matcher::vector_length(n) > 32);
13840   match(Set dst (MaskAll src));
13841   effect(TEMP ktmp);
13842   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13843   ins_encode %{
13844     int mask_len = Matcher::vector_length(this);
13845     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13846   %}
13847   ins_pipe( pipe_slow );
13848 %}
13849 
13850 // ============================================================================
13851 // Safepoint Instruction
13852 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13853   match(SafePoint poll);
13854   effect(KILL cr, USE poll);
13855 
13856   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13857   ins_cost(125);
13858   // EBP would need size(3)
13859   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13860   ins_encode %{
13861     __ relocate(relocInfo::poll_type);
13862     address pre_pc = __ pc();
13863     __ testl(rax, Address($poll$$Register, 0));
13864     address post_pc = __ pc();
13865     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13866   %}
13867   ins_pipe(ialu_reg_mem);
13868 %}
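
// The poll above is just a dummy read through the thread-local polling
// address.  Roughly, in pseudo-C (illustrative only; poll_address stands for
// the register operand $poll):
//
//   (void) *(volatile int*) poll_address;   // page readable: harmless load
//                                           // page armed:    the access faults and
//                                           //                the fault handler brings
//                                           //                this thread to a safepoint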
13869 
13870 
13871 // ============================================================================
13872 // This name is KNOWN by the ADLC and cannot be changed.
13873 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13874 // for this instruction.
13875 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13876   match(Set dst (ThreadLocal));
13877   effect(DEF dst, KILL cr);
13878 
13879   format %{ "MOV    $dst, Thread::current()" %}
13880   ins_encode %{
13881     Register dstReg = as_Register($dst$$reg);
13882     __ get_thread(dstReg);
13883   %}
13884   ins_pipe( ialu_reg_fat );
13885 %}
13886 
13887 
13888 
13889 //----------PEEPHOLE RULES-----------------------------------------------------
13890 // These must follow all instruction definitions as they use the names
13891 // defined in the instruction definitions.
13892 //
13893 // peepmatch ( root_instr_name [preceding_instruction]* );
13894 //
13895 // peepconstraint %{
13896 // (instruction_number.operand_name relational_op instruction_number.operand_name
13897 //  [, ...] );
13898 // // instruction numbers are zero-based using left to right order in peepmatch
13899 //
13900 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13901 // // provide an instruction_number.operand_name for each operand that appears
13902 // // in the replacement instruction's match rule
13903 //
13904 // ---------VM FLAGS---------------------------------------------------------
13905 //
13906 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13907 //
13908 // Each peephole rule is given an identifying number starting with zero and
13909 // increasing by one in the order seen by the parser.  An individual peephole
13910 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13911 // on the command-line.
13912 //
13913 // ---------CURRENT LIMITATIONS----------------------------------------------
13914 //
13915 // Only match adjacent instructions in same basic block
13916 // Only equality constraints
13917 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13918 // Only one replacement instruction
13919 //
13920 // ---------EXAMPLE----------------------------------------------------------
13921 //
13922 // // pertinent parts of existing instructions in architecture description
13923 // instruct movI(rRegI dst, rRegI src) %{
13924 //   match(Set dst (CopyI src));
13925 // %}
13926 //
13927 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13928 //   match(Set dst (AddI dst src));
13929 //   effect(KILL cr);
13930 // %}
13931 //
13932 // // Change (inc mov) to lea
13933 // peephole %{
13934 //   // increment preceded by register-register move
13935 //   peepmatch ( incI_eReg movI );
13936 //   // require that the destination register of the increment
13937 //   // match the destination register of the move
13938 //   peepconstraint ( 0.dst == 1.dst );
13939 //   // construct a replacement instruction that sets
13940 //   // the destination to ( move's source register + one )
13941 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13942 // %}
13943 //
13944 // The implementation no longer uses movX instructions, since the
13945 // machine-independent system no longer uses CopyX nodes.
13946 //
13947 // peephole %{
13948 //   peepmatch ( incI_eReg movI );
13949 //   peepconstraint ( 0.dst == 1.dst );
13950 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13951 // %}
13952 //
13953 // peephole %{
13954 //   peepmatch ( decI_eReg movI );
13955 //   peepconstraint ( 0.dst == 1.dst );
13956 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13957 // %}
13958 //
13959 // peephole %{
13960 //   peepmatch ( addI_eReg_imm movI );
13961 //   peepconstraint ( 0.dst == 1.dst );
13962 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13963 // %}
13964 //
13965 // peephole %{
13966 //   peepmatch ( addP_eReg_imm movP );
13967 //   peepconstraint ( 0.dst == 1.dst );
13968 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13969 // %}
13970 
13971 // // Change load of spilled value to only a spill
13972 // instruct storeI(memory mem, rRegI src) %{
13973 //   match(Set mem (StoreI mem src));
13974 // %}
13975 //
13976 // instruct loadI(rRegI dst, memory mem) %{
13977 //   match(Set dst (LoadI mem));
13978 // %}
13979 //
13980 peephole %{
13981   peepmatch ( loadI storeI );
13982   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13983   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13984 %}
13985 
13986 //----------SMARTSPILL RULES---------------------------------------------------
13987 // These must follow all instruction definitions as they use the names
13988 // defined in the instruction definitions.