1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
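//
// For example, in the entry for EBX below, the ideal type Op_RegI means EBX
// is spilled with LoadI/StoreI, and the encoding 3 is EBX's hardware register
// number, i.e. the bit pattern that ends up in ModRM fields for EBX.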
   61 
   62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are treated as SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode. During emission the FPU stack is pushed,
// making FPR1 == st(1) temporarily. However, at any safepoint the stack will
// not have this element, so FPR1 == st(0) from the oopMap viewpoint. This
// same numbering weirdness forces the instruction encoding to play games with
// the register encode to correct for the 0/1 issue. See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
// Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
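// With the encodings defined above this maps EAX(0) -> EDX(2), ECX(1) -> EBX(3)
// and EBP(5) -> EDI(7), matching the long pairs EDX:EAX, EBX:ECX and EDI:EBP
// listed in the register definition section.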
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
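
// A rough sketch of how these pools serve the NegF/NegD and AbsF/AbsD
// mentioned above: AbsF can be implemented as ANDPS with float_signmask_pool
// (clearing the sign bit) and NegF as XORPS with float_signflip_pool
// (flipping it); double_signmask_pool/double_signflip_pool cover the 64-bit
// sign bit the same way.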
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
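
// (For reference: a direct call is E8 plus a 32-bit displacement, i.e. 5 bytes;
// the dynamic variant is preceded by a 5-byte MOV used for the inline cache
// check, hence 10. The compute_padding methods below skip that same MOV.)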
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
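
// A worked example, assuming alignment_required() == 4 and no pre-call
// resets: with current_offset == 10 the static call's opcode byte would land
// at 10 and its 32-bit displacement at 11; align_up(11, 4) - 11 == 1 byte of
// padding is returned, so after padding the displacement starts at the
// 4-byte boundary 12 and cannot span a cache line.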
  338 
  339 // EMIT_RM()
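// The three arguments follow the standard x86 ModRM layout: f1 -> mod
// (bits 7..6), f2 -> reg or opcode extension (bits 5..3), f3 -> r/m
// (bits 2..0). The same helper is reused to build SIB bytes, which use the
// same 2-3-3 split for scale/index/base.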
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
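  // An [ESP+disp] operand always needs a SIB byte: r/m == 100b selects SIB
  // mode, and the SIB byte 0x24 emitted below (scale 0, index 100b == none,
  // base ESP) encodes plain ESP-relative addressing.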
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
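// Materialize the three-way result of an FP compare into dst: -1 if the
// compare was unordered (PF set) or below (CF set), 0 if equal, +1 if above,
// based on the ucomiss/ucomisd flag pattern described above.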
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before the MachConstantBaseNode itself.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and EBP.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
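  // 0x83 is the sign-extended imm8 form of ADD and 0x81 the imm32 form, so
  // frames smaller than 128 bytes tear down with a 3-byte instruction
  // instead of a 6-byte one.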
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
  694 
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
  709     }
  710     __ relocate(relocInfo::poll_return_type);
  711     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  712   }
  713 }
  714 
  715 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  716   return MachNode::size(ra_); // too many variables; just compute it
  717                               // the hard way
  718 }
  719 
  720 int MachEpilogNode::reloc() const {
  721   return 0; // a large enough number
  722 }
  723 
  724 const Pipeline * MachEpilogNode::pipeline() const {
  725   return MachNode::pipeline_class();
  726 }
  727 
  728 //=============================================================================
  729 
  730 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  731 static enum RC rc_class( OptoReg::Name reg ) {
  732 
  733   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  734   if (OptoReg::is_stack(reg)) return rc_stack;
  735 
  736   VMReg r = OptoReg::as_VMReg(reg);
  737   if (r->is_Register()) return rc_int;
  738   if (r->is_FloatRegister()) {
  739     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  740     return rc_float;
  741   }
  742   if (r->is_KRegister()) return rc_kreg;
  743   assert(r->is_XMMRegister(), "must be");
  744   return rc_xmm;
  745 }
  746 
  747 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  748                         int opcode, const char *op_str, int size, outputStream* st ) {
  749   if( cbuf ) {
  750     emit_opcode  (*cbuf, opcode );
  751     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  752 #ifndef PRODUCT
  753   } else if( !do_size ) {
  754     if( size != 0 ) st->print("\n\t");
  755     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  756       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  757       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  758     } else { // FLD, FST, PUSH, POP
  759       st->print("%s [ESP + #%d]",op_str,offset);
  760     }
  761 #endif
  762   }
  763   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  764   return size+3+offset_size;
  765 }
  766 
  767 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  768 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  769                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  770   int in_size_in_bits = Assembler::EVEX_32bit;
  771   int evex_encoding = 0;
  772   if (reg_lo+1 == reg_hi) {
  773     in_size_in_bits = Assembler::EVEX_64bit;
  774     evex_encoding = Assembler::VEX_W;
  775   }
  776   if (cbuf) {
  777     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
    //                          since it maps more cases to a single-byte displacement.
  780     _masm.set_managed();
  781     if (reg_lo+1 == reg_hi) { // double move?
  782       if (is_load) {
  783         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  784       } else {
  785         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  786       }
  787     } else {
  788       if (is_load) {
  789         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  790       } else {
  791         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  792       }
  793     }
  794 #ifndef PRODUCT
  795   } else if (!do_size) {
  796     if (size != 0) st->print("\n\t");
  797     if (reg_lo+1 == reg_hi) { // double move?
  798       if (is_load) st->print("%s %s,[ESP + #%d]",
  799                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  800                               Matcher::regName[reg_lo], offset);
  801       else         st->print("MOVSD  [ESP + #%d],%s",
  802                               offset, Matcher::regName[reg_lo]);
  803     } else {
  804       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSS  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     }
  809 #endif
  810   }
  811   bool is_single_byte = false;
  812   if ((UseAVX > 2) && (offset != 0)) {
  813     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  814   }
  815   int offset_size = 0;
  816   if (UseAVX > 2 ) {
  817     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  818   } else {
  819     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  820   }
  821   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  822   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  823   return size+5+offset_size;
  824 }
  825 
  826 
  827 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  828                             int src_hi, int dst_hi, int size, outputStream* st ) {
  829   if (cbuf) {
  830     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so EVEX spill code is managed one way.
  832     _masm.set_managed();
  833     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  834       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  835                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  836     } else {
  837       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  838                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  839     }
  840 #ifndef PRODUCT
  841   } else if (!do_size) {
  842     if (size != 0) st->print("\n\t");
  843     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  844       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  845         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  846       } else {
  847         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       }
  849     } else {
  850       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  851         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  852       } else {
  853         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       }
  855     }
  856 #endif
  857   }
  858   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  859   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  860   int sz = (UseAVX > 2) ? 6 : 4;
  861   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  862       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  863   return size + sz;
  864 }
  865 
  866 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  867                             int src_hi, int dst_hi, int size, outputStream* st ) {
  868   // 32-bit
  869   if (cbuf) {
  870     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so EVEX spill code is managed one way.
  872     _masm.set_managed();
  873     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  874              as_Register(Matcher::_regEncode[src_lo]));
  875 #ifndef PRODUCT
  876   } else if (!do_size) {
  877     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  878 #endif
  879   }
  880   return (UseAVX> 2) ? 6 : 4;
  881 }
  882 
  883 
  884 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  885                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  886   // 32-bit
  887   if (cbuf) {
  888     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX and AVX, so EVEX spill code is managed one way.
  890     _masm.set_managed();
  891     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  892              as_XMMRegister(Matcher::_regEncode[src_lo]));
  893 #ifndef PRODUCT
  894   } else if (!do_size) {
  895     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  896 #endif
  897   }
  898   return (UseAVX> 2) ? 6 : 4;
  899 }
  900 
  901 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  902   if( cbuf ) {
  903     emit_opcode(*cbuf, 0x8B );
  904     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  905 #ifndef PRODUCT
  906   } else if( !do_size ) {
  907     if( size != 0 ) st->print("\n\t");
  908     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  909 #endif
  910   }
  911   return size+2;
  912 }
  913 
  914 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  915                                  int offset, int size, outputStream* st ) {
  916   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  917     if( cbuf ) {
  918       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  919       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  920 #ifndef PRODUCT
  921     } else if( !do_size ) {
  922       if( size != 0 ) st->print("\n\t");
  923       st->print("FLD    %s",Matcher::regName[src_lo]);
  924 #endif
  925     }
  926     size += 2;
  927   }
  928 
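  // Reuse the integer register numbers EBX (encoding 3) and EDX (encoding 2)
  // as the ModRM /digit: for the FP store opcodes 0xD9/0xDD, /3 is FSTP
  // (store-and-pop) and /2 is FST (store without pop).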
  929   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  930   const char *op_str;
  931   int op;
  932   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  933     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  934     op = 0xDD;
  935   } else {                   // 32-bit store
  936     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  937     op = 0xD9;
  938     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  939   }
  940 
  941   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  942 }
  943 
  944 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  945 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  946                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  947 
  948 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  949                             int stack_offset, int reg, uint ireg, outputStream* st);
  950 
  951 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  952                                      int dst_offset, uint ireg, outputStream* st) {
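  // Mem-to-mem vector copies go through xmm0; xmm0 itself is parked in the
  // unused stack area just below rsp for the duration of the copy
  // (VecS/VecD use plain push/pop instead).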
  953   if (cbuf) {
  954     MacroAssembler _masm(cbuf);
  955     switch (ireg) {
  956     case Op_VecS:
  957       __ pushl(Address(rsp, src_offset));
  958       __ popl (Address(rsp, dst_offset));
  959       break;
  960     case Op_VecD:
  961       __ pushl(Address(rsp, src_offset));
  962       __ popl (Address(rsp, dst_offset));
  963       __ pushl(Address(rsp, src_offset+4));
  964       __ popl (Address(rsp, dst_offset+4));
  965       break;
  966     case Op_VecX:
  967       __ movdqu(Address(rsp, -16), xmm0);
  968       __ movdqu(xmm0, Address(rsp, src_offset));
  969       __ movdqu(Address(rsp, dst_offset), xmm0);
  970       __ movdqu(xmm0, Address(rsp, -16));
  971       break;
  972     case Op_VecY:
  973       __ vmovdqu(Address(rsp, -32), xmm0);
  974       __ vmovdqu(xmm0, Address(rsp, src_offset));
  975       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, -32));
  977       break;
  978     case Op_VecZ:
  979       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  980       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  981       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  983       break;
  984     default:
  985       ShouldNotReachHere();
  986     }
  987 #ifndef PRODUCT
  988   } else {
  989     switch (ireg) {
  990     case Op_VecS:
  991       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  992                 "popl    [rsp + #%d]",
  993                 src_offset, dst_offset);
  994       break;
  995     case Op_VecD:
  996       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  997                 "popq    [rsp + #%d]\n\t"
  998                 "pushl   [rsp + #%d]\n\t"
  999                 "popq    [rsp + #%d]",
 1000                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1001       break;
 1002      case Op_VecX:
 1003       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1004                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1005                 "movdqu  [rsp + #%d], xmm0\n\t"
 1006                 "movdqu  xmm0, [rsp - #16]",
 1007                 src_offset, dst_offset);
 1008       break;
 1009     case Op_VecY:
 1010       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1011                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1012                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1013                 "vmovdqu xmm0, [rsp - #32]",
 1014                 src_offset, dst_offset);
 1015       break;
 1016     case Op_VecZ:
 1017       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1018                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1019                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1020                 "vmovdqu xmm0, [rsp - #64]",
 1021                 src_offset, dst_offset);
 1022       break;
 1023     default:
 1024       ShouldNotReachHere();
 1025     }
 1026 #endif
 1027   }
 1028 }
 1029 
 1030 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1031   // Get registers to move
 1032   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1033   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1034   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1035   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1036 
 1037   enum RC src_second_rc = rc_class(src_second);
 1038   enum RC src_first_rc = rc_class(src_first);
 1039   enum RC dst_second_rc = rc_class(dst_second);
 1040   enum RC dst_first_rc = rc_class(dst_first);
 1041 
 1042   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1043 
 1044   // Generate spill code!
 1045   int size = 0;
 1046 
 1047   if( src_first == dst_first && src_second == dst_second )
 1048     return size;            // Self copy, no move
 1049 
 1050   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1051     uint ireg = ideal_reg();
 1052     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1053     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1054     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1055     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1056       // mem -> mem
 1057       int src_offset = ra_->reg2offset(src_first);
 1058       int dst_offset = ra_->reg2offset(dst_first);
 1059       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1060     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1061       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1063       int stack_offset = ra_->reg2offset(dst_first);
 1064       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1065     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1066       int stack_offset = ra_->reg2offset(src_first);
 1067       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1068     } else {
 1069       ShouldNotReachHere();
 1070     }
 1071     return 0;
 1072   }
 1073 
 1074   // --------------------------------------
 1075   // Check for mem-mem move.  push/pop to move.
 1076   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1077     if( src_second == dst_first ) { // overlapping stack copy ranges
 1078       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1079       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1080       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1081       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1082     }
 1083     // move low bits
 1084     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1085     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1086     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1087       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1088       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1089     }
 1090     return size;
 1091   }
 1092 
 1093   // --------------------------------------
 1094   // Check for integer reg-reg copy
 1095   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1096     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1097 
 1098   // Check for integer store
 1099   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1100     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1101 
 1102   // Check for integer load
 1103   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1104     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1105 
 1106   // Check for integer reg-xmm reg copy
 1107   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1108     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1109             "no 64 bit integer-float reg moves" );
 1110     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1111   }
 1112   // --------------------------------------
 1113   // Check for float reg-reg copy
 1114   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1115     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1116             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1117     if( cbuf ) {
 1118 
 1119       // Note the mucking with the register encode to compensate for the 0/1
 1120       // indexing issue mentioned in a comment in the reg_def sections
 1121       // for FPR registers many lines above here.
 1122 
 1123       if( src_first != FPR1L_num ) {
 1124         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1125         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1126         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1127         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1128      } else {
 1129         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1130         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1131      }
 1132 #ifndef PRODUCT
 1133     } else if( !do_size ) {
 1134       if( size != 0 ) st->print("\n\t");
 1135       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1136       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1137 #endif
 1138     }
 1139     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1140   }
 1141 
 1142   // Check for float store
 1143   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1144     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1145   }
 1146 
 1147   // Check for float load
 1148   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1149     int offset = ra_->reg2offset(src_first);
 1150     const char *op_str;
 1151     int op;
 1152     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1153       op_str = "FLD_D";
 1154       op = 0xDD;
 1155     } else {                   // 32-bit load
 1156       op_str = "FLD_S";
 1157       op = 0xD9;
 1158       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1159     }
 1160     if( cbuf ) {
 1161       emit_opcode  (*cbuf, op );
 1162       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1163       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1164       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1165 #ifndef PRODUCT
 1166     } else if( !do_size ) {
 1167       if( size != 0 ) st->print("\n\t");
 1168       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1169 #endif
 1170     }
 1171     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1172     return size + 3+offset_size+2;
 1173   }
 1174 
 1175   // Check for xmm reg-reg copy
 1176   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1177     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1178             (src_first+1 == src_second && dst_first+1 == dst_second),
 1179             "no non-adjacent float-moves" );
 1180     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1181   }
 1182 
 1183   // Check for xmm reg-integer reg copy
 1184   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1185     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1186             "no 64 bit float-integer reg moves" );
 1187     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1188   }
 1189 
 1190   // Check for xmm store
 1191   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1192     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1193   }
 1194 
 1195   // Check for float xmm load
 1196   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1197     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1198   }
 1199 
 1200   // Copy from float reg to xmm reg
 1201   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1202     // copy to the top of stack from floating point reg
 1203     // and use LEA to preserve flags
 1204     if( cbuf ) {
 1205       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1206       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1207       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1208       emit_d8(*cbuf,0xF8);
 1209 #ifndef PRODUCT
 1210     } else if( !do_size ) {
 1211       if( size != 0 ) st->print("\n\t");
 1212       st->print("LEA    ESP,[ESP-8]");
 1213 #endif
 1214     }
 1215     size += 4;
 1216 
 1217     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1218 
 1219     // Copy from the temp memory to the xmm reg.
 1220     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1221 
 1222     if( cbuf ) {
 1223       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1224       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1225       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1226       emit_d8(*cbuf,0x08);
 1227 #ifndef PRODUCT
 1228     } else if( !do_size ) {
 1229       if( size != 0 ) st->print("\n\t");
 1230       st->print("LEA    ESP,[ESP+8]");
 1231 #endif
 1232     }
 1233     size += 4;
 1234     return size;
 1235   }
 1236 
 1237   // AVX-512 opmask specific spilling.
 1238   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1239     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1240     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1241     MacroAssembler _masm(cbuf);
 1242     int offset = ra_->reg2offset(src_first);
 1243     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1244     return 0;
 1245   }
 1246 
 1247   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1248     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1249     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1250     MacroAssembler _masm(cbuf);
 1251     int offset = ra_->reg2offset(dst_first);
 1252     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1253     return 0;
 1254   }
 1255 
 1256   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1257     Unimplemented();
 1258     return 0;
 1259   }
 1260 
 1261   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1262     Unimplemented();
 1263     return 0;
 1264   }
 1265 
 1266   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1267     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1268     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1269     MacroAssembler _masm(cbuf);
 1270     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1271     return 0;
 1272   }
 1273 
 1274   assert( size > 0, "missed a case" );
 1275 
 1276   // --------------------------------------------------------------------
 1277   // Check for second bits still needing moving.
 1278   if( src_second == dst_second )
 1279     return size;               // Self copy; no move
 1280   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1281 
 1282   // Check for second word int-int move
 1283   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1284     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1285 
 1286   // Check for second word integer store
 1287   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1288     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1289 
 1290   // Check for second word integer load
 1291   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1292     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1293 
 1294   Unimplemented();
 1295   return 0; // Mute compiler
 1296 }
 1297 
 1298 #ifndef PRODUCT
 1299 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1300   implementation( NULL, ra_, false, st );
 1301 }
 1302 #endif
 1303 
 1304 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1305   implementation( &cbuf, ra_, false, NULL );
 1306 }
 1307 
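      // The spill-copy size is computed by the shared MachNode::size(),
      // which emits the node into a scratch buffer and measures the result.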
 1308 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1309   return MachNode::size(ra_);
 1310 }
 1311 
 1312 
 1313 //=============================================================================
 1314 #ifndef PRODUCT
 1315 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1316   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1317   int reg = ra_->get_reg_first(this);
 1318   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1319 }
 1320 #endif
 1321 
 1322 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1323   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1324   int reg = ra_->get_encode(this);
 1325   if( offset >= 128 ) {
 1326     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1327     emit_rm(cbuf, 0x2, reg, 0x04);
 1328     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1329     emit_d32(cbuf, offset);
 1330   }
 1331   else {
 1332     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1333     emit_rm(cbuf, 0x1, reg, 0x04);
 1334     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1335     emit_d8(cbuf, offset);
 1336   }
 1337 }
 1338 
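      // LEA reg,[ESP+disp8] is 4 bytes (opcode, ModRM, SIB, disp8);
      // with a 32-bit displacement it is 7 bytes.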
 1339 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1340   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1341   if( offset >= 128 ) {
 1342     return 7;
 1343   }
 1344   else {
 1345     return 4;
 1346   }
 1347 }
 1348 
 1349 //=============================================================================
 1350 #ifndef PRODUCT
 1351 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1352   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1353   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1354   st->print_cr("\tNOP");
 1355   st->print_cr("\tNOP");
 1356   if( !OptoBreakpoint )
 1357     st->print_cr("\tNOP");
 1358 }
 1359 #endif
 1360 
 1361 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1362   MacroAssembler masm(&cbuf);
 1363 #ifdef ASSERT
 1364   uint insts_size = cbuf.insts_size();
 1365 #endif
 1366   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1367   masm.jump_cc(Assembler::notEqual,
 1368                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1369   /* WARNING: these NOPs are critical so that the verified entry point is
 1370      properly aligned for patching by NativeJump::patch_verified_entry() */
 1371   int nops_cnt = 2;
 1372   if( !OptoBreakpoint ) // Leave space for int3
 1373      nops_cnt += 1;
 1374   masm.nop(nops_cnt);
 1375 
 1376   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1377 }
 1378 
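      // CMP EAX,[ECX+4] is 3 bytes, the JNE rel32 is 6 bytes, and 2 or 3
      // one-byte NOPs follow, for a total of 11 or 12 bytes.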
 1379 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1380   return OptoBreakpoint ? 11 : 12;
 1381 }
 1382 
 1383 
 1384 //=============================================================================
 1385 
 1386 // Vector calling convention not supported.
 1387 const bool Matcher::supports_vector_calling_convention() {
 1388   return false;
 1389 }
 1390 
 1391 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1392   Unimplemented();
 1393   return OptoRegPair(0, 0);
 1394 }
 1395 
 1396 // Is this branch offset short enough that a short branch can be used?
 1397 //
 1398 // NOTE: If the platform does not provide any short branch variants, then
 1399 //       this method should return false for offset 0.
 1400 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1401   // The passed offset is relative to the address of the branch.
 1402   // On x86 a branch displacement is calculated relative to the address
 1403   // of the next instruction.
 1404   offset -= br_size;
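        // e.g. for a 2-byte short branch whose target is 100 bytes before
        // the branch instruction, the incoming offset is -100 and the
        // encoded displacement is -102, which still fits in a signed byte.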
 1405 
 1406   // the short version of jmpConUCF2 contains multiple branches,
 1407   // making the reach slightly less
 1408   if (rule == jmpConUCF2_rule)
 1409     return (-126 <= offset && offset <= 125);
 1410   return (-128 <= offset && offset <= 127);
 1411 }
 1412 
 1413 // Return whether or not this register is ever used as an argument.  This
 1414 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1415 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1416 // arguments in those registers will not be available to the callee.
 1417 bool Matcher::can_be_java_arg( int reg ) {
 1418   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1419   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1420   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1421   return false;
 1422 }
 1423 
 1424 bool Matcher::is_spillable_arg( int reg ) {
 1425   return can_be_java_arg(reg);
 1426 }
 1427 
 1428 uint Matcher::int_pressure_limit()
 1429 {
 1430   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1431 }
 1432 
 1433 uint Matcher::float_pressure_limit()
 1434 {
 1435   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1436 }
 1437 
 1438 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1439   // Use the hardware integer DIV instruction when
 1440   // it is faster than code which uses a multiply-based sequence.
 1441   // Only when the constant divisor fits into 32 bits
 1442   // (min_jint is excluded so that negating a negative divisor
 1443   // still yields a correct positive 32-bit value).
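        // For example, divisor == 10 or divisor == -7 qualifies (given
        // VM_Version::has_fast_idiv()), while divisor == 0x100000000LL
        // or divisor == min_jint does not.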
 1444   return VM_Version::has_fast_idiv() &&
 1445          (divisor == (int)divisor && divisor != min_jint);
 1446 }
 1447 
 1448 // Register for DIVI projection of divmodI
 1449 RegMask Matcher::divI_proj_mask() {
 1450   return EAX_REG_mask();
 1451 }
 1452 
 1453 // Register for MODI projection of divmodI
 1454 RegMask Matcher::modI_proj_mask() {
 1455   return EDX_REG_mask();
 1456 }
 1457 
 1458 // Register for DIVL projection of divmodL
 1459 RegMask Matcher::divL_proj_mask() {
 1460   ShouldNotReachHere();
 1461   return RegMask();
 1462 }
 1463 
 1464 // Register for MODL projection of divmodL
 1465 RegMask Matcher::modL_proj_mask() {
 1466   ShouldNotReachHere();
 1467   return RegMask();
 1468 }
 1469 
 1470 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1471   return NO_REG_mask();
 1472 }
 1473 
 1474 // Returns true if the high 32 bits of the value are known to be zero.
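      // For example, (AndL x (ConL 0xFF)) and (ConL 0x12345678) qualify,
      // while (ConL -1) does not.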
 1475 bool is_operand_hi32_zero(Node* n) {
 1476   int opc = n->Opcode();
 1477   if (opc == Op_AndL) {
 1478     Node* o2 = n->in(2);
 1479     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1480       return true;
 1481     }
 1482   }
 1483   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1484     return true;
 1485   }
 1486   return false;
 1487 }
 1488 
 1489 %}
 1490 
 1491 //----------ENCODING BLOCK-----------------------------------------------------
 1492 // This block specifies the encoding classes used by the compiler to output
 1493 // byte streams.  Encoding classes generate functions which are called by
 1494 // Machine Instruction Nodes in order to generate the bit encoding of the
 1495 // instruction.  Operands specify their base encoding interface with the
 1496 // interface keyword.  Four interfaces are currently supported:
 1497 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1498 // operand to generate a function which returns its register number when
 1499 // queried.   CONST_INTER causes an operand to generate a function which
 1500 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1501 // operand to generate four functions which return the Base Register, the
 1502 // Index Register, the Scale Value, and the Offset Value of the operand when
 1503 // queried.  COND_INTER causes an operand to generate six functions which
 1504 // return the encoding code (i.e. the encoding bits for the instruction)
 1505 // associated with each basic boolean condition for a conditional instruction.
 1506 // Instructions specify two basic values for encoding.  They use the
 1507 // ins_encode keyword to specify their encoding class (which must be one of
 1508 // the class names specified in the encoding block), and they use the
 1509 // opcode keyword to specify, in order, their primary, secondary, and
 1510 // tertiary opcode.  Only the opcode sections which a particular instruction
 1511 // needs for encoding need to be specified.
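      // For example, a register-register integer add in this file is
      // typically described with something like
      //   opcode(0x03);  ins_encode( OpcP, RegReg( dst, src ) );
      // which emits the 0x03 (ADD r32,r/m32) opcode byte followed by a
      // mod/reg/rm byte naming the two registers.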
 1512 encode %{
 1513   // Build emit functions for each basic byte or larger field in the intel
 1514   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1515   // code in the enc_class source block.  Emit functions will live in the
 1516   // main source block for now.  In future, we can generalize this by
 1517   // adding a syntax that specifies the sizes of fields in an order,
 1518   // so that the adlc can build the emit functions automagically
 1519 
 1520   // Emit primary opcode
 1521   enc_class OpcP %{
 1522     emit_opcode(cbuf, $primary);
 1523   %}
 1524 
 1525   // Emit secondary opcode
 1526   enc_class OpcS %{
 1527     emit_opcode(cbuf, $secondary);
 1528   %}
 1529 
 1530   // Emit opcode directly
 1531   enc_class Opcode(immI d8) %{
 1532     emit_opcode(cbuf, $d8$$constant);
 1533   %}
 1534 
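        // 0x66 is the operand-size override prefix; it selects a 16-bit
        // operand size for the following instruction.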
 1535   enc_class SizePrefix %{
 1536     emit_opcode(cbuf,0x66);
 1537   %}
 1538 
 1539   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1540     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1541   %}
 1542 
 1543   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1544     emit_opcode(cbuf,$opcode$$constant);
 1545     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1546   %}
 1547 
 1548   enc_class mov_r32_imm0( rRegI dst ) %{
 1549     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1550     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1551   %}
 1552 
 1553   enc_class cdq_enc %{
 1554     // Full implementation of Java idiv and irem; checks for
 1555     // special case as described in JVM spec., p.243 & p.271.
 1556     //
 1557     //         normal case                           special case
 1558     //
 1559     // input : eax: dividend                          min_int
 1560     //         reg: divisor                           -1
 1561     //
 1562     // output: eax: quotient  (= eax idiv reg)        min_int
 1563     //         edx: remainder (= eax irem reg)        0
 1564     //
 1565     //  Code sequence:
 1566     //
 1567     //  81 F8 00 00 00 80    cmp         eax,80000000h
 1568     //  0F 85 0B 00 00 00    jne         normal_case
 1569     //  33 D2                xor         edx,edx
 1570     //  83 F9 FF             cmp         ecx,0FFFFFFFFh
 1571     //  0F 84 03 00 00 00    je          done
 1572     //                  normal_case:
 1573     //  99                   cdq
 1574     //  F7 F9                idiv        ecx
 1575     //                  done:
 1576     //
 1577     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1578     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1579     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1580     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1581     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1582     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1583     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1584     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1585     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1586     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1587     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1588     // normal_case:
 1589     emit_opcode(cbuf,0x99);                                         // cdq
 1590     // idiv (note: must be emitted by the user of this rule)
 1591     // normal:
 1592   %}
 1593 
 1594   // Dense encoding for older common ops
 1595   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1596     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1597   %}
 1598 
 1599 
 1600   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
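        // e.g. with a group-1 ALU primary opcode such as 0x81 (op r/m32,imm32),
        // setting the 0x02 bit gives 0x83 (op r/m32,imm8 sign-extended).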
 1601   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1602     // Check for 8-bit immediate, and set sign extend bit in opcode
 1603     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1604       emit_opcode(cbuf, $primary | 0x02);
 1605     }
 1606     else {                          // If 32-bit immediate
 1607       emit_opcode(cbuf, $primary);
 1608     }
 1609   %}
 1610 
 1611   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1612     // Emit primary opcode and set sign-extend bit
 1613     // Check for 8-bit immediate, and set sign extend bit in opcode
 1614     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1615       emit_opcode(cbuf, $primary | 0x02);
 1616     } else {                        // If 32-bit immediate
 1617       emit_opcode(cbuf, $primary);
 1618     }
 1619     // Emit r/m byte with secondary opcode, after primary opcode.
 1620     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1621   %}
 1622 
 1623   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1624     // Check for 8-bit immediate, and set sign extend bit in opcode
 1625     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1626       $$$emit8$imm$$constant;
 1627     }
 1628     else {                          // If 32-bit immediate
 1629       // Output immediate
 1630       $$$emit32$imm$$constant;
 1631     }
 1632   %}
 1633 
 1634   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1635     // Emit primary opcode and set sign-extend bit
 1636     // Check for 8-bit immediate, and set sign extend bit in opcode
 1637     int con = (int)$imm$$constant; // Throw away top bits
 1638     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1639     // Emit r/m byte with secondary opcode, after primary opcode.
 1640     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1641     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1642     else                               emit_d32(cbuf,con);
 1643   %}
 1644 
 1645   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1646     // Emit primary opcode and set sign-extend bit
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1649     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1650     // Emit r/m byte with tertiary opcode, after primary opcode.
 1651     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1652     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1653     else                               emit_d32(cbuf,con);
 1654   %}
 1655 
 1656   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1657     emit_cc(cbuf, $secondary, $dst$$reg );
 1658   %}
 1659 
 1660   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1661     int destlo = $dst$$reg;
 1662     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1663     // bswap lo
 1664     emit_opcode(cbuf, 0x0F);
 1665     emit_cc(cbuf, 0xC8, destlo);
 1666     // bswap hi
 1667     emit_opcode(cbuf, 0x0F);
 1668     emit_cc(cbuf, 0xC8, desthi);
 1669     // xchg lo and hi
 1670     emit_opcode(cbuf, 0x87);
 1671     emit_rm(cbuf, 0x3, destlo, desthi);
 1672   %}
 1673 
 1674   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1675     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1676   %}
 1677 
 1678   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1679     $$$emit8$primary;
 1680     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1681   %}
 1682 
 1683   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1684     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1685     emit_d8(cbuf, op >> 8 );
 1686     emit_d8(cbuf, op & 255);
 1687   %}
 1688 
 1689   // emulate a CMOV with a conditional branch around a MOV
 1690   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1691     // Invert sense of branch from sense of CMOV
 1692     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1693     emit_d8( cbuf, $brOffs$$constant );
 1694   %}
 1695 
 1696   enc_class enc_PartialSubtypeCheck( ) %{
 1697     Register Redi = as_Register(EDI_enc); // result register
 1698     Register Reax = as_Register(EAX_enc); // super class
 1699     Register Recx = as_Register(ECX_enc); // killed
 1700     Register Resi = as_Register(ESI_enc); // sub class
 1701     Label miss;
 1702 
 1703     MacroAssembler _masm(&cbuf);
 1704     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1705                                      NULL, &miss,
 1706                                      /*set_cond_codes:*/ true);
 1707     if ($primary) {
 1708       __ xorptr(Redi, Redi);
 1709     }
 1710     __ bind(miss);
 1711   %}
 1712 
 1713   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1714     MacroAssembler masm(&cbuf);
 1715     int start = masm.offset();
 1716     if (UseSSE >= 2) {
 1717       if (VerifyFPU) {
 1718         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1719       }
 1720     } else {
 1721       // External c_calling_convention expects the FPU stack to be 'clean'.
 1722       // Compiled code leaves it dirty.  Do cleanup now.
 1723       masm.empty_FPU_stack();
 1724     }
 1725     if (sizeof_FFree_Float_Stack_All == -1) {
 1726       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1727     } else {
 1728       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1729     }
 1730   %}
 1731 
 1732   enc_class Verify_FPU_For_Leaf %{
 1733     if( VerifyFPU ) {
 1734       MacroAssembler masm(&cbuf);
 1735       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1736     }
 1737   %}
 1738 
 1739   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1740     // This is the instruction starting address for relocation info.
 1741     MacroAssembler _masm(&cbuf);
 1742     cbuf.set_insts_mark();
 1743     $$$emit8$primary;
 1744     // CALL directly to the runtime
 1745     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1746                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1747     __ post_call_nop();
 1748 
 1749     if (UseSSE >= 2) {
 1750       MacroAssembler _masm(&cbuf);
 1751       BasicType rt = tf()->return_type();
 1752 
 1753       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1754         // A C runtime call where the return value is unused.  In SSE2+
 1755         // mode the result needs to be removed from the FPU stack.  It's
 1756         // likely that this function call could be removed by the
 1757         // optimizer if the C function is a pure function.
 1758         __ ffree(0);
 1759       } else if (rt == T_FLOAT) {
 1760         __ lea(rsp, Address(rsp, -4));
 1761         __ fstp_s(Address(rsp, 0));
 1762         __ movflt(xmm0, Address(rsp, 0));
 1763         __ lea(rsp, Address(rsp,  4));
 1764       } else if (rt == T_DOUBLE) {
 1765         __ lea(rsp, Address(rsp, -8));
 1766         __ fstp_d(Address(rsp, 0));
 1767         __ movdbl(xmm0, Address(rsp, 0));
 1768         __ lea(rsp, Address(rsp,  8));
 1769       }
 1770     }
 1771   %}
 1772 
 1773   enc_class pre_call_resets %{
 1774     // If method sets FPU control word restore it here
 1775     debug_only(int off0 = cbuf.insts_size());
 1776     if (ra_->C->in_24_bit_fp_mode()) {
 1777       MacroAssembler _masm(&cbuf);
 1778       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1779     }
 1780     // Clear upper bits of YMM registers when current compiled code uses
 1781     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1782     MacroAssembler _masm(&cbuf);
 1783     __ vzeroupper();
 1784     debug_only(int off1 = cbuf.insts_size());
 1785     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1786   %}
 1787 
 1788   enc_class post_call_FPU %{
 1789     // If method sets FPU control word do it here also
 1790     if (Compile::current()->in_24_bit_fp_mode()) {
 1791       MacroAssembler masm(&cbuf);
 1792       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1793     }
 1794   %}
 1795 
 1796   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1797     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1798     // who we intended to call.
 1799     MacroAssembler _masm(&cbuf);
 1800     cbuf.set_insts_mark();
 1801     $$$emit8$primary;
 1802 
 1803     if (!_method) {
 1804       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1805                      runtime_call_Relocation::spec(),
 1806                      RELOC_IMM32);
 1807       __ post_call_nop();
 1808     } else {
 1809       int method_index = resolved_method_index(cbuf);
 1810       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1811                                                   : static_call_Relocation::spec(method_index);
 1812       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1813                      rspec, RELOC_DISP32);
 1814       __ post_call_nop();
 1815       address mark = cbuf.insts_mark();
 1816       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1817         // Calls of the same statically bound method can share
 1818         // a stub to the interpreter.
 1819         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1820       } else {
 1821         // Emit stubs for static call.
 1822         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1823         if (stub == NULL) {
 1824           ciEnv::current()->record_failure("CodeCache is full");
 1825           return;
 1826         }
 1827       }
 1828     }
 1829   %}
 1830 
 1831   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1832     MacroAssembler _masm(&cbuf);
 1833     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1834     __ post_call_nop();
 1835   %}
 1836 
 1837   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1838     int disp = in_bytes(Method::from_compiled_offset());
 1839     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1840 
 1841     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1842     MacroAssembler _masm(&cbuf);
 1843     cbuf.set_insts_mark();
 1844     $$$emit8$primary;
 1845     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1846     emit_d8(cbuf, disp);             // Displacement
 1847     __ post_call_nop();
 1848   %}
 1849 
 1850 //   Following encoding is no longer used, but may be restored if calling
 1851 //   convention changes significantly.
 1852 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1853 //
 1854 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1855 //     // int ic_reg     = Matcher::inline_cache_reg();
 1856 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1857 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1858 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1859 //
 1860 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1861 //     // // so we load it immediately before the call
 1862 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1863 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1864 //
 1865 //     // xor rbp,ebp
 1866 //     emit_opcode(cbuf, 0x33);
 1867 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1868 //
 1869 //     // CALL to interpreter.
 1870 //     cbuf.set_insts_mark();
 1871 //     $$$emit8$primary;
 1872 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1873 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1874 //   %}
 1875 
 1876   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1877     $$$emit8$primary;
 1878     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1879     $$$emit8$shift$$constant;
 1880   %}
 1881 
 1882   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1883     // Load immediate does not have a zero or sign extended version
 1884     // for 8-bit immediates
 1885     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1886     $$$emit32$src$$constant;
 1887   %}
 1888 
 1889   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1890     // Load immediate does not have a zero or sign extended version
 1891     // for 8-bit immediates
 1892     emit_opcode(cbuf, $primary + $dst$$reg);
 1893     $$$emit32$src$$constant;
 1894   %}
 1895 
 1896   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1897     // Load immediate does not have a zero or sign extended version
 1898     // for 8-bit immediates
 1899     int dst_enc = $dst$$reg;
 1900     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1901     if (src_con == 0) {
 1902       // xor dst, dst
 1903       emit_opcode(cbuf, 0x33);
 1904       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1905     } else {
 1906       emit_opcode(cbuf, $primary + dst_enc);
 1907       emit_d32(cbuf, src_con);
 1908     }
 1909   %}
 1910 
 1911   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1912     // Load immediate does not have a zero or sign extended version
 1913     // for 8-bit immediates
 1914     int dst_enc = $dst$$reg + 2;
 1915     int src_con = ((julong)($src$$constant)) >> 32;
 1916     if (src_con == 0) {
 1917       // xor dst, dst
 1918       emit_opcode(cbuf, 0x33);
 1919       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1920     } else {
 1921       emit_opcode(cbuf, $primary + dst_enc);
 1922       emit_d32(cbuf, src_con);
 1923     }
 1924   %}
 1925 
 1926 
 1927   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1928   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1929     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1930   %}
 1931 
 1932   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1933     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1934   %}
 1935 
 1936   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1937     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1938   %}
 1939 
 1940   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1941     $$$emit8$primary;
 1942     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1943   %}
 1944 
 1945   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1946     $$$emit8$secondary;
 1947     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1948   %}
 1949 
 1950   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1951     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1952   %}
 1953 
 1954   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1955     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1956   %}
 1957 
 1958   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1959     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1960   %}
 1961 
 1962   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1963     // Output immediate
 1964     $$$emit32$src$$constant;
 1965   %}
 1966 
 1967   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1968     // Output Float immediate bits
 1969     jfloat jf = $src$$constant;
 1970     int    jf_as_bits = jint_cast( jf );
 1971     emit_d32(cbuf, jf_as_bits);
 1972   %}
 1973 
 1974   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1975     // Output Float immediate bits
 1976     jfloat jf = $src$$constant;
 1977     int    jf_as_bits = jint_cast( jf );
 1978     emit_d32(cbuf, jf_as_bits);
 1979   %}
 1980 
 1981   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1982     // Output immediate
 1983     $$$emit16$src$$constant;
 1984   %}
 1985 
 1986   enc_class Con_d32(immI src) %{
 1987     emit_d32(cbuf,$src$$constant);
 1988   %}
 1989 
 1990   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1991     // Output immediate memory reference
 1992     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1993     emit_d32(cbuf, 0x00);
 1994   %}
 1995 
 1996   enc_class lock_prefix( ) %{
 1997     emit_opcode(cbuf,0xF0);         // [Lock]
 1998   %}
 1999 
 2000   // Cmp-xchg long value.
 2001   // Note: we need to swap ebx and ecx before and after the
 2002   //       cmpxchg8 instruction because the instruction uses
 2003   //       ecx as the high-order word of the new value to store but
 2004   //       our register encoding uses ebx.
 2005   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2006 
 2007     // XCHG  ebx,ecx
 2008     emit_opcode(cbuf,0x87);
 2009     emit_opcode(cbuf,0xD9);
 2010     // [Lock]
 2011     emit_opcode(cbuf,0xF0);
 2012     // CMPXCHG8 [Eptr]
 2013     emit_opcode(cbuf,0x0F);
 2014     emit_opcode(cbuf,0xC7);
 2015     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2016     // XCHG  ebx,ecx
 2017     emit_opcode(cbuf,0x87);
 2018     emit_opcode(cbuf,0xD9);
 2019   %}
 2020 
 2021   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2022     // [Lock]
 2023     emit_opcode(cbuf,0xF0);
 2024 
 2025     // CMPXCHG [Eptr]
 2026     emit_opcode(cbuf,0x0F);
 2027     emit_opcode(cbuf,0xB1);
 2028     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2029   %}
 2030 
 2031   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2032     // [Lock]
 2033     emit_opcode(cbuf,0xF0);
 2034 
 2035     // CMPXCHGB [Eptr]
 2036     emit_opcode(cbuf,0x0F);
 2037     emit_opcode(cbuf,0xB0);
 2038     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2039   %}
 2040 
 2041   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2042     // [Lock]
 2043     emit_opcode(cbuf,0xF0);
 2044 
 2045     // 0x66: operand-size prefix (16-bit operand)
 2046     emit_opcode(cbuf, 0x66);
 2047 
 2048     // CMPXCHGW [Eptr]
 2049     emit_opcode(cbuf,0x0F);
 2050     emit_opcode(cbuf,0xB1);
 2051     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2052   %}
 2053 
 2054   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2055     int res_encoding = $res$$reg;
 2056 
 2057     // MOV  res,0
 2058     emit_opcode( cbuf, 0xB8 + res_encoding);
 2059     emit_d32( cbuf, 0 );
 2060     // JNE,s  fail
 2061     emit_opcode(cbuf,0x75);
 2062     emit_d8(cbuf, 5 );
 2063     // MOV  res,1
 2064     emit_opcode( cbuf, 0xB8 + res_encoding);
 2065     emit_d32( cbuf, 1 );
 2066     // fail:
 2067   %}
 2068 
 2069   enc_class set_instruction_start( ) %{
 2070     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2071   %}
 2072 
 2073   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2074     int reg_encoding = $ereg$$reg;
 2075     int base  = $mem$$base;
 2076     int index = $mem$$index;
 2077     int scale = $mem$$scale;
 2078     int displace = $mem$$disp;
 2079     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2080     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2081   %}
 2082 
 2083   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2084     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2085     int base  = $mem$$base;
 2086     int index = $mem$$index;
 2087     int scale = $mem$$scale;
 2088     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2089     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2090     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2091   %}
 2092 
 2093   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2094     int r1, r2;
 2095     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2096     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2097     emit_opcode(cbuf,0x0F);
 2098     emit_opcode(cbuf,$tertiary);
 2099     emit_rm(cbuf, 0x3, r1, r2);
 2100     emit_d8(cbuf,$cnt$$constant);
 2101     emit_d8(cbuf,$primary);
 2102     emit_rm(cbuf, 0x3, $secondary, r1);
 2103     emit_d8(cbuf,$cnt$$constant);
 2104   %}
 2105 
 2106   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2107     emit_opcode( cbuf, 0x8B ); // Move
 2108     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2109     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2110       emit_d8(cbuf,$primary);
 2111       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2112       emit_d8(cbuf,$cnt$$constant-32);
 2113     }
 2114     emit_d8(cbuf,$primary);
 2115     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2116     emit_d8(cbuf,31);
 2117   %}
 2118 
 2119   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2120     int r1, r2;
 2121     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2122     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2123 
 2124     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2125     emit_rm(cbuf, 0x3, r1, r2);
 2126     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2127       emit_opcode(cbuf,$primary);
 2128       emit_rm(cbuf, 0x3, $secondary, r1);
 2129       emit_d8(cbuf,$cnt$$constant-32);
 2130     }
 2131     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2132     emit_rm(cbuf, 0x3, r2, r2);
 2133   %}
 2134 
 2135   // Clone of RegMem but accepts an extra parameter to access each
 2136   // half of a double in memory; it never needs relocation info.
 2137   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2138     emit_opcode(cbuf,$opcode$$constant);
 2139     int reg_encoding = $rm_reg$$reg;
 2140     int base     = $mem$$base;
 2141     int index    = $mem$$index;
 2142     int scale    = $mem$$scale;
 2143     int displace = $mem$$disp + $disp_for_half$$constant;
 2144     relocInfo::relocType disp_reloc = relocInfo::none;
 2145     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2146   %}
 2147 
 2148   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2149   //
 2150   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2151   // and it never needs relocation information.
 2152   // Frequently used to move data between FPU's Stack Top and memory.
 2153   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2154     int rm_byte_opcode = $rm_opcode$$constant;
 2155     int base     = $mem$$base;
 2156     int index    = $mem$$index;
 2157     int scale    = $mem$$scale;
 2158     int displace = $mem$$disp;
 2159     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2160     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2161   %}
 2162 
 2163   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2164     int rm_byte_opcode = $rm_opcode$$constant;
 2165     int base     = $mem$$base;
 2166     int index    = $mem$$index;
 2167     int scale    = $mem$$scale;
 2168     int displace = $mem$$disp;
 2169     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2170     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2171   %}
 2172 
 2173   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2174     int reg_encoding = $dst$$reg;
 2175     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2176     int index        = 0x04;            // 0x04 indicates no index
 2177     int scale        = 0x00;            // 0x00 indicates no scale
 2178     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2179     relocInfo::relocType disp_reloc = relocInfo::none;
 2180     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2181   %}
 2182 
 2183   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2184     // Compare dst,src
 2185     emit_opcode(cbuf,0x3B);
 2186     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2187     // jmp dst < src around move
 2188     emit_opcode(cbuf,0x7C);
 2189     emit_d8(cbuf,2);
 2190     // move dst,src
 2191     emit_opcode(cbuf,0x8B);
 2192     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2193   %}
 2194 
 2195   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2196     // Compare dst,src
 2197     emit_opcode(cbuf,0x3B);
 2198     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2199     // jmp dst > src around move
 2200     emit_opcode(cbuf,0x7F);
 2201     emit_d8(cbuf,2);
 2202     // move dst,src
 2203     emit_opcode(cbuf,0x8B);
 2204     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2205   %}
 2206 
 2207   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2208     // If src is FPR1, we can just FST to store it.
 2209     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2210     int reg_encoding = 0x2; // Just store
 2211     int base  = $mem$$base;
 2212     int index = $mem$$index;
 2213     int scale = $mem$$scale;
 2214     int displace = $mem$$disp;
 2215     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2216     if( $src$$reg != FPR1L_enc ) {
 2217       reg_encoding = 0x3;  // Store & pop
 2218       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2219       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2220     }
 2221     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2222     emit_opcode(cbuf,$primary);
 2223     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2224   %}
 2225 
 2226   enc_class neg_reg(rRegI dst) %{
 2227     // NEG $dst
 2228     emit_opcode(cbuf,0xF7);
 2229     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2230   %}
 2231 
 2232   enc_class setLT_reg(eCXRegI dst) %{
 2233     // SETLT $dst
 2234     emit_opcode(cbuf,0x0F);
 2235     emit_opcode(cbuf,0x9C);
 2236     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2237   %}
 2238 
 2239   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2240     int tmpReg = $tmp$$reg;
 2241 
 2242     // SUB $p,$q
 2243     emit_opcode(cbuf,0x2B);
 2244     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2245     // SBB $tmp,$tmp
 2246     emit_opcode(cbuf,0x1B);
 2247     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2248     // AND $tmp,$y
 2249     emit_opcode(cbuf,0x23);
 2250     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2251     // ADD $p,$tmp
 2252     emit_opcode(cbuf,0x03);
 2253     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2254   %}
 2255 
 2256   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2257     // TEST shift,32
 2258     emit_opcode(cbuf,0xF7);
 2259     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2260     emit_d32(cbuf,0x20);
 2261     // JEQ,s small
 2262     emit_opcode(cbuf, 0x74);
 2263     emit_d8(cbuf, 0x04);
 2264     // MOV    $dst.hi,$dst.lo
 2265     emit_opcode( cbuf, 0x8B );
 2266     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2267     // CLR    $dst.lo
 2268     emit_opcode(cbuf, 0x33);
 2269     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2270 // small:
 2271     // SHLD   $dst.hi,$dst.lo,$shift
 2272     emit_opcode(cbuf,0x0F);
 2273     emit_opcode(cbuf,0xA5);
 2274     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2275     // SHL    $dst.lo,$shift
 2276     emit_opcode(cbuf,0xD3);
 2277     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2278   %}
 2279 
 2280   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2281     // TEST shift,32
 2282     emit_opcode(cbuf,0xF7);
 2283     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2284     emit_d32(cbuf,0x20);
 2285     // JEQ,s small
 2286     emit_opcode(cbuf, 0x74);
 2287     emit_d8(cbuf, 0x04);
 2288     // MOV    $dst.lo,$dst.hi
 2289     emit_opcode( cbuf, 0x8B );
 2290     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2291     // CLR    $dst.hi
 2292     emit_opcode(cbuf, 0x33);
 2293     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2294 // small:
 2295     // SHRD   $dst.lo,$dst.hi,$shift
 2296     emit_opcode(cbuf,0x0F);
 2297     emit_opcode(cbuf,0xAD);
 2298     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2299     // SHR    $dst.hi,$shift
 2300     emit_opcode(cbuf,0xD3);
 2301     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2302   %}
 2303 
 2304   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2305     // TEST shift,32
 2306     emit_opcode(cbuf,0xF7);
 2307     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2308     emit_d32(cbuf,0x20);
 2309     // JEQ,s small
 2310     emit_opcode(cbuf, 0x74);
 2311     emit_d8(cbuf, 0x05);
 2312     // MOV    $dst.lo,$dst.hi
 2313     emit_opcode( cbuf, 0x8B );
 2314     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2315     // SAR    $dst.hi,31
 2316     emit_opcode(cbuf, 0xC1);
 2317     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2318     emit_d8(cbuf, 0x1F );
 2319 // small:
 2320     // SHRD   $dst.lo,$dst.hi,$shift
 2321     emit_opcode(cbuf,0x0F);
 2322     emit_opcode(cbuf,0xAD);
 2323     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2324     // SAR    $dst.hi,$shift
 2325     emit_opcode(cbuf,0xD3);
 2326     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2327   %}
 2328 
 2329 
 2330   // ----------------- Encodings for floating point unit -----------------
 2331   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2332   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2333     $$$emit8$primary;
 2334     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2335   %}
 2336 
 2337   // Pop argument in FPR0 with FSTP ST(0)
 2338   enc_class PopFPU() %{
 2339     emit_opcode( cbuf, 0xDD );
 2340     emit_d8( cbuf, 0xD8 );
 2341   %}
 2342 
 2343   // !!!!! equivalent to Pop_Reg_F
 2344   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2345     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2346     emit_d8( cbuf, 0xD8+$dst$$reg );
 2347   %}
 2348 
 2349   enc_class Push_Reg_DPR( regDPR dst ) %{
 2350     emit_opcode( cbuf, 0xD9 );
 2351     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2352   %}
 2353 
 2354   enc_class strictfp_bias1( regDPR dst ) %{
 2355     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2356     emit_opcode( cbuf, 0x2D );
 2357     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2358     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2359     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2360   %}
 2361 
 2362   enc_class strictfp_bias2( regDPR dst ) %{
 2363     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2364     emit_opcode( cbuf, 0x2D );
 2365     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2366     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2367     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2368   %}
 2369 
 2370   // Special case for moving an integer register to a stack slot.
 2371   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2372     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2373   %}
 2374 
 2375   // Special case for moving a register to a stack slot.
 2376   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2377     // Opcode already emitted
 2378     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2379     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2380     emit_d32(cbuf, $dst$$disp);   // Displacement
 2381   %}
 2382 
 2383   // Push the integer in stackSlot 'src' onto FP-stack
 2384   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2385     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2386   %}
 2387 
 2388   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2389   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2390     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2391   %}
 2392 
 2393   // Same as Pop_Mem_F except for opcode
 2394   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2395   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2396     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2397   %}
 2398 
 2399   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2400     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2401     emit_d8( cbuf, 0xD8+$dst$$reg );
 2402   %}
 2403 
 2404   enc_class Push_Reg_FPR( regFPR dst ) %{
 2405     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2406     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2407   %}
 2408 
 2409   // Push FPU's float to a stack-slot, and pop FPU-stack
 2410   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2411     int pop = 0x02;
 2412     if ($src$$reg != FPR1L_enc) {
 2413       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2414       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2415       pop = 0x03;
 2416     }
 2417     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2418   %}
 2419 
 2420   // Push FPU's double to a stack-slot, and pop FPU-stack
 2421   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2422     int pop = 0x02;
 2423     if ($src$$reg != FPR1L_enc) {
 2424       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2425       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2426       pop = 0x03;
 2427     }
 2428     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2429   %}
 2430 
 2431   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2432   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2433     int pop = 0xD0 - 1; // -1 since we skip FLD
 2434     if ($src$$reg != FPR1L_enc) {
 2435       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2436       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2437       pop = 0xD8;
 2438     }
 2439     emit_opcode( cbuf, 0xDD );
 2440     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2441   %}
 2442 
 2443 
 2444   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2445     // load dst in FPR0
 2446     emit_opcode( cbuf, 0xD9 );
 2447     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2448     if ($src$$reg != FPR1L_enc) {
 2449       // fincstp
 2450       emit_opcode (cbuf, 0xD9);
 2451       emit_opcode (cbuf, 0xF7);
 2452       // swap src with FPR1:
 2453       // FXCH FPR1 with src
 2454       emit_opcode(cbuf, 0xD9);
 2455       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2456       // fdecstp
 2457       emit_opcode (cbuf, 0xD9);
 2458       emit_opcode (cbuf, 0xF6);
 2459     }
 2460   %}
 2461 
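        // Spill both XMM doubles through an 8-byte stack temp and load them
        // onto the x87 stack: afterwards src0 is in ST(0) and src1 in ST(1).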
 2462   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2463     MacroAssembler _masm(&cbuf);
 2464     __ subptr(rsp, 8);
 2465     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2466     __ fld_d(Address(rsp, 0));
 2467     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2468     __ fld_d(Address(rsp, 0));
 2469   %}
 2470 
 2471   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2472     MacroAssembler _masm(&cbuf);
 2473     __ subptr(rsp, 4);
 2474     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2475     __ fld_s(Address(rsp, 0));
 2476     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2477     __ fld_s(Address(rsp, 0));
 2478   %}
 2479 
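        // Store and pop the x87 result (ST(0)) through the stack temp into
        // the destination XMM register, then release the 8-byte temp.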
 2480   enc_class Push_ResultD(regD dst) %{
 2481     MacroAssembler _masm(&cbuf);
 2482     __ fstp_d(Address(rsp, 0));
 2483     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2484     __ addptr(rsp, 8);
 2485   %}
 2486 
 2487   enc_class Push_ResultF(regF dst, immI d8) %{
 2488     MacroAssembler _masm(&cbuf);
 2489     __ fstp_s(Address(rsp, 0));
 2490     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2491     __ addptr(rsp, $d8$$constant);
 2492   %}
 2493 
 2494   enc_class Push_SrcD(regD src) %{
 2495     MacroAssembler _masm(&cbuf);
 2496     __ subptr(rsp, 8);
 2497     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2498     __ fld_d(Address(rsp, 0));
 2499   %}
 2500 
 2501   enc_class push_stack_temp_qword() %{
 2502     MacroAssembler _masm(&cbuf);
 2503     __ subptr(rsp, 8);
 2504   %}
 2505 
 2506   enc_class pop_stack_temp_qword() %{
 2507     MacroAssembler _masm(&cbuf);
 2508     __ addptr(rsp, 8);
 2509   %}
 2510 
 2511   enc_class push_xmm_to_fpr1(regD src) %{
 2512     MacroAssembler _masm(&cbuf);
 2513     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2514     __ fld_d(Address(rsp, 0));
 2515   %}
 2516 
 2517   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2518     if ($src$$reg != FPR1L_enc) {
 2519       // fincstp
 2520       emit_opcode (cbuf, 0xD9);
 2521       emit_opcode (cbuf, 0xF7);
 2522       // FXCH FPR1 with src
 2523       emit_opcode(cbuf, 0xD9);
 2524       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2525       // fdecstp
 2526       emit_opcode (cbuf, 0xD9);
 2527       emit_opcode (cbuf, 0xF6);
 2528     }
 2529     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2530     // // FSTP   FPR$dst$$reg
 2531     // emit_opcode( cbuf, 0xDD );
 2532     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2533   %}
 2534 
 2535   enc_class fnstsw_sahf_skip_parity() %{
 2536     // fnstsw ax
 2537     emit_opcode( cbuf, 0xDF );
 2538     emit_opcode( cbuf, 0xE0 );
 2539     // sahf
 2540     emit_opcode( cbuf, 0x9E );
 2541     // jnp  ::skip
 2542     emit_opcode( cbuf, 0x7B );
 2543     emit_opcode( cbuf, 0x05 );
 2544   %}
 2545 
 2546   enc_class emitModDPR() %{
 2547     // fprem must be iterative
 2548     // :: loop
 2549     // fprem
 2550     emit_opcode( cbuf, 0xD9 );
 2551     emit_opcode( cbuf, 0xF8 );
 2552     // wait
 2553     emit_opcode( cbuf, 0x9b );
 2554     // fnstsw ax
 2555     emit_opcode( cbuf, 0xDF );
 2556     emit_opcode( cbuf, 0xE0 );
 2557     // sahf
 2558     emit_opcode( cbuf, 0x9E );
 2559     // jp  ::loop
 2560     emit_opcode( cbuf, 0x0F );
 2561     emit_opcode( cbuf, 0x8A );
 2562     emit_opcode( cbuf, 0xF4 );
 2563     emit_opcode( cbuf, 0xFF );
 2564     emit_opcode( cbuf, 0xFF );
 2565     emit_opcode( cbuf, 0xFF );
 2566   %}
 2567 
 2568   enc_class fpu_flags() %{
 2569     // fnstsw_ax
 2570     emit_opcode( cbuf, 0xDF);
 2571     emit_opcode( cbuf, 0xE0);
 2572     // test ax,0x0400
 2573     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2574     emit_opcode( cbuf, 0xA9 );
 2575     emit_d16   ( cbuf, 0x0400 );
 2576     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2577     // // test rax,0x0400
 2578     // emit_opcode( cbuf, 0xA9 );
 2579     // emit_d32   ( cbuf, 0x00000400 );
 2580     //
 2581     // jz exit (no unordered comparison)
 2582     emit_opcode( cbuf, 0x74 );
 2583     emit_d8    ( cbuf, 0x02 );
 2584     // mov ah,1 - treat as LT case (set carry flag)
 2585     emit_opcode( cbuf, 0xB4 );
 2586     emit_d8    ( cbuf, 0x01 );
 2587     // sahf
 2588     emit_opcode( cbuf, 0x9E);
 2589   %}
 2590 
 2591   enc_class cmpF_P6_fixup() %{
 2592     // Fixup the integer flags in case comparison involved a NaN
 2593     //
 2594     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2595     emit_opcode( cbuf, 0x7B );
 2596     emit_d8    ( cbuf, 0x03 );
 2597     // MOV AH,1 - treat as LT case (set carry flag)
 2598     emit_opcode( cbuf, 0xB4 );
 2599     emit_d8    ( cbuf, 0x01 );
 2600     // SAHF
 2601     emit_opcode( cbuf, 0x9E);
 2602     // NOP     // target for branch to avoid branch to branch
 2603     emit_opcode( cbuf, 0x90);
 2604   %}
 2605 
 2606 //     fnstsw_ax();
 2607 //     sahf();
 2608 //     movl(dst, nan_result);
 2609 //     jcc(Assembler::parity, exit);
 2610 //     movl(dst, less_result);
 2611 //     jcc(Assembler::below, exit);
 2612 //     movl(dst, equal_result);
 2613 //     jcc(Assembler::equal, exit);
 2614 //     movl(dst, greater_result);
 2615 
 2616 // less_result     =  1;
 2617 // greater_result  = -1;
 2618 // equal_result    = 0;
 2619 // nan_result      = -1;
 2620 
 2621   enc_class CmpF_Result(rRegI dst) %{
 2622     // fnstsw_ax();
 2623     emit_opcode( cbuf, 0xDF);
 2624     emit_opcode( cbuf, 0xE0);
 2625     // sahf
 2626     emit_opcode( cbuf, 0x9E);
 2627     // movl(dst, nan_result);
 2628     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2629     emit_d32( cbuf, -1 );
 2630     // jcc(Assembler::parity, exit);
 2631     emit_opcode( cbuf, 0x7A );
 2632     emit_d8    ( cbuf, 0x13 );
 2633     // movl(dst, less_result);
 2634     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2635     emit_d32( cbuf, -1 );
 2636     // jcc(Assembler::below, exit);
 2637     emit_opcode( cbuf, 0x72 );
 2638     emit_d8    ( cbuf, 0x0C );
 2639     // movl(dst, equal_result);
 2640     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2641     emit_d32( cbuf, 0 );
 2642     // jcc(Assembler::equal, exit);
 2643     emit_opcode( cbuf, 0x74 );
 2644     emit_d8    ( cbuf, 0x05 );
 2645     // movl(dst, greater_result);
 2646     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2647     emit_d32( cbuf, 1 );
 2648   %}
 2649 
 2650 
 2651   // Compare the longs and set flags
 2652   // BROKEN!  Do Not use as-is
 2653   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2654     // CMP    $src1.hi,$src2.hi
 2655     emit_opcode( cbuf, 0x3B );
 2656     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2657     // JNE,s  done
 2658     emit_opcode(cbuf,0x75);
 2659     emit_d8(cbuf, 2 );
 2660     // CMP    $src1.lo,$src2.lo
 2661     emit_opcode( cbuf, 0x3B );
 2662     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2663 // done:
 2664   %}
 2665 
 2666   enc_class convert_int_long( regL dst, rRegI src ) %{
 2667     // mov $dst.lo,$src
 2668     int dst_encoding = $dst$$reg;
 2669     int src_encoding = $src$$reg;
 2670     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2671     // mov $dst.hi,$src
 2672     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2673     // sar $dst.hi,31
 2674     emit_opcode( cbuf, 0xC1 );
 2675     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2676     emit_d8(cbuf, 0x1F );
 2677   %}
 2678 
 2679   enc_class convert_long_double( eRegL src ) %{
 2680     // push $src.hi
 2681     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2682     // push $src.lo
 2683     emit_opcode(cbuf, 0x50+$src$$reg  );
 2684     // fild 64-bits at [SP]
 2685     emit_opcode(cbuf,0xdf);
 2686     emit_d8(cbuf, 0x6C);
 2687     emit_d8(cbuf, 0x24);
 2688     emit_d8(cbuf, 0x00);
 2689     // pop stack
 2690     emit_opcode(cbuf, 0x83); // add  SP, #8
 2691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2692     emit_d8(cbuf, 0x8);
 2693   %}
 2694 
 2695   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2696     // IMUL   EDX:EAX,$src1
 2697     emit_opcode( cbuf, 0xF7 );
 2698     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2699     // SAR    EDX,$cnt-32
 2700     int shift_count = ((int)$cnt$$constant) - 32;
 2701     if (shift_count > 0) {
 2702       emit_opcode(cbuf, 0xC1);
 2703       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2704       emit_d8(cbuf, shift_count);
 2705     }
 2706   %}
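  // Note on the shift above: an arithmetic right shift of the 64-bit
  // product EDX:EAX by a count in [32,63] discards EAX entirely, so the
  // 32-bit result is just EDX >> (cnt - 32); for cnt == 32 no shift needs
  // to be emitted at all.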
 2707 
 2708   // this version doesn't have add sp, 8
 2709   enc_class convert_long_double2( eRegL src ) %{
 2710     // push $src.hi
 2711     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2712     // push $src.lo
 2713     emit_opcode(cbuf, 0x50+$src$$reg  );
 2714     // fild 64-bits at [SP]
 2715     emit_opcode(cbuf,0xdf);
 2716     emit_d8(cbuf, 0x6C);
 2717     emit_d8(cbuf, 0x24);
 2718     emit_d8(cbuf, 0x00);
 2719   %}
 2720 
 2721   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2722     // Basic idea: long = (long)int * (long)int
 2723     // IMUL EDX:EAX, src
 2724     emit_opcode( cbuf, 0xF7 );
 2725     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2726   %}
 2727 
 2728   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2729     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2730     // MUL EDX:EAX, src
 2731     emit_opcode( cbuf, 0xF7 );
 2732     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2733   %}
 2734 
 2735   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2736     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2737     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2738     // MOV    $tmp,$src.lo
 2739     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2740     // IMUL   $tmp,EDX
 2741     emit_opcode( cbuf, 0x0F );
 2742     emit_opcode( cbuf, 0xAF );
 2743     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2744     // MOV    EDX,$src.hi
 2745     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2746     // IMUL   EDX,EAX
 2747     emit_opcode( cbuf, 0x0F );
 2748     emit_opcode( cbuf, 0xAF );
 2749     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2750     // ADD    $tmp,EDX
 2751     emit_opcode( cbuf, 0x03 );
 2752     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2753     // MUL   EDX:EAX,$src.lo
 2754     emit_opcode( cbuf, 0xF7 );
 2755     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2757     emit_opcode( cbuf, 0x03 );
 2758     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2759   %}
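  // For reference, the decomposition behind the formula above, writing
  // x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
  //   x*y = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi)
  // The last term lies entirely above bit 63 and is dropped, which is why
  // only three partial products are computed.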
 2760 
 2761   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2762     // Basic idea: lo(result) = lo(src * y_lo)
 2763     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2764     // IMUL   $tmp,EDX,$src
 2765     emit_opcode( cbuf, 0x6B );
 2766     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2767     emit_d8( cbuf, (int)$src$$constant );
 2768     // MOV    EDX,$src
 2769     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2770     emit_d32( cbuf, (int)$src$$constant );
 2771     // MUL   EDX:EAX,EDX
 2772     emit_opcode( cbuf, 0xF7 );
 2773     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2775     emit_opcode( cbuf, 0x03 );
 2776     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2777   %}
 2778 
 2779   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2780     // PUSH src1.hi
 2781     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2782     // PUSH src1.lo
 2783     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2784     // PUSH src2.hi
 2785     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2786     // PUSH src2.lo
 2787     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2788     // CALL directly to the runtime
 2789     MacroAssembler _masm(&cbuf);
 2790     cbuf.set_insts_mark();
 2791     emit_opcode(cbuf,0xE8);       // Call into runtime
 2792     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2793     __ post_call_nop();
 2794     // Restore stack
 2795     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2796     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2797     emit_d8(cbuf, 4*4);
 2798   %}
 2799 
 2800   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2801     // PUSH src1.hi
 2802     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2803     // PUSH src1.lo
 2804     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2805     // PUSH src2.hi
 2806     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2807     // PUSH src2.lo
 2808     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2809     // CALL directly to the runtime
 2810     MacroAssembler _masm(&cbuf);
 2811     cbuf.set_insts_mark();
 2812     emit_opcode(cbuf,0xE8);       // Call into runtime
 2813     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2814     __ post_call_nop();
 2815     // Restore stack
 2816     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2817     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2818     emit_d8(cbuf, 4*4);
 2819   %}
 2820 
 2821   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2822     // MOV   $tmp,$src.lo
 2823     emit_opcode(cbuf, 0x8B);
 2824     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2825     // OR    $tmp,$src.hi
 2826     emit_opcode(cbuf, 0x0B);
 2827     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2828   %}
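  // Note: OR-ing the two halves sets ZF exactly when the whole 64-bit value
  // is zero, so these flags are only meaningful for EQ/NE tests against zero.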
 2829 
 2830   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2831     // CMP    $src1.lo,$src2.lo
 2832     emit_opcode( cbuf, 0x3B );
 2833     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2834     // JNE,s  skip
 2835     emit_cc(cbuf, 0x70, 0x5);
 2836     emit_d8(cbuf,2);
 2837     // CMP    $src1.hi,$src2.hi
 2838     emit_opcode( cbuf, 0x3B );
 2839     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2840   %}
 2841 
 2842   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2843     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2844     emit_opcode( cbuf, 0x3B );
 2845     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2846     // MOV    $tmp,$src1.hi
 2847     emit_opcode( cbuf, 0x8B );
 2848     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2849     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2850     emit_opcode( cbuf, 0x1B );
 2851     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2852   %}
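  // The CMP leaves the borrow from the low-word subtraction in CF, and the
  // SBB then computes $src1.hi - $src2.hi - borrow; the resulting SF/OF
  // (and CF for unsigned tests) therefore describe the full 64-bit
  // difference.  ZF only reflects the high word, so EQ/NE cannot be tested
  // from these flags.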
 2853 
 2854   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2855     // XOR    $tmp,$tmp
 2856     emit_opcode(cbuf,0x33);  // XOR
 2857     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2858     // CMP    $tmp,$src.lo
 2859     emit_opcode( cbuf, 0x3B );
 2860     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2861     // SBB    $tmp,$src.hi
 2862     emit_opcode( cbuf, 0x1B );
 2863     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2864   %}
 2865 
 2866  // Sniff, sniff... smells like Gnu Superoptimizer
 2867   enc_class neg_long( eRegL dst ) %{
 2868     emit_opcode(cbuf,0xF7);    // NEG hi
 2869     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2870     emit_opcode(cbuf,0xF7);    // NEG lo
 2871     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2872     emit_opcode(cbuf,0x83);    // SBB hi,0
 2873     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2874     emit_d8    (cbuf,0 );
 2875   %}
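  // Worked example: $dst = 1 (hi = 0x00000000, lo = 0x00000001).  The high
  // half is negated first so its carry is harmlessly discarded; NEG of the
  // low half then sets CF = 1 (lo was non-zero), and SBB hi,0 subtracts
  // that borrow: 0x00000000 - 0 - 1 = 0xFFFFFFFF.  Result:
  // 0xFFFFFFFF_FFFFFFFF, i.e. -1 as expected.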
 2876 
 2877   enc_class enc_pop_rdx() %{
 2878     emit_opcode(cbuf,0x5A);
 2879   %}
 2880 
 2881   enc_class enc_rethrow() %{
 2882     MacroAssembler _masm(&cbuf);
 2883     cbuf.set_insts_mark();
 2884     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2885     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2886                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2887     __ post_call_nop();
 2888   %}
 2889 
 2890 
  // Convert a double to an int.  Java semantics require we handle the
  // corner cases carefully.  So we set the rounding mode to 'zero'
  // (truncate), store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  If the hardware stores the 0x80000000
  // "integer indefinite" sentinel (NaN or out-of-range input), the code
  // below detects it and calls a runtime wrapper that computes the
  // correct Java value.
 2896   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
 2900     // However, I2C adapters and other float-stack manglers leave pending
 2901     // invalid-op exceptions hanging.  We would have to clear them before
 2902     // enabling them and that is more expensive than just testing for the
 2903     // invalid value Intel stores down in the corner cases.
 2904     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2905     emit_opcode(cbuf,0x2D);
 2906     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2907     // Allocate a word
 2908     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2909     emit_opcode(cbuf,0xEC);
 2910     emit_d8(cbuf,0x04);
 2911     // Encoding assumes a double has been pushed into FPR0.
 2912     // Store down the double as an int, popping the FPU stack
 2913     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2914     emit_opcode(cbuf,0x1C);
 2915     emit_d8(cbuf,0x24);
 2916     // Restore the rounding mode; mask the exception
 2917     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2918     emit_opcode(cbuf,0x2D);
 2919     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2920         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2921         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2922 
 2923     // Load the converted int; adjust CPU stack
 2924     emit_opcode(cbuf,0x58);       // POP EAX
 2925     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2926     emit_d32   (cbuf,0x80000000); //         0x80000000
 2927     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2928     emit_d8    (cbuf,0x07);       // Size of slow_call
 2929     // Push src onto stack slow-path
 2930     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2931     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2932     // CALL directly to the runtime
 2933     MacroAssembler _masm(&cbuf);
 2934     cbuf.set_insts_mark();
 2935     emit_opcode(cbuf,0xE8);       // Call into runtime
 2936     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2937     __ post_call_nop();
 2938     // Carry on here...
 2939   %}
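  // Not part of the emitted code: a rough C++ sketch (constants from
  // <climits>) of the Java (int)-cast semantics that the d2i_wrapper slow
  // path is responsible for whenever the fast FISTP produced the 0x80000000
  // "integer indefinite" sentinel (NaN, out-of-range, or exactly -2^31):
  //
  //   static int java_d2i(double d) {
  //     if (d != d)                 return 0;        // NaN maps to zero
  //     if (d >= (double) INT_MAX)  return INT_MAX;  // clamp large values
  //     if (d <= (double) INT_MIN)  return INT_MIN;  // clamp small values
  //     return (int) d;                              // in range: truncate
  //   }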
 2940 
 2941   enc_class DPR2L_encoding( regDPR src ) %{
 2942     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2943     emit_opcode(cbuf,0x2D);
 2944     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2946     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2947     emit_opcode(cbuf,0xEC);
 2948     emit_d8(cbuf,0x08);
 2949     // Encoding assumes a double has been pushed into FPR0.
 2950     // Store down the double as a long, popping the FPU stack
 2951     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2952     emit_opcode(cbuf,0x3C);
 2953     emit_d8(cbuf,0x24);
 2954     // Restore the rounding mode; mask the exception
 2955     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2956     emit_opcode(cbuf,0x2D);
 2957     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2958         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2959         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2960 
    // Load the converted long; adjust CPU stack
 2962     emit_opcode(cbuf,0x58);       // POP EAX
 2963     emit_opcode(cbuf,0x5A);       // POP EDX
 2964     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2965     emit_d8    (cbuf,0xFA);       // rdx
 2966     emit_d32   (cbuf,0x80000000); //         0x80000000
 2967     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2968     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2969     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2970     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2971     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2972     emit_d8    (cbuf,0x07);       // Size of slow_call
 2973     // Push src onto stack slow-path
 2974     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2975     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2976     // CALL directly to the runtime
 2977     MacroAssembler _masm(&cbuf);
 2978     cbuf.set_insts_mark();
 2979     emit_opcode(cbuf,0xE8);       // Call into runtime
 2980     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2981     __ post_call_nop();
 2982     // Carry on here...
 2983   %}
 2984 
 2985   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2986     // Operand was loaded from memory into fp ST (stack top)
 2987     // FMUL   ST,$src  /* D8 C8+i */
 2988     emit_opcode(cbuf, 0xD8);
 2989     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2990   %}
 2991 
 2992   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 2994     emit_opcode(cbuf, 0xD8);
 2995     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,ST  /* DE C0+i */
 2997   %}
 2998 
 2999   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3000     // FADDP  src2,ST  /* DE C0+i */
 3001     emit_opcode(cbuf, 0xDE);
 3002     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3003   %}
 3004 
 3005   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3006     // Operand has been loaded into fp ST (stack top)
 3007       // FSUB   ST,$src1
 3008       emit_opcode(cbuf, 0xD8);
 3009       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3010 
 3011       // FDIV
 3012       emit_opcode(cbuf, 0xD8);
 3013       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3014   %}
 3015 
 3016   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3017     // Operand was loaded from memory into fp ST (stack top)
 3018     // FADD   ST,$src  /* D8 C0+i */
 3019     emit_opcode(cbuf, 0xD8);
 3020     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3021 
    // FMUL  ST,src2  /* D8 C8+i */
 3023     emit_opcode(cbuf, 0xD8);
 3024     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3025   %}
 3026 
 3027 
 3028   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3029     // Operand was loaded from memory into fp ST (stack top)
 3030     // FADD   ST,$src  /* D8 C0+i */
 3031     emit_opcode(cbuf, 0xD8);
 3032     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3033 
 3034     // FMULP  src2,ST  /* DE C8+i */
 3035     emit_opcode(cbuf, 0xDE);
 3036     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3037   %}
 3038 
 3039   // Atomically load the volatile long
 3040   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3041     emit_opcode(cbuf,0xDF);
 3042     int rm_byte_opcode = 0x05;
 3043     int base     = $mem$$base;
 3044     int index    = $mem$$index;
 3045     int scale    = $mem$$scale;
 3046     int displace = $mem$$disp;
 3047     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3048     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3049     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3050   %}
 3051 
 3052   // Volatile Store Long.  Must be atomic, so move it into
 3053   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3054   // target address before the store (for null-ptr checks)
 3055   // so the memory operand is used twice in the encoding.
 3056   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3057     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3058     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3059     emit_opcode(cbuf,0xDF);
 3060     int rm_byte_opcode = 0x07;
 3061     int base     = $mem$$base;
 3062     int index    = $mem$$index;
 3063     int scale    = $mem$$scale;
 3064     int displace = $mem$$disp;
 3065     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3066     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3067   %}
 3068 
 3069 %}
 3070 
 3071 
 3072 //----------FRAME--------------------------------------------------------------
 3073 // Definition of frame structure and management information.
 3074 //
 3075 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3076 //                             |   (to get allocators register number
 3077 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3078 //  r   CALLER     |        |
 3079 //  o     |        +--------+      pad to even-align allocators stack-slot
 3080 //  w     V        |  pad0  |        numbers; owned by CALLER
 3081 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3082 //  h     ^        |   in   |  5
 3083 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3084 //  |     |        |        |  3
 3085 //  |     |        +--------+
 3086 //  V     |        | old out|      Empty on Intel, window on Sparc
 3087 //        |    old |preserve|      Must be even aligned.
 3088 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3089 //        |        |   in   |  3   area for Intel ret address
 3090 //     Owned by    |preserve|      Empty on Sparc.
 3091 //       SELF      +--------+
 3092 //        |        |  pad2  |  2   pad to align old SP
 3093 //        |        +--------+  1
 3094 //        |        | locks  |  0
 3095 //        |        +--------+----> OptoReg::stack0(), even aligned
 3096 //        |        |  pad1  | 11   pad to align new SP
 3097 //        |        +--------+
 3098 //        |        |        | 10
 3099 //        |        | spills |  9   spills
 3100 //        V        |        |  8   (pad0 slot for callee)
 3101 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3102 //        ^        |  out   |  7
 3103 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3104 //     Owned by    +--------+
 3105 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3106 //        |    new |preserve|      Must be even-aligned.
 3107 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3108 //        |        |        |
 3109 //
 3110 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3111 //         known from SELF's arguments and the Java calling convention.
 3112 //         Region 6-7 is determined per call site.
 3113 // Note 2: If the calling convention leaves holes in the incoming argument
 3114 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3115 //         are owned by the CALLEE.  Holes should not be necessary in the
 3116 //         incoming area, as the Java calling convention is completely under
 3117 //         the control of the AD file.  Doubles can be sorted and packed to
 3118 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3119 //         varargs C calling conventions.
 3120 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3121 //         even aligned with pad0 as needed.
 3122 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3123 //         region 6-11 is even aligned; it may be padded out more so that
 3124 //         the region from SP to FP meets the minimum stack alignment.
 3125 
 3126 frame %{
 3127   // These three registers define part of the calling convention
 3128   // between compiled code and the interpreter.
 3129   inline_cache_reg(EAX);                // Inline Cache Register
 3130 
 3131   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3132   cisc_spilling_operand_name(indOffset32);
 3133 
 3134   // Number of stack slots consumed by locking an object
 3135   sync_stack_slots(1);
 3136 
 3137   // Compiled code's Frame Pointer
 3138   frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3142   interpreter_frame_pointer(EBP);
 3143 
 3144   // Stack alignment requirement
 3145   // Alignment size in bytes (128-bit -> 16 bytes)
 3146   stack_alignment(StackAlignmentInBytes);
 3147 
 3148   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3149   // for calls to C.  Supports the var-args backing area for register parms.
 3150   varargs_C_out_slots_killed(0);
 3151 
 3152   // The after-PROLOG location of the return address.  Location of
 3153   // return address specifies a type (REG or STACK) and a number
 3154   // representing the register number (i.e. - use a register name) or
 3155   // stack slot.
 3156   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3157   // Otherwise, it is above the locks and verification slot and alignment word
 3158   return_addr(STACK - 1 +
 3159               align_up((Compile::current()->in_preserve_stack_slots() +
 3160                         Compile::current()->fixed_slots()),
 3161                        stack_alignment_in_slots()));
 3162 
 3163   // Location of C & interpreter return values
 3164   c_return_value %{
 3165     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3166     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3167     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3168 
 3169     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3170     // that C functions return float and double results in XMM0.
 3171     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3172       return OptoRegPair(XMM0b_num,XMM0_num);
 3173     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3174       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3175 
 3176     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3177   %}
 3178 
 3179   // Location of return values
 3180   return_value %{
 3181     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3182     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3183     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3184     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3185       return OptoRegPair(XMM0b_num,XMM0_num);
 3186     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3187       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3188     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3189   %}
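  // For example, a long result (ideal_reg == Op_RegL) is returned as
  // OptoRegPair(EDX_num, EAX_num), i.e. in EDX:EAX, while with UseSSE >= 2
  // a double result is reported in XMM0 (XMM0b_num:XMM0_num) so the x87
  // stack stays clean.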
 3190 
 3191 %}
 3192 
 3193 //----------ATTRIBUTES---------------------------------------------------------
 3194 //----------Operand Attributes-------------------------------------------------
 3195 op_attrib op_cost(0);        // Required cost attribute
 3196 
 3197 //----------Instruction Attributes---------------------------------------------
 3198 ins_attrib ins_cost(100);       // Required cost attribute
 3199 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
 3203 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3204                                 // specifies the alignment that some part of the instruction (not
 3205                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3206                                 // function must be provided for the instruction
 3207 
 3208 //----------OPERANDS-----------------------------------------------------------
 3209 // Operand definitions must precede instruction definitions for correct parsing
 3210 // in the ADLC because operands constitute user defined types which are used in
 3211 // instruction definitions.
 3212 
 3213 //----------Simple Operands----------------------------------------------------
 3214 // Immediate Operands
 3215 // Integer Immediate
 3216 operand immI() %{
 3217   match(ConI);
 3218 
 3219   op_cost(10);
 3220   format %{ %}
 3221   interface(CONST_INTER);
 3222 %}
 3223 
 3224 // Constant for test vs zero
 3225 operand immI_0() %{
 3226   predicate(n->get_int() == 0);
 3227   match(ConI);
 3228 
 3229   op_cost(0);
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 // Constant for increment
 3235 operand immI_1() %{
 3236   predicate(n->get_int() == 1);
 3237   match(ConI);
 3238 
 3239   op_cost(0);
 3240   format %{ %}
 3241   interface(CONST_INTER);
 3242 %}
 3243 
 3244 // Constant for decrement
 3245 operand immI_M1() %{
 3246   predicate(n->get_int() == -1);
 3247   match(ConI);
 3248 
 3249   op_cost(0);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 // Valid scale values for addressing modes
 3255 operand immI2() %{
 3256   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3257   match(ConI);
 3258 
 3259   format %{ %}
 3260   interface(CONST_INTER);
 3261 %}
 3262 
 3263 operand immI8() %{
 3264   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3265   match(ConI);
 3266 
 3267   op_cost(5);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 operand immU8() %{
 3273   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3274   match(ConI);
 3275 
 3276   op_cost(5);
 3277   format %{ %}
 3278   interface(CONST_INTER);
 3279 %}
 3280 
 3281 operand immI16() %{
 3282   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3283   match(ConI);
 3284 
 3285   op_cost(10);
 3286   format %{ %}
 3287   interface(CONST_INTER);
 3288 %}
 3289 
 3290 // Int Immediate non-negative
 3291 operand immU31()
 3292 %{
 3293   predicate(n->get_int() >= 0);
 3294   match(ConI);
 3295 
 3296   op_cost(0);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 // Constant for long shifts
 3302 operand immI_32() %{
 3303   predicate( n->get_int() == 32 );
 3304   match(ConI);
 3305 
 3306   op_cost(0);
 3307   format %{ %}
 3308   interface(CONST_INTER);
 3309 %}
 3310 
 3311 operand immI_1_31() %{
 3312   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3313   match(ConI);
 3314 
 3315   op_cost(0);
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 operand immI_32_63() %{
 3321   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3322   match(ConI);
 3323   op_cost(0);
 3324 
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 operand immI_2() %{
 3330   predicate( n->get_int() == 2 );
 3331   match(ConI);
 3332 
 3333   op_cost(0);
 3334   format %{ %}
 3335   interface(CONST_INTER);
 3336 %}
 3337 
 3338 operand immI_3() %{
 3339   predicate( n->get_int() == 3 );
 3340   match(ConI);
 3341 
 3342   op_cost(0);
 3343   format %{ %}
 3344   interface(CONST_INTER);
 3345 %}
 3346 
 3347 operand immI_4()
 3348 %{
 3349   predicate(n->get_int() == 4);
 3350   match(ConI);
 3351 
 3352   op_cost(0);
 3353   format %{ %}
 3354   interface(CONST_INTER);
 3355 %}
 3356 
 3357 operand immI_8()
 3358 %{
 3359   predicate(n->get_int() == 8);
 3360   match(ConI);
 3361 
 3362   op_cost(0);
 3363   format %{ %}
 3364   interface(CONST_INTER);
 3365 %}
 3366 
 3367 // Pointer Immediate
 3368 operand immP() %{
 3369   match(ConP);
 3370 
 3371   op_cost(10);
 3372   format %{ %}
 3373   interface(CONST_INTER);
 3374 %}
 3375 
 3376 // NULL Pointer Immediate
 3377 operand immP0() %{
 3378   predicate( n->get_ptr() == 0 );
 3379   match(ConP);
 3380   op_cost(0);
 3381 
 3382   format %{ %}
 3383   interface(CONST_INTER);
 3384 %}
 3385 
 3386 // Long Immediate
 3387 operand immL() %{
 3388   match(ConL);
 3389 
 3390   op_cost(20);
 3391   format %{ %}
 3392   interface(CONST_INTER);
 3393 %}
 3394 
 3395 // Long Immediate zero
 3396 operand immL0() %{
 3397   predicate( n->get_long() == 0L );
 3398   match(ConL);
 3399   op_cost(0);
 3400 
 3401   format %{ %}
 3402   interface(CONST_INTER);
 3403 %}
 3404 
// Long Immediate minus one
 3406 operand immL_M1() %{
 3407   predicate( n->get_long() == -1L );
 3408   match(ConL);
 3409   op_cost(0);
 3410 
 3411   format %{ %}
 3412   interface(CONST_INTER);
 3413 %}
 3414 
 3415 // Long immediate from 0 to 127.
 3416 // Used for a shorter form of long mul by 10.
 3417 operand immL_127() %{
 3418   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3419   match(ConL);
 3420   op_cost(0);
 3421 
 3422   format %{ %}
 3423   interface(CONST_INTER);
 3424 %}
 3425 
 3426 // Long Immediate: low 32-bit mask
 3427 operand immL_32bits() %{
 3428   predicate(n->get_long() == 0xFFFFFFFFL);
 3429   match(ConL);
 3430   op_cost(0);
 3431 
 3432   format %{ %}
 3433   interface(CONST_INTER);
 3434 %}
 3435 
// Long Immediate: 32-bit signed value
 3437 operand immL32() %{
 3438   predicate(n->get_long() == (int)(n->get_long()));
 3439   match(ConL);
 3440   op_cost(20);
 3441 
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
// Double Immediate zero
 3447 operand immDPR0() %{
 3448   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3449   // bug that generates code such that NaNs compare equal to 0.0
 3450   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3451   match(ConD);
 3452 
 3453   op_cost(5);
 3454   format %{ %}
 3455   interface(CONST_INTER);
 3456 %}
 3457 
 3458 // Double Immediate one
 3459 operand immDPR1() %{
 3460   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3461   match(ConD);
 3462 
 3463   op_cost(5);
 3464   format %{ %}
 3465   interface(CONST_INTER);
 3466 %}
 3467 
 3468 // Double Immediate
 3469 operand immDPR() %{
 3470   predicate(UseSSE<=1);
 3471   match(ConD);
 3472 
 3473   op_cost(5);
 3474   format %{ %}
 3475   interface(CONST_INTER);
 3476 %}
 3477 
 3478 operand immD() %{
 3479   predicate(UseSSE>=2);
 3480   match(ConD);
 3481 
 3482   op_cost(5);
 3483   format %{ %}
 3484   interface(CONST_INTER);
 3485 %}
 3486 
 3487 // Double Immediate zero
 3488 operand immD0() %{
 3489   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3490   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3491   // compare equal to -0.0.
 3492   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3493   match(ConD);
 3494 
 3495   format %{ %}
 3496   interface(CONST_INTER);
 3497 %}
 3498 
 3499 // Float Immediate zero
 3500 operand immFPR0() %{
 3501   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3502   match(ConF);
 3503 
 3504   op_cost(5);
 3505   format %{ %}
 3506   interface(CONST_INTER);
 3507 %}
 3508 
 3509 // Float Immediate one
 3510 operand immFPR1() %{
 3511   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3512   match(ConF);
 3513 
 3514   op_cost(5);
 3515   format %{ %}
 3516   interface(CONST_INTER);
 3517 %}
 3518 
 3519 // Float Immediate
 3520 operand immFPR() %{
 3521   predicate( UseSSE == 0 );
 3522   match(ConF);
 3523 
 3524   op_cost(5);
 3525   format %{ %}
 3526   interface(CONST_INTER);
 3527 %}
 3528 
 3529 // Float Immediate
 3530 operand immF() %{
 3531   predicate(UseSSE >= 1);
 3532   match(ConF);
 3533 
 3534   op_cost(5);
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Float Immediate zero.  Zero and not -0.0
 3540 operand immF0() %{
 3541   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3542   match(ConF);
 3543 
 3544   op_cost(5);
 3545   format %{ %}
 3546   interface(CONST_INTER);
 3547 %}
 3548 
 3549 // Immediates for special shifts (sign extend)
 3550 
// Shift counts for sign extension
 3552 operand immI_16() %{
 3553   predicate( n->get_int() == 16 );
 3554   match(ConI);
 3555 
 3556   format %{ %}
 3557   interface(CONST_INTER);
 3558 %}
 3559 
 3560 operand immI_24() %{
 3561   predicate( n->get_int() == 24 );
 3562   match(ConI);
 3563 
 3564   format %{ %}
 3565   interface(CONST_INTER);
 3566 %}
 3567 
 3568 // Constant for byte-wide masking
 3569 operand immI_255() %{
 3570   predicate( n->get_int() == 255 );
 3571   match(ConI);
 3572 
 3573   format %{ %}
 3574   interface(CONST_INTER);
 3575 %}
 3576 
 3577 // Constant for short-wide masking
 3578 operand immI_65535() %{
 3579   predicate(n->get_int() == 65535);
 3580   match(ConI);
 3581 
 3582   format %{ %}
 3583   interface(CONST_INTER);
 3584 %}
 3585 
 3586 operand kReg()
 3587 %{
 3588   constraint(ALLOC_IN_RC(vectmask_reg));
 3589   match(RegVectMask);
 3590   format %{%}
 3591   interface(REG_INTER);
 3592 %}
 3593 
 3594 operand kReg_K1()
 3595 %{
 3596   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3597   match(RegVectMask);
 3598   format %{%}
 3599   interface(REG_INTER);
 3600 %}
 3601 
 3602 operand kReg_K2()
 3603 %{
 3604   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3605   match(RegVectMask);
 3606   format %{%}
 3607   interface(REG_INTER);
 3608 %}
 3609 
 3610 // Special Registers
 3611 operand kReg_K3()
 3612 %{
 3613   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3614   match(RegVectMask);
 3615   format %{%}
 3616   interface(REG_INTER);
 3617 %}
 3618 
 3619 operand kReg_K4()
 3620 %{
 3621   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3622   match(RegVectMask);
 3623   format %{%}
 3624   interface(REG_INTER);
 3625 %}
 3626 
 3627 operand kReg_K5()
 3628 %{
 3629   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3630   match(RegVectMask);
 3631   format %{%}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 operand kReg_K6()
 3636 %{
 3637   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3638   match(RegVectMask);
 3639   format %{%}
 3640   interface(REG_INTER);
 3641 %}
 3642 
 3643 // Special Registers
 3644 operand kReg_K7()
 3645 %{
 3646   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3647   match(RegVectMask);
 3648   format %{%}
 3649   interface(REG_INTER);
 3650 %}
 3651 
 3652 // Register Operands
 3653 // Integer Register
 3654 operand rRegI() %{
 3655   constraint(ALLOC_IN_RC(int_reg));
 3656   match(RegI);
 3657   match(xRegI);
 3658   match(eAXRegI);
 3659   match(eBXRegI);
 3660   match(eCXRegI);
 3661   match(eDXRegI);
 3662   match(eDIRegI);
 3663   match(eSIRegI);
 3664 
 3665   format %{ %}
 3666   interface(REG_INTER);
 3667 %}
 3668 
 3669 // Subset of Integer Register
 3670 operand xRegI(rRegI reg) %{
 3671   constraint(ALLOC_IN_RC(int_x_reg));
 3672   match(reg);
 3673   match(eAXRegI);
 3674   match(eBXRegI);
 3675   match(eCXRegI);
 3676   match(eDXRegI);
 3677 
 3678   format %{ %}
 3679   interface(REG_INTER);
 3680 %}
 3681 
 3682 // Special Registers
 3683 operand eAXRegI(xRegI reg) %{
 3684   constraint(ALLOC_IN_RC(eax_reg));
 3685   match(reg);
 3686   match(rRegI);
 3687 
 3688   format %{ "EAX" %}
 3689   interface(REG_INTER);
 3690 %}
 3691 
 3692 // Special Registers
 3693 operand eBXRegI(xRegI reg) %{
 3694   constraint(ALLOC_IN_RC(ebx_reg));
 3695   match(reg);
 3696   match(rRegI);
 3697 
 3698   format %{ "EBX" %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand eCXRegI(xRegI reg) %{
 3703   constraint(ALLOC_IN_RC(ecx_reg));
 3704   match(reg);
 3705   match(rRegI);
 3706 
 3707   format %{ "ECX" %}
 3708   interface(REG_INTER);
 3709 %}
 3710 
 3711 operand eDXRegI(xRegI reg) %{
 3712   constraint(ALLOC_IN_RC(edx_reg));
 3713   match(reg);
 3714   match(rRegI);
 3715 
 3716   format %{ "EDX" %}
 3717   interface(REG_INTER);
 3718 %}
 3719 
 3720 operand eDIRegI(xRegI reg) %{
 3721   constraint(ALLOC_IN_RC(edi_reg));
 3722   match(reg);
 3723   match(rRegI);
 3724 
 3725   format %{ "EDI" %}
 3726   interface(REG_INTER);
 3727 %}
 3728 
 3729 operand naxRegI() %{
 3730   constraint(ALLOC_IN_RC(nax_reg));
 3731   match(RegI);
 3732   match(eCXRegI);
 3733   match(eDXRegI);
 3734   match(eSIRegI);
 3735   match(eDIRegI);
 3736 
 3737   format %{ %}
 3738   interface(REG_INTER);
 3739 %}
 3740 
 3741 operand nadxRegI() %{
 3742   constraint(ALLOC_IN_RC(nadx_reg));
 3743   match(RegI);
 3744   match(eBXRegI);
 3745   match(eCXRegI);
 3746   match(eSIRegI);
 3747   match(eDIRegI);
 3748 
 3749   format %{ %}
 3750   interface(REG_INTER);
 3751 %}
 3752 
 3753 operand ncxRegI() %{
 3754   constraint(ALLOC_IN_RC(ncx_reg));
 3755   match(RegI);
 3756   match(eAXRegI);
 3757   match(eDXRegI);
 3758   match(eSIRegI);
 3759   match(eDIRegI);
 3760 
 3761   format %{ %}
 3762   interface(REG_INTER);
 3763 %}
 3764 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
//
 3767 operand eSIRegI(xRegI reg) %{
 3768    constraint(ALLOC_IN_RC(esi_reg));
 3769    match(reg);
 3770    match(rRegI);
 3771 
 3772    format %{ "ESI" %}
 3773    interface(REG_INTER);
 3774 %}
 3775 
 3776 // Pointer Register
 3777 operand anyRegP() %{
 3778   constraint(ALLOC_IN_RC(any_reg));
 3779   match(RegP);
 3780   match(eAXRegP);
 3781   match(eBXRegP);
 3782   match(eCXRegP);
 3783   match(eDIRegP);
 3784   match(eRegP);
 3785 
 3786   format %{ %}
 3787   interface(REG_INTER);
 3788 %}
 3789 
 3790 operand eRegP() %{
 3791   constraint(ALLOC_IN_RC(int_reg));
 3792   match(RegP);
 3793   match(eAXRegP);
 3794   match(eBXRegP);
 3795   match(eCXRegP);
 3796   match(eDIRegP);
 3797 
 3798   format %{ %}
 3799   interface(REG_INTER);
 3800 %}
 3801 
 3802 operand rRegP() %{
 3803   constraint(ALLOC_IN_RC(int_reg));
 3804   match(RegP);
 3805   match(eAXRegP);
 3806   match(eBXRegP);
 3807   match(eCXRegP);
 3808   match(eDIRegP);
 3809 
 3810   format %{ %}
 3811   interface(REG_INTER);
 3812 %}
 3813 
 3814 // On windows95, EBP is not safe to use for implicit null tests.
 3815 operand eRegP_no_EBP() %{
 3816   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3817   match(RegP);
 3818   match(eAXRegP);
 3819   match(eBXRegP);
 3820   match(eCXRegP);
 3821   match(eDIRegP);
 3822 
 3823   op_cost(100);
 3824   format %{ %}
 3825   interface(REG_INTER);
 3826 %}
 3827 
 3828 operand naxRegP() %{
 3829   constraint(ALLOC_IN_RC(nax_reg));
 3830   match(RegP);
 3831   match(eBXRegP);
 3832   match(eDXRegP);
 3833   match(eCXRegP);
 3834   match(eSIRegP);
 3835   match(eDIRegP);
 3836 
 3837   format %{ %}
 3838   interface(REG_INTER);
 3839 %}
 3840 
 3841 operand nabxRegP() %{
 3842   constraint(ALLOC_IN_RC(nabx_reg));
 3843   match(RegP);
 3844   match(eCXRegP);
 3845   match(eDXRegP);
 3846   match(eSIRegP);
 3847   match(eDIRegP);
 3848 
 3849   format %{ %}
 3850   interface(REG_INTER);
 3851 %}
 3852 
 3853 operand pRegP() %{
 3854   constraint(ALLOC_IN_RC(p_reg));
 3855   match(RegP);
 3856   match(eBXRegP);
 3857   match(eDXRegP);
 3858   match(eSIRegP);
 3859   match(eDIRegP);
 3860 
 3861   format %{ %}
 3862   interface(REG_INTER);
 3863 %}
 3864 
 3865 // Special Registers
 3866 // Return a pointer value
 3867 operand eAXRegP(eRegP reg) %{
 3868   constraint(ALLOC_IN_RC(eax_reg));
 3869   match(reg);
 3870   format %{ "EAX" %}
 3871   interface(REG_INTER);
 3872 %}
 3873 
 3874 // Used in AtomicAdd
 3875 operand eBXRegP(eRegP reg) %{
 3876   constraint(ALLOC_IN_RC(ebx_reg));
 3877   match(reg);
 3878   format %{ "EBX" %}
 3879   interface(REG_INTER);
 3880 %}
 3881 
 3882 // Tail-call (interprocedural jump) to interpreter
 3883 operand eCXRegP(eRegP reg) %{
 3884   constraint(ALLOC_IN_RC(ecx_reg));
 3885   match(reg);
 3886   format %{ "ECX" %}
 3887   interface(REG_INTER);
 3888 %}
 3889 
 3890 operand eDXRegP(eRegP reg) %{
 3891   constraint(ALLOC_IN_RC(edx_reg));
 3892   match(reg);
 3893   format %{ "EDX" %}
 3894   interface(REG_INTER);
 3895 %}
 3896 
 3897 operand eSIRegP(eRegP reg) %{
 3898   constraint(ALLOC_IN_RC(esi_reg));
 3899   match(reg);
 3900   format %{ "ESI" %}
 3901   interface(REG_INTER);
 3902 %}
 3903 
 3904 // Used in rep stosw
 3905 operand eDIRegP(eRegP reg) %{
 3906   constraint(ALLOC_IN_RC(edi_reg));
 3907   match(reg);
 3908   format %{ "EDI" %}
 3909   interface(REG_INTER);
 3910 %}
 3911 
 3912 operand eRegL() %{
 3913   constraint(ALLOC_IN_RC(long_reg));
 3914   match(RegL);
 3915   match(eADXRegL);
 3916 
 3917   format %{ %}
 3918   interface(REG_INTER);
 3919 %}
 3920 
 3921 operand eADXRegL( eRegL reg ) %{
 3922   constraint(ALLOC_IN_RC(eadx_reg));
 3923   match(reg);
 3924 
 3925   format %{ "EDX:EAX" %}
 3926   interface(REG_INTER);
 3927 %}
 3928 
 3929 operand eBCXRegL( eRegL reg ) %{
 3930   constraint(ALLOC_IN_RC(ebcx_reg));
 3931   match(reg);
 3932 
 3933   format %{ "EBX:ECX" %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 operand eBDPRegL( eRegL reg ) %{
 3938   constraint(ALLOC_IN_RC(ebpd_reg));
 3939   match(reg);
 3940 
 3941   format %{ "EBP:EDI" %}
 3942   interface(REG_INTER);
 3943 %}
 3944 // Special case for integer high multiply
 3945 operand eADXRegL_low_only() %{
 3946   constraint(ALLOC_IN_RC(eadx_reg));
 3947   match(RegL);
 3948 
 3949   format %{ "EAX" %}
 3950   interface(REG_INTER);
 3951 %}
 3952 
 3953 // Flags register, used as output of compare instructions
 3954 operand rFlagsReg() %{
 3955   constraint(ALLOC_IN_RC(int_flags));
 3956   match(RegFlags);
 3957 
 3958   format %{ "EFLAGS" %}
 3959   interface(REG_INTER);
 3960 %}
 3961 
 3962 // Flags register, used as output of compare instructions
 3963 operand eFlagsReg() %{
 3964   constraint(ALLOC_IN_RC(int_flags));
 3965   match(RegFlags);
 3966 
 3967   format %{ "EFLAGS" %}
 3968   interface(REG_INTER);
 3969 %}
 3970 
 3971 // Flags register, used as output of FLOATING POINT compare instructions
 3972 operand eFlagsRegU() %{
 3973   constraint(ALLOC_IN_RC(int_flags));
 3974   match(RegFlags);
 3975 
 3976   format %{ "EFLAGS_U" %}
 3977   interface(REG_INTER);
 3978 %}
 3979 
 3980 operand eFlagsRegUCF() %{
 3981   constraint(ALLOC_IN_RC(int_flags));
 3982   match(RegFlags);
 3983   predicate(false);
 3984 
 3985   format %{ "EFLAGS_U_CF" %}
 3986   interface(REG_INTER);
 3987 %}
 3988 
 3989 // Condition Code Register used by long compare
 3990 operand flagsReg_long_LTGE() %{
 3991   constraint(ALLOC_IN_RC(int_flags));
 3992   match(RegFlags);
 3993   format %{ "FLAGS_LTGE" %}
 3994   interface(REG_INTER);
 3995 %}
 3996 operand flagsReg_long_EQNE() %{
 3997   constraint(ALLOC_IN_RC(int_flags));
 3998   match(RegFlags);
 3999   format %{ "FLAGS_EQNE" %}
 4000   interface(REG_INTER);
 4001 %}
 4002 operand flagsReg_long_LEGT() %{
 4003   constraint(ALLOC_IN_RC(int_flags));
 4004   match(RegFlags);
 4005   format %{ "FLAGS_LEGT" %}
 4006   interface(REG_INTER);
 4007 %}
 4008 
 4009 // Condition Code Register used by unsigned long compare
 4010 operand flagsReg_ulong_LTGE() %{
 4011   constraint(ALLOC_IN_RC(int_flags));
 4012   match(RegFlags);
 4013   format %{ "FLAGS_U_LTGE" %}
 4014   interface(REG_INTER);
 4015 %}
 4016 operand flagsReg_ulong_EQNE() %{
 4017   constraint(ALLOC_IN_RC(int_flags));
 4018   match(RegFlags);
 4019   format %{ "FLAGS_U_EQNE" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 operand flagsReg_ulong_LEGT() %{
 4023   constraint(ALLOC_IN_RC(int_flags));
 4024   match(RegFlags);
 4025   format %{ "FLAGS_U_LEGT" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
// FPU Double register operands
 4030 operand regDPR() %{
 4031   predicate( UseSSE < 2 );
 4032   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4033   match(RegD);
 4034   match(regDPR1);
 4035   match(regDPR2);
 4036   format %{ %}
 4037   interface(REG_INTER);
 4038 %}
 4039 
 4040 operand regDPR1(regDPR reg) %{
 4041   predicate( UseSSE < 2 );
 4042   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4043   match(reg);
 4044   format %{ "FPR1" %}
 4045   interface(REG_INTER);
 4046 %}
 4047 
 4048 operand regDPR2(regDPR reg) %{
 4049   predicate( UseSSE < 2 );
 4050   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4051   match(reg);
 4052   format %{ "FPR2" %}
 4053   interface(REG_INTER);
 4054 %}
 4055 
 4056 operand regnotDPR1(regDPR reg) %{
 4057   predicate( UseSSE < 2 );
 4058   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4059   match(reg);
 4060   format %{ %}
 4061   interface(REG_INTER);
 4062 %}
 4063 
 4064 // Float register operands
 4065 operand regFPR() %{
 4066   predicate( UseSSE < 2 );
 4067   constraint(ALLOC_IN_RC(fp_flt_reg));
 4068   match(RegF);
 4069   match(regFPR1);
 4070   format %{ %}
 4071   interface(REG_INTER);
 4072 %}
 4073 
 4074 // Float register operands
 4075 operand regFPR1(regFPR reg) %{
 4076   predicate( UseSSE < 2 );
 4077   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4078   match(reg);
 4079   format %{ "FPR1" %}
 4080   interface(REG_INTER);
 4081 %}
 4082 
 4083 // XMM Float register operands
 4084 operand regF() %{
 4085   predicate( UseSSE>=1 );
 4086   constraint(ALLOC_IN_RC(float_reg_legacy));
 4087   match(RegF);
 4088   format %{ %}
 4089   interface(REG_INTER);
 4090 %}
 4091 
 4092 operand legRegF() %{
 4093   predicate( UseSSE>=1 );
 4094   constraint(ALLOC_IN_RC(float_reg_legacy));
 4095   match(RegF);
 4096   format %{ %}
 4097   interface(REG_INTER);
 4098 %}
 4099 
 4100 // Float register operands
 4101 operand vlRegF() %{
 4102    constraint(ALLOC_IN_RC(float_reg_vl));
 4103    match(RegF);
 4104 
 4105    format %{ %}
 4106    interface(REG_INTER);
 4107 %}
 4108 
 4109 // XMM Double register operands
 4110 operand regD() %{
 4111   predicate( UseSSE>=2 );
 4112   constraint(ALLOC_IN_RC(double_reg_legacy));
 4113   match(RegD);
 4114   format %{ %}
 4115   interface(REG_INTER);
 4116 %}
 4117 
 4118 // Double register operands
 4119 operand legRegD() %{
 4120   predicate( UseSSE>=2 );
 4121   constraint(ALLOC_IN_RC(double_reg_legacy));
 4122   match(RegD);
 4123   format %{ %}
 4124   interface(REG_INTER);
 4125 %}
 4126 
 4127 operand vlRegD() %{
 4128    constraint(ALLOC_IN_RC(double_reg_vl));
 4129    match(RegD);
 4130 
 4131    format %{ %}
 4132    interface(REG_INTER);
 4133 %}
 4134 
 4135 //----------Memory Operands----------------------------------------------------
 4136 // Direct Memory Operand
 4137 operand direct(immP addr) %{
 4138   match(addr);
 4139 
 4140   format %{ "[$addr]" %}
 4141   interface(MEMORY_INTER) %{
 4142     base(0xFFFFFFFF);
 4143     index(0x4);
 4144     scale(0x0);
 4145     disp($addr);
 4146   %}
 4147 %}
 4148 
 4149 // Indirect Memory Operand
 4150 operand indirect(eRegP reg) %{
 4151   constraint(ALLOC_IN_RC(int_reg));
 4152   match(reg);
 4153 
 4154   format %{ "[$reg]" %}
 4155   interface(MEMORY_INTER) %{
 4156     base($reg);
 4157     index(0x4);
 4158     scale(0x0);
 4159     disp(0x0);
 4160   %}
 4161 %}
 4162 
 4163 // Indirect Memory Plus Short Offset Operand
 4164 operand indOffset8(eRegP reg, immI8 off) %{
 4165   match(AddP reg off);
 4166 
 4167   format %{ "[$reg + $off]" %}
 4168   interface(MEMORY_INTER) %{
 4169     base($reg);
 4170     index(0x4);
 4171     scale(0x0);
 4172     disp($off);
 4173   %}
 4174 %}
 4175 
 4176 // Indirect Memory Plus Long Offset Operand
 4177 operand indOffset32(eRegP reg, immI off) %{
 4178   match(AddP reg off);
 4179 
 4180   format %{ "[$reg + $off]" %}
 4181   interface(MEMORY_INTER) %{
 4182     base($reg);
 4183     index(0x4);
 4184     scale(0x0);
 4185     disp($off);
 4186   %}
 4187 %}
 4188 
 4189 // Indirect Memory Plus Long Offset Operand
 4190 operand indOffset32X(rRegI reg, immP off) %{
 4191   match(AddP off reg);
 4192 
 4193   format %{ "[$reg + $off]" %}
 4194   interface(MEMORY_INTER) %{
 4195     base($reg);
 4196     index(0x4);
 4197     scale(0x0);
 4198     disp($off);
 4199   %}
 4200 %}
 4201 
 4202 // Indirect Memory Plus Index Register Plus Offset Operand
 4203 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4204   match(AddP (AddP reg ireg) off);
 4205 
 4206   op_cost(10);
 4207   format %{"[$reg + $off + $ireg]" %}
 4208   interface(MEMORY_INTER) %{
 4209     base($reg);
 4210     index($ireg);
 4211     scale(0x0);
 4212     disp($off);
 4213   %}
 4214 %}
 4215 
 4216 // Indirect Memory Plus Index Register Plus Offset Operand
 4217 operand indIndex(eRegP reg, rRegI ireg) %{
 4218   match(AddP reg ireg);
 4219 
 4220   op_cost(10);
 4221   format %{"[$reg + $ireg]" %}
 4222   interface(MEMORY_INTER) %{
 4223     base($reg);
 4224     index($ireg);
 4225     scale(0x0);
 4226     disp(0x0);
 4227   %}
 4228 %}
 4229 
 4230 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4232 // // -------------------------------------------------------------------------
 4233 // // Scaled Memory Operands
 4234 // // Indirect Memory Times Scale Plus Offset Operand
 4235 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4236 //   match(AddP off (LShiftI ireg scale));
 4237 //
 4238 //   op_cost(10);
 4239 //   format %{"[$off + $ireg << $scale]" %}
 4240 //   interface(MEMORY_INTER) %{
 4241 //     base(0x4);
 4242 //     index($ireg);
 4243 //     scale($scale);
 4244 //     disp($off);
 4245 //   %}
 4246 // %}
 4247 
 4248 // Indirect Memory Times Scale Plus Index Register
 4249 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4250   match(AddP reg (LShiftI ireg scale));
 4251 
 4252   op_cost(10);
 4253   format %{"[$reg + $ireg << $scale]" %}
 4254   interface(MEMORY_INTER) %{
 4255     base($reg);
 4256     index($ireg);
 4257     scale($scale);
 4258     disp(0x0);
 4259   %}
 4260 %}
 4261 
 4262 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4263 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4264   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4265 
 4266   op_cost(10);
 4267   format %{"[$reg + $off + $ireg << $scale]" %}
 4268   interface(MEMORY_INTER) %{
 4269     base($reg);
 4270     index($ireg);
 4271     scale($scale);
 4272     disp($off);
 4273   %}
 4274 %}
 4275 
 4276 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4278 // the first word of the long.  If the load-long destination overlaps with
 4279 // registers used in the addressing expression, the 2nd half will be loaded
 4280 // from a clobbered address.  Fix this by requiring that load-long use
 4281 // address registers that do not overlap with the load-long target.
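// For illustration of the hazard being avoided (not a real encoding): if the
// low half of the destination were allocated on top of the base register,
//   MOV ESI,[ESI]      // first load clobbers the base,
//   MOV EDX,[ESI+4]    // so the second load reads through a stale pointer.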
 4282 
 4283 // load-long support
 4284 operand load_long_RegP() %{
 4285   constraint(ALLOC_IN_RC(esi_reg));
 4286   match(RegP);
 4287   match(eSIRegP);
 4288   op_cost(100);
 4289   format %{  %}
 4290   interface(REG_INTER);
 4291 %}
 4292 
 4293 // Indirect Memory Operand Long
 4294 operand load_long_indirect(load_long_RegP reg) %{
 4295   constraint(ALLOC_IN_RC(esi_reg));
 4296   match(reg);
 4297 
 4298   format %{ "[$reg]" %}
 4299   interface(MEMORY_INTER) %{
 4300     base($reg);
 4301     index(0x4);
 4302     scale(0x0);
 4303     disp(0x0);
 4304   %}
 4305 %}
 4306 
 4307 // Indirect Memory Plus Long Offset Operand
 4308 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4309   match(AddP reg off);
 4310 
 4311   format %{ "[$reg + $off]" %}
 4312   interface(MEMORY_INTER) %{
 4313     base($reg);
 4314     index(0x4);
 4315     scale(0x0);
 4316     disp($off);
 4317   %}
 4318 %}
 4319 
 4320 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4321 
 4322 
 4323 //----------Special Memory Operands--------------------------------------------
 4324 // Stack Slot Operand - This operand is used for loading and storing temporary
 4325 //                      values on the stack where a match requires a value to
 4326 //                      flow through memory.
 4327 operand stackSlotP(sRegP reg) %{
 4328   constraint(ALLOC_IN_RC(stack_slots));
 4329   // No match rule because this operand is only generated in matching
 4330   format %{ "[$reg]" %}
 4331   interface(MEMORY_INTER) %{
 4332     base(0x4);   // ESP
 4333     index(0x4);  // No Index
 4334     scale(0x0);  // No Scale
 4335     disp($reg);  // Stack Offset
 4336   %}
 4337 %}
 4338 
 4339 operand stackSlotI(sRegI reg) %{
 4340   constraint(ALLOC_IN_RC(stack_slots));
 4341   // No match rule because this operand is only generated in matching
 4342   format %{ "[$reg]" %}
 4343   interface(MEMORY_INTER) %{
 4344     base(0x4);   // ESP
 4345     index(0x4);  // No Index
 4346     scale(0x0);  // No Scale
 4347     disp($reg);  // Stack Offset
 4348   %}
 4349 %}
 4350 
 4351 operand stackSlotF(sRegF reg) %{
 4352   constraint(ALLOC_IN_RC(stack_slots));
 4353   // No match rule because this operand is only generated in matching
 4354   format %{ "[$reg]" %}
 4355   interface(MEMORY_INTER) %{
 4356     base(0x4);   // ESP
 4357     index(0x4);  // No Index
 4358     scale(0x0);  // No Scale
 4359     disp($reg);  // Stack Offset
 4360   %}
 4361 %}
 4362 
 4363 operand stackSlotD(sRegD reg) %{
 4364   constraint(ALLOC_IN_RC(stack_slots));
 4365   // No match rule because this operand is only generated in matching
 4366   format %{ "[$reg]" %}
 4367   interface(MEMORY_INTER) %{
 4368     base(0x4);   // ESP
 4369     index(0x4);  // No Index
 4370     scale(0x0);  // No Scale
 4371     disp($reg);  // Stack Offset
 4372   %}
 4373 %}
 4374 
 4375 operand stackSlotL(sRegL reg) %{
 4376   constraint(ALLOC_IN_RC(stack_slots));
 4377   // No match rule because this operand is only generated in matching
 4378   format %{ "[$reg]" %}
 4379   interface(MEMORY_INTER) %{
 4380     base(0x4);   // ESP
 4381     index(0x4);  // No Index
 4382     scale(0x0);  // No Scale
 4383     disp($reg);  // Stack Offset
 4384   %}
 4385 %}
 4386 
 4387 //----------Conditional Branch Operands----------------------------------------
 4388 // Comparison Op  - This is the operation of the comparison, and is limited to
 4389 //                  the following set of codes:
 4390 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4391 //
 4392 // Other attributes of the comparison, such as unsignedness, are specified
 4393 // by the comparison instruction that sets a condition code flags register.
 4394 // That result is represented by a flags operand whose subtype is appropriate
 4395 // to the unsignedness (etc.) of the comparison.
 4396 //
 4397 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4398 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4399 // by matching a specific subtype of Bool operand below, such as cmpOpU.
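// For example, the hex values below are the standard IA-32 condition-code
// nibbles: an instruct matching cmpOp with "less" (0xC) can emit a short
// conditional jump as 0x70 + 0xC = 0x7C (JL), or the 32-bit-displacement
// form as 0x0F, 0x80 + 0xC.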
 4400 
 4401 // Comparison Code
 4402 operand cmpOp() %{
 4403   match(Bool);
 4404 
 4405   format %{ "" %}
 4406   interface(COND_INTER) %{
 4407     equal(0x4, "e");
 4408     not_equal(0x5, "ne");
 4409     less(0xC, "l");
 4410     greater_equal(0xD, "ge");
 4411     less_equal(0xE, "le");
 4412     greater(0xF, "g");
 4413     overflow(0x0, "o");
 4414     no_overflow(0x1, "no");
 4415   %}
 4416 %}
 4417 
 4418 // Comparison Code, unsigned compare.  Used by FP also, with
 4419 // C2 (unordered) turned into GT or LT already.  The other bits
 4420 // C0 and C3 are turned into Carry & Zero flags.
 4421 operand cmpOpU() %{
 4422   match(Bool);
 4423 
 4424   format %{ "" %}
 4425   interface(COND_INTER) %{
 4426     equal(0x4, "e");
 4427     not_equal(0x5, "ne");
 4428     less(0x2, "b");
 4429     greater_equal(0x3, "nb");
 4430     less_equal(0x6, "be");
 4431     greater(0x7, "nbe");
 4432     overflow(0x0, "o");
 4433     no_overflow(0x1, "no");
 4434   %}
 4435 %}
 4436 
 4437 // Floating comparisons that don't require any fixup for the unordered case
 4438 operand cmpOpUCF() %{
 4439   match(Bool);
 4440   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4441             n->as_Bool()->_test._test == BoolTest::ge ||
 4442             n->as_Bool()->_test._test == BoolTest::le ||
 4443             n->as_Bool()->_test._test == BoolTest::gt);
 4444   format %{ "" %}
 4445   interface(COND_INTER) %{
 4446     equal(0x4, "e");
 4447     not_equal(0x5, "ne");
 4448     less(0x2, "b");
 4449     greater_equal(0x3, "nb");
 4450     less_equal(0x6, "be");
 4451     greater(0x7, "nbe");
 4452     overflow(0x0, "o");
 4453     no_overflow(0x1, "no");
 4454   %}
 4455 %}
 4456 
 4457 
 4458 // Floating comparisons that can be fixed up with extra conditional jumps
 4459 operand cmpOpUCF2() %{
 4460   match(Bool);
 4461   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4462             n->as_Bool()->_test._test == BoolTest::eq);
 4463   format %{ "" %}
 4464   interface(COND_INTER) %{
 4465     equal(0x4, "e");
 4466     not_equal(0x5, "ne");
 4467     less(0x2, "b");
 4468     greater_equal(0x3, "nb");
 4469     less_equal(0x6, "be");
 4470     greater(0x7, "nbe");
 4471     overflow(0x0, "o");
 4472     no_overflow(0x1, "no");
 4473   %}
 4474 %}
 4475 
 4476 // Comparison Code for FP conditional move
 4477 operand cmpOp_fcmov() %{
 4478   match(Bool);
 4479 
 4480   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4481             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4482   format %{ "" %}
 4483   interface(COND_INTER) %{
 4484     equal        (0x0C8);
 4485     not_equal    (0x1C8);
 4486     less         (0x0C0);
 4487     greater_equal(0x1C0);
 4488     less_equal   (0x0D0);
 4489     greater      (0x1D0);
 4490     overflow(0x0, "o"); // not really supported by the instruction
 4491     no_overflow(0x1, "no"); // not really supported by the instruction
 4492   %}
 4493 %}
 4494 
 4495 // Comparison Code used in long compares
 4496 operand cmpOp_commute() %{
 4497   match(Bool);
 4498 
 4499   format %{ "" %}
 4500   interface(COND_INTER) %{
 4501     equal(0x4, "e");
 4502     not_equal(0x5, "ne");
 4503     less(0xF, "g");
 4504     greater_equal(0xE, "le");
 4505     less_equal(0xD, "ge");
 4506     greater(0xC, "l");
 4507     overflow(0x0, "o");
 4508     no_overflow(0x1, "no");
 4509   %}
 4510 %}
 4511 
 4512 // Comparison Code used in unsigned long compares
 4513 operand cmpOpU_commute() %{
 4514   match(Bool);
 4515 
 4516   format %{ "" %}
 4517   interface(COND_INTER) %{
 4518     equal(0x4, "e");
 4519     not_equal(0x5, "ne");
 4520     less(0x7, "nbe");
 4521     greater_equal(0x6, "be");
 4522     less_equal(0x3, "nb");
 4523     greater(0x2, "b");
 4524     overflow(0x0, "o");
 4525     no_overflow(0x1, "no");
 4526   %}
 4527 %}
 4528 
 4529 //----------OPERAND CLASSES----------------------------------------------------
 4530 // Operand Classes are groups of operands that are used to simplify
 4531 // instruction definitions by not requiring the AD writer to specify separate
 4532 // instructions for every form of operand when the instruction accepts
 4533 // multiple operand types with the same basic encoding and format.  The classic
 4534 // case of this is memory operands.
 4535 
 4536 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4537                indIndex, indIndexScale, indIndexScaleOffset);
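
      // Illustrative sketch: because "memory" expands to each of its member
      // operands, a single definition such as the loadI instruct in the Load
      // Instructions section below,
      //
      //   instruct loadI(rRegI dst, memory mem) %{
      //     match(Set dst (LoadI mem));
      //     ...
      //   %}
      //
      // covers register-indirect, 8-bit/32-bit offset, and scaled-index addressing
      // forms without requiring a separate instruct for each one.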
 4538 
 4539 // Long memory operations are encoded as 2 instructions, the second using a +4 offset.
 4540 // This means some kind of offset is always required, so you cannot use
 4541 // an oop as the offset (as is done when working on static globals).
 4542 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4543                     indIndex, indIndexScale, indIndexScaleOffset);
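      //
      // Illustrative sketch: the loadL instruct in the Load Instructions section
      // below shows the resulting split, forming one address at $disp and a second
      // at $disp + 4:
      //
      //   Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
      //   Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);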
 4544 
 4545 
 4546 //----------PIPELINE-----------------------------------------------------------
 4547 // Rules which define the behavior of the target architecture's pipeline.
 4548 pipeline %{
 4549 
 4550 //----------ATTRIBUTES---------------------------------------------------------
 4551 attributes %{
 4552   variable_size_instructions;        // Instructions are variable length
 4553   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4554   instruction_unit_size = 1;         // Instruction sizes are measured in units of 1 byte
 4555   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4556   instruction_fetch_units = 1;       // of 16 bytes
 4557 
 4558   // List of nop instructions
 4559   nops( MachNop );
 4560 %}
 4561 
 4562 //----------RESOURCES----------------------------------------------------------
 4563 // Resources are the functional units available to the machine
 4564 
 4565 // Generic P2/P3 pipeline
 4566 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4567 // 3 instructions decoded per cycle.
 4568 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4569 // 2 ALU ops; only ALU0 handles mul/div instructions.
 4570 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4571            MS0, MS1, MEM = MS0 | MS1,
 4572            BR, FPU,
 4573            ALU0, ALU1, ALU = ALU0 | ALU1 );
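
      // For example, the ialu_reg_mem class defined below consumes the big decoder
      // (D0), one ALU slot, and one MEM slot for a single load-op instruction, while
      // ialu_reg_reg_long consumes two decode slots and both ALUs for its
      // two-instruction long pair.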
 4574 
 4575 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4576 // Pipeline Description specifies the stages in the machine's pipeline
 4577 
 4578 // Generic P2/P3 pipeline
 4579 pipe_desc(S0, S1, S2, S3, S4, S5);
 4580 
 4581 //----------PIPELINE CLASSES---------------------------------------------------
 4582 // Pipeline Classes describe the stages in which input and output are
 4583 // referenced by the hardware pipeline.
 4584 
 4585 // Naming convention: ialu or fpu
 4586 // Then: _reg
 4587 // Then: _reg if there is a 2nd register
 4588 // Then: _long if it's a pair of instructions implementing a long
 4589 // Then: _fat if it requires the big decoder
 4590 //   Or: _mem if it requires the big decoder and a memory unit.
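      //
      // For example, ialu_reg_long_fat below reads as: integer ALU operation
      // ("ialu"), register destination ("_reg"), two-instruction long pair
      // ("_long"), big decoder required ("_fat"); fpu_reg_mem is a floating-point
      // operation with a register result and a memory input, which needs the big
      // decoder and a memory unit ("_mem").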
 4591 
 4592 // Integer ALU reg operation
 4593 pipe_class ialu_reg(rRegI dst) %{
 4594     single_instruction;
 4595     dst    : S4(write);
 4596     dst    : S3(read);
 4597     DECODE : S0;        // any decoder
 4598     ALU    : S3;        // any alu
 4599 %}
 4600 
 4601 // Long ALU reg operation
 4602 pipe_class ialu_reg_long(eRegL dst) %{
 4603     instruction_count(2);
 4604     dst    : S4(write);
 4605     dst    : S3(read);
 4606     DECODE : S0(2);     // any 2 decoders
 4607     ALU    : S3(2);     // both alus
 4608 %}
 4609 
 4610 // Integer ALU reg operation using big decoder
 4611 pipe_class ialu_reg_fat(rRegI dst) %{
 4612     single_instruction;
 4613     dst    : S4(write);
 4614     dst    : S3(read);
 4615     D0     : S0;        // big decoder only
 4616     ALU    : S3;        // any alu
 4617 %}
 4618 
 4619 // Long ALU reg operation using big decoder
 4620 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4621     instruction_count(2);
 4622     dst    : S4(write);
 4623     dst    : S3(read);
 4624     D0     : S0(2);     // big decoder only; twice
 4625     ALU    : S3(2);     // any 2 alus
 4626 %}
 4627 
 4628 // Integer ALU reg-reg operation
 4629 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4630     single_instruction;
 4631     dst    : S4(write);
 4632     src    : S3(read);
 4633     DECODE : S0;        // any decoder
 4634     ALU    : S3;        // any alu
 4635 %}
 4636 
 4637 // Long ALU reg-reg operation
 4638 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4639     instruction_count(2);
 4640     dst    : S4(write);
 4641     src    : S3(read);
 4642     DECODE : S0(2);     // any 2 decoders
 4643     ALU    : S3(2);     // both alus
 4644 %}
 4645 
 4646 // Integer ALU reg-reg operation using big decoder
 4647 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4648     single_instruction;
 4649     dst    : S4(write);
 4650     src    : S3(read);
 4651     D0     : S0;        // big decoder only
 4652     ALU    : S3;        // any alu
 4653 %}
 4654 
 4655 // Long ALU reg-reg operation using big decoder
 4656 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4657     instruction_count(2);
 4658     dst    : S4(write);
 4659     src    : S3(read);
 4660     D0     : S0(2);     // big decoder only; twice
 4661     ALU    : S3(2);     // both alus
 4662 %}
 4663 
 4664 // Integer ALU reg-mem operation
 4665 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4666     single_instruction;
 4667     dst    : S5(write);
 4668     mem    : S3(read);
 4669     D0     : S0;        // big decoder only
 4670     ALU    : S4;        // any alu
 4671     MEM    : S3;        // any mem
 4672 %}
 4673 
 4674 // Long ALU reg-mem operation
 4675 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4676     instruction_count(2);
 4677     dst    : S5(write);
 4678     mem    : S3(read);
 4679     D0     : S0(2);     // big decoder only; twice
 4680     ALU    : S4(2);     // any 2 alus
 4681     MEM    : S3(2);     // both mems
 4682 %}
 4683 
 4684 // Integer mem operation (prefetch)
 4685 pipe_class ialu_mem(memory mem)
 4686 %{
 4687     single_instruction;
 4688     mem    : S3(read);
 4689     D0     : S0;        // big decoder only
 4690     MEM    : S3;        // any mem
 4691 %}
 4692 
 4693 // Integer Store to Memory
 4694 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4695     single_instruction;
 4696     mem    : S3(read);
 4697     src    : S5(read);
 4698     D0     : S0;        // big decoder only
 4699     ALU    : S4;        // any alu
 4700     MEM    : S3;
 4701 %}
 4702 
 4703 // Long Store to Memory
 4704 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4705     instruction_count(2);
 4706     mem    : S3(read);
 4707     src    : S5(read);
 4708     D0     : S0(2);     // big decoder only; twice
 4709     ALU    : S4(2);     // any 2 alus
 4710     MEM    : S3(2);     // Both mems
 4711 %}
 4712 
 4713 // Integer Store to Memory
 4714 pipe_class ialu_mem_imm(memory mem) %{
 4715     single_instruction;
 4716     mem    : S3(read);
 4717     D0     : S0;        // big decoder only
 4718     ALU    : S4;        // any alu
 4719     MEM    : S3;
 4720 %}
 4721 
 4722 // Integer ALU0 reg-reg operation
 4723 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4724     single_instruction;
 4725     dst    : S4(write);
 4726     src    : S3(read);
 4727     D0     : S0;        // Big decoder only
 4728     ALU0   : S3;        // only alu0
 4729 %}
 4730 
 4731 // Integer ALU0 reg-mem operation
 4732 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4733     single_instruction;
 4734     dst    : S5(write);
 4735     mem    : S3(read);
 4736     D0     : S0;        // big decoder only
 4737     ALU0   : S4;        // ALU0 only
 4738     MEM    : S3;        // any mem
 4739 %}
 4740 
 4741 // Integer ALU reg-reg operation
 4742 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4743     single_instruction;
 4744     cr     : S4(write);
 4745     src1   : S3(read);
 4746     src2   : S3(read);
 4747     DECODE : S0;        // any decoder
 4748     ALU    : S3;        // any alu
 4749 %}
 4750 
 4751 // Integer ALU reg-imm operation
 4752 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4753     single_instruction;
 4754     cr     : S4(write);
 4755     src1   : S3(read);
 4756     DECODE : S0;        // any decoder
 4757     ALU    : S3;        // any alu
 4758 %}
 4759 
 4760 // Integer ALU reg-mem operation
 4761 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4762     single_instruction;
 4763     cr     : S4(write);
 4764     src1   : S3(read);
 4765     src2   : S3(read);
 4766     D0     : S0;        // big decoder only
 4767     ALU    : S4;        // any alu
 4768     MEM    : S3;
 4769 %}
 4770 
 4771 // Conditional move reg-reg
 4772 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4773     instruction_count(4);
 4774     y      : S4(read);
 4775     q      : S3(read);
 4776     p      : S3(read);
 4777     DECODE : S0(4);     // any decoder
 4778 %}
 4779 
 4780 // Conditional move reg-reg
 4781 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4782     single_instruction;
 4783     dst    : S4(write);
 4784     src    : S3(read);
 4785     cr     : S3(read);
 4786     DECODE : S0;        // any decoder
 4787 %}
 4788 
 4789 // Conditional move reg-mem
 4790 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4791     single_instruction;
 4792     dst    : S4(write);
 4793     src    : S3(read);
 4794     cr     : S3(read);
 4795     DECODE : S0;        // any decoder
 4796     MEM    : S3;
 4797 %}
 4798 
 4799 // Conditional move reg-reg long
 4800 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4801     single_instruction;
 4802     dst    : S4(write);
 4803     src    : S3(read);
 4804     cr     : S3(read);
 4805     DECODE : S0(2);     // any 2 decoders
 4806 %}
 4807 
 4808 // Conditional move double reg-reg
 4809 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4810     single_instruction;
 4811     dst    : S4(write);
 4812     src    : S3(read);
 4813     cr     : S3(read);
 4814     DECODE : S0;        // any decoder
 4815 %}
 4816 
 4817 // Float reg-reg operation
 4818 pipe_class fpu_reg(regDPR dst) %{
 4819     instruction_count(2);
 4820     dst    : S3(read);
 4821     DECODE : S0(2);     // any 2 decoders
 4822     FPU    : S3;
 4823 %}
 4824 
 4825 // Float reg-reg operation
 4826 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4827     instruction_count(2);
 4828     dst    : S4(write);
 4829     src    : S3(read);
 4830     DECODE : S0(2);     // any 2 decoders
 4831     FPU    : S3;
 4832 %}
 4833 
 4834 // Float reg-reg operation
 4835 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4836     instruction_count(3);
 4837     dst    : S4(write);
 4838     src1   : S3(read);
 4839     src2   : S3(read);
 4840     DECODE : S0(3);     // any 3 decoders
 4841     FPU    : S3(2);
 4842 %}
 4843 
 4844 // Float reg-reg operation
 4845 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4846     instruction_count(4);
 4847     dst    : S4(write);
 4848     src1   : S3(read);
 4849     src2   : S3(read);
 4850     src3   : S3(read);
 4851     DECODE : S0(4);     // any 4 decode slots
 4852     FPU    : S3(2);
 4853 %}
 4854 
 4855 // Float reg-mem-reg-reg operation
 4856 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4857     instruction_count(4);
 4858     dst    : S4(write);
 4859     src1   : S3(read);
 4860     src2   : S3(read);
 4861     src3   : S3(read);
 4862     DECODE : S1(3);     // any 3 decoders
 4863     D0     : S0;        // Big decoder only
 4864     FPU    : S3(2);
 4865     MEM    : S3;
 4866 %}
 4867 
 4868 // Float reg-mem operation
 4869 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4870     instruction_count(2);
 4871     dst    : S5(write);
 4872     mem    : S3(read);
 4873     D0     : S0;        // big decoder only
 4874     DECODE : S1;        // any decoder for FPU POP
 4875     FPU    : S4;
 4876     MEM    : S3;        // any mem
 4877 %}
 4878 
 4879 // Float reg-reg-mem operation
 4880 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4881     instruction_count(3);
 4882     dst    : S5(write);
 4883     src1   : S3(read);
 4884     mem    : S3(read);
 4885     D0     : S0;        // big decoder only
 4886     DECODE : S1(2);     // any decoder for FPU POP
 4887     FPU    : S4;
 4888     MEM    : S3;        // any mem
 4889 %}
 4890 
 4891 // Float mem-reg operation
 4892 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4893     instruction_count(2);
 4894     src    : S5(read);
 4895     mem    : S3(read);
 4896     DECODE : S0;        // any decoder for FPU PUSH
 4897     D0     : S1;        // big decoder only
 4898     FPU    : S4;
 4899     MEM    : S3;        // any mem
 4900 %}
 4901 
 4902 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4903     instruction_count(3);
 4904     src1   : S3(read);
 4905     src2   : S3(read);
 4906     mem    : S3(read);
 4907     DECODE : S0(2);     // any decoder for FPU PUSH
 4908     D0     : S1;        // big decoder only
 4909     FPU    : S4;
 4910     MEM    : S3;        // any mem
 4911 %}
 4912 
 4913 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4914     instruction_count(3);
 4915     src1   : S3(read);
 4916     src2   : S3(read);
 4917     mem    : S4(read);
 4918     DECODE : S0;        // any decoder for FPU PUSH
 4919     D0     : S0(2);     // big decoder only
 4920     FPU    : S4;
 4921     MEM    : S3(2);     // any mem
 4922 %}
 4923 
 4924 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4925     instruction_count(2);
 4926     src1   : S3(read);
 4927     dst    : S4(read);
 4928     D0     : S0(2);     // big decoder only
 4929     MEM    : S3(2);     // any mem
 4930 %}
 4931 
 4932 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4933     instruction_count(3);
 4934     src1   : S3(read);
 4935     src2   : S3(read);
 4936     dst    : S4(read);
 4937     D0     : S0(3);     // big decoder only
 4938     FPU    : S4;
 4939     MEM    : S3(3);     // any mem
 4940 %}
 4941 
 4942 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4943     instruction_count(3);
 4944     src1   : S4(read);
 4945     mem    : S4(read);
 4946     DECODE : S0;        // any decoder for FPU PUSH
 4947     D0     : S0(2);     // big decoder only
 4948     FPU    : S4;
 4949     MEM    : S3(2);     // any mem
 4950 %}
 4951 
 4952 // Float load constant
 4953 pipe_class fpu_reg_con(regDPR dst) %{
 4954     instruction_count(2);
 4955     dst    : S5(write);
 4956     D0     : S0;        // big decoder only for the load
 4957     DECODE : S1;        // any decoder for FPU POP
 4958     FPU    : S4;
 4959     MEM    : S3;        // any mem
 4960 %}
 4961 
 4962 // Float load constant
 4963 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4964     instruction_count(3);
 4965     dst    : S5(write);
 4966     src    : S3(read);
 4967     D0     : S0;        // big decoder only for the load
 4968     DECODE : S1(2);     // any decoder for FPU POP
 4969     FPU    : S4;
 4970     MEM    : S3;        // any mem
 4971 %}
 4972 
 4973 // UnConditional branch
 4974 pipe_class pipe_jmp( label labl ) %{
 4975     single_instruction;
 4976     BR   : S3;
 4977 %}
 4978 
 4979 // Conditional branch
 4980 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4981     single_instruction;
 4982     cr    : S1(read);
 4983     BR    : S3;
 4984 %}
 4985 
 4986 // Allocation idiom
 4987 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4988     instruction_count(1); force_serialization;
 4989     fixed_latency(6);
 4990     heap_ptr : S3(read);
 4991     DECODE   : S0(3);
 4992     D0       : S2;
 4993     MEM      : S3;
 4994     ALU      : S3(2);
 4995     dst      : S5(write);
 4996     BR       : S5;
 4997 %}
 4998 
 4999 // Generic big/slow expanded idiom
 5000 pipe_class pipe_slow(  ) %{
 5001     instruction_count(10); multiple_bundles; force_serialization;
 5002     fixed_latency(100);
 5003     D0  : S0(2);
 5004     MEM : S3(2);
 5005 %}
 5006 
 5007 // The real do-nothing guy
 5008 pipe_class empty( ) %{
 5009     instruction_count(0);
 5010 %}
 5011 
 5012 // Define the class for the Nop node
 5013 define %{
 5014    MachNop = empty;
 5015 %}
 5016 
 5017 %}
 5018 
 5019 //----------INSTRUCTIONS-------------------------------------------------------
 5020 //
 5021 // match      -- States which machine-independent subtree may be replaced
 5022 //               by this instruction.
 5023 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5024 //               selection to identify a minimum cost tree of machine
 5025 //               instructions that matches a tree of machine-independent
 5026 //               instructions.
 5027 // format     -- A string providing the disassembly for this instruction.
 5028 //               The value of an instruction's operand may be inserted
 5029 //               by referring to it with a '$' prefix.
 5030 // opcode     -- Up to three instruction opcodes may be provided.  These are referred
 5031 //               to within an encode class as $primary, $secondary, and $tertiary
 5032 //               respectively.  The primary opcode is commonly used to
 5033 //               indicate the type of machine instruction, while secondary
 5034 //               and tertiary are often used for prefix options or addressing
 5035 //               modes.
 5036 // ins_encode -- A list of encode classes with parameters. The encode class
 5037 //               name must have been defined in an 'enc_class' specification
 5038 //               in the encode section of the architecture description.
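      //
      // As an illustrative sketch (mirroring the loadB definition in the Load
      // Instructions section below), a minimal instruct ties these pieces together:
      //
      //   instruct loadB(xRegI dst, memory mem) %{
      //     match(Set dst (LoadB mem));        // replaces the ideal LoadB subtree
      //     ins_cost(125);                     // cost used for least-cost selection
      //     format %{ "MOVSX8 $dst,$mem\t# byte" %}
      //     ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %}
      //     ins_pipe(ialu_reg_mem);
      //   %}
      //
      // An opcode clause is typically present only when the encoding classes refer to
      // $primary, $secondary, or $tertiary (as in loadRange below).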
 5039 
 5040 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5041 // Load Float
 5042 instruct MoveF2LEG(legRegF dst, regF src) %{
 5043   match(Set dst src);
 5044   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5045   ins_encode %{
 5046     ShouldNotReachHere();
 5047   %}
 5048   ins_pipe( fpu_reg_reg );
 5049 %}
 5050 
 5051 // Load Float
 5052 instruct MoveLEG2F(regF dst, legRegF src) %{
 5053   match(Set dst src);
 5054   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5055   ins_encode %{
 5056     ShouldNotReachHere();
 5057   %}
 5058   ins_pipe( fpu_reg_reg );
 5059 %}
 5060 
 5061 // Load Float
 5062 instruct MoveF2VL(vlRegF dst, regF src) %{
 5063   match(Set dst src);
 5064   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5065   ins_encode %{
 5066     ShouldNotReachHere();
 5067   %}
 5068   ins_pipe( fpu_reg_reg );
 5069 %}
 5070 
 5071 // Load Float
 5072 instruct MoveVL2F(regF dst, vlRegF src) %{
 5073   match(Set dst src);
 5074   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5075   ins_encode %{
 5076     ShouldNotReachHere();
 5077   %}
 5078   ins_pipe( fpu_reg_reg );
 5079 %}
 5080 
 5081 
 5082 
 5083 // Load Double
 5084 instruct MoveD2LEG(legRegD dst, regD src) %{
 5085   match(Set dst src);
 5086   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5087   ins_encode %{
 5088     ShouldNotReachHere();
 5089   %}
 5090   ins_pipe( fpu_reg_reg );
 5091 %}
 5092 
 5093 // Load Double
 5094 instruct MoveLEG2D(regD dst, legRegD src) %{
 5095   match(Set dst src);
 5096   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5097   ins_encode %{
 5098     ShouldNotReachHere();
 5099   %}
 5100   ins_pipe( fpu_reg_reg );
 5101 %}
 5102 
 5103 // Load Double
 5104 instruct MoveD2VL(vlRegD dst, regD src) %{
 5105   match(Set dst src);
 5106   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5107   ins_encode %{
 5108     ShouldNotReachHere();
 5109   %}
 5110   ins_pipe( fpu_reg_reg );
 5111 %}
 5112 
 5113 // Load Double
 5114 instruct MoveVL2D(regD dst, vlRegD src) %{
 5115   match(Set dst src);
 5116   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5117   ins_encode %{
 5118     ShouldNotReachHere();
 5119   %}
 5120   ins_pipe( fpu_reg_reg );
 5121 %}
 5122 
 5123 //----------BSWAP-Instruction--------------------------------------------------
 5124 instruct bytes_reverse_int(rRegI dst) %{
 5125   match(Set dst (ReverseBytesI dst));
 5126 
 5127   format %{ "BSWAP  $dst" %}
 5128   opcode(0x0F, 0xC8);
 5129   ins_encode( OpcP, OpcSReg(dst) );
 5130   ins_pipe( ialu_reg );
 5131 %}
 5132 
 5133 instruct bytes_reverse_long(eRegL dst) %{
 5134   match(Set dst (ReverseBytesL dst));
 5135 
 5136   format %{ "BSWAP  $dst.lo\n\t"
 5137             "BSWAP  $dst.hi\n\t"
 5138             "XCHG   $dst.lo $dst.hi" %}
 5139 
 5140   ins_cost(125);
 5141   ins_encode( bswap_long_bytes(dst) );
 5142   ins_pipe( ialu_reg_reg);
 5143 %}
 5144 
 5145 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5146   match(Set dst (ReverseBytesUS dst));
 5147   effect(KILL cr);
 5148 
 5149   format %{ "BSWAP  $dst\n\t"
 5150             "SHR    $dst,16\n\t" %}
 5151   ins_encode %{
 5152     __ bswapl($dst$$Register);
 5153     __ shrl($dst$$Register, 16);
 5154   %}
 5155   ins_pipe( ialu_reg );
 5156 %}
 5157 
 5158 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5159   match(Set dst (ReverseBytesS dst));
 5160   effect(KILL cr);
 5161 
 5162   format %{ "BSWAP  $dst\n\t"
 5163             "SAR    $dst,16\n\t" %}
 5164   ins_encode %{
 5165     __ bswapl($dst$$Register);
 5166     __ sarl($dst$$Register, 16);
 5167   %}
 5168   ins_pipe( ialu_reg );
 5169 %}
 5170 
 5171 
 5172 //---------- Zeros Count Instructions ------------------------------------------
 5173 
 5174 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5175   predicate(UseCountLeadingZerosInstruction);
 5176   match(Set dst (CountLeadingZerosI src));
 5177   effect(KILL cr);
 5178 
 5179   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5180   ins_encode %{
 5181     __ lzcntl($dst$$Register, $src$$Register);
 5182   %}
 5183   ins_pipe(ialu_reg);
 5184 %}
 5185 
 5186 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5187   predicate(!UseCountLeadingZerosInstruction);
 5188   match(Set dst (CountLeadingZerosI src));
 5189   effect(KILL cr);
 5190 
 5191   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5192             "JNZ    skip\n\t"
 5193             "MOV    $dst, -1\n"
 5194       "skip:\n\t"
 5195             "NEG    $dst\n\t"
 5196             "ADD    $dst, 31" %}
 5197   ins_encode %{
 5198     Register Rdst = $dst$$Register;
 5199     Register Rsrc = $src$$Register;
 5200     Label skip;
 5201     __ bsrl(Rdst, Rsrc);
 5202     __ jccb(Assembler::notZero, skip);
 5203     __ movl(Rdst, -1);
 5204     __ bind(skip);
 5205     __ negl(Rdst);
 5206     __ addl(Rdst, BitsPerInt - 1);
 5207   %}
 5208   ins_pipe(ialu_reg);
 5209 %}
 5210 
 5211 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5212   predicate(UseCountLeadingZerosInstruction);
 5213   match(Set dst (CountLeadingZerosL src));
 5214   effect(TEMP dst, KILL cr);
 5215 
 5216   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5217             "JNC    done\n\t"
 5218             "LZCNT  $dst, $src.lo\n\t"
 5219             "ADD    $dst, 32\n"
 5220       "done:" %}
 5221   ins_encode %{
 5222     Register Rdst = $dst$$Register;
 5223     Register Rsrc = $src$$Register;
 5224     Label done;
 5225     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5226     __ jccb(Assembler::carryClear, done);
 5227     __ lzcntl(Rdst, Rsrc);
 5228     __ addl(Rdst, BitsPerInt);
 5229     __ bind(done);
 5230   %}
 5231   ins_pipe(ialu_reg);
 5232 %}
 5233 
 5234 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5235   predicate(!UseCountLeadingZerosInstruction);
 5236   match(Set dst (CountLeadingZerosL src));
 5237   effect(TEMP dst, KILL cr);
 5238 
 5239   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5240             "JZ     msw_is_zero\n\t"
 5241             "ADD    $dst, 32\n\t"
 5242             "JMP    not_zero\n"
 5243       "msw_is_zero:\n\t"
 5244             "BSR    $dst, $src.lo\n\t"
 5245             "JNZ    not_zero\n\t"
 5246             "MOV    $dst, -1\n"
 5247       "not_zero:\n\t"
 5248             "NEG    $dst\n\t"
 5249             "ADD    $dst, 63\n" %}
 5250   ins_encode %{
 5251     Register Rdst = $dst$$Register;
 5252     Register Rsrc = $src$$Register;
 5253     Label msw_is_zero;
 5254     Label not_zero;
 5255     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5256     __ jccb(Assembler::zero, msw_is_zero);
 5257     __ addl(Rdst, BitsPerInt);
 5258     __ jmpb(not_zero);
 5259     __ bind(msw_is_zero);
 5260     __ bsrl(Rdst, Rsrc);
 5261     __ jccb(Assembler::notZero, not_zero);
 5262     __ movl(Rdst, -1);
 5263     __ bind(not_zero);
 5264     __ negl(Rdst);
 5265     __ addl(Rdst, BitsPerLong - 1);
 5266   %}
 5267   ins_pipe(ialu_reg);
 5268 %}
 5269 
 5270 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5271   predicate(UseCountTrailingZerosInstruction);
 5272   match(Set dst (CountTrailingZerosI src));
 5273   effect(KILL cr);
 5274 
 5275   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5276   ins_encode %{
 5277     __ tzcntl($dst$$Register, $src$$Register);
 5278   %}
 5279   ins_pipe(ialu_reg);
 5280 %}
 5281 
 5282 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5283   predicate(!UseCountTrailingZerosInstruction);
 5284   match(Set dst (CountTrailingZerosI src));
 5285   effect(KILL cr);
 5286 
 5287   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5288             "JNZ    done\n\t"
 5289             "MOV    $dst, 32\n"
 5290       "done:" %}
 5291   ins_encode %{
 5292     Register Rdst = $dst$$Register;
 5293     Label done;
 5294     __ bsfl(Rdst, $src$$Register);
 5295     __ jccb(Assembler::notZero, done);
 5296     __ movl(Rdst, BitsPerInt);
 5297     __ bind(done);
 5298   %}
 5299   ins_pipe(ialu_reg);
 5300 %}
 5301 
 5302 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5303   predicate(UseCountTrailingZerosInstruction);
 5304   match(Set dst (CountTrailingZerosL src));
 5305   effect(TEMP dst, KILL cr);
 5306 
 5307   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5308             "JNC    done\n\t"
 5309             "TZCNT  $dst, $src.hi\n\t"
 5310             "ADD    $dst, 32\n"
 5311       "done:" %}
 5312   ins_encode %{
 5313     Register Rdst = $dst$$Register;
 5314     Register Rsrc = $src$$Register;
 5315     Label done;
 5316     __ tzcntl(Rdst, Rsrc);
 5317     __ jccb(Assembler::carryClear, done);
 5318     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5319     __ addl(Rdst, BitsPerInt);
 5320     __ bind(done);
 5321   %}
 5322   ins_pipe(ialu_reg);
 5323 %}
 5324 
 5325 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5326   predicate(!UseCountTrailingZerosInstruction);
 5327   match(Set dst (CountTrailingZerosL src));
 5328   effect(TEMP dst, KILL cr);
 5329 
 5330   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5331             "JNZ    done\n\t"
 5332             "BSF    $dst, $src.hi\n\t"
 5333             "JNZ    msw_not_zero\n\t"
 5334             "MOV    $dst, 32\n"
 5335       "msw_not_zero:\n\t"
 5336             "ADD    $dst, 32\n"
 5337       "done:" %}
 5338   ins_encode %{
 5339     Register Rdst = $dst$$Register;
 5340     Register Rsrc = $src$$Register;
 5341     Label msw_not_zero;
 5342     Label done;
 5343     __ bsfl(Rdst, Rsrc);
 5344     __ jccb(Assembler::notZero, done);
 5345     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5346     __ jccb(Assembler::notZero, msw_not_zero);
 5347     __ movl(Rdst, BitsPerInt);
 5348     __ bind(msw_not_zero);
 5349     __ addl(Rdst, BitsPerInt);
 5350     __ bind(done);
 5351   %}
 5352   ins_pipe(ialu_reg);
 5353 %}
 5354 
 5355 
 5356 //---------- Population Count Instructions -------------------------------------
 5357 
 5358 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5359   predicate(UsePopCountInstruction);
 5360   match(Set dst (PopCountI src));
 5361   effect(KILL cr);
 5362 
 5363   format %{ "POPCNT $dst, $src" %}
 5364   ins_encode %{
 5365     __ popcntl($dst$$Register, $src$$Register);
 5366   %}
 5367   ins_pipe(ialu_reg);
 5368 %}
 5369 
 5370 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5371   predicate(UsePopCountInstruction);
 5372   match(Set dst (PopCountI (LoadI mem)));
 5373   effect(KILL cr);
 5374 
 5375   format %{ "POPCNT $dst, $mem" %}
 5376   ins_encode %{
 5377     __ popcntl($dst$$Register, $mem$$Address);
 5378   %}
 5379   ins_pipe(ialu_reg);
 5380 %}
 5381 
 5382 // Note: Long.bitCount(long) returns an int.
 5383 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5384   predicate(UsePopCountInstruction);
 5385   match(Set dst (PopCountL src));
 5386   effect(KILL cr, TEMP tmp, TEMP dst);
 5387 
 5388   format %{ "POPCNT $dst, $src.lo\n\t"
 5389             "POPCNT $tmp, $src.hi\n\t"
 5390             "ADD    $dst, $tmp" %}
 5391   ins_encode %{
 5392     __ popcntl($dst$$Register, $src$$Register);
 5393     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5394     __ addl($dst$$Register, $tmp$$Register);
 5395   %}
 5396   ins_pipe(ialu_reg);
 5397 %}
 5398 
 5399 // Note: Long.bitCount(long) returns an int.
 5400 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5401   predicate(UsePopCountInstruction);
 5402   match(Set dst (PopCountL (LoadL mem)));
 5403   effect(KILL cr, TEMP tmp, TEMP dst);
 5404 
 5405   format %{ "POPCNT $dst, $mem\n\t"
 5406             "POPCNT $tmp, $mem+4\n\t"
 5407             "ADD    $dst, $tmp" %}
 5408   ins_encode %{
 5409     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5410     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5411     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5412     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5413     __ addl($dst$$Register, $tmp$$Register);
 5414   %}
 5415   ins_pipe(ialu_reg);
 5416 %}
 5417 
 5418 
 5419 //----------Load/Store/Move Instructions---------------------------------------
 5420 //----------Load Instructions--------------------------------------------------
 5421 // Load Byte (8bit signed)
 5422 instruct loadB(xRegI dst, memory mem) %{
 5423   match(Set dst (LoadB mem));
 5424 
 5425   ins_cost(125);
 5426   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5427 
 5428   ins_encode %{
 5429     __ movsbl($dst$$Register, $mem$$Address);
 5430   %}
 5431 
 5432   ins_pipe(ialu_reg_mem);
 5433 %}
 5434 
 5435 // Load Byte (8bit signed) into Long Register
 5436 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5437   match(Set dst (ConvI2L (LoadB mem)));
 5438   effect(KILL cr);
 5439 
 5440   ins_cost(375);
 5441   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5442             "MOV    $dst.hi,$dst.lo\n\t"
 5443             "SAR    $dst.hi,7" %}
 5444 
 5445   ins_encode %{
 5446     __ movsbl($dst$$Register, $mem$$Address);
 5447     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5448     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5449   %}
 5450 
 5451   ins_pipe(ialu_reg_mem);
 5452 %}
 5453 
 5454 // Load Unsigned Byte (8bit UNsigned)
 5455 instruct loadUB(xRegI dst, memory mem) %{
 5456   match(Set dst (LoadUB mem));
 5457 
 5458   ins_cost(125);
 5459   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5460 
 5461   ins_encode %{
 5462     __ movzbl($dst$$Register, $mem$$Address);
 5463   %}
 5464 
 5465   ins_pipe(ialu_reg_mem);
 5466 %}
 5467 
 5468 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5469 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5470   match(Set dst (ConvI2L (LoadUB mem)));
 5471   effect(KILL cr);
 5472 
 5473   ins_cost(250);
 5474   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5475             "XOR    $dst.hi,$dst.hi" %}
 5476 
 5477   ins_encode %{
 5478     Register Rdst = $dst$$Register;
 5479     __ movzbl(Rdst, $mem$$Address);
 5480     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5481   %}
 5482 
 5483   ins_pipe(ialu_reg_mem);
 5484 %}
 5485 
 5486 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5487 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5488   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5489   effect(KILL cr);
 5490 
 5491   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5492             "XOR    $dst.hi,$dst.hi\n\t"
 5493             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5494   ins_encode %{
 5495     Register Rdst = $dst$$Register;
 5496     __ movzbl(Rdst, $mem$$Address);
 5497     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5498     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5499   %}
 5500   ins_pipe(ialu_reg_mem);
 5501 %}
 5502 
 5503 // Load Short (16bit signed)
 5504 instruct loadS(rRegI dst, memory mem) %{
 5505   match(Set dst (LoadS mem));
 5506 
 5507   ins_cost(125);
 5508   format %{ "MOVSX  $dst,$mem\t# short" %}
 5509 
 5510   ins_encode %{
 5511     __ movswl($dst$$Register, $mem$$Address);
 5512   %}
 5513 
 5514   ins_pipe(ialu_reg_mem);
 5515 %}
 5516 
 5517 // Load Short (16 bit signed) to Byte (8 bit signed)
 5518 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5519   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5520 
 5521   ins_cost(125);
 5522   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5523   ins_encode %{
 5524     __ movsbl($dst$$Register, $mem$$Address);
 5525   %}
 5526   ins_pipe(ialu_reg_mem);
 5527 %}
 5528 
 5529 // Load Short (16bit signed) into Long Register
 5530 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5531   match(Set dst (ConvI2L (LoadS mem)));
 5532   effect(KILL cr);
 5533 
 5534   ins_cost(375);
 5535   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5536             "MOV    $dst.hi,$dst.lo\n\t"
 5537             "SAR    $dst.hi,15" %}
 5538 
 5539   ins_encode %{
 5540     __ movswl($dst$$Register, $mem$$Address);
 5541     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5542     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5543   %}
 5544 
 5545   ins_pipe(ialu_reg_mem);
 5546 %}
 5547 
 5548 // Load Unsigned Short/Char (16bit unsigned)
 5549 instruct loadUS(rRegI dst, memory mem) %{
 5550   match(Set dst (LoadUS mem));
 5551 
 5552   ins_cost(125);
 5553   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5554 
 5555   ins_encode %{
 5556     __ movzwl($dst$$Register, $mem$$Address);
 5557   %}
 5558 
 5559   ins_pipe(ialu_reg_mem);
 5560 %}
 5561 
 5562 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5563 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5564   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5565 
 5566   ins_cost(125);
 5567   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5568   ins_encode %{
 5569     __ movsbl($dst$$Register, $mem$$Address);
 5570   %}
 5571   ins_pipe(ialu_reg_mem);
 5572 %}
 5573 
 5574 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5575 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5576   match(Set dst (ConvI2L (LoadUS mem)));
 5577   effect(KILL cr);
 5578 
 5579   ins_cost(250);
 5580   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5581             "XOR    $dst.hi,$dst.hi" %}
 5582 
 5583   ins_encode %{
 5584     __ movzwl($dst$$Register, $mem$$Address);
 5585     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5586   %}
 5587 
 5588   ins_pipe(ialu_reg_mem);
 5589 %}
 5590 
 5591 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5592 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5593   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5594   effect(KILL cr);
 5595 
 5596   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5597             "XOR    $dst.hi,$dst.hi" %}
 5598   ins_encode %{
 5599     Register Rdst = $dst$$Register;
 5600     __ movzbl(Rdst, $mem$$Address);
 5601     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5602   %}
 5603   ins_pipe(ialu_reg_mem);
 5604 %}
 5605 
 5606 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5607 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5608   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5609   effect(KILL cr);
 5610 
 5611   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5612             "XOR    $dst.hi,$dst.hi\n\t"
 5613             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5614   ins_encode %{
 5615     Register Rdst = $dst$$Register;
 5616     __ movzwl(Rdst, $mem$$Address);
 5617     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5618     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5619   %}
 5620   ins_pipe(ialu_reg_mem);
 5621 %}
 5622 
 5623 // Load Integer
 5624 instruct loadI(rRegI dst, memory mem) %{
 5625   match(Set dst (LoadI mem));
 5626 
 5627   ins_cost(125);
 5628   format %{ "MOV    $dst,$mem\t# int" %}
 5629 
 5630   ins_encode %{
 5631     __ movl($dst$$Register, $mem$$Address);
 5632   %}
 5633 
 5634   ins_pipe(ialu_reg_mem);
 5635 %}
 5636 
 5637 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5638 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5639   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5640 
 5641   ins_cost(125);
 5642   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5643   ins_encode %{
 5644     __ movsbl($dst$$Register, $mem$$Address);
 5645   %}
 5646   ins_pipe(ialu_reg_mem);
 5647 %}
 5648 
 5649 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5650 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5651   match(Set dst (AndI (LoadI mem) mask));
 5652 
 5653   ins_cost(125);
 5654   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5655   ins_encode %{
 5656     __ movzbl($dst$$Register, $mem$$Address);
 5657   %}
 5658   ins_pipe(ialu_reg_mem);
 5659 %}
 5660 
 5661 // Load Integer (32 bit signed) to Short (16 bit signed)
 5662 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5663   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5664 
 5665   ins_cost(125);
 5666   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5667   ins_encode %{
 5668     __ movswl($dst$$Register, $mem$$Address);
 5669   %}
 5670   ins_pipe(ialu_reg_mem);
 5671 %}
 5672 
 5673 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5674 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5675   match(Set dst (AndI (LoadI mem) mask));
 5676 
 5677   ins_cost(125);
 5678   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5679   ins_encode %{
 5680     __ movzwl($dst$$Register, $mem$$Address);
 5681   %}
 5682   ins_pipe(ialu_reg_mem);
 5683 %}
 5684 
 5685 // Load Integer into Long Register
 5686 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5687   match(Set dst (ConvI2L (LoadI mem)));
 5688   effect(KILL cr);
 5689 
 5690   ins_cost(375);
 5691   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5692             "MOV    $dst.hi,$dst.lo\n\t"
 5693             "SAR    $dst.hi,31" %}
 5694 
 5695   ins_encode %{
 5696     __ movl($dst$$Register, $mem$$Address);
 5697     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5698     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5699   %}
 5700 
 5701   ins_pipe(ialu_reg_mem);
 5702 %}
 5703 
 5704 // Load Integer with mask 0xFF into Long Register
 5705 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5706   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5707   effect(KILL cr);
 5708 
 5709   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5710             "XOR    $dst.hi,$dst.hi" %}
 5711   ins_encode %{
 5712     Register Rdst = $dst$$Register;
 5713     __ movzbl(Rdst, $mem$$Address);
 5714     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5715   %}
 5716   ins_pipe(ialu_reg_mem);
 5717 %}
 5718 
 5719 // Load Integer with mask 0xFFFF into Long Register
 5720 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5721   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5722   effect(KILL cr);
 5723 
 5724   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5725             "XOR    $dst.hi,$dst.hi" %}
 5726   ins_encode %{
 5727     Register Rdst = $dst$$Register;
 5728     __ movzwl(Rdst, $mem$$Address);
 5729     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5730   %}
 5731   ins_pipe(ialu_reg_mem);
 5732 %}
 5733 
 5734 // Load Integer with 31-bit mask into Long Register
 5735 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5736   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5737   effect(KILL cr);
 5738 
 5739   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5740             "XOR    $dst.hi,$dst.hi\n\t"
 5741             "AND    $dst.lo,$mask" %}
 5742   ins_encode %{
 5743     Register Rdst = $dst$$Register;
 5744     __ movl(Rdst, $mem$$Address);
 5745     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5746     __ andl(Rdst, $mask$$constant);
 5747   %}
 5748   ins_pipe(ialu_reg_mem);
 5749 %}
 5750 
 5751 // Load Unsigned Integer into Long Register
 5752 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5753   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5754   effect(KILL cr);
 5755 
 5756   ins_cost(250);
 5757   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5758             "XOR    $dst.hi,$dst.hi" %}
 5759 
 5760   ins_encode %{
 5761     __ movl($dst$$Register, $mem$$Address);
 5762     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5763   %}
 5764 
 5765   ins_pipe(ialu_reg_mem);
 5766 %}
 5767 
 5768 // Load Long.  Cannot clobber address while loading, so restrict address
 5769 // register to ESI
 5770 instruct loadL(eRegL dst, load_long_memory mem) %{
 5771   predicate(!((LoadLNode*)n)->require_atomic_access());
 5772   match(Set dst (LoadL mem));
 5773 
 5774   ins_cost(250);
 5775   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5776             "MOV    $dst.hi,$mem+4" %}
 5777 
 5778   ins_encode %{
 5779     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5780     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5781     __ movl($dst$$Register, Amemlo);
 5782     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5783   %}
 5784 
 5785   ins_pipe(ialu_reg_long_mem);
 5786 %}
 5787 
 5788 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5789 // then store it down to the stack and reload on the int
 5790 // side.
 5791 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5792   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5793   match(Set dst (LoadL mem));
 5794 
 5795   ins_cost(200);
 5796   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5797             "FISTp  $dst" %}
 5798   ins_encode(enc_loadL_volatile(mem,dst));
 5799   ins_pipe( fpu_reg_mem );
 5800 %}
 5801 
 5802 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5803   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5804   match(Set dst (LoadL mem));
 5805   effect(TEMP tmp);
 5806   ins_cost(180);
 5807   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5808             "MOVSD  $dst,$tmp" %}
 5809   ins_encode %{
 5810     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5811     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5812   %}
 5813   ins_pipe( pipe_slow );
 5814 %}
 5815 
 5816 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5817   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5818   match(Set dst (LoadL mem));
 5819   effect(TEMP tmp);
 5820   ins_cost(160);
 5821   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5822             "MOVD   $dst.lo,$tmp\n\t"
 5823             "PSRLQ  $tmp,32\n\t"
 5824             "MOVD   $dst.hi,$tmp" %}
 5825   ins_encode %{
 5826     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5827     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5828     __ psrlq($tmp$$XMMRegister, 32);
 5829     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5830   %}
 5831   ins_pipe( pipe_slow );
 5832 %}
 5833 
 5834 // Load Range
 5835 instruct loadRange(rRegI dst, memory mem) %{
 5836   match(Set dst (LoadRange mem));
 5837 
 5838   ins_cost(125);
 5839   format %{ "MOV    $dst,$mem" %}
 5840   opcode(0x8B);
 5841   ins_encode( OpcP, RegMem(dst,mem));
 5842   ins_pipe( ialu_reg_mem );
 5843 %}
 5844 
 5845 
 5846 // Load Pointer
 5847 instruct loadP(eRegP dst, memory mem) %{
 5848   match(Set dst (LoadP mem));
 5849 
 5850   ins_cost(125);
 5851   format %{ "MOV    $dst,$mem" %}
 5852   opcode(0x8B);
 5853   ins_encode( OpcP, RegMem(dst,mem));
 5854   ins_pipe( ialu_reg_mem );
 5855 %}
 5856 
 5857 // Load Klass Pointer
 5858 instruct loadKlass(eRegP dst, memory mem) %{
 5859   match(Set dst (LoadKlass mem));
 5860 
 5861   ins_cost(125);
 5862   format %{ "MOV    $dst,$mem" %}
 5863   opcode(0x8B);
 5864   ins_encode( OpcP, RegMem(dst,mem));
 5865   ins_pipe( ialu_reg_mem );
 5866 %}
 5867 
 5868 // Load Double
 5869 instruct loadDPR(regDPR dst, memory mem) %{
 5870   predicate(UseSSE<=1);
 5871   match(Set dst (LoadD mem));
 5872 
 5873   ins_cost(150);
 5874   format %{ "FLD_D  ST,$mem\n\t"
 5875             "FSTP   $dst" %}
 5876   opcode(0xDD);               /* DD /0 */
 5877   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5878               Pop_Reg_DPR(dst) );
 5879   ins_pipe( fpu_reg_mem );
 5880 %}
 5881 
 5882 // Load Double to XMM
 5883 instruct loadD(regD dst, memory mem) %{
 5884   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5885   match(Set dst (LoadD mem));
 5886   ins_cost(145);
 5887   format %{ "MOVSD  $dst,$mem" %}
 5888   ins_encode %{
 5889     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5890   %}
 5891   ins_pipe( pipe_slow );
 5892 %}
 5893 
 5894 instruct loadD_partial(regD dst, memory mem) %{
 5895   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5896   match(Set dst (LoadD mem));
 5897   ins_cost(145);
 5898   format %{ "MOVLPD $dst,$mem" %}
 5899   ins_encode %{
 5900     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5901   %}
 5902   ins_pipe( pipe_slow );
 5903 %}
 5904 
 5905 // Load to XMM register (single-precision floating point)
 5906 // MOVSS instruction
 5907 instruct loadF(regF dst, memory mem) %{
 5908   predicate(UseSSE>=1);
 5909   match(Set dst (LoadF mem));
 5910   ins_cost(145);
 5911   format %{ "MOVSS  $dst,$mem" %}
 5912   ins_encode %{
 5913     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5914   %}
 5915   ins_pipe( pipe_slow );
 5916 %}
 5917 
 5918 // Load Float
 5919 instruct loadFPR(regFPR dst, memory mem) %{
 5920   predicate(UseSSE==0);
 5921   match(Set dst (LoadF mem));
 5922 
 5923   ins_cost(150);
 5924   format %{ "FLD_S  ST,$mem\n\t"
 5925             "FSTP   $dst" %}
 5926   opcode(0xD9);               /* D9 /0 */
 5927   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5928               Pop_Reg_FPR(dst) );
 5929   ins_pipe( fpu_reg_mem );
 5930 %}
 5931 
 5932 // Load Effective Address
 5933 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5934   match(Set dst mem);
 5935 
 5936   ins_cost(110);
 5937   format %{ "LEA    $dst,$mem" %}
 5938   opcode(0x8D);
 5939   ins_encode( OpcP, RegMem(dst,mem));
 5940   ins_pipe( ialu_reg_reg_fat );
 5941 %}
 5942 
 5943 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5944   match(Set dst mem);
 5945 
 5946   ins_cost(110);
 5947   format %{ "LEA    $dst,$mem" %}
 5948   opcode(0x8D);
 5949   ins_encode( OpcP, RegMem(dst,mem));
 5950   ins_pipe( ialu_reg_reg_fat );
 5951 %}
 5952 
 5953 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5954   match(Set dst mem);
 5955 
 5956   ins_cost(110);
 5957   format %{ "LEA    $dst,$mem" %}
 5958   opcode(0x8D);
 5959   ins_encode( OpcP, RegMem(dst,mem));
 5960   ins_pipe( ialu_reg_reg_fat );
 5961 %}
 5962 
 5963 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5964   match(Set dst mem);
 5965 
 5966   ins_cost(110);
 5967   format %{ "LEA    $dst,$mem" %}
 5968   opcode(0x8D);
 5969   ins_encode( OpcP, RegMem(dst,mem));
 5970   ins_pipe( ialu_reg_reg_fat );
 5971 %}
 5972 
 5973 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5974   match(Set dst mem);
 5975 
 5976   ins_cost(110);
 5977   format %{ "LEA    $dst,$mem" %}
 5978   opcode(0x8D);
 5979   ins_encode( OpcP, RegMem(dst,mem));
 5980   ins_pipe( ialu_reg_reg_fat );
 5981 %}
 5982 
 5983 // Load Constant
 5984 instruct loadConI(rRegI dst, immI src) %{
 5985   match(Set dst src);
 5986 
 5987   format %{ "MOV    $dst,$src" %}
 5988   ins_encode( LdImmI(dst, src) );
 5989   ins_pipe( ialu_reg_fat );
 5990 %}
 5991 
 5992 // Load Constant zero
 5993 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5994   match(Set dst src);
 5995   effect(KILL cr);
 5996 
 5997   ins_cost(50);
 5998   format %{ "XOR    $dst,$dst" %}
 5999   opcode(0x33);  /* + rd */
 6000   ins_encode( OpcP, RegReg( dst, dst ) );
 6001   ins_pipe( ialu_reg );
 6002 %}
 6003 
 6004 instruct loadConP(eRegP dst, immP src) %{
 6005   match(Set dst src);
 6006 
 6007   format %{ "MOV    $dst,$src" %}
 6008   opcode(0xB8);  /* + rd */
 6009   ins_encode( LdImmP(dst, src) );
 6010   ins_pipe( ialu_reg_fat );
 6011 %}
 6012 
 6013 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6014   match(Set dst src);
 6015   effect(KILL cr);
 6016   ins_cost(200);
 6017   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6018             "MOV    $dst.hi,$src.hi" %}
 6019   opcode(0xB8);
 6020   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6021   ins_pipe( ialu_reg_long_fat );
 6022 %}
 6023 
 6024 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6025   match(Set dst src);
 6026   effect(KILL cr);
 6027   ins_cost(150);
 6028   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6029             "XOR    $dst.hi,$dst.hi" %}
 6030   opcode(0x33,0x33);
 6031   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6032   ins_pipe( ialu_reg_long );
 6033 %}
 6034 
 6035 // The instruction usage is guarded by predicate in operand immFPR().
 6036 instruct loadConFPR(regFPR dst, immFPR con) %{
 6037   match(Set dst con);
 6038   ins_cost(125);
 6039   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6040             "FSTP   $dst" %}
 6041   ins_encode %{
 6042     __ fld_s($constantaddress($con));
 6043     __ fstp_d($dst$$reg);
 6044   %}
 6045   ins_pipe(fpu_reg_con);
 6046 %}
 6047 
 6048 // The instruction usage is guarded by predicate in operand immFPR0().
 6049 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6050   match(Set dst con);
 6051   ins_cost(125);
 6052   format %{ "FLDZ   ST\n\t"
 6053             "FSTP   $dst" %}
 6054   ins_encode %{
 6055     __ fldz();
 6056     __ fstp_d($dst$$reg);
 6057   %}
 6058   ins_pipe(fpu_reg_con);
 6059 %}
 6060 
 6061 // The instruction usage is guarded by predicate in operand immFPR1().
 6062 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6063   match(Set dst con);
 6064   ins_cost(125);
 6065   format %{ "FLD1   ST\n\t"
 6066             "FSTP   $dst" %}
 6067   ins_encode %{
 6068     __ fld1();
 6069     __ fstp_d($dst$$reg);
 6070   %}
 6071   ins_pipe(fpu_reg_con);
 6072 %}
 6073 
 6074 // The instruction usage is guarded by predicate in operand immF().
 6075 instruct loadConF(regF dst, immF con) %{
 6076   match(Set dst con);
 6077   ins_cost(125);
 6078   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6079   ins_encode %{
 6080     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6081   %}
 6082   ins_pipe(pipe_slow);
 6083 %}
 6084 
 6085 // The instruction usage is guarded by predicate in operand immF0().
 6086 instruct loadConF0(regF dst, immF0 src) %{
 6087   match(Set dst src);
 6088   ins_cost(100);
 6089   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6090   ins_encode %{
 6091     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6092   %}
 6093   ins_pipe(pipe_slow);
 6094 %}
 6095 
 6096 // The instruction usage is guarded by predicate in operand immDPR().
 6097 instruct loadConDPR(regDPR dst, immDPR con) %{
 6098   match(Set dst con);
 6099   ins_cost(125);
 6100 
 6101   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6102             "FSTP   $dst" %}
 6103   ins_encode %{
 6104     __ fld_d($constantaddress($con));
 6105     __ fstp_d($dst$$reg);
 6106   %}
 6107   ins_pipe(fpu_reg_con);
 6108 %}
 6109 
 6110 // The instruction usage is guarded by predicate in operand immDPR0().
 6111 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6112   match(Set dst con);
 6113   ins_cost(125);
 6114 
 6115   format %{ "FLDZ   ST\n\t"
 6116             "FSTP   $dst" %}
 6117   ins_encode %{
 6118     __ fldz();
 6119     __ fstp_d($dst$$reg);
 6120   %}
 6121   ins_pipe(fpu_reg_con);
 6122 %}
 6123 
 6124 // The instruction usage is guarded by predicate in operand immDPR1().
 6125 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6126   match(Set dst con);
 6127   ins_cost(125);
 6128 
 6129   format %{ "FLD1   ST\n\t"
 6130             "FSTP   $dst" %}
 6131   ins_encode %{
 6132     __ fld1();
 6133     __ fstp_d($dst$$reg);
 6134   %}
 6135   ins_pipe(fpu_reg_con);
 6136 %}
 6137 
 6138 // The instruction usage is guarded by predicate in operand immD().
 6139 instruct loadConD(regD dst, immD con) %{
 6140   match(Set dst con);
 6141   ins_cost(125);
 6142   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6143   ins_encode %{
 6144     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6145   %}
 6146   ins_pipe(pipe_slow);
 6147 %}
 6148 
 6149 // The instruction usage is guarded by predicate in operand immD0().
 6150 instruct loadConD0(regD dst, immD0 src) %{
 6151   match(Set dst src);
 6152   ins_cost(100);
 6153   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6154   ins_encode %{
 6155     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6156   %}
 6157   ins_pipe( pipe_slow );
 6158 %}
 6159 
 6160 // Load Stack Slot
 6161 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6162   match(Set dst src);
 6163   ins_cost(125);
 6164 
 6165   format %{ "MOV    $dst,$src" %}
 6166   opcode(0x8B);
 6167   ins_encode( OpcP, RegMem(dst,src));
 6168   ins_pipe( ialu_reg_mem );
 6169 %}
 6170 
 6171 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6172   match(Set dst src);
 6173 
 6174   ins_cost(200);
 6175   format %{ "MOV    $dst,$src.lo\n\t"
 6176             "MOV    $dst+4,$src.hi" %}
 6177   opcode(0x8B, 0x8B);
 6178   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6179   ins_pipe( ialu_mem_long_reg );
 6180 %}
 6181 
 6182 // Load Stack Slot
 6183 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6184   match(Set dst src);
 6185   ins_cost(125);
 6186 
 6187   format %{ "MOV    $dst,$src" %}
 6188   opcode(0x8B);
 6189   ins_encode( OpcP, RegMem(dst,src));
 6190   ins_pipe( ialu_reg_mem );
 6191 %}
 6192 
 6193 // Load Stack Slot
 6194 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6195   match(Set dst src);
 6196   ins_cost(125);
 6197 
 6198   format %{ "FLD_S  $src\n\t"
 6199             "FSTP   $dst" %}
 6200   opcode(0xD9);               /* D9 /0, FLD m32real */
 6201   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6202               Pop_Reg_FPR(dst) );
 6203   ins_pipe( fpu_reg_mem );
 6204 %}
 6205 
 6206 // Load Stack Slot
 6207 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6208   match(Set dst src);
 6209   ins_cost(125);
 6210 
 6211   format %{ "FLD_D  $src\n\t"
 6212             "FSTP   $dst" %}
 6213   opcode(0xDD);               /* DD /0, FLD m64real */
 6214   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6215               Pop_Reg_DPR(dst) );
 6216   ins_pipe( fpu_reg_mem );
 6217 %}
 6218 
 6219 // Prefetch instructions for allocation.
 6220 // Must be safe to execute with invalid address (cannot fault).
 6221 
 6222 instruct prefetchAlloc0( memory mem ) %{
 6223   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6224   match(PrefetchAllocation mem);
 6225   ins_cost(0);
 6226   size(0);
 6227   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6228   ins_encode();
 6229   ins_pipe(empty);
 6230 %}
 6231 
 6232 instruct prefetchAlloc( memory mem ) %{
 6233   predicate(AllocatePrefetchInstr==3);
 6234   match( PrefetchAllocation mem );
 6235   ins_cost(100);
 6236 
 6237   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6238   ins_encode %{
 6239     __ prefetchw($mem$$Address);
 6240   %}
 6241   ins_pipe(ialu_mem);
 6242 %}
 6243 
 6244 instruct prefetchAllocNTA( memory mem ) %{
 6245   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6246   match(PrefetchAllocation mem);
 6247   ins_cost(100);
 6248 
 6249   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6250   ins_encode %{
 6251     __ prefetchnta($mem$$Address);
 6252   %}
 6253   ins_pipe(ialu_mem);
 6254 %}
 6255 
 6256 instruct prefetchAllocT0( memory mem ) %{
 6257   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6258   match(PrefetchAllocation mem);
 6259   ins_cost(100);
 6260 
 6261   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6262   ins_encode %{
 6263     __ prefetcht0($mem$$Address);
 6264   %}
 6265   ins_pipe(ialu_mem);
 6266 %}
 6267 
 6268 instruct prefetchAllocT2( memory mem ) %{
 6269   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6270   match(PrefetchAllocation mem);
 6271   ins_cost(100);
 6272 
 6273   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6274   ins_encode %{
 6275     __ prefetcht2($mem$$Address);
 6276   %}
 6277   ins_pipe(ialu_mem);
 6278 %}
 6279 
 6280 //----------Store Instructions-------------------------------------------------
 6281 
 6282 // Store Byte
 6283 instruct storeB(memory mem, xRegI src) %{
 6284   match(Set mem (StoreB mem src));
 6285 
 6286   ins_cost(125);
 6287   format %{ "MOV8   $mem,$src" %}
 6288   opcode(0x88);
 6289   ins_encode( OpcP, RegMem( src, mem ) );
 6290   ins_pipe( ialu_mem_reg );
 6291 %}
 6292 
 6293 // Store Char/Short
 6294 instruct storeC(memory mem, rRegI src) %{
 6295   match(Set mem (StoreC mem src));
 6296 
 6297   ins_cost(125);
 6298   format %{ "MOV16  $mem,$src" %}
 6299   opcode(0x89, 0x66);
 6300   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6301   ins_pipe( ialu_mem_reg );
 6302 %}
 6303 
 6304 // Store Integer
 6305 instruct storeI(memory mem, rRegI src) %{
 6306   match(Set mem (StoreI mem src));
 6307 
 6308   ins_cost(125);
 6309   format %{ "MOV    $mem,$src" %}
 6310   opcode(0x89);
 6311   ins_encode( OpcP, RegMem( src, mem ) );
 6312   ins_pipe( ialu_mem_reg );
 6313 %}
 6314 
 6315 // Store Long
 6316 instruct storeL(long_memory mem, eRegL src) %{
 6317   predicate(!((StoreLNode*)n)->require_atomic_access());
 6318   match(Set mem (StoreL mem src));
 6319 
 6320   ins_cost(200);
 6321   format %{ "MOV    $mem,$src.lo\n\t"
 6322             "MOV    $mem+4,$src.hi" %}
 6323   opcode(0x89, 0x89);
 6324   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6325   ins_pipe( ialu_mem_long_reg );
 6326 %}
 6327 
 6328 // Store Long to Integer
 6329 instruct storeL2I(memory mem, eRegL src) %{
 6330   match(Set mem (StoreI mem (ConvL2I src)));
 6331 
 6332   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6333   ins_encode %{
 6334     __ movl($mem$$Address, $src$$Register);
 6335   %}
 6336   ins_pipe(ialu_mem_reg);
 6337 %}
 6338 
 6339 // Volatile Store Long.  Must be atomic, so move it into
 6340 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6341 // target address before the store (for null-ptr checks)
 6342 // so the memory operand is used twice in the encoding.
 6343 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6344   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6345   match(Set mem (StoreL mem src));
 6346   effect( KILL cr );
 6347   ins_cost(400);
 6348   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6349             "FILD   $src\n\t"
 6350             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6351   opcode(0x3B);
 6352   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6353   ins_pipe( fpu_reg_mem );
 6354 %}
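
// For reference, a rough MacroAssembler equivalent of the enc_storeL_volatile
// encoding used above (an illustrative sketch only, not the actual encoding
// class; it assumes the usual fild_d/fistp_d assembler entry points):
//
//   __ cmpl(rax, $mem$$Address);          // probe address for implicit null check
//   __ fild_d(Address(rsp, $src$$disp));  // push the 64-bit value onto the FP stack
//   __ fistp_d($mem$$Address);            // a single 64-bit store, hence atomic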
 6355 
 6356 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6357   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6358   match(Set mem (StoreL mem src));
 6359   effect( TEMP tmp, KILL cr );
 6360   ins_cost(380);
 6361   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6362             "MOVSD  $tmp,$src\n\t"
 6363             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6364   ins_encode %{
 6365     __ cmpl(rax, $mem$$Address);
 6366     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6367     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6368   %}
 6369   ins_pipe( pipe_slow );
 6370 %}
 6371 
 6372 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6373   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6374   match(Set mem (StoreL mem src));
 6375   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6376   ins_cost(360);
 6377   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6378             "MOVD   $tmp,$src.lo\n\t"
 6379             "MOVD   $tmp2,$src.hi\n\t"
 6380             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6381             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6382   ins_encode %{
 6383     __ cmpl(rax, $mem$$Address);
 6384     __ movdl($tmp$$XMMRegister, $src$$Register);
 6385     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6386     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6387     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6388   %}
 6389   ins_pipe( pipe_slow );
 6390 %}
 6391 
 6392 // Store Pointer; for storing unknown oops and raw pointers
 6393 instruct storeP(memory mem, anyRegP src) %{
 6394   match(Set mem (StoreP mem src));
 6395 
 6396   ins_cost(125);
 6397   format %{ "MOV    $mem,$src" %}
 6398   opcode(0x89);
 6399   ins_encode( OpcP, RegMem( src, mem ) );
 6400   ins_pipe( ialu_mem_reg );
 6401 %}
 6402 
 6403 // Store Integer Immediate
 6404 instruct storeImmI(memory mem, immI src) %{
 6405   match(Set mem (StoreI mem src));
 6406 
 6407   ins_cost(150);
 6408   format %{ "MOV    $mem,$src" %}
 6409   opcode(0xC7);               /* C7 /0 */
 6410   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6411   ins_pipe( ialu_mem_imm );
 6412 %}
 6413 
 6414 // Store Short/Char Immediate
 6415 instruct storeImmI16(memory mem, immI16 src) %{
 6416   predicate(UseStoreImmI16);
 6417   match(Set mem (StoreC mem src));
 6418 
 6419   ins_cost(150);
 6420   format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as the 32-bit store immediate, with an operand-size prefix */
 6422   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6423   ins_pipe( ialu_mem_imm );
 6424 %}
 6425 
 6426 // Store Pointer Immediate; null pointers or constant oops that do not
 6427 // need card-mark barriers.
 6428 instruct storeImmP(memory mem, immP src) %{
 6429   match(Set mem (StoreP mem src));
 6430 
 6431   ins_cost(150);
 6432   format %{ "MOV    $mem,$src" %}
 6433   opcode(0xC7);               /* C7 /0 */
 6434   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6435   ins_pipe( ialu_mem_imm );
 6436 %}
 6437 
 6438 // Store Byte Immediate
 6439 instruct storeImmB(memory mem, immI8 src) %{
 6440   match(Set mem (StoreB mem src));
 6441 
 6442   ins_cost(150);
 6443   format %{ "MOV8   $mem,$src" %}
 6444   opcode(0xC6);               /* C6 /0 */
 6445   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6446   ins_pipe( ialu_mem_imm );
 6447 %}
 6448 
 6449 // Store CMS card-mark Immediate
 6450 instruct storeImmCM(memory mem, immI8 src) %{
 6451   match(Set mem (StoreCM mem src));
 6452 
 6453   ins_cost(150);
 6454   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6455   opcode(0xC6);               /* C6 /0 */
 6456   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6457   ins_pipe( ialu_mem_imm );
 6458 %}
 6459 
 6460 // Store Double
 6461 instruct storeDPR( memory mem, regDPR1 src) %{
 6462   predicate(UseSSE<=1);
 6463   match(Set mem (StoreD mem src));
 6464 
 6465   ins_cost(100);
 6466   format %{ "FST_D  $mem,$src" %}
 6467   opcode(0xDD);       /* DD /2 */
 6468   ins_encode( enc_FPR_store(mem,src) );
 6469   ins_pipe( fpu_mem_reg );
 6470 %}
 6471 
 6472 // Store double does rounding on x86
 6473 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6474   predicate(UseSSE<=1);
 6475   match(Set mem (StoreD mem (RoundDouble src)));
 6476 
 6477   ins_cost(100);
 6478   format %{ "FST_D  $mem,$src\t# round" %}
 6479   opcode(0xDD);       /* DD /2 */
 6480   ins_encode( enc_FPR_store(mem,src) );
 6481   ins_pipe( fpu_mem_reg );
 6482 %}
 6483 
// Store XMM register to memory (double-precision floating point)
 6485 // MOVSD instruction
 6486 instruct storeD(memory mem, regD src) %{
 6487   predicate(UseSSE>=2);
 6488   match(Set mem (StoreD mem src));
 6489   ins_cost(95);
 6490   format %{ "MOVSD  $mem,$src" %}
 6491   ins_encode %{
 6492     __ movdbl($mem$$Address, $src$$XMMRegister);
 6493   %}
 6494   ins_pipe( pipe_slow );
 6495 %}
 6496 
 6497 // Store XMM register to memory (single-precision floating point)
 6498 // MOVSS instruction
 6499 instruct storeF(memory mem, regF src) %{
 6500   predicate(UseSSE>=1);
 6501   match(Set mem (StoreF mem src));
 6502   ins_cost(95);
 6503   format %{ "MOVSS  $mem,$src" %}
 6504   ins_encode %{
 6505     __ movflt($mem$$Address, $src$$XMMRegister);
 6506   %}
 6507   ins_pipe( pipe_slow );
 6508 %}
 6509 
 6510 
 6511 // Store Float
 6512 instruct storeFPR( memory mem, regFPR1 src) %{
 6513   predicate(UseSSE==0);
 6514   match(Set mem (StoreF mem src));
 6515 
 6516   ins_cost(100);
 6517   format %{ "FST_S  $mem,$src" %}
 6518   opcode(0xD9);       /* D9 /2 */
 6519   ins_encode( enc_FPR_store(mem,src) );
 6520   ins_pipe( fpu_mem_reg );
 6521 %}
 6522 
 6523 // Store Float does rounding on x86
 6524 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6525   predicate(UseSSE==0);
 6526   match(Set mem (StoreF mem (RoundFloat src)));
 6527 
 6528   ins_cost(100);
 6529   format %{ "FST_S  $mem,$src\t# round" %}
 6530   opcode(0xD9);       /* D9 /2 */
 6531   ins_encode( enc_FPR_store(mem,src) );
 6532   ins_pipe( fpu_mem_reg );
 6533 %}
 6534 
// Store Float from a double; the store performs the rounding on x86
 6536 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6537   predicate(UseSSE<=1);
 6538   match(Set mem (StoreF mem (ConvD2F src)));
 6539 
 6540   ins_cost(100);
 6541   format %{ "FST_S  $mem,$src\t# D-round" %}
 6542   opcode(0xD9);       /* D9 /2 */
 6543   ins_encode( enc_FPR_store(mem,src) );
 6544   ins_pipe( fpu_mem_reg );
 6545 %}
 6546 
// Store immediate Float value (faster than storing from an FPU register)
 6548 // The instruction usage is guarded by predicate in operand immFPR().
 6549 instruct storeFPR_imm( memory mem, immFPR src) %{
 6550   match(Set mem (StoreF mem src));
 6551 
 6552   ins_cost(50);
 6553   format %{ "MOV    $mem,$src\t# store float" %}
 6554   opcode(0xC7);               /* C7 /0 */
 6555   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6556   ins_pipe( ialu_mem_imm );
 6557 %}
 6558 
// Store immediate Float value (faster than storing from an XMM register)
 6560 // The instruction usage is guarded by predicate in operand immF().
 6561 instruct storeF_imm( memory mem, immF src) %{
 6562   match(Set mem (StoreF mem src));
 6563 
 6564   ins_cost(50);
 6565   format %{ "MOV    $mem,$src\t# store float" %}
 6566   opcode(0xC7);               /* C7 /0 */
 6567   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6568   ins_pipe( ialu_mem_imm );
 6569 %}
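
// Both immediate-float stores above avoid the FPU/XMM unit entirely: the
// IEEE-754 bit pattern of the constant is emitted as an ordinary 32-bit
// integer immediate. Roughly, in C terms (an illustrative sketch, not HotSpot
// code):
//
//   int32_t bits;
//   memcpy(&bits, &con, sizeof(bits));   // reinterpret the float as raw bits
//   *(int32_t*)mem = bits;               // MOV dword ptr [mem], imm32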
 6570 
 6571 // Store Integer to stack slot
 6572 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6573   match(Set dst src);
 6574 
 6575   ins_cost(100);
 6576   format %{ "MOV    $dst,$src" %}
 6577   opcode(0x89);
 6578   ins_encode( OpcPRegSS( dst, src ) );
 6579   ins_pipe( ialu_mem_reg );
 6580 %}
 6581 
// Store Pointer to stack slot
 6583 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6584   match(Set dst src);
 6585 
 6586   ins_cost(100);
 6587   format %{ "MOV    $dst,$src" %}
 6588   opcode(0x89);
 6589   ins_encode( OpcPRegSS( dst, src ) );
 6590   ins_pipe( ialu_mem_reg );
 6591 %}
 6592 
 6593 // Store Long to stack slot
 6594 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6595   match(Set dst src);
 6596 
 6597   ins_cost(200);
 6598   format %{ "MOV    $dst,$src.lo\n\t"
 6599             "MOV    $dst+4,$src.hi" %}
 6600   opcode(0x89, 0x89);
 6601   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6602   ins_pipe( ialu_mem_long_reg );
 6603 %}
 6604 
 6605 //----------MemBar Instructions-----------------------------------------------
 6606 // Memory barrier flavors
 6607 
 6608 instruct membar_acquire() %{
 6609   match(MemBarAcquire);
 6610   match(LoadFence);
 6611   ins_cost(400);
 6612 
 6613   size(0);
 6614   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6615   ins_encode();
 6616   ins_pipe(empty);
 6617 %}
 6618 
 6619 instruct membar_acquire_lock() %{
 6620   match(MemBarAcquireLock);
 6621   ins_cost(0);
 6622 
 6623   size(0);
 6624   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6625   ins_encode( );
 6626   ins_pipe(empty);
 6627 %}
 6628 
 6629 instruct membar_release() %{
 6630   match(MemBarRelease);
 6631   match(StoreFence);
 6632   ins_cost(400);
 6633 
 6634   size(0);
 6635   format %{ "MEMBAR-release ! (empty encoding)" %}
 6636   ins_encode( );
 6637   ins_pipe(empty);
 6638 %}
 6639 
 6640 instruct membar_release_lock() %{
 6641   match(MemBarReleaseLock);
 6642   ins_cost(0);
 6643 
 6644   size(0);
 6645   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6646   ins_encode( );
 6647   ins_pipe(empty);
 6648 %}
 6649 
 6650 instruct membar_volatile(eFlagsReg cr) %{
 6651   match(MemBarVolatile);
 6652   effect(KILL cr);
 6653   ins_cost(400);
 6654 
 6655   format %{
 6656     $$template
 6657     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6658   %}
 6659   ins_encode %{
 6660     __ membar(Assembler::StoreLoad);
 6661   %}
 6662   ins_pipe(pipe_slow);
 6663 %}
 6664 
 6665 instruct unnecessary_membar_volatile() %{
 6666   match(MemBarVolatile);
 6667   predicate(Matcher::post_store_load_barrier(n));
 6668   ins_cost(0);
 6669 
 6670   size(0);
 6671   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6672   ins_encode( );
 6673   ins_pipe(empty);
 6674 %}
 6675 
 6676 instruct membar_storestore() %{
 6677   match(MemBarStoreStore);
 6678   match(StoreStoreFence);
 6679   ins_cost(0);
 6680 
 6681   size(0);
 6682   format %{ "MEMBAR-storestore (empty encoding)" %}
 6683   ins_encode( );
 6684   ins_pipe(empty);
 6685 %}
 6686 
 6687 //----------Move Instructions--------------------------------------------------
 6688 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6689   match(Set dst (CastX2P src));
 6690   format %{ "# X2P  $dst, $src" %}
 6691   ins_encode( /*empty encoding*/ );
 6692   ins_cost(0);
 6693   ins_pipe(empty);
 6694 %}
 6695 
 6696 instruct castP2X(rRegI dst, eRegP src ) %{
 6697   match(Set dst (CastP2X src));
 6698   ins_cost(50);
 6699   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6700   ins_encode( enc_Copy( dst, src) );
 6701   ins_pipe( ialu_reg_reg );
 6702 %}
 6703 
 6704 //----------Conditional Move---------------------------------------------------
 6705 // Conditional move
 6706 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6707   predicate(!VM_Version::supports_cmov() );
 6708   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6709   ins_cost(200);
 6710   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6711             "MOV    $dst,$src\n"
 6712       "skip:" %}
 6713   ins_encode %{
 6714     Label Lskip;
 6715     // Invert sense of branch from sense of CMOV
 6716     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6717     __ movl($dst$$Register, $src$$Register);
 6718     __ bind(Lskip);
 6719   %}
 6720   ins_pipe( pipe_cmov_reg );
 6721 %}
 6722 
 6723 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6724   predicate(!VM_Version::supports_cmov() );
 6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6726   ins_cost(200);
 6727   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6728             "MOV    $dst,$src\n"
 6729       "skip:" %}
 6730   ins_encode %{
 6731     Label Lskip;
 6732     // Invert sense of branch from sense of CMOV
 6733     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6734     __ movl($dst$$Register, $src$$Register);
 6735     __ bind(Lskip);
 6736   %}
 6737   ins_pipe( pipe_cmov_reg );
 6738 %}
 6739 
 6740 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6741   predicate(VM_Version::supports_cmov() );
 6742   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6743   ins_cost(200);
 6744   format %{ "CMOV$cop $dst,$src" %}
 6745   opcode(0x0F,0x40);
 6746   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6747   ins_pipe( pipe_cmov_reg );
 6748 %}
 6749 
 6750 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6751   predicate(VM_Version::supports_cmov() );
 6752   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6753   ins_cost(200);
 6754   format %{ "CMOV$cop $dst,$src" %}
 6755   opcode(0x0F,0x40);
 6756   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6757   ins_pipe( pipe_cmov_reg );
 6758 %}
 6759 
 6760 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6761   predicate(VM_Version::supports_cmov() );
 6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6763   ins_cost(200);
 6764   expand %{
 6765     cmovI_regU(cop, cr, dst, src);
 6766   %}
 6767 %}
 6768 
 6769 // Conditional move
 6770 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6771   predicate(VM_Version::supports_cmov() );
 6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6773   ins_cost(250);
 6774   format %{ "CMOV$cop $dst,$src" %}
 6775   opcode(0x0F,0x40);
 6776   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6777   ins_pipe( pipe_cmov_mem );
 6778 %}
 6779 
 6780 // Conditional move
 6781 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6782   predicate(VM_Version::supports_cmov() );
 6783   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6784   ins_cost(250);
 6785   format %{ "CMOV$cop $dst,$src" %}
 6786   opcode(0x0F,0x40);
 6787   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6788   ins_pipe( pipe_cmov_mem );
 6789 %}
 6790 
 6791 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6792   predicate(VM_Version::supports_cmov() );
 6793   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6794   ins_cost(250);
 6795   expand %{
 6796     cmovI_memU(cop, cr, dst, src);
 6797   %}
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6802   predicate(VM_Version::supports_cmov() );
 6803   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6804   ins_cost(200);
 6805   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6806   opcode(0x0F,0x40);
 6807   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6808   ins_pipe( pipe_cmov_reg );
 6809 %}
 6810 
 6811 // Conditional move (non-P6 version)
 6812 // Note:  a CMoveP is generated for  stubs and native wrappers
 6813 //        regardless of whether we are on a P6, so we
 6814 //        emulate a cmov here
 6815 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6816   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6817   ins_cost(300);
 6818   format %{ "Jn$cop   skip\n\t"
 6819           "MOV    $dst,$src\t# pointer\n"
 6820       "skip:" %}
 6821   opcode(0x8b);
 6822   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6823   ins_pipe( pipe_cmov_reg );
 6824 %}
 6825 
 6826 // Conditional move
 6827 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6828   predicate(VM_Version::supports_cmov() );
 6829   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6830   ins_cost(200);
 6831   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6832   opcode(0x0F,0x40);
 6833   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6834   ins_pipe( pipe_cmov_reg );
 6835 %}
 6836 
 6837 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6838   predicate(VM_Version::supports_cmov() );
 6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6840   ins_cost(200);
 6841   expand %{
 6842     cmovP_regU(cop, cr, dst, src);
 6843   %}
 6844 %}
 6845 
 6846 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6847 // correctly meets the two pointer arguments; one is an incoming
 6848 // register but the other is a memory operand.  ALSO appears to
 6849 // be buggy with implicit null checks.
 6850 //
 6851 //// Conditional move
 6852 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6853 //  predicate(VM_Version::supports_cmov() );
 6854 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6855 //  ins_cost(250);
 6856 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6857 //  opcode(0x0F,0x40);
 6858 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6859 //  ins_pipe( pipe_cmov_mem );
 6860 //%}
 6861 //
 6862 //// Conditional move
 6863 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6864 //  predicate(VM_Version::supports_cmov() );
 6865 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6866 //  ins_cost(250);
 6867 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6868 //  opcode(0x0F,0x40);
 6869 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6870 //  ins_pipe( pipe_cmov_mem );
 6871 //%}
 6872 
 6873 // Conditional move
 6874 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6875   predicate(UseSSE<=1);
 6876   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6877   ins_cost(200);
 6878   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6879   opcode(0xDA);
 6880   ins_encode( enc_cmov_dpr(cop,src) );
 6881   ins_pipe( pipe_cmovDPR_reg );
 6882 %}
 6883 
 6884 // Conditional move
 6885 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6886   predicate(UseSSE==0);
 6887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6888   ins_cost(200);
 6889   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6890   opcode(0xDA);
 6891   ins_encode( enc_cmov_dpr(cop,src) );
 6892   ins_pipe( pipe_cmovDPR_reg );
 6893 %}
 6894 
 6895 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6896 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6897   predicate(UseSSE<=1);
 6898   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6899   ins_cost(200);
 6900   format %{ "Jn$cop   skip\n\t"
 6901             "MOV    $dst,$src\t# double\n"
 6902       "skip:" %}
 6903   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6904   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6905   ins_pipe( pipe_cmovDPR_reg );
 6906 %}
 6907 
 6908 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6909 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6910   predicate(UseSSE==0);
 6911   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6912   ins_cost(200);
 6913   format %{ "Jn$cop    skip\n\t"
 6914             "MOV    $dst,$src\t# float\n"
 6915       "skip:" %}
 6916   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6917   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6918   ins_pipe( pipe_cmovDPR_reg );
 6919 %}
 6920 
// There is no CMOV for SSE/SSE2 registers, so emulate it with a conditional branch.
 6922 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6923   predicate (UseSSE>=1);
 6924   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6925   ins_cost(200);
 6926   format %{ "Jn$cop   skip\n\t"
 6927             "MOVSS  $dst,$src\t# float\n"
 6928       "skip:" %}
 6929   ins_encode %{
 6930     Label skip;
 6931     // Invert sense of branch from sense of CMOV
 6932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6933     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6934     __ bind(skip);
 6935   %}
 6936   ins_pipe( pipe_slow );
 6937 %}
 6938 
// There is no CMOV for SSE/SSE2 registers, so emulate it with a conditional branch.
 6940 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6941   predicate (UseSSE>=2);
 6942   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6943   ins_cost(200);
 6944   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6946       "skip:" %}
 6947   ins_encode %{
 6948     Label skip;
 6949     // Invert sense of branch from sense of CMOV
 6950     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6951     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6952     __ bind(skip);
 6953   %}
 6954   ins_pipe( pipe_slow );
 6955 %}
 6956 
 6957 // unsigned version
 6958 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6959   predicate (UseSSE>=1);
 6960   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6961   ins_cost(200);
 6962   format %{ "Jn$cop   skip\n\t"
 6963             "MOVSS  $dst,$src\t# float\n"
 6964       "skip:" %}
 6965   ins_encode %{
 6966     Label skip;
 6967     // Invert sense of branch from sense of CMOV
 6968     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6969     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6970     __ bind(skip);
 6971   %}
 6972   ins_pipe( pipe_slow );
 6973 %}
 6974 
 6975 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6976   predicate (UseSSE>=1);
 6977   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6978   ins_cost(200);
 6979   expand %{
 6980     fcmovF_regU(cop, cr, dst, src);
 6981   %}
 6982 %}
 6983 
 6984 // unsigned version
 6985 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6986   predicate (UseSSE>=2);
 6987   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6988   ins_cost(200);
 6989   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6991       "skip:" %}
 6992   ins_encode %{
 6993     Label skip;
 6994     // Invert sense of branch from sense of CMOV
 6995     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6996     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6997     __ bind(skip);
 6998   %}
 6999   ins_pipe( pipe_slow );
 7000 %}
 7001 
 7002 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7003   predicate (UseSSE>=2);
 7004   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7005   ins_cost(200);
 7006   expand %{
 7007     fcmovD_regU(cop, cr, dst, src);
 7008   %}
 7009 %}
 7010 
 7011 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7012   predicate(VM_Version::supports_cmov() );
 7013   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7014   ins_cost(200);
 7015   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7016             "CMOV$cop $dst.hi,$src.hi" %}
 7017   opcode(0x0F,0x40);
 7018   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7019   ins_pipe( pipe_cmov_reg_long );
 7020 %}
 7021 
 7022 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7023   predicate(VM_Version::supports_cmov() );
 7024   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7027             "CMOV$cop $dst.hi,$src.hi" %}
 7028   opcode(0x0F,0x40);
 7029   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7030   ins_pipe( pipe_cmov_reg_long );
 7031 %}
 7032 
 7033 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7034   predicate(VM_Version::supports_cmov() );
 7035   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7036   ins_cost(200);
 7037   expand %{
 7038     cmovL_regU(cop, cr, dst, src);
 7039   %}
 7040 %}
 7041 
 7042 //----------Arithmetic Instructions--------------------------------------------
 7043 //----------Addition Instructions----------------------------------------------
 7044 
 7045 // Integer Addition Instructions
 7046 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7047   match(Set dst (AddI dst src));
 7048   effect(KILL cr);
 7049 
 7050   size(2);
 7051   format %{ "ADD    $dst,$src" %}
 7052   opcode(0x03);
 7053   ins_encode( OpcP, RegReg( dst, src) );
 7054   ins_pipe( ialu_reg_reg );
 7055 %}
 7056 
 7057 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7058   match(Set dst (AddI dst src));
 7059   effect(KILL cr);
 7060 
 7061   format %{ "ADD    $dst,$src" %}
 7062   opcode(0x81, 0x00); /* /0 id */
 7063   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7064   ins_pipe( ialu_reg );
 7065 %}
 7066 
 7067 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7068   predicate(UseIncDec);
 7069   match(Set dst (AddI dst src));
 7070   effect(KILL cr);
 7071 
 7072   size(1);
 7073   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd: INC r32 */
 7075   ins_encode( Opc_plus( primary, dst ) );
 7076   ins_pipe( ialu_reg );
 7077 %}
 7078 
 7079 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7080   match(Set dst (AddI src0 src1));
 7081   ins_cost(110);
 7082 
 7083   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7084   opcode(0x8D); /* 0x8D /r */
 7085   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7086   ins_pipe( ialu_reg_reg );
 7087 %}
 7088 
 7089 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7090   match(Set dst (AddP src0 src1));
 7091   ins_cost(110);
 7092 
 7093   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7094   opcode(0x8D); /* 0x8D /r */
 7095   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7096   ins_pipe( ialu_reg_reg );
 7097 %}
 7098 
 7099 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7100   predicate(UseIncDec);
 7101   match(Set dst (AddI dst src));
 7102   effect(KILL cr);
 7103 
 7104   size(1);
 7105   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd: DEC r32 */
 7107   ins_encode( Opc_plus( primary, dst ) );
 7108   ins_pipe( ialu_reg );
 7109 %}
 7110 
 7111 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7112   match(Set dst (AddP dst src));
 7113   effect(KILL cr);
 7114 
 7115   size(2);
 7116   format %{ "ADD    $dst,$src" %}
 7117   opcode(0x03);
 7118   ins_encode( OpcP, RegReg( dst, src) );
 7119   ins_pipe( ialu_reg_reg );
 7120 %}
 7121 
 7122 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7123   match(Set dst (AddP dst src));
 7124   effect(KILL cr);
 7125 
 7126   format %{ "ADD    $dst,$src" %}
 7127   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7128   // ins_encode( RegImm( dst, src) );
 7129   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7130   ins_pipe( ialu_reg );
 7131 %}
 7132 
 7133 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7134   match(Set dst (AddI dst (LoadI src)));
 7135   effect(KILL cr);
 7136 
 7137   ins_cost(150);
 7138   format %{ "ADD    $dst,$src" %}
 7139   opcode(0x03);
 7140   ins_encode( OpcP, RegMem( dst, src) );
 7141   ins_pipe( ialu_reg_mem );
 7142 %}
 7143 
 7144 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7145   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7146   effect(KILL cr);
 7147 
 7148   ins_cost(150);
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x01);  /* Opcode 01 /r */
 7151   ins_encode( OpcP, RegMem( src, dst ) );
 7152   ins_pipe( ialu_mem_reg );
 7153 %}
 7154 
 7155 // Add Memory with Immediate
 7156 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7158   effect(KILL cr);
 7159 
 7160   ins_cost(125);
 7161   format %{ "ADD    $dst,$src" %}
 7162   opcode(0x81);               /* Opcode 81 /0 id */
 7163   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7164   ins_pipe( ialu_mem_imm );
 7165 %}
 7166 
 7167 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(125);
 7172   format %{ "INC    $dst" %}
 7173   opcode(0xFF);               /* Opcode FF /0 */
 7174   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7175   ins_pipe( ialu_mem_imm );
 7176 %}
 7177 
 7178 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7179   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7180   effect(KILL cr);
 7181 
 7182   ins_cost(125);
 7183   format %{ "DEC    $dst" %}
 7184   opcode(0xFF);               /* Opcode FF /1 */
 7185   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7186   ins_pipe( ialu_mem_imm );
 7187 %}
 7188 
 7189 
 7190 instruct checkCastPP( eRegP dst ) %{
 7191   match(Set dst (CheckCastPP dst));
 7192 
 7193   size(0);
 7194   format %{ "#checkcastPP of $dst" %}
 7195   ins_encode( /*empty encoding*/ );
 7196   ins_pipe( empty );
 7197 %}
 7198 
 7199 instruct castPP( eRegP dst ) %{
 7200   match(Set dst (CastPP dst));
 7201   format %{ "#castPP of $dst" %}
 7202   ins_encode( /*empty encoding*/ );
 7203   ins_pipe( empty );
 7204 %}
 7205 
 7206 instruct castII( rRegI dst ) %{
 7207   match(Set dst (CastII dst));
 7208   format %{ "#castII of $dst" %}
 7209   ins_encode( /*empty encoding*/ );
 7210   ins_cost(0);
 7211   ins_pipe( empty );
 7212 %}
 7213 
 7214 instruct castLL( eRegL dst ) %{
 7215   match(Set dst (CastLL dst));
 7216   format %{ "#castLL of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castFF( regF dst ) %{
 7223   predicate(UseSSE >= 1);
 7224   match(Set dst (CastFF dst));
 7225   format %{ "#castFF of $dst" %}
 7226   ins_encode( /*empty encoding*/ );
 7227   ins_cost(0);
 7228   ins_pipe( empty );
 7229 %}
 7230 
 7231 instruct castDD( regD dst ) %{
 7232   predicate(UseSSE >= 2);
 7233   match(Set dst (CastDD dst));
 7234   format %{ "#castDD of $dst" %}
 7235   ins_encode( /*empty encoding*/ );
 7236   ins_cost(0);
 7237   ins_pipe( empty );
 7238 %}
 7239 
 7240 instruct castFF_PR( regFPR dst ) %{
 7241   predicate(UseSSE < 1);
 7242   match(Set dst (CastFF dst));
 7243   format %{ "#castFF of $dst" %}
 7244   ins_encode( /*empty encoding*/ );
 7245   ins_cost(0);
 7246   ins_pipe( empty );
 7247 %}
 7248 
 7249 instruct castDD_PR( regDPR dst ) %{
 7250   predicate(UseSSE < 2);
 7251   match(Set dst (CastDD dst));
 7252   format %{ "#castDD of $dst" %}
 7253   ins_encode( /*empty encoding*/ );
 7254   ins_cost(0);
 7255   ins_pipe( empty );
 7256 %}
 7257 
 7258 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7259 
 7260 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7261   predicate(VM_Version::supports_cx8());
 7262   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7263   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7264   effect(KILL cr, KILL oldval);
 7265   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7266             "MOV    $res,0\n\t"
 7267             "JNE,s  fail\n\t"
 7268             "MOV    $res,1\n"
 7269           "fail:" %}
 7270   ins_encode( enc_cmpxchg8(mem_ptr),
 7271               enc_flags_ne_to_boolean(res) );
 7272   ins_pipe( pipe_cmpxchg );
 7273 %}
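
// All CMPXCHG-based flavors in this section rely on the same hardware
// semantics, sketched here in C (illustrative only, not HotSpot code; T stands
// for the operand width of the particular flavor):
//
//   bool cmpxchg(T* mem_ptr, T* oldval /* EAX, or EDX:EAX for 8 bytes */, T newval) {
//     if (*mem_ptr == *oldval) { *mem_ptr = newval;  return true;  }  // ZF = 1
//     else                     { *oldval = *mem_ptr; return false; }  // ZF = 0
//   }
//
// The boolean-result instructs convert ZF into 0/1 in $res, while the
// CompareAndExchange instructs simply hand back the reloaded $oldval.
// CMPXCHG8B fixes its data operands (EDX:EAX holds the compare value, ECX:EBX
// the new value), which is why the long flavor above pins them with
// eADXRegL/eBCXRegL and keeps the pointer in ESI.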
 7274 
 7275 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7276   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7277   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7278   effect(KILL cr, KILL oldval);
 7279   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7280             "MOV    $res,0\n\t"
 7281             "JNE,s  fail\n\t"
 7282             "MOV    $res,1\n"
 7283           "fail:" %}
 7284   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7285   ins_pipe( pipe_cmpxchg );
 7286 %}
 7287 
 7288 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7289   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7290   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7291   effect(KILL cr, KILL oldval);
 7292   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7293             "MOV    $res,0\n\t"
 7294             "JNE,s  fail\n\t"
 7295             "MOV    $res,1\n"
 7296           "fail:" %}
 7297   ins_encode( enc_cmpxchgb(mem_ptr),
 7298               enc_flags_ne_to_boolean(res) );
 7299   ins_pipe( pipe_cmpxchg );
 7300 %}
 7301 
 7302 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7303   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7304   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7305   effect(KILL cr, KILL oldval);
 7306   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7307             "MOV    $res,0\n\t"
 7308             "JNE,s  fail\n\t"
 7309             "MOV    $res,1\n"
 7310           "fail:" %}
 7311   ins_encode( enc_cmpxchgw(mem_ptr),
 7312               enc_flags_ne_to_boolean(res) );
 7313   ins_pipe( pipe_cmpxchg );
 7314 %}
 7315 
 7316 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7317   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7318   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7319   effect(KILL cr, KILL oldval);
 7320   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7321             "MOV    $res,0\n\t"
 7322             "JNE,s  fail\n\t"
 7323             "MOV    $res,1\n"
 7324           "fail:" %}
 7325   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7326   ins_pipe( pipe_cmpxchg );
 7327 %}
 7328 
 7329 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7330   predicate(VM_Version::supports_cx8());
 7331   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7332   effect(KILL cr);
 7333   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7334   ins_encode( enc_cmpxchg8(mem_ptr) );
 7335   ins_pipe( pipe_cmpxchg );
 7336 %}
 7337 
 7338 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7339   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7340   effect(KILL cr);
 7341   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7342   ins_encode( enc_cmpxchg(mem_ptr) );
 7343   ins_pipe( pipe_cmpxchg );
 7344 %}
 7345 
 7346 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7347   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7348   effect(KILL cr);
 7349   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7350   ins_encode( enc_cmpxchgb(mem_ptr) );
 7351   ins_pipe( pipe_cmpxchg );
 7352 %}
 7353 
 7354 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7355   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7356   effect(KILL cr);
 7357   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7358   ins_encode( enc_cmpxchgw(mem_ptr) );
 7359   ins_pipe( pipe_cmpxchg );
 7360 %}
 7361 
 7362 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7363   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7364   effect(KILL cr);
 7365   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7366   ins_encode( enc_cmpxchg(mem_ptr) );
 7367   ins_pipe( pipe_cmpxchg );
 7368 %}
 7369 
 7370 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7371   predicate(n->as_LoadStore()->result_not_used());
 7372   match(Set dummy (GetAndAddB mem add));
 7373   effect(KILL cr);
 7374   format %{ "ADDB  [$mem],$add" %}
 7375   ins_encode %{
 7376     __ lock();
 7377     __ addb($mem$$Address, $add$$constant);
 7378   %}
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
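
// The *_no_res forms in this section cover the case where the result of an
// atomic add is not used; a plain locked ADD then suffices and no register is
// tied up with the old value. In C++ std::atomic terms the shape being
// optimized is roughly (an analogy only, not HotSpot code):
//
//   std::atomic<int8_t> counter;
//   counter.fetch_add(1);   // return value ignored -> LOCK ADD instead of LOCK XADD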
 7381 
 7382 // Important to match to xRegI: only 8-bit regs.
 7383 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7384   match(Set newval (GetAndAddB mem newval));
 7385   effect(KILL cr);
 7386   format %{ "XADDB  [$mem],$newval" %}
 7387   ins_encode %{
 7388     __ lock();
 7389     __ xaddb($mem$$Address, $newval$$Register);
 7390   %}
 7391   ins_pipe( pipe_cmpxchg );
 7392 %}
 7393 
 7394 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7395   predicate(n->as_LoadStore()->result_not_used());
 7396   match(Set dummy (GetAndAddS mem add));
 7397   effect(KILL cr);
 7398   format %{ "ADDS  [$mem],$add" %}
 7399   ins_encode %{
 7400     __ lock();
 7401     __ addw($mem$$Address, $add$$constant);
 7402   %}
 7403   ins_pipe( pipe_cmpxchg );
 7404 %}
 7405 
 7406 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7407   match(Set newval (GetAndAddS mem newval));
 7408   effect(KILL cr);
 7409   format %{ "XADDS  [$mem],$newval" %}
 7410   ins_encode %{
 7411     __ lock();
 7412     __ xaddw($mem$$Address, $newval$$Register);
 7413   %}
 7414   ins_pipe( pipe_cmpxchg );
 7415 %}
 7416 
 7417 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7418   predicate(n->as_LoadStore()->result_not_used());
 7419   match(Set dummy (GetAndAddI mem add));
 7420   effect(KILL cr);
 7421   format %{ "ADDL  [$mem],$add" %}
 7422   ins_encode %{
 7423     __ lock();
 7424     __ addl($mem$$Address, $add$$constant);
 7425   %}
 7426   ins_pipe( pipe_cmpxchg );
 7427 %}
 7428 
 7429 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7430   match(Set newval (GetAndAddI mem newval));
 7431   effect(KILL cr);
 7432   format %{ "XADDL  [$mem],$newval" %}
 7433   ins_encode %{
 7434     __ lock();
 7435     __ xaddl($mem$$Address, $newval$$Register);
 7436   %}
 7437   ins_pipe( pipe_cmpxchg );
 7438 %}
 7439 
 7440 // Important to match to xRegI: only 8-bit regs.
 7441 instruct xchgB( memory mem, xRegI newval) %{
 7442   match(Set newval (GetAndSetB mem newval));
 7443   format %{ "XCHGB  $newval,[$mem]" %}
 7444   ins_encode %{
 7445     __ xchgb($newval$$Register, $mem$$Address);
 7446   %}
 7447   ins_pipe( pipe_cmpxchg );
 7448 %}
 7449 
 7450 instruct xchgS( memory mem, rRegI newval) %{
 7451   match(Set newval (GetAndSetS mem newval));
 7452   format %{ "XCHGW  $newval,[$mem]" %}
 7453   ins_encode %{
 7454     __ xchgw($newval$$Register, $mem$$Address);
 7455   %}
 7456   ins_pipe( pipe_cmpxchg );
 7457 %}
 7458 
 7459 instruct xchgI( memory mem, rRegI newval) %{
 7460   match(Set newval (GetAndSetI mem newval));
 7461   format %{ "XCHGL  $newval,[$mem]" %}
 7462   ins_encode %{
 7463     __ xchgl($newval$$Register, $mem$$Address);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
 7467 
 7468 instruct xchgP( memory mem, pRegP newval) %{
 7469   match(Set newval (GetAndSetP mem newval));
 7470   format %{ "XCHGL  $newval,[$mem]" %}
 7471   ins_encode %{
 7472     __ xchgl($newval$$Register, $mem$$Address);
 7473   %}
 7474   ins_pipe( pipe_cmpxchg );
 7475 %}
 7476 
 7477 //----------Subtraction Instructions-------------------------------------------
 7478 
 7479 // Integer Subtraction Instructions
 7480 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7481   match(Set dst (SubI dst src));
 7482   effect(KILL cr);
 7483 
 7484   size(2);
 7485   format %{ "SUB    $dst,$src" %}
 7486   opcode(0x2B);
 7487   ins_encode( OpcP, RegReg( dst, src) );
 7488   ins_pipe( ialu_reg_reg );
 7489 %}
 7490 
 7491 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7492   match(Set dst (SubI dst src));
 7493   effect(KILL cr);
 7494 
 7495   format %{ "SUB    $dst,$src" %}
 7496   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7497   // ins_encode( RegImm( dst, src) );
 7498   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7499   ins_pipe( ialu_reg );
 7500 %}
 7501 
 7502 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7503   match(Set dst (SubI dst (LoadI src)));
 7504   effect(KILL cr);
 7505 
 7506   ins_cost(150);
 7507   format %{ "SUB    $dst,$src" %}
 7508   opcode(0x2B);
 7509   ins_encode( OpcP, RegMem( dst, src) );
 7510   ins_pipe( ialu_reg_mem );
 7511 %}
 7512 
 7513 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7514   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7515   effect(KILL cr);
 7516 
 7517   ins_cost(150);
 7518   format %{ "SUB    $dst,$src" %}
 7519   opcode(0x29);  /* Opcode 29 /r */
 7520   ins_encode( OpcP, RegMem( src, dst ) );
 7521   ins_pipe( ialu_mem_reg );
 7522 %}
 7523 
 7524 // Subtract from a pointer
 7525 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7526   match(Set dst (AddP dst (SubI zero src)));
 7527   effect(KILL cr);
 7528 
 7529   size(2);
 7530   format %{ "SUB    $dst,$src" %}
 7531   opcode(0x2B);
 7532   ins_encode( OpcP, RegReg( dst, src) );
 7533   ins_pipe( ialu_reg_reg );
 7534 %}
 7535 
 7536 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7537   match(Set dst (SubI zero dst));
 7538   effect(KILL cr);
 7539 
 7540   size(2);
 7541   format %{ "NEG    $dst" %}
 7542   opcode(0xF7,0x03);  // Opcode F7 /3
 7543   ins_encode( OpcP, RegOpc( dst ) );
 7544   ins_pipe( ialu_reg );
 7545 %}
 7546 
 7547 //----------Multiplication/Division Instructions-------------------------------
 7548 // Integer Multiplication Instructions
 7549 // Multiply Register
 7550 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7551   match(Set dst (MulI dst src));
 7552   effect(KILL cr);
 7553 
 7554   size(3);
 7555   ins_cost(300);
 7556   format %{ "IMUL   $dst,$src" %}
 7557   opcode(0xAF, 0x0F);
 7558   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7559   ins_pipe( ialu_reg_reg_alu0 );
 7560 %}
 7561 
 7562 // Multiply 32-bit Immediate
 7563 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7564   match(Set dst (MulI src imm));
 7565   effect(KILL cr);
 7566 
 7567   ins_cost(300);
 7568   format %{ "IMUL   $dst,$src,$imm" %}
 7569   opcode(0x69);  /* 69 /r id */
 7570   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7571   ins_pipe( ialu_reg_reg_alu0 );
 7572 %}
 7573 
 7574 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7575   match(Set dst src);
 7576   effect(KILL cr);
 7577 
 7578   // Note that this is artificially increased to make it more expensive than loadConL
 7579   ins_cost(250);
 7580   format %{ "MOV    EAX,$src\t// low word only" %}
 7581   opcode(0xB8);
 7582   ins_encode( LdImmL_Lo(dst, src) );
 7583   ins_pipe( ialu_reg_fat );
 7584 %}
 7585 
 7586 // Multiply by 32-bit Immediate, taking the shifted high order results
 7587 //  (special case for shift by 32)
 7588 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7589   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7590   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7591              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7592              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7593   effect(USE src1, KILL cr);
 7594 
 7595   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7596   ins_cost(0*100 + 1*400 - 150);
 7597   format %{ "IMUL   EDX:EAX,$src1" %}
 7598   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7599   ins_pipe( pipe_slow );
 7600 %}
 7601 
 7602 // Multiply by 32-bit Immediate, taking the shifted high order results
 7603 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7604   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7605   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7606              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7607              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7608   effect(USE src1, KILL cr);
 7609 
 7610   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7611   ins_cost(1*100 + 1*400 - 150);
 7612   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7613             "SAR    EDX,$cnt-32" %}
 7614   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7615   ins_pipe( pipe_slow );
 7616 %}
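
// In C terms, the two instructs above match the following shape (an
// illustrative sketch):
//
//   int32_t r = (int32_t)(((int64_t)src1 * (int64_t)con) >> cnt);  // 32 <= cnt <= 63
//
// The full 64-bit product lands in EDX:EAX, so a shift by exactly 32 simply
// selects EDX, and larger shift counts become SAR EDX, cnt-32.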
 7617 
 7618 // Multiply Memory 32-bit Immediate
 7619 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7620   match(Set dst (MulI (LoadI src) imm));
 7621   effect(KILL cr);
 7622 
 7623   ins_cost(300);
 7624   format %{ "IMUL   $dst,$src,$imm" %}
 7625   opcode(0x69);  /* 69 /r id */
 7626   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7627   ins_pipe( ialu_reg_mem_alu0 );
 7628 %}
 7629 
 7630 // Multiply Memory
 7631 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7632   match(Set dst (MulI dst (LoadI src)));
 7633   effect(KILL cr);
 7634 
 7635   ins_cost(350);
 7636   format %{ "IMUL   $dst,$src" %}
 7637   opcode(0xAF, 0x0F);
 7638   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7639   ins_pipe( ialu_reg_mem_alu0 );
 7640 %}
 7641 
 7642 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7643 %{
 7644   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7645   effect(KILL cr, KILL src2);
 7646 
  expand %{
    mulI_eReg(dst, src1, cr);
    mulI_eReg(src2, src3, cr);
    addI_eReg(dst, src2, cr);
  %}
 7650 %}
 7651 
 7652 // Multiply Register Int to Long
 7653 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7654   // Basic Idea: long = (long)int * (long)int
 7655   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7656   effect(DEF dst, USE src, USE src1, KILL flags);
 7657 
 7658   ins_cost(300);
 7659   format %{ "IMUL   $dst,$src1" %}
 7660 
 7661   ins_encode( long_int_multiply( dst, src1 ) );
 7662   ins_pipe( ialu_reg_reg_alu0 );
 7663 %}
 7664 
 7665 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7666   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7667   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7668   effect(KILL flags);
 7669 
 7670   ins_cost(300);
 7671   format %{ "MUL    $dst,$src1" %}
 7672 
 7673   ins_encode( long_uint_multiply(dst, src1) );
 7674   ins_pipe( ialu_reg_reg_alu0 );
 7675 %}
 7676 
 7677 // Multiply Register Long
 7678 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7679   match(Set dst (MulL dst src));
 7680   effect(KILL cr, TEMP tmp);
 7681   ins_cost(4*100+3*400);
 7682 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7683 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7684   format %{ "MOV    $tmp,$src.lo\n\t"
 7685             "IMUL   $tmp,EDX\n\t"
 7686             "MOV    EDX,$src.hi\n\t"
 7687             "IMUL   EDX,EAX\n\t"
 7688             "ADD    $tmp,EDX\n\t"
 7689             "MUL    EDX:EAX,$src.lo\n\t"
 7690             "ADD    EDX,$tmp" %}
 7691   ins_encode( long_multiply( dst, src, tmp ) );
 7692   ins_pipe( pipe_slow );
 7693 %}
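
// The decomposition used by the long-multiply instructs above and below,
// written out as a plain C sketch (illustrative only; mul64 is a hypothetical
// helper, not part of this file):
//
//   uint64_t mul64(uint64_t x, uint64_t y) {
//     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
//     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
//     uint64_t lo_prod = (uint64_t)x_lo * y_lo;                // MUL  EDX:EAX
//     uint32_t hi = (uint32_t)(lo_prod >> 32)
//                 + x_hi * y_lo + x_lo * y_hi;                 // the two IMULs + ADDs
//     return ((uint64_t)hi << 32) | (uint32_t)lo_prod;         // result in EDX:EAX
//   }
//
// x_hi * y_hi would only affect bits 64 and up, so it is dropped; the
// _lhi0/_rhi0/_hi0 variants below drop further partial products when a high
// half is known to be zero.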
 7694 
 7695 // Multiply Register Long where the left operand's high 32 bits are zero
 7696 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7697   predicate(is_operand_hi32_zero(n->in(1)));
 7698   match(Set dst (MulL dst src));
 7699   effect(KILL cr, TEMP tmp);
 7700   ins_cost(2*100+2*400);
 7701 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7702 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7703   format %{ "MOV    $tmp,$src.hi\n\t"
 7704             "IMUL   $tmp,EAX\n\t"
 7705             "MUL    EDX:EAX,$src.lo\n\t"
 7706             "ADD    EDX,$tmp" %}
 7707   ins_encode %{
 7708     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7709     __ imull($tmp$$Register, rax);
 7710     __ mull($src$$Register);
 7711     __ addl(rdx, $tmp$$Register);
 7712   %}
 7713   ins_pipe( pipe_slow );
 7714 %}
 7715 
 7716 // Multiply Register Long where the right operand's high 32 bits are zero
 7717 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7718   predicate(is_operand_hi32_zero(n->in(2)));
 7719   match(Set dst (MulL dst src));
 7720   effect(KILL cr, TEMP tmp);
 7721   ins_cost(2*100+2*400);
 7722 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7723 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7724   format %{ "MOV    $tmp,$src.lo\n\t"
 7725             "IMUL   $tmp,EDX\n\t"
 7726             "MUL    EDX:EAX,$src.lo\n\t"
 7727             "ADD    EDX,$tmp" %}
 7728   ins_encode %{
 7729     __ movl($tmp$$Register, $src$$Register);
 7730     __ imull($tmp$$Register, rdx);
 7731     __ mull($src$$Register);
 7732     __ addl(rdx, $tmp$$Register);
 7733   %}
 7734   ins_pipe( pipe_slow );
 7735 %}
 7736 
 7737 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7738 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7739   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7740   match(Set dst (MulL dst src));
 7741   effect(KILL cr);
 7742   ins_cost(1*400);
 7743 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7744 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL    EDX:EAX,$src.lo" %}
 7746   ins_encode %{
 7747     __ mull($src$$Register);
 7748   %}
 7749   ins_pipe( pipe_slow );
 7750 %}
 7751 
 7752 // Multiply Register Long by small constant
 7753 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7754   match(Set dst (MulL dst src));
 7755   effect(KILL cr, TEMP tmp);
 7756   ins_cost(2*100+2*400);
 7757   size(12);
 7758 // Basic idea: lo(result) = lo(src * EAX)
 7759 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7760   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7761             "MOV    EDX,$src\n\t"
 7762             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7763             "ADD    EDX,$tmp" %}
 7764   ins_encode( long_multiply_con( dst, src, tmp ) );
 7765   ins_pipe( pipe_slow );
 7766 %}
 7767 
 7768 // Integer DIV with Register
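// IDIV faults (#DE) on min_jint / -1, the one quotient that does not fit in 32 bits, so
// that case is peeled off first: Java defines the result as min_jint, which EAX already
// holds, and the XOR leaves the correct remainder (0) in EDX.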
 7769 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7770   match(Set rax (DivI rax div));
 7771   effect(KILL rdx, KILL cr);
 7772   size(26);
 7773   ins_cost(30*100+10*100);
 7774   format %{ "CMP    EAX,0x80000000\n\t"
 7775             "JNE,s  normal\n\t"
 7776             "XOR    EDX,EDX\n\t"
 7777             "CMP    ECX,-1\n\t"
 7778             "JE,s   done\n"
 7779     "normal: CDQ\n\t"
 7780             "IDIV   $div\n\t"
 7781     "done:"        %}
 7782   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7783   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7784   ins_pipe( ialu_reg_reg_alu0 );
 7785 %}
 7786 
 7787 // Divide Register Long
 7788 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7789   match(Set dst (DivL src1 src2));
 7790   effect(CALL);
 7791   ins_cost(10000);
 7792   format %{ "PUSH   $src1.hi\n\t"
 7793             "PUSH   $src1.lo\n\t"
 7794             "PUSH   $src2.hi\n\t"
 7795             "PUSH   $src2.lo\n\t"
 7796             "CALL   SharedRuntime::ldiv\n\t"
 7797             "ADD    ESP,16" %}
 7798   ins_encode( long_div(src1,src2) );
 7799   ins_pipe( pipe_slow );
 7800 %}
 7801 
 7802 // Integer DIVMOD with Register, both quotient and mod results
 7803 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7804   match(DivModI rax div);
 7805   effect(KILL cr);
 7806   size(26);
 7807   ins_cost(30*100+10*100);
 7808   format %{ "CMP    EAX,0x80000000\n\t"
 7809             "JNE,s  normal\n\t"
 7810             "XOR    EDX,EDX\n\t"
 7811             "CMP    ECX,-1\n\t"
 7812             "JE,s   done\n"
 7813     "normal: CDQ\n\t"
 7814             "IDIV   $div\n\t"
 7815     "done:"        %}
 7816   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7817   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7818   ins_pipe( pipe_slow );
 7819 %}
 7820 
 7821 // Integer MOD with Register
 7822 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7823   match(Set rdx (ModI rax div));
 7824   effect(KILL rax, KILL cr);
 7825 
 7826   size(26);
 7827   ins_cost(300);
 7828   format %{ "CDQ\n\t"
 7829             "IDIV   $div" %}
 7830   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7831   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7832   ins_pipe( ialu_reg_reg_alu0 );
 7833 %}
 7834 
 7835 // Remainder Register Long
 7836 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7837   match(Set dst (ModL src1 src2));
 7838   effect(CALL);
 7839   ins_cost(10000);
 7840   format %{ "PUSH   $src1.hi\n\t"
 7841             "PUSH   $src1.lo\n\t"
 7842             "PUSH   $src2.hi\n\t"
 7843             "PUSH   $src2.lo\n\t"
 7844             "CALL   SharedRuntime::lrem\n\t"
 7845             "ADD    ESP,16" %}
 7846   ins_encode( long_mod(src1,src2) );
 7847   ins_pipe( pipe_slow );
 7848 %}
 7849 
// Divide Register Long by a 32-bit immediate (no overflow special case, since the constant divisor is never -1)
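// The 64-by-32 division below is schoolbook long division in base 2^32 using two unsigned
// DIVs: first hi/|d| (giving the high word of the quotient and a remainder r), then
// (r:lo)/|d| (giving the low word). A negative dividend is negated up front and the
// quotient negated back; the final NEG handles a negative divisor. When |d| > hi the
// whole quotient fits in 32 bits and a single DIV suffices (the fast path).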
 7851 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7852   match(Set dst (DivL dst imm));
 7853   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7854   ins_cost(1000);
 7855   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7856             "XOR    $tmp2,$tmp2\n\t"
 7857             "CMP    $tmp,EDX\n\t"
 7858             "JA,s   fast\n\t"
 7859             "MOV    $tmp2,EAX\n\t"
 7860             "MOV    EAX,EDX\n\t"
 7861             "MOV    EDX,0\n\t"
 7862             "JLE,s  pos\n\t"
 7863             "LNEG   EAX : $tmp2\n\t"
 7864             "DIV    $tmp # unsigned division\n\t"
 7865             "XCHG   EAX,$tmp2\n\t"
 7866             "DIV    $tmp\n\t"
 7867             "LNEG   $tmp2 : EAX\n\t"
 7868             "JMP,s  done\n"
 7869     "pos:\n\t"
 7870             "DIV    $tmp\n\t"
 7871             "XCHG   EAX,$tmp2\n"
 7872     "fast:\n\t"
 7873             "DIV    $tmp\n"
 7874     "done:\n\t"
 7875             "MOV    EDX,$tmp2\n\t"
 7876             "NEG    EDX:EAX # if $imm < 0" %}
 7877   ins_encode %{
 7878     int con = (int)$imm$$constant;
 7879     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7880     int pcon = (con > 0) ? con : -con;
 7881     Label Lfast, Lpos, Ldone;
 7882 
 7883     __ movl($tmp$$Register, pcon);
 7884     __ xorl($tmp2$$Register,$tmp2$$Register);
 7885     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7886     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7887 
 7888     __ movl($tmp2$$Register, $dst$$Register); // save
 7889     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7890     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7891     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7892 
 7893     // Negative dividend.
 7894     // convert value to positive to use unsigned division
 7895     __ lneg($dst$$Register, $tmp2$$Register);
 7896     __ divl($tmp$$Register);
 7897     __ xchgl($dst$$Register, $tmp2$$Register);
 7898     __ divl($tmp$$Register);
 7899     // revert result back to negative
 7900     __ lneg($tmp2$$Register, $dst$$Register);
 7901     __ jmpb(Ldone);
 7902 
 7903     __ bind(Lpos);
 7904     __ divl($tmp$$Register); // Use unsigned division
 7905     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7907 
 7908     __ bind(Lfast);
 7909     // fast path: src is positive
 7910     __ divl($tmp$$Register); // Use unsigned division
 7911 
 7912     __ bind(Ldone);
 7913     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7914     if (con < 0) {
 7915       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7916     }
 7917   %}
 7918   ins_pipe( pipe_slow );
 7919 %}
 7920 
// Remainder Register Long by a 32-bit immediate (the remainder fits into 32 bits)
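// Same two-DIV scheme as divL_eReg_imm32 above, but keeping the remainders instead of the
// quotients. Since |divisor| < 2^31 the remainder fits in 32 bits; it ends up in EDX and
// the final MOV/SAR pair moves it to EAX and sign-extends it into EDX.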
 7922 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7923   match(Set dst (ModL dst imm));
 7924   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7925   ins_cost(1000);
 7926   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7927             "CMP    $tmp,EDX\n\t"
 7928             "JA,s   fast\n\t"
 7929             "MOV    $tmp2,EAX\n\t"
 7930             "MOV    EAX,EDX\n\t"
 7931             "MOV    EDX,0\n\t"
 7932             "JLE,s  pos\n\t"
 7933             "LNEG   EAX : $tmp2\n\t"
 7934             "DIV    $tmp # unsigned division\n\t"
 7935             "MOV    EAX,$tmp2\n\t"
 7936             "DIV    $tmp\n\t"
 7937             "NEG    EDX\n\t"
 7938             "JMP,s  done\n"
 7939     "pos:\n\t"
 7940             "DIV    $tmp\n\t"
 7941             "MOV    EAX,$tmp2\n"
 7942     "fast:\n\t"
 7943             "DIV    $tmp\n"
 7944     "done:\n\t"
 7945             "MOV    EAX,EDX\n\t"
 7946             "SAR    EDX,31\n\t" %}
 7947   ins_encode %{
 7948     int con = (int)$imm$$constant;
 7949     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7950     int pcon = (con > 0) ? con : -con;
 7951     Label  Lfast, Lpos, Ldone;
 7952 
 7953     __ movl($tmp$$Register, pcon);
 7954     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7955     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7956 
 7957     __ movl($tmp2$$Register, $dst$$Register); // save
 7958     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7959     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7960     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7961 
 7962     // Negative dividend.
 7963     // convert value to positive to use unsigned division
 7964     __ lneg($dst$$Register, $tmp2$$Register);
 7965     __ divl($tmp$$Register);
 7966     __ movl($dst$$Register, $tmp2$$Register);
 7967     __ divl($tmp$$Register);
 7968     // revert remainder back to negative
 7969     __ negl(HIGH_FROM_LOW($dst$$Register));
 7970     __ jmpb(Ldone);
 7971 
 7972     __ bind(Lpos);
 7973     __ divl($tmp$$Register);
 7974     __ movl($dst$$Register, $tmp2$$Register);
 7975 
 7976     __ bind(Lfast);
 7977     // fast path: src is positive
 7978     __ divl($tmp$$Register);
 7979 
 7980     __ bind(Ldone);
 7981     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7982     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7983 
 7984   %}
 7985   ins_pipe( pipe_slow );
 7986 %}
 7987 
 7988 // Integer Shift Instructions
 7989 // Shift Left by one
 7990 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7991   match(Set dst (LShiftI dst shift));
 7992   effect(KILL cr);
 7993 
 7994   size(2);
 7995   format %{ "SHL    $dst,$shift" %}
 7996   opcode(0xD1, 0x4);  /* D1 /4 */
 7997   ins_encode( OpcP, RegOpc( dst ) );
 7998   ins_pipe( ialu_reg );
 7999 %}
 8000 
 8001 // Shift Left by 8-bit immediate
 8002 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8003   match(Set dst (LShiftI dst shift));
 8004   effect(KILL cr);
 8005 
 8006   size(3);
 8007   format %{ "SHL    $dst,$shift" %}
 8008   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8009   ins_encode( RegOpcImm( dst, shift) );
 8010   ins_pipe( ialu_reg );
 8011 %}
 8012 
 8013 // Shift Left by variable
 8014 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8015   match(Set dst (LShiftI dst shift));
 8016   effect(KILL cr);
 8017 
 8018   size(2);
 8019   format %{ "SHL    $dst,$shift" %}
 8020   opcode(0xD3, 0x4);  /* D3 /4 */
 8021   ins_encode( OpcP, RegOpc( dst ) );
 8022   ins_pipe( ialu_reg_reg );
 8023 %}
 8024 
 8025 // Arithmetic shift right by one
 8026 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8027   match(Set dst (RShiftI dst shift));
 8028   effect(KILL cr);
 8029 
 8030   size(2);
 8031   format %{ "SAR    $dst,$shift" %}
 8032   opcode(0xD1, 0x7);  /* D1 /7 */
 8033   ins_encode( OpcP, RegOpc( dst ) );
 8034   ins_pipe( ialu_reg );
 8035 %}
 8036 
 8037 // Arithmetic shift right by one
 8038 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8039   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8040   effect(KILL cr);
 8041   format %{ "SAR    $dst,$shift" %}
 8042   opcode(0xD1, 0x7);  /* D1 /7 */
 8043   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8044   ins_pipe( ialu_mem_imm );
 8045 %}
 8046 
 8047 // Arithmetic Shift Right by 8-bit immediate
 8048 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8049   match(Set dst (RShiftI dst shift));
 8050   effect(KILL cr);
 8051 
 8052   size(3);
 8053   format %{ "SAR    $dst,$shift" %}
 8054   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8055   ins_encode( RegOpcImm( dst, shift ) );
 8056   ins_pipe( ialu_mem_imm );
 8057 %}
 8058 
 8059 // Arithmetic Shift Right by 8-bit immediate
 8060 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8061   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8062   effect(KILL cr);
 8063 
 8064   format %{ "SAR    $dst,$shift" %}
 8065   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8066   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8067   ins_pipe( ialu_mem_imm );
 8068 %}
 8069 
 8070 // Arithmetic Shift Right by variable
 8071 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8072   match(Set dst (RShiftI dst shift));
 8073   effect(KILL cr);
 8074 
 8075   size(2);
 8076   format %{ "SAR    $dst,$shift" %}
 8077   opcode(0xD3, 0x7);  /* D3 /7 */
 8078   ins_encode( OpcP, RegOpc( dst ) );
 8079   ins_pipe( ialu_reg_reg );
 8080 %}
 8081 
 8082 // Logical shift right by one
 8083 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8084   match(Set dst (URShiftI dst shift));
 8085   effect(KILL cr);
 8086 
 8087   size(2);
 8088   format %{ "SHR    $dst,$shift" %}
 8089   opcode(0xD1, 0x5);  /* D1 /5 */
 8090   ins_encode( OpcP, RegOpc( dst ) );
 8091   ins_pipe( ialu_reg );
 8092 %}
 8093 
 8094 // Logical Shift Right by 8-bit immediate
 8095 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8096   match(Set dst (URShiftI dst shift));
 8097   effect(KILL cr);
 8098 
 8099   size(3);
 8100   format %{ "SHR    $dst,$shift" %}
 8101   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8102   ins_encode( RegOpcImm( dst, shift) );
 8103   ins_pipe( ialu_reg );
 8104 %}
 8105 
 8106 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
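// (x << 24) >> 24 (arithmetic) sign-extends the low byte of x, which MOVSX does directly.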
 8109 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8110   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8111 
 8112   size(3);
 8113   format %{ "MOVSX  $dst,$src :8" %}
 8114   ins_encode %{
 8115     __ movsbl($dst$$Register, $src$$Register);
 8116   %}
 8117   ins_pipe(ialu_reg_reg);
 8118 %}
 8119 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8122 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8123   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8124 
 8125   size(3);
 8126   format %{ "MOVSX  $dst,$src :16" %}
 8127   ins_encode %{
 8128     __ movswl($dst$$Register, $src$$Register);
 8129   %}
 8130   ins_pipe(ialu_reg_reg);
 8131 %}
 8132 
 8133 
 8134 // Logical Shift Right by variable
 8135 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8136   match(Set dst (URShiftI dst shift));
 8137   effect(KILL cr);
 8138 
 8139   size(2);
 8140   format %{ "SHR    $dst,$shift" %}
 8141   opcode(0xD3, 0x5);  /* D3 /5 */
 8142   ins_encode( OpcP, RegOpc( dst ) );
 8143   ins_pipe( ialu_reg_reg );
 8144 %}
 8145 
 8146 
 8147 //----------Logical Instructions-----------------------------------------------
 8148 //----------Integer Logical Instructions---------------------------------------
 8149 // And Instructions
 8150 // And Register with Register
 8151 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8152   match(Set dst (AndI dst src));
 8153   effect(KILL cr);
 8154 
 8155   size(2);
 8156   format %{ "AND    $dst,$src" %}
 8157   opcode(0x23);
 8158   ins_encode( OpcP, RegReg( dst, src) );
 8159   ins_pipe( ialu_reg_reg );
 8160 %}
 8161 
 8162 // And Register with Immediate
 8163 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8164   match(Set dst (AndI dst src));
 8165   effect(KILL cr);
 8166 
 8167   format %{ "AND    $dst,$src" %}
 8168   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8169   // ins_encode( RegImm( dst, src) );
 8170   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8171   ins_pipe( ialu_reg );
 8172 %}
 8173 
 8174 // And Register with Memory
 8175 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8176   match(Set dst (AndI dst (LoadI src)));
 8177   effect(KILL cr);
 8178 
 8179   ins_cost(150);
 8180   format %{ "AND    $dst,$src" %}
 8181   opcode(0x23);
 8182   ins_encode( OpcP, RegMem( dst, src) );
 8183   ins_pipe( ialu_reg_mem );
 8184 %}
 8185 
 8186 // And Memory with Register
 8187 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8188   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8189   effect(KILL cr);
 8190 
 8191   ins_cost(150);
 8192   format %{ "AND    $dst,$src" %}
 8193   opcode(0x21);  /* Opcode 21 /r */
 8194   ins_encode( OpcP, RegMem( src, dst ) );
 8195   ins_pipe( ialu_mem_reg );
 8196 %}
 8197 
 8198 // And Memory with Immediate
 8199 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8200   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8201   effect(KILL cr);
 8202 
 8203   ins_cost(125);
 8204   format %{ "AND    $dst,$src" %}
 8205   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8206   // ins_encode( MemImm( dst, src) );
 8207   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8208   ins_pipe( ialu_mem_imm );
 8209 %}
 8210 
 8211 // BMI1 instructions
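// These match the standard BMI1 bit-manipulation idioms:
//   ANDN:    ~x & y       (matched from (x ^ -1) & y)
//   BLSI:    x & -x       (isolate lowest set bit)
//   BLSMSK:  x ^ (x - 1)  (mask up to and including lowest set bit)
//   BLSR:    x & (x - 1)  (clear lowest set bit)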
 8212 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8213   match(Set dst (AndI (XorI src1 minus_1) src2));
 8214   predicate(UseBMI1Instructions);
 8215   effect(KILL cr);
 8216 
 8217   format %{ "ANDNL  $dst, $src1, $src2" %}
 8218 
 8219   ins_encode %{
 8220     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8221   %}
 8222   ins_pipe(ialu_reg);
 8223 %}
 8224 
 8225 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8226   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8227   predicate(UseBMI1Instructions);
 8228   effect(KILL cr);
 8229 
 8230   ins_cost(125);
 8231   format %{ "ANDNL  $dst, $src1, $src2" %}
 8232 
 8233   ins_encode %{
 8234     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8235   %}
 8236   ins_pipe(ialu_reg_mem);
 8237 %}
 8238 
 8239 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8240   match(Set dst (AndI (SubI imm_zero src) src));
 8241   predicate(UseBMI1Instructions);
 8242   effect(KILL cr);
 8243 
 8244   format %{ "BLSIL  $dst, $src" %}
 8245 
 8246   ins_encode %{
 8247     __ blsil($dst$$Register, $src$$Register);
 8248   %}
 8249   ins_pipe(ialu_reg);
 8250 %}
 8251 
 8252 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8253   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8254   predicate(UseBMI1Instructions);
 8255   effect(KILL cr);
 8256 
 8257   ins_cost(125);
 8258   format %{ "BLSIL  $dst, $src" %}
 8259 
 8260   ins_encode %{
 8261     __ blsil($dst$$Register, $src$$Address);
 8262   %}
 8263   ins_pipe(ialu_reg_mem);
 8264 %}
 8265 
 8266 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8267 %{
 8268   match(Set dst (XorI (AddI src minus_1) src));
 8269   predicate(UseBMI1Instructions);
 8270   effect(KILL cr);
 8271 
 8272   format %{ "BLSMSKL $dst, $src" %}
 8273 
 8274   ins_encode %{
 8275     __ blsmskl($dst$$Register, $src$$Register);
 8276   %}
 8277 
 8278   ins_pipe(ialu_reg);
 8279 %}
 8280 
 8281 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8282 %{
 8283   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8284   predicate(UseBMI1Instructions);
 8285   effect(KILL cr);
 8286 
 8287   ins_cost(125);
 8288   format %{ "BLSMSKL $dst, $src" %}
 8289 
 8290   ins_encode %{
 8291     __ blsmskl($dst$$Register, $src$$Address);
 8292   %}
 8293 
 8294   ins_pipe(ialu_reg_mem);
 8295 %}
 8296 
 8297 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8298 %{
 8299   match(Set dst (AndI (AddI src minus_1) src) );
 8300   predicate(UseBMI1Instructions);
 8301   effect(KILL cr);
 8302 
 8303   format %{ "BLSRL  $dst, $src" %}
 8304 
 8305   ins_encode %{
 8306     __ blsrl($dst$$Register, $src$$Register);
 8307   %}
 8308 
 8309   ins_pipe(ialu_reg);
 8310 %}
 8311 
 8312 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8313 %{
 8314   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8315   predicate(UseBMI1Instructions);
 8316   effect(KILL cr);
 8317 
 8318   ins_cost(125);
 8319   format %{ "BLSRL  $dst, $src" %}
 8320 
 8321   ins_encode %{
 8322     __ blsrl($dst$$Register, $src$$Address);
 8323   %}
 8324 
 8325   ins_pipe(ialu_reg_mem);
 8326 %}
 8327 
 8328 // Or Instructions
 8329 // Or Register with Register
 8330 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8331   match(Set dst (OrI dst src));
 8332   effect(KILL cr);
 8333 
 8334   size(2);
 8335   format %{ "OR     $dst,$src" %}
 8336   opcode(0x0B);
 8337   ins_encode( OpcP, RegReg( dst, src) );
 8338   ins_pipe( ialu_reg_reg );
 8339 %}
 8340 
 8341 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8342   match(Set dst (OrI dst (CastP2X src)));
 8343   effect(KILL cr);
 8344 
 8345   size(2);
 8346   format %{ "OR     $dst,$src" %}
 8347   opcode(0x0B);
 8348   ins_encode( OpcP, RegReg( dst, src) );
 8349   ins_pipe( ialu_reg_reg );
 8350 %}
 8351 
 8352 
 8353 // Or Register with Immediate
 8354 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8355   match(Set dst (OrI dst src));
 8356   effect(KILL cr);
 8357 
 8358   format %{ "OR     $dst,$src" %}
 8359   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8360   // ins_encode( RegImm( dst, src) );
 8361   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8362   ins_pipe( ialu_reg );
 8363 %}
 8364 
 8365 // Or Register with Memory
 8366 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8367   match(Set dst (OrI dst (LoadI src)));
 8368   effect(KILL cr);
 8369 
 8370   ins_cost(150);
 8371   format %{ "OR     $dst,$src" %}
 8372   opcode(0x0B);
 8373   ins_encode( OpcP, RegMem( dst, src) );
 8374   ins_pipe( ialu_reg_mem );
 8375 %}
 8376 
 8377 // Or Memory with Register
 8378 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8379   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8380   effect(KILL cr);
 8381 
 8382   ins_cost(150);
 8383   format %{ "OR     $dst,$src" %}
 8384   opcode(0x09);  /* Opcode 09 /r */
 8385   ins_encode( OpcP, RegMem( src, dst ) );
 8386   ins_pipe( ialu_mem_reg );
 8387 %}
 8388 
 8389 // Or Memory with Immediate
 8390 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8391   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8392   effect(KILL cr);
 8393 
 8394   ins_cost(125);
 8395   format %{ "OR     $dst,$src" %}
 8396   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8397   // ins_encode( MemImm( dst, src) );
 8398   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8399   ins_pipe( ialu_mem_imm );
 8400 %}
 8401 
 8402 // ROL/ROR
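// Rotates are recognized from their shift/or idiom, e.g. rol(x, s) == (x << s) | (x >>> (32 - s));
// since shift counts are taken mod 32, the (x >>> -s) form matches the same rotate.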
 8403 // ROL expand
 8404 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8405   effect(USE_DEF dst, USE shift, KILL cr);
 8406 
 8407   format %{ "ROL    $dst, $shift" %}
 8408   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8409   ins_encode( OpcP, RegOpc( dst ));
 8410   ins_pipe( ialu_reg );
 8411 %}
 8412 
 8413 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8414   effect(USE_DEF dst, USE shift, KILL cr);
 8415 
 8416   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8418   ins_encode( RegOpcImm(dst, shift) );
 8419   ins_pipe(ialu_reg);
 8420 %}
 8421 
 8422 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8423   effect(USE_DEF dst, USE shift, KILL cr);
 8424 
 8425   format %{ "ROL    $dst, $shift" %}
 8426   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8427   ins_encode(OpcP, RegOpc(dst));
 8428   ins_pipe( ialu_reg_reg );
 8429 %}
 8430 // end of ROL expand
 8431 
 8432 // ROL 32bit by one once
 8433 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8434   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8435 
 8436   expand %{
 8437     rolI_eReg_imm1(dst, lshift, cr);
 8438   %}
 8439 %}
 8440 
 8441 // ROL 32bit var by imm8 once
 8442 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8443   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8444   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8445 
 8446   expand %{
 8447     rolI_eReg_imm8(dst, lshift, cr);
 8448   %}
 8449 %}
 8450 
 8451 // ROL 32bit var by var once
 8452 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8453   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8454 
 8455   expand %{
 8456     rolI_eReg_CL(dst, shift, cr);
 8457   %}
 8458 %}
 8459 
 8460 // ROL 32bit var by var once
 8461 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8462   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8463 
 8464   expand %{
 8465     rolI_eReg_CL(dst, shift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROR expand
 8470 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8471   effect(USE_DEF dst, USE shift, KILL cr);
 8472 
 8473   format %{ "ROR    $dst, $shift" %}
 8474   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8475   ins_encode( OpcP, RegOpc( dst ) );
 8476   ins_pipe( ialu_reg );
 8477 %}
 8478 
 8479 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8480   effect (USE_DEF dst, USE shift, KILL cr);
 8481 
 8482   format %{ "ROR    $dst, $shift" %}
 8483   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8484   ins_encode( RegOpcImm(dst, shift) );
 8485   ins_pipe( ialu_reg );
 8486 %}
 8487 
 8488 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8489   effect(USE_DEF dst, USE shift, KILL cr);
 8490 
 8491   format %{ "ROR    $dst, $shift" %}
 8492   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8493   ins_encode(OpcP, RegOpc(dst));
 8494   ins_pipe( ialu_reg_reg );
 8495 %}
 8496 // end of ROR expand
 8497 
// ROR 32bit by one once
 8499 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8500   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8501 
 8502   expand %{
 8503     rorI_eReg_imm1(dst, rshift, cr);
 8504   %}
 8505 %}
 8506 
 8507 // ROR 32bit by immI8 once
 8508 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8509   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8510   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8511 
 8512   expand %{
 8513     rorI_eReg_imm8(dst, rshift, cr);
 8514   %}
 8515 %}
 8516 
 8517 // ROR 32bit var by var once
 8518 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8519   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8520 
 8521   expand %{
 8522     rorI_eReg_CL(dst, shift, cr);
 8523   %}
 8524 %}
 8525 
 8526 // ROR 32bit var by var once
 8527 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8528   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8529 
 8530   expand %{
 8531     rorI_eReg_CL(dst, shift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // Xor Instructions
 8536 // Xor Register with Register
 8537 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8538   match(Set dst (XorI dst src));
 8539   effect(KILL cr);
 8540 
 8541   size(2);
 8542   format %{ "XOR    $dst,$src" %}
 8543   opcode(0x33);
 8544   ins_encode( OpcP, RegReg( dst, src) );
 8545   ins_pipe( ialu_reg_reg );
 8546 %}
 8547 
 8548 // Xor Register with Immediate -1
 8549 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8550   match(Set dst (XorI dst imm));
 8551 
 8552   size(2);
 8553   format %{ "NOT    $dst" %}
 8554   ins_encode %{
 8555      __ notl($dst$$Register);
 8556   %}
 8557   ins_pipe( ialu_reg );
 8558 %}
 8559 
 8560 // Xor Register with Immediate
 8561 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8562   match(Set dst (XorI dst src));
 8563   effect(KILL cr);
 8564 
 8565   format %{ "XOR    $dst,$src" %}
 8566   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8567   // ins_encode( RegImm( dst, src) );
 8568   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8569   ins_pipe( ialu_reg );
 8570 %}
 8571 
 8572 // Xor Register with Memory
 8573 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8574   match(Set dst (XorI dst (LoadI src)));
 8575   effect(KILL cr);
 8576 
 8577   ins_cost(150);
 8578   format %{ "XOR    $dst,$src" %}
 8579   opcode(0x33);
 8580   ins_encode( OpcP, RegMem(dst, src) );
 8581   ins_pipe( ialu_reg_mem );
 8582 %}
 8583 
 8584 // Xor Memory with Register
 8585 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8586   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8587   effect(KILL cr);
 8588 
 8589   ins_cost(150);
 8590   format %{ "XOR    $dst,$src" %}
 8591   opcode(0x31);  /* Opcode 31 /r */
 8592   ins_encode( OpcP, RegMem( src, dst ) );
 8593   ins_pipe( ialu_mem_reg );
 8594 %}
 8595 
 8596 // Xor Memory with Immediate
 8597 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8598   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8599   effect(KILL cr);
 8600 
 8601   ins_cost(125);
 8602   format %{ "XOR    $dst,$src" %}
 8603   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8604   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8605   ins_pipe( ialu_mem_imm );
 8606 %}
 8607 
 8608 //----------Convert Int to Boolean---------------------------------------------
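// Conv2B turns zero into 0 and any non-zero value into 1. With dst initially a copy of
// src: NEG dst sets CF iff src != 0 and leaves dst = -src, and ADC dst,src then computes
// -src + src + CF = CF, which is exactly the 0/1 result. The same trick is used for the
// pointer variant below.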
 8609 
 8610 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8611   effect( DEF dst, USE src );
 8612   format %{ "MOV    $dst,$src" %}
 8613   ins_encode( enc_Copy( dst, src) );
 8614   ins_pipe( ialu_reg_reg );
 8615 %}
 8616 
 8617 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8618   effect( USE_DEF dst, USE src, KILL cr );
 8619 
 8620   size(4);
 8621   format %{ "NEG    $dst\n\t"
 8622             "ADC    $dst,$src" %}
 8623   ins_encode( neg_reg(dst),
 8624               OpcRegReg(0x13,dst,src) );
 8625   ins_pipe( ialu_reg_reg_long );
 8626 %}
 8627 
 8628 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8629   match(Set dst (Conv2B src));
 8630 
 8631   expand %{
 8632     movI_nocopy(dst,src);
 8633     ci2b(dst,src,cr);
 8634   %}
 8635 %}
 8636 
 8637 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8638   effect( DEF dst, USE src );
 8639   format %{ "MOV    $dst,$src" %}
 8640   ins_encode( enc_Copy( dst, src) );
 8641   ins_pipe( ialu_reg_reg );
 8642 %}
 8643 
 8644 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8645   effect( USE_DEF dst, USE src, KILL cr );
 8646   format %{ "NEG    $dst\n\t"
 8647             "ADC    $dst,$src" %}
 8648   ins_encode( neg_reg(dst),
 8649               OpcRegReg(0x13,dst,src) );
 8650   ins_pipe( ialu_reg_reg_long );
 8651 %}
 8652 
 8653 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8654   match(Set dst (Conv2B src));
 8655 
 8656   expand %{
 8657     movP_nocopy(dst,src);
 8658     cp2b(dst,src,cr);
 8659   %}
 8660 %}
 8661 
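// CmpLTMask yields an all-ones mask (-1) when p < q and 0 otherwise: the XOR clears the
// register (SETcc writes only the low byte), SETlt produces 0/1, and NEG turns 1 into -1.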
 8662 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8663   match(Set dst (CmpLTMask p q));
 8664   effect(KILL cr);
 8665   ins_cost(400);
 8666 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
 8668   format %{ "XOR    $dst,$dst\n\t"
 8669             "CMP    $p,$q\n\t"
 8670             "SETlt  $dst\n\t"
 8671             "NEG    $dst" %}
 8672   ins_encode %{
 8673     Register Rp = $p$$Register;
 8674     Register Rq = $q$$Register;
 8675     Register Rd = $dst$$Register;
 8676     Label done;
 8677     __ xorl(Rd, Rd);
 8678     __ cmpl(Rp, Rq);
 8679     __ setb(Assembler::less, Rd);
 8680     __ negl(Rd);
 8681   %}
 8682 
 8683   ins_pipe(pipe_slow);
 8684 %}
 8685 
 8686 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8687   match(Set dst (CmpLTMask dst zero));
 8688   effect(DEF dst, KILL cr);
 8689   ins_cost(100);
 8690 
 8691   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8692   ins_encode %{
 8693   __ sarl($dst$$Register, 31);
 8694   %}
 8695   ins_pipe(ialu_reg);
 8696 %}
 8697 
/* It is better to save a register than to avoid a branch. */
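// Matches p = ((p < q) ? y : 0) + (p - q): the SUB sets the signed flags for p - q,
// and y is added back only when p < q.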
 8699 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8700   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8701   effect(KILL cr);
 8702   ins_cost(400);
 8703   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8704             "JGE    done\n\t"
 8705             "ADD    $p,$y\n"
 8706             "done:  " %}
 8707   ins_encode %{
 8708     Register Rp = $p$$Register;
 8709     Register Rq = $q$$Register;
 8710     Register Ry = $y$$Register;
 8711     Label done;
 8712     __ subl(Rp, Rq);
 8713     __ jccb(Assembler::greaterEqual, done);
 8714     __ addl(Rp, Ry);
 8715     __ bind(done);
 8716   %}
 8717 
 8718   ins_pipe(pipe_cmplt);
 8719 %}
 8720 
/* It is better to save a register than to avoid a branch. */
 8722 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8723   match(Set y (AndI (CmpLTMask p q) y));
 8724   effect(KILL cr);
 8725 
 8726   ins_cost(300);
 8727 
 8728   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8729             "JLT      done\n\t"
 8730             "XORL     $y, $y\n"
 8731             "done:  " %}
 8732   ins_encode %{
 8733     Register Rp = $p$$Register;
 8734     Register Rq = $q$$Register;
 8735     Register Ry = $y$$Register;
 8736     Label done;
 8737     __ cmpl(Rp, Rq);
 8738     __ jccb(Assembler::less, done);
 8739     __ xorl(Ry, Ry);
 8740     __ bind(done);
 8741   %}
 8742 
 8743   ins_pipe(pipe_cmplt);
 8744 %}
 8745 
/* If this is enabled, it encourages spilling in the inner loop of compress.
 8747 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8748   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8749 */
 8750 //----------Overflow Math Instructions-----------------------------------------
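// Only the condition codes are defined by these nodes: the ADD/NEG/IMUL (or CMP for the
// subtraction forms) below are emitted purely to set the overflow flag for the matched
// overflow check; the arithmetic result itself is not a defined output, hence the
// USE_KILL effects where an input is destroyed.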
 8751 
 8752 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8753 %{
 8754   match(Set cr (OverflowAddI op1 op2));
 8755   effect(DEF cr, USE_KILL op1, USE op2);
 8756 
 8757   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8758 
 8759   ins_encode %{
 8760     __ addl($op1$$Register, $op2$$Register);
 8761   %}
 8762   ins_pipe(ialu_reg_reg);
 8763 %}
 8764 
 8765 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8766 %{
 8767   match(Set cr (OverflowAddI op1 op2));
 8768   effect(DEF cr, USE_KILL op1, USE op2);
 8769 
 8770   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8771 
 8772   ins_encode %{
 8773     __ addl($op1$$Register, $op2$$constant);
 8774   %}
 8775   ins_pipe(ialu_reg_reg);
 8776 %}
 8777 
 8778 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8779 %{
 8780   match(Set cr (OverflowSubI op1 op2));
 8781 
 8782   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8783   ins_encode %{
 8784     __ cmpl($op1$$Register, $op2$$Register);
 8785   %}
 8786   ins_pipe(ialu_reg_reg);
 8787 %}
 8788 
 8789 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8790 %{
 8791   match(Set cr (OverflowSubI op1 op2));
 8792 
 8793   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8794   ins_encode %{
 8795     __ cmpl($op1$$Register, $op2$$constant);
 8796   %}
 8797   ins_pipe(ialu_reg_reg);
 8798 %}
 8799 
 8800 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8801 %{
 8802   match(Set cr (OverflowSubI zero op2));
 8803   effect(DEF cr, USE_KILL op2);
 8804 
 8805   format %{ "NEG    $op2\t# overflow check int" %}
 8806   ins_encode %{
 8807     __ negl($op2$$Register);
 8808   %}
 8809   ins_pipe(ialu_reg_reg);
 8810 %}
 8811 
 8812 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8813 %{
 8814   match(Set cr (OverflowMulI op1 op2));
 8815   effect(DEF cr, USE_KILL op1, USE op2);
 8816 
 8817   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8818   ins_encode %{
 8819     __ imull($op1$$Register, $op2$$Register);
 8820   %}
 8821   ins_pipe(ialu_reg_reg_alu0);
 8822 %}
 8823 
 8824 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8825 %{
 8826   match(Set cr (OverflowMulI op1 op2));
 8827   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8828 
 8829   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8830   ins_encode %{
 8831     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8832   %}
 8833   ins_pipe(ialu_reg_reg_alu0);
 8834 %}
 8835 
 8836 // Integer Absolute Instructions
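// Branch-free absolute value: with m = src >> 31 (all ones for a negative src, else 0),
// (src ^ m) - m gives -src for negative src and src otherwise.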
 8837 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8838 %{
 8839   match(Set dst (AbsI src));
 8840   effect(TEMP dst, TEMP tmp, KILL cr);
 8841   format %{ "movl $tmp, $src\n\t"
 8842             "sarl $tmp, 31\n\t"
 8843             "movl $dst, $src\n\t"
 8844             "xorl $dst, $tmp\n\t"
 8845             "subl $dst, $tmp\n"
 8846           %}
 8847   ins_encode %{
 8848     __ movl($tmp$$Register, $src$$Register);
 8849     __ sarl($tmp$$Register, 31);
 8850     __ movl($dst$$Register, $src$$Register);
 8851     __ xorl($dst$$Register, $tmp$$Register);
 8852     __ subl($dst$$Register, $tmp$$Register);
 8853   %}
 8854 
 8855   ins_pipe(ialu_reg_reg);
 8856 %}
 8857 
 8858 //----------Long Instructions------------------------------------------------
 8859 // Add Long Register with Register
 8860 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8861   match(Set dst (AddL dst src));
 8862   effect(KILL cr);
 8863   ins_cost(200);
 8864   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8865             "ADC    $dst.hi,$src.hi" %}
 8866   opcode(0x03, 0x13);
 8867   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8868   ins_pipe( ialu_reg_reg_long );
 8869 %}
 8870 
 8871 // Add Long Register with Immediate
 8872 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8873   match(Set dst (AddL dst src));
 8874   effect(KILL cr);
 8875   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8876             "ADC    $dst.hi,$src.hi" %}
 8877   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8878   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8879   ins_pipe( ialu_reg_long );
 8880 %}
 8881 
 8882 // Add Long Register with Memory
 8883 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8884   match(Set dst (AddL dst (LoadL mem)));
 8885   effect(KILL cr);
 8886   ins_cost(125);
 8887   format %{ "ADD    $dst.lo,$mem\n\t"
 8888             "ADC    $dst.hi,$mem+4" %}
 8889   opcode(0x03, 0x13);
 8890   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8891   ins_pipe( ialu_reg_long_mem );
 8892 %}
 8893 
 8894 // Subtract Long Register with Register.
 8895 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8896   match(Set dst (SubL dst src));
 8897   effect(KILL cr);
 8898   ins_cost(200);
 8899   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8900             "SBB    $dst.hi,$src.hi" %}
 8901   opcode(0x2B, 0x1B);
 8902   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8903   ins_pipe( ialu_reg_reg_long );
 8904 %}
 8905 
 8906 // Subtract Long Register with Immediate
 8907 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8908   match(Set dst (SubL dst src));
 8909   effect(KILL cr);
 8910   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8911             "SBB    $dst.hi,$src.hi" %}
 8912   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8913   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8914   ins_pipe( ialu_reg_long );
 8915 %}
 8916 
 8917 // Subtract Long Register with Memory
 8918 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8919   match(Set dst (SubL dst (LoadL mem)));
 8920   effect(KILL cr);
 8921   ins_cost(125);
 8922   format %{ "SUB    $dst.lo,$mem\n\t"
 8923             "SBB    $dst.hi,$mem+4" %}
 8924   opcode(0x2B, 0x1B);
 8925   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8926   ins_pipe( ialu_reg_long_mem );
 8927 %}
 8928 
 8929 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8930   match(Set dst (SubL zero dst));
 8931   effect(KILL cr);
 8932   ins_cost(300);
 8933   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8934   ins_encode( neg_long(dst) );
 8935   ins_pipe( ialu_reg_reg_long );
 8936 %}
 8937 
 8938 // And Long Register with Register
 8939 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8940   match(Set dst (AndL dst src));
 8941   effect(KILL cr);
 8942   format %{ "AND    $dst.lo,$src.lo\n\t"
 8943             "AND    $dst.hi,$src.hi" %}
 8944   opcode(0x23,0x23);
 8945   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8946   ins_pipe( ialu_reg_reg_long );
 8947 %}
 8948 
 8949 // And Long Register with Immediate
 8950 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8951   match(Set dst (AndL dst src));
 8952   effect(KILL cr);
 8953   format %{ "AND    $dst.lo,$src.lo\n\t"
 8954             "AND    $dst.hi,$src.hi" %}
 8955   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8956   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8957   ins_pipe( ialu_reg_long );
 8958 %}
 8959 
 8960 // And Long Register with Memory
 8961 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8962   match(Set dst (AndL dst (LoadL mem)));
 8963   effect(KILL cr);
 8964   ins_cost(125);
 8965   format %{ "AND    $dst.lo,$mem\n\t"
 8966             "AND    $dst.hi,$mem+4" %}
 8967   opcode(0x23, 0x23);
 8968   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8969   ins_pipe( ialu_reg_long_mem );
 8970 %}
 8971 
 8972 // BMI1 instructions
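// The 64-bit BLSI/BLSMSK/BLSR forms below are synthesized from two 32-bit ops: the low
// word is processed first, and the resulting flags (ZF of the result for BLSI; CF, which
// these instructions set when their source is zero, for BLSMSK/BLSR) tell whether the
// lowest set bit lies in the low word; only when the low word is zero is the operation
// applied to the high word as well.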
 8973 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8974   match(Set dst (AndL (XorL src1 minus_1) src2));
 8975   predicate(UseBMI1Instructions);
 8976   effect(KILL cr, TEMP dst);
 8977 
 8978   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8979             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8980          %}
 8981 
 8982   ins_encode %{
 8983     Register Rdst = $dst$$Register;
 8984     Register Rsrc1 = $src1$$Register;
 8985     Register Rsrc2 = $src2$$Register;
 8986     __ andnl(Rdst, Rsrc1, Rsrc2);
 8987     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8988   %}
 8989   ins_pipe(ialu_reg_reg_long);
 8990 %}
 8991 
 8992 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8993   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8994   predicate(UseBMI1Instructions);
 8995   effect(KILL cr, TEMP dst);
 8996 
 8997   ins_cost(125);
 8998   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8999             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9000          %}
 9001 
 9002   ins_encode %{
 9003     Register Rdst = $dst$$Register;
 9004     Register Rsrc1 = $src1$$Register;
 9005     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9006 
 9007     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9008     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9009   %}
 9010   ins_pipe(ialu_reg_mem);
 9011 %}
 9012 
 9013 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9014   match(Set dst (AndL (SubL imm_zero src) src));
 9015   predicate(UseBMI1Instructions);
 9016   effect(KILL cr, TEMP dst);
 9017 
 9018   format %{ "MOVL   $dst.hi, 0\n\t"
 9019             "BLSIL  $dst.lo, $src.lo\n\t"
 9020             "JNZ    done\n\t"
 9021             "BLSIL  $dst.hi, $src.hi\n"
 9022             "done:"
 9023          %}
 9024 
 9025   ins_encode %{
 9026     Label done;
 9027     Register Rdst = $dst$$Register;
 9028     Register Rsrc = $src$$Register;
 9029     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9030     __ blsil(Rdst, Rsrc);
 9031     __ jccb(Assembler::notZero, done);
 9032     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9033     __ bind(done);
 9034   %}
 9035   ins_pipe(ialu_reg);
 9036 %}
 9037 
 9038 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9039   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9040   predicate(UseBMI1Instructions);
 9041   effect(KILL cr, TEMP dst);
 9042 
 9043   ins_cost(125);
 9044   format %{ "MOVL   $dst.hi, 0\n\t"
 9045             "BLSIL  $dst.lo, $src\n\t"
 9046             "JNZ    done\n\t"
 9047             "BLSIL  $dst.hi, $src+4\n"
 9048             "done:"
 9049          %}
 9050 
 9051   ins_encode %{
 9052     Label done;
 9053     Register Rdst = $dst$$Register;
 9054     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9055 
 9056     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9057     __ blsil(Rdst, $src$$Address);
 9058     __ jccb(Assembler::notZero, done);
 9059     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9060     __ bind(done);
 9061   %}
 9062   ins_pipe(ialu_reg_mem);
 9063 %}
 9064 
 9065 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9066 %{
 9067   match(Set dst (XorL (AddL src minus_1) src));
 9068   predicate(UseBMI1Instructions);
 9069   effect(KILL cr, TEMP dst);
 9070 
 9071   format %{ "MOVL    $dst.hi, 0\n\t"
 9072             "BLSMSKL $dst.lo, $src.lo\n\t"
 9073             "JNC     done\n\t"
 9074             "BLSMSKL $dst.hi, $src.hi\n"
 9075             "done:"
 9076          %}
 9077 
 9078   ins_encode %{
 9079     Label done;
 9080     Register Rdst = $dst$$Register;
 9081     Register Rsrc = $src$$Register;
 9082     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9083     __ blsmskl(Rdst, Rsrc);
 9084     __ jccb(Assembler::carryClear, done);
 9085     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9086     __ bind(done);
 9087   %}
 9088 
 9089   ins_pipe(ialu_reg);
 9090 %}
 9091 
 9092 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9093 %{
 9094   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9095   predicate(UseBMI1Instructions);
 9096   effect(KILL cr, TEMP dst);
 9097 
 9098   ins_cost(125);
 9099   format %{ "MOVL    $dst.hi, 0\n\t"
 9100             "BLSMSKL $dst.lo, $src\n\t"
 9101             "JNC     done\n\t"
 9102             "BLSMSKL $dst.hi, $src+4\n"
 9103             "done:"
 9104          %}
 9105 
 9106   ins_encode %{
 9107     Label done;
 9108     Register Rdst = $dst$$Register;
 9109     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9110 
 9111     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9112     __ blsmskl(Rdst, $src$$Address);
 9113     __ jccb(Assembler::carryClear, done);
 9114     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9115     __ bind(done);
 9116   %}
 9117 
 9118   ins_pipe(ialu_reg_mem);
 9119 %}
 9120 
 9121 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9122 %{
 9123   match(Set dst (AndL (AddL src minus_1) src) );
 9124   predicate(UseBMI1Instructions);
 9125   effect(KILL cr, TEMP dst);
 9126 
 9127   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9128             "BLSRL  $dst.lo, $src.lo\n\t"
 9129             "JNC    done\n\t"
 9130             "BLSRL  $dst.hi, $src.hi\n"
 9131             "done:"
 9132   %}
 9133 
 9134   ins_encode %{
 9135     Label done;
 9136     Register Rdst = $dst$$Register;
 9137     Register Rsrc = $src$$Register;
 9138     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9139     __ blsrl(Rdst, Rsrc);
 9140     __ jccb(Assembler::carryClear, done);
 9141     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9142     __ bind(done);
 9143   %}
 9144 
 9145   ins_pipe(ialu_reg);
 9146 %}
 9147 
 9148 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9149 %{
 9150   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9151   predicate(UseBMI1Instructions);
 9152   effect(KILL cr, TEMP dst);
 9153 
 9154   ins_cost(125);
 9155   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9156             "BLSRL  $dst.lo, $src\n\t"
 9157             "JNC    done\n\t"
 9158             "BLSRL  $dst.hi, $src+4\n"
 9159             "done:"
 9160   %}
 9161 
 9162   ins_encode %{
 9163     Label done;
 9164     Register Rdst = $dst$$Register;
 9165     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9166     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9167     __ blsrl(Rdst, $src$$Address);
 9168     __ jccb(Assembler::carryClear, done);
 9169     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9170     __ bind(done);
 9171   %}
 9172 
 9173   ins_pipe(ialu_reg_mem);
 9174 %}
 9175 
 9176 // Or Long Register with Register
 9177 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9178   match(Set dst (OrL dst src));
 9179   effect(KILL cr);
 9180   format %{ "OR     $dst.lo,$src.lo\n\t"
 9181             "OR     $dst.hi,$src.hi" %}
 9182   opcode(0x0B,0x0B);
 9183   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9184   ins_pipe( ialu_reg_reg_long );
 9185 %}
 9186 
 9187 // Or Long Register with Immediate
 9188 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9189   match(Set dst (OrL dst src));
 9190   effect(KILL cr);
 9191   format %{ "OR     $dst.lo,$src.lo\n\t"
 9192             "OR     $dst.hi,$src.hi" %}
 9193   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9194   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9195   ins_pipe( ialu_reg_long );
 9196 %}
 9197 
 9198 // Or Long Register with Memory
 9199 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9200   match(Set dst (OrL dst (LoadL mem)));
 9201   effect(KILL cr);
 9202   ins_cost(125);
 9203   format %{ "OR     $dst.lo,$mem\n\t"
 9204             "OR     $dst.hi,$mem+4" %}
 9205   opcode(0x0B,0x0B);
 9206   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9207   ins_pipe( ialu_reg_long_mem );
 9208 %}
 9209 
 9210 // Xor Long Register with Register
 9211 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9212   match(Set dst (XorL dst src));
 9213   effect(KILL cr);
 9214   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9215             "XOR    $dst.hi,$src.hi" %}
 9216   opcode(0x33,0x33);
 9217   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9218   ins_pipe( ialu_reg_reg_long );
 9219 %}
 9220 
 9221 // Xor Long Register with Immediate -1
 9222 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9223   match(Set dst (XorL dst imm));
 9224   format %{ "NOT    $dst.lo\n\t"
 9225             "NOT    $dst.hi" %}
 9226   ins_encode %{
 9227      __ notl($dst$$Register);
 9228      __ notl(HIGH_FROM_LOW($dst$$Register));
 9229   %}
 9230   ins_pipe( ialu_reg_long );
 9231 %}
 9232 
 9233 // Xor Long Register with Immediate
 9234 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9235   match(Set dst (XorL dst src));
 9236   effect(KILL cr);
 9237   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9238             "XOR    $dst.hi,$src.hi" %}
 9239   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9240   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9241   ins_pipe( ialu_reg_long );
 9242 %}
 9243 
 9244 // Xor Long Register with Memory
 9245 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9246   match(Set dst (XorL dst (LoadL mem)));
 9247   effect(KILL cr);
 9248   ins_cost(125);
 9249   format %{ "XOR    $dst.lo,$mem\n\t"
 9250             "XOR    $dst.hi,$mem+4" %}
 9251   opcode(0x33,0x33);
 9252   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9253   ins_pipe( ialu_reg_long_mem );
 9254 %}
 9255 
 9256 // Shift Left Long by 1
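// This and the by-2/by-3 variants below shift by repeated doubling: ADD lo,lo shifts the
// low word left by one and puts the bit shifted out into CF, and ADC hi,hi shifts that
// bit into the high word.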
 9257 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9258   predicate(UseNewLongLShift);
 9259   match(Set dst (LShiftL dst cnt));
 9260   effect(KILL cr);
 9261   ins_cost(100);
 9262   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9263             "ADC    $dst.hi,$dst.hi" %}
 9264   ins_encode %{
 9265     __ addl($dst$$Register,$dst$$Register);
 9266     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9267   %}
 9268   ins_pipe( ialu_reg_long );
 9269 %}
 9270 
 9271 // Shift Left Long by 2
 9272 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9273   predicate(UseNewLongLShift);
 9274   match(Set dst (LShiftL dst cnt));
 9275   effect(KILL cr);
 9276   ins_cost(100);
 9277   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9278             "ADC    $dst.hi,$dst.hi\n\t"
 9279             "ADD    $dst.lo,$dst.lo\n\t"
 9280             "ADC    $dst.hi,$dst.hi" %}
 9281   ins_encode %{
 9282     __ addl($dst$$Register,$dst$$Register);
 9283     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9284     __ addl($dst$$Register,$dst$$Register);
 9285     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9286   %}
 9287   ins_pipe( ialu_reg_long );
 9288 %}
 9289 
 9290 // Shift Left Long by 3
 9291 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9292   predicate(UseNewLongLShift);
 9293   match(Set dst (LShiftL dst cnt));
 9294   effect(KILL cr);
 9295   ins_cost(100);
 9296   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9297             "ADC    $dst.hi,$dst.hi\n\t"
 9298             "ADD    $dst.lo,$dst.lo\n\t"
 9299             "ADC    $dst.hi,$dst.hi\n\t"
 9300             "ADD    $dst.lo,$dst.lo\n\t"
 9301             "ADC    $dst.hi,$dst.hi" %}
 9302   ins_encode %{
 9303     __ addl($dst$$Register,$dst$$Register);
 9304     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9305     __ addl($dst$$Register,$dst$$Register);
 9306     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9307     __ addl($dst$$Register,$dst$$Register);
 9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9309   %}
 9310   ins_pipe( ialu_reg_long );
 9311 %}
 9312 
 9313 // Shift Left Long by 1-31
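// SHLD dst.hi,dst.lo,cnt shifts dst.hi left by cnt while filling the vacated low bits
// from the top of dst.lo; combined with SHL dst.lo,cnt this is a full 64-bit left shift
// for counts below 32. SHRD works symmetrically for the right shifts further down.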
 9314 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9315   match(Set dst (LShiftL dst cnt));
 9316   effect(KILL cr);
 9317   ins_cost(200);
 9318   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9319             "SHL    $dst.lo,$cnt" %}
 9320   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9321   ins_encode( move_long_small_shift(dst,cnt) );
 9322   ins_pipe( ialu_reg_long );
 9323 %}
 9324 
 9325 // Shift Left Long by 32-63
 9326 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9327   match(Set dst (LShiftL dst cnt));
 9328   effect(KILL cr);
 9329   ins_cost(300);
 9330   format %{ "MOV    $dst.hi,$dst.lo\n"
 9331           "\tSHL    $dst.hi,$cnt-32\n"
 9332           "\tXOR    $dst.lo,$dst.lo" %}
 9333   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9334   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9335   ins_pipe( ialu_reg_long );
 9336 %}
 9337 
 9338 // Shift Left Long by variable
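// For a variable count only the low 5 bits reach SHLD/SHL (the hardware takes CL mod 32);
// bit 5 of the count (the TEST against 32) decides whether the low word must first be
// moved into the high word and cleared. The variable right shifts below use the
// symmetric scheme.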
 9339 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9340   match(Set dst (LShiftL dst shift));
 9341   effect(KILL cr);
 9342   ins_cost(500+200);
 9343   size(17);
 9344   format %{ "TEST   $shift,32\n\t"
 9345             "JEQ,s  small\n\t"
 9346             "MOV    $dst.hi,$dst.lo\n\t"
 9347             "XOR    $dst.lo,$dst.lo\n"
 9348     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9349             "SHL    $dst.lo,$shift" %}
 9350   ins_encode( shift_left_long( dst, shift ) );
 9351   ins_pipe( pipe_slow );
 9352 %}
 9353 
 9354 // Shift Right Long by 1-31
 9355 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9356   match(Set dst (URShiftL dst cnt));
 9357   effect(KILL cr);
 9358   ins_cost(200);
 9359   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9360             "SHR    $dst.hi,$cnt" %}
 9361   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9362   ins_encode( move_long_small_shift(dst,cnt) );
 9363   ins_pipe( ialu_reg_long );
 9364 %}
 9365 
 9366 // Shift Right Long by 32-63
 9367 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9368   match(Set dst (URShiftL dst cnt));
 9369   effect(KILL cr);
 9370   ins_cost(300);
 9371   format %{ "MOV    $dst.lo,$dst.hi\n"
 9372           "\tSHR    $dst.lo,$cnt-32\n"
 9373           "\tXOR    $dst.hi,$dst.hi" %}
 9374   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9375   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9376   ins_pipe( ialu_reg_long );
 9377 %}
 9378 
 9379 // Shift Right Long by variable
 9380 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9381   match(Set dst (URShiftL dst shift));
 9382   effect(KILL cr);
 9383   ins_cost(600);
 9384   size(17);
 9385   format %{ "TEST   $shift,32\n\t"
 9386             "JEQ,s  small\n\t"
 9387             "MOV    $dst.lo,$dst.hi\n\t"
 9388             "XOR    $dst.hi,$dst.hi\n"
 9389     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9390             "SHR    $dst.hi,$shift" %}
 9391   ins_encode( shift_right_long( dst, shift ) );
 9392   ins_pipe( pipe_slow );
 9393 %}
 9394 
// Shift Right arithmetic Long by 1-31
 9396 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9397   match(Set dst (RShiftL dst cnt));
 9398   effect(KILL cr);
 9399   ins_cost(200);
 9400   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9401             "SAR    $dst.hi,$cnt" %}
 9402   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9403   ins_encode( move_long_small_shift(dst,cnt) );
 9404   ins_pipe( ialu_reg_long );
 9405 %}
 9406 
// Shift Right arithmetic Long by 32-63
 9408 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9409   match(Set dst (RShiftL dst cnt));
 9410   effect(KILL cr);
 9411   ins_cost(300);
 9412   format %{ "MOV    $dst.lo,$dst.hi\n"
 9413           "\tSAR    $dst.lo,$cnt-32\n"
 9414           "\tSAR    $dst.hi,31" %}
 9415   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9416   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9417   ins_pipe( ialu_reg_long );
 9418 %}
 9419 
 9420 // Shift Right arithmetic Long by variable
 9421 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9422   match(Set dst (RShiftL dst shift));
 9423   effect(KILL cr);
 9424   ins_cost(600);
 9425   size(18);
 9426   format %{ "TEST   $shift,32\n\t"
 9427             "JEQ,s  small\n\t"
 9428             "MOV    $dst.lo,$dst.hi\n\t"
 9429             "SAR    $dst.hi,31\n"
 9430     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9431             "SAR    $dst.hi,$shift" %}
 9432   ins_encode( shift_right_arith_long( dst, shift ) );
 9433   ins_pipe( pipe_slow );
 9434 %}
 9435 
 9436 
 9437 //----------Double Instructions------------------------------------------------
 9438 // Double Math
 9439 
 9440 // Compare & branch
 9441 
 9442 // P6 version of float compare, sets condition codes in EFLAGS
 9443 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9444   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9445   match(Set cr (CmpD src1 src2));
 9446   effect(KILL rax);
 9447   ins_cost(150);
 9448   format %{ "FLD    $src1\n\t"
 9449             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9450             "JNP    exit\n\t"
 9451             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9452             "SAHF\n"
 9453      "exit:\tNOP               // avoid branch to branch" %}
 9454   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9455   ins_encode( Push_Reg_DPR(src1),
 9456               OpcP, RegOpc(src2),
 9457               cmpF_P6_fixup );
 9458   ins_pipe( pipe_slow );
 9459 %}
 9460 
 9461 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9462   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9463   match(Set cr (CmpD src1 src2));
 9464   ins_cost(150);
 9465   format %{ "FLD    $src1\n\t"
 9466             "FUCOMIP ST,$src2  // P6 instruction" %}
 9467   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9468   ins_encode( Push_Reg_DPR(src1),
 9469               OpcP, RegOpc(src2));
 9470   ins_pipe( pipe_slow );
 9471 %}
 9472 
 9473 // Compare & branch
 9474 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9475   predicate(UseSSE<=1);
 9476   match(Set cr (CmpD src1 src2));
 9477   effect(KILL rax);
 9478   ins_cost(200);
 9479   format %{ "FLD    $src1\n\t"
 9480             "FCOMp  $src2\n\t"
 9481             "FNSTSW AX\n\t"
 9482             "TEST   AX,0x400\n\t"
 9483             "JZ,s   flags\n\t"
 9484             "MOV    AH,1\t# unordered treat as LT\n"
 9485     "flags:\tSAHF" %}
 9486   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9487   ins_encode( Push_Reg_DPR(src1),
 9488               OpcP, RegOpc(src2),
 9489               fpu_flags);
 9490   ins_pipe( pipe_slow );
 9491 %}
 9492 
 9493 // Compare vs zero into -1,0,1
 9494 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9495   predicate(UseSSE<=1);
 9496   match(Set dst (CmpD3 src1 zero));
 9497   effect(KILL cr, KILL rax);
 9498   ins_cost(280);
 9499   format %{ "FTSTD  $dst,$src1" %}
 9500   opcode(0xE4, 0xD9);
 9501   ins_encode( Push_Reg_DPR(src1),
 9502               OpcS, OpcP, PopFPU,
 9503               CmpF_Result(dst));
 9504   ins_pipe( pipe_slow );
 9505 %}
 9506 
 9507 // Compare into -1,0,1
 9508 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9509   predicate(UseSSE<=1);
 9510   match(Set dst (CmpD3 src1 src2));
 9511   effect(KILL cr, KILL rax);
 9512   ins_cost(300);
 9513   format %{ "FCMPD  $dst,$src1,$src2" %}
 9514   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9515   ins_encode( Push_Reg_DPR(src1),
 9516               OpcP, RegOpc(src2),
 9517               CmpF_Result(dst));
 9518   ins_pipe( pipe_slow );
 9519 %}
 9520 
 9521 // float compare and set condition codes in EFLAGS by XMM regs
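//
// Note on the NaN fixup used by the XMM compares below: UCOMISD/UCOMISS set
// ZF=PF=CF=1 for an unordered result.  The fixup emitted by emit_cmpfp_fixup()
// (the JNP/PUSHF/AND/POPF lines in the format strings) rewrites EFLAGS so an
// unordered result reads as a plain "below", i.e. a NaN operand is treated as
// less-than, matching the FPU path's "MOV ah,1 / SAHF" above.  Roughly, as a
// sketch of the flag surgery rather than the exact emitted code:
//     pushf();                             // EFLAGS -> [esp]
//     andl(Address(rsp, 0), 0xffffff2b);   // keep CF, clear ZF/PF/AF/SF
//     popf();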
 9522 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9523   predicate(UseSSE>=2);
 9524   match(Set cr (CmpD src1 src2));
 9525   ins_cost(145);
 9526   format %{ "UCOMISD $src1,$src2\n\t"
 9527             "JNP,s   exit\n\t"
 9528             "PUSHF\t# saw NaN, set CF\n\t"
 9529             "AND     [rsp], #0xffffff2b\n\t"
 9530             "POPF\n"
 9531     "exit:" %}
 9532   ins_encode %{
 9533     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9534     emit_cmpfp_fixup(_masm);
 9535   %}
 9536   ins_pipe( pipe_slow );
 9537 %}
 9538 
 9539 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9540   predicate(UseSSE>=2);
 9541   match(Set cr (CmpD src1 src2));
 9542   ins_cost(100);
 9543   format %{ "UCOMISD $src1,$src2" %}
 9544   ins_encode %{
 9545     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9546   %}
 9547   ins_pipe( pipe_slow );
 9548 %}
 9549 
 9550 // float compare and set condition codes in EFLAGS by XMM regs
 9551 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9552   predicate(UseSSE>=2);
 9553   match(Set cr (CmpD src1 (LoadD src2)));
 9554   ins_cost(145);
 9555   format %{ "UCOMISD $src1,$src2\n\t"
 9556             "JNP,s   exit\n\t"
 9557             "PUSHF\t# saw NaN, set CF\n\t"
 9558             "AND     [rsp], #0xffffff2b\n\t"
 9559             "POPF\n"
 9560     "exit:" %}
 9561   ins_encode %{
 9562     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9563     emit_cmpfp_fixup(_masm);
 9564   %}
 9565   ins_pipe( pipe_slow );
 9566 %}
 9567 
 9568 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9569   predicate(UseSSE>=2);
 9570   match(Set cr (CmpD src1 (LoadD src2)));
 9571   ins_cost(100);
 9572   format %{ "UCOMISD $src1,$src2" %}
 9573   ins_encode %{
 9574     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9575   %}
 9576   ins_pipe( pipe_slow );
 9577 %}
 9578 
 9579 // Compare into -1,0,1 in XMM
 9580 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9581   predicate(UseSSE>=2);
 9582   match(Set dst (CmpD3 src1 src2));
 9583   effect(KILL cr);
 9584   ins_cost(255);
 9585   format %{ "UCOMISD $src1, $src2\n\t"
 9586             "MOV     $dst, #-1\n\t"
 9587             "JP,s    done\n\t"
 9588             "JB,s    done\n\t"
 9589             "SETNE   $dst\n\t"
 9590             "MOVZB   $dst, $dst\n"
 9591     "done:" %}
 9592   ins_encode %{
 9593     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9594     emit_cmpfp3(_masm, $dst$$Register);
 9595   %}
 9596   ins_pipe( pipe_slow );
 9597 %}
 9598 
 9599 // Compare into -1,0,1 in XMM and memory
 9600 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9601   predicate(UseSSE>=2);
 9602   match(Set dst (CmpD3 src1 (LoadD src2)));
 9603   effect(KILL cr);
 9604   ins_cost(275);
 9605   format %{ "UCOMISD $src1, $src2\n\t"
 9606             "MOV     $dst, #-1\n\t"
 9607             "JP,s    done\n\t"
 9608             "JB,s    done\n\t"
 9609             "SETNE   $dst\n\t"
 9610             "MOVZB   $dst, $dst\n"
 9611     "done:" %}
 9612   ins_encode %{
 9613     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9614     emit_cmpfp3(_masm, $dst$$Register);
 9615   %}
 9616   ins_pipe( pipe_slow );
 9617 %}
 9618 
 9619 
 9620 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9621   predicate (UseSSE <=1);
 9622   match(Set dst (SubD dst src));
 9623 
 9624   format %{ "FLD    $src\n\t"
 9625             "DSUBp  $dst,ST" %}
 9626   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9627   ins_cost(150);
 9628   ins_encode( Push_Reg_DPR(src),
 9629               OpcP, RegOpc(dst) );
 9630   ins_pipe( fpu_reg_reg );
 9631 %}
 9632 
 9633 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9634   predicate (UseSSE <=1);
 9635   match(Set dst (RoundDouble (SubD src1 src2)));
 9636   ins_cost(250);
 9637 
 9638   format %{ "FLD    $src2\n\t"
 9639             "DSUB   ST,$src1\n\t"
 9640             "FSTP_D $dst\t# D-round" %}
 9641   opcode(0xD8, 0x5);
 9642   ins_encode( Push_Reg_DPR(src2),
 9643               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9644   ins_pipe( fpu_mem_reg_reg );
 9645 %}
 9646 
 9647 
 9648 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9649   predicate (UseSSE <=1);
 9650   match(Set dst (SubD dst (LoadD src)));
 9651   ins_cost(150);
 9652 
 9653   format %{ "FLD    $src\n\t"
 9654             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9656   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9657               OpcP, RegOpc(dst) );
 9658   ins_pipe( fpu_reg_mem );
 9659 %}
 9660 
 9661 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9662   predicate (UseSSE<=1);
 9663   match(Set dst (AbsD src));
 9664   ins_cost(100);
 9665   format %{ "FABS" %}
 9666   opcode(0xE1, 0xD9);
 9667   ins_encode( OpcS, OpcP );
 9668   ins_pipe( fpu_reg_reg );
 9669 %}
 9670 
 9671 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9672   predicate(UseSSE<=1);
 9673   match(Set dst (NegD src));
 9674   ins_cost(100);
 9675   format %{ "FCHS" %}
 9676   opcode(0xE0, 0xD9);
 9677   ins_encode( OpcS, OpcP );
 9678   ins_pipe( fpu_reg_reg );
 9679 %}
 9680 
 9681 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9682   predicate(UseSSE<=1);
 9683   match(Set dst (AddD dst src));
 9684   format %{ "FLD    $src\n\t"
 9685             "DADD   $dst,ST" %}
 9686   size(4);
 9687   ins_cost(150);
 9688   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9689   ins_encode( Push_Reg_DPR(src),
 9690               OpcP, RegOpc(dst) );
 9691   ins_pipe( fpu_reg_reg );
 9692 %}
 9693 
 9694 
 9695 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9696   predicate(UseSSE<=1);
 9697   match(Set dst (RoundDouble (AddD src1 src2)));
 9698   ins_cost(250);
 9699 
 9700   format %{ "FLD    $src2\n\t"
 9701             "DADD   ST,$src1\n\t"
 9702             "FSTP_D $dst\t# D-round" %}
 9703   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9704   ins_encode( Push_Reg_DPR(src2),
 9705               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9706   ins_pipe( fpu_mem_reg_reg );
 9707 %}
 9708 
 9709 
 9710 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9711   predicate(UseSSE<=1);
 9712   match(Set dst (AddD dst (LoadD src)));
 9713   ins_cost(150);
 9714 
 9715   format %{ "FLD    $src\n\t"
 9716             "DADDp  $dst,ST" %}
 9717   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9718   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9719               OpcP, RegOpc(dst) );
 9720   ins_pipe( fpu_reg_mem );
 9721 %}
 9722 
 9723 // add-to-memory
 9724 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9725   predicate(UseSSE<=1);
 9726   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9727   ins_cost(150);
 9728 
 9729   format %{ "FLD_D  $dst\n\t"
 9730             "DADD   ST,$src\n\t"
 9731             "FST_D  $dst" %}
 9732   opcode(0xDD, 0x0);
 9733   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9734               Opcode(0xD8), RegOpc(src),
 9735               set_instruction_start,
 9736               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9737   ins_pipe( fpu_reg_mem );
 9738 %}
 9739 
 9740 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9741   predicate(UseSSE<=1);
 9742   match(Set dst (AddD dst con));
 9743   ins_cost(125);
 9744   format %{ "FLD1\n\t"
 9745             "DADDp  $dst,ST" %}
 9746   ins_encode %{
 9747     __ fld1();
 9748     __ faddp($dst$$reg);
 9749   %}
 9750   ins_pipe(fpu_reg);
 9751 %}
 9752 
 9753 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9754   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9755   match(Set dst (AddD dst con));
 9756   ins_cost(200);
 9757   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9758             "DADDp  $dst,ST" %}
 9759   ins_encode %{
 9760     __ fld_d($constantaddress($con));
 9761     __ faddp($dst$$reg);
 9762   %}
 9763   ins_pipe(fpu_reg_mem);
 9764 %}
 9765 
 9766 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9767   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9768   match(Set dst (RoundDouble (AddD src con)));
 9769   ins_cost(200);
 9770   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9771             "DADD   ST,$src\n\t"
 9772             "FSTP_D $dst\t# D-round" %}
 9773   ins_encode %{
 9774     __ fld_d($constantaddress($con));
 9775     __ fadd($src$$reg);
 9776     __ fstp_d(Address(rsp, $dst$$disp));
 9777   %}
 9778   ins_pipe(fpu_mem_reg_con);
 9779 %}
 9780 
 9781 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9782   predicate(UseSSE<=1);
 9783   match(Set dst (MulD dst src));
 9784   format %{ "FLD    $src\n\t"
 9785             "DMULp  $dst,ST" %}
 9786   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9787   ins_cost(150);
 9788   ins_encode( Push_Reg_DPR(src),
 9789               OpcP, RegOpc(dst) );
 9790   ins_pipe( fpu_reg_reg );
 9791 %}
 9792 
 9793 // Strict FP instruction biases argument before multiply then
 9794 // biases result to avoid double rounding of subnormals.
 9795 //
 9796 // scale arg1 by multiplying arg1 by 2^(-15360)
 9797 // load arg2
 9798 // multiply scaled arg1 by arg2
 9799 // rescale product by 2^(15360)
 9800 //
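// Written out (a sketch of the arithmetic, not of the emitted code), the
// three multiplies below compute
//     dst = ((dst * 2^-15360) * src) * 2^+15360
// where 15360 is the difference between the x87 extended exponent bias
// (16383) and the IEEE double bias (1023).  The idea is that down-scaling
// pushes a would-be double subnormal into the x87 denormal range, so it is
// rounded to the reduced precision in a single step; the final power-of-two
// rescale is then exact, giving the single rounding that strict FP double
// semantics require.
//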
 9801 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9802   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9803   match(Set dst (MulD dst src));
 9804   ins_cost(1);   // Select this instruction for all FP double multiplies
 9805 
 9806   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9807             "DMULp  $dst,ST\n\t"
 9808             "FLD    $src\n\t"
 9809             "DMULp  $dst,ST\n\t"
 9810             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9811             "DMULp  $dst,ST\n\t" %}
 9812   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9813   ins_encode( strictfp_bias1(dst),
 9814               Push_Reg_DPR(src),
 9815               OpcP, RegOpc(dst),
 9816               strictfp_bias2(dst) );
 9817   ins_pipe( fpu_reg_reg );
 9818 %}
 9819 
 9820 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9821   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9822   match(Set dst (MulD dst con));
 9823   ins_cost(200);
 9824   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9825             "DMULp  $dst,ST" %}
 9826   ins_encode %{
 9827     __ fld_d($constantaddress($con));
 9828     __ fmulp($dst$$reg);
 9829   %}
 9830   ins_pipe(fpu_reg_mem);
 9831 %}
 9832 
 9833 
 9834 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9835   predicate( UseSSE<=1 );
 9836   match(Set dst (MulD dst (LoadD src)));
 9837   ins_cost(200);
 9838   format %{ "FLD_D  $src\n\t"
 9839             "DMULp  $dst,ST" %}
 9840   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9841   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9842               OpcP, RegOpc(dst) );
 9843   ins_pipe( fpu_reg_mem );
 9844 %}
 9845 
 9846 //
 9847 // Cisc-alternate to reg-reg multiply
 9848 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9849   predicate( UseSSE<=1 );
 9850   match(Set dst (MulD src (LoadD mem)));
 9851   ins_cost(250);
 9852   format %{ "FLD_D  $mem\n\t"
 9853             "DMUL   ST,$src\n\t"
 9854             "FSTP_D $dst" %}
 9855   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9856   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9857               OpcReg_FPR(src),
 9858               Pop_Reg_DPR(dst) );
 9859   ins_pipe( fpu_reg_reg_mem );
 9860 %}
 9861 
 9862 
 9863 // MACRO3 -- addDPR a mulDPR
 9864 // This instruction is a '2-address' instruction in that the result goes
 9865 // back to src2.  This eliminates a move from the macro; possibly the
 9866 // register allocator will have to add it back (and maybe not).
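// For example, a double accumulation such as  acc += a * b  (acc = src2,
// a = src0, b = src1) matches this rule, so the sequence below
//     FLD a ; DMUL ST,b ; DADDp acc,ST
// lands the sum directly in acc instead of materializing the product in a
// temporary and copying it afterwards.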
 9867 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9868   predicate( UseSSE<=1 );
 9869   match(Set src2 (AddD (MulD src0 src1) src2));
 9870   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9871             "DMUL   ST,$src1\n\t"
 9872             "DADDp  $src2,ST" %}
 9873   ins_cost(250);
 9874   opcode(0xDD); /* LoadD DD /0 */
 9875   ins_encode( Push_Reg_FPR(src0),
 9876               FMul_ST_reg(src1),
 9877               FAddP_reg_ST(src2) );
 9878   ins_pipe( fpu_reg_reg_reg );
 9879 %}
 9880 
 9881 
 9882 // MACRO3 -- subDPR a mulDPR
 9883 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9884   predicate( UseSSE<=1 );
 9885   match(Set src2 (SubD (MulD src0 src1) src2));
 9886   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9887             "DMUL   ST,$src1\n\t"
 9888             "DSUBRp $src2,ST" %}
 9889   ins_cost(250);
 9890   ins_encode( Push_Reg_FPR(src0),
 9891               FMul_ST_reg(src1),
 9892               Opcode(0xDE), Opc_plus(0xE0,src2));
 9893   ins_pipe( fpu_reg_reg_reg );
 9894 %}
 9895 
 9896 
 9897 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9898   predicate( UseSSE<=1 );
 9899   match(Set dst (DivD dst src));
 9900 
 9901   format %{ "FLD    $src\n\t"
 9902             "FDIVp  $dst,ST" %}
 9903   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9904   ins_cost(150);
 9905   ins_encode( Push_Reg_DPR(src),
 9906               OpcP, RegOpc(dst) );
 9907   ins_pipe( fpu_reg_reg );
 9908 %}
 9909 
 9910 // Strict FP instruction biases argument before division then
 9911 // biases result, to avoid double rounding of subnormals.
 9912 //
 9913 // scale dividend by multiplying dividend by 2^(-15360)
 9914 // load divisor
 9915 // divide scaled dividend by divisor
 9916 // rescale quotient by 2^(15360)
 9917 //
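// Written out (a sketch, not the emitted code), the sequence below computes
//     dst = ((dst * 2^-15360) / src) * 2^+15360
// using the same subnormal-bias trick as strictfp_mulDPR_reg above: the
// down-scaled quotient is rounded once at the reduced precision, and the
// final power-of-two rescale is exact.
//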
 9918 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9923 
 9924   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9925             "DMULp  $dst,ST\n\t"
 9926             "FLD    $src\n\t"
 9927             "FDIVp  $dst,ST\n\t"
 9928             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9929             "DMULp  $dst,ST\n\t" %}
 9930   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9931   ins_encode( strictfp_bias1(dst),
 9932               Push_Reg_DPR(src),
 9933               OpcP, RegOpc(dst),
 9934               strictfp_bias2(dst) );
 9935   ins_pipe( fpu_reg_reg );
 9936 %}
 9937 
 9938 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9939   predicate(UseSSE<=1);
 9940   match(Set dst (ModD dst src));
 9941   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9942 
 9943   format %{ "DMOD   $dst,$src" %}
 9944   ins_cost(250);
 9945   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9946               emitModDPR(),
 9947               Push_Result_Mod_DPR(src),
 9948               Pop_Reg_DPR(dst));
 9949   ins_pipe( pipe_slow );
 9950 %}
 9951 
 9952 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9953   predicate(UseSSE>=2);
 9954   match(Set dst (ModD src0 src1));
 9955   effect(KILL rax, KILL cr);
 9956 
 9957   format %{ "SUB    ESP,8\t # DMOD\n"
 9958           "\tMOVSD  [ESP+0],$src1\n"
 9959           "\tFLD_D  [ESP+0]\n"
 9960           "\tMOVSD  [ESP+0],$src0\n"
 9961           "\tFLD_D  [ESP+0]\n"
 9962      "loop:\tFPREM\n"
 9963           "\tFWAIT\n"
 9964           "\tFNSTSW AX\n"
 9965           "\tSAHF\n"
 9966           "\tJP     loop\n"
 9967           "\tFSTP_D [ESP+0]\n"
 9968           "\tMOVSD  $dst,[ESP+0]\n"
 9969           "\tADD    ESP,8\n"
 9970           "\tFSTP   ST0\t # Restore FPU Stack"
 9971     %}
 9972   ins_cost(250);
 9973   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9974   ins_pipe( pipe_slow );
 9975 %}
 9976 
 9977 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9978   predicate (UseSSE<=1);
 9979   match(Set dst(AtanD dst src));
 9980   format %{ "DATA   $dst,$src" %}
 9981   opcode(0xD9, 0xF3);
 9982   ins_encode( Push_Reg_DPR(src),
 9983               OpcP, OpcS, RegOpc(dst) );
 9984   ins_pipe( pipe_slow );
 9985 %}
 9986 
 9987 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9988   predicate (UseSSE>=2);
 9989   match(Set dst(AtanD dst src));
 9990   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9991   format %{ "DATA   $dst,$src" %}
 9992   opcode(0xD9, 0xF3);
 9993   ins_encode( Push_SrcD(src),
 9994               OpcP, OpcS, Push_ResultD(dst) );
 9995   ins_pipe( pipe_slow );
 9996 %}
 9997 
 9998 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
 9999   predicate (UseSSE<=1);
10000   match(Set dst (SqrtD src));
10001   format %{ "DSQRT  $dst,$src" %}
10002   opcode(0xFA, 0xD9);
10003   ins_encode( Push_Reg_DPR(src),
10004               OpcS, OpcP, Pop_Reg_DPR(dst) );
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 //-------------Float Instructions-------------------------------
10009 // Float Math
10010 
10011 // Code for float compare:
10012 //     fcompp();
10013 //     fwait(); fnstsw_ax();
10014 //     sahf();
10015 //     movl(dst, unordered_result);
10016 //     jcc(Assembler::parity, exit);
10017 //     movl(dst, less_result);
10018 //     jcc(Assembler::below, exit);
10019 //     movl(dst, equal_result);
10020 //     jcc(Assembler::equal, exit);
10021 //     movl(dst, greater_result);
10022 //   exit:
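//
// The XMM compares below get the same -1,0,1 result from emit_cmpfp3();
// written in the same pseudocode style as above (and following the format
// strings of cmpF_reg/cmpD_reg), the emitted sequence is roughly:
//     ucomiss(src1, src2);             // ucomisd() for doubles
//     movl(dst, -1);
//     jcc(Assembler::parity, done);    // unordered -> -1
//     jcc(Assembler::below, done);     // less      -> -1
//     setne(dst);                      // equal -> 0, greater -> 1
//     movzbl(dst, dst);
//   done: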
10023 
10024 // P6 version of float compare, sets condition codes in EFLAGS
10025 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10026   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10027   match(Set cr (CmpF src1 src2));
10028   effect(KILL rax);
10029   ins_cost(150);
10030   format %{ "FLD    $src1\n\t"
10031             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10032             "JNP    exit\n\t"
10033             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10034             "SAHF\n"
10035      "exit:\tNOP               // avoid branch to branch" %}
10036   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10037   ins_encode( Push_Reg_DPR(src1),
10038               OpcP, RegOpc(src2),
10039               cmpF_P6_fixup );
10040   ins_pipe( pipe_slow );
10041 %}
10042 
10043 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10044   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10045   match(Set cr (CmpF src1 src2));
10046   ins_cost(100);
10047   format %{ "FLD    $src1\n\t"
10048             "FUCOMIP ST,$src2  // P6 instruction" %}
10049   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10050   ins_encode( Push_Reg_DPR(src1),
10051               OpcP, RegOpc(src2));
10052   ins_pipe( pipe_slow );
10053 %}
10054 
10055 
10056 // Compare & branch
10057 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10058   predicate(UseSSE == 0);
10059   match(Set cr (CmpF src1 src2));
10060   effect(KILL rax);
10061   ins_cost(200);
10062   format %{ "FLD    $src1\n\t"
10063             "FCOMp  $src2\n\t"
10064             "FNSTSW AX\n\t"
10065             "TEST   AX,0x400\n\t"
10066             "JZ,s   flags\n\t"
10067             "MOV    AH,1\t# unordered treat as LT\n"
10068     "flags:\tSAHF" %}
10069   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10070   ins_encode( Push_Reg_DPR(src1),
10071               OpcP, RegOpc(src2),
10072               fpu_flags);
10073   ins_pipe( pipe_slow );
10074 %}
10075 
10076 // Compare vs zero into -1,0,1
10077 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10078   predicate(UseSSE == 0);
10079   match(Set dst (CmpF3 src1 zero));
10080   effect(KILL cr, KILL rax);
10081   ins_cost(280);
10082   format %{ "FTSTF  $dst,$src1" %}
10083   opcode(0xE4, 0xD9);
10084   ins_encode( Push_Reg_DPR(src1),
10085               OpcS, OpcP, PopFPU,
10086               CmpF_Result(dst));
10087   ins_pipe( pipe_slow );
10088 %}
10089 
10090 // Compare into -1,0,1
10091 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10092   predicate(UseSSE == 0);
10093   match(Set dst (CmpF3 src1 src2));
10094   effect(KILL cr, KILL rax);
10095   ins_cost(300);
10096   format %{ "FCMPF  $dst,$src1,$src2" %}
10097   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10098   ins_encode( Push_Reg_DPR(src1),
10099               OpcP, RegOpc(src2),
10100               CmpF_Result(dst));
10101   ins_pipe( pipe_slow );
10102 %}
10103 
10104 // float compare and set condition codes in EFLAGS by XMM regs
10105 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10106   predicate(UseSSE>=1);
10107   match(Set cr (CmpF src1 src2));
10108   ins_cost(145);
10109   format %{ "UCOMISS $src1,$src2\n\t"
10110             "JNP,s   exit\n\t"
10111             "PUSHF\t# saw NaN, set CF\n\t"
10112             "AND     [rsp], #0xffffff2b\n\t"
10113             "POPF\n"
10114     "exit:" %}
10115   ins_encode %{
10116     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10117     emit_cmpfp_fixup(_masm);
10118   %}
10119   ins_pipe( pipe_slow );
10120 %}
10121 
10122 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10123   predicate(UseSSE>=1);
10124   match(Set cr (CmpF src1 src2));
10125   ins_cost(100);
10126   format %{ "UCOMISS $src1,$src2" %}
10127   ins_encode %{
10128     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10129   %}
10130   ins_pipe( pipe_slow );
10131 %}
10132 
10133 // float compare and set condition codes in EFLAGS by XMM regs
10134 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10135   predicate(UseSSE>=1);
10136   match(Set cr (CmpF src1 (LoadF src2)));
10137   ins_cost(165);
10138   format %{ "UCOMISS $src1,$src2\n\t"
10139             "JNP,s   exit\n\t"
10140             "PUSHF\t# saw NaN, set CF\n\t"
10141             "AND     [rsp], #0xffffff2b\n\t"
10142             "POPF\n"
10143     "exit:" %}
10144   ins_encode %{
10145     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10146     emit_cmpfp_fixup(_masm);
10147   %}
10148   ins_pipe( pipe_slow );
10149 %}
10150 
10151 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10152   predicate(UseSSE>=1);
10153   match(Set cr (CmpF src1 (LoadF src2)));
10154   ins_cost(100);
10155   format %{ "UCOMISS $src1,$src2" %}
10156   ins_encode %{
10157     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10158   %}
10159   ins_pipe( pipe_slow );
10160 %}
10161 
10162 // Compare into -1,0,1 in XMM
10163 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10164   predicate(UseSSE>=1);
10165   match(Set dst (CmpF3 src1 src2));
10166   effect(KILL cr);
10167   ins_cost(255);
10168   format %{ "UCOMISS $src1, $src2\n\t"
10169             "MOV     $dst, #-1\n\t"
10170             "JP,s    done\n\t"
10171             "JB,s    done\n\t"
10172             "SETNE   $dst\n\t"
10173             "MOVZB   $dst, $dst\n"
10174     "done:" %}
10175   ins_encode %{
10176     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10177     emit_cmpfp3(_masm, $dst$$Register);
10178   %}
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 // Compare into -1,0,1 in XMM and memory
10183 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10184   predicate(UseSSE>=1);
10185   match(Set dst (CmpF3 src1 (LoadF src2)));
10186   effect(KILL cr);
10187   ins_cost(275);
10188   format %{ "UCOMISS $src1, $src2\n\t"
10189             "MOV     $dst, #-1\n\t"
10190             "JP,s    done\n\t"
10191             "JB,s    done\n\t"
10192             "SETNE   $dst\n\t"
10193             "MOVZB   $dst, $dst\n"
10194     "done:" %}
10195   ins_encode %{
10196     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10197     emit_cmpfp3(_masm, $dst$$Register);
10198   %}
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 // Spill to obtain 24-bit precision
10203 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10204   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10205   match(Set dst (SubF src1 src2));
10206 
10207   format %{ "FSUB   $dst,$src1 - $src2" %}
10208   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10209   ins_encode( Push_Reg_FPR(src1),
10210               OpcReg_FPR(src2),
10211               Pop_Mem_FPR(dst) );
10212   ins_pipe( fpu_mem_reg_reg );
10213 %}
10214 //
10215 // This instruction does not round to 24-bits
10216 instruct subFPR_reg(regFPR dst, regFPR src) %{
10217   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10218   match(Set dst (SubF dst src));
10219 
10220   format %{ "FSUB   $dst,$src" %}
10221   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10222   ins_encode( Push_Reg_FPR(src),
10223               OpcP, RegOpc(dst) );
10224   ins_pipe( fpu_reg_reg );
10225 %}
10226 
10227 // Spill to obtain 24-bit precision
10228 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10229   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10230   match(Set dst (AddF src1 src2));
10231 
10232   format %{ "FADD   $dst,$src1,$src2" %}
10233   opcode(0xD8, 0x0); /* D8 C0+i */
10234   ins_encode( Push_Reg_FPR(src2),
10235               OpcReg_FPR(src1),
10236               Pop_Mem_FPR(dst) );
10237   ins_pipe( fpu_mem_reg_reg );
10238 %}
10239 //
10240 // This instruction does not round to 24-bits
10241 instruct addFPR_reg(regFPR dst, regFPR src) %{
10242   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10243   match(Set dst (AddF dst src));
10244 
10245   format %{ "FLD    $src\n\t"
10246             "FADDp  $dst,ST" %}
10247   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10248   ins_encode( Push_Reg_FPR(src),
10249               OpcP, RegOpc(dst) );
10250   ins_pipe( fpu_reg_reg );
10251 %}
10252 
10253 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10254   predicate(UseSSE==0);
10255   match(Set dst (AbsF src));
10256   ins_cost(100);
10257   format %{ "FABS" %}
10258   opcode(0xE1, 0xD9);
10259   ins_encode( OpcS, OpcP );
10260   ins_pipe( fpu_reg_reg );
10261 %}
10262 
10263 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10264   predicate(UseSSE==0);
10265   match(Set dst (NegF src));
10266   ins_cost(100);
10267   format %{ "FCHS" %}
10268   opcode(0xE0, 0xD9);
10269   ins_encode( OpcS, OpcP );
10270   ins_pipe( fpu_reg_reg );
10271 %}
10272 
10273 // Cisc-alternate to addFPR_reg
10274 // Spill to obtain 24-bit precision
10275 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10276   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10277   match(Set dst (AddF src1 (LoadF src2)));
10278 
10279   format %{ "FLD    $src2\n\t"
10280             "FADD   ST,$src1\n\t"
10281             "FSTP_S $dst" %}
10282   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10283   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10284               OpcReg_FPR(src1),
10285               Pop_Mem_FPR(dst) );
10286   ins_pipe( fpu_mem_reg_mem );
10287 %}
10288 //
10289 // Cisc-alternate to addFPR_reg
10290 // This instruction does not round to 24-bits
10291 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10292   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10293   match(Set dst (AddF dst (LoadF src)));
10294 
10295   format %{ "FADD   $dst,$src" %}
10296   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10297   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10298               OpcP, RegOpc(dst) );
10299   ins_pipe( fpu_reg_mem );
10300 %}
10301 
// Following two instructions for _222_mpegaudio
10303 // Spill to obtain 24-bit precision
10304 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10305   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10306   match(Set dst (AddF src1 src2));
10307 
10308   format %{ "FADD   $dst,$src1,$src2" %}
10309   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10310   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10311               OpcReg_FPR(src2),
10312               Pop_Mem_FPR(dst) );
10313   ins_pipe( fpu_mem_reg_mem );
10314 %}
10315 
10316 // Cisc-spill variant
10317 // Spill to obtain 24-bit precision
10318 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10319   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10320   match(Set dst (AddF src1 (LoadF src2)));
10321 
10322   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10323   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10324   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10325               set_instruction_start,
10326               OpcP, RMopc_Mem(secondary,src1),
10327               Pop_Mem_FPR(dst) );
10328   ins_pipe( fpu_mem_mem_mem );
10329 %}
10330 
10331 // Spill to obtain 24-bit precision
10332 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10333   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10334   match(Set dst (AddF src1 src2));
10335 
10336   format %{ "FADD   $dst,$src1,$src2" %}
10337   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10338   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10339               set_instruction_start,
10340               OpcP, RMopc_Mem(secondary,src1),
10341               Pop_Mem_FPR(dst) );
10342   ins_pipe( fpu_mem_mem_mem );
10343 %}
10344 
10345 
10346 // Spill to obtain 24-bit precision
10347 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10348   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10349   match(Set dst (AddF src con));
10350   format %{ "FLD    $src\n\t"
10351             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10352             "FSTP_S $dst"  %}
10353   ins_encode %{
10354     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10355     __ fadd_s($constantaddress($con));
10356     __ fstp_s(Address(rsp, $dst$$disp));
10357   %}
10358   ins_pipe(fpu_mem_reg_con);
10359 %}
10360 //
10361 // This instruction does not round to 24-bits
10362 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10363   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10364   match(Set dst (AddF src con));
10365   format %{ "FLD    $src\n\t"
10366             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10367             "FSTP   $dst"  %}
10368   ins_encode %{
10369     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10370     __ fadd_s($constantaddress($con));
10371     __ fstp_d($dst$$reg);
10372   %}
10373   ins_pipe(fpu_reg_reg_con);
10374 %}
10375 
10376 // Spill to obtain 24-bit precision
10377 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10378   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10379   match(Set dst (MulF src1 src2));
10380 
10381   format %{ "FLD    $src1\n\t"
10382             "FMUL   $src2\n\t"
10383             "FSTP_S $dst"  %}
10384   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10385   ins_encode( Push_Reg_FPR(src1),
10386               OpcReg_FPR(src2),
10387               Pop_Mem_FPR(dst) );
10388   ins_pipe( fpu_mem_reg_reg );
10389 %}
10390 //
10391 // This instruction does not round to 24-bits
10392 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10393   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10394   match(Set dst (MulF src1 src2));
10395 
10396   format %{ "FLD    $src1\n\t"
10397             "FMUL   $src2\n\t"
10398             "FSTP_S $dst"  %}
10399   opcode(0xD8, 0x1); /* D8 C8+i */
10400   ins_encode( Push_Reg_FPR(src2),
10401               OpcReg_FPR(src1),
10402               Pop_Reg_FPR(dst) );
10403   ins_pipe( fpu_reg_reg_reg );
10404 %}
10405 
10406 
10407 // Spill to obtain 24-bit precision
10408 // Cisc-alternate to reg-reg multiply
10409 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10410   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10411   match(Set dst (MulF src1 (LoadF src2)));
10412 
10413   format %{ "FLD_S  $src2\n\t"
10414             "FMUL   $src1\n\t"
10415             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
10417   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10418               OpcReg_FPR(src1),
10419               Pop_Mem_FPR(dst) );
10420   ins_pipe( fpu_mem_reg_mem );
10421 %}
10422 //
10423 // This instruction does not round to 24-bits
10424 // Cisc-alternate to reg-reg multiply
10425 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10426   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10427   match(Set dst (MulF src1 (LoadF src2)));
10428 
10429   format %{ "FMUL   $dst,$src1,$src2" %}
10430   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10431   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10432               OpcReg_FPR(src1),
10433               Pop_Reg_FPR(dst) );
10434   ins_pipe( fpu_reg_reg_mem );
10435 %}
10436 
10437 // Spill to obtain 24-bit precision
10438 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10439   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10440   match(Set dst (MulF src1 src2));
10441 
10442   format %{ "FMUL   $dst,$src1,$src2" %}
10443   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10444   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10445               set_instruction_start,
10446               OpcP, RMopc_Mem(secondary,src1),
10447               Pop_Mem_FPR(dst) );
10448   ins_pipe( fpu_mem_mem_mem );
10449 %}
10450 
10451 // Spill to obtain 24-bit precision
10452 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10453   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10454   match(Set dst (MulF src con));
10455 
10456   format %{ "FLD    $src\n\t"
10457             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10458             "FSTP_S $dst"  %}
10459   ins_encode %{
10460     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10461     __ fmul_s($constantaddress($con));
10462     __ fstp_s(Address(rsp, $dst$$disp));
10463   %}
10464   ins_pipe(fpu_mem_reg_con);
10465 %}
10466 //
10467 // This instruction does not round to 24-bits
10468 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10469   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10470   match(Set dst (MulF src con));
10471 
10472   format %{ "FLD    $src\n\t"
10473             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10474             "FSTP   $dst"  %}
10475   ins_encode %{
10476     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10477     __ fmul_s($constantaddress($con));
10478     __ fstp_d($dst$$reg);
10479   %}
10480   ins_pipe(fpu_reg_reg_con);
10481 %}
10482 
10483 
10484 //
10485 // MACRO1 -- subsume unshared load into mulFPR
10486 // This instruction does not round to 24-bits
10487 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10488   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10489   match(Set dst (MulF (LoadF mem1) src));
10490 
10491   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10492             "FMUL   ST,$src\n\t"
10493             "FSTP   $dst" %}
10494   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10495   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10496               OpcReg_FPR(src),
10497               Pop_Reg_FPR(dst) );
10498   ins_pipe( fpu_reg_reg_mem );
10499 %}
10500 //
10501 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10502 // This instruction does not round to 24-bits
10503 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10504   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10505   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10506   ins_cost(95);
10507 
10508   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10509             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10510             "FADD   ST,$src2\n\t"
10511             "FSTP   $dst" %}
10512   opcode(0xD9); /* LoadF D9 /0 */
10513   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10514               FMul_ST_reg(src1),
10515               FAdd_ST_reg(src2),
10516               Pop_Reg_FPR(dst) );
10517   ins_pipe( fpu_reg_mem_reg_reg );
10518 %}
10519 
10520 // MACRO3 -- addFPR a mulFPR
10521 // This instruction does not round to 24-bits.  It is a '2-address'
10522 // instruction in that the result goes back to src2.  This eliminates
10523 // a move from the macro; possibly the register allocator will have
10524 // to add it back (and maybe not).
10525 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10526   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10527   match(Set src2 (AddF (MulF src0 src1) src2));
10528 
10529   format %{ "FLD    $src0     ===MACRO3===\n\t"
10530             "FMUL   ST,$src1\n\t"
10531             "FADDP  $src2,ST" %}
10532   opcode(0xD9); /* LoadF D9 /0 */
10533   ins_encode( Push_Reg_FPR(src0),
10534               FMul_ST_reg(src1),
10535               FAddP_reg_ST(src2) );
10536   ins_pipe( fpu_reg_reg_reg );
10537 %}
10538 
10539 // MACRO4 -- divFPR subFPR
10540 // This instruction does not round to 24-bits
10541 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10542   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10543   match(Set dst (DivF (SubF src2 src1) src3));
10544 
10545   format %{ "FLD    $src2   ===MACRO4===\n\t"
10546             "FSUB   ST,$src1\n\t"
10547             "FDIV   ST,$src3\n\t"
            "FSTP   $dst" %}
10549   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10550   ins_encode( Push_Reg_FPR(src2),
10551               subFPR_divFPR_encode(src1,src3),
10552               Pop_Reg_FPR(dst) );
10553   ins_pipe( fpu_reg_reg_reg_reg );
10554 %}
10555 
10556 // Spill to obtain 24-bit precision
10557 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10558   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10559   match(Set dst (DivF src1 src2));
10560 
10561   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6*/
10563   ins_encode( Push_Reg_FPR(src1),
10564               OpcReg_FPR(src2),
10565               Pop_Mem_FPR(dst) );
10566   ins_pipe( fpu_mem_reg_reg );
10567 %}
10568 //
10569 // This instruction does not round to 24-bits
10570 instruct divFPR_reg(regFPR dst, regFPR src) %{
10571   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10572   match(Set dst (DivF dst src));
10573 
10574   format %{ "FDIV   $dst,$src" %}
10575   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10576   ins_encode( Push_Reg_FPR(src),
10577               OpcP, RegOpc(dst) );
10578   ins_pipe( fpu_reg_reg );
10579 %}
10580 
10581 
10582 // Spill to obtain 24-bit precision
10583 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10584   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10585   match(Set dst (ModF src1 src2));
10586   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10587 
10588   format %{ "FMOD   $dst,$src1,$src2" %}
10589   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10590               emitModDPR(),
10591               Push_Result_Mod_DPR(src2),
10592               Pop_Mem_FPR(dst));
10593   ins_pipe( pipe_slow );
10594 %}
10595 //
10596 // This instruction does not round to 24-bits
10597 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10598   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10599   match(Set dst (ModF dst src));
10600   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10601 
10602   format %{ "FMOD   $dst,$src" %}
10603   ins_encode(Push_Reg_Mod_DPR(dst, src),
10604               emitModDPR(),
10605               Push_Result_Mod_DPR(src),
10606               Pop_Reg_FPR(dst));
10607   ins_pipe( pipe_slow );
10608 %}
10609 
10610 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10611   predicate(UseSSE>=1);
10612   match(Set dst (ModF src0 src1));
10613   effect(KILL rax, KILL cr);
10614   format %{ "SUB    ESP,4\t # FMOD\n"
10615           "\tMOVSS  [ESP+0],$src1\n"
10616           "\tFLD_S  [ESP+0]\n"
10617           "\tMOVSS  [ESP+0],$src0\n"
10618           "\tFLD_S  [ESP+0]\n"
10619      "loop:\tFPREM\n"
10620           "\tFWAIT\n"
10621           "\tFNSTSW AX\n"
10622           "\tSAHF\n"
10623           "\tJP     loop\n"
10624           "\tFSTP_S [ESP+0]\n"
10625           "\tMOVSS  $dst,[ESP+0]\n"
10626           "\tADD    ESP,4\n"
10627           "\tFSTP   ST0\t # Restore FPU Stack"
10628     %}
10629   ins_cost(250);
10630   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10631   ins_pipe( pipe_slow );
10632 %}
10633 
10634 
10635 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10637 
10638 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10639   predicate(UseSSE==0);
10640   match(Set dst (RoundFloat src));
10641   ins_cost(125);
10642   format %{ "FST_S  $dst,$src\t# F-round" %}
10643   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10644   ins_pipe( fpu_mem_reg );
10645 %}
10646 
10647 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10648   predicate(UseSSE<=1);
10649   match(Set dst (RoundDouble src));
10650   ins_cost(125);
10651   format %{ "FST_D  $dst,$src\t# D-round" %}
10652   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10653   ins_pipe( fpu_mem_reg );
10654 %}
10655 
10656 // Force rounding to 24-bit precision and 6-bit exponent
10657 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10658   predicate(UseSSE==0);
10659   match(Set dst (ConvD2F src));
10660   format %{ "FST_S  $dst,$src\t# F-round" %}
10661   expand %{
10662     roundFloat_mem_reg(dst,src);
10663   %}
10664 %}
10665 
10666 // Force rounding to 24-bit precision and 6-bit exponent
10667 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10668   predicate(UseSSE==1);
10669   match(Set dst (ConvD2F src));
10670   effect( KILL cr );
10671   format %{ "SUB    ESP,4\n\t"
10672             "FST_S  [ESP],$src\t# F-round\n\t"
10673             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10675   ins_encode %{
10676     __ subptr(rsp, 4);
10677     if ($src$$reg != FPR1L_enc) {
10678       __ fld_s($src$$reg-1);
10679       __ fstp_s(Address(rsp, 0));
10680     } else {
10681       __ fst_s(Address(rsp, 0));
10682     }
10683     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10684     __ addptr(rsp, 4);
10685   %}
10686   ins_pipe( pipe_slow );
10687 %}
10688 
10689 // Force rounding double precision to single precision
10690 instruct convD2F_reg(regF dst, regD src) %{
10691   predicate(UseSSE>=2);
10692   match(Set dst (ConvD2F src));
10693   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10694   ins_encode %{
10695     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10696   %}
10697   ins_pipe( pipe_slow );
10698 %}
10699 
10700 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10701   predicate(UseSSE==0);
10702   match(Set dst (ConvF2D src));
10703   format %{ "FST_S  $dst,$src\t# D-round" %}
10704   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10705   ins_pipe( fpu_reg_reg );
10706 %}
10707 
10708 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10709   predicate(UseSSE==1);
10710   match(Set dst (ConvF2D src));
10711   format %{ "FST_D  $dst,$src\t# D-round" %}
10712   expand %{
10713     roundDouble_mem_reg(dst,src);
10714   %}
10715 %}
10716 
10717 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10718   predicate(UseSSE==1);
10719   match(Set dst (ConvF2D src));
10720   effect( KILL cr );
10721   format %{ "SUB    ESP,4\n\t"
10722             "MOVSS  [ESP] $src\n\t"
10723             "FLD_S  [ESP]\n\t"
10724             "ADD    ESP,4\n\t"
10725             "FSTP   $dst\t# D-round" %}
10726   ins_encode %{
10727     __ subptr(rsp, 4);
10728     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10729     __ fld_s(Address(rsp, 0));
10730     __ addptr(rsp, 4);
10731     __ fstp_d($dst$$reg);
10732   %}
10733   ins_pipe( pipe_slow );
10734 %}
10735 
10736 instruct convF2D_reg(regD dst, regF src) %{
10737   predicate(UseSSE>=2);
10738   match(Set dst (ConvF2D src));
10739   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10740   ins_encode %{
10741     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10742   %}
10743   ins_pipe( pipe_slow );
10744 %}
10745 
10746 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10747 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10748   predicate(UseSSE<=1);
10749   match(Set dst (ConvD2I src));
10750   effect( KILL tmp, KILL cr );
10751   format %{ "FLD    $src\t# Convert double to int \n\t"
10752             "FLDCW  trunc mode\n\t"
10753             "SUB    ESP,4\n\t"
10754             "FISTp  [ESP + #0]\n\t"
10755             "FLDCW  std/24-bit mode\n\t"
10756             "POP    EAX\n\t"
10757             "CMP    EAX,0x80000000\n\t"
10758             "JNE,s  fast\n\t"
10759             "FLD_D  $src\n\t"
10760             "CALL   d2i_wrapper\n"
10761       "fast:" %}
10762   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10763   ins_pipe( pipe_slow );
10764 %}
10765 
10766 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10767 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10768   predicate(UseSSE>=2);
10769   match(Set dst (ConvD2I src));
10770   effect( KILL tmp, KILL cr );
10771   format %{ "CVTTSD2SI $dst, $src\n\t"
10772             "CMP    $dst,0x80000000\n\t"
10773             "JNE,s  fast\n\t"
10774             "SUB    ESP, 8\n\t"
10775             "MOVSD  [ESP], $src\n\t"
10776             "FLD_D  [ESP]\n\t"
10777             "ADD    ESP, 8\n\t"
10778             "CALL   d2i_wrapper\n"
10779       "fast:" %}
10780   ins_encode %{
10781     Label fast;
10782     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10783     __ cmpl($dst$$Register, 0x80000000);
10784     __ jccb(Assembler::notEqual, fast);
10785     __ subptr(rsp, 8);
10786     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10787     __ fld_d(Address(rsp, 0));
10788     __ addptr(rsp, 8);
10789     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10790     __ post_call_nop();
10791     __ bind(fast);
10792   %}
10793   ins_pipe( pipe_slow );
10794 %}
10795 
10796 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10797   predicate(UseSSE<=1);
10798   match(Set dst (ConvD2L src));
10799   effect( KILL cr );
10800   format %{ "FLD    $src\t# Convert double to long\n\t"
10801             "FLDCW  trunc mode\n\t"
10802             "SUB    ESP,8\n\t"
10803             "FISTp  [ESP + #0]\n\t"
10804             "FLDCW  std/24-bit mode\n\t"
10805             "POP    EAX\n\t"
10806             "POP    EDX\n\t"
10807             "CMP    EDX,0x80000000\n\t"
10808             "JNE,s  fast\n\t"
10809             "TEST   EAX,EAX\n\t"
10810             "JNE,s  fast\n\t"
10811             "FLD    $src\n\t"
10812             "CALL   d2l_wrapper\n"
10813       "fast:" %}
10814   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10815   ins_pipe( pipe_slow );
10816 %}
10817 
10818 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10819 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10820   predicate (UseSSE>=2);
10821   match(Set dst (ConvD2L src));
10822   effect( KILL cr );
10823   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10824             "MOVSD  [ESP],$src\n\t"
10825             "FLD_D  [ESP]\n\t"
10826             "FLDCW  trunc mode\n\t"
10827             "FISTp  [ESP + #0]\n\t"
10828             "FLDCW  std/24-bit mode\n\t"
10829             "POP    EAX\n\t"
10830             "POP    EDX\n\t"
10831             "CMP    EDX,0x80000000\n\t"
10832             "JNE,s  fast\n\t"
10833             "TEST   EAX,EAX\n\t"
10834             "JNE,s  fast\n\t"
10835             "SUB    ESP,8\n\t"
10836             "MOVSD  [ESP],$src\n\t"
10837             "FLD_D  [ESP]\n\t"
10838             "ADD    ESP,8\n\t"
10839             "CALL   d2l_wrapper\n"
10840       "fast:" %}
10841   ins_encode %{
10842     Label fast;
10843     __ subptr(rsp, 8);
10844     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10845     __ fld_d(Address(rsp, 0));
10846     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10847     __ fistp_d(Address(rsp, 0));
10848     // Restore the rounding mode, mask the exception
10849     if (Compile::current()->in_24_bit_fp_mode()) {
10850       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10851     } else {
10852       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10853     }
10854     // Load the converted long, adjust CPU stack
10855     __ pop(rax);
10856     __ pop(rdx);
10857     __ cmpl(rdx, 0x80000000);
10858     __ jccb(Assembler::notEqual, fast);
10859     __ testl(rax, rax);
10860     __ jccb(Assembler::notEqual, fast);
10861     __ subptr(rsp, 8);
10862     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10863     __ fld_d(Address(rsp, 0));
10864     __ addptr(rsp, 8);
10865     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10866     __ post_call_nop();
10867     __ bind(fast);
10868   %}
10869   ins_pipe( pipe_slow );
10870 %}
10871 
10872 // Convert a double to an int.  Java semantics require we do complex
10873 // manglations in the corner cases.  So we set the rounding mode to
10874 // 'zero', store the darned double down as an int, and reset the
10875 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or convert a NaN; we check for this
// and go the slow path if needed.
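//
// The corner cases are the Java narrowing rules (JLS 5.1.3), sketched here in
// C-like pseudocode (d2i_wrapper is the existing stub that applies them):
//     int raw = (int)trunc(src);       // FIST / CVTTSS2SI / CVTTSD2SI yield
//                                      // 0x80000000 ("integer indefinite")
//                                      // for NaN or out-of-range values
//     if (raw != 0x80000000) return raw;   // fast path
//     return d2i_wrapper(src);         // slow path: NaN -> 0, otherwise
//                                      // clamp to Integer.MIN_VALUE /
//                                      // Integer.MAX_VALUE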
10878 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10879   predicate(UseSSE==0);
10880   match(Set dst (ConvF2I src));
10881   effect( KILL tmp, KILL cr );
10882   format %{ "FLD    $src\t# Convert float to int \n\t"
10883             "FLDCW  trunc mode\n\t"
10884             "SUB    ESP,4\n\t"
10885             "FISTp  [ESP + #0]\n\t"
10886             "FLDCW  std/24-bit mode\n\t"
10887             "POP    EAX\n\t"
10888             "CMP    EAX,0x80000000\n\t"
10889             "JNE,s  fast\n\t"
10890             "FLD    $src\n\t"
10891             "CALL   d2i_wrapper\n"
10892       "fast:" %}
10893   // DPR2I_encoding works for FPR2I
10894   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10895   ins_pipe( pipe_slow );
10896 %}
10897 
10898 // Convert a float in xmm to an int reg.
10899 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10900   predicate(UseSSE>=1);
10901   match(Set dst (ConvF2I src));
10902   effect( KILL tmp, KILL cr );
10903   format %{ "CVTTSS2SI $dst, $src\n\t"
10904             "CMP    $dst,0x80000000\n\t"
10905             "JNE,s  fast\n\t"
10906             "SUB    ESP, 4\n\t"
10907             "MOVSS  [ESP], $src\n\t"
10908             "FLD    [ESP]\n\t"
10909             "ADD    ESP, 4\n\t"
10910             "CALL   d2i_wrapper\n"
10911       "fast:" %}
10912   ins_encode %{
10913     Label fast;
10914     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10915     __ cmpl($dst$$Register, 0x80000000);
10916     __ jccb(Assembler::notEqual, fast);
10917     __ subptr(rsp, 4);
10918     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10919     __ fld_s(Address(rsp, 0));
10920     __ addptr(rsp, 4);
10921     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10922     __ post_call_nop();
10923     __ bind(fast);
10924   %}
10925   ins_pipe( pipe_slow );
10926 %}
10927 
10928 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10929   predicate(UseSSE==0);
10930   match(Set dst (ConvF2L src));
10931   effect( KILL cr );
10932   format %{ "FLD    $src\t# Convert float to long\n\t"
10933             "FLDCW  trunc mode\n\t"
10934             "SUB    ESP,8\n\t"
10935             "FISTp  [ESP + #0]\n\t"
10936             "FLDCW  std/24-bit mode\n\t"
10937             "POP    EAX\n\t"
10938             "POP    EDX\n\t"
10939             "CMP    EDX,0x80000000\n\t"
10940             "JNE,s  fast\n\t"
10941             "TEST   EAX,EAX\n\t"
10942             "JNE,s  fast\n\t"
10943             "FLD    $src\n\t"
10944             "CALL   d2l_wrapper\n"
10945       "fast:" %}
10946   // DPR2L_encoding works for FPR2L
10947   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10948   ins_pipe( pipe_slow );
10949 %}
10950 
10951 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10952 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10953   predicate (UseSSE>=1);
10954   match(Set dst (ConvF2L src));
10955   effect( KILL cr );
10956   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10957             "MOVSS  [ESP],$src\n\t"
10958             "FLD_S  [ESP]\n\t"
10959             "FLDCW  trunc mode\n\t"
10960             "FISTp  [ESP + #0]\n\t"
10961             "FLDCW  std/24-bit mode\n\t"
10962             "POP    EAX\n\t"
10963             "POP    EDX\n\t"
10964             "CMP    EDX,0x80000000\n\t"
10965             "JNE,s  fast\n\t"
10966             "TEST   EAX,EAX\n\t"
10967             "JNE,s  fast\n\t"
10968             "SUB    ESP,4\t# Convert float to long\n\t"
10969             "MOVSS  [ESP],$src\n\t"
10970             "FLD_S  [ESP]\n\t"
10971             "ADD    ESP,4\n\t"
10972             "CALL   d2l_wrapper\n"
10973       "fast:" %}
10974   ins_encode %{
10975     Label fast;
10976     __ subptr(rsp, 8);
10977     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10978     __ fld_s(Address(rsp, 0));
10979     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10980     __ fistp_d(Address(rsp, 0));
10981     // Restore the rounding mode, mask the exception
10982     if (Compile::current()->in_24_bit_fp_mode()) {
10983       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10984     } else {
10985       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10986     }
10987     // Load the converted long, adjust CPU stack
10988     __ pop(rax);
10989     __ pop(rdx);
10990     __ cmpl(rdx, 0x80000000);
10991     __ jccb(Assembler::notEqual, fast);
10992     __ testl(rax, rax);
10993     __ jccb(Assembler::notEqual, fast);
10994     __ subptr(rsp, 4);
10995     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10996     __ fld_s(Address(rsp, 0));
10997     __ addptr(rsp, 4);
10998     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10999     __ post_call_nop();
11000     __ bind(fast);
11001   %}
11002   ins_pipe( pipe_slow );
11003 %}
11004 
11005 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11006   predicate( UseSSE<=1 );
11007   match(Set dst (ConvI2D src));
11008   format %{ "FILD   $src\n\t"
11009             "FSTP   $dst" %}
11010   opcode(0xDB, 0x0);  /* DB /0 */
11011   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11012   ins_pipe( fpu_reg_mem );
11013 %}
11014 
11015 instruct convI2D_reg(regD dst, rRegI src) %{
11016   predicate( UseSSE>=2 && !UseXmmI2D );
11017   match(Set dst (ConvI2D src));
11018   format %{ "CVTSI2SD $dst,$src" %}
11019   ins_encode %{
11020     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11021   %}
11022   ins_pipe( pipe_slow );
11023 %}
11024 
11025 instruct convI2D_mem(regD dst, memory mem) %{
11026   predicate( UseSSE>=2 );
11027   match(Set dst (ConvI2D (LoadI mem)));
11028   format %{ "CVTSI2SD $dst,$mem" %}
11029   ins_encode %{
11030     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11031   %}
11032   ins_pipe( pipe_slow );
11033 %}
11034 
11035 instruct convXI2D_reg(regD dst, rRegI src)
11036 %{
11037   predicate( UseSSE>=2 && UseXmmI2D );
11038   match(Set dst (ConvI2D src));
11039 
11040   format %{ "MOVD  $dst,$src\n\t"
11041             "CVTDQ2PD $dst,$dst\t# i2d" %}
11042   ins_encode %{
11043     __ movdl($dst$$XMMRegister, $src$$Register);
11044     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11045   %}
11046   ins_pipe(pipe_slow); // XXX
11047 %}
11048 
11049 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11050   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11051   match(Set dst (ConvI2D (LoadI mem)));
11052   format %{ "FILD   $mem\n\t"
11053             "FSTP   $dst" %}
11054   opcode(0xDB);      /* DB /0 */
11055   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11056               Pop_Reg_DPR(dst));
11057   ins_pipe( fpu_reg_mem );
11058 %}
11059 
11060 // Convert a byte to a float; no rounding step needed.
11061 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11062   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11063   match(Set dst (ConvI2F src));
11064   format %{ "FILD   $src\n\t"
11065             "FSTP   $dst" %}
11066 
11067   opcode(0xDB, 0x0);  /* DB /0 */
11068   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11069   ins_pipe( fpu_reg_mem );
11070 %}
11071 
11072 // In 24-bit mode, force exponent rounding by storing back out
11073 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11074   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11075   match(Set dst (ConvI2F src));
11076   ins_cost(200);
11077   format %{ "FILD   $src\n\t"
11078             "FSTP_S $dst" %}
11079   opcode(0xDB, 0x0);  /* DB /0 */
11080   ins_encode( Push_Mem_I(src),
11081               Pop_Mem_FPR(dst));
11082   ins_pipe( fpu_mem_mem );
11083 %}
11084 
11085 // In 24-bit mode, force exponent rounding by storing back out
11086 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11087   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11088   match(Set dst (ConvI2F (LoadI mem)));
11089   ins_cost(200);
11090   format %{ "FILD   $mem\n\t"
11091             "FSTP_S $dst" %}
11092   opcode(0xDB);  /* DB /0 */
11093   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11094               Pop_Mem_FPR(dst));
11095   ins_pipe( fpu_mem_mem );
11096 %}
11097 
11098 // This instruction does not round to 24-bits
11099 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11100   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11101   match(Set dst (ConvI2F src));
11102   format %{ "FILD   $src\n\t"
11103             "FSTP   $dst" %}
11104   opcode(0xDB, 0x0);  /* DB /0 */
11105   ins_encode( Push_Mem_I(src),
11106               Pop_Reg_FPR(dst));
11107   ins_pipe( fpu_reg_mem );
11108 %}
11109 
11110 // This instruction does not round to 24-bits
11111 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11112   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11113   match(Set dst (ConvI2F (LoadI mem)));
11114   format %{ "FILD   $mem\n\t"
11115             "FSTP   $dst" %}
11116   opcode(0xDB);      /* DB /0 */
11117   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11118               Pop_Reg_FPR(dst));
11119   ins_pipe( fpu_reg_mem );
11120 %}
11121 
11122 // Convert an int to a float in xmm; no rounding step needed.
11123 instruct convI2F_reg(regF dst, rRegI src) %{
11124   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11125   match(Set dst (ConvI2F src));
11126   format %{ "CVTSI2SS $dst, $src" %}
11127   ins_encode %{
11128     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11129   %}
11130   ins_pipe( pipe_slow );
11131 %}
11132 
instruct convXI2F_reg(regF dst, rRegI src)
11134 %{
11135   predicate( UseSSE>=2 && UseXmmI2F );
11136   match(Set dst (ConvI2F src));
11137 
11138   format %{ "MOVD  $dst,$src\n\t"
11139             "CVTDQ2PS $dst,$dst\t# i2f" %}
11140   ins_encode %{
11141     __ movdl($dst$$XMMRegister, $src$$Register);
11142     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11143   %}
11144   ins_pipe(pipe_slow); // XXX
11145 %}
11146 
11147 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11148   match(Set dst (ConvI2L src));
11149   effect(KILL cr);
11150   ins_cost(375);
11151   format %{ "MOV    $dst.lo,$src\n\t"
11152             "MOV    $dst.hi,$src\n\t"
11153             "SAR    $dst.hi,31" %}
11154   ins_encode(convert_int_long(dst,src));
11155   ins_pipe( ialu_reg_reg_long );
11156 %}
11157 
11158 // Zero-extend convert int to long
11159 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11160   match(Set dst (AndL (ConvI2L src) mask) );
11161   effect( KILL flags );
11162   ins_cost(250);
11163   format %{ "MOV    $dst.lo,$src\n\t"
11164             "XOR    $dst.hi,$dst.hi" %}
11165   opcode(0x33); // XOR
11166   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11167   ins_pipe( ialu_reg_reg_long );
11168 %}
11169 
11170 // Zero-extend long
11171 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11172   match(Set dst (AndL src mask) );
11173   effect( KILL flags );
11174   ins_cost(250);
11175   format %{ "MOV    $dst.lo,$src.lo\n\t"
11176             "XOR    $dst.hi,$dst.hi\n\t" %}
11177   opcode(0x33); // XOR
11178   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11179   ins_pipe( ialu_reg_reg_long );
11180 %}
11181 
11182 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11183   predicate (UseSSE<=1);
11184   match(Set dst (ConvL2D src));
11185   effect( KILL cr );
11186   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11187             "PUSH   $src.lo\n\t"
11188             "FILD   ST,[ESP + #0]\n\t"
11189             "ADD    ESP,8\n\t"
11190             "FSTP_D $dst\t# D-round" %}
11191   opcode(0xDF, 0x5);  /* DF /5 */
11192   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11193   ins_pipe( pipe_slow );
11194 %}
11195 
11196 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11197   predicate (UseSSE>=2);
11198   match(Set dst (ConvL2D src));
11199   effect( KILL cr );
11200   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11201             "PUSH   $src.lo\n\t"
11202             "FILD_D [ESP]\n\t"
11203             "FSTP_D [ESP]\n\t"
11204             "MOVSD  $dst,[ESP]\n\t"
11205             "ADD    ESP,8" %}
11206   opcode(0xDF, 0x5);  /* DF /5 */
11207   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11208   ins_pipe( pipe_slow );
11209 %}
11210 
11211 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11212   predicate (UseSSE>=1);
11213   match(Set dst (ConvL2F src));
11214   effect( KILL cr );
11215   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11216             "PUSH   $src.lo\n\t"
11217             "FILD_D [ESP]\n\t"
11218             "FSTP_S [ESP]\n\t"
11219             "MOVSS  $dst,[ESP]\n\t"
11220             "ADD    ESP,8" %}
11221   opcode(0xDF, 0x5);  /* DF /5 */
11222   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11223   ins_pipe( pipe_slow );
11224 %}
11225 
11226 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11227   match(Set dst (ConvL2F src));
11228   effect( KILL cr );
11229   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11230             "PUSH   $src.lo\n\t"
11231             "FILD   ST,[ESP + #0]\n\t"
11232             "ADD    ESP,8\n\t"
11233             "FSTP_S $dst\t# F-round" %}
11234   opcode(0xDF, 0x5);  /* DF /5 */
11235   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11236   ins_pipe( pipe_slow );
11237 %}
11238 
11239 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11240   match(Set dst (ConvL2I src));
11241   effect( DEF dst, USE src );
11242   format %{ "MOV    $dst,$src.lo" %}
11243   ins_encode(enc_CopyL_Lo(dst,src));
11244   ins_pipe( ialu_reg_reg );
11245 %}
11246 
11247 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11248   match(Set dst (MoveF2I src));
11249   effect( DEF dst, USE src );
11250   ins_cost(100);
11251   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11252   ins_encode %{
11253     __ movl($dst$$Register, Address(rsp, $src$$disp));
11254   %}
11255   ins_pipe( ialu_reg_mem );
11256 %}
11257 
11258 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11259   predicate(UseSSE==0);
11260   match(Set dst (MoveF2I src));
11261   effect( DEF dst, USE src );
11262 
11263   ins_cost(125);
11264   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11265   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11266   ins_pipe( fpu_mem_reg );
11267 %}
11268 
11269 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11270   predicate(UseSSE>=1);
11271   match(Set dst (MoveF2I src));
11272   effect( DEF dst, USE src );
11273 
11274   ins_cost(95);
11275   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11276   ins_encode %{
11277     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11278   %}
11279   ins_pipe( pipe_slow );
11280 %}
11281 
11282 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11283   predicate(UseSSE>=2);
11284   match(Set dst (MoveF2I src));
11285   effect( DEF dst, USE src );
11286   ins_cost(85);
11287   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11288   ins_encode %{
11289     __ movdl($dst$$Register, $src$$XMMRegister);
11290   %}
11291   ins_pipe( pipe_slow );
11292 %}
11293 
11294 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11295   match(Set dst (MoveI2F src));
11296   effect( DEF dst, USE src );
11297 
11298   ins_cost(100);
11299   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11300   ins_encode %{
11301     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11302   %}
11303   ins_pipe( ialu_mem_reg );
11304 %}
11305 
11306 
11307 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11308   predicate(UseSSE==0);
11309   match(Set dst (MoveI2F src));
11310   effect(DEF dst, USE src);
11311 
11312   ins_cost(125);
11313   format %{ "FLD_S  $src\n\t"
11314             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11315   opcode(0xD9);               /* D9 /0, FLD m32real */
11316   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11317               Pop_Reg_FPR(dst) );
11318   ins_pipe( fpu_reg_mem );
11319 %}
11320 
11321 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11322   predicate(UseSSE>=1);
11323   match(Set dst (MoveI2F src));
11324   effect( DEF dst, USE src );
11325 
11326   ins_cost(95);
11327   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11328   ins_encode %{
11329     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11330   %}
11331   ins_pipe( pipe_slow );
11332 %}
11333 
11334 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11335   predicate(UseSSE>=2);
11336   match(Set dst (MoveI2F src));
11337   effect( DEF dst, USE src );
11338 
11339   ins_cost(85);
11340   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11341   ins_encode %{
11342     __ movdl($dst$$XMMRegister, $src$$Register);
11343   %}
11344   ins_pipe( pipe_slow );
11345 %}
11346 
11347 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11348   match(Set dst (MoveD2L src));
11349   effect(DEF dst, USE src);
11350 
11351   ins_cost(250);
11352   format %{ "MOV    $dst.lo,$src\n\t"
11353             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11354   opcode(0x8B, 0x8B);
11355   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11356   ins_pipe( ialu_mem_long_reg );
11357 %}
11358 
11359 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11360   predicate(UseSSE<=1);
11361   match(Set dst (MoveD2L src));
11362   effect(DEF dst, USE src);
11363 
11364   ins_cost(125);
11365   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11366   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11367   ins_pipe( fpu_mem_reg );
11368 %}
11369 
11370 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11371   predicate(UseSSE>=2);
11372   match(Set dst (MoveD2L src));
11373   effect(DEF dst, USE src);
11374   ins_cost(95);
11375   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11376   ins_encode %{
11377     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11378   %}
11379   ins_pipe( pipe_slow );
11380 %}
11381 
11382 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11383   predicate(UseSSE>=2);
11384   match(Set dst (MoveD2L src));
11385   effect(DEF dst, USE src, TEMP tmp);
11386   ins_cost(85);
11387   format %{ "MOVD   $dst.lo,$src\n\t"
11388             "PSHUFLW $tmp,$src,0x4E\n\t"
11389             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11390   ins_encode %{
11391     __ movdl($dst$$Register, $src$$XMMRegister);
11392     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11393     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11394   %}
11395   ins_pipe( pipe_slow );
11396 %}
11397 
11398 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11399   match(Set dst (MoveL2D src));
11400   effect(DEF dst, USE src);
11401 
11402   ins_cost(200);
11403   format %{ "MOV    $dst,$src.lo\n\t"
11404             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11405   opcode(0x89, 0x89);
11406   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11407   ins_pipe( ialu_mem_long_reg );
11408 %}
11409 
11410 
11411 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11412   predicate(UseSSE<=1);
11413   match(Set dst (MoveL2D src));
11414   effect(DEF dst, USE src);
11415   ins_cost(125);
11416 
11417   format %{ "FLD_D  $src\n\t"
11418             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11419   opcode(0xDD);               /* DD /0, FLD m64real */
11420   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11421               Pop_Reg_DPR(dst) );
11422   ins_pipe( fpu_reg_mem );
11423 %}
11424 
11425 
11426 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11427   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11428   match(Set dst (MoveL2D src));
11429   effect(DEF dst, USE src);
11430 
11431   ins_cost(95);
11432   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11433   ins_encode %{
11434     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11435   %}
11436   ins_pipe( pipe_slow );
11437 %}
11438 
11439 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11440   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11441   match(Set dst (MoveL2D src));
11442   effect(DEF dst, USE src);
11443 
11444   ins_cost(95);
11445   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11446   ins_encode %{
11447     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11448   %}
11449   ins_pipe( pipe_slow );
11450 %}
11451 
11452 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11453   predicate(UseSSE>=2);
11454   match(Set dst (MoveL2D src));
11455   effect(TEMP dst, USE src, TEMP tmp);
11456   ins_cost(85);
11457   format %{ "MOVD   $dst,$src.lo\n\t"
11458             "MOVD   $tmp,$src.hi\n\t"
11459             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11460   ins_encode %{
11461     __ movdl($dst$$XMMRegister, $src$$Register);
11462     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11463     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11464   %}
11465   ins_pipe( pipe_slow );
11466 %}
11467 
11468 //----------------------------- CompressBits/ExpandBits ------------------------
11469 
11470 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11471   predicate(n->bottom_type()->isa_long());
11472   match(Set dst (CompressBits src mask));
11473   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11474   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11475   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination register pair
    // in parallel, then merge the two halves so that the upper result is laid out
    // contiguously after the lower result.
11480     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11481     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11482     __ popcntl($rtmp$$Register, $mask$$Register);
11483     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11484     __ cmpl($rtmp$$Register, 32);
11485     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
11487     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11488     // Shift left the contents of upper destination register by true bit count of lower mask register
11489     // and merge with lower destination register.
11490     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11491     __ orl($dst$$Register, $rtmp$$Register);
11492     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11493     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11494     // since contents of upper destination have already been copied to lower destination
11495     // register.
11496     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11498     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11499     __ jmp(exit);
    __ bind(partial_result);
11501     // Perform right shift over upper destination register to move out bits already copied
11502     // to lower destination register.
11503     __ subl($rtmp$$Register, 32);
11504     __ negl($rtmp$$Register);
11505     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11506     __ bind(exit);
11507   %}
11508   ins_pipe( pipe_slow );
11509 %}
11510 
11511 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11512   predicate(n->bottom_type()->isa_long());
11513   match(Set dst (ExpandBits src mask));
11514   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11515   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11516   ins_encode %{
    // The expand (deposit) operation reads source bits sequentially starting from the LSB
    // and scatters them into the destination at the bit positions where the mask is set.
    // Thus the number of source bits consumed equals the combined true bit count of the
    // mask register pair.
11521     Label exit, mask_clipping;
11522     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11523     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11524     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, then no bits of the lower
    // source register feed into the upper destination register.
11527     __ cmpl($rtmp$$Register, 32);
11528     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on the 32-bit target, use an XMM register as a spill slot.
11530     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11531     // Shift right the contents of lower source register to remove already consumed bits.
11532     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11533     // Extract the bits from lower source register starting from LSB under the influence
11534     // of upper mask register.
11535     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11536     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11537     __ subl($rtmp$$Register, 32);
11538     __ negl($rtmp$$Register);
11539     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11540     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11541     // Clear the set bits in upper mask register which have been used to extract the contents
11542     // from lower source register.
11543     __ bind(mask_clipping);
11544     __ blsrl($mask$$Register, $mask$$Register);
11545     __ decrementl($rtmp$$Register, 1);
11546     __ jccb(Assembler::greater, mask_clipping);
11547     // Starting from LSB extract the bits from upper source register under the influence of
11548     // remaining set bits in upper mask register.
11549     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11550     // Merge the partial results extracted from lower and upper source register bits.
11551     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11552     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11553     __ bind(exit);
11554   %}
11555   ins_pipe( pipe_slow );
11556 %}
11557 
11558 // =======================================================================
11559 // fast clearing of an array
11560 // Small ClearArray non-AVX512.
11561 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11562   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11563   match(Set dummy (ClearArray cnt base));
11564   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11565 
11566   format %{ $$template
11567     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11568     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11569     $$emit$$"JG     LARGE\n\t"
11570     $$emit$$"SHL    ECX, 1\n\t"
11571     $$emit$$"DEC    ECX\n\t"
11572     $$emit$$"JS     DONE\t# Zero length\n\t"
11573     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11574     $$emit$$"DEC    ECX\n\t"
11575     $$emit$$"JGE    LOOP\n\t"
11576     $$emit$$"JMP    DONE\n\t"
11577     $$emit$$"# LARGE:\n\t"
11578     if (UseFastStosb) {
11579        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11580        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11581     } else if (UseXMMForObjInit) {
11582        $$emit$$"MOV     RDI,RAX\n\t"
11583        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11584        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11585        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11586        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11587        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11588        $$emit$$"ADD     0x40,RAX\n\t"
11589        $$emit$$"# L_zero_64_bytes:\n\t"
11590        $$emit$$"SUB     0x8,RCX\n\t"
11591        $$emit$$"JGE     L_loop\n\t"
11592        $$emit$$"ADD     0x4,RCX\n\t"
11593        $$emit$$"JL      L_tail\n\t"
11594        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11595        $$emit$$"ADD     0x20,RAX\n\t"
11596        $$emit$$"SUB     0x4,RCX\n\t"
11597        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11598        $$emit$$"ADD     0x4,RCX\n\t"
11599        $$emit$$"JLE     L_end\n\t"
11600        $$emit$$"DEC     RCX\n\t"
11601        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11602        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11603        $$emit$$"ADD     0x8,RAX\n\t"
11604        $$emit$$"DEC     RCX\n\t"
11605        $$emit$$"JGE     L_sloop\n\t"
11606        $$emit$$"# L_end:\n\t"
11607     } else {
11608        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11609        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11610     }
11611     $$emit$$"# DONE"
11612   %}
11613   ins_encode %{
11614     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11615                  $tmp$$XMMRegister, false, knoreg);
11616   %}
11617   ins_pipe( pipe_slow );
11618 %}
11619 
11620 // Small ClearArray AVX512 non-constant length.
11621 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11622   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11623   match(Set dummy (ClearArray cnt base));
11624   ins_cost(125);
11625   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11626 
11627   format %{ $$template
11628     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11629     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11630     $$emit$$"JG     LARGE\n\t"
11631     $$emit$$"SHL    ECX, 1\n\t"
11632     $$emit$$"DEC    ECX\n\t"
11633     $$emit$$"JS     DONE\t# Zero length\n\t"
11634     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11635     $$emit$$"DEC    ECX\n\t"
11636     $$emit$$"JGE    LOOP\n\t"
11637     $$emit$$"JMP    DONE\n\t"
11638     $$emit$$"# LARGE:\n\t"
11639     if (UseFastStosb) {
11640        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11641        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11642     } else if (UseXMMForObjInit) {
11643        $$emit$$"MOV     RDI,RAX\n\t"
11644        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11645        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11646        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11647        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11648        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11649        $$emit$$"ADD     0x40,RAX\n\t"
11650        $$emit$$"# L_zero_64_bytes:\n\t"
11651        $$emit$$"SUB     0x8,RCX\n\t"
11652        $$emit$$"JGE     L_loop\n\t"
11653        $$emit$$"ADD     0x4,RCX\n\t"
11654        $$emit$$"JL      L_tail\n\t"
11655        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11656        $$emit$$"ADD     0x20,RAX\n\t"
11657        $$emit$$"SUB     0x4,RCX\n\t"
11658        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11659        $$emit$$"ADD     0x4,RCX\n\t"
11660        $$emit$$"JLE     L_end\n\t"
11661        $$emit$$"DEC     RCX\n\t"
11662        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11663        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11664        $$emit$$"ADD     0x8,RAX\n\t"
11665        $$emit$$"DEC     RCX\n\t"
11666        $$emit$$"JGE     L_sloop\n\t"
11667        $$emit$$"# L_end:\n\t"
11668     } else {
11669        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11670        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11671     }
11672     $$emit$$"# DONE"
11673   %}
11674   ins_encode %{
11675     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11676                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11677   %}
11678   ins_pipe( pipe_slow );
11679 %}
11680 
11681 // Large ClearArray non-AVX512.
11682 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11683   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11684   match(Set dummy (ClearArray cnt base));
11685   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11686   format %{ $$template
11687     if (UseFastStosb) {
11688        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11689        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11690        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11691     } else if (UseXMMForObjInit) {
11692        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11693        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11694        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11695        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11696        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11697        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11698        $$emit$$"ADD     0x40,RAX\n\t"
11699        $$emit$$"# L_zero_64_bytes:\n\t"
11700        $$emit$$"SUB     0x8,RCX\n\t"
11701        $$emit$$"JGE     L_loop\n\t"
11702        $$emit$$"ADD     0x4,RCX\n\t"
11703        $$emit$$"JL      L_tail\n\t"
11704        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11705        $$emit$$"ADD     0x20,RAX\n\t"
11706        $$emit$$"SUB     0x4,RCX\n\t"
11707        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11708        $$emit$$"ADD     0x4,RCX\n\t"
11709        $$emit$$"JLE     L_end\n\t"
11710        $$emit$$"DEC     RCX\n\t"
11711        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11712        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11713        $$emit$$"ADD     0x8,RAX\n\t"
11714        $$emit$$"DEC     RCX\n\t"
11715        $$emit$$"JGE     L_sloop\n\t"
11716        $$emit$$"# L_end:\n\t"
11717     } else {
11718        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11719        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11720        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11721     }
11722     $$emit$$"# DONE"
11723   %}
11724   ins_encode %{
11725     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11726                  $tmp$$XMMRegister, true, knoreg);
11727   %}
11728   ins_pipe( pipe_slow );
11729 %}
11730 
11731 // Large ClearArray AVX512.
11732 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11733   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11734   match(Set dummy (ClearArray cnt base));
11735   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11736   format %{ $$template
11737     if (UseFastStosb) {
11738        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11739        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11740        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11741     } else if (UseXMMForObjInit) {
11742        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11743        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11744        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11745        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11746        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11747        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11748        $$emit$$"ADD     0x40,RAX\n\t"
11749        $$emit$$"# L_zero_64_bytes:\n\t"
11750        $$emit$$"SUB     0x8,RCX\n\t"
11751        $$emit$$"JGE     L_loop\n\t"
11752        $$emit$$"ADD     0x4,RCX\n\t"
11753        $$emit$$"JL      L_tail\n\t"
11754        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11755        $$emit$$"ADD     0x20,RAX\n\t"
11756        $$emit$$"SUB     0x4,RCX\n\t"
11757        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11758        $$emit$$"ADD     0x4,RCX\n\t"
11759        $$emit$$"JLE     L_end\n\t"
11760        $$emit$$"DEC     RCX\n\t"
11761        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11762        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11763        $$emit$$"ADD     0x8,RAX\n\t"
11764        $$emit$$"DEC     RCX\n\t"
11765        $$emit$$"JGE     L_sloop\n\t"
11766        $$emit$$"# L_end:\n\t"
11767     } else {
11768        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11769        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11770        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11771     }
11772     $$emit$$"# DONE"
11773   %}
11774   ins_encode %{
11775     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11776                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11777   %}
11778   ins_pipe( pipe_slow );
11779 %}
11780 
11781 // Small ClearArray AVX512 constant length.
11782 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11783 %{
11784   predicate(!((ClearArrayNode*)n)->is_large() &&
11785                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11786   match(Set dummy (ClearArray cnt base));
11787   ins_cost(100);
11788   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11789   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11790   ins_encode %{
11791    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11792   %}
11793   ins_pipe(pipe_slow);
11794 %}
11795 
11796 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11797                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11798   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11799   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11800   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11801 
11802   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11803   ins_encode %{
11804     __ string_compare($str1$$Register, $str2$$Register,
11805                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11806                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11807   %}
11808   ins_pipe( pipe_slow );
11809 %}
11810 
11811 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11812                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11813   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11814   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11815   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11816 
11817   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11818   ins_encode %{
11819     __ string_compare($str1$$Register, $str2$$Register,
11820                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11821                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11822   %}
11823   ins_pipe( pipe_slow );
11824 %}
11825 
11826 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11827                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11828   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11829   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11830   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11831 
11832   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11833   ins_encode %{
11834     __ string_compare($str1$$Register, $str2$$Register,
11835                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11836                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11837   %}
11838   ins_pipe( pipe_slow );
11839 %}
11840 
11841 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11842                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11843   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11844   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11845   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11846 
11847   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11848   ins_encode %{
11849     __ string_compare($str1$$Register, $str2$$Register,
11850                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11851                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11852   %}
11853   ins_pipe( pipe_slow );
11854 %}
11855 
11856 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11857                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11858   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11859   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11860   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11861 
11862   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11863   ins_encode %{
11864     __ string_compare($str1$$Register, $str2$$Register,
11865                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11866                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11867   %}
11868   ins_pipe( pipe_slow );
11869 %}
11870 
11871 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11872                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11873   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11874   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11875   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11876 
11877   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11878   ins_encode %{
11879     __ string_compare($str1$$Register, $str2$$Register,
11880                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11881                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11882   %}
11883   ins_pipe( pipe_slow );
11884 %}
11885 
11886 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11887                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11888   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11889   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11890   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11891 
11892   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11893   ins_encode %{
11894     __ string_compare($str2$$Register, $str1$$Register,
11895                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11896                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11897   %}
11898   ins_pipe( pipe_slow );
11899 %}
11900 
11901 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11902                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11903   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11904   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11905   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11906 
11907   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11908   ins_encode %{
11909     __ string_compare($str2$$Register, $str1$$Register,
11910                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11911                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11912   %}
11913   ins_pipe( pipe_slow );
11914 %}
11915 
11916 // fast string equals
11917 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11918                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11919   predicate(!VM_Version::supports_avx512vlbw());
11920   match(Set result (StrEquals (Binary str1 str2) cnt));
11921   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11922 
11923   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11924   ins_encode %{
11925     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11926                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11927                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11928   %}
11929 
11930   ins_pipe( pipe_slow );
11931 %}
11932 
11933 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11934                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11935   predicate(VM_Version::supports_avx512vlbw());
11936   match(Set result (StrEquals (Binary str1 str2) cnt));
11937   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11938 
11939   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11940   ins_encode %{
11941     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11942                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11943                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11944   %}
11945 
11946   ins_pipe( pipe_slow );
11947 %}
11948 
11949 
11950 // fast search of substring with known size.
11951 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11952                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11953   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11954   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11955   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11956 
11957   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11958   ins_encode %{
11959     int icnt2 = (int)$int_cnt2$$constant;
11960     if (icnt2 >= 16) {
11961       // IndexOf for constant substrings with size >= 16 elements
11962       // which don't need to be loaded through stack.
11963       __ string_indexofC8($str1$$Register, $str2$$Register,
11964                           $cnt1$$Register, $cnt2$$Register,
11965                           icnt2, $result$$Register,
11966                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11967     } else {
11968       // Small strings are loaded through stack if they cross page boundary.
11969       __ string_indexof($str1$$Register, $str2$$Register,
11970                         $cnt1$$Register, $cnt2$$Register,
11971                         icnt2, $result$$Register,
11972                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11973     }
11974   %}
11975   ins_pipe( pipe_slow );
11976 %}
11977 
11978 // fast search of substring with known size.
11979 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11980                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11981   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11982   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11983   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11984 
11985   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11986   ins_encode %{
11987     int icnt2 = (int)$int_cnt2$$constant;
11988     if (icnt2 >= 8) {
11989       // IndexOf for constant substrings with size >= 8 elements
11990       // which don't need to be loaded through stack.
11991       __ string_indexofC8($str1$$Register, $str2$$Register,
11992                           $cnt1$$Register, $cnt2$$Register,
11993                           icnt2, $result$$Register,
11994                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11995     } else {
11996       // Small strings are loaded through stack if they cross page boundary.
11997       __ string_indexof($str1$$Register, $str2$$Register,
11998                         $cnt1$$Register, $cnt2$$Register,
11999                         icnt2, $result$$Register,
12000                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12001     }
12002   %}
12003   ins_pipe( pipe_slow );
12004 %}
12005 
12006 // fast search of substring with known size.
12007 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12008                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12009   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12010   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12011   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12012 
12013   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12014   ins_encode %{
12015     int icnt2 = (int)$int_cnt2$$constant;
12016     if (icnt2 >= 8) {
12017       // IndexOf for constant substrings with size >= 8 elements
12018       // which don't need to be loaded through stack.
12019       __ string_indexofC8($str1$$Register, $str2$$Register,
12020                           $cnt1$$Register, $cnt2$$Register,
12021                           icnt2, $result$$Register,
12022                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12023     } else {
12024       // Small strings are loaded through stack if they cross page boundary.
12025       __ string_indexof($str1$$Register, $str2$$Register,
12026                         $cnt1$$Register, $cnt2$$Register,
12027                         icnt2, $result$$Register,
12028                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12029     }
12030   %}
12031   ins_pipe( pipe_slow );
12032 %}
12033 
12034 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12035                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12036   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12037   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12038   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12039 
12040   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12041   ins_encode %{
12042     __ string_indexof($str1$$Register, $str2$$Register,
12043                       $cnt1$$Register, $cnt2$$Register,
12044                       (-1), $result$$Register,
12045                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12046   %}
12047   ins_pipe( pipe_slow );
12048 %}
12049 
12050 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12051                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12052   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12053   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12054   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12055 
12056   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12057   ins_encode %{
12058     __ string_indexof($str1$$Register, $str2$$Register,
12059                       $cnt1$$Register, $cnt2$$Register,
12060                       (-1), $result$$Register,
12061                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12062   %}
12063   ins_pipe( pipe_slow );
12064 %}
12065 
12066 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12067                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12068   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12069   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12070   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12071 
12072   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12073   ins_encode %{
12074     __ string_indexof($str1$$Register, $str2$$Register,
12075                       $cnt1$$Register, $cnt2$$Register,
12076                       (-1), $result$$Register,
12077                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12078   %}
12079   ins_pipe( pipe_slow );
12080 %}
12081 
12082 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12083                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12084   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12085   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12086   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12087   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12088   ins_encode %{
12089     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12090                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12091   %}
12092   ins_pipe( pipe_slow );
12093 %}
12094 
12095 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12096                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12097   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12098   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12099   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12100   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12101   ins_encode %{
12102     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12103                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12104   %}
12105   ins_pipe( pipe_slow );
12106 %}
12107 
12108 
12109 // fast array equals
12110 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12111                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12112 %{
12113   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12114   match(Set result (AryEq ary1 ary2));
12115   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12116   //ins_cost(300);
12117 
12118   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12119   ins_encode %{
12120     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12121                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12122                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12123   %}
12124   ins_pipe( pipe_slow );
12125 %}
12126 
12127 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12128                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12129 %{
12130   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12131   match(Set result (AryEq ary1 ary2));
12132   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12133   //ins_cost(300);
12134 
12135   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12136   ins_encode %{
12137     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12138                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12139                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12140   %}
12141   ins_pipe( pipe_slow );
12142 %}
12143 
12144 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12145                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12146 %{
12147   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12148   match(Set result (AryEq ary1 ary2));
12149   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12150   //ins_cost(300);
12151 
12152   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12153   ins_encode %{
12154     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12155                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12156                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12157   %}
12158   ins_pipe( pipe_slow );
12159 %}
12160 
12161 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12162                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12163 %{
12164   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12165   match(Set result (AryEq ary1 ary2));
12166   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12167   //ins_cost(300);
12168 
12169   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12170   ins_encode %{
12171     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12172                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12173                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12174   %}
12175   ins_pipe( pipe_slow );
12176 %}
12177 
12178 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12179                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12180 %{
12181   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12182   match(Set result (CountPositives ary1 len));
12183   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12184 
12185   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12186   ins_encode %{
12187     __ count_positives($ary1$$Register, $len$$Register,
12188                        $result$$Register, $tmp3$$Register,
12189                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12190   %}
12191   ins_pipe( pipe_slow );
12192 %}
12193 
12194 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12195                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12196 %{
12197   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12198   match(Set result (CountPositives ary1 len));
12199   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12200 
12201   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12202   ins_encode %{
12203     __ count_positives($ary1$$Register, $len$$Register,
12204                        $result$$Register, $tmp3$$Register,
12205                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12206   %}
12207   ins_pipe( pipe_slow );
12208 %}
12209 
12210 
12211 // fast char[] to byte[] compression
12212 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12213                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12214   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12215   match(Set result (StrCompressedCopy src (Binary dst len)));
12216   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12217 
12218   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12219   ins_encode %{
12220     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12221                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12222                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12223                            knoreg, knoreg);
12224   %}
12225   ins_pipe( pipe_slow );
12226 %}
12227 
12228 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12229                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12230   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12231   match(Set result (StrCompressedCopy src (Binary dst len)));
12232   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12233 
12234   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12235   ins_encode %{
12236     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12237                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12238                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12239                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12240   %}
12241   ins_pipe( pipe_slow );
12242 %}
12243 
12244 // fast byte[] to char[] inflation
12245 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12246                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12247   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12248   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12249   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12250 
12251   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12252   ins_encode %{
12253     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12254                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12255   %}
12256   ins_pipe( pipe_slow );
12257 %}
12258 
12259 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12260                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12261   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12262   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12263   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12264 
12265   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12266   ins_encode %{
12267     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12268                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12269   %}
12270   ins_pipe( pipe_slow );
12271 %}
12272 
12273 // encode char[] to byte[] in ISO_8859_1
12274 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12275                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12276                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12277   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12278   match(Set result (EncodeISOArray src (Binary dst len)));
12279   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12280 
12281   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12282   ins_encode %{
12283     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12284                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12285                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12286   %}
12287   ins_pipe( pipe_slow );
12288 %}
12289 
12290 // encode char[] to byte[] in ASCII
12291 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12292                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12293                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12294   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12295   match(Set result (EncodeISOArray src (Binary dst len)));
12296   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12297 
12298   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12299   ins_encode %{
12300     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12301                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12302                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12303   %}
12304   ins_pipe( pipe_slow );
12305 %}
12306 
12307 //----------Control Flow Instructions------------------------------------------
12308 // Signed compare Instructions
12309 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12310   match(Set cr (CmpI op1 op2));
12311   effect( DEF cr, USE op1, USE op2 );
12312   format %{ "CMP    $op1,$op2" %}
12313   opcode(0x3B);  /* Opcode 3B /r */
12314   ins_encode( OpcP, RegReg( op1, op2) );
12315   ins_pipe( ialu_cr_reg_reg );
12316 %}
12317 
12318 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12319   match(Set cr (CmpI op1 op2));
12320   effect( DEF cr, USE op1 );
12321   format %{ "CMP    $op1,$op2" %}
12322   opcode(0x81,0x07);  /* Opcode 81 /7 */
12323   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12324   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12325   ins_pipe( ialu_cr_reg_imm );
12326 %}
12327 
12328 // Cisc-spilled version of cmpI_eReg
12329 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12330   match(Set cr (CmpI op1 (LoadI op2)));
12331 
12332   format %{ "CMP    $op1,$op2" %}
12333   ins_cost(500);
12334   opcode(0x3B);  /* Opcode 3B /r */
12335   ins_encode( OpcP, RegMem( op1, op2) );
12336   ins_pipe( ialu_cr_reg_mem );
12337 %}
12338 
12339 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12340   match(Set cr (CmpI src zero));
12341   effect( DEF cr, USE src );
12342 
12343   format %{ "TEST   $src,$src" %}
12344   opcode(0x85);
12345   ins_encode( OpcP, RegReg( src, src ) );
12346   ins_pipe( ialu_cr_reg_imm );
12347 %}
12348 
12349 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12350   match(Set cr (CmpI (AndI src con) zero));
12351 
12352   format %{ "TEST   $src,$con" %}
12353   opcode(0xF7,0x00);
12354   ins_encode( OpcP, RegOpc(src), Con32(con) );
12355   ins_pipe( ialu_cr_reg_imm );
12356 %}
12357 
12358 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12359   match(Set cr (CmpI (AndI src mem) zero));
12360 
12361   format %{ "TEST   $src,$mem" %}
12362   opcode(0x85);
12363   ins_encode( OpcP, RegMem( src, mem ) );
12364   ins_pipe( ialu_cr_reg_mem );
12365 %}
12366 
12367 // Unsigned compare Instructions; really, same as signed except they
12368 // produce an eFlagsRegU instead of eFlagsReg.
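// Illustrative note (not part of any instruct below): signed and unsigned
// orderings diverge once the sign bit is set, which is why CmpU results must
// be consumed with the unsigned condition codes (JB/JAE use CF, while JL/JGE
// use SF/OF).  A minimal C sketch of the distinction:
//
//   int32_t  a = (int32_t)0x80000000;   // INT_MIN
//   uint32_t b = 1;
//   //  a < (int32_t)b   is true   (signed order)
//   // (uint32_t)a < b   is false  (unsigned order)
//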
12369 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12370   match(Set cr (CmpU op1 op2));
12371 
12372   format %{ "CMPu   $op1,$op2" %}
12373   opcode(0x3B);  /* Opcode 3B /r */
12374   ins_encode( OpcP, RegReg( op1, op2) );
12375   ins_pipe( ialu_cr_reg_reg );
12376 %}
12377 
12378 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12379   match(Set cr (CmpU op1 op2));
12380 
12381   format %{ "CMPu   $op1,$op2" %}
12382   opcode(0x81,0x07);  /* Opcode 81 /7 */
12383   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12384   ins_pipe( ialu_cr_reg_imm );
12385 %}
12386 
12387 // // Cisc-spilled version of cmpU_eReg
12388 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12389   match(Set cr (CmpU op1 (LoadI op2)));
12390 
12391   format %{ "CMPu   $op1,$op2" %}
12392   ins_cost(500);
12393   opcode(0x3B);  /* Opcode 3B /r */
12394   ins_encode( OpcP, RegMem( op1, op2) );
12395   ins_pipe( ialu_cr_reg_mem );
12396 %}
12397 
12398 // // Cisc-spilled version of cmpU_eReg
12399 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12400 //  match(Set cr (CmpU (LoadI op1) op2));
12401 //
12402 //  format %{ "CMPu   $op1,$op2" %}
12403 //  ins_cost(500);
12404 //  opcode(0x39);  /* Opcode 39 /r */
12405 //  ins_encode( OpcP, RegMem( op1, op2) );
12406 //%}
12407 
12408 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12409   match(Set cr (CmpU src zero));
12410 
12411   format %{ "TESTu  $src,$src" %}
12412   opcode(0x85);
12413   ins_encode( OpcP, RegReg( src, src ) );
12414   ins_pipe( ialu_cr_reg_imm );
12415 %}
12416 
12417 // Unsigned pointer compare Instructions
12418 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12419   match(Set cr (CmpP op1 op2));
12420 
12421   format %{ "CMPu   $op1,$op2" %}
12422   opcode(0x3B);  /* Opcode 3B /r */
12423   ins_encode( OpcP, RegReg( op1, op2) );
12424   ins_pipe( ialu_cr_reg_reg );
12425 %}
12426 
12427 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12428   match(Set cr (CmpP op1 op2));
12429 
12430   format %{ "CMPu   $op1,$op2" %}
12431   opcode(0x81,0x07);  /* Opcode 81 /7 */
12432   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12433   ins_pipe( ialu_cr_reg_imm );
12434 %}
12435 
12436 // // Cisc-spilled version of cmpP_eReg
12437 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12438   match(Set cr (CmpP op1 (LoadP op2)));
12439 
12440   format %{ "CMPu   $op1,$op2" %}
12441   ins_cost(500);
12442   opcode(0x3B);  /* Opcode 3B /r */
12443   ins_encode( OpcP, RegMem( op1, op2) );
12444   ins_pipe( ialu_cr_reg_mem );
12445 %}
12446 
12447 // // Cisc-spilled version of cmpP_eReg
12448 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12449 //  match(Set cr (CmpP (LoadP op1) op2));
12450 //
12451 //  format %{ "CMPu   $op1,$op2" %}
12452 //  ins_cost(500);
12453 //  opcode(0x39);  /* Opcode 39 /r */
12454 //  ins_encode( OpcP, RegMem( op1, op2) );
12455 //%}
12456 
12457 // Compare raw pointer (used in out-of-heap check).
12458 // Only works because non-oop pointers must be raw pointers
12459 // and raw pointers have no anti-dependencies.
12460 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12461   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12462   match(Set cr (CmpP op1 (LoadP op2)));
12463 
12464   format %{ "CMPu   $op1,$op2" %}
12465   opcode(0x3B);  /* Opcode 3B /r */
12466   ins_encode( OpcP, RegMem( op1, op2) );
12467   ins_pipe( ialu_cr_reg_mem );
12468 %}
12469 
12470 //
12471 // This will generate a signed flags result. This should be ok
12472 // since any compare to a zero should be eq/neq.
12473 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12474   match(Set cr (CmpP src zero));
12475 
12476   format %{ "TEST   $src,$src" %}
12477   opcode(0x85);
12478   ins_encode( OpcP, RegReg( src, src ) );
12479   ins_pipe( ialu_cr_reg_imm );
12480 %}
12481 
12482 // Cisc-spilled version of testP_reg
12483 // This will generate a signed flags result. This should be ok
12484 // since any compare to a zero should be eq/neq.
12485 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12486   match(Set cr (CmpP (LoadP op) zero));
12487 
12488   format %{ "TEST   $op,0xFFFFFFFF" %}
12489   ins_cost(500);
12490   opcode(0xF7);               /* Opcode F7 /0 */
12491   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12492   ins_pipe( ialu_cr_reg_imm );
12493 %}
12494 
12495 // Yanked all unsigned pointer compare operations.
12496 // Pointer compares are done with CmpP which is already unsigned.
12497 
12498 //----------Max and Min--------------------------------------------------------
12499 // Min Instructions
12500 ////
12501 //   *** Min and Max using the conditional move are slower than the
12502 //   *** branch version on a Pentium III.
12503 // // Conditional move for min
12504 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12505 //  effect( USE_DEF op2, USE op1, USE cr );
12506 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12507 //  opcode(0x4C,0x0F);
12508 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12509 //  ins_pipe( pipe_cmov_reg );
12510 //%}
12511 //
12512 //// Min Register with Register (P6 version)
12513 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12514 //  predicate(VM_Version::supports_cmov() );
12515 //  match(Set op2 (MinI op1 op2));
12516 //  ins_cost(200);
12517 //  expand %{
12518 //    eFlagsReg cr;
12519 //    compI_eReg(cr,op1,op2);
12520 //    cmovI_reg_lt(op2,op1,cr);
12521 //  %}
12522 //%}
12523 
12524 // Min Register with Register (generic version)
12525 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12526   match(Set dst (MinI dst src));
12527   effect(KILL flags);
12528   ins_cost(300);
12529 
12530   format %{ "MIN    $dst,$src" %}
12531   opcode(0xCC);
12532   ins_encode( min_enc(dst,src) );
12533   ins_pipe( pipe_slow );
12534 %}
12535 
12536 // Max Register with Register
12537 //   *** Min and Max using the conditional move are slower than the
12538 //   *** branch version on a Pentium III.
12539 // // Conditional move for max
12540 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12541 //  effect( USE_DEF op2, USE op1, USE cr );
12542 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12543 //  opcode(0x4F,0x0F);
12544 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12545 //  ins_pipe( pipe_cmov_reg );
12546 //%}
12547 //
12548 // // Max Register with Register (P6 version)
12549 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12550 //  predicate(VM_Version::supports_cmov() );
12551 //  match(Set op2 (MaxI op1 op2));
12552 //  ins_cost(200);
12553 //  expand %{
12554 //    eFlagsReg cr;
12555 //    compI_eReg(cr,op1,op2);
12556 //    cmovI_reg_gt(op2,op1,cr);
12557 //  %}
12558 //%}
12559 
12560 // Max Register with Register (generic version)
12561 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12562   match(Set dst (MaxI dst src));
12563   effect(KILL flags);
12564   ins_cost(300);
12565 
12566   format %{ "MAX    $dst,$src" %}
12567   opcode(0xCC);
12568   ins_encode( max_enc(dst,src) );
12569   ins_pipe( pipe_slow );
12570 %}
12571 
12572 // ============================================================================
12573 // Counted Loop limit node which represents exact final iterator value.
12574 // Note: the resulting value should fit into integer range since
12575 // counted loops have limit check on overflow.
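// For example (illustrative arithmetic only): with init = 0, limit = 10 and
// stride = 3 the loop executes ceil((10 - 0) / 3) = 4 times, so the exact
// final iterator value computed below is 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 12.
// The negative-stride case follows the same formula after sign adjustments.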
12576 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12577   match(Set limit (LoopLimit (Binary init limit) stride));
12578   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12579   ins_cost(300);
12580 
12581   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12582   ins_encode %{
12583     int strd = (int)$stride$$constant;
12584     assert(strd != 1 && strd != -1, "sanity");
12585     int m1 = (strd > 0) ? 1 : -1;
12586     // Convert limit to long (EAX:EDX)
12587     __ cdql();
12588     // Convert init to long (init:tmp)
12589     __ movl($tmp$$Register, $init$$Register);
12590     __ sarl($tmp$$Register, 31);
12591     // $limit - $init
12592     __ subl($limit$$Register, $init$$Register);
12593     __ sbbl($limit_hi$$Register, $tmp$$Register);
12594     // + ($stride - 1)
12595     if (strd > 0) {
12596       __ addl($limit$$Register, (strd - 1));
12597       __ adcl($limit_hi$$Register, 0);
12598       __ movl($tmp$$Register, strd);
12599     } else {
12600       __ addl($limit$$Register, (strd + 1));
12601       __ adcl($limit_hi$$Register, -1);
12602       __ lneg($limit_hi$$Register, $limit$$Register);
12603       __ movl($tmp$$Register, -strd);
12604     }
12605     // signed division: (EAX:EDX) / pos_stride
12606     __ idivl($tmp$$Register);
12607     if (strd < 0) {
12608       // restore sign
12609       __ negl($tmp$$Register);
12610     }
12611     // (EAX) * stride
12612     __ mull($tmp$$Register);
12613     // + init (ignore upper bits)
12614     __ addl($limit$$Register, $init$$Register);
12615   %}
12616   ins_pipe( pipe_slow );
12617 %}
12618 
12619 // ============================================================================
12620 // Branch Instructions
12621 // Jump Table
12622 instruct jumpXtnd(rRegI switch_val) %{
12623   match(Jump switch_val);
12624   ins_cost(350);
12625   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12626   ins_encode %{
12627     // Jump to Address(table_base + switch_reg)
12628     Address index(noreg, $switch_val$$Register, Address::times_1);
12629     __ jump(ArrayAddress($constantaddress, index), noreg);
12630   %}
12631   ins_pipe(pipe_jmp);
12632 %}
12633 
12634 // Jump Direct - Label defines a relative address from JMP+1
12635 instruct jmpDir(label labl) %{
12636   match(Goto);
12637   effect(USE labl);
12638 
12639   ins_cost(300);
12640   format %{ "JMP    $labl" %}
12641   size(5);
12642   ins_encode %{
12643     Label* L = $labl$$label;
12644     __ jmp(*L, false); // Always long jump
12645   %}
12646   ins_pipe( pipe_jmp );
12647 %}
12648 
12649 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12650 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12651   match(If cop cr);
12652   effect(USE labl);
12653 
12654   ins_cost(300);
12655   format %{ "J$cop    $labl" %}
12656   size(6);
12657   ins_encode %{
12658     Label* L = $labl$$label;
12659     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12660   %}
12661   ins_pipe( pipe_jcc );
12662 %}
12663 
12664 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12665 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12666   match(CountedLoopEnd cop cr);
12667   effect(USE labl);
12668 
12669   ins_cost(300);
12670   format %{ "J$cop    $labl\t# Loop end" %}
12671   size(6);
12672   ins_encode %{
12673     Label* L = $labl$$label;
12674     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12675   %}
12676   ins_pipe( pipe_jcc );
12677 %}
12678 
12679 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12680 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12681   match(CountedLoopEnd cop cmp);
12682   effect(USE labl);
12683 
12684   ins_cost(300);
12685   format %{ "J$cop,u  $labl\t# Loop end" %}
12686   size(6);
12687   ins_encode %{
12688     Label* L = $labl$$label;
12689     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12690   %}
12691   ins_pipe( pipe_jcc );
12692 %}
12693 
12694 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12695   match(CountedLoopEnd cop cmp);
12696   effect(USE labl);
12697 
12698   ins_cost(200);
12699   format %{ "J$cop,u  $labl\t# Loop end" %}
12700   size(6);
12701   ins_encode %{
12702     Label* L = $labl$$label;
12703     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12704   %}
12705   ins_pipe( pipe_jcc );
12706 %}
12707 
12708 // Jump Direct Conditional - using unsigned comparison
12709 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12710   match(If cop cmp);
12711   effect(USE labl);
12712 
12713   ins_cost(300);
12714   format %{ "J$cop,u  $labl" %}
12715   size(6);
12716   ins_encode %{
12717     Label* L = $labl$$label;
12718     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12719   %}
12720   ins_pipe(pipe_jcc);
12721 %}
12722 
12723 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12724   match(If cop cmp);
12725   effect(USE labl);
12726 
12727   ins_cost(200);
12728   format %{ "J$cop,u  $labl" %}
12729   size(6);
12730   ins_encode %{
12731     Label* L = $labl$$label;
12732     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12733   %}
12734   ins_pipe(pipe_jcc);
12735 %}
12736 
12737 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12738   match(If cop cmp);
12739   effect(USE labl);
12740 
12741   ins_cost(200);
12742   format %{ $$template
12743     if ($cop$$cmpcode == Assembler::notEqual) {
12744       $$emit$$"JP,u   $labl\n\t"
12745       $$emit$$"J$cop,u   $labl"
12746     } else {
12747       $$emit$$"JP,u   done\n\t"
12748       $$emit$$"J$cop,u   $labl\n\t"
12749       $$emit$$"done:"
12750     }
12751   %}
12752   ins_encode %{
12753     Label* l = $labl$$label;
12754     if ($cop$$cmpcode == Assembler::notEqual) {
12755       __ jcc(Assembler::parity, *l, false);
12756       __ jcc(Assembler::notEqual, *l, false);
12757     } else if ($cop$$cmpcode == Assembler::equal) {
12758       Label done;
12759       __ jccb(Assembler::parity, done);
12760       __ jcc(Assembler::equal, *l, false);
12761       __ bind(done);
12762     } else {
12763        ShouldNotReachHere();
12764     }
12765   %}
12766   ins_pipe(pipe_jcc);
12767 %}
12768 
12769 // ============================================================================
12770 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12771 // array for an instance of the superklass.  Set a hidden internal cache on a
12772 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12773 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
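// A rough C-level sketch of the check described above (illustrative only;
// the accessor names are approximations of the Klass fields named in the
// format string, and the real code is emitted by enc_PartialSubtypeCheck):
//
//   int partial_subtype_check(Klass* sub, Klass* super) {
//     Array<Klass*>* secondaries = sub->secondary_supers();
//     for (int i = 0; i < secondaries->length(); i++) {
//       if (secondaries->at(i) == super) {
//         // hit: remember it in the one-element secondary super cache
//         sub->set_secondary_super_cache(super);
//         return 0;          // zero     == hit
//       }
//     }
//     return 1;              // non-zero == miss
//   }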
12774 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12775   match(Set result (PartialSubtypeCheck sub super));
12776   effect( KILL rcx, KILL cr );
12777 
12778   ins_cost(1100);  // slightly larger than the next version
12779   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12780             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12781             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12782             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12783             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12784             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12785             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12786      "miss:\t" %}
12787 
12788   opcode(0x1); // Force a XOR of EDI
12789   ins_encode( enc_PartialSubtypeCheck() );
12790   ins_pipe( pipe_slow );
12791 %}
12792 
12793 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12794   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12795   effect( KILL rcx, KILL result );
12796 
12797   ins_cost(1000);
12798   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12799             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12800             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12801             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12802             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12803             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12804      "miss:\t" %}
12805 
12806   opcode(0x0);  // No need to XOR EDI
12807   ins_encode( enc_PartialSubtypeCheck() );
12808   ins_pipe( pipe_slow );
12809 %}
12810 
12811 // ============================================================================
12812 // Branch Instructions -- short offset versions
12813 //
12814 // These instructions are used to replace jumps of a long offset (the default
12815 // match) with jumps of a shorter offset.  These instructions are all tagged
12816 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12817 // match rules in general matching.  Instead, the ADLC generates a conversion
12818 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
12822 
12823 // Jump Direct - Label defines a relative address from JMP+1
12824 instruct jmpDir_short(label labl) %{
12825   match(Goto);
12826   effect(USE labl);
12827 
12828   ins_cost(300);
12829   format %{ "JMP,s  $labl" %}
12830   size(2);
12831   ins_encode %{
12832     Label* L = $labl$$label;
12833     __ jmpb(*L);
12834   %}
12835   ins_pipe( pipe_jmp );
12836   ins_short_branch(1);
12837 %}
12838 
12839 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12840 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12841   match(If cop cr);
12842   effect(USE labl);
12843 
12844   ins_cost(300);
12845   format %{ "J$cop,s  $labl" %}
12846   size(2);
12847   ins_encode %{
12848     Label* L = $labl$$label;
12849     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12850   %}
12851   ins_pipe( pipe_jcc );
12852   ins_short_branch(1);
12853 %}
12854 
12855 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12856 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12857   match(CountedLoopEnd cop cr);
12858   effect(USE labl);
12859 
12860   ins_cost(300);
12861   format %{ "J$cop,s  $labl\t# Loop end" %}
12862   size(2);
12863   ins_encode %{
12864     Label* L = $labl$$label;
12865     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12866   %}
12867   ins_pipe( pipe_jcc );
12868   ins_short_branch(1);
12869 %}
12870 
12871 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12872 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12873   match(CountedLoopEnd cop cmp);
12874   effect(USE labl);
12875 
12876   ins_cost(300);
12877   format %{ "J$cop,us $labl\t# Loop end" %}
12878   size(2);
12879   ins_encode %{
12880     Label* L = $labl$$label;
12881     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12882   %}
12883   ins_pipe( pipe_jcc );
12884   ins_short_branch(1);
12885 %}
12886 
12887 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12888   match(CountedLoopEnd cop cmp);
12889   effect(USE labl);
12890 
12891   ins_cost(300);
12892   format %{ "J$cop,us $labl\t# Loop end" %}
12893   size(2);
12894   ins_encode %{
12895     Label* L = $labl$$label;
12896     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12897   %}
12898   ins_pipe( pipe_jcc );
12899   ins_short_branch(1);
12900 %}
12901 
12902 // Jump Direct Conditional - using unsigned comparison
12903 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12904   match(If cop cmp);
12905   effect(USE labl);
12906 
12907   ins_cost(300);
12908   format %{ "J$cop,us $labl" %}
12909   size(2);
12910   ins_encode %{
12911     Label* L = $labl$$label;
12912     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12913   %}
12914   ins_pipe( pipe_jcc );
12915   ins_short_branch(1);
12916 %}
12917 
12918 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12919   match(If cop cmp);
12920   effect(USE labl);
12921 
12922   ins_cost(300);
12923   format %{ "J$cop,us $labl" %}
12924   size(2);
12925   ins_encode %{
12926     Label* L = $labl$$label;
12927     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12928   %}
12929   ins_pipe( pipe_jcc );
12930   ins_short_branch(1);
12931 %}
12932 
12933 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12934   match(If cop cmp);
12935   effect(USE labl);
12936 
12937   ins_cost(300);
12938   format %{ $$template
12939     if ($cop$$cmpcode == Assembler::notEqual) {
12940       $$emit$$"JP,u,s   $labl\n\t"
12941       $$emit$$"J$cop,u,s   $labl"
12942     } else {
12943       $$emit$$"JP,u,s   done\n\t"
12944       $$emit$$"J$cop,u,s  $labl\n\t"
12945       $$emit$$"done:"
12946     }
12947   %}
12948   size(4);
12949   ins_encode %{
12950     Label* l = $labl$$label;
12951     if ($cop$$cmpcode == Assembler::notEqual) {
12952       __ jccb(Assembler::parity, *l);
12953       __ jccb(Assembler::notEqual, *l);
12954     } else if ($cop$$cmpcode == Assembler::equal) {
12955       Label done;
12956       __ jccb(Assembler::parity, done);
12957       __ jccb(Assembler::equal, *l);
12958       __ bind(done);
12959     } else {
12960        ShouldNotReachHere();
12961     }
12962   %}
12963   ins_pipe(pipe_jcc);
12964   ins_short_branch(1);
12965 %}
12966 
12967 // ============================================================================
12968 // Long Compare
12969 //
12970 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12971 // is tricky.  The flavor of compare used depends on whether we are testing
12972 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands and applying the GE test; negating that commuted test yields
// the GT test.  The EQ test is done by ORing the high and low halves and
// testing for zero, and the NE test is negated from that.
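//
// An illustrative C sketch of those identities (assumptions: <stdint.h> types;
// not generated code -- the instructs below keep the answer in EFLAGS via
// CMP/SBB rather than materializing a difference):
//
//   // signed 64-bit "less than" built from 32-bit halves
//   static bool long_lt(int32_t xhi, uint32_t xlo, int32_t yhi, uint32_t ylo) {
//     if (xhi != yhi) return xhi < yhi;   // signed compare on the high half
//     return xlo < ylo;                   // unsigned compare on the low half
//   }
//   // GE = !LT;  LE(x,y) = GE(y,x);  GT(x,y) = LT(y,x)  (commuted operands)
//   // EQ: ((xhi ^ yhi) | (xlo ^ ylo)) == 0;  NE is its negation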
12977 
12978 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12979 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12980 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12981 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12982 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12983 // foo match ends up with the wrong leaf.  One fix is to not match both
12984 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12985 // both forms beat the trinary form of long-compare and both are very useful
12986 // on Intel which has so few registers.
12987 
12988 // Manifest a CmpL result in an integer register.  Very painful.
12989 // This is the test to avoid.
12990 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12991   match(Set dst (CmpL3 src1 src2));
12992   effect( KILL flags );
12993   ins_cost(1000);
12994   format %{ "XOR    $dst,$dst\n\t"
12995             "CMP    $src1.hi,$src2.hi\n\t"
12996             "JLT,s  m_one\n\t"
12997             "JGT,s  p_one\n\t"
12998             "CMP    $src1.lo,$src2.lo\n\t"
12999             "JB,s   m_one\n\t"
13000             "JEQ,s  done\n"
13001     "p_one:\tINC    $dst\n\t"
13002             "JMP,s  done\n"
13003     "m_one:\tDEC    $dst\n"
13004      "done:" %}
13005   ins_encode %{
13006     Label p_one, m_one, done;
13007     __ xorptr($dst$$Register, $dst$$Register);
13008     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13009     __ jccb(Assembler::less,    m_one);
13010     __ jccb(Assembler::greater, p_one);
13011     __ cmpl($src1$$Register, $src2$$Register);
13012     __ jccb(Assembler::below,   m_one);
13013     __ jccb(Assembler::equal,   done);
13014     __ bind(p_one);
13015     __ incrementl($dst$$Register);
13016     __ jmpb(done);
13017     __ bind(m_one);
13018     __ decrementl($dst$$Register);
13019     __ bind(done);
13020   %}
13021   ins_pipe( pipe_slow );
13022 %}
13023 
13024 //======
13025 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13026 // compares.  Can be used for LE or GT compares by reversing arguments.
13027 // NOT GOOD FOR EQ/NE tests.
13028 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13029   match( Set flags (CmpL src zero ));
13030   ins_cost(100);
13031   format %{ "TEST   $src.hi,$src.hi" %}
13032   opcode(0x85);
13033   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13034   ins_pipe( ialu_cr_reg_reg );
13035 %}
13036 
13037 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13038 // compares.  Can be used for LE or GT compares by reversing arguments.
13039 // NOT GOOD FOR EQ/NE tests.
13040 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13041   match( Set flags (CmpL src1 src2 ));
13042   effect( TEMP tmp );
13043   ins_cost(300);
13044   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13045             "MOV    $tmp,$src1.hi\n\t"
13046             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13047   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13048   ins_pipe( ialu_cr_reg_reg );
13049 %}
13050 
// Long compares reg < zero/reg OR reg >= zero/reg.
13052 // Just a wrapper for a normal branch, plus the predicate test.
13053 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13054   match(If cmp flags);
13055   effect(USE labl);
13056   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13057   expand %{
13058     jmpCon(cmp,flags,labl);    // JLT or JGE...
13059   %}
13060 %}
13061 
13062 //======
13063 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13064 // compares.  Can be used for LE or GT compares by reversing arguments.
13065 // NOT GOOD FOR EQ/NE tests.
13066 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13067   match(Set flags (CmpUL src zero));
13068   ins_cost(100);
13069   format %{ "TEST   $src.hi,$src.hi" %}
13070   opcode(0x85);
13071   ins_encode(OpcP, RegReg_Hi2(src, src));
13072   ins_pipe(ialu_cr_reg_reg);
13073 %}
13074 
13075 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13076 // compares.  Can be used for LE or GT compares by reversing arguments.
13077 // NOT GOOD FOR EQ/NE tests.
13078 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13079   match(Set flags (CmpUL src1 src2));
13080   effect(TEMP tmp);
13081   ins_cost(300);
13082   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13083             "MOV    $tmp,$src1.hi\n\t"
13084             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13085   ins_encode(long_cmp_flags2(src1, src2, tmp));
13086   ins_pipe(ialu_cr_reg_reg);
13087 %}
13088 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13090 // Just a wrapper for a normal branch, plus the predicate test.
13091 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13092   match(If cmp flags);
13093   effect(USE labl);
13094   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13095   expand %{
13096     jmpCon(cmp, flags, labl);    // JLT or JGE...
13097   %}
13098 %}
13099 
13100 // Compare 2 longs and CMOVE longs.
13101 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13102   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13103   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13104   ins_cost(400);
13105   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13106             "CMOV$cmp $dst.hi,$src.hi" %}
13107   opcode(0x0F,0x40);
13108   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13109   ins_pipe( pipe_cmov_reg_long );
13110 %}
13111 
13112 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13113   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13114   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13115   ins_cost(500);
13116   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13117             "CMOV$cmp $dst.hi,$src.hi" %}
13118   opcode(0x0F,0x40);
13119   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13120   ins_pipe( pipe_cmov_reg_long );
13121 %}
13122 
13123 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13124   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13125   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13126   ins_cost(400);
13127   expand %{
13128     cmovLL_reg_LTGE(cmp, flags, dst, src);
13129   %}
13130 %}
13131 
13132 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13133   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13134   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13135   ins_cost(500);
13136   expand %{
13137     cmovLL_mem_LTGE(cmp, flags, dst, src);
13138   %}
13139 %}
13140 
13141 // Compare 2 longs and CMOVE ints.
13142 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13143   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13144   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13145   ins_cost(200);
13146   format %{ "CMOV$cmp $dst,$src" %}
13147   opcode(0x0F,0x40);
13148   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13149   ins_pipe( pipe_cmov_reg );
13150 %}
13151 
13152 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13153   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13154   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13155   ins_cost(250);
13156   format %{ "CMOV$cmp $dst,$src" %}
13157   opcode(0x0F,0x40);
13158   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13159   ins_pipe( pipe_cmov_mem );
13160 %}
13161 
13162 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13163   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13164   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13165   ins_cost(200);
13166   expand %{
13167     cmovII_reg_LTGE(cmp, flags, dst, src);
13168   %}
13169 %}
13170 
13171 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13172   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13173   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13174   ins_cost(250);
13175   expand %{
13176     cmovII_mem_LTGE(cmp, flags, dst, src);
13177   %}
13178 %}
13179 
13180 // Compare 2 longs and CMOVE ptrs.
13181 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13182   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13183   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13184   ins_cost(200);
13185   format %{ "CMOV$cmp $dst,$src" %}
13186   opcode(0x0F,0x40);
13187   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13188   ins_pipe( pipe_cmov_reg );
13189 %}
13190 
13191 // Compare 2 unsigned longs and CMOVE ptrs.
13192 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13193   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13194   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13195   ins_cost(200);
13196   expand %{
13197     cmovPP_reg_LTGE(cmp,flags,dst,src);
13198   %}
13199 %}
13200 
13201 // Compare 2 longs and CMOVE doubles
13202 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13204   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13205   ins_cost(200);
13206   expand %{
13207     fcmovDPR_regS(cmp,flags,dst,src);
13208   %}
13209 %}
13210 
13211 // Compare 2 longs and CMOVE doubles
13212 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13214   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13215   ins_cost(200);
13216   expand %{
13217     fcmovD_regS(cmp,flags,dst,src);
13218   %}
13219 %}
13220 
13221 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13223   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13224   ins_cost(200);
13225   expand %{
13226     fcmovFPR_regS(cmp,flags,dst,src);
13227   %}
13228 %}
13229 
13230 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13232   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13233   ins_cost(200);
13234   expand %{
13235     fcmovF_regS(cmp,flags,dst,src);
13236   %}
13237 %}
13238 
13239 //======
13240 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13241 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13242   match( Set flags (CmpL src zero ));
13243   effect(TEMP tmp);
13244   ins_cost(200);
13245   format %{ "MOV    $tmp,$src.lo\n\t"
13246             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13247   ins_encode( long_cmp_flags0( src, tmp ) );
13248   ins_pipe( ialu_reg_reg_long );
13249 %}
13250 
13251 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13252 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13253   match( Set flags (CmpL src1 src2 ));
13254   ins_cost(200+300);
13255   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13256             "JNE,s  skip\n\t"
13257             "CMP    $src1.hi,$src2.hi\n\t"
13258      "skip:\t" %}
13259   ins_encode( long_cmp_flags1( src1, src2 ) );
13260   ins_pipe( ialu_cr_reg_reg );
13261 %}
13262 
13263 // Long compare reg == zero/reg OR reg != zero/reg
13264 // Just a wrapper for a normal branch, plus the predicate test.
13265 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13266   match(If cmp flags);
13267   effect(USE labl);
13268   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13269   expand %{
13270     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13271   %}
13272 %}
13273 
13274 //======
13275 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13276 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13277   match(Set flags (CmpUL src zero));
13278   effect(TEMP tmp);
13279   ins_cost(200);
13280   format %{ "MOV    $tmp,$src.lo\n\t"
13281             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13282   ins_encode(long_cmp_flags0(src, tmp));
13283   ins_pipe(ialu_reg_reg_long);
13284 %}
13285 
13286 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13287 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13288   match(Set flags (CmpUL src1 src2));
13289   ins_cost(200+300);
13290   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13291             "JNE,s  skip\n\t"
13292             "CMP    $src1.hi,$src2.hi\n\t"
13293      "skip:\t" %}
13294   ins_encode(long_cmp_flags1(src1, src2));
13295   ins_pipe(ialu_cr_reg_reg);
13296 %}
13297 
13298 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13299 // Just a wrapper for a normal branch, plus the predicate test.
13300 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13301   match(If cmp flags);
13302   effect(USE labl);
13303   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13304   expand %{
13305     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13306   %}
13307 %}
13308 
13309 // Compare 2 longs and CMOVE longs.
13310 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13311   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13312   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13313   ins_cost(400);
13314   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13315             "CMOV$cmp $dst.hi,$src.hi" %}
13316   opcode(0x0F,0x40);
13317   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13318   ins_pipe( pipe_cmov_reg_long );
13319 %}
13320 
13321 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13322   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13323   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13324   ins_cost(500);
13325   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13326             "CMOV$cmp $dst.hi,$src.hi" %}
13327   opcode(0x0F,0x40);
13328   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13329   ins_pipe( pipe_cmov_reg_long );
13330 %}
13331 
13332 // Compare 2 longs and CMOVE ints.
13333 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13334   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13335   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13336   ins_cost(200);
13337   format %{ "CMOV$cmp $dst,$src" %}
13338   opcode(0x0F,0x40);
13339   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13340   ins_pipe( pipe_cmov_reg );
13341 %}
13342 
13343 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13344   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13345   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13346   ins_cost(250);
13347   format %{ "CMOV$cmp $dst,$src" %}
13348   opcode(0x0F,0x40);
13349   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13350   ins_pipe( pipe_cmov_mem );
13351 %}
13352 
13353 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13354   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13355   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13356   ins_cost(200);
13357   expand %{
13358     cmovII_reg_EQNE(cmp, flags, dst, src);
13359   %}
13360 %}
13361 
13362 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13363   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13364   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13365   ins_cost(250);
13366   expand %{
13367     cmovII_mem_EQNE(cmp, flags, dst, src);
13368   %}
13369 %}
13370 
13371 // Compare 2 longs and CMOVE ptrs.
13372 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13373   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13374   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13375   ins_cost(200);
13376   format %{ "CMOV$cmp $dst,$src" %}
13377   opcode(0x0F,0x40);
13378   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13379   ins_pipe( pipe_cmov_reg );
13380 %}
13381 
13382 // Compare 2 unsigned longs and CMOVE ptrs.
13383 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13384   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13385   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13386   ins_cost(200);
13387   expand %{
13388     cmovPP_reg_EQNE(cmp,flags,dst,src);
13389   %}
13390 %}
13391 
13392 // Compare 2 longs and CMOVE doubles
13393 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13395   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13396   ins_cost(200);
13397   expand %{
13398     fcmovDPR_regS(cmp,flags,dst,src);
13399   %}
13400 %}
13401 
13402 // Compare 2 longs and CMOVE doubles
13403 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13405   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13406   ins_cost(200);
13407   expand %{
13408     fcmovD_regS(cmp,flags,dst,src);
13409   %}
13410 %}
13411 
13412 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13414   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13415   ins_cost(200);
13416   expand %{
13417     fcmovFPR_regS(cmp,flags,dst,src);
13418   %}
13419 %}
13420 
13421 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13423   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13424   ins_cost(200);
13425   expand %{
13426     fcmovF_regS(cmp,flags,dst,src);
13427   %}
13428 %}
13429 
13430 //======
13431 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13432 // Same as cmpL_reg_flags_LEGT except must negate src
13433 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13434   match( Set flags (CmpL src zero ));
13435   effect( TEMP tmp );
13436   ins_cost(300);
13437   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13438             "CMP    $tmp,$src.lo\n\t"
13439             "SBB    $tmp,$src.hi\n\t" %}
13440   ins_encode( long_cmp_flags3(src, tmp) );
13441   ins_pipe( ialu_reg_reg_long );
13442 %}
13443 
13444 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13445 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13446 // requires a commuted test to get the same result.
13447 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13448   match( Set flags (CmpL src1 src2 ));
13449   effect( TEMP tmp );
13450   ins_cost(300);
13451   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13452             "MOV    $tmp,$src2.hi\n\t"
13453             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13454   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13455   ins_pipe( ialu_cr_reg_reg );
13456 %}
13457 
// Long compares reg <= zero/reg OR reg > zero/reg.
13459 // Just a wrapper for a normal branch, plus the predicate test
13460 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13461   match(If cmp flags);
13462   effect(USE labl);
13463   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13464   ins_cost(300);
13465   expand %{
13466     jmpCon(cmp,flags,labl);    // JGT or JLE...
13467   %}
13468 %}
13469 
13470 //======
13471 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13472 // Same as cmpUL_reg_flags_LEGT except must negate src
13473 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13474   match(Set flags (CmpUL src zero));
13475   effect(TEMP tmp);
13476   ins_cost(300);
13477   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13478             "CMP    $tmp,$src.lo\n\t"
13479             "SBB    $tmp,$src.hi\n\t" %}
13480   ins_encode(long_cmp_flags3(src, tmp));
13481   ins_pipe(ialu_reg_reg_long);
13482 %}
13483 
13484 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13485 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13486 // requires a commuted test to get the same result.
13487 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13488   match(Set flags (CmpUL src1 src2));
13489   effect(TEMP tmp);
13490   ins_cost(300);
13491   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13492             "MOV    $tmp,$src2.hi\n\t"
13493             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13494   ins_encode(long_cmp_flags2( src2, src1, tmp));
13495   ins_pipe(ialu_cr_reg_reg);
13496 %}
13497 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13499 // Just a wrapper for a normal branch, plus the predicate test
13500 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13501   match(If cmp flags);
13502   effect(USE labl);
13503   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13504   ins_cost(300);
13505   expand %{
13506     jmpCon(cmp, flags, labl);    // JGT or JLE...
13507   %}
13508 %}
13509 
13510 // Compare 2 longs and CMOVE longs.
13511 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13512   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13513   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13514   ins_cost(400);
13515   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13516             "CMOV$cmp $dst.hi,$src.hi" %}
13517   opcode(0x0F,0x40);
13518   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13519   ins_pipe( pipe_cmov_reg_long );
13520 %}
13521 
13522 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13523   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13524   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13525   ins_cost(500);
13526   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13527             "CMOV$cmp $dst.hi,$src.hi+4" %}
13528   opcode(0x0F,0x40);
13529   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13530   ins_pipe( pipe_cmov_reg_long );
13531 %}
13532 
13533 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13534   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13535   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13536   ins_cost(400);
13537   expand %{
13538     cmovLL_reg_LEGT(cmp, flags, dst, src);
13539   %}
13540 %}
13541 
13542 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13543   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13544   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13545   ins_cost(500);
13546   expand %{
13547     cmovLL_mem_LEGT(cmp, flags, dst, src);
13548   %}
13549 %}
13550 
13551 // Compare 2 longs and CMOVE ints.
13552 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13553   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13554   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13555   ins_cost(200);
13556   format %{ "CMOV$cmp $dst,$src" %}
13557   opcode(0x0F,0x40);
13558   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13559   ins_pipe( pipe_cmov_reg );
13560 %}
13561 
13562 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13563   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13564   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13565   ins_cost(250);
13566   format %{ "CMOV$cmp $dst,$src" %}
13567   opcode(0x0F,0x40);
13568   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13569   ins_pipe( pipe_cmov_mem );
13570 %}
13571 
13572 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13573   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13574   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13575   ins_cost(200);
13576   expand %{
13577     cmovII_reg_LEGT(cmp, flags, dst, src);
13578   %}
13579 %}
13580 
13581 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13582   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13583   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13584   ins_cost(250);
13585   expand %{
13586     cmovII_mem_LEGT(cmp, flags, dst, src);
13587   %}
13588 %}
13589 
13590 // Compare 2 longs and CMOVE ptrs.
13591 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13592   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13593   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13594   ins_cost(200);
13595   format %{ "CMOV$cmp $dst,$src" %}
13596   opcode(0x0F,0x40);
13597   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13598   ins_pipe( pipe_cmov_reg );
13599 %}
13600 
13601 // Compare 2 unsigned longs and CMOVE ptrs.
13602 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13603   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13604   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13605   ins_cost(200);
13606   expand %{
13607     cmovPP_reg_LEGT(cmp,flags,dst,src);
13608   %}
13609 %}
13610 
13611 // Compare 2 longs and CMOVE doubles
13612 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13614   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13615   ins_cost(200);
13616   expand %{
13617     fcmovDPR_regS(cmp,flags,dst,src);
13618   %}
13619 %}
13620 
13621 // Compare 2 longs and CMOVE doubles
13622 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13624   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13625   ins_cost(200);
13626   expand %{
13627     fcmovD_regS(cmp,flags,dst,src);
13628   %}
13629 %}
13630 
13631 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13633   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13634   ins_cost(200);
13635   expand %{
13636     fcmovFPR_regS(cmp,flags,dst,src);
13637   %}
13638 %}
13639 
13640 
13641 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13643   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13644   ins_cost(200);
13645   expand %{
13646     fcmovF_regS(cmp,flags,dst,src);
13647   %}
13648 %}
13649 
13650 
13651 // ============================================================================
13652 // Procedure Call/Return Instructions
13653 // Call Java Static Instruction
13654 // Note: If this code changes, the corresponding ret_addr_offset() and
13655 //       compute_padding() functions will have to be adjusted.
13656 instruct CallStaticJavaDirect(method meth) %{
13657   match(CallStaticJava);
13658   effect(USE meth);
13659 
13660   ins_cost(300);
13661   format %{ "CALL,static " %}
13662   opcode(0xE8); /* E8 cd */
13663   ins_encode( pre_call_resets,
13664               Java_Static_Call( meth ),
13665               call_epilog,
13666               post_call_FPU );
13667   ins_pipe( pipe_slow );
13668   ins_alignment(4);
13669 %}
13670 
13671 // Call Java Dynamic Instruction
13672 // Note: If this code changes, the corresponding ret_addr_offset() and
13673 //       compute_padding() functions will have to be adjusted.
13674 instruct CallDynamicJavaDirect(method meth) %{
13675   match(CallDynamicJava);
13676   effect(USE meth);
13677 
13678   ins_cost(300);
13679   format %{ "MOV    EAX,(oop)-1\n\t"
13680             "CALL,dynamic" %}
13681   opcode(0xE8); /* E8 cd */
13682   ins_encode( pre_call_resets,
13683               Java_Dynamic_Call( meth ),
13684               call_epilog,
13685               post_call_FPU );
13686   ins_pipe( pipe_slow );
13687   ins_alignment(4);
13688 %}
13689 
13690 // Call Runtime Instruction
13691 instruct CallRuntimeDirect(method meth) %{
13692   match(CallRuntime );
13693   effect(USE meth);
13694 
13695   ins_cost(300);
13696   format %{ "CALL,runtime " %}
13697   opcode(0xE8); /* E8 cd */
13698   // Use FFREEs to clear entries in float stack
13699   ins_encode( pre_call_resets,
13700               FFree_Float_Stack_All,
13701               Java_To_Runtime( meth ),
13702               post_call_FPU );
13703   ins_pipe( pipe_slow );
13704 %}
13705 
13706 // Call runtime without safepoint
13707 instruct CallLeafDirect(method meth) %{
13708   match(CallLeaf);
13709   effect(USE meth);
13710 
13711   ins_cost(300);
13712   format %{ "CALL_LEAF,runtime " %}
13713   opcode(0xE8); /* E8 cd */
13714   ins_encode( pre_call_resets,
13715               FFree_Float_Stack_All,
13716               Java_To_Runtime( meth ),
13717               Verify_FPU_For_Leaf, post_call_FPU );
13718   ins_pipe( pipe_slow );
13719 %}
13720 
13721 instruct CallLeafNoFPDirect(method meth) %{
13722   match(CallLeafNoFP);
13723   effect(USE meth);
13724 
13725   ins_cost(300);
13726   format %{ "CALL_LEAF_NOFP,runtime " %}
13727   opcode(0xE8); /* E8 cd */
13728   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13729   ins_pipe( pipe_slow );
13730 %}
13731 
13732 
13733 // Return Instruction
13734 // Remove the return address & jump to it.
13735 instruct Ret() %{
13736   match(Return);
13737   format %{ "RET" %}
13738   opcode(0xC3);
13739   ins_encode(OpcP);
13740   ins_pipe( pipe_jmp );
13741 %}
13742 
13743 // Tail Call; Jump from runtime stub to Java code.
13744 // Also known as an 'interprocedural jump'.
13745 // Target of jump will eventually return to caller.
13746 // TailJump below removes the return address.
13747 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13748   match(TailCall jump_target method_ptr);
13749   ins_cost(300);
13750   format %{ "JMP    $jump_target \t# EBX holds method" %}
13751   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13752   ins_encode( OpcP, RegOpc(jump_target) );
13753   ins_pipe( pipe_jmp );
13754 %}
13755 
13756 
13757 // Tail Jump; remove the return address; jump to target.
13758 // TailCall above leaves the return address around.
13759 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13760   match( TailJump jump_target ex_oop );
13761   ins_cost(300);
13762   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13763             "JMP    $jump_target " %}
13764   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13765   ins_encode( enc_pop_rdx,
13766               OpcP, RegOpc(jump_target) );
13767   ins_pipe( pipe_jmp );
13768 %}
13769 
13770 // Create exception oop: created by stack-crawling runtime code.
13771 // Created exception is now available to this handler, and is setup
13772 // just prior to jumping to this handler.  No code emitted.
13773 instruct CreateException( eAXRegP ex_oop )
13774 %{
13775   match(Set ex_oop (CreateEx));
13776 
13777   size(0);
13778   // use the following format syntax
13779   format %{ "# exception oop is in EAX; no code emitted" %}
13780   ins_encode();
13781   ins_pipe( empty );
13782 %}
13783 
13784 
13785 // Rethrow exception:
13786 // The exception oop will come in the first argument position.
13787 // Then JUMP (not call) to the rethrow stub code.
13788 instruct RethrowException()
13789 %{
13790   match(Rethrow);
13791 
13792   // use the following format syntax
13793   format %{ "JMP    rethrow_stub" %}
13794   ins_encode(enc_rethrow);
13795   ins_pipe( pipe_jmp );
13796 %}
13797 
13798 // inlined locking and unlocking
13799 
13800 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13801   predicate(Compile::current()->use_rtm());
13802   match(Set cr (FastLock object box));
13803   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13804   ins_cost(300);
13805   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13806   ins_encode %{
13807     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13808                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13809                  _rtm_counters, _stack_rtm_counters,
13810                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13811                  true, ra_->C->profile_rtm());
13812   %}
13813   ins_pipe(pipe_slow);
13814 %}
13815 
13816 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13817   predicate(!Compile::current()->use_rtm());
13818   match(Set cr (FastLock object box));
13819   effect(TEMP tmp, TEMP scr, USE_KILL box);
13820   ins_cost(300);
13821   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13822   ins_encode %{
13823     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13824                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13825   %}
13826   ins_pipe(pipe_slow);
13827 %}
13828 
13829 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13830   match(Set cr (FastUnlock object box));
13831   effect(TEMP tmp, USE_KILL box);
13832   ins_cost(300);
13833   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13834   ins_encode %{
13835     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13836   %}
13837   ins_pipe(pipe_slow);
13838 %}
13839 
13840 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13841   predicate(Matcher::vector_length(n) <= 32);
13842   match(Set dst (MaskAll src));
13843   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13844   ins_encode %{
13845     int mask_len = Matcher::vector_length(this);
13846     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13847   %}
13848   ins_pipe( pipe_slow );
13849 %}
13850 
13851 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13852   predicate(Matcher::vector_length(n) > 32);
13853   match(Set dst (MaskAll src));
13854   effect(TEMP ktmp);
13855   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13856   ins_encode %{
13857     int mask_len = Matcher::vector_length(this);
13858     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13859   %}
13860   ins_pipe( pipe_slow );
13861 %}
13862 
13863 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13864   predicate(Matcher::vector_length(n) > 32);
13865   match(Set dst (MaskAll src));
13866   effect(TEMP ktmp);
13867   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13868   ins_encode %{
13869     int mask_len = Matcher::vector_length(this);
13870     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13871   %}
13872   ins_pipe( pipe_slow );
13873 %}
13874 
13875 // ============================================================================
13876 // Safepoint Instruction
13877 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13878   match(SafePoint poll);
13879   effect(KILL cr, USE poll);
13880 
13881   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13882   ins_cost(125);
13883   // EBP would need size(3)
13884   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13885   ins_encode %{
13886     __ relocate(relocInfo::poll_type);
13887     address pre_pc = __ pc();
13888     __ testl(rax, Address($poll$$Register, 0));
13889     address post_pc = __ pc();
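    // 0x85 is the opcode byte of the "TEST r/m32, r32" form emitted above; the
    // guarantee is a sanity check that this expected two-byte encoding was used.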
13890     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13891   %}
13892   ins_pipe(ialu_reg_mem);
13893 %}
13894 
13895 
13896 // ============================================================================
13897 // This name is KNOWN by the ADLC and cannot be changed.
13898 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
13900 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13901   match(Set dst (ThreadLocal));
13902   effect(DEF dst, KILL cr);
13903 
13904   format %{ "MOV    $dst, Thread::current()" %}
13905   ins_encode %{
13906     Register dstReg = as_Register($dst$$reg);
13907     __ get_thread(dstReg);
13908   %}
13909   ins_pipe( ialu_reg_fat );
13910 %}
13911 
13912 
13913 
13914 //----------PEEPHOLE RULES-----------------------------------------------------
13915 // These must follow all instruction definitions as they use the names
13916 // defined in the instructions definitions.
13917 //
13918 // peepmatch ( root_instr_name [preceding_instruction]* );
13919 //
13920 // peepconstraint %{
13921 // (instruction_number.operand_name relational_op instruction_number.operand_name
13922 //  [, ...] );
13923 // // instruction numbers are zero-based using left to right order in peepmatch
13924 //
13925 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13926 // // provide an instruction_number.operand_name for each operand that appears
13927 // // in the replacement instruction's match rule
13928 //
13929 // ---------VM FLAGS---------------------------------------------------------
13930 //
13931 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13932 //
13933 // Each peephole rule is given an identifying number starting with zero and
13934 // increasing by one in the order seen by the parser.  An individual peephole
13935 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13936 // on the command-line.
13937 //
13938 // ---------CURRENT LIMITATIONS----------------------------------------------
13939 //
13940 // Only match adjacent instructions in same basic block
13941 // Only equality constraints
13942 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13943 // Only one replacement instruction
13944 //
13945 // ---------EXAMPLE----------------------------------------------------------
13946 //
13947 // // pertinent parts of existing instructions in architecture description
13948 // instruct movI(rRegI dst, rRegI src) %{
13949 //   match(Set dst (CopyI src));
13950 // %}
13951 //
13952 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13953 //   match(Set dst (AddI dst src));
13954 //   effect(KILL cr);
13955 // %}
13956 //
13957 // // Change (inc mov) to lea
13958 // peephole %{
13959 //   // increment preceded by register-register move
13960 //   peepmatch ( incI_eReg movI );
13961 //   // require that the destination register of the increment
13962 //   // match the destination register of the move
13963 //   peepconstraint ( 0.dst == 1.dst );
13964 //   // construct a replacement instruction that sets
13965 //   // the destination to ( move's source register + one )
13966 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13967 // %}
13968 //
13969 // Implementation no longer uses movX instructions since
13970 // machine-independent system no longer uses CopyX nodes.
13971 //
13972 // peephole %{
13973 //   peepmatch ( incI_eReg movI );
13974 //   peepconstraint ( 0.dst == 1.dst );
13975 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13976 // %}
13977 //
13978 // peephole %{
13979 //   peepmatch ( decI_eReg movI );
13980 //   peepconstraint ( 0.dst == 1.dst );
13981 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13982 // %}
13983 //
13984 // peephole %{
13985 //   peepmatch ( addI_eReg_imm movI );
13986 //   peepconstraint ( 0.dst == 1.dst );
13987 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13988 // %}
13989 //
13990 // peephole %{
13991 //   peepmatch ( addP_eReg_imm movP );
13992 //   peepconstraint ( 0.dst == 1.dst );
13993 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13994 // %}
13995 
13996 // // Change load of spilled value to only a spill
13997 // instruct storeI(memory mem, rRegI src) %{
13998 //   match(Set mem (StoreI mem src));
13999 // %}
14000 //
14001 // instruct loadI(rRegI dst, memory mem) %{
14002 //   match(Set dst (LoadI mem));
14003 // %}
14004 //
14005 peephole %{
14006   peepmatch ( loadI storeI );
14007   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14008   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14009 %}
14010 
14011 //----------SMARTSPILL RULES---------------------------------------------------
14012 // These must follow all instruction definitions as they use the names
14013 // defined in the instructions definitions.