1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
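//
// For example, reading one of the definitions below:
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// declares EBX with register save type SOC, C-convention save type SOE,
// ideal register type Op_RegI (spilled with LoadI/StoreI), and hardware
// encoding 3.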
   61 
   62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encoding to play games with the
// register encode to correct for the 0/1 offset.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
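
// For example, given the low half of a long pair this yields the matching
// high half from the pairs listed earlier (EDX:EAX, EBX:ECX, EDI:EBP):
// rax (encoding 0) maps to rdx (encoding 2), rcx (1) to rbx (3), and
// rbp (5) to rdi (7).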
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to the 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
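
// For example, AbsF/AbsD can be implemented by ANDing a value with the
// corresponding signmask constant (clearing the sign bit), and NegF/NegD by
// XORing with the signflip constant (toggling the sign bit); the 16-byte
// alignment produced by double_quadword() lets packed SSE AND/XOR
// instructions use these constants directly as memory operands.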
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
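
// For example, when no FPU control-word reset and no vzeroupper are pending,
// a static Java call is a single 5-byte CALL rel32, so the return address is
// 5 bytes past the start of the call; a dynamic call is preceded by a 5-byte
// MOV (see CallDynamicJavaDirectNode::compute_padding below), giving 10.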
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
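
// For example, emit_rm(cbuf, 0x3, dst, src) produces a ModRM byte with
// mod = 11 (register-direct), reg = dst and r/m = src; mod 0x3, reg 0,
// rm 4 packs to the byte 0xC4.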
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
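
// For example, FILD dword [ESP+8] would be emitted as opcode 0xDB,
// ModRM 0x44 (mod=01, reg=0, r/m=ESP), SIB 0x24 (no index, base=ESP),
// and the 8-bit displacement 0x08.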
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
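    // (mod = 00 with r/m = EBP means [disp32] with no base register, so a
    //  base of EBP always needs an explicit displacement, even when it is 0)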
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
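  // Materialize a three-way compare result in 'dst' from the flags set by a
  // preceding (u)comiss/(u)comisd: unordered (parity) and 'below' keep the
  // initial -1, equal produces 0, and 'above' produces 1 via setb(notEqual).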
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
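  // ADD ESP, #imm uses the sign-extended imm8 form (opcode 0x83) when the
  // frame size fits in a byte, and the imm32 form (opcode 0x81) otherwise.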
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
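  // POP EBP (single-byte opcode 0x58 + register encoding)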
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to a single-byte displacement.
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
  848     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  885   return (UseAVX> 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  903   return (UseAVX> 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
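  // EBX_num and EDX_num are used below only because their hardware encodings
  // (3 and 2) are the ModRM opcode-extension values needed here: /3 selects
  // FSTP (store and pop) and /2 selects FST (store, no pop) for both the
  // 0xD9 (float) and 0xDD (double) store opcodes.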
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
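    // For the XMM-sized cases below, xmm0 is borrowed as a scratch register:
    // its current value is parked just below ESP, the stack-to-stack copy is
    // done through xmm0, and the original xmm0 value is then restored.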
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
 1022       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
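  // The PUSH (0xFF) and POP (0x8F) memory forms need ModRM opcode extensions
  // /6 and /0; ESI_num and EAX_num are passed below only because their
  // hardware encodings (6 and 0) supply exactly those values.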
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
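      //
      // For example, FPR2 has register encode 2 but is physically st(1), so
      // FLD ST(i) is emitted as 0xD9, 0xC0 + encode - 1.  Once that FLD has
      // pushed the stack, the destination really is st(encode), so the FSTP
      // below is emitted as 0xDD, 0xD8 + encode with no adjustment.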
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     int offset = ra_->reg2offset(src_first);
 1247     if (cbuf != nullptr) {
 1248       MacroAssembler _masm(cbuf);
 1249       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1250 #ifndef PRODUCT
 1251     } else {
 1252       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1253 #endif
 1254     }
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1259     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1260     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1261     int offset = ra_->reg2offset(dst_first);
 1262     if (cbuf != nullptr) {
 1263       MacroAssembler _masm(cbuf);
 1264       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1265 #ifndef PRODUCT
 1266     } else {
 1267       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1268 #endif
 1269     }
 1270     return 0;
 1271   }
 1272 
 1273   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1274     Unimplemented();
 1275     return 0;
 1276   }
 1277 
 1278   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1279     Unimplemented();
 1280     return 0;
 1281   }
 1282 
 1283   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1284     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1285     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1286     if (cbuf != nullptr) {
 1287       MacroAssembler _masm(cbuf);
 1288       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289 #ifndef PRODUCT
 1290     } else {
 1291       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1292 #endif
 1293     }
 1294     return 0;
 1295   }
 1296 
 1297   assert( size > 0, "missed a case" );
 1298 
 1299   // --------------------------------------------------------------------
 1300   // Check for second bits still needing moving.
 1301   if( src_second == dst_second )
 1302     return size;               // Self copy; no move
 1303   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1304 
 1305   // Check for second word int-int move
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1307     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1308 
 1309   // Check for second word integer store
 1310   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1312 
 1313   // Check for second word integer load
 1314   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1315     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1316 
 1317   Unimplemented();
 1318   return 0; // Mute compiler
 1319 }
 1320 
 1321 #ifndef PRODUCT
 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1323   implementation( nullptr, ra_, false, st );
 1324 }
 1325 #endif
 1326 
 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   implementation( &cbuf, ra_, false, nullptr );
 1329 }
 1330 
 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1332   return MachNode::size(ra_);
 1333 }
 1334 
 1335 
 1336 //=============================================================================
 1337 #ifndef PRODUCT
 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_reg_first(this);
 1341   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1342 }
 1343 #endif
 1344 
 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1347   int reg = ra_->get_encode(this);
 1348   if( offset >= 128 ) {
 1349     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1350     emit_rm(cbuf, 0x2, reg, 0x04);
 1351     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1352     emit_d32(cbuf, offset);
 1353   }
 1354   else {
 1355     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1356     emit_rm(cbuf, 0x1, reg, 0x04);
 1357     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1358     emit_d8(cbuf, offset);
 1359   }
 1360 }
 1361 
 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1364   if( offset >= 128 ) {
 1365     return 7;
 1366   }
 1367   else {
 1368     return 4;
 1369   }
 1370 }
 1371 
 1372 //=============================================================================
 1373 #ifndef PRODUCT
 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1375   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1376   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1377   st->print_cr("\tNOP");
 1378   st->print_cr("\tNOP");
 1379   if( !OptoBreakpoint )
 1380     st->print_cr("\tNOP");
 1381 }
 1382 #endif
 1383 
 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1385   MacroAssembler masm(&cbuf);
 1386   masm.ic_check(CodeEntryAlignment);
 1387 }
 1388 
 1389 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1390   return MachNode::size(ra_); // too many variables; just compute it
 1391                               // the hard way
 1392 }
 1393 
 1394 
 1395 //=============================================================================
 1396 
 1397 // Vector calling convention not supported.
 1398 bool Matcher::supports_vector_calling_convention() {
 1399   return false;
 1400 }
 1401 
 1402 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1403   Unimplemented();
 1404   return OptoRegPair(0, 0);
 1405 }
 1406 
 1407 // Is this branch offset short enough that a short branch can be used?
 1408 //
 1409 // NOTE: If the platform does not provide any short branch variants, then
 1410 //       this method should return false for offset 0.
 1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1415   offset -= br_size;
 1416 
 1417   // the short version of jmpConUCF2 contains multiple branches,
 1418   // making the reach slightly less
 1419   if (rule == jmpConUCF2_rule)
 1420     return (-126 <= offset && offset <= 125);
 1421   return (-128 <= offset && offset <= 127);
 1422 }
 1423 
 1424 // Return whether or not this register is ever used as an argument.  This
 1425 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1426 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1428 bool Matcher::can_be_java_arg( int reg ) {
 1429   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1430   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1431   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1432   return false;
 1433 }
 1434 
 1435 bool Matcher::is_spillable_arg( int reg ) {
 1436   return can_be_java_arg(reg);
 1437 }
 1438 
 1439 uint Matcher::int_pressure_limit()
 1440 {
 1441   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1442 }
 1443 
 1444 uint Matcher::float_pressure_limit()
 1445 {
 1446   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1447 }
 1448 
 1449 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code that uses the multiply-based expansion.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // produce a valid positive 32-bit value).
 1455   return VM_Version::has_fast_idiv() &&
 1456          (divisor == (int)divisor && divisor != min_jint);
 1457 }
 1458 
 1459 // Register for DIVI projection of divmodI
 1460 RegMask Matcher::divI_proj_mask() {
 1461   return EAX_REG_mask();
 1462 }
 1463 
 1464 // Register for MODI projection of divmodI
 1465 RegMask Matcher::modI_proj_mask() {
 1466   return EDX_REG_mask();
 1467 }
 1468 
 1469 // Register for DIVL projection of divmodL
 1470 RegMask Matcher::divL_proj_mask() {
 1471   ShouldNotReachHere();
 1472   return RegMask();
 1473 }
 1474 
 1475 // Register for MODL projection of divmodL
 1476 RegMask Matcher::modL_proj_mask() {
 1477   ShouldNotReachHere();
 1478   return RegMask();
 1479 }
 1480 
 1481 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1482   return NO_REG_mask();
 1483 }
 1484 
 1485 // Returns true if the high 32 bits of the value is known to be zero.
 1486 bool is_operand_hi32_zero(Node* n) {
 1487   int opc = n->Opcode();
 1488   if (opc == Op_AndL) {
 1489     Node* o2 = n->in(2);
 1490     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1491       return true;
 1492     }
 1493   }
 1494   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1495     return true;
 1496   }
 1497   return false;
 1498 }
 1499 
 1500 %}
 1501 
 1502 //----------ENCODING BLOCK-----------------------------------------------------
 1503 // This block specifies the encoding classes used by the compiler to output
 1504 // byte streams.  Encoding classes generate functions which are called by
 1505 // Machine Instruction Nodes in order to generate the bit encoding of the
 1506 // instruction.  Operands specify their base encoding interface with the
 1507 // interface keyword.  There are currently supported four interfaces,
 1508 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1509 // operand to generate a function which returns its register number when
 1510 // queried.   CONST_INTER causes an operand to generate a function which
 1511 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1512 // operand to generate four functions which return the Base Register, the
 1513 // Index Register, the Scale Value, and the Offset Value of the operand when
 1514 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1516 // associated with each basic boolean condition for a conditional instruction.
 1517 // Instructions specify two basic values for encoding.  They use the
 1518 // ins_encode keyword to specify their encoding class (which must be one of
 1519 // the class names specified in the encoding block), and they use the
 1520 // opcode keyword to specify, in order, their primary, secondary, and
 1521 // tertiary opcode.  Only the opcode sections which a particular instruction
 1522 // needs for encoding need to be specified.
 1523 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
 1525   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1526   // code in the enc_class source block.  Emit functions will live in the
 1527   // main source block for now.  In future, we can generalize this by
 1528   // adding a syntax that specifies the sizes of fields in an order,
 1529   // so that the adlc can build the emit functions automagically
 1530 
 1531   // Emit primary opcode
 1532   enc_class OpcP %{
 1533     emit_opcode(cbuf, $primary);
 1534   %}
 1535 
 1536   // Emit secondary opcode
 1537   enc_class OpcS %{
 1538     emit_opcode(cbuf, $secondary);
 1539   %}
 1540 
 1541   // Emit opcode directly
 1542   enc_class Opcode(immI d8) %{
 1543     emit_opcode(cbuf, $d8$$constant);
 1544   %}
 1545 
 1546   enc_class SizePrefix %{
 1547     emit_opcode(cbuf,0x66);
 1548   %}
 1549 
 1550   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1551     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1552   %}
 1553 
 1554   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1555     emit_opcode(cbuf,$opcode$$constant);
 1556     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1557   %}
 1558 
 1559   enc_class mov_r32_imm0( rRegI dst ) %{
 1560     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1561     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1562   %}
 1563 
 1564   enc_class cdq_enc %{
 1565     // Full implementation of Java idiv and irem; checks for
 1566     // special case as described in JVM spec., p.243 & p.271.
 1567     //
 1568     //         normal case                           special case
 1569     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
 1575     //
    //  Code sequence:
 1577     //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        eax,ecx
 1586     //                  done:
 1587     //
 1588     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1589     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
 1591     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1592     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
 1596     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1597     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1598     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1599     // normal_case:
 1600     emit_opcode(cbuf,0x99);                                         // cdq
 1601     // idiv (note: must be emitted by the user of this rule)
 1602     // normal:
 1603   %}
 1604 
 1605   // Dense encoding for older common ops
 1606   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1607     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1608   %}
 1609 
 1610 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1612   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1613     // Check for 8-bit immediate, and set sign extend bit in opcode
 1614     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1615       emit_opcode(cbuf, $primary | 0x02);
 1616     }
 1617     else {                          // If 32-bit immediate
 1618       emit_opcode(cbuf, $primary);
 1619     }
 1620   %}
 1621 
 1622   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1623     // Emit primary opcode and set sign-extend bit
 1624     // Check for 8-bit immediate, and set sign extend bit in opcode
 1625     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1627     else {                          // If 32-bit immediate
 1628       emit_opcode(cbuf, $primary);
 1629     }
 1630     // Emit r/m byte with secondary opcode, after primary opcode.
 1631     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1632   %}
 1633 
 1634   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1635     // Check for 8-bit immediate, and set sign extend bit in opcode
 1636     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1637       $$$emit8$imm$$constant;
 1638     }
 1639     else {                          // If 32-bit immediate
 1640       // Output immediate
 1641       $$$emit32$imm$$constant;
 1642     }
 1643   %}
 1644 
 1645   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1646     // Emit primary opcode and set sign-extend bit
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     int con = (int)$imm$$constant; // Throw away top bits
 1649     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1650     // Emit r/m byte with secondary opcode, after primary opcode.
 1651     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1652     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1653     else                               emit_d32(cbuf,con);
 1654   %}
 1655 
 1656   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1657     // Emit primary opcode and set sign-extend bit
 1658     // Check for 8-bit immediate, and set sign extend bit in opcode
 1659     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1660     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1661     // Emit r/m byte with tertiary opcode, after primary opcode.
 1662     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1663     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1664     else                               emit_d32(cbuf,con);
 1665   %}
 1666 
 1667   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1668     emit_cc(cbuf, $secondary, $dst$$reg );
 1669   %}
 1670 
 1671   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1672     int destlo = $dst$$reg;
 1673     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1674     // bswap lo
 1675     emit_opcode(cbuf, 0x0F);
 1676     emit_cc(cbuf, 0xC8, destlo);
 1677     // bswap hi
 1678     emit_opcode(cbuf, 0x0F);
 1679     emit_cc(cbuf, 0xC8, desthi);
 1680     // xchg lo and hi
 1681     emit_opcode(cbuf, 0x87);
 1682     emit_rm(cbuf, 0x3, destlo, desthi);
 1683   %}
 1684 
 1685   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1686     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1687   %}
 1688 
 1689   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1690     $$$emit8$primary;
 1691     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1692   %}
 1693 
 1694   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1695     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1696     emit_d8(cbuf, op >> 8 );
 1697     emit_d8(cbuf, op & 255);
 1698   %}
 1699 
 1700   // emulate a CMOV with a conditional branch around a MOV
 1701   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1702     // Invert sense of branch from sense of CMOV
 1703     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1704     emit_d8( cbuf, $brOffs$$constant );
 1705   %}
 1706 
 1707   enc_class enc_PartialSubtypeCheck( ) %{
 1708     Register Redi = as_Register(EDI_enc); // result register
 1709     Register Reax = as_Register(EAX_enc); // super class
 1710     Register Recx = as_Register(ECX_enc); // killed
 1711     Register Resi = as_Register(ESI_enc); // sub class
 1712     Label miss;
 1713 
 1714     MacroAssembler _masm(&cbuf);
 1715     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1716                                      nullptr, &miss,
 1717                                      /*set_cond_codes:*/ true);
 1718     if ($primary) {
 1719       __ xorptr(Redi, Redi);
 1720     }
 1721     __ bind(miss);
 1722   %}
 1723 
 1724   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1725     MacroAssembler masm(&cbuf);
 1726     int start = masm.offset();
 1727     if (UseSSE >= 2) {
 1728       if (VerifyFPU) {
 1729         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1730       }
 1731     } else {
 1732       // External c_calling_convention expects the FPU stack to be 'clean'.
 1733       // Compiled code leaves it dirty.  Do cleanup now.
 1734       masm.empty_FPU_stack();
 1735     }
 1736     if (sizeof_FFree_Float_Stack_All == -1) {
 1737       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1738     } else {
 1739       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1740     }
 1741   %}
 1742 
 1743   enc_class Verify_FPU_For_Leaf %{
 1744     if( VerifyFPU ) {
 1745       MacroAssembler masm(&cbuf);
 1746       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1747     }
 1748   %}
 1749 
 1750   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1751     // This is the instruction starting address for relocation info.
 1752     MacroAssembler _masm(&cbuf);
 1753     cbuf.set_insts_mark();
 1754     $$$emit8$primary;
 1755     // CALL directly to the runtime
 1756     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1757                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1758     __ post_call_nop();
 1759 
 1760     if (UseSSE >= 2) {
 1761       MacroAssembler _masm(&cbuf);
 1762       BasicType rt = tf()->return_type();
 1763 
 1764       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1765         // A C runtime call where the return value is unused.  In SSE2+
 1766         // mode the result needs to be removed from the FPU stack.  It's
 1767         // likely that this function call could be removed by the
 1768         // optimizer if the C function is a pure function.
 1769         __ ffree(0);
 1770       } else if (rt == T_FLOAT) {
 1771         __ lea(rsp, Address(rsp, -4));
 1772         __ fstp_s(Address(rsp, 0));
 1773         __ movflt(xmm0, Address(rsp, 0));
 1774         __ lea(rsp, Address(rsp,  4));
 1775       } else if (rt == T_DOUBLE) {
 1776         __ lea(rsp, Address(rsp, -8));
 1777         __ fstp_d(Address(rsp, 0));
 1778         __ movdbl(xmm0, Address(rsp, 0));
 1779         __ lea(rsp, Address(rsp,  8));
 1780       }
 1781     }
 1782   %}
 1783 
 1784   enc_class pre_call_resets %{
 1785     // If method sets FPU control word restore it here
 1786     debug_only(int off0 = cbuf.insts_size());
 1787     if (ra_->C->in_24_bit_fp_mode()) {
 1788       MacroAssembler _masm(&cbuf);
 1789       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1790     }
 1791     // Clear upper bits of YMM registers when current compiled code uses
 1792     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1793     MacroAssembler _masm(&cbuf);
 1794     __ vzeroupper();
 1795     debug_only(int off1 = cbuf.insts_size());
 1796     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1797   %}
 1798 
 1799   enc_class post_call_FPU %{
 1800     // If method sets FPU control word do it here also
 1801     if (Compile::current()->in_24_bit_fp_mode()) {
 1802       MacroAssembler masm(&cbuf);
 1803       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1804     }
 1805   %}
 1806 
 1807   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1808     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1809     // who we intended to call.
 1810     MacroAssembler _masm(&cbuf);
 1811     cbuf.set_insts_mark();
 1812     $$$emit8$primary;
 1813 
 1814     if (!_method) {
 1815       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1816                      runtime_call_Relocation::spec(),
 1817                      RELOC_IMM32);
 1818       __ post_call_nop();
 1819     } else {
 1820       int method_index = resolved_method_index(cbuf);
 1821       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1822                                                   : static_call_Relocation::spec(method_index);
 1823       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1824                      rspec, RELOC_DISP32);
 1825       __ post_call_nop();
 1826       address mark = cbuf.insts_mark();
 1827       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1828         // Calls of the same statically bound method can share
 1829         // a stub to the interpreter.
 1830         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1831       } else {
 1832         // Emit stubs for static call.
 1833         address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark);
 1834         if (stub == nullptr) {
 1835           ciEnv::current()->record_failure("CodeCache is full");
 1836           return;
 1837         }
 1838       }
 1839     }
 1840   %}
 1841 
 1842   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1843     MacroAssembler _masm(&cbuf);
 1844     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1845     __ post_call_nop();
 1846   %}
 1847 
 1848   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1849     int disp = in_bytes(Method::from_compiled_offset());
 1850     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1851 
 1852     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1853     MacroAssembler _masm(&cbuf);
 1854     cbuf.set_insts_mark();
 1855     $$$emit8$primary;
 1856     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1857     emit_d8(cbuf, disp);             // Displacement
 1858     __ post_call_nop();
 1859   %}
 1860 
 1861 //   Following encoding is no longer used, but may be restored if calling
 1862 //   convention changes significantly.
 1863 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1864 //
 1865 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1866 //     // int ic_reg     = Matcher::inline_cache_reg();
 1867 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1868 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1869 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1870 //
 1871 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1872 //     // // so we load it immediately before the call
 1873 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1874 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1875 //
 1876 //     // xor rbp,ebp
 1877 //     emit_opcode(cbuf, 0x33);
 1878 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1879 //
 1880 //     // CALL to interpreter.
 1881 //     cbuf.set_insts_mark();
 1882 //     $$$emit8$primary;
 1883 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1884 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1885 //   %}
 1886 
 1887   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1888     $$$emit8$primary;
 1889     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1890     $$$emit8$shift$$constant;
 1891   %}
 1892 
 1893   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1894     // Load immediate does not have a zero or sign extended version
 1895     // for 8-bit immediates
 1896     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1897     $$$emit32$src$$constant;
 1898   %}
 1899 
 1900   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1901     // Load immediate does not have a zero or sign extended version
 1902     // for 8-bit immediates
 1903     emit_opcode(cbuf, $primary + $dst$$reg);
 1904     $$$emit32$src$$constant;
 1905   %}
 1906 
 1907   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1908     // Load immediate does not have a zero or sign extended version
 1909     // for 8-bit immediates
 1910     int dst_enc = $dst$$reg;
 1911     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1912     if (src_con == 0) {
 1913       // xor dst, dst
 1914       emit_opcode(cbuf, 0x33);
 1915       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1916     } else {
 1917       emit_opcode(cbuf, $primary + dst_enc);
 1918       emit_d32(cbuf, src_con);
 1919     }
 1920   %}
 1921 
 1922   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1923     // Load immediate does not have a zero or sign extended version
 1924     // for 8-bit immediates
 1925     int dst_enc = $dst$$reg + 2;
 1926     int src_con = ((julong)($src$$constant)) >> 32;
 1927     if (src_con == 0) {
 1928       // xor dst, dst
 1929       emit_opcode(cbuf, 0x33);
 1930       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1931     } else {
 1932       emit_opcode(cbuf, $primary + dst_enc);
 1933       emit_d32(cbuf, src_con);
 1934     }
 1935   %}
 1936 
 1937 
 1938   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1939   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1940     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1941   %}
 1942 
 1943   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1944     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1945   %}
 1946 
 1947   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1948     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1949   %}
 1950 
 1951   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1952     $$$emit8$primary;
 1953     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1954   %}
 1955 
 1956   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1957     $$$emit8$secondary;
 1958     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1959   %}
 1960 
 1961   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1962     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1963   %}
 1964 
 1965   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1966     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1967   %}
 1968 
 1969   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1970     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1971   %}
 1972 
 1973   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1974     // Output immediate
 1975     $$$emit32$src$$constant;
 1976   %}
 1977 
 1978   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1979     // Output Float immediate bits
 1980     jfloat jf = $src$$constant;
 1981     int    jf_as_bits = jint_cast( jf );
 1982     emit_d32(cbuf, jf_as_bits);
 1983   %}
 1984 
 1985   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1986     // Output Float immediate bits
 1987     jfloat jf = $src$$constant;
 1988     int    jf_as_bits = jint_cast( jf );
 1989     emit_d32(cbuf, jf_as_bits);
 1990   %}
 1991 
 1992   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1993     // Output immediate
 1994     $$$emit16$src$$constant;
 1995   %}
 1996 
 1997   enc_class Con_d32(immI src) %{
 1998     emit_d32(cbuf,$src$$constant);
 1999   %}
 2000 
 2001   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2002     // Output immediate memory reference
 2003     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2004     emit_d32(cbuf, 0x00);
 2005   %}
 2006 
 2007   enc_class lock_prefix( ) %{
 2008     emit_opcode(cbuf,0xF0);         // [Lock]
 2009   %}
 2010 
 2011   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2016   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2017 
 2018     // XCHG  rbx,ecx
 2019     emit_opcode(cbuf,0x87);
 2020     emit_opcode(cbuf,0xD9);
 2021     // [Lock]
 2022     emit_opcode(cbuf,0xF0);
 2023     // CMPXCHG8 [Eptr]
 2024     emit_opcode(cbuf,0x0F);
 2025     emit_opcode(cbuf,0xC7);
 2026     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2027     // XCHG  rbx,ecx
 2028     emit_opcode(cbuf,0x87);
 2029     emit_opcode(cbuf,0xD9);
 2030   %}
 2031 
 2032   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035 
 2036     // CMPXCHG [Eptr]
 2037     emit_opcode(cbuf,0x0F);
 2038     emit_opcode(cbuf,0xB1);
 2039     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2040   %}
 2041 
 2042   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2043     // [Lock]
 2044     emit_opcode(cbuf,0xF0);
 2045 
 2046     // CMPXCHGB [Eptr]
 2047     emit_opcode(cbuf,0x0F);
 2048     emit_opcode(cbuf,0xB0);
 2049     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2050   %}
 2051 
 2052   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2053     // [Lock]
 2054     emit_opcode(cbuf,0xF0);
 2055 
 2056     // 16-bit mode
 2057     emit_opcode(cbuf, 0x66);
 2058 
 2059     // CMPXCHGW [Eptr]
 2060     emit_opcode(cbuf,0x0F);
 2061     emit_opcode(cbuf,0xB1);
 2062     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2063   %}
 2064 
 2065   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2066     int res_encoding = $res$$reg;
 2067 
 2068     // MOV  res,0
 2069     emit_opcode( cbuf, 0xB8 + res_encoding);
 2070     emit_d32( cbuf, 0 );
 2071     // JNE,s  fail
 2072     emit_opcode(cbuf,0x75);
 2073     emit_d8(cbuf, 5 );
 2074     // MOV  res,1
 2075     emit_opcode( cbuf, 0xB8 + res_encoding);
 2076     emit_d32( cbuf, 1 );
 2077     // fail:
 2078   %}
 2079 
 2080   enc_class set_instruction_start( ) %{
 2081     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2082   %}
 2083 
 2084   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2085     int reg_encoding = $ereg$$reg;
 2086     int base  = $mem$$base;
 2087     int index = $mem$$index;
 2088     int scale = $mem$$scale;
 2089     int displace = $mem$$disp;
 2090     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2091     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2092   %}
 2093 
 2094   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2095     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2096     int base  = $mem$$base;
 2097     int index = $mem$$index;
 2098     int scale = $mem$$scale;
 2099     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2100     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2101     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2102   %}
 2103 
 2104   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2105     int r1, r2;
 2106     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2107     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2108     emit_opcode(cbuf,0x0F);
 2109     emit_opcode(cbuf,$tertiary);
 2110     emit_rm(cbuf, 0x3, r1, r2);
 2111     emit_d8(cbuf,$cnt$$constant);
 2112     emit_d8(cbuf,$primary);
 2113     emit_rm(cbuf, 0x3, $secondary, r1);
 2114     emit_d8(cbuf,$cnt$$constant);
 2115   %}
 2116 
 2117   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2118     emit_opcode( cbuf, 0x8B ); // Move
 2119     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2120     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2121       emit_d8(cbuf,$primary);
 2122       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2123       emit_d8(cbuf,$cnt$$constant-32);
 2124     }
 2125     emit_d8(cbuf,$primary);
 2126     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2127     emit_d8(cbuf,31);
 2128   %}
 2129 
 2130   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2131     int r1, r2;
 2132     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2133     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2134 
 2135     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2136     emit_rm(cbuf, 0x3, r1, r2);
 2137     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2138       emit_opcode(cbuf,$primary);
 2139       emit_rm(cbuf, 0x3, $secondary, r1);
 2140       emit_d8(cbuf,$cnt$$constant-32);
 2141     }
 2142     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2143     emit_rm(cbuf, 0x3, r2, r2);
 2144   %}
 2145 
 2146   // Clone of RegMem but accepts an extra parameter to access each
 2147   // half of a double in memory; it never needs relocation info.
 2148   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2149     emit_opcode(cbuf,$opcode$$constant);
 2150     int reg_encoding = $rm_reg$$reg;
 2151     int base     = $mem$$base;
 2152     int index    = $mem$$index;
 2153     int scale    = $mem$$scale;
 2154     int displace = $mem$$disp + $disp_for_half$$constant;
 2155     relocInfo::relocType disp_reloc = relocInfo::none;
 2156     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2157   %}
 2158 
 2159   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2160   //
 2161   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2162   // and it never needs relocation information.
 2163   // Frequently used to move data between FPU's Stack Top and memory.
 2164   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2165     int rm_byte_opcode = $rm_opcode$$constant;
 2166     int base     = $mem$$base;
 2167     int index    = $mem$$index;
 2168     int scale    = $mem$$scale;
 2169     int displace = $mem$$disp;
 2170     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2171     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2172   %}
 2173 
 2174   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2175     int rm_byte_opcode = $rm_opcode$$constant;
 2176     int base     = $mem$$base;
 2177     int index    = $mem$$index;
 2178     int scale    = $mem$$scale;
 2179     int displace = $mem$$disp;
 2180     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2181     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2182   %}
 2183 
 2184   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2185     int reg_encoding = $dst$$reg;
 2186     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2187     int index        = 0x04;            // 0x04 indicates no index
 2188     int scale        = 0x00;            // 0x00 indicates no scale
 2189     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2190     relocInfo::relocType disp_reloc = relocInfo::none;
 2191     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2192   %}
 2193 
 2194   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2195     // Compare dst,src
 2196     emit_opcode(cbuf,0x3B);
 2197     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2198     // jmp dst < src around move
 2199     emit_opcode(cbuf,0x7C);
 2200     emit_d8(cbuf,2);
 2201     // move dst,src
 2202     emit_opcode(cbuf,0x8B);
 2203     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2204   %}
 2205 
 2206   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2207     // Compare dst,src
 2208     emit_opcode(cbuf,0x3B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210     // jmp dst > src around move
 2211     emit_opcode(cbuf,0x7F);
 2212     emit_d8(cbuf,2);
 2213     // move dst,src
 2214     emit_opcode(cbuf,0x8B);
 2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2216   %}
 2217 
 2218   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2219     // If src is FPR1, we can just FST to store it.
 2220     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2221     int reg_encoding = 0x2; // Just store
 2222     int base  = $mem$$base;
 2223     int index = $mem$$index;
 2224     int scale = $mem$$scale;
 2225     int displace = $mem$$disp;
 2226     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2227     if( $src$$reg != FPR1L_enc ) {
 2228       reg_encoding = 0x3;  // Store & pop
 2229       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2230       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2231     }
 2232     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2233     emit_opcode(cbuf,$primary);
 2234     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2235   %}
 2236 
 2237   enc_class neg_reg(rRegI dst) %{
 2238     // NEG $dst
 2239     emit_opcode(cbuf,0xF7);
 2240     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2241   %}
 2242 
 2243   enc_class setLT_reg(eCXRegI dst) %{
 2244     // SETLT $dst
 2245     emit_opcode(cbuf,0x0F);
 2246     emit_opcode(cbuf,0x9C);
 2247     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2248   %}
 2249 
 2250   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2251     int tmpReg = $tmp$$reg;
 2252 
 2253     // SUB $p,$q
 2254     emit_opcode(cbuf,0x2B);
 2255     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2256     // SBB $tmp,$tmp
 2257     emit_opcode(cbuf,0x1B);
 2258     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2259     // AND $tmp,$y
 2260     emit_opcode(cbuf,0x23);
 2261     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2262     // ADD $p,$tmp
 2263     emit_opcode(cbuf,0x03);
 2264     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2265   %}
 2266 
 2267   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2268     // TEST shift,32
 2269     emit_opcode(cbuf,0xF7);
 2270     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2271     emit_d32(cbuf,0x20);
 2272     // JEQ,s small
 2273     emit_opcode(cbuf, 0x74);
 2274     emit_d8(cbuf, 0x04);
 2275     // MOV    $dst.hi,$dst.lo
 2276     emit_opcode( cbuf, 0x8B );
 2277     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2278     // CLR    $dst.lo
 2279     emit_opcode(cbuf, 0x33);
 2280     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2281 // small:
 2282     // SHLD   $dst.hi,$dst.lo,$shift
 2283     emit_opcode(cbuf,0x0F);
 2284     emit_opcode(cbuf,0xA5);
 2285     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2287     emit_opcode(cbuf,0xD3);
 2288     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2289   %}
 2290 
 2291   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2292     // TEST shift,32
 2293     emit_opcode(cbuf,0xF7);
 2294     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2295     emit_d32(cbuf,0x20);
 2296     // JEQ,s small
 2297     emit_opcode(cbuf, 0x74);
 2298     emit_d8(cbuf, 0x04);
 2299     // MOV    $dst.lo,$dst.hi
 2300     emit_opcode( cbuf, 0x8B );
 2301     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2302     // CLR    $dst.hi
 2303     emit_opcode(cbuf, 0x33);
 2304     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2305 // small:
 2306     // SHRD   $dst.lo,$dst.hi,$shift
 2307     emit_opcode(cbuf,0x0F);
 2308     emit_opcode(cbuf,0xAD);
 2309     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2311     emit_opcode(cbuf,0xD3);
 2312     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2313   %}
 2314 
 2315   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2316     // TEST shift,32
 2317     emit_opcode(cbuf,0xF7);
 2318     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2319     emit_d32(cbuf,0x20);
 2320     // JEQ,s small
 2321     emit_opcode(cbuf, 0x74);
 2322     emit_d8(cbuf, 0x05);
 2323     // MOV    $dst.lo,$dst.hi
 2324     emit_opcode( cbuf, 0x8B );
 2325     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2326     // SAR    $dst.hi,31
 2327     emit_opcode(cbuf, 0xC1);
 2328     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2329     emit_d8(cbuf, 0x1F );
 2330 // small:
 2331     // SHRD   $dst.lo,$dst.hi,$shift
 2332     emit_opcode(cbuf,0x0F);
 2333     emit_opcode(cbuf,0xAD);
 2334     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2336     emit_opcode(cbuf,0xD3);
 2337     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338   %}
 2339 
 2340 
 2341   // ----------------- Encodings for floating point unit -----------------
 2342   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2343   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2344     $$$emit8$primary;
 2345     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2346   %}
 2347 
 2348   // Pop argument in FPR0 with FSTP ST(0)
 2349   enc_class PopFPU() %{
 2350     emit_opcode( cbuf, 0xDD );
 2351     emit_d8( cbuf, 0xD8 );
 2352   %}
 2353 
 2354   // !!!!! equivalent to Pop_Reg_F
 2355   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2356     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2357     emit_d8( cbuf, 0xD8+$dst$$reg );
 2358   %}
 2359 
 2360   enc_class Push_Reg_DPR( regDPR dst ) %{
 2361     emit_opcode( cbuf, 0xD9 );
 2362     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2363   %}
 2364 
 2365   enc_class strictfp_bias1( regDPR dst ) %{
 2366     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2367     emit_opcode( cbuf, 0x2D );
 2368     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2369     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2370     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2371   %}
 2372 
 2373   enc_class strictfp_bias2( regDPR dst ) %{
 2374     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2375     emit_opcode( cbuf, 0x2D );
 2376     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2377     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2378     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2379   %}
 2380 
 2381   // Special case for moving an integer register to a stack slot.
 2382   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2383     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2384   %}
 2385 
 2386   // Special case for moving a register to a stack slot.
 2387   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2388     // Opcode already emitted
 2389     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2390     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2391     emit_d32(cbuf, $dst$$disp);   // Displacement
 2392   %}
 2393 
 2394   // Push the integer in stackSlot 'src' onto FP-stack
 2395   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2396     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2397   %}
 2398 
 2399   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2400   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2401     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2402   %}
 2403 
 2404   // Same as Pop_Mem_F except for opcode
 2405   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2406   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2407     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2408   %}
 2409 
 2410   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2411     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2412     emit_d8( cbuf, 0xD8+$dst$$reg );
 2413   %}
 2414 
 2415   enc_class Push_Reg_FPR( regFPR dst ) %{
 2416     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2417     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2418   %}
 2419 
 2420   // Push FPU's float to a stack-slot, and pop FPU-stack
 2421   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2422     int pop = 0x02;
 2423     if ($src$$reg != FPR1L_enc) {
 2424       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2425       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2426       pop = 0x03;
 2427     }
 2428     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2429   %}
 2430 
 2431   // Push FPU's double to a stack-slot, and pop FPU-stack
 2432   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2433     int pop = 0x02;
 2434     if ($src$$reg != FPR1L_enc) {
 2435       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2436       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2437       pop = 0x03;
 2438     }
 2439     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2440   %}
 2441 
 2442   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2443   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2444     int pop = 0xD0 - 1; // -1 since we skip FLD
 2445     if ($src$$reg != FPR1L_enc) {
 2446       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2447       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2448       pop = 0xD8;
 2449     }
 2450     emit_opcode( cbuf, 0xDD );
 2451     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2452   %}
 2453 
 2454 
 2455   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2456     // load dst in FPR0
 2457     emit_opcode( cbuf, 0xD9 );
 2458     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2459     if ($src$$reg != FPR1L_enc) {
 2460       // fincstp
 2461       emit_opcode (cbuf, 0xD9);
 2462       emit_opcode (cbuf, 0xF7);
 2463       // swap src with FPR1:
 2464       // FXCH FPR1 with src
 2465       emit_opcode(cbuf, 0xD9);
 2466       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2467       // fdecstp
 2468       emit_opcode (cbuf, 0xD9);
 2469       emit_opcode (cbuf, 0xF6);
 2470     }
 2471   %}
 2472 
 2473   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2474     MacroAssembler _masm(&cbuf);
 2475     __ subptr(rsp, 8);
 2476     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2477     __ fld_d(Address(rsp, 0));
 2478     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2479     __ fld_d(Address(rsp, 0));
 2480   %}
 2481 
 2482   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ subptr(rsp, 4);
 2485     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2486     __ fld_s(Address(rsp, 0));
 2487     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2488     __ fld_s(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_ResultD(regD dst) %{
 2492     MacroAssembler _masm(&cbuf);
 2493     __ fstp_d(Address(rsp, 0));
 2494     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2495     __ addptr(rsp, 8);
 2496   %}
 2497 
 2498   enc_class Push_ResultF(regF dst, immI d8) %{
 2499     MacroAssembler _masm(&cbuf);
 2500     __ fstp_s(Address(rsp, 0));
 2501     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2502     __ addptr(rsp, $d8$$constant);
 2503   %}
 2504 
 2505   enc_class Push_SrcD(regD src) %{
 2506     MacroAssembler _masm(&cbuf);
 2507     __ subptr(rsp, 8);
 2508     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2509     __ fld_d(Address(rsp, 0));
 2510   %}
 2511 
 2512   enc_class push_stack_temp_qword() %{
 2513     MacroAssembler _masm(&cbuf);
 2514     __ subptr(rsp, 8);
 2515   %}
 2516 
 2517   enc_class pop_stack_temp_qword() %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ addptr(rsp, 8);
 2520   %}
 2521 
 2522   enc_class push_xmm_to_fpr1(regD src) %{
 2523     MacroAssembler _masm(&cbuf);
 2524     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2525     __ fld_d(Address(rsp, 0));
 2526   %}
 2527 
 2528   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2529     if ($src$$reg != FPR1L_enc) {
 2530       // fincstp
 2531       emit_opcode (cbuf, 0xD9);
 2532       emit_opcode (cbuf, 0xF7);
 2533       // FXCH FPR1 with src
 2534       emit_opcode(cbuf, 0xD9);
 2535       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2536       // fdecstp
 2537       emit_opcode (cbuf, 0xD9);
 2538       emit_opcode (cbuf, 0xF6);
 2539     }
 2540     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2541     // // FSTP   FPR$dst$$reg
 2542     // emit_opcode( cbuf, 0xDD );
 2543     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2544   %}
 2545 
 2546   enc_class fnstsw_sahf_skip_parity() %{
 2547     // fnstsw ax
 2548     emit_opcode( cbuf, 0xDF );
 2549     emit_opcode( cbuf, 0xE0 );
 2550     // sahf
 2551     emit_opcode( cbuf, 0x9E );
 2552     // jnp  ::skip
 2553     emit_opcode( cbuf, 0x7B );
 2554     emit_opcode( cbuf, 0x05 );
 2555   %}
 2556 
 2557   enc_class emitModDPR() %{
 2558     // fprem must be iterative
 2559     // :: loop
 2560     // fprem
 2561     emit_opcode( cbuf, 0xD9 );
 2562     emit_opcode( cbuf, 0xF8 );
 2563     // wait
 2564     emit_opcode( cbuf, 0x9b );
 2565     // fnstsw ax
 2566     emit_opcode( cbuf, 0xDF );
 2567     emit_opcode( cbuf, 0xE0 );
 2568     // sahf
 2569     emit_opcode( cbuf, 0x9E );
 2570     // jp  ::loop
 2571     emit_opcode( cbuf, 0x0F );
 2572     emit_opcode( cbuf, 0x8A );
 2573     emit_opcode( cbuf, 0xF4 );
 2574     emit_opcode( cbuf, 0xFF );
 2575     emit_opcode( cbuf, 0xFF );
 2576     emit_opcode( cbuf, 0xFF );
 2577   %}
 2578 
 2579   enc_class fpu_flags() %{
 2580     // fnstsw_ax
 2581     emit_opcode( cbuf, 0xDF);
 2582     emit_opcode( cbuf, 0xE0);
 2583     // test ax,0x0400
 2584     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2585     emit_opcode( cbuf, 0xA9 );
 2586     emit_d16   ( cbuf, 0x0400 );
 2587     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2588     // // test rax,0x0400
 2589     // emit_opcode( cbuf, 0xA9 );
 2590     // emit_d32   ( cbuf, 0x00000400 );
 2591     //
 2592     // jz exit (no unordered comparison)
 2593     emit_opcode( cbuf, 0x74 );
 2594     emit_d8    ( cbuf, 0x02 );
 2595     // mov ah,1 - treat as LT case (set carry flag)
 2596     emit_opcode( cbuf, 0xB4 );
 2597     emit_d8    ( cbuf, 0x01 );
 2598     // sahf
 2599     emit_opcode( cbuf, 0x9E);
 2600   %}
 2601 
 2602   enc_class cmpF_P6_fixup() %{
 2603     // Fixup the integer flags in case comparison involved a NaN
 2604     //
 2605     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2606     emit_opcode( cbuf, 0x7B );
 2607     emit_d8    ( cbuf, 0x03 );
 2608     // MOV AH,1 - treat as LT case (set carry flag)
 2609     emit_opcode( cbuf, 0xB4 );
 2610     emit_d8    ( cbuf, 0x01 );
 2611     // SAHF
 2612     emit_opcode( cbuf, 0x9E);
 2613     // NOP     // target for branch to avoid branch to branch
 2614     emit_opcode( cbuf, 0x90);
 2615   %}
 2616 
 2617 //     fnstsw_ax();
 2618 //     sahf();
 2619 //     movl(dst, nan_result);
 2620 //     jcc(Assembler::parity, exit);
 2621 //     movl(dst, less_result);
 2622 //     jcc(Assembler::below, exit);
 2623 //     movl(dst, equal_result);
 2624 //     jcc(Assembler::equal, exit);
 2625 //     movl(dst, greater_result);
 2626 
 2627 // less_result     =  1;
 2628 // greater_result  = -1;
 2629 // equal_result    = 0;
 2630 // nan_result      = -1;
 2631 
 2632   enc_class CmpF_Result(rRegI dst) %{
 2633     // fnstsw_ax();
 2634     emit_opcode( cbuf, 0xDF);
 2635     emit_opcode( cbuf, 0xE0);
 2636     // sahf
 2637     emit_opcode( cbuf, 0x9E);
 2638     // movl(dst, nan_result);
 2639     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2640     emit_d32( cbuf, -1 );
 2641     // jcc(Assembler::parity, exit);
 2642     emit_opcode( cbuf, 0x7A );
 2643     emit_d8    ( cbuf, 0x13 );
 2644     // movl(dst, less_result);
 2645     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2646     emit_d32( cbuf, -1 );
 2647     // jcc(Assembler::below, exit);
 2648     emit_opcode( cbuf, 0x72 );
 2649     emit_d8    ( cbuf, 0x0C );
 2650     // movl(dst, equal_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, 0 );
 2653     // jcc(Assembler::equal, exit);
 2654     emit_opcode( cbuf, 0x74 );
 2655     emit_d8    ( cbuf, 0x05 );
 2656     // movl(dst, greater_result);
 2657     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2658     emit_d32( cbuf, 1 );
 2659   %}
 2660 
 2661 
 2662   // Compare the longs and set flags
 2663   // BROKEN!  Do Not use as-is
 2664   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2665     // CMP    $src1.hi,$src2.hi
 2666     emit_opcode( cbuf, 0x3B );
 2667     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2668     // JNE,s  done
 2669     emit_opcode(cbuf,0x75);
 2670     emit_d8(cbuf, 2 );
 2671     // CMP    $src1.lo,$src2.lo
 2672     emit_opcode( cbuf, 0x3B );
 2673     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2674 // done:
 2675   %}
 2676 
 2677   enc_class convert_int_long( regL dst, rRegI src ) %{
 2678     // mov $dst.lo,$src
 2679     int dst_encoding = $dst$$reg;
 2680     int src_encoding = $src$$reg;
 2681     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2682     // mov $dst.hi,$src
 2683     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2684     // sar $dst.hi,31
 2685     emit_opcode( cbuf, 0xC1 );
 2686     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2687     emit_d8(cbuf, 0x1F );
 2688   %}
 2689 
 2690   enc_class convert_long_double( eRegL src ) %{
 2691     // push $src.hi
 2692     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2693     // push $src.lo
 2694     emit_opcode(cbuf, 0x50+$src$$reg  );
 2695     // fild 64-bits at [SP]
 2696     emit_opcode(cbuf,0xdf);
 2697     emit_d8(cbuf, 0x6C);
 2698     emit_d8(cbuf, 0x24);
 2699     emit_d8(cbuf, 0x00);
 2700     // pop stack
 2701     emit_opcode(cbuf, 0x83); // add  SP, #8
 2702     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2703     emit_d8(cbuf, 0x8);
 2704   %}
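        // What convert_long_double computes, as an illustrative C sketch (assumed
        // semantics, not part of the encoding): the 64-bit integer is spilled to the
        // stack and FILD converts it onto the FPU stack:
        //   double l2d(long long src) { return (double)src; }   // push hi; push lo; FILD qword [ESP]; ADD ESP,8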
 2705 
 2706   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2707     // IMUL   EDX:EAX,$src1
 2708     emit_opcode( cbuf, 0xF7 );
 2709     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2710     // SAR    EDX,$cnt-32
 2711     int shift_count = ((int)$cnt$$constant) - 32;
 2712     if (shift_count > 0) {
 2713       emit_opcode(cbuf, 0xC1);
 2714       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2715       emit_d8(cbuf, shift_count);
 2716     }
 2717   %}
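        // Illustrative C sketch of what this encoding computes (assumed semantics;
        // the helper name is for illustration only):
        //   int mul_hi_shift(int a, int b, int cnt /* 32..63 */) {
        //     long long prod = (long long)a * (long long)b;   // one-operand IMUL leaves the product in EDX:EAX
        //     return (int)(prod >> cnt);                      // EDX holds prod >> 32; SAR EDX,cnt-32 (when cnt > 32) finishes
        //   }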
 2718 
 2719   // This version omits the trailing ADD ESP, 8
 2720   enc_class convert_long_double2( eRegL src ) %{
 2721     // push $src.hi
 2722     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2723     // push $src.lo
 2724     emit_opcode(cbuf, 0x50+$src$$reg  );
 2725     // fild 64-bits at [SP]
 2726     emit_opcode(cbuf,0xdf);
 2727     emit_d8(cbuf, 0x6C);
 2728     emit_d8(cbuf, 0x24);
 2729     emit_d8(cbuf, 0x00);
 2730   %}
 2731 
 2732   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2733     // Basic idea: long = (long)int * (long)int
 2734     // IMUL EDX:EAX, src
 2735     emit_opcode( cbuf, 0xF7 );
 2736     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2737   %}
 2738 
 2739   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2740     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2741     // MUL EDX:EAX, src
 2742     emit_opcode( cbuf, 0xF7 );
 2743     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2744   %}
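        // The two encodings above, as an illustrative C sketch (assumed semantics):
        //   long long          s = (long long)x * (long long)y;                     // long_int_multiply: one-operand IMUL, signed 32x32->64 into EDX:EAX
        //   unsigned long long u = (unsigned long long)(unsigned)x * (unsigned)y;   // long_uint_multiply: one-operand MUL, unsigned 32x32->64 into EDX:EAX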
 2745 
 2746   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2747     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2748     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2749     // MOV    $tmp,$src.lo
 2750     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2751     // IMUL   $tmp,EDX
 2752     emit_opcode( cbuf, 0x0F );
 2753     emit_opcode( cbuf, 0xAF );
 2754     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2755     // MOV    EDX,$src.hi
 2756     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2757     // IMUL   EDX,EAX
 2758     emit_opcode( cbuf, 0x0F );
 2759     emit_opcode( cbuf, 0xAF );
 2760     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2761     // ADD    $tmp,EDX
 2762     emit_opcode( cbuf, 0x03 );
 2763     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2764     // MUL   EDX:EAX,$src.lo
 2765     emit_opcode( cbuf, 0xF7 );
 2766     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2767     // ADD    EDX,$tmp
 2768     emit_opcode( cbuf, 0x03 );
 2769     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2770   %}
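        // Illustrative C sketch of the decomposition used above (assumed semantics):
        //   unsigned long long lmul(unsigned long long x, unsigned long long y) {
        //     unsigned xlo = (unsigned)x, xhi = (unsigned)(x >> 32);
        //     unsigned ylo = (unsigned)y, yhi = (unsigned)(y >> 32);
        //     unsigned long long p = (unsigned long long)xlo * ylo;       // MUL: full 64-bit low product
        //     unsigned hi = (unsigned)(p >> 32) + xhi * ylo + xlo * yhi;  // cross terms: only their low 32 bits matter
        //     return ((unsigned long long)hi << 32) | (unsigned)p;
        //   }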
 2771 
 2772   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2773     // Basic idea: lo(result) = lo(src * y_lo)
 2774     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2775     // IMUL   $tmp,EDX,$src
 2776     emit_opcode( cbuf, 0x6B );
 2777     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2778     emit_d8( cbuf, (int)$src$$constant );
 2779     // MOV    EDX,$src
 2780     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2781     emit_d32( cbuf, (int)$src$$constant );
 2782     // MUL   EDX:EAX,EDX
 2783     emit_opcode( cbuf, 0xF7 );
 2784     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2785     // ADD    EDX,$tmp
 2786     emit_opcode( cbuf, 0x03 );
 2787     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2788   %}
 2789 
 2790   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2791     // PUSH src1.hi
 2792     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2793     // PUSH src1.lo
 2794     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2795     // PUSH src2.hi
 2796     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2797     // PUSH src2.lo
 2798     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2799     // CALL directly to the runtime
 2800     MacroAssembler _masm(&cbuf);
 2801     cbuf.set_insts_mark();
 2802     emit_opcode(cbuf,0xE8);       // Call into runtime
 2803     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2804     __ post_call_nop();
 2805     // Restore stack
 2806     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2807     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2808     emit_d8(cbuf, 4*4);
 2809   %}
 2810 
 2811   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2812     // PUSH src1.hi
 2813     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2814     // PUSH src1.lo
 2815     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2816     // PUSH src2.hi
 2817     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2818     // PUSH src2.lo
 2819     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2820     // CALL directly to the runtime
 2821     MacroAssembler _masm(&cbuf);
 2822     cbuf.set_insts_mark();
 2823     emit_opcode(cbuf,0xE8);       // Call into runtime
 2824     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2825     __ post_call_nop();
 2826     // Restore stack
 2827     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2828     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2829     emit_d8(cbuf, 4*4);
 2830   %}
 2831 
 2832   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2833     // MOV   $tmp,$src.lo
 2834     emit_opcode(cbuf, 0x8B);
 2835     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2836     // OR    $tmp,$src.hi
 2837     emit_opcode(cbuf, 0x0B);
 2838     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2839   %}
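        // Illustrative note: the OR above sets ZF iff ($src.lo | $src.hi) == 0,
        // i.e. iff the whole 64-bit value is zero (a long compare against zero).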
 2840 
 2841   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2842     // CMP    $src1.lo,$src2.lo
 2843     emit_opcode( cbuf, 0x3B );
 2844     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2845     // JNE,s  skip
 2846     emit_cc(cbuf, 0x70, 0x5);
 2847     emit_d8(cbuf,2);
 2848     // CMP    $src1.hi,$src2.hi
 2849     emit_opcode( cbuf, 0x3B );
 2850     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2851   %}
 2852 
 2853   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2854     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2855     emit_opcode( cbuf, 0x3B );
 2856     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2857     // MOV    $tmp,$src1.hi
 2858     emit_opcode( cbuf, 0x8B );
 2859     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2860     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2861     emit_opcode( cbuf, 0x1B );
 2862     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2863   %}
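        // Illustrative sketch (assumed semantics): the CMP/SBB pair performs a full
        // 64-bit subtract whose numeric result is discarded, keeping only the flags:
        //   borrow = (unsigned)src1.lo < (unsigned)src2.lo;   // CMP sets CF
        //   tmp    = src1.hi - src2.hi - borrow;              // SBB sets S/O as for the 64-bit difference
        // so signed LT/GE tests on the flags reflect the long comparison src1 vs src2.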
 2864 
 2865   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2866     // XOR    $tmp,$tmp
 2867     emit_opcode(cbuf,0x33);  // XOR
 2868     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2869     // CMP    $tmp,$src.lo
 2870     emit_opcode( cbuf, 0x3B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2872     // SBB    $tmp,$src.hi
 2873     emit_opcode( cbuf, 0x1B );
 2874     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2875   %}
 2876 
 2877  // Sniff, sniff... smells like Gnu Superoptimizer
 2878   enc_class neg_long( eRegL dst ) %{
 2879     emit_opcode(cbuf,0xF7);    // NEG hi
 2880     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2881     emit_opcode(cbuf,0xF7);    // NEG lo
 2882     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2883     emit_opcode(cbuf,0x83);    // SBB hi,0
 2884     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2885     emit_d8    (cbuf,0 );
 2886   %}
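        // Illustrative sketch: two's-complement negation of the 64-bit pair {hi,lo}:
        //   hi = -hi;        // NEG hi
        //   lo = -lo;        // NEG lo, sets CF iff lo was non-zero
        //   hi = hi - CF;    // SBB hi,0 propagates the borrow out of the low word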
 2887 
 2888   enc_class enc_pop_rdx() %{
 2889     emit_opcode(cbuf,0x5A);
 2890   %}
 2891 
 2892   enc_class enc_rethrow() %{
 2893     MacroAssembler _masm(&cbuf);
 2894     cbuf.set_insts_mark();
 2895     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2896     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2897                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2898     __ post_call_nop();
 2899   %}
 2900 
 2901 
 2902   // Convert a double to an int.  Java semantics require we do complex
 2903   // manglelations in the corner cases.  So we set the rounding mode to
 2904   // 'zero', store the darned double down as an int, and reset the
 2905   // rounding mode to 'nearest'.  If the conversion overflows or the value
 2906   // is a NaN, the hardware stores a sentinel that we test for and fix up via a runtime stub.
 2907   enc_class DPR2I_encoding( regDPR src ) %{
 2908     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2909     // exceptions here, so that a NAN or other corner-case value will
 2910     // throw an exception (but normal values get converted at full speed).
 2911     // However, I2C adapters and other float-stack manglers leave pending
 2912     // invalid-op exceptions hanging.  We would have to clear them before
 2913     // enabling them and that is more expensive than just testing for the
 2914     // invalid value Intel stores down in the corner cases.
 2915     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2916     emit_opcode(cbuf,0x2D);
 2917     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2918     // Allocate a word
 2919     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2920     emit_opcode(cbuf,0xEC);
 2921     emit_d8(cbuf,0x04);
 2922     // Encoding assumes a double has been pushed into FPR0.
 2923     // Store down the double as an int, popping the FPU stack
 2924     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2925     emit_opcode(cbuf,0x1C);
 2926     emit_d8(cbuf,0x24);
 2927     // Restore the rounding mode; mask the exception
 2928     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2929     emit_opcode(cbuf,0x2D);
 2930     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2931         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2932         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2933 
 2934     // Load the converted int; adjust CPU stack
 2935     emit_opcode(cbuf,0x58);       // POP EAX
 2936     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2937     emit_d32   (cbuf,0x80000000); //         0x80000000
 2938     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2939     emit_d8    (cbuf,0x07);       // Size of slow_call
 2940     // Push src onto stack slow-path
 2941     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2942     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2943     // CALL directly to the runtime
 2944     MacroAssembler _masm(&cbuf);
 2945     cbuf.set_insts_mark();
 2946     emit_opcode(cbuf,0xE8);       // Call into runtime
 2947     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2948     __ post_call_nop();
 2949     // Carry on here...
 2950   %}
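        // Illustrative C sketch of the overall d2i path (helper names are assumed;
        // only the sentinel test and the d2i_wrapper call mirror the encoding above):
        //   int d2i(double d) {
        //     int r = fistp_truncating(d);        // FLDCW trunc; FISTP dword; FLDCW std
        //     if (r == 0x80000000)                // sentinel stored for overflow or NaN
        //       r = d2i_wrapper(d);               // runtime stub produces the Java-correct result
        //     return r;
        //   }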
 2951 
 2952   enc_class DPR2L_encoding( regDPR src ) %{
 2953     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2954     emit_opcode(cbuf,0x2D);
 2955     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2956     // Allocate two words (8 bytes)
 2957     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2958     emit_opcode(cbuf,0xEC);
 2959     emit_d8(cbuf,0x08);
 2960     // Encoding assumes a double has been pushed into FPR0.
 2961     // Store down the double as a long, popping the FPU stack
 2962     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2963     emit_opcode(cbuf,0x3C);
 2964     emit_d8(cbuf,0x24);
 2965     // Restore the rounding mode; mask the exception
 2966     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2967     emit_opcode(cbuf,0x2D);
 2968     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2969         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2970         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2971 
 2972     // Load the converted long; adjust CPU stack
 2973     emit_opcode(cbuf,0x58);       // POP EAX
 2974     emit_opcode(cbuf,0x5A);       // POP EDX
 2975     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2976     emit_d8    (cbuf,0xFA);       // rdx
 2977     emit_d32   (cbuf,0x80000000); //         0x80000000
 2978     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2979     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2980     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2981     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2982     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2983     emit_d8    (cbuf,0x07);       // Size of slow_call
 2984     // Push src onto stack slow-path
 2985     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2986     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2987     // CALL directly to the runtime
 2988     MacroAssembler _masm(&cbuf);
 2989     cbuf.set_insts_mark();
 2990     emit_opcode(cbuf,0xE8);       // Call into runtime
 2991     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2992     __ post_call_nop();
 2993     // Carry on here...
 2994   %}
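        // Illustrative note: for the long variant the sentinel is 0x8000000000000000;
        // the slow call is taken only when EDX == 0x80000000 and EAX == 0, and
        // d2l_wrapper then computes the Java-correct value.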
 2995 
 2996   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2997     // Operand was loaded from memory into fp ST (stack top)
 2998     // FMUL   ST,$src  /* D8 C8+i */
 2999     emit_opcode(cbuf, 0xD8);
 3000     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3001   %}
 3002 
 3003   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 3004     // FADD   ST,src2  /* D8 C0+i */
 3005     emit_opcode(cbuf, 0xD8);
 3006     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3007     // Could use FADDP  src2,fpST  /* DE C0+i */
 3008   %}
 3009 
 3010   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3011     // FADDP  src2,ST  /* DE C0+i */
 3012     emit_opcode(cbuf, 0xDE);
 3013     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3014   %}
 3015 
 3016   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3017     // Operand has been loaded into fp ST (stack top)
 3018     // FSUB   ST,$src1
 3019     emit_opcode(cbuf, 0xD8);
 3020     emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3021 
 3022     // FDIV
 3023     emit_opcode(cbuf, 0xD8);
 3024     emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3025   %}
 3026 
 3027   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3028     // Operand was loaded from memory into fp ST (stack top)
 3029     // FADD   ST,$src  /* D8 C0+i */
 3030     emit_opcode(cbuf, 0xD8);
 3031     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3032 
 3033     // FMUL   ST,src2  /* D8 C8+i */
 3034     emit_opcode(cbuf, 0xD8);
 3035     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3036   %}
 3037 
 3038 
 3039   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3040     // Operand was loaded from memory into fp ST (stack top)
 3041     // FADD   ST,$src  /* D8 C0+i */
 3042     emit_opcode(cbuf, 0xD8);
 3043     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3044 
 3045     // FMULP  src2,ST  /* DE C8+i */
 3046     emit_opcode(cbuf, 0xDE);
 3047     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3048   %}
 3049 
 3050   // Atomically load the volatile long
 3051   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3052     emit_opcode(cbuf,0xDF);
 3053     int rm_byte_opcode = 0x05;
 3054     int base     = $mem$$base;
 3055     int index    = $mem$$index;
 3056     int scale    = $mem$$scale;
 3057     int displace = $mem$$disp;
 3058     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3059     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3060     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3061   %}
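        // Illustrative sketch (assumed semantics): the FILD above reads all 64 bits of
        // [$mem] in one access, and the FISTP emitted by store_to_stackslot writes them
        // to the destination stack slot, so the volatile long is loaded atomically:
        //   FILD  qword ptr [$mem]          // single 64-bit read
        //   FISTP qword ptr [ESP + $dst]    // spill to the stack slot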
 3062 
 3063   // Volatile Store Long.  Must be atomic, so move it into
 3064   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3065   // target address before the store (for null-ptr checks)
 3066   // so the memory operand is used twice in the encoding.
 3067   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3068     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3069     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3070     emit_opcode(cbuf,0xDF);
 3071     int rm_byte_opcode = 0x07;
 3072     int base     = $mem$$base;
 3073     int index    = $mem$$index;
 3074     int scale    = $mem$$scale;
 3075     int displace = $mem$$disp;
 3076     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3077     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3078   %}
 3079 
 3080 %}
 3081 
 3082 
 3083 //----------FRAME--------------------------------------------------------------
 3084 // Definition of frame structure and management information.
 3085 //
 3086 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3087 //                             |   (to get allocators register number
 3088 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3089 //  r   CALLER     |        |
 3090 //  o     |        +--------+      pad to even-align allocators stack-slot
 3091 //  w     V        |  pad0  |        numbers; owned by CALLER
 3092 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3093 //  h     ^        |   in   |  5
 3094 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3095 //  |     |        |        |  3
 3096 //  |     |        +--------+
 3097 //  V     |        | old out|      Empty on Intel, window on Sparc
 3098 //        |    old |preserve|      Must be even aligned.
 3099 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3100 //        |        |   in   |  3   area for Intel ret address
 3101 //     Owned by    |preserve|      Empty on Sparc.
 3102 //       SELF      +--------+
 3103 //        |        |  pad2  |  2   pad to align old SP
 3104 //        |        +--------+  1
 3105 //        |        | locks  |  0
 3106 //        |        +--------+----> OptoReg::stack0(), even aligned
 3107 //        |        |  pad1  | 11   pad to align new SP
 3108 //        |        +--------+
 3109 //        |        |        | 10
 3110 //        |        | spills |  9   spills
 3111 //        V        |        |  8   (pad0 slot for callee)
 3112 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3113 //        ^        |  out   |  7
 3114 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3115 //     Owned by    +--------+
 3116 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3117 //        |    new |preserve|      Must be even-aligned.
 3118 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3119 //        |        |        |
 3120 //
 3121 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3122 //         known from SELF's arguments and the Java calling convention.
 3123 //         Region 6-7 is determined per call site.
 3124 // Note 2: If the calling convention leaves holes in the incoming argument
 3125 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3126 //         are owned by the CALLEE.  Holes should not be necessary in the
 3127 //         incoming area, as the Java calling convention is completely under
 3128 //         the control of the AD file.  Doubles can be sorted and packed to
 3129 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3130 //         varargs C calling conventions.
 3131 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3132 //         even aligned with pad0 as needed.
 3133 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3134 //         region 6-11 is even aligned; it may be padded out more so that
 3135 //         the region from SP to FP meets the minimum stack alignment.
 3136 
 3137 frame %{
 3138   // These three registers define part of the calling convention
 3139   // between compiled code and the interpreter.
 3140   inline_cache_reg(EAX);                // Inline Cache Register
 3141 
 3142   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3143   cisc_spilling_operand_name(indOffset32);
 3144 
 3145   // Number of stack slots consumed by locking an object
 3146   sync_stack_slots(1);
 3147 
 3148   // Compiled code's Frame Pointer
 3149   frame_pointer(ESP);
 3150   // Interpreter stores its frame pointer in a register which is
 3151   // stored to the stack by I2CAdaptors.
 3152   // I2CAdaptors convert from interpreted Java to compiled Java.
 3153   interpreter_frame_pointer(EBP);
 3154 
 3155   // Stack alignment requirement
 3156   // Alignment size in bytes (128-bit -> 16 bytes)
 3157   stack_alignment(StackAlignmentInBytes);
 3158 
 3159   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3160   // for calls to C.  Supports the var-args backing area for register parms.
 3161   varargs_C_out_slots_killed(0);
 3162 
 3163   // The after-PROLOG location of the return address.  Location of
 3164   // return address specifies a type (REG or STACK) and a number
 3165   // representing the register number (i.e. - use a register name) or
 3166   // stack slot.
 3167   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3168   // Otherwise, it is above the locks and verification slot and alignment word
 3169   return_addr(STACK - 1 +
 3170               align_up((Compile::current()->in_preserve_stack_slots() +
 3171                         Compile::current()->fixed_slots()),
 3172                        stack_alignment_in_slots()));
 3173 
 3174   // Location of C & interpreter return values
 3175   c_return_value %{
 3176     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3177     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3178     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3179 
 3180     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3181     // that C functions return float and double results in XMM0.
 3182     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3183       return OptoRegPair(XMM0b_num,XMM0_num);
 3184     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3185       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3186 
 3187     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3188   %}
 3189 
 3190   // Location of return values
 3191   return_value %{
 3192     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3193     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3194     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3195     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3196       return OptoRegPair(XMM0b_num,XMM0_num);
 3197     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3198       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3199     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3200   %}
 3201 
 3202 %}
 3203 
 3204 //----------ATTRIBUTES---------------------------------------------------------
 3205 //----------Operand Attributes-------------------------------------------------
 3206 op_attrib op_cost(0);        // Required cost attribute
 3207 
 3208 //----------Instruction Attributes---------------------------------------------
 3209 ins_attrib ins_cost(100);       // Required cost attribute
 3210 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3211 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3212                                 // non-matching short branch variant of some
 3213                                 // long branch?
 3214 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3215                                 // specifies the alignment that some part of the instruction (not
 3216                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3217                                 // function must be provided for the instruction
 3218 
 3219 //----------OPERANDS-----------------------------------------------------------
 3220 // Operand definitions must precede instruction definitions for correct parsing
 3221 // in the ADLC because operands constitute user defined types which are used in
 3222 // instruction definitions.
 3223 
 3224 //----------Simple Operands----------------------------------------------------
 3225 // Immediate Operands
 3226 // Integer Immediate
 3227 operand immI() %{
 3228   match(ConI);
 3229 
 3230   op_cost(10);
 3231   format %{ %}
 3232   interface(CONST_INTER);
 3233 %}
 3234 
 3235 // Constant for test vs zero
 3236 operand immI_0() %{
 3237   predicate(n->get_int() == 0);
 3238   match(ConI);
 3239 
 3240   op_cost(0);
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 // Constant for increment
 3246 operand immI_1() %{
 3247   predicate(n->get_int() == 1);
 3248   match(ConI);
 3249 
 3250   op_cost(0);
 3251   format %{ %}
 3252   interface(CONST_INTER);
 3253 %}
 3254 
 3255 // Constant for decrement
 3256 operand immI_M1() %{
 3257   predicate(n->get_int() == -1);
 3258   match(ConI);
 3259 
 3260   op_cost(0);
 3261   format %{ %}
 3262   interface(CONST_INTER);
 3263 %}
 3264 
 3265 // Valid scale values for addressing modes
 3266 operand immI2() %{
 3267   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3268   match(ConI);
 3269 
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 operand immI8() %{
 3275   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3276   match(ConI);
 3277 
 3278   op_cost(5);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immU8() %{
 3284   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3285   match(ConI);
 3286 
 3287   op_cost(5);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 operand immI16() %{
 3293   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3294   match(ConI);
 3295 
 3296   op_cost(10);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 // Int Immediate non-negative
 3302 operand immU31()
 3303 %{
 3304   predicate(n->get_int() >= 0);
 3305   match(ConI);
 3306 
 3307   op_cost(0);
 3308   format %{ %}
 3309   interface(CONST_INTER);
 3310 %}
 3311 
 3312 // Constant for long shifts
 3313 operand immI_32() %{
 3314   predicate( n->get_int() == 32 );
 3315   match(ConI);
 3316 
 3317   op_cost(0);
 3318   format %{ %}
 3319   interface(CONST_INTER);
 3320 %}
 3321 
 3322 operand immI_1_31() %{
 3323   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3324   match(ConI);
 3325 
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_32_63() %{
 3332   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3333   match(ConI);
 3334   op_cost(0);
 3335 
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_2() %{
 3341   predicate( n->get_int() == 2 );
 3342   match(ConI);
 3343 
 3344   op_cost(0);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_3() %{
 3350   predicate( n->get_int() == 3 );
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 operand immI_4()
 3359 %{
 3360   predicate(n->get_int() == 4);
 3361   match(ConI);
 3362 
 3363   op_cost(0);
 3364   format %{ %}
 3365   interface(CONST_INTER);
 3366 %}
 3367 
 3368 operand immI_8()
 3369 %{
 3370   predicate(n->get_int() == 8);
 3371   match(ConI);
 3372 
 3373   op_cost(0);
 3374   format %{ %}
 3375   interface(CONST_INTER);
 3376 %}
 3377 
 3378 // Pointer Immediate
 3379 operand immP() %{
 3380   match(ConP);
 3381 
 3382   op_cost(10);
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Null Pointer Immediate
 3388 operand immP0() %{
 3389   predicate( n->get_ptr() == 0 );
 3390   match(ConP);
 3391   op_cost(0);
 3392 
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long Immediate
 3398 operand immL() %{
 3399   match(ConL);
 3400 
 3401   op_cost(20);
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long Immediate zero
 3407 operand immL0() %{
 3408   predicate( n->get_long() == 0L );
 3409   match(ConL);
 3410   op_cost(0);
 3411 
 3412   format %{ %}
 3413   interface(CONST_INTER);
 3414 %}
 3415 
 3416 // Long Immediate zero
 3417 operand immL_M1() %{
 3418   predicate( n->get_long() == -1L );
 3419   match(ConL);
 3420   op_cost(0);
 3421 
 3422   format %{ %}
 3423   interface(CONST_INTER);
 3424 %}
 3425 
 3426 // Long immediate from 0 to 127.
 3427 // Used for a shorter form of long mul by 10.
 3428 operand immL_127() %{
 3429   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3430   match(ConL);
 3431   op_cost(0);
 3432 
 3433   format %{ %}
 3434   interface(CONST_INTER);
 3435 %}
 3436 
 3437 // Long Immediate: low 32-bit mask
 3438 operand immL_32bits() %{
 3439   predicate(n->get_long() == 0xFFFFFFFFL);
 3440   match(ConL);
 3441   op_cost(0);
 3442 
 3443   format %{ %}
 3444   interface(CONST_INTER);
 3445 %}
 3446 
 3447 // Long Immediate: value that fits in 32 bits (sign-extended)
 3448 operand immL32() %{
 3449   predicate(n->get_long() == (int)(n->get_long()));
 3450   match(ConL);
 3451   op_cost(20);
 3452 
 3453   format %{ %}
 3454   interface(CONST_INTER);
 3455 %}
 3456 
 3457 // Double Immediate zero
 3458 operand immDPR0() %{
 3459   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3460   // bug that generates code such that NaNs compare equal to 0.0
 3461   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3462   match(ConD);
 3463 
 3464   op_cost(5);
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
 3469 // Double Immediate one
 3470 operand immDPR1() %{
 3471   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3472   match(ConD);
 3473 
 3474   op_cost(5);
 3475   format %{ %}
 3476   interface(CONST_INTER);
 3477 %}
 3478 
 3479 // Double Immediate
 3480 operand immDPR() %{
 3481   predicate(UseSSE<=1);
 3482   match(ConD);
 3483 
 3484   op_cost(5);
 3485   format %{ %}
 3486   interface(CONST_INTER);
 3487 %}
 3488 
 3489 operand immD() %{
 3490   predicate(UseSSE>=2);
 3491   match(ConD);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 // Double Immediate zero
 3499 operand immD0() %{
 3500   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3501   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3502   // compare equal to -0.0.
 3503   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3504   match(ConD);
 3505 
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Float Immediate zero
 3511 operand immFPR0() %{
 3512   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3513   match(ConF);
 3514 
 3515   op_cost(5);
 3516   format %{ %}
 3517   interface(CONST_INTER);
 3518 %}
 3519 
 3520 // Float Immediate one
 3521 operand immFPR1() %{
 3522   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3523   match(ConF);
 3524 
 3525   op_cost(5);
 3526   format %{ %}
 3527   interface(CONST_INTER);
 3528 %}
 3529 
 3530 // Float Immediate
 3531 operand immFPR() %{
 3532   predicate( UseSSE == 0 );
 3533   match(ConF);
 3534 
 3535   op_cost(5);
 3536   format %{ %}
 3537   interface(CONST_INTER);
 3538 %}
 3539 
 3540 // Float Immediate
 3541 operand immF() %{
 3542   predicate(UseSSE >= 1);
 3543   match(ConF);
 3544 
 3545   op_cost(5);
 3546   format %{ %}
 3547   interface(CONST_INTER);
 3548 %}
 3549 
 3550 // Float Immediate zero.  Zero and not -0.0
 3551 operand immF0() %{
 3552   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3553   match(ConF);
 3554 
 3555   op_cost(5);
 3556   format %{ %}
 3557   interface(CONST_INTER);
 3558 %}
 3559 
 3560 // Immediates for special shifts (sign extend)
 3561 
 3562 // Constants for sign-extending shifts
 3563 operand immI_16() %{
 3564   predicate( n->get_int() == 16 );
 3565   match(ConI);
 3566 
 3567   format %{ %}
 3568   interface(CONST_INTER);
 3569 %}
 3570 
 3571 operand immI_24() %{
 3572   predicate( n->get_int() == 24 );
 3573   match(ConI);
 3574 
 3575   format %{ %}
 3576   interface(CONST_INTER);
 3577 %}
 3578 
 3579 // Constant for byte-wide masking
 3580 operand immI_255() %{
 3581   predicate( n->get_int() == 255 );
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 // Constant for short-wide masking
 3589 operand immI_65535() %{
 3590   predicate(n->get_int() == 65535);
 3591   match(ConI);
 3592 
 3593   format %{ %}
 3594   interface(CONST_INTER);
 3595 %}
 3596 
 3597 operand kReg()
 3598 %{
 3599   constraint(ALLOC_IN_RC(vectmask_reg));
 3600   match(RegVectMask);
 3601   format %{%}
 3602   interface(REG_INTER);
 3603 %}
 3604 
 3605 // Register Operands
 3606 // Integer Register
 3607 operand rRegI() %{
 3608   constraint(ALLOC_IN_RC(int_reg));
 3609   match(RegI);
 3610   match(xRegI);
 3611   match(eAXRegI);
 3612   match(eBXRegI);
 3613   match(eCXRegI);
 3614   match(eDXRegI);
 3615   match(eDIRegI);
 3616   match(eSIRegI);
 3617 
 3618   format %{ %}
 3619   interface(REG_INTER);
 3620 %}
 3621 
 3622 // Subset of Integer Register
 3623 operand xRegI(rRegI reg) %{
 3624   constraint(ALLOC_IN_RC(int_x_reg));
 3625   match(reg);
 3626   match(eAXRegI);
 3627   match(eBXRegI);
 3628   match(eCXRegI);
 3629   match(eDXRegI);
 3630 
 3631   format %{ %}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 // Special Registers
 3636 operand eAXRegI(xRegI reg) %{
 3637   constraint(ALLOC_IN_RC(eax_reg));
 3638   match(reg);
 3639   match(rRegI);
 3640 
 3641   format %{ "EAX" %}
 3642   interface(REG_INTER);
 3643 %}
 3644 
 3645 // Special Registers
 3646 operand eBXRegI(xRegI reg) %{
 3647   constraint(ALLOC_IN_RC(ebx_reg));
 3648   match(reg);
 3649   match(rRegI);
 3650 
 3651   format %{ "EBX" %}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 operand eCXRegI(xRegI reg) %{
 3656   constraint(ALLOC_IN_RC(ecx_reg));
 3657   match(reg);
 3658   match(rRegI);
 3659 
 3660   format %{ "ECX" %}
 3661   interface(REG_INTER);
 3662 %}
 3663 
 3664 operand eDXRegI(xRegI reg) %{
 3665   constraint(ALLOC_IN_RC(edx_reg));
 3666   match(reg);
 3667   match(rRegI);
 3668 
 3669   format %{ "EDX" %}
 3670   interface(REG_INTER);
 3671 %}
 3672 
 3673 operand eDIRegI(xRegI reg) %{
 3674   constraint(ALLOC_IN_RC(edi_reg));
 3675   match(reg);
 3676   match(rRegI);
 3677 
 3678   format %{ "EDI" %}
 3679   interface(REG_INTER);
 3680 %}
 3681 
 3682 operand nadxRegI() %{
 3683   constraint(ALLOC_IN_RC(nadx_reg));
 3684   match(RegI);
 3685   match(eBXRegI);
 3686   match(eCXRegI);
 3687   match(eSIRegI);
 3688   match(eDIRegI);
 3689 
 3690   format %{ %}
 3691   interface(REG_INTER);
 3692 %}
 3693 
 3694 operand ncxRegI() %{
 3695   constraint(ALLOC_IN_RC(ncx_reg));
 3696   match(RegI);
 3697   match(eAXRegI);
 3698   match(eDXRegI);
 3699   match(eSIRegI);
 3700   match(eDIRegI);
 3701 
 3702   format %{ %}
 3703   interface(REG_INTER);
 3704 %}
 3705 
 3706 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3707 // //
 3708 operand eSIRegI(xRegI reg) %{
 3709    constraint(ALLOC_IN_RC(esi_reg));
 3710    match(reg);
 3711    match(rRegI);
 3712 
 3713    format %{ "ESI" %}
 3714    interface(REG_INTER);
 3715 %}
 3716 
 3717 // Pointer Register
 3718 operand anyRegP() %{
 3719   constraint(ALLOC_IN_RC(any_reg));
 3720   match(RegP);
 3721   match(eAXRegP);
 3722   match(eBXRegP);
 3723   match(eCXRegP);
 3724   match(eDIRegP);
 3725   match(eRegP);
 3726 
 3727   format %{ %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand eRegP() %{
 3732   constraint(ALLOC_IN_RC(int_reg));
 3733   match(RegP);
 3734   match(eAXRegP);
 3735   match(eBXRegP);
 3736   match(eCXRegP);
 3737   match(eDIRegP);
 3738 
 3739   format %{ %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 operand rRegP() %{
 3744   constraint(ALLOC_IN_RC(int_reg));
 3745   match(RegP);
 3746   match(eAXRegP);
 3747   match(eBXRegP);
 3748   match(eCXRegP);
 3749   match(eDIRegP);
 3750 
 3751   format %{ %}
 3752   interface(REG_INTER);
 3753 %}
 3754 
 3755 // On windows95, EBP is not safe to use for implicit null tests.
 3756 operand eRegP_no_EBP() %{
 3757   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3758   match(RegP);
 3759   match(eAXRegP);
 3760   match(eBXRegP);
 3761   match(eCXRegP);
 3762   match(eDIRegP);
 3763 
 3764   op_cost(100);
 3765   format %{ %}
 3766   interface(REG_INTER);
 3767 %}
 3768 
 3769 operand pRegP() %{
 3770   constraint(ALLOC_IN_RC(p_reg));
 3771   match(RegP);
 3772   match(eBXRegP);
 3773   match(eDXRegP);
 3774   match(eSIRegP);
 3775   match(eDIRegP);
 3776 
 3777   format %{ %}
 3778   interface(REG_INTER);
 3779 %}
 3780 
 3781 // Special Registers
 3782 // Return a pointer value
 3783 operand eAXRegP(eRegP reg) %{
 3784   constraint(ALLOC_IN_RC(eax_reg));
 3785   match(reg);
 3786   format %{ "EAX" %}
 3787   interface(REG_INTER);
 3788 %}
 3789 
 3790 // Used in AtomicAdd
 3791 operand eBXRegP(eRegP reg) %{
 3792   constraint(ALLOC_IN_RC(ebx_reg));
 3793   match(reg);
 3794   format %{ "EBX" %}
 3795   interface(REG_INTER);
 3796 %}
 3797 
 3798 // Tail-call (interprocedural jump) to interpreter
 3799 operand eCXRegP(eRegP reg) %{
 3800   constraint(ALLOC_IN_RC(ecx_reg));
 3801   match(reg);
 3802   format %{ "ECX" %}
 3803   interface(REG_INTER);
 3804 %}
 3805 
 3806 operand eDXRegP(eRegP reg) %{
 3807   constraint(ALLOC_IN_RC(edx_reg));
 3808   match(reg);
 3809   format %{ "EDX" %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eSIRegP(eRegP reg) %{
 3814   constraint(ALLOC_IN_RC(esi_reg));
 3815   match(reg);
 3816   format %{ "ESI" %}
 3817   interface(REG_INTER);
 3818 %}
 3819 
 3820 // Used in rep stosw
 3821 operand eDIRegP(eRegP reg) %{
 3822   constraint(ALLOC_IN_RC(edi_reg));
 3823   match(reg);
 3824   format %{ "EDI" %}
 3825   interface(REG_INTER);
 3826 %}
 3827 
 3828 operand eRegL() %{
 3829   constraint(ALLOC_IN_RC(long_reg));
 3830   match(RegL);
 3831   match(eADXRegL);
 3832 
 3833   format %{ %}
 3834   interface(REG_INTER);
 3835 %}
 3836 
 3837 operand eADXRegL( eRegL reg ) %{
 3838   constraint(ALLOC_IN_RC(eadx_reg));
 3839   match(reg);
 3840 
 3841   format %{ "EDX:EAX" %}
 3842   interface(REG_INTER);
 3843 %}
 3844 
 3845 operand eBCXRegL( eRegL reg ) %{
 3846   constraint(ALLOC_IN_RC(ebcx_reg));
 3847   match(reg);
 3848 
 3849   format %{ "EBX:ECX" %}
 3850   interface(REG_INTER);
 3851 %}
 3852 
 3853 operand eBDPRegL( eRegL reg ) %{
 3854   constraint(ALLOC_IN_RC(ebpd_reg));
 3855   match(reg);
 3856 
 3857   format %{ "EBP:EDI" %}
 3858   interface(REG_INTER);
 3859 %}
 3860 // Special case for integer high multiply
 3861 operand eADXRegL_low_only() %{
 3862   constraint(ALLOC_IN_RC(eadx_reg));
 3863   match(RegL);
 3864 
 3865   format %{ "EAX" %}
 3866   interface(REG_INTER);
 3867 %}
 3868 
 3869 // Flags register, used as output of compare instructions
 3870 operand rFlagsReg() %{
 3871   constraint(ALLOC_IN_RC(int_flags));
 3872   match(RegFlags);
 3873 
 3874   format %{ "EFLAGS" %}
 3875   interface(REG_INTER);
 3876 %}
 3877 
 3878 // Flags register, used as output of compare instructions
 3879 operand eFlagsReg() %{
 3880   constraint(ALLOC_IN_RC(int_flags));
 3881   match(RegFlags);
 3882 
 3883   format %{ "EFLAGS" %}
 3884   interface(REG_INTER);
 3885 %}
 3886 
 3887 // Flags register, used as output of FLOATING POINT compare instructions
 3888 operand eFlagsRegU() %{
 3889   constraint(ALLOC_IN_RC(int_flags));
 3890   match(RegFlags);
 3891 
 3892   format %{ "EFLAGS_U" %}
 3893   interface(REG_INTER);
 3894 %}
 3895 
 3896 operand eFlagsRegUCF() %{
 3897   constraint(ALLOC_IN_RC(int_flags));
 3898   match(RegFlags);
 3899   predicate(false);
 3900 
 3901   format %{ "EFLAGS_U_CF" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Condition Code Register used by long compare
 3906 operand flagsReg_long_LTGE() %{
 3907   constraint(ALLOC_IN_RC(int_flags));
 3908   match(RegFlags);
 3909   format %{ "FLAGS_LTGE" %}
 3910   interface(REG_INTER);
 3911 %}
 3912 operand flagsReg_long_EQNE() %{
 3913   constraint(ALLOC_IN_RC(int_flags));
 3914   match(RegFlags);
 3915   format %{ "FLAGS_EQNE" %}
 3916   interface(REG_INTER);
 3917 %}
 3918 operand flagsReg_long_LEGT() %{
 3919   constraint(ALLOC_IN_RC(int_flags));
 3920   match(RegFlags);
 3921   format %{ "FLAGS_LEGT" %}
 3922   interface(REG_INTER);
 3923 %}
 3924 
 3925 // Condition Code Register used by unsigned long compare
 3926 operand flagsReg_ulong_LTGE() %{
 3927   constraint(ALLOC_IN_RC(int_flags));
 3928   match(RegFlags);
 3929   format %{ "FLAGS_U_LTGE" %}
 3930   interface(REG_INTER);
 3931 %}
 3932 operand flagsReg_ulong_EQNE() %{
 3933   constraint(ALLOC_IN_RC(int_flags));
 3934   match(RegFlags);
 3935   format %{ "FLAGS_U_EQNE" %}
 3936   interface(REG_INTER);
 3937 %}
 3938 operand flagsReg_ulong_LEGT() %{
 3939   constraint(ALLOC_IN_RC(int_flags));
 3940   match(RegFlags);
 3941   format %{ "FLAGS_U_LEGT" %}
 3942   interface(REG_INTER);
 3943 %}
 3944 
 3945 // Float register operands
 3946 operand regDPR() %{
 3947   predicate( UseSSE < 2 );
 3948   constraint(ALLOC_IN_RC(fp_dbl_reg));
 3949   match(RegD);
 3950   match(regDPR1);
 3951   match(regDPR2);
 3952   format %{ %}
 3953   interface(REG_INTER);
 3954 %}
 3955 
 3956 operand regDPR1(regDPR reg) %{
 3957   predicate( UseSSE < 2 );
 3958   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 3959   match(reg);
 3960   format %{ "FPR1" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 operand regDPR2(regDPR reg) %{
 3965   predicate( UseSSE < 2 );
 3966   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 3967   match(reg);
 3968   format %{ "FPR2" %}
 3969   interface(REG_INTER);
 3970 %}
 3971 
 3972 operand regnotDPR1(regDPR reg) %{
 3973   predicate( UseSSE < 2 );
 3974   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 3975   match(reg);
 3976   format %{ %}
 3977   interface(REG_INTER);
 3978 %}
 3979 
 3980 // Float register operands
 3981 operand regFPR() %{
 3982   predicate( UseSSE < 2 );
 3983   constraint(ALLOC_IN_RC(fp_flt_reg));
 3984   match(RegF);
 3985   match(regFPR1);
 3986   format %{ %}
 3987   interface(REG_INTER);
 3988 %}
 3989 
 3990 // Float register operands
 3991 operand regFPR1(regFPR reg) %{
 3992   predicate( UseSSE < 2 );
 3993   constraint(ALLOC_IN_RC(fp_flt_reg0));
 3994   match(reg);
 3995   format %{ "FPR1" %}
 3996   interface(REG_INTER);
 3997 %}
 3998 
 3999 // XMM Float register operands
 4000 operand regF() %{
 4001   predicate( UseSSE>=1 );
 4002   constraint(ALLOC_IN_RC(float_reg_legacy));
 4003   match(RegF);
 4004   format %{ %}
 4005   interface(REG_INTER);
 4006 %}
 4007 
 4008 operand legRegF() %{
 4009   predicate( UseSSE>=1 );
 4010   constraint(ALLOC_IN_RC(float_reg_legacy));
 4011   match(RegF);
 4012   format %{ %}
 4013   interface(REG_INTER);
 4014 %}
 4015 
 4016 // Float register operands
 4017 operand vlRegF() %{
 4018    constraint(ALLOC_IN_RC(float_reg_vl));
 4019    match(RegF);
 4020 
 4021    format %{ %}
 4022    interface(REG_INTER);
 4023 %}
 4024 
 4025 // XMM Double register operands
 4026 operand regD() %{
 4027   predicate( UseSSE>=2 );
 4028   constraint(ALLOC_IN_RC(double_reg_legacy));
 4029   match(RegD);
 4030   format %{ %}
 4031   interface(REG_INTER);
 4032 %}
 4033 
 4034 // Double register operands
 4035 operand legRegD() %{
 4036   predicate( UseSSE>=2 );
 4037   constraint(ALLOC_IN_RC(double_reg_legacy));
 4038   match(RegD);
 4039   format %{ %}
 4040   interface(REG_INTER);
 4041 %}
 4042 
 4043 operand vlRegD() %{
 4044    constraint(ALLOC_IN_RC(double_reg_vl));
 4045    match(RegD);
 4046 
 4047    format %{ %}
 4048    interface(REG_INTER);
 4049 %}
 4050 
 4051 //----------Memory Operands----------------------------------------------------
 4052 // Direct Memory Operand
 4053 operand direct(immP addr) %{
 4054   match(addr);
 4055 
 4056   format %{ "[$addr]" %}
 4057   interface(MEMORY_INTER) %{
 4058     base(0xFFFFFFFF);
 4059     index(0x4);
 4060     scale(0x0);
 4061     disp($addr);
 4062   %}
 4063 %}
 4064 
 4065 // Indirect Memory Operand
 4066 operand indirect(eRegP reg) %{
 4067   constraint(ALLOC_IN_RC(int_reg));
 4068   match(reg);
 4069 
 4070   format %{ "[$reg]" %}
 4071   interface(MEMORY_INTER) %{
 4072     base($reg);
 4073     index(0x4);
 4074     scale(0x0);
 4075     disp(0x0);
 4076   %}
 4077 %}
 4078 
 4079 // Indirect Memory Plus Short Offset Operand
 4080 operand indOffset8(eRegP reg, immI8 off) %{
 4081   match(AddP reg off);
 4082 
 4083   format %{ "[$reg + $off]" %}
 4084   interface(MEMORY_INTER) %{
 4085     base($reg);
 4086     index(0x4);
 4087     scale(0x0);
 4088     disp($off);
 4089   %}
 4090 %}
 4091 
 4092 // Indirect Memory Plus Long Offset Operand
 4093 operand indOffset32(eRegP reg, immI off) %{
 4094   match(AddP reg off);
 4095 
 4096   format %{ "[$reg + $off]" %}
 4097   interface(MEMORY_INTER) %{
 4098     base($reg);
 4099     index(0x4);
 4100     scale(0x0);
 4101     disp($off);
 4102   %}
 4103 %}
 4104 
 4105 // Indirect Memory Plus Long Offset Operand
 4106 operand indOffset32X(rRegI reg, immP off) %{
 4107   match(AddP off reg);
 4108 
 4109   format %{ "[$reg + $off]" %}
 4110   interface(MEMORY_INTER) %{
 4111     base($reg);
 4112     index(0x4);
 4113     scale(0x0);
 4114     disp($off);
 4115   %}
 4116 %}
 4117 
 4118 // Indirect Memory Plus Index Register Plus Offset Operand
 4119 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4120   match(AddP (AddP reg ireg) off);
 4121 
 4122   op_cost(10);
 4123   format %{"[$reg + $off + $ireg]" %}
 4124   interface(MEMORY_INTER) %{
 4125     base($reg);
 4126     index($ireg);
 4127     scale(0x0);
 4128     disp($off);
 4129   %}
 4130 %}
 4131 
 4132 // Indirect Memory Plus Index Register Plus Offset Operand
 4133 operand indIndex(eRegP reg, rRegI ireg) %{
 4134   match(AddP reg ireg);
 4135 
 4136   op_cost(10);
 4137   format %{"[$reg + $ireg]" %}
 4138   interface(MEMORY_INTER) %{
 4139     base($reg);
 4140     index($ireg);
 4141     scale(0x0);
 4142     disp(0x0);
 4143   %}
 4144 %}
 4145 
 4146 // // -------------------------------------------------------------------------
 4147 // // 486 architecture doesn't support "scale * index + offset" without a base
 4148 // // -------------------------------------------------------------------------
 4149 // // Scaled Memory Operands
 4150 // // Indirect Memory Times Scale Plus Offset Operand
 4151 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4152 //   match(AddP off (LShiftI ireg scale));
 4153 //
 4154 //   op_cost(10);
 4155 //   format %{"[$off + $ireg << $scale]" %}
 4156 //   interface(MEMORY_INTER) %{
 4157 //     base(0x4);
 4158 //     index($ireg);
 4159 //     scale($scale);
 4160 //     disp($off);
 4161 //   %}
 4162 // %}
 4163 
 4164 // Indirect Memory Times Scale Plus Index Register
 4165 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4166   match(AddP reg (LShiftI ireg scale));
 4167 
 4168   op_cost(10);
 4169   format %{"[$reg + $ireg << $scale]" %}
 4170   interface(MEMORY_INTER) %{
 4171     base($reg);
 4172     index($ireg);
 4173     scale($scale);
 4174     disp(0x0);
 4175   %}
 4176 %}
 4177 
 4178 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4179 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4180   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4181 
 4182   op_cost(10);
 4183   format %{"[$reg + $off + $ireg << $scale]" %}
 4184   interface(MEMORY_INTER) %{
 4185     base($reg);
 4186     index($ireg);
 4187     scale($scale);
 4188     disp($off);
 4189   %}
 4190 %}
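      // Illustrative example (hypothetical register assignment): an access of the form
      //   *(base + disp + (i << 2))
      // matches indIndexScaleOffset as [EAX + disp + ECX << 2], with reg=EAX, off=disp,
      // ireg=ECX and scale=2.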
 4191 
 4192 //----------Load Long Memory Operands------------------------------------------
 4193 // The load-long idiom will use its address expression again after loading
 4194 // the first word of the long.  If the load-long destination overlaps with
 4195 // registers used in the addressing expression, the 2nd half will be loaded
 4196 // from a clobbered address.  Fix this by requiring that load-long use
 4197 // address registers that do not overlap with the load-long target.
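      // Illustrative failure mode (hypothetical register assignment): for
      //   long v = *p;   // with v's low half allocated to the same register as p
      // loading the first (low) word would overwrite p, so the second (high) word
      // would then be read from a clobbered address.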
 4198 
 4199 // load-long support
 4200 operand load_long_RegP() %{
 4201   constraint(ALLOC_IN_RC(esi_reg));
 4202   match(RegP);
 4203   match(eSIRegP);
 4204   op_cost(100);
 4205   format %{  %}
 4206   interface(REG_INTER);
 4207 %}
 4208 
 4209 // Indirect Memory Operand Long
 4210 operand load_long_indirect(load_long_RegP reg) %{
 4211   constraint(ALLOC_IN_RC(esi_reg));
 4212   match(reg);
 4213 
 4214   format %{ "[$reg]" %}
 4215   interface(MEMORY_INTER) %{
 4216     base($reg);
 4217     index(0x4);
 4218     scale(0x0);
 4219     disp(0x0);
 4220   %}
 4221 %}
 4222 
 4223 // Indirect Memory Plus Long Offset Operand
 4224 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4225   match(AddP reg off);
 4226 
 4227   format %{ "[$reg + $off]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base($reg);
 4230     index(0x4);
 4231     scale(0x0);
 4232     disp($off);
 4233   %}
 4234 %}
 4235 
 4236 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4237 
 4238 
 4239 //----------Special Memory Operands--------------------------------------------
 4240 // Stack Slot Operand - This operand is used for loading and storing temporary
 4241 //                      values on the stack where a match requires a value to
 4242 //                      flow through memory.
 4243 operand stackSlotP(sRegP reg) %{
 4244   constraint(ALLOC_IN_RC(stack_slots));
 4245   // No match rule because this operand is only generated in matching
 4246   format %{ "[$reg]" %}
 4247   interface(MEMORY_INTER) %{
 4248     base(0x4);   // ESP
 4249     index(0x4);  // No Index
 4250     scale(0x0);  // No Scale
 4251     disp($reg);  // Stack Offset
 4252   %}
 4253 %}
 4254 
 4255 operand stackSlotI(sRegI reg) %{
 4256   constraint(ALLOC_IN_RC(stack_slots));
 4257   // No match rule because this operand is only generated in matching
 4258   format %{ "[$reg]" %}
 4259   interface(MEMORY_INTER) %{
 4260     base(0x4);   // ESP
 4261     index(0x4);  // No Index
 4262     scale(0x0);  // No Scale
 4263     disp($reg);  // Stack Offset
 4264   %}
 4265 %}
 4266 
 4267 operand stackSlotF(sRegF reg) %{
 4268   constraint(ALLOC_IN_RC(stack_slots));
 4269   // No match rule because this operand is only generated in matching
 4270   format %{ "[$reg]" %}
 4271   interface(MEMORY_INTER) %{
 4272     base(0x4);   // ESP
 4273     index(0x4);  // No Index
 4274     scale(0x0);  // No Scale
 4275     disp($reg);  // Stack Offset
 4276   %}
 4277 %}
 4278 
 4279 operand stackSlotD(sRegD reg) %{
 4280   constraint(ALLOC_IN_RC(stack_slots));
 4281   // No match rule because this operand is only generated in matching
 4282   format %{ "[$reg]" %}
 4283   interface(MEMORY_INTER) %{
 4284     base(0x4);   // ESP
 4285     index(0x4);  // No Index
 4286     scale(0x0);  // No Scale
 4287     disp($reg);  // Stack Offset
 4288   %}
 4289 %}
 4290 
 4291 operand stackSlotL(sRegL reg) %{
 4292   constraint(ALLOC_IN_RC(stack_slots));
 4293   // No match rule because this operand is only generated in matching
 4294   format %{ "[$reg]" %}
 4295   interface(MEMORY_INTER) %{
 4296     base(0x4);   // ESP
 4297     index(0x4);  // No Index
 4298     scale(0x0);  // No Scale
 4299     disp($reg);  // Stack Offset
 4300   %}
 4301 %}
 4302 
 4303 //----------Conditional Branch Operands----------------------------------------
 4304 // Comparison Op  - This is the operation of the comparison, and is limited to
 4305 //                  the following set of codes:
 4306 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4307 //
 4308 // Other attributes of the comparison, such as unsignedness, are specified
 4309 // by the comparison instruction that sets a condition code flags register.
 4310 // That result is represented by a flags operand whose subtype is appropriate
 4311 // to the unsignedness (etc.) of the comparison.
 4312 //
 4313 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4314 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4315 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4316 
 4317 // Comparison Code
 4318 operand cmpOp() %{
 4319   match(Bool);
 4320 
 4321   format %{ "" %}
 4322   interface(COND_INTER) %{
 4323     equal(0x4, "e");
 4324     not_equal(0x5, "ne");
 4325     less(0xC, "l");
 4326     greater_equal(0xD, "ge");
 4327     less_equal(0xE, "le");
 4328     greater(0xF, "g");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 // Comparison Code, unsigned compare.  Used by FP also, with
 4335 // C2 (unordered) turned into GT or LT already.  The other bits
 4336 // C0 and C3 are turned into Carry & Zero flags.
 4337 operand cmpOpU() %{
 4338   match(Bool);
 4339 
 4340   format %{ "" %}
 4341   interface(COND_INTER) %{
 4342     equal(0x4, "e");
 4343     not_equal(0x5, "ne");
 4344     less(0x2, "b");
 4345     greater_equal(0x3, "nb");
 4346     less_equal(0x6, "be");
 4347     greater(0x7, "nbe");
 4348     overflow(0x0, "o");
 4349     no_overflow(0x1, "no");
 4350   %}
 4351 %}
 4352 
 4353 // Floating comparisons that don't require any fixup for the unordered case
 4354 operand cmpOpUCF() %{
 4355   match(Bool);
 4356   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4357             n->as_Bool()->_test._test == BoolTest::ge ||
 4358             n->as_Bool()->_test._test == BoolTest::le ||
 4359             n->as_Bool()->_test._test == BoolTest::gt);
 4360   format %{ "" %}
 4361   interface(COND_INTER) %{
 4362     equal(0x4, "e");
 4363     not_equal(0x5, "ne");
 4364     less(0x2, "b");
 4365     greater_equal(0x3, "nb");
 4366     less_equal(0x6, "be");
 4367     greater(0x7, "nbe");
 4368     overflow(0x0, "o");
 4369     no_overflow(0x1, "no");
 4370   %}
 4371 %}
 4372 
 4373 
 4374 // Floating comparisons that can be fixed up with extra conditional jumps
 4375 operand cmpOpUCF2() %{
 4376   match(Bool);
 4377   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4378             n->as_Bool()->_test._test == BoolTest::eq);
 4379   format %{ "" %}
 4380   interface(COND_INTER) %{
 4381     equal(0x4, "e");
 4382     not_equal(0x5, "ne");
 4383     less(0x2, "b");
 4384     greater_equal(0x3, "nb");
 4385     less_equal(0x6, "be");
 4386     greater(0x7, "nbe");
 4387     overflow(0x0, "o");
 4388     no_overflow(0x1, "no");
 4389   %}
 4390 %}
 4391 
 4392 // Comparison Code for FP conditional move
 4393 operand cmpOp_fcmov() %{
 4394   match(Bool);
 4395 
 4396   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4397             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4398   format %{ "" %}
 4399   interface(COND_INTER) %{
 4400     equal        (0x0C8);
 4401     not_equal    (0x1C8);
 4402     less         (0x0C0);
 4403     greater_equal(0x1C0);
 4404     less_equal   (0x0D0);
 4405     greater      (0x1D0);
 4406     overflow(0x0, "o"); // not really supported by the instruction
 4407     no_overflow(0x1, "no"); // not really supported by the instruction
 4408   %}
 4409 %}
 4410 
 4411 // Comparison Code used in long compares
 4412 operand cmpOp_commute() %{
 4413   match(Bool);
 4414 
 4415   format %{ "" %}
 4416   interface(COND_INTER) %{
 4417     equal(0x4, "e");
 4418     not_equal(0x5, "ne");
 4419     less(0xF, "g");
 4420     greater_equal(0xE, "le");
 4421     less_equal(0xD, "ge");
 4422     greater(0xC, "l");
 4423     overflow(0x0, "o");
 4424     no_overflow(0x1, "no");
 4425   %}
 4426 %}
 4427 
 4428 // Comparison Code used in unsigned long compares
 4429 operand cmpOpU_commute() %{
 4430   match(Bool);
 4431 
 4432   format %{ "" %}
 4433   interface(COND_INTER) %{
 4434     equal(0x4, "e");
 4435     not_equal(0x5, "ne");
 4436     less(0x7, "nbe");
 4437     greater_equal(0x6, "be");
 4438     less_equal(0x3, "nb");
 4439     greater(0x2, "b");
 4440     overflow(0x0, "o");
 4441     no_overflow(0x1, "no");
 4442   %}
 4443 %}
 4444 
 4445 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4447 // instruction definitions by not requiring the AD writer to specify separate
 4448 // instructions for every form of operand when the instruction accepts
 4449 // multiple operand types with the same basic encoding and format.  The classic
 4450 // case of this is memory operands.
 4451 
 4452 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4453                indIndex, indIndexScale, indIndexScaleOffset);
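
// For example, the loadI instruction further below declares a "memory mem"
// operand, so that single definition matches integer loads through any of
// the addressing forms listed in the memory opclass above.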
 4454 
// Long memory operations are encoded as 2 instructions, the second using a +4
// offset.  This means some kind of offset is always required and you cannot
// use an oop as the offset (as is done when working on static globals).
 4458 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4459                     indIndex, indIndexScale, indIndexScaleOffset);
 4460 
 4461 
 4462 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 4464 pipeline %{
 4465 
 4466 //----------ATTRIBUTES---------------------------------------------------------
 4467 attributes %{
  variable_size_instructions;        // Variable-size instructions
 4469   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4471   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4472   instruction_fetch_units = 1;       // of 16 bytes
 4473 
 4474   // List of nop instructions
 4475   nops( MachNop );
 4476 %}
 4477 
 4478 //----------RESOURCES----------------------------------------------------------
 4479 // Resources are the functional units available to the machine
 4480 
 4481 // Generic P2/P3 pipeline
 4482 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4483 // 3 instructions decoded per cycle.
 4484 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
 4486 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4487            MS0, MS1, MEM = MS0 | MS1,
 4488            BR, FPU,
 4489            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4490 
 4491 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4492 // Pipeline Description specifies the stages in the machine's pipeline
 4493 
 4494 // Generic P2/P3 pipeline
 4495 pipe_desc(S0, S1, S2, S3, S4, S5);
 4496 
 4497 //----------PIPELINE CLASSES---------------------------------------------------
 4498 // Pipeline Classes describe the stages in which input and output are
 4499 // referenced by the hardware pipeline.
 4500 
 4501 // Naming convention: ialu or fpu
 4502 // Then: _reg
 4503 // Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long operation
 4505 // Then: _fat if it requires the big decoder
 4506 //   Or: _mem if it requires the big decoder and a memory unit.
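//
// A rough reading of the pipe_class notation that follows: "operand : Sn(read)"
// or "operand : Sn(write)" gives the stage in which an operand is read or
// written, and "RESOURCE : Sn(k)" claims k units of that resource at stage Sn,
// so "DECODE : S0(2)" asks for two decode slots in stage S0.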
 4507 
 4508 // Integer ALU reg operation
 4509 pipe_class ialu_reg(rRegI dst) %{
 4510     single_instruction;
 4511     dst    : S4(write);
 4512     dst    : S3(read);
 4513     DECODE : S0;        // any decoder
 4514     ALU    : S3;        // any alu
 4515 %}
 4516 
 4517 // Long ALU reg operation
 4518 pipe_class ialu_reg_long(eRegL dst) %{
 4519     instruction_count(2);
 4520     dst    : S4(write);
 4521     dst    : S3(read);
 4522     DECODE : S0(2);     // any 2 decoders
 4523     ALU    : S3(2);     // both alus
 4524 %}
 4525 
 4526 // Integer ALU reg operation using big decoder
 4527 pipe_class ialu_reg_fat(rRegI dst) %{
 4528     single_instruction;
 4529     dst    : S4(write);
 4530     dst    : S3(read);
 4531     D0     : S0;        // big decoder only
 4532     ALU    : S3;        // any alu
 4533 %}
 4534 
 4535 // Long ALU reg operation using big decoder
 4536 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4537     instruction_count(2);
 4538     dst    : S4(write);
 4539     dst    : S3(read);
 4540     D0     : S0(2);     // big decoder only; twice
 4541     ALU    : S3(2);     // any 2 alus
 4542 %}
 4543 
 4544 // Integer ALU reg-reg operation
 4545 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4546     single_instruction;
 4547     dst    : S4(write);
 4548     src    : S3(read);
 4549     DECODE : S0;        // any decoder
 4550     ALU    : S3;        // any alu
 4551 %}
 4552 
 4553 // Long ALU reg-reg operation
 4554 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4555     instruction_count(2);
 4556     dst    : S4(write);
 4557     src    : S3(read);
 4558     DECODE : S0(2);     // any 2 decoders
 4559     ALU    : S3(2);     // both alus
 4560 %}
 4561 
// Integer ALU reg-reg operation using big decoder
 4563 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4564     single_instruction;
 4565     dst    : S4(write);
 4566     src    : S3(read);
 4567     D0     : S0;        // big decoder only
 4568     ALU    : S3;        // any alu
 4569 %}
 4570 
// Long ALU reg-reg operation using big decoder
 4572 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4573     instruction_count(2);
 4574     dst    : S4(write);
 4575     src    : S3(read);
 4576     D0     : S0(2);     // big decoder only; twice
 4577     ALU    : S3(2);     // both alus
 4578 %}
 4579 
 4580 // Integer ALU reg-mem operation
 4581 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4582     single_instruction;
 4583     dst    : S5(write);
 4584     mem    : S3(read);
 4585     D0     : S0;        // big decoder only
 4586     ALU    : S4;        // any alu
 4587     MEM    : S3;        // any mem
 4588 %}
 4589 
 4590 // Long ALU reg-mem operation
 4591 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4592     instruction_count(2);
 4593     dst    : S5(write);
 4594     mem    : S3(read);
 4595     D0     : S0(2);     // big decoder only; twice
 4596     ALU    : S4(2);     // any 2 alus
 4597     MEM    : S3(2);     // both mems
 4598 %}
 4599 
 4600 // Integer mem operation (prefetch)
 4601 pipe_class ialu_mem(memory mem)
 4602 %{
 4603     single_instruction;
 4604     mem    : S3(read);
 4605     D0     : S0;        // big decoder only
 4606     MEM    : S3;        // any mem
 4607 %}
 4608 
 4609 // Integer Store to Memory
 4610 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4611     single_instruction;
 4612     mem    : S3(read);
 4613     src    : S5(read);
 4614     D0     : S0;        // big decoder only
 4615     ALU    : S4;        // any alu
 4616     MEM    : S3;
 4617 %}
 4618 
 4619 // Long Store to Memory
 4620 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4621     instruction_count(2);
 4622     mem    : S3(read);
 4623     src    : S5(read);
 4624     D0     : S0(2);     // big decoder only; twice
 4625     ALU    : S4(2);     // any 2 alus
 4626     MEM    : S3(2);     // Both mems
 4627 %}
 4628 
 4629 // Integer Store to Memory
 4630 pipe_class ialu_mem_imm(memory mem) %{
 4631     single_instruction;
 4632     mem    : S3(read);
 4633     D0     : S0;        // big decoder only
 4634     ALU    : S4;        // any alu
 4635     MEM    : S3;
 4636 %}
 4637 
 4638 // Integer ALU0 reg-reg operation
 4639 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4640     single_instruction;
 4641     dst    : S4(write);
 4642     src    : S3(read);
 4643     D0     : S0;        // Big decoder only
 4644     ALU0   : S3;        // only alu0
 4645 %}
 4646 
 4647 // Integer ALU0 reg-mem operation
 4648 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4649     single_instruction;
 4650     dst    : S5(write);
 4651     mem    : S3(read);
 4652     D0     : S0;        // big decoder only
 4653     ALU0   : S4;        // ALU0 only
 4654     MEM    : S3;        // any mem
 4655 %}
 4656 
 4657 // Integer ALU reg-reg operation
 4658 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4659     single_instruction;
 4660     cr     : S4(write);
 4661     src1   : S3(read);
 4662     src2   : S3(read);
 4663     DECODE : S0;        // any decoder
 4664     ALU    : S3;        // any alu
 4665 %}
 4666 
 4667 // Integer ALU reg-imm operation
 4668 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4669     single_instruction;
 4670     cr     : S4(write);
 4671     src1   : S3(read);
 4672     DECODE : S0;        // any decoder
 4673     ALU    : S3;        // any alu
 4674 %}
 4675 
 4676 // Integer ALU reg-mem operation
 4677 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4678     single_instruction;
 4679     cr     : S4(write);
 4680     src1   : S3(read);
 4681     src2   : S3(read);
 4682     D0     : S0;        // big decoder only
 4683     ALU    : S4;        // any alu
 4684     MEM    : S3;
 4685 %}
 4686 
 4687 // Conditional move reg-reg
 4688 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4689     instruction_count(4);
 4690     y      : S4(read);
 4691     q      : S3(read);
 4692     p      : S3(read);
 4693     DECODE : S0(4);     // any decoder
 4694 %}
 4695 
 4696 // Conditional move reg-reg
 4697 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4698     single_instruction;
 4699     dst    : S4(write);
 4700     src    : S3(read);
 4701     cr     : S3(read);
 4702     DECODE : S0;        // any decoder
 4703 %}
 4704 
 4705 // Conditional move reg-mem
 4706 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4707     single_instruction;
 4708     dst    : S4(write);
 4709     src    : S3(read);
 4710     cr     : S3(read);
 4711     DECODE : S0;        // any decoder
 4712     MEM    : S3;
 4713 %}
 4714 
 4715 // Conditional move reg-reg long
 4716 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4717     single_instruction;
 4718     dst    : S4(write);
 4719     src    : S3(read);
 4720     cr     : S3(read);
 4721     DECODE : S0(2);     // any 2 decoders
 4722 %}
 4723 
 4724 // Conditional move double reg-reg
 4725 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4726     single_instruction;
 4727     dst    : S4(write);
 4728     src    : S3(read);
 4729     cr     : S3(read);
 4730     DECODE : S0;        // any decoder
 4731 %}
 4732 
 4733 // Float reg-reg operation
 4734 pipe_class fpu_reg(regDPR dst) %{
 4735     instruction_count(2);
 4736     dst    : S3(read);
 4737     DECODE : S0(2);     // any 2 decoders
 4738     FPU    : S3;
 4739 %}
 4740 
 4741 // Float reg-reg operation
 4742 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4743     instruction_count(2);
 4744     dst    : S4(write);
 4745     src    : S3(read);
 4746     DECODE : S0(2);     // any 2 decoders
 4747     FPU    : S3;
 4748 %}
 4749 
 4750 // Float reg-reg operation
 4751 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4752     instruction_count(3);
 4753     dst    : S4(write);
 4754     src1   : S3(read);
 4755     src2   : S3(read);
 4756     DECODE : S0(3);     // any 3 decoders
 4757     FPU    : S3(2);
 4758 %}
 4759 
 4760 // Float reg-reg operation
 4761 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4762     instruction_count(4);
 4763     dst    : S4(write);
 4764     src1   : S3(read);
 4765     src2   : S3(read);
 4766     src3   : S3(read);
    DECODE : S0(4);     // 4 decode slots
 4768     FPU    : S3(2);
 4769 %}
 4770 
 4771 // Float reg-reg operation
 4772 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4773     instruction_count(4);
 4774     dst    : S4(write);
 4775     src1   : S3(read);
 4776     src2   : S3(read);
 4777     src3   : S3(read);
 4778     DECODE : S1(3);     // any 3 decoders
 4779     D0     : S0;        // Big decoder only
 4780     FPU    : S3(2);
 4781     MEM    : S3;
 4782 %}
 4783 
 4784 // Float reg-mem operation
 4785 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4786     instruction_count(2);
 4787     dst    : S5(write);
 4788     mem    : S3(read);
 4789     D0     : S0;        // big decoder only
 4790     DECODE : S1;        // any decoder for FPU POP
 4791     FPU    : S4;
 4792     MEM    : S3;        // any mem
 4793 %}
 4794 
 4795 // Float reg-mem operation
 4796 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4797     instruction_count(3);
 4798     dst    : S5(write);
 4799     src1   : S3(read);
 4800     mem    : S3(read);
 4801     D0     : S0;        // big decoder only
 4802     DECODE : S1(2);     // any decoder for FPU POP
 4803     FPU    : S4;
 4804     MEM    : S3;        // any mem
 4805 %}
 4806 
 4807 // Float mem-reg operation
 4808 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4809     instruction_count(2);
 4810     src    : S5(read);
 4811     mem    : S3(read);
 4812     DECODE : S0;        // any decoder for FPU PUSH
 4813     D0     : S1;        // big decoder only
 4814     FPU    : S4;
 4815     MEM    : S3;        // any mem
 4816 %}
 4817 
 4818 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4819     instruction_count(3);
 4820     src1   : S3(read);
 4821     src2   : S3(read);
 4822     mem    : S3(read);
 4823     DECODE : S0(2);     // any decoder for FPU PUSH
 4824     D0     : S1;        // big decoder only
 4825     FPU    : S4;
 4826     MEM    : S3;        // any mem
 4827 %}
 4828 
 4829 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4830     instruction_count(3);
 4831     src1   : S3(read);
 4832     src2   : S3(read);
 4833     mem    : S4(read);
 4834     DECODE : S0;        // any decoder for FPU PUSH
 4835     D0     : S0(2);     // big decoder only
 4836     FPU    : S4;
 4837     MEM    : S3(2);     // any mem
 4838 %}
 4839 
 4840 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4841     instruction_count(2);
 4842     src1   : S3(read);
 4843     dst    : S4(read);
 4844     D0     : S0(2);     // big decoder only
 4845     MEM    : S3(2);     // any mem
 4846 %}
 4847 
 4848 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4849     instruction_count(3);
 4850     src1   : S3(read);
 4851     src2   : S3(read);
 4852     dst    : S4(read);
 4853     D0     : S0(3);     // big decoder only
 4854     FPU    : S4;
 4855     MEM    : S3(3);     // any mem
 4856 %}
 4857 
 4858 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4859     instruction_count(3);
 4860     src1   : S4(read);
 4861     mem    : S4(read);
 4862     DECODE : S0;        // any decoder for FPU PUSH
 4863     D0     : S0(2);     // big decoder only
 4864     FPU    : S4;
 4865     MEM    : S3(2);     // any mem
 4866 %}
 4867 
 4868 // Float load constant
 4869 pipe_class fpu_reg_con(regDPR dst) %{
 4870     instruction_count(2);
 4871     dst    : S5(write);
 4872     D0     : S0;        // big decoder only for the load
 4873     DECODE : S1;        // any decoder for FPU POP
 4874     FPU    : S4;
 4875     MEM    : S3;        // any mem
 4876 %}
 4877 
 4878 // Float load constant
 4879 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4880     instruction_count(3);
 4881     dst    : S5(write);
 4882     src    : S3(read);
 4883     D0     : S0;        // big decoder only for the load
 4884     DECODE : S1(2);     // any decoder for FPU POP
 4885     FPU    : S4;
 4886     MEM    : S3;        // any mem
 4887 %}
 4888 
// Unconditional branch
 4890 pipe_class pipe_jmp( label labl ) %{
 4891     single_instruction;
 4892     BR   : S3;
 4893 %}
 4894 
 4895 // Conditional branch
 4896 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4897     single_instruction;
 4898     cr    : S1(read);
 4899     BR    : S3;
 4900 %}
 4901 
 4902 // Allocation idiom
 4903 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4904     instruction_count(1); force_serialization;
 4905     fixed_latency(6);
 4906     heap_ptr : S3(read);
 4907     DECODE   : S0(3);
 4908     D0       : S2;
 4909     MEM      : S3;
 4910     ALU      : S3(2);
 4911     dst      : S5(write);
 4912     BR       : S5;
 4913 %}
 4914 
 4915 // Generic big/slow expanded idiom
 4916 pipe_class pipe_slow(  ) %{
 4917     instruction_count(10); multiple_bundles; force_serialization;
 4918     fixed_latency(100);
 4919     D0  : S0(2);
 4920     MEM : S3(2);
 4921 %}
 4922 
 4923 // The real do-nothing guy
 4924 pipe_class empty( ) %{
 4925     instruction_count(0);
 4926 %}
 4927 
 4928 // Define the class for the Nop node
 4929 define %{
 4930    MachNop = empty;
 4931 %}
 4932 
 4933 %}
 4934 
 4935 //----------INSTRUCTIONS-------------------------------------------------------
 4936 //
 4937 // match      -- States which machine-independent subtree may be replaced
 4938 //               by this instruction.
 4939 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4940 //               selection to identify a minimum cost tree of machine
 4941 //               instructions that matches a tree of machine-independent
 4942 //               instructions.
 4943 // format     -- A string providing the disassembly for this instruction.
 4944 //               The value of an instruction's operand may be inserted
 4945 //               by referring to it with a '$' prefix.
 4946 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4947 //               to within an encode class as $primary, $secondary, and $tertiary
 4948 //               respectively.  The primary opcode is commonly used to
 4949 //               indicate the type of machine instruction, while secondary
 4950 //               and tertiary are often used for prefix options or addressing
 4951 //               modes.
 4952 // ins_encode -- A list of encode classes with parameters. The encode class
 4953 //               name must have been defined in an 'enc_class' specification
 4954 //               in the encode section of the architecture description.
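
// As a minimal illustration of how these pieces fit together (a hypothetical
// definition, not one the matcher uses; the real instructions follow), a
// register-to-register integer move could be written as:
//
//   instruct exampleMovRR(rRegI dst, rRegI src) %{
//     match(Set dst src);                  // subtree this instruction replaces
//     ins_cost(100);                       // relative cost seen by the matcher
//     format %{ "MOV    $dst,$src" %}      // disassembly string
//     opcode(0x8B);                        // $primary opcode
//     ins_encode( OpcP, RegReg(dst,src) ); // encode classes from the encode section
//     ins_pipe( ialu_reg_reg );            // pipeline class defined above
//   %}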
 4955 
 4956 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4957 // Load Float
 4958 instruct MoveF2LEG(legRegF dst, regF src) %{
 4959   match(Set dst src);
 4960   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4961   ins_encode %{
 4962     ShouldNotReachHere();
 4963   %}
 4964   ins_pipe( fpu_reg_reg );
 4965 %}
 4966 
 4967 // Load Float
 4968 instruct MoveLEG2F(regF dst, legRegF src) %{
 4969   match(Set dst src);
 4970   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4971   ins_encode %{
 4972     ShouldNotReachHere();
 4973   %}
 4974   ins_pipe( fpu_reg_reg );
 4975 %}
 4976 
 4977 // Load Float
 4978 instruct MoveF2VL(vlRegF dst, regF src) %{
 4979   match(Set dst src);
 4980   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4981   ins_encode %{
 4982     ShouldNotReachHere();
 4983   %}
 4984   ins_pipe( fpu_reg_reg );
 4985 %}
 4986 
 4987 // Load Float
 4988 instruct MoveVL2F(regF dst, vlRegF src) %{
 4989   match(Set dst src);
 4990   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4991   ins_encode %{
 4992     ShouldNotReachHere();
 4993   %}
 4994   ins_pipe( fpu_reg_reg );
 4995 %}
 4996 
 4997 
 4998 
 4999 // Load Double
 5000 instruct MoveD2LEG(legRegD dst, regD src) %{
 5001   match(Set dst src);
 5002   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5003   ins_encode %{
 5004     ShouldNotReachHere();
 5005   %}
 5006   ins_pipe( fpu_reg_reg );
 5007 %}
 5008 
 5009 // Load Double
 5010 instruct MoveLEG2D(regD dst, legRegD src) %{
 5011   match(Set dst src);
 5012   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5013   ins_encode %{
 5014     ShouldNotReachHere();
 5015   %}
 5016   ins_pipe( fpu_reg_reg );
 5017 %}
 5018 
 5019 // Load Double
 5020 instruct MoveD2VL(vlRegD dst, regD src) %{
 5021   match(Set dst src);
 5022   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5023   ins_encode %{
 5024     ShouldNotReachHere();
 5025   %}
 5026   ins_pipe( fpu_reg_reg );
 5027 %}
 5028 
 5029 // Load Double
 5030 instruct MoveVL2D(regD dst, vlRegD src) %{
 5031   match(Set dst src);
 5032   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5033   ins_encode %{
 5034     ShouldNotReachHere();
 5035   %}
 5036   ins_pipe( fpu_reg_reg );
 5037 %}
 5038 
 5039 //----------BSWAP-Instruction--------------------------------------------------
 5040 instruct bytes_reverse_int(rRegI dst) %{
 5041   match(Set dst (ReverseBytesI dst));
 5042 
 5043   format %{ "BSWAP  $dst" %}
 5044   opcode(0x0F, 0xC8);
 5045   ins_encode( OpcP, OpcSReg(dst) );
 5046   ins_pipe( ialu_reg );
 5047 %}
 5048 
 5049 instruct bytes_reverse_long(eRegL dst) %{
 5050   match(Set dst (ReverseBytesL dst));
 5051 
 5052   format %{ "BSWAP  $dst.lo\n\t"
 5053             "BSWAP  $dst.hi\n\t"
 5054             "XCHG   $dst.lo $dst.hi" %}
 5055 
 5056   ins_cost(125);
 5057   ins_encode( bswap_long_bytes(dst) );
 5058   ins_pipe( ialu_reg_reg);
 5059 %}
 5060 
 5061 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5062   match(Set dst (ReverseBytesUS dst));
 5063   effect(KILL cr);
 5064 
 5065   format %{ "BSWAP  $dst\n\t"
 5066             "SHR    $dst,16\n\t" %}
 5067   ins_encode %{
 5068     __ bswapl($dst$$Register);
 5069     __ shrl($dst$$Register, 16);
 5070   %}
 5071   ins_pipe( ialu_reg );
 5072 %}
 5073 
 5074 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5075   match(Set dst (ReverseBytesS dst));
 5076   effect(KILL cr);
 5077 
 5078   format %{ "BSWAP  $dst\n\t"
 5079             "SAR    $dst,16\n\t" %}
 5080   ins_encode %{
 5081     __ bswapl($dst$$Register);
 5082     __ sarl($dst$$Register, 16);
 5083   %}
 5084   ins_pipe( ialu_reg );
 5085 %}
 5086 
 5087 
 5088 //---------- Zeros Count Instructions ------------------------------------------
 5089 
 5090 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5091   predicate(UseCountLeadingZerosInstruction);
 5092   match(Set dst (CountLeadingZerosI src));
 5093   effect(KILL cr);
 5094 
 5095   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5096   ins_encode %{
 5097     __ lzcntl($dst$$Register, $src$$Register);
 5098   %}
 5099   ins_pipe(ialu_reg);
 5100 %}
 5101 
 5102 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5103   predicate(!UseCountLeadingZerosInstruction);
 5104   match(Set dst (CountLeadingZerosI src));
 5105   effect(KILL cr);
 5106 
 5107   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5108             "JNZ    skip\n\t"
 5109             "MOV    $dst, -1\n"
 5110       "skip:\n\t"
 5111             "NEG    $dst\n\t"
 5112             "ADD    $dst, 31" %}
 5113   ins_encode %{
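    // If $src is zero, BSR sets ZF and leaves $dst undefined, so $dst is forced
    // to -1; NEG followed by ADD 31 then turns the highest-set-bit index i into
    // 31 - i (and -1 into 32), which is the leading-zero count.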
 5114     Register Rdst = $dst$$Register;
 5115     Register Rsrc = $src$$Register;
 5116     Label skip;
 5117     __ bsrl(Rdst, Rsrc);
 5118     __ jccb(Assembler::notZero, skip);
 5119     __ movl(Rdst, -1);
 5120     __ bind(skip);
 5121     __ negl(Rdst);
 5122     __ addl(Rdst, BitsPerInt - 1);
 5123   %}
 5124   ins_pipe(ialu_reg);
 5125 %}
 5126 
 5127 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5128   predicate(UseCountLeadingZerosInstruction);
 5129   match(Set dst (CountLeadingZerosL src));
 5130   effect(TEMP dst, KILL cr);
 5131 
 5132   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5133             "JNC    done\n\t"
 5134             "LZCNT  $dst, $src.lo\n\t"
 5135             "ADD    $dst, 32\n"
 5136       "done:" %}
 5137   ins_encode %{
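    // LZCNT sets CF when its source is all zeros.  If the high word is non-zero
    // (CF clear) it already supplied the answer; otherwise count the low word
    // and add 32, so a zero long yields 32 + 32 = 64.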
 5138     Register Rdst = $dst$$Register;
 5139     Register Rsrc = $src$$Register;
 5140     Label done;
 5141     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5142     __ jccb(Assembler::carryClear, done);
 5143     __ lzcntl(Rdst, Rsrc);
 5144     __ addl(Rdst, BitsPerInt);
 5145     __ bind(done);
 5146   %}
 5147   ins_pipe(ialu_reg);
 5148 %}
 5149 
 5150 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5151   predicate(!UseCountLeadingZerosInstruction);
 5152   match(Set dst (CountLeadingZerosL src));
 5153   effect(TEMP dst, KILL cr);
 5154 
 5155   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5156             "JZ     msw_is_zero\n\t"
 5157             "ADD    $dst, 32\n\t"
 5158             "JMP    not_zero\n"
 5159       "msw_is_zero:\n\t"
 5160             "BSR    $dst, $src.lo\n\t"
 5161             "JNZ    not_zero\n\t"
 5162             "MOV    $dst, -1\n"
 5163       "not_zero:\n\t"
 5164             "NEG    $dst\n\t"
 5165             "ADD    $dst, 63\n" %}
 5166  ins_encode %{
 5167     Register Rdst = $dst$$Register;
 5168     Register Rsrc = $src$$Register;
 5169     Label msw_is_zero;
 5170     Label not_zero;
 5171     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5172     __ jccb(Assembler::zero, msw_is_zero);
 5173     __ addl(Rdst, BitsPerInt);
 5174     __ jmpb(not_zero);
 5175     __ bind(msw_is_zero);
 5176     __ bsrl(Rdst, Rsrc);
 5177     __ jccb(Assembler::notZero, not_zero);
 5178     __ movl(Rdst, -1);
 5179     __ bind(not_zero);
 5180     __ negl(Rdst);
 5181     __ addl(Rdst, BitsPerLong - 1);
 5182   %}
 5183   ins_pipe(ialu_reg);
 5184 %}
 5185 
 5186 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5187   predicate(UseCountTrailingZerosInstruction);
 5188   match(Set dst (CountTrailingZerosI src));
 5189   effect(KILL cr);
 5190 
 5191   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5192   ins_encode %{
 5193     __ tzcntl($dst$$Register, $src$$Register);
 5194   %}
 5195   ins_pipe(ialu_reg);
 5196 %}
 5197 
 5198 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5199   predicate(!UseCountTrailingZerosInstruction);
 5200   match(Set dst (CountTrailingZerosI src));
 5201   effect(KILL cr);
 5202 
 5203   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5204             "JNZ    done\n\t"
 5205             "MOV    $dst, 32\n"
 5206       "done:" %}
 5207   ins_encode %{
 5208     Register Rdst = $dst$$Register;
 5209     Label done;
 5210     __ bsfl(Rdst, $src$$Register);
 5211     __ jccb(Assembler::notZero, done);
 5212     __ movl(Rdst, BitsPerInt);
 5213     __ bind(done);
 5214   %}
 5215   ins_pipe(ialu_reg);
 5216 %}
 5217 
 5218 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5219   predicate(UseCountTrailingZerosInstruction);
 5220   match(Set dst (CountTrailingZerosL src));
 5221   effect(TEMP dst, KILL cr);
 5222 
 5223   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5224             "JNC    done\n\t"
 5225             "TZCNT  $dst, $src.hi\n\t"
 5226             "ADD    $dst, 32\n"
 5227             "done:" %}
 5228   ins_encode %{
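    // TZCNT likewise sets CF when its source is all zeros: CF clear means the
    // low word supplied the answer; otherwise count the high word and add 32.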
 5229     Register Rdst = $dst$$Register;
 5230     Register Rsrc = $src$$Register;
 5231     Label done;
 5232     __ tzcntl(Rdst, Rsrc);
 5233     __ jccb(Assembler::carryClear, done);
 5234     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5235     __ addl(Rdst, BitsPerInt);
 5236     __ bind(done);
 5237   %}
 5238   ins_pipe(ialu_reg);
 5239 %}
 5240 
 5241 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5242   predicate(!UseCountTrailingZerosInstruction);
 5243   match(Set dst (CountTrailingZerosL src));
 5244   effect(TEMP dst, KILL cr);
 5245 
 5246   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5247             "JNZ    done\n\t"
 5248             "BSF    $dst, $src.hi\n\t"
 5249             "JNZ    msw_not_zero\n\t"
 5250             "MOV    $dst, 32\n"
 5251       "msw_not_zero:\n\t"
 5252             "ADD    $dst, 32\n"
 5253       "done:" %}
 5254   ins_encode %{
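    // If the low word is zero, fall through to the high word; if that is zero
    // as well, the count is forced to 32 and the final ADD makes the result 64.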
 5255     Register Rdst = $dst$$Register;
 5256     Register Rsrc = $src$$Register;
 5257     Label msw_not_zero;
 5258     Label done;
 5259     __ bsfl(Rdst, Rsrc);
 5260     __ jccb(Assembler::notZero, done);
 5261     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5262     __ jccb(Assembler::notZero, msw_not_zero);
 5263     __ movl(Rdst, BitsPerInt);
 5264     __ bind(msw_not_zero);
 5265     __ addl(Rdst, BitsPerInt);
 5266     __ bind(done);
 5267   %}
 5268   ins_pipe(ialu_reg);
 5269 %}
 5270 
 5271 
 5272 //---------- Population Count Instructions -------------------------------------
 5273 
 5274 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5275   predicate(UsePopCountInstruction);
 5276   match(Set dst (PopCountI src));
 5277   effect(KILL cr);
 5278 
 5279   format %{ "POPCNT $dst, $src" %}
 5280   ins_encode %{
 5281     __ popcntl($dst$$Register, $src$$Register);
 5282   %}
 5283   ins_pipe(ialu_reg);
 5284 %}
 5285 
 5286 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5287   predicate(UsePopCountInstruction);
 5288   match(Set dst (PopCountI (LoadI mem)));
 5289   effect(KILL cr);
 5290 
 5291   format %{ "POPCNT $dst, $mem" %}
 5292   ins_encode %{
 5293     __ popcntl($dst$$Register, $mem$$Address);
 5294   %}
 5295   ins_pipe(ialu_reg);
 5296 %}
 5297 
 5298 // Note: Long.bitCount(long) returns an int.
 5299 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5300   predicate(UsePopCountInstruction);
 5301   match(Set dst (PopCountL src));
 5302   effect(KILL cr, TEMP tmp, TEMP dst);
 5303 
 5304   format %{ "POPCNT $dst, $src.lo\n\t"
 5305             "POPCNT $tmp, $src.hi\n\t"
 5306             "ADD    $dst, $tmp" %}
 5307   ins_encode %{
 5308     __ popcntl($dst$$Register, $src$$Register);
 5309     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5310     __ addl($dst$$Register, $tmp$$Register);
 5311   %}
 5312   ins_pipe(ialu_reg);
 5313 %}
 5314 
 5315 // Note: Long.bitCount(long) returns an int.
 5316 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5317   predicate(UsePopCountInstruction);
 5318   match(Set dst (PopCountL (LoadL mem)));
 5319   effect(KILL cr, TEMP tmp, TEMP dst);
 5320 
 5321   format %{ "POPCNT $dst, $mem\n\t"
 5322             "POPCNT $tmp, $mem+4\n\t"
 5323             "ADD    $dst, $tmp" %}
 5324   ins_encode %{
 5325     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5326     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5327     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5328     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5329     __ addl($dst$$Register, $tmp$$Register);
 5330   %}
 5331   ins_pipe(ialu_reg);
 5332 %}
 5333 
 5334 
 5335 //----------Load/Store/Move Instructions---------------------------------------
 5336 //----------Load Instructions--------------------------------------------------
 5337 // Load Byte (8bit signed)
 5338 instruct loadB(xRegI dst, memory mem) %{
 5339   match(Set dst (LoadB mem));
 5340 
 5341   ins_cost(125);
 5342   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5343 
 5344   ins_encode %{
 5345     __ movsbl($dst$$Register, $mem$$Address);
 5346   %}
 5347 
 5348   ins_pipe(ialu_reg_mem);
 5349 %}
 5350 
 5351 // Load Byte (8bit signed) into Long Register
 5352 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5353   match(Set dst (ConvI2L (LoadB mem)));
 5354   effect(KILL cr);
 5355 
 5356   ins_cost(375);
 5357   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5358             "MOV    $dst.hi,$dst.lo\n\t"
 5359             "SAR    $dst.hi,7" %}
 5360 
 5361   ins_encode %{
 5362     __ movsbl($dst$$Register, $mem$$Address);
 5363     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 MSBs are already sign-extended, so this fills $dst.hi with the sign.
 5365   %}
 5366 
 5367   ins_pipe(ialu_reg_mem);
 5368 %}
 5369 
 5370 // Load Unsigned Byte (8bit UNsigned)
 5371 instruct loadUB(xRegI dst, memory mem) %{
 5372   match(Set dst (LoadUB mem));
 5373 
 5374   ins_cost(125);
 5375   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5376 
 5377   ins_encode %{
 5378     __ movzbl($dst$$Register, $mem$$Address);
 5379   %}
 5380 
 5381   ins_pipe(ialu_reg_mem);
 5382 %}
 5383 
 5384 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5385 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5386   match(Set dst (ConvI2L (LoadUB mem)));
 5387   effect(KILL cr);
 5388 
 5389   ins_cost(250);
 5390   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5391             "XOR    $dst.hi,$dst.hi" %}
 5392 
 5393   ins_encode %{
 5394     Register Rdst = $dst$$Register;
 5395     __ movzbl(Rdst, $mem$$Address);
 5396     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5397   %}
 5398 
 5399   ins_pipe(ialu_reg_mem);
 5400 %}
 5401 
 5402 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5403 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5404   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5405   effect(KILL cr);
 5406 
 5407   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5408             "XOR    $dst.hi,$dst.hi\n\t"
 5409             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5410   ins_encode %{
 5411     Register Rdst = $dst$$Register;
 5412     __ movzbl(Rdst, $mem$$Address);
 5413     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5414     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5415   %}
 5416   ins_pipe(ialu_reg_mem);
 5417 %}
 5418 
 5419 // Load Short (16bit signed)
 5420 instruct loadS(rRegI dst, memory mem) %{
 5421   match(Set dst (LoadS mem));
 5422 
 5423   ins_cost(125);
 5424   format %{ "MOVSX  $dst,$mem\t# short" %}
 5425 
 5426   ins_encode %{
 5427     __ movswl($dst$$Register, $mem$$Address);
 5428   %}
 5429 
 5430   ins_pipe(ialu_reg_mem);
 5431 %}
 5432 
 5433 // Load Short (16 bit signed) to Byte (8 bit signed)
 5434 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5435   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5436 
 5437   ins_cost(125);
 5438   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5439   ins_encode %{
 5440     __ movsbl($dst$$Register, $mem$$Address);
 5441   %}
 5442   ins_pipe(ialu_reg_mem);
 5443 %}
 5444 
 5445 // Load Short (16bit signed) into Long Register
 5446 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5447   match(Set dst (ConvI2L (LoadS mem)));
 5448   effect(KILL cr);
 5449 
 5450   ins_cost(375);
 5451   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5452             "MOV    $dst.hi,$dst.lo\n\t"
 5453             "SAR    $dst.hi,15" %}
 5454 
 5455   ins_encode %{
 5456     __ movswl($dst$$Register, $mem$$Address);
 5457     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 MSBs are already sign-extended, so this fills $dst.hi with the sign.
 5459   %}
 5460 
 5461   ins_pipe(ialu_reg_mem);
 5462 %}
 5463 
 5464 // Load Unsigned Short/Char (16bit unsigned)
 5465 instruct loadUS(rRegI dst, memory mem) %{
 5466   match(Set dst (LoadUS mem));
 5467 
 5468   ins_cost(125);
 5469   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5470 
 5471   ins_encode %{
 5472     __ movzwl($dst$$Register, $mem$$Address);
 5473   %}
 5474 
 5475   ins_pipe(ialu_reg_mem);
 5476 %}
 5477 
 5478 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5479 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5480   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5481 
 5482   ins_cost(125);
 5483   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5484   ins_encode %{
 5485     __ movsbl($dst$$Register, $mem$$Address);
 5486   %}
 5487   ins_pipe(ialu_reg_mem);
 5488 %}
 5489 
 5490 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5491 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5492   match(Set dst (ConvI2L (LoadUS mem)));
 5493   effect(KILL cr);
 5494 
 5495   ins_cost(250);
 5496   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5497             "XOR    $dst.hi,$dst.hi" %}
 5498 
 5499   ins_encode %{
 5500     __ movzwl($dst$$Register, $mem$$Address);
 5501     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5502   %}
 5503 
 5504   ins_pipe(ialu_reg_mem);
 5505 %}
 5506 
 5507 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5508 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5509   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5510   effect(KILL cr);
 5511 
 5512   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5513             "XOR    $dst.hi,$dst.hi" %}
 5514   ins_encode %{
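    // The 0xFF mask makes everything above the low byte irrelevant, so a single
    // byte-wide MOVZX load suffices and no explicit AND is needed.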
 5515     Register Rdst = $dst$$Register;
 5516     __ movzbl(Rdst, $mem$$Address);
 5517     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5518   %}
 5519   ins_pipe(ialu_reg_mem);
 5520 %}
 5521 
 5522 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5523 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5524   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5525   effect(KILL cr);
 5526 
 5527   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5528             "XOR    $dst.hi,$dst.hi\n\t"
 5529             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5530   ins_encode %{
 5531     Register Rdst = $dst$$Register;
 5532     __ movzwl(Rdst, $mem$$Address);
 5533     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5534     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5535   %}
 5536   ins_pipe(ialu_reg_mem);
 5537 %}
 5538 
 5539 // Load Integer
 5540 instruct loadI(rRegI dst, memory mem) %{
 5541   match(Set dst (LoadI mem));
 5542 
 5543   ins_cost(125);
 5544   format %{ "MOV    $dst,$mem\t# int" %}
 5545 
 5546   ins_encode %{
 5547     __ movl($dst$$Register, $mem$$Address);
 5548   %}
 5549 
 5550   ins_pipe(ialu_reg_mem);
 5551 %}
 5552 
 5553 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5554 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5555   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5556 
 5557   ins_cost(125);
 5558   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5559   ins_encode %{
 5560     __ movsbl($dst$$Register, $mem$$Address);
 5561   %}
 5562   ins_pipe(ialu_reg_mem);
 5563 %}
 5564 
 5565 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5566 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5567   match(Set dst (AndI (LoadI mem) mask));
 5568 
 5569   ins_cost(125);
 5570   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5571   ins_encode %{
 5572     __ movzbl($dst$$Register, $mem$$Address);
 5573   %}
 5574   ins_pipe(ialu_reg_mem);
 5575 %}
 5576 
 5577 // Load Integer (32 bit signed) to Short (16 bit signed)
 5578 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5579   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5580 
 5581   ins_cost(125);
 5582   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5583   ins_encode %{
 5584     __ movswl($dst$$Register, $mem$$Address);
 5585   %}
 5586   ins_pipe(ialu_reg_mem);
 5587 %}
 5588 
 5589 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5590 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5591   match(Set dst (AndI (LoadI mem) mask));
 5592 
 5593   ins_cost(125);
 5594   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5595   ins_encode %{
 5596     __ movzwl($dst$$Register, $mem$$Address);
 5597   %}
 5598   ins_pipe(ialu_reg_mem);
 5599 %}
 5600 
 5601 // Load Integer into Long Register
 5602 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5603   match(Set dst (ConvI2L (LoadI mem)));
 5604   effect(KILL cr);
 5605 
 5606   ins_cost(375);
 5607   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5608             "MOV    $dst.hi,$dst.lo\n\t"
 5609             "SAR    $dst.hi,31" %}
 5610 
 5611   ins_encode %{
 5612     __ movl($dst$$Register, $mem$$Address);
 5613     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5614     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5615   %}
 5616 
 5617   ins_pipe(ialu_reg_mem);
 5618 %}
 5619 
 5620 // Load Integer with mask 0xFF into Long Register
 5621 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5622   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5623   effect(KILL cr);
 5624 
 5625   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5626             "XOR    $dst.hi,$dst.hi" %}
 5627   ins_encode %{
 5628     Register Rdst = $dst$$Register;
 5629     __ movzbl(Rdst, $mem$$Address);
 5630     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5631   %}
 5632   ins_pipe(ialu_reg_mem);
 5633 %}
 5634 
 5635 // Load Integer with mask 0xFFFF into Long Register
 5636 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5637   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5638   effect(KILL cr);
 5639 
 5640   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5641             "XOR    $dst.hi,$dst.hi" %}
 5642   ins_encode %{
 5643     Register Rdst = $dst$$Register;
 5644     __ movzwl(Rdst, $mem$$Address);
 5645     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5646   %}
 5647   ins_pipe(ialu_reg_mem);
 5648 %}
 5649 
 5650 // Load Integer with 31-bit mask into Long Register
 5651 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5652   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5653   effect(KILL cr);
 5654 
 5655   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5656             "XOR    $dst.hi,$dst.hi\n\t"
 5657             "AND    $dst.lo,$mask" %}
 5658   ins_encode %{
 5659     Register Rdst = $dst$$Register;
 5660     __ movl(Rdst, $mem$$Address);
 5661     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5662     __ andl(Rdst, $mask$$constant);
 5663   %}
 5664   ins_pipe(ialu_reg_mem);
 5665 %}
 5666 
 5667 // Load Unsigned Integer into Long Register
 5668 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5669   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5670   effect(KILL cr);
 5671 
 5672   ins_cost(250);
 5673   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5674             "XOR    $dst.hi,$dst.hi" %}
 5675 
 5676   ins_encode %{
 5677     __ movl($dst$$Register, $mem$$Address);
 5678     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5679   %}
 5680 
 5681   ins_pipe(ialu_reg_mem);
 5682 %}
 5683 
 5684 // Load Long.  Cannot clobber address while loading, so restrict address
 5685 // register to ESI
 5686 instruct loadL(eRegL dst, load_long_memory mem) %{
 5687   predicate(!((LoadLNode*)n)->require_atomic_access());
 5688   match(Set dst (LoadL mem));
 5689 
 5690   ins_cost(250);
 5691   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5692             "MOV    $dst.hi,$mem+4" %}
 5693 
 5694   ins_encode %{
 5695     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5696     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5697     __ movl($dst$$Register, Amemlo);
 5698     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5699   %}
 5700 
 5701   ins_pipe(ialu_reg_long_mem);
 5702 %}
 5703 
 5704 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5705 // then store it down to the stack and reload on the int
 5706 // side.
 5707 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5708   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5709   match(Set dst (LoadL mem));
 5710 
 5711   ins_cost(200);
 5712   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5713             "FISTp  $dst" %}
 5714   ins_encode(enc_loadL_volatile(mem,dst));
 5715   ins_pipe( fpu_reg_mem );
 5716 %}
 5717 
 5718 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5719   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5720   match(Set dst (LoadL mem));
 5721   effect(TEMP tmp);
 5722   ins_cost(180);
 5723   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5724             "MOVSD  $dst,$tmp" %}
 5725   ins_encode %{
 5726     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5727     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5728   %}
 5729   ins_pipe( pipe_slow );
 5730 %}
 5731 
 5732 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5733   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5734   match(Set dst (LoadL mem));
 5735   effect(TEMP tmp);
 5736   ins_cost(160);
 5737   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5738             "MOVD   $dst.lo,$tmp\n\t"
 5739             "PSRLQ  $tmp,32\n\t"
 5740             "MOVD   $dst.hi,$tmp" %}
 5741   ins_encode %{
 5742     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5743     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5744     __ psrlq($tmp$$XMMRegister, 32);
 5745     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5746   %}
 5747   ins_pipe( pipe_slow );
 5748 %}
 5749 
 5750 // Load Range
 5751 instruct loadRange(rRegI dst, memory mem) %{
 5752   match(Set dst (LoadRange mem));
 5753 
 5754   ins_cost(125);
 5755   format %{ "MOV    $dst,$mem" %}
 5756   opcode(0x8B);
 5757   ins_encode( OpcP, RegMem(dst,mem));
 5758   ins_pipe( ialu_reg_mem );
 5759 %}
 5760 
 5761 
 5762 // Load Pointer
 5763 instruct loadP(eRegP dst, memory mem) %{
 5764   match(Set dst (LoadP mem));
 5765 
 5766   ins_cost(125);
 5767   format %{ "MOV    $dst,$mem" %}
 5768   opcode(0x8B);
 5769   ins_encode( OpcP, RegMem(dst,mem));
 5770   ins_pipe( ialu_reg_mem );
 5771 %}
 5772 
 5773 // Load Klass Pointer
 5774 instruct loadKlass(eRegP dst, memory mem) %{
 5775   match(Set dst (LoadKlass mem));
 5776 
 5777   ins_cost(125);
 5778   format %{ "MOV    $dst,$mem" %}
 5779   opcode(0x8B);
 5780   ins_encode( OpcP, RegMem(dst,mem));
 5781   ins_pipe( ialu_reg_mem );
 5782 %}
 5783 
 5784 // Load Double
 5785 instruct loadDPR(regDPR dst, memory mem) %{
 5786   predicate(UseSSE<=1);
 5787   match(Set dst (LoadD mem));
 5788 
 5789   ins_cost(150);
 5790   format %{ "FLD_D  ST,$mem\n\t"
 5791             "FSTP   $dst" %}
 5792   opcode(0xDD);               /* DD /0 */
 5793   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5794               Pop_Reg_DPR(dst) );
 5795   ins_pipe( fpu_reg_mem );
 5796 %}
 5797 
 5798 // Load Double to XMM
 5799 instruct loadD(regD dst, memory mem) %{
 5800   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5801   match(Set dst (LoadD mem));
 5802   ins_cost(145);
 5803   format %{ "MOVSD  $dst,$mem" %}
 5804   ins_encode %{
 5805     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5806   %}
 5807   ins_pipe( pipe_slow );
 5808 %}
 5809 
 5810 instruct loadD_partial(regD dst, memory mem) %{
 5811   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5812   match(Set dst (LoadD mem));
 5813   ins_cost(145);
 5814   format %{ "MOVLPD $dst,$mem" %}
 5815   ins_encode %{
 5816     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5817   %}
 5818   ins_pipe( pipe_slow );
 5819 %}
 5820 
 5821 // Load to XMM register (single-precision floating point)
 5822 // MOVSS instruction
 5823 instruct loadF(regF dst, memory mem) %{
 5824   predicate(UseSSE>=1);
 5825   match(Set dst (LoadF mem));
 5826   ins_cost(145);
 5827   format %{ "MOVSS  $dst,$mem" %}
 5828   ins_encode %{
 5829     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5830   %}
 5831   ins_pipe( pipe_slow );
 5832 %}
 5833 
 5834 // Load Float
 5835 instruct loadFPR(regFPR dst, memory mem) %{
 5836   predicate(UseSSE==0);
 5837   match(Set dst (LoadF mem));
 5838 
 5839   ins_cost(150);
 5840   format %{ "FLD_S  ST,$mem\n\t"
 5841             "FSTP   $dst" %}
 5842   opcode(0xD9);               /* D9 /0 */
 5843   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5844               Pop_Reg_FPR(dst) );
 5845   ins_pipe( fpu_reg_mem );
 5846 %}
 5847 
 5848 // Load Effective Address
 5849 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5850   match(Set dst mem);
 5851 
 5852   ins_cost(110);
 5853   format %{ "LEA    $dst,$mem" %}
 5854   opcode(0x8D);
 5855   ins_encode( OpcP, RegMem(dst,mem));
 5856   ins_pipe( ialu_reg_reg_fat );
 5857 %}
 5858 
 5859 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5860   match(Set dst mem);
 5861 
 5862   ins_cost(110);
 5863   format %{ "LEA    $dst,$mem" %}
 5864   opcode(0x8D);
 5865   ins_encode( OpcP, RegMem(dst,mem));
 5866   ins_pipe( ialu_reg_reg_fat );
 5867 %}
 5868 
 5869 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5870   match(Set dst mem);
 5871 
 5872   ins_cost(110);
 5873   format %{ "LEA    $dst,$mem" %}
 5874   opcode(0x8D);
 5875   ins_encode( OpcP, RegMem(dst,mem));
 5876   ins_pipe( ialu_reg_reg_fat );
 5877 %}
 5878 
 5879 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5880   match(Set dst mem);
 5881 
 5882   ins_cost(110);
 5883   format %{ "LEA    $dst,$mem" %}
 5884   opcode(0x8D);
 5885   ins_encode( OpcP, RegMem(dst,mem));
 5886   ins_pipe( ialu_reg_reg_fat );
 5887 %}
 5888 
 5889 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5890   match(Set dst mem);
 5891 
 5892   ins_cost(110);
 5893   format %{ "LEA    $dst,$mem" %}
 5894   opcode(0x8D);
 5895   ins_encode( OpcP, RegMem(dst,mem));
 5896   ins_pipe( ialu_reg_reg_fat );
 5897 %}
 5898 
 5899 // Load Constant
 5900 instruct loadConI(rRegI dst, immI src) %{
 5901   match(Set dst src);
 5902 
 5903   format %{ "MOV    $dst,$src" %}
 5904   ins_encode( LdImmI(dst, src) );
 5905   ins_pipe( ialu_reg_fat );
 5906 %}
 5907 
 5908 // Load Constant zero
 5909 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5910   match(Set dst src);
 5911   effect(KILL cr);
 5912 
 5913   ins_cost(50);
 5914   format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* XOR r32,r/m32 */
 5916   ins_encode( OpcP, RegReg( dst, dst ) );
 5917   ins_pipe( ialu_reg );
 5918 %}
 5919 
 5920 instruct loadConP(eRegP dst, immP src) %{
 5921   match(Set dst src);
 5922 
 5923   format %{ "MOV    $dst,$src" %}
 5924   opcode(0xB8);  /* + rd */
 5925   ins_encode( LdImmP(dst, src) );
 5926   ins_pipe( ialu_reg_fat );
 5927 %}
 5928 
 5929 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5930   match(Set dst src);
 5931   effect(KILL cr);
 5932   ins_cost(200);
 5933   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5934             "MOV    $dst.hi,$src.hi" %}
 5935   opcode(0xB8);
 5936   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5937   ins_pipe( ialu_reg_long_fat );
 5938 %}
 5939 
 5940 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 5941   match(Set dst src);
 5942   effect(KILL cr);
 5943   ins_cost(150);
 5944   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5945             "XOR    $dst.hi,$dst.hi" %}
 5946   opcode(0x33,0x33);
 5947   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5948   ins_pipe( ialu_reg_long );
 5949 %}
 5950 
 5951 // The instruction usage is guarded by predicate in operand immFPR().
 5952 instruct loadConFPR(regFPR dst, immFPR con) %{
 5953   match(Set dst con);
 5954   ins_cost(125);
 5955   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5956             "FSTP   $dst" %}
 5957   ins_encode %{
 5958     __ fld_s($constantaddress($con));
 5959     __ fstp_d($dst$$reg);
 5960   %}
 5961   ins_pipe(fpu_reg_con);
 5962 %}
 5963 
 5964 // The instruction usage is guarded by predicate in operand immFPR0().
 5965 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5966   match(Set dst con);
 5967   ins_cost(125);
 5968   format %{ "FLDZ   ST\n\t"
 5969             "FSTP   $dst" %}
 5970   ins_encode %{
 5971     __ fldz();
 5972     __ fstp_d($dst$$reg);
 5973   %}
 5974   ins_pipe(fpu_reg_con);
 5975 %}
 5976 
 5977 // The instruction usage is guarded by predicate in operand immFPR1().
 5978 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5979   match(Set dst con);
 5980   ins_cost(125);
 5981   format %{ "FLD1   ST\n\t"
 5982             "FSTP   $dst" %}
 5983   ins_encode %{
 5984     __ fld1();
 5985     __ fstp_d($dst$$reg);
 5986   %}
 5987   ins_pipe(fpu_reg_con);
 5988 %}
 5989 
 5990 // The instruction usage is guarded by predicate in operand immF().
 5991 instruct loadConF(regF dst, immF con) %{
 5992   match(Set dst con);
 5993   ins_cost(125);
 5994   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 5995   ins_encode %{
 5996     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5997   %}
 5998   ins_pipe(pipe_slow);
 5999 %}
 6000 
 6001 // The instruction usage is guarded by predicate in operand immF0().
 6002 instruct loadConF0(regF dst, immF0 src) %{
 6003   match(Set dst src);
 6004   ins_cost(100);
 6005   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6006   ins_encode %{
 6007     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6008   %}
 6009   ins_pipe(pipe_slow);
 6010 %}
 6011 
 6012 // The instruction usage is guarded by predicate in operand immDPR().
 6013 instruct loadConDPR(regDPR dst, immDPR con) %{
 6014   match(Set dst con);
 6015   ins_cost(125);
 6016 
 6017   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6018             "FSTP   $dst" %}
 6019   ins_encode %{
 6020     __ fld_d($constantaddress($con));
 6021     __ fstp_d($dst$$reg);
 6022   %}
 6023   ins_pipe(fpu_reg_con);
 6024 %}
 6025 
 6026 // The instruction usage is guarded by predicate in operand immDPR0().
 6027 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6028   match(Set dst con);
 6029   ins_cost(125);
 6030 
 6031   format %{ "FLDZ   ST\n\t"
 6032             "FSTP   $dst" %}
 6033   ins_encode %{
 6034     __ fldz();
 6035     __ fstp_d($dst$$reg);
 6036   %}
 6037   ins_pipe(fpu_reg_con);
 6038 %}
 6039 
 6040 // The instruction usage is guarded by predicate in operand immDPR1().
 6041 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6042   match(Set dst con);
 6043   ins_cost(125);
 6044 
 6045   format %{ "FLD1   ST\n\t"
 6046             "FSTP   $dst" %}
 6047   ins_encode %{
 6048     __ fld1();
 6049     __ fstp_d($dst$$reg);
 6050   %}
 6051   ins_pipe(fpu_reg_con);
 6052 %}
 6053 
 6054 // The instruction usage is guarded by predicate in operand immD().
 6055 instruct loadConD(regD dst, immD con) %{
 6056   match(Set dst con);
 6057   ins_cost(125);
 6058   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6059   ins_encode %{
 6060     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6061   %}
 6062   ins_pipe(pipe_slow);
 6063 %}
 6064 
 6065 // The instruction usage is guarded by predicate in operand immD0().
 6066 instruct loadConD0(regD dst, immD0 src) %{
 6067   match(Set dst src);
 6068   ins_cost(100);
 6069   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6070   ins_encode %{
 6071     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6072   %}
 6073   ins_pipe( pipe_slow );
 6074 %}
 6075 
 6076 // Load Stack Slot
 6077 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6078   match(Set dst src);
 6079   ins_cost(125);
 6080 
 6081   format %{ "MOV    $dst,$src" %}
 6082   opcode(0x8B);
 6083   ins_encode( OpcP, RegMem(dst,src));
 6084   ins_pipe( ialu_reg_mem );
 6085 %}
 6086 
 6087 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6088   match(Set dst src);
 6089 
 6090   ins_cost(200);
 6091   format %{ "MOV    $dst,$src.lo\n\t"
 6092             "MOV    $dst+4,$src.hi" %}
 6093   opcode(0x8B, 0x8B);
 6094   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6095   ins_pipe( ialu_mem_long_reg );
 6096 %}
 6097 
 6098 // Load Stack Slot
 6099 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6100   match(Set dst src);
 6101   ins_cost(125);
 6102 
 6103   format %{ "MOV    $dst,$src" %}
 6104   opcode(0x8B);
 6105   ins_encode( OpcP, RegMem(dst,src));
 6106   ins_pipe( ialu_reg_mem );
 6107 %}
 6108 
 6109 // Load Stack Slot
 6110 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6111   match(Set dst src);
 6112   ins_cost(125);
 6113 
 6114   format %{ "FLD_S  $src\n\t"
 6115             "FSTP   $dst" %}
 6116   opcode(0xD9);               /* D9 /0, FLD m32real */
 6117   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6118               Pop_Reg_FPR(dst) );
 6119   ins_pipe( fpu_reg_mem );
 6120 %}
 6121 
 6122 // Load Stack Slot
 6123 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6124   match(Set dst src);
 6125   ins_cost(125);
 6126 
 6127   format %{ "FLD_D  $src\n\t"
 6128             "FSTP   $dst" %}
 6129   opcode(0xDD);               /* DD /0, FLD m64real */
 6130   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6131               Pop_Reg_DPR(dst) );
 6132   ins_pipe( fpu_reg_mem );
 6133 %}
 6134 
 6135 // Prefetch instructions for allocation.
 6136 // Must be safe to execute with invalid address (cannot fault).
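// The flavor emitted is selected by AllocatePrefetchInstr (see the predicates
// below):  0 -> PREFETCHNTA,  1 -> PREFETCHT0,  2 -> PREFETCHT2,  3 -> PREFETCHW.
// With UseSSE==0 and AllocatePrefetchInstr!=3 no suitable prefetch instruction
// exists, so that case matches an empty encoding and no prefetch is issued.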
 6137 
 6138 instruct prefetchAlloc0( memory mem ) %{
 6139   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6140   match(PrefetchAllocation mem);
 6141   ins_cost(0);
 6142   size(0);
 6143   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6144   ins_encode();
 6145   ins_pipe(empty);
 6146 %}
 6147 
 6148 instruct prefetchAlloc( memory mem ) %{
 6149   predicate(AllocatePrefetchInstr==3);
 6150   match( PrefetchAllocation mem );
 6151   ins_cost(100);
 6152 
 6153   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6154   ins_encode %{
 6155     __ prefetchw($mem$$Address);
 6156   %}
 6157   ins_pipe(ialu_mem);
 6158 %}
 6159 
 6160 instruct prefetchAllocNTA( memory mem ) %{
 6161   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6162   match(PrefetchAllocation mem);
 6163   ins_cost(100);
 6164 
 6165   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6166   ins_encode %{
 6167     __ prefetchnta($mem$$Address);
 6168   %}
 6169   ins_pipe(ialu_mem);
 6170 %}
 6171 
 6172 instruct prefetchAllocT0( memory mem ) %{
 6173   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6174   match(PrefetchAllocation mem);
 6175   ins_cost(100);
 6176 
 6177   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6178   ins_encode %{
 6179     __ prefetcht0($mem$$Address);
 6180   %}
 6181   ins_pipe(ialu_mem);
 6182 %}
 6183 
 6184 instruct prefetchAllocT2( memory mem ) %{
 6185   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6186   match(PrefetchAllocation mem);
 6187   ins_cost(100);
 6188 
 6189   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6190   ins_encode %{
 6191     __ prefetcht2($mem$$Address);
 6192   %}
 6193   ins_pipe(ialu_mem);
 6194 %}
 6195 
 6196 //----------Store Instructions-------------------------------------------------
 6197 
 6198 // Store Byte
 6199 instruct storeB(memory mem, xRegI src) %{
 6200   match(Set mem (StoreB mem src));
 6201 
 6202   ins_cost(125);
 6203   format %{ "MOV8   $mem,$src" %}
 6204   opcode(0x88);
 6205   ins_encode( OpcP, RegMem( src, mem ) );
 6206   ins_pipe( ialu_mem_reg );
 6207 %}
 6208 
 6209 // Store Char/Short
 6210 instruct storeC(memory mem, rRegI src) %{
 6211   match(Set mem (StoreC mem src));
 6212 
 6213   ins_cost(125);
 6214   format %{ "MOV16  $mem,$src" %}
 6215   opcode(0x89, 0x66);
 6216   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6217   ins_pipe( ialu_mem_reg );
 6218 %}
 6219 
 6220 // Store Integer
 6221 instruct storeI(memory mem, rRegI src) %{
 6222   match(Set mem (StoreI mem src));
 6223 
 6224   ins_cost(125);
 6225   format %{ "MOV    $mem,$src" %}
 6226   opcode(0x89);
 6227   ins_encode( OpcP, RegMem( src, mem ) );
 6228   ins_pipe( ialu_mem_reg );
 6229 %}
 6230 
 6231 // Store Long
 6232 instruct storeL(long_memory mem, eRegL src) %{
 6233   predicate(!((StoreLNode*)n)->require_atomic_access());
 6234   match(Set mem (StoreL mem src));
 6235 
 6236   ins_cost(200);
 6237   format %{ "MOV    $mem,$src.lo\n\t"
 6238             "MOV    $mem+4,$src.hi" %}
 6239   opcode(0x89, 0x89);
 6240   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6241   ins_pipe( ialu_mem_long_reg );
 6242 %}
 6243 
 6244 // Store Long to Integer
 6245 instruct storeL2I(memory mem, eRegL src) %{
 6246   match(Set mem (StoreI mem (ConvL2I src)));
 6247 
 6248   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6249   ins_encode %{
 6250     __ movl($mem$$Address, $src$$Register);
 6251   %}
 6252   ins_pipe(ialu_mem_reg);
 6253 %}
 6254 
 6255 // Volatile Store Long.  Must be atomic, so move it into
 6256 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6257 // target address before the store (for null-ptr checks)
 6258 // so the memory operand is used twice in the encoding.
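// Note (informal): an aligned 64-bit x87 load/store (and, in the SSE2 forms
// below, a MOVSD) is performed as a single memory access, which is what
// provides the atomicity that a pair of 32-bit MOVs cannot.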
 6259 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6260   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6261   match(Set mem (StoreL mem src));
 6262   effect( KILL cr );
 6263   ins_cost(400);
 6264   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6265             "FILD   $src\n\t"
 6266             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6267   opcode(0x3B);
 6268   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6269   ins_pipe( fpu_reg_mem );
 6270 %}
 6271 
 6272 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6273   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6274   match(Set mem (StoreL mem src));
 6275   effect( TEMP tmp, KILL cr );
 6276   ins_cost(380);
 6277   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6278             "MOVSD  $tmp,$src\n\t"
 6279             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6280   ins_encode %{
 6281     __ cmpl(rax, $mem$$Address);
 6282     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6283     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6284   %}
 6285   ins_pipe( pipe_slow );
 6286 %}
 6287 
 6288 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6289   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6290   match(Set mem (StoreL mem src));
 6291   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6292   ins_cost(360);
 6293   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6294             "MOVD   $tmp,$src.lo\n\t"
 6295             "MOVD   $tmp2,$src.hi\n\t"
 6296             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6297             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6298   ins_encode %{
 6299     __ cmpl(rax, $mem$$Address);
 6300     __ movdl($tmp$$XMMRegister, $src$$Register);
 6301     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6302     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6303     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6304   %}
 6305   ins_pipe( pipe_slow );
 6306 %}
 6307 
 6308 // Store Pointer; for storing unknown oops and raw pointers
 6309 instruct storeP(memory mem, anyRegP src) %{
 6310   match(Set mem (StoreP mem src));
 6311 
 6312   ins_cost(125);
 6313   format %{ "MOV    $mem,$src" %}
 6314   opcode(0x89);
 6315   ins_encode( OpcP, RegMem( src, mem ) );
 6316   ins_pipe( ialu_mem_reg );
 6317 %}
 6318 
 6319 // Store Integer Immediate
 6320 instruct storeImmI(memory mem, immI src) %{
 6321   match(Set mem (StoreI mem src));
 6322 
 6323   ins_cost(150);
 6324   format %{ "MOV    $mem,$src" %}
 6325   opcode(0xC7);               /* C7 /0 */
 6326   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6327   ins_pipe( ialu_mem_imm );
 6328 %}
 6329 
 6330 // Store Short/Char Immediate
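// Note (informal): this form is guarded by UseStoreImmI16 because a 16-bit
// immediate store carries an operand-size prefix that can cause a
// length-changing-prefix (LCP) decode stall on some processors.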
 6331 instruct storeImmI16(memory mem, immI16 src) %{
 6332   predicate(UseStoreImmI16);
 6333   match(Set mem (StoreC mem src));
 6334 
 6335   ins_cost(150);
 6336   format %{ "MOV16  $mem,$src" %}
 6337   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6338   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6339   ins_pipe( ialu_mem_imm );
 6340 %}
 6341 
 6342 // Store Pointer Immediate; null pointers or constant oops that do not
 6343 // need card-mark barriers.
 6344 instruct storeImmP(memory mem, immP src) %{
 6345   match(Set mem (StoreP mem src));
 6346 
 6347   ins_cost(150);
 6348   format %{ "MOV    $mem,$src" %}
 6349   opcode(0xC7);               /* C7 /0 */
 6350   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6351   ins_pipe( ialu_mem_imm );
 6352 %}
 6353 
 6354 // Store Byte Immediate
 6355 instruct storeImmB(memory mem, immI8 src) %{
 6356   match(Set mem (StoreB mem src));
 6357 
 6358   ins_cost(150);
 6359   format %{ "MOV8   $mem,$src" %}
 6360   opcode(0xC6);               /* C6 /0 */
 6361   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6362   ins_pipe( ialu_mem_imm );
 6363 %}
 6364 
 6365 // Store CMS card-mark Immediate
 6366 instruct storeImmCM(memory mem, immI8 src) %{
 6367   match(Set mem (StoreCM mem src));
 6368 
 6369   ins_cost(150);
 6370   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6371   opcode(0xC6);               /* C6 /0 */
 6372   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6373   ins_pipe( ialu_mem_imm );
 6374 %}
 6375 
 6376 // Store Double
 6377 instruct storeDPR( memory mem, regDPR1 src) %{
 6378   predicate(UseSSE<=1);
 6379   match(Set mem (StoreD mem src));
 6380 
 6381   ins_cost(100);
 6382   format %{ "FST_D  $mem,$src" %}
 6383   opcode(0xDD);       /* DD /2 */
 6384   ins_encode( enc_FPR_store(mem,src) );
 6385   ins_pipe( fpu_mem_reg );
 6386 %}
 6387 
 6388 // Store double does rounding on x86
 6389 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6390   predicate(UseSSE<=1);
 6391   match(Set mem (StoreD mem (RoundDouble src)));
 6392 
 6393   ins_cost(100);
 6394   format %{ "FST_D  $mem,$src\t# round" %}
 6395   opcode(0xDD);       /* DD /2 */
 6396   ins_encode( enc_FPR_store(mem,src) );
 6397   ins_pipe( fpu_mem_reg );
 6398 %}
 6399 
// Store XMM register to memory (double-precision floating point)
 6401 // MOVSD instruction
 6402 instruct storeD(memory mem, regD src) %{
 6403   predicate(UseSSE>=2);
 6404   match(Set mem (StoreD mem src));
 6405   ins_cost(95);
 6406   format %{ "MOVSD  $mem,$src" %}
 6407   ins_encode %{
 6408     __ movdbl($mem$$Address, $src$$XMMRegister);
 6409   %}
 6410   ins_pipe( pipe_slow );
 6411 %}
 6412 
 6413 // Store XMM register to memory (single-precision floating point)
 6414 // MOVSS instruction
 6415 instruct storeF(memory mem, regF src) %{
 6416   predicate(UseSSE>=1);
 6417   match(Set mem (StoreF mem src));
 6418   ins_cost(95);
 6419   format %{ "MOVSS  $mem,$src" %}
 6420   ins_encode %{
 6421     __ movflt($mem$$Address, $src$$XMMRegister);
 6422   %}
 6423   ins_pipe( pipe_slow );
 6424 %}
 6425 
 6426 
 6427 // Store Float
 6428 instruct storeFPR( memory mem, regFPR1 src) %{
 6429   predicate(UseSSE==0);
 6430   match(Set mem (StoreF mem src));
 6431 
 6432   ins_cost(100);
 6433   format %{ "FST_S  $mem,$src" %}
 6434   opcode(0xD9);       /* D9 /2 */
 6435   ins_encode( enc_FPR_store(mem,src) );
 6436   ins_pipe( fpu_mem_reg );
 6437 %}
 6438 
 6439 // Store Float does rounding on x86
 6440 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6441   predicate(UseSSE==0);
 6442   match(Set mem (StoreF mem (RoundFloat src)));
 6443 
 6444   ins_cost(100);
 6445   format %{ "FST_S  $mem,$src\t# round" %}
 6446   opcode(0xD9);       /* D9 /2 */
 6447   ins_encode( enc_FPR_store(mem,src) );
 6448   ins_pipe( fpu_mem_reg );
 6449 %}
 6450 
// Store Float from a double (ConvD2F) does rounding on x86
 6452 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6453   predicate(UseSSE<=1);
 6454   match(Set mem (StoreF mem (ConvD2F src)));
 6455 
 6456   ins_cost(100);
 6457   format %{ "FST_S  $mem,$src\t# D-round" %}
 6458   opcode(0xD9);       /* D9 /2 */
 6459   ins_encode( enc_FPR_store(mem,src) );
 6460   ins_pipe( fpu_mem_reg );
 6461 %}
 6462 
 6463 // Store immediate Float value (it is faster than store from FPU register)
 6464 // The instruction usage is guarded by predicate in operand immFPR().
 6465 instruct storeFPR_imm( memory mem, immFPR src) %{
 6466   match(Set mem (StoreF mem src));
 6467 
 6468   ins_cost(50);
 6469   format %{ "MOV    $mem,$src\t# store float" %}
 6470   opcode(0xC7);               /* C7 /0 */
 6471   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6472   ins_pipe( ialu_mem_imm );
 6473 %}
 6474 
 6475 // Store immediate Float value (it is faster than store from XMM register)
 6476 // The instruction usage is guarded by predicate in operand immF().
 6477 instruct storeF_imm( memory mem, immF src) %{
 6478   match(Set mem (StoreF mem src));
 6479 
 6480   ins_cost(50);
 6481   format %{ "MOV    $mem,$src\t# store float" %}
 6482   opcode(0xC7);               /* C7 /0 */
 6483   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6484   ins_pipe( ialu_mem_imm );
 6485 %}
 6486 
 6487 // Store Integer to stack slot
 6488 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6489   match(Set dst src);
 6490 
 6491   ins_cost(100);
 6492   format %{ "MOV    $dst,$src" %}
 6493   opcode(0x89);
 6494   ins_encode( OpcPRegSS( dst, src ) );
 6495   ins_pipe( ialu_mem_reg );
 6496 %}
 6497 
// Store Pointer to stack slot
 6499 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6500   match(Set dst src);
 6501 
 6502   ins_cost(100);
 6503   format %{ "MOV    $dst,$src" %}
 6504   opcode(0x89);
 6505   ins_encode( OpcPRegSS( dst, src ) );
 6506   ins_pipe( ialu_mem_reg );
 6507 %}
 6508 
 6509 // Store Long to stack slot
 6510 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6511   match(Set dst src);
 6512 
 6513   ins_cost(200);
 6514   format %{ "MOV    $dst,$src.lo\n\t"
 6515             "MOV    $dst+4,$src.hi" %}
 6516   opcode(0x89, 0x89);
 6517   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6518   ins_pipe( ialu_mem_long_reg );
 6519 %}
 6520 
 6521 //----------MemBar Instructions-----------------------------------------------
 6522 // Memory barrier flavors
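// IA32 has a strong (TSO-like) memory model, so only the StoreLoad case
// (membar_volatile below) needs real code; the acquire/release/storestore
// flavors match empty encodings and merely constrain instruction scheduling.
// The StoreLoad fence is a locked ADD of zero to the top of the stack, which
// is typically cheaper than MFENCE.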
 6523 
 6524 instruct membar_acquire() %{
 6525   match(MemBarAcquire);
 6526   match(LoadFence);
 6527   ins_cost(400);
 6528 
 6529   size(0);
 6530   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6531   ins_encode();
 6532   ins_pipe(empty);
 6533 %}
 6534 
 6535 instruct membar_acquire_lock() %{
 6536   match(MemBarAcquireLock);
 6537   ins_cost(0);
 6538 
 6539   size(0);
 6540   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6541   ins_encode( );
 6542   ins_pipe(empty);
 6543 %}
 6544 
 6545 instruct membar_release() %{
 6546   match(MemBarRelease);
 6547   match(StoreFence);
 6548   ins_cost(400);
 6549 
 6550   size(0);
 6551   format %{ "MEMBAR-release ! (empty encoding)" %}
 6552   ins_encode( );
 6553   ins_pipe(empty);
 6554 %}
 6555 
 6556 instruct membar_release_lock() %{
 6557   match(MemBarReleaseLock);
 6558   ins_cost(0);
 6559 
 6560   size(0);
 6561   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6562   ins_encode( );
 6563   ins_pipe(empty);
 6564 %}
 6565 
 6566 instruct membar_volatile(eFlagsReg cr) %{
 6567   match(MemBarVolatile);
 6568   effect(KILL cr);
 6569   ins_cost(400);
 6570 
 6571   format %{
 6572     $$template
 6573     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6574   %}
 6575   ins_encode %{
 6576     __ membar(Assembler::StoreLoad);
 6577   %}
 6578   ins_pipe(pipe_slow);
 6579 %}
 6580 
 6581 instruct unnecessary_membar_volatile() %{
 6582   match(MemBarVolatile);
 6583   predicate(Matcher::post_store_load_barrier(n));
 6584   ins_cost(0);
 6585 
 6586   size(0);
 6587   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6588   ins_encode( );
 6589   ins_pipe(empty);
 6590 %}
 6591 
 6592 instruct membar_storestore() %{
 6593   match(MemBarStoreStore);
 6594   match(StoreStoreFence);
 6595   ins_cost(0);
 6596 
 6597   size(0);
 6598   format %{ "MEMBAR-storestore (empty encoding)" %}
 6599   ins_encode( );
 6600   ins_pipe(empty);
 6601 %}
 6602 
 6603 //----------Move Instructions--------------------------------------------------
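// CastX2P is free here: dst and src are both pinned to EAX, so the node is
// just a re-typing of the same register and no code is emitted.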
 6604 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6605   match(Set dst (CastX2P src));
 6606   format %{ "# X2P  $dst, $src" %}
 6607   ins_encode( /*empty encoding*/ );
 6608   ins_cost(0);
 6609   ins_pipe(empty);
 6610 %}
 6611 
 6612 instruct castP2X(rRegI dst, eRegP src ) %{
 6613   match(Set dst (CastP2X src));
 6614   ins_cost(50);
 6615   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6616   ins_encode( enc_Copy( dst, src) );
 6617   ins_pipe( ialu_reg_reg );
 6618 %}
 6619 
 6620 //----------Conditional Move---------------------------------------------------
 6621 // Conditional move
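// When CMOVcc is not available (predicate !VM_Version::supports_cmov()), the
// jmov* forms below emulate it with a short branch on the inverted condition
// around a plain MOV; otherwise the cmov* forms are used.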
 6622 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6623   predicate(!VM_Version::supports_cmov() );
 6624   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6625   ins_cost(200);
 6626   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6627             "MOV    $dst,$src\n"
 6628       "skip:" %}
 6629   ins_encode %{
 6630     Label Lskip;
 6631     // Invert sense of branch from sense of CMOV
 6632     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6633     __ movl($dst$$Register, $src$$Register);
 6634     __ bind(Lskip);
 6635   %}
 6636   ins_pipe( pipe_cmov_reg );
 6637 %}
 6638 
 6639 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6640   predicate(!VM_Version::supports_cmov() );
 6641   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6642   ins_cost(200);
 6643   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6644             "MOV    $dst,$src\n"
 6645       "skip:" %}
 6646   ins_encode %{
 6647     Label Lskip;
 6648     // Invert sense of branch from sense of CMOV
 6649     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6650     __ movl($dst$$Register, $src$$Register);
 6651     __ bind(Lskip);
 6652   %}
 6653   ins_pipe( pipe_cmov_reg );
 6654 %}
 6655 
 6656 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6657   predicate(VM_Version::supports_cmov() );
 6658   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6659   ins_cost(200);
 6660   format %{ "CMOV$cop $dst,$src" %}
 6661   opcode(0x0F,0x40);
 6662   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6663   ins_pipe( pipe_cmov_reg );
 6664 %}
 6665 
 6666 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6667   predicate(VM_Version::supports_cmov() );
 6668   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6669   ins_cost(200);
 6670   format %{ "CMOV$cop $dst,$src" %}
 6671   opcode(0x0F,0x40);
 6672   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6673   ins_pipe( pipe_cmov_reg );
 6674 %}
 6675 
 6676 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6677   predicate(VM_Version::supports_cmov() );
 6678   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6679   ins_cost(200);
 6680   expand %{
 6681     cmovI_regU(cop, cr, dst, src);
 6682   %}
 6683 %}
 6684 
 6685 // Conditional move
 6686 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6687   predicate(VM_Version::supports_cmov() );
 6688   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6689   ins_cost(250);
 6690   format %{ "CMOV$cop $dst,$src" %}
 6691   opcode(0x0F,0x40);
 6692   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6693   ins_pipe( pipe_cmov_mem );
 6694 %}
 6695 
 6696 // Conditional move
 6697 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6698   predicate(VM_Version::supports_cmov() );
 6699   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6700   ins_cost(250);
 6701   format %{ "CMOV$cop $dst,$src" %}
 6702   opcode(0x0F,0x40);
 6703   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6704   ins_pipe( pipe_cmov_mem );
 6705 %}
 6706 
 6707 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6708   predicate(VM_Version::supports_cmov() );
 6709   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6710   ins_cost(250);
 6711   expand %{
 6712     cmovI_memU(cop, cr, dst, src);
 6713   %}
 6714 %}
 6715 
 6716 // Conditional move
 6717 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6718   predicate(VM_Version::supports_cmov() );
 6719   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6720   ins_cost(200);
 6721   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6722   opcode(0x0F,0x40);
 6723   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6724   ins_pipe( pipe_cmov_reg );
 6725 %}
 6726 
 6727 // Conditional move (non-P6 version)
 6728 // Note:  a CMoveP is generated for  stubs and native wrappers
 6729 //        regardless of whether we are on a P6, so we
 6730 //        emulate a cmov here
 6731 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6732   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6733   ins_cost(300);
 6734   format %{ "Jn$cop   skip\n\t"
 6735           "MOV    $dst,$src\t# pointer\n"
 6736       "skip:" %}
 6737   opcode(0x8b);
 6738   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6739   ins_pipe( pipe_cmov_reg );
 6740 %}
 6741 
 6742 // Conditional move
 6743 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6744   predicate(VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6748   opcode(0x0F,0x40);
 6749   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6750   ins_pipe( pipe_cmov_reg );
 6751 %}
 6752 
 6753 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6754   predicate(VM_Version::supports_cmov() );
 6755   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6756   ins_cost(200);
 6757   expand %{
 6758     cmovP_regU(cop, cr, dst, src);
 6759   %}
 6760 %}
 6761 
 6762 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6763 // correctly meets the two pointer arguments; one is an incoming
 6764 // register but the other is a memory operand.  ALSO appears to
 6765 // be buggy with implicit null checks.
 6766 //
 6767 //// Conditional move
 6768 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6769 //  predicate(VM_Version::supports_cmov() );
 6770 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6771 //  ins_cost(250);
 6772 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6773 //  opcode(0x0F,0x40);
 6774 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6775 //  ins_pipe( pipe_cmov_mem );
 6776 //%}
 6777 //
 6778 //// Conditional move
 6779 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6780 //  predicate(VM_Version::supports_cmov() );
 6781 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6782 //  ins_cost(250);
 6783 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6784 //  opcode(0x0F,0x40);
 6785 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6786 //  ins_pipe( pipe_cmov_mem );
 6787 //%}
 6788 
 6789 // Conditional move
 6790 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6791   predicate(UseSSE<=1);
 6792   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6793   ins_cost(200);
 6794   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6795   opcode(0xDA);
 6796   ins_encode( enc_cmov_dpr(cop,src) );
 6797   ins_pipe( pipe_cmovDPR_reg );
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6802   predicate(UseSSE==0);
 6803   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6804   ins_cost(200);
 6805   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6806   opcode(0xDA);
 6807   ins_encode( enc_cmov_dpr(cop,src) );
 6808   ins_pipe( pipe_cmovDPR_reg );
 6809 %}
 6810 
 6811 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6812 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6813   predicate(UseSSE<=1);
 6814   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6815   ins_cost(200);
 6816   format %{ "Jn$cop   skip\n\t"
 6817             "MOV    $dst,$src\t# double\n"
 6818       "skip:" %}
 6819   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6820   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6821   ins_pipe( pipe_cmovDPR_reg );
 6822 %}
 6823 
 6824 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6825 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6826   predicate(UseSSE==0);
 6827   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6828   ins_cost(200);
 6829   format %{ "Jn$cop    skip\n\t"
 6830             "MOV    $dst,$src\t# float\n"
 6831       "skip:" %}
 6832   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6833   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6834   ins_pipe( pipe_cmovDPR_reg );
 6835 %}
 6836 
 6837 // No CMOVE with SSE/SSE2
 6838 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6839   predicate (UseSSE>=1);
 6840   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6841   ins_cost(200);
 6842   format %{ "Jn$cop   skip\n\t"
 6843             "MOVSS  $dst,$src\t# float\n"
 6844       "skip:" %}
 6845   ins_encode %{
 6846     Label skip;
 6847     // Invert sense of branch from sense of CMOV
 6848     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6849     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6850     __ bind(skip);
 6851   %}
 6852   ins_pipe( pipe_slow );
 6853 %}
 6854 
 6855 // No CMOVE with SSE/SSE2
 6856 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6857   predicate (UseSSE>=2);
 6858   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6859   ins_cost(200);
 6860   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6862       "skip:" %}
 6863   ins_encode %{
 6864     Label skip;
 6865     // Invert sense of branch from sense of CMOV
 6866     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6867     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6868     __ bind(skip);
 6869   %}
 6870   ins_pipe( pipe_slow );
 6871 %}
 6872 
 6873 // unsigned version
 6874 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6875   predicate (UseSSE>=1);
 6876   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6877   ins_cost(200);
 6878   format %{ "Jn$cop   skip\n\t"
 6879             "MOVSS  $dst,$src\t# float\n"
 6880       "skip:" %}
 6881   ins_encode %{
 6882     Label skip;
 6883     // Invert sense of branch from sense of CMOV
 6884     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6885     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6886     __ bind(skip);
 6887   %}
 6888   ins_pipe( pipe_slow );
 6889 %}
 6890 
 6891 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6892   predicate (UseSSE>=1);
 6893   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6894   ins_cost(200);
 6895   expand %{
 6896     fcmovF_regU(cop, cr, dst, src);
 6897   %}
 6898 %}
 6899 
 6900 // unsigned version
 6901 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6902   predicate (UseSSE>=2);
 6903   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6904   ins_cost(200);
 6905   format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
 6907       "skip:" %}
 6908   ins_encode %{
 6909     Label skip;
 6910     // Invert sense of branch from sense of CMOV
 6911     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6912     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6913     __ bind(skip);
 6914   %}
 6915   ins_pipe( pipe_slow );
 6916 %}
 6917 
 6918 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6919   predicate (UseSSE>=2);
 6920   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6921   ins_cost(200);
 6922   expand %{
 6923     fcmovD_regU(cop, cr, dst, src);
 6924   %}
 6925 %}
 6926 
 6927 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6928   predicate(VM_Version::supports_cmov() );
 6929   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6930   ins_cost(200);
 6931   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6932             "CMOV$cop $dst.hi,$src.hi" %}
 6933   opcode(0x0F,0x40);
 6934   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6935   ins_pipe( pipe_cmov_reg_long );
 6936 %}
 6937 
 6938 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 6939   predicate(VM_Version::supports_cmov() );
 6940   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6941   ins_cost(200);
 6942   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6943             "CMOV$cop $dst.hi,$src.hi" %}
 6944   opcode(0x0F,0x40);
 6945   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6946   ins_pipe( pipe_cmov_reg_long );
 6947 %}
 6948 
 6949 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 6950   predicate(VM_Version::supports_cmov() );
 6951   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6952   ins_cost(200);
 6953   expand %{
 6954     cmovL_regU(cop, cr, dst, src);
 6955   %}
 6956 %}
 6957 
 6958 //----------Arithmetic Instructions--------------------------------------------
 6959 //----------Addition Instructions----------------------------------------------
 6960 
 6961 // Integer Addition Instructions
 6962 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 6963   match(Set dst (AddI dst src));
 6964   effect(KILL cr);
 6965 
 6966   size(2);
 6967   format %{ "ADD    $dst,$src" %}
 6968   opcode(0x03);
 6969   ins_encode( OpcP, RegReg( dst, src) );
 6970   ins_pipe( ialu_reg_reg );
 6971 %}
 6972 
 6973 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 6974   match(Set dst (AddI dst src));
 6975   effect(KILL cr);
 6976 
 6977   format %{ "ADD    $dst,$src" %}
 6978   opcode(0x81, 0x00); /* /0 id */
 6979   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6980   ins_pipe( ialu_reg );
 6981 %}
 6982 
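// Note (informal): INC/DEC leave CF unchanged, which can cause partial-flag
// stalls on some processors; UseIncDec selects between these short forms and
// the equivalent ADD/SUB of 1.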
 6983 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 6984   predicate(UseIncDec);
 6985   match(Set dst (AddI dst src));
 6986   effect(KILL cr);
 6987 
 6988   size(1);
 6989   format %{ "INC    $dst" %}
 6990   opcode(0x40); /*  */
 6991   ins_encode( Opc_plus( primary, dst ) );
 6992   ins_pipe( ialu_reg );
 6993 %}
 6994 
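// LEA performs the addition in the address-generation unit and leaves the
// flags untouched, which is why the LEA forms below need no KILL of eFlagsReg.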
 6995 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 6996   match(Set dst (AddI src0 src1));
 6997   ins_cost(110);
 6998 
 6999   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7000   opcode(0x8D); /* 0x8D /r */
 7001   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7002   ins_pipe( ialu_reg_reg );
 7003 %}
 7004 
 7005 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7006   match(Set dst (AddP src0 src1));
 7007   ins_cost(110);
 7008 
 7009   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7010   opcode(0x8D); /* 0x8D /r */
 7011   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7012   ins_pipe( ialu_reg_reg );
 7013 %}
 7014 
 7015 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7016   predicate(UseIncDec);
 7017   match(Set dst (AddI dst src));
 7018   effect(KILL cr);
 7019 
 7020   size(1);
 7021   format %{ "DEC    $dst" %}
 7022   opcode(0x48); /*  */
 7023   ins_encode( Opc_plus( primary, dst ) );
 7024   ins_pipe( ialu_reg );
 7025 %}
 7026 
 7027 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7028   match(Set dst (AddP dst src));
 7029   effect(KILL cr);
 7030 
 7031   size(2);
 7032   format %{ "ADD    $dst,$src" %}
 7033   opcode(0x03);
 7034   ins_encode( OpcP, RegReg( dst, src) );
 7035   ins_pipe( ialu_reg_reg );
 7036 %}
 7037 
 7038 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7039   match(Set dst (AddP dst src));
 7040   effect(KILL cr);
 7041 
 7042   format %{ "ADD    $dst,$src" %}
 7043   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7044   // ins_encode( RegImm( dst, src) );
 7045   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7046   ins_pipe( ialu_reg );
 7047 %}
 7048 
 7049 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7050   match(Set dst (AddI dst (LoadI src)));
 7051   effect(KILL cr);
 7052 
 7053   ins_cost(150);
 7054   format %{ "ADD    $dst,$src" %}
 7055   opcode(0x03);
 7056   ins_encode( OpcP, RegMem( dst, src) );
 7057   ins_pipe( ialu_reg_mem );
 7058 %}
 7059 
 7060 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7061   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7062   effect(KILL cr);
 7063 
 7064   ins_cost(150);
 7065   format %{ "ADD    $dst,$src" %}
 7066   opcode(0x01);  /* Opcode 01 /r */
 7067   ins_encode( OpcP, RegMem( src, dst ) );
 7068   ins_pipe( ialu_mem_reg );
 7069 %}
 7070 
 7071 // Add Memory with Immediate
 7072 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7073   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7074   effect(KILL cr);
 7075 
 7076   ins_cost(125);
 7077   format %{ "ADD    $dst,$src" %}
 7078   opcode(0x81);               /* Opcode 81 /0 id */
 7079   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7080   ins_pipe( ialu_mem_imm );
 7081 %}
 7082 
 7083 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7084   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7085   effect(KILL cr);
 7086 
 7087   ins_cost(125);
 7088   format %{ "INC    $dst" %}
 7089   opcode(0xFF);               /* Opcode FF /0 */
 7090   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7091   ins_pipe( ialu_mem_imm );
 7092 %}
 7093 
 7094 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7095   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7096   effect(KILL cr);
 7097 
 7098   ins_cost(125);
 7099   format %{ "DEC    $dst" %}
 7100   opcode(0xFF);               /* Opcode FF /1 */
 7101   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7102   ins_pipe( ialu_mem_imm );
 7103 %}
 7104 
 7105 
 7106 instruct checkCastPP( eRegP dst ) %{
 7107   match(Set dst (CheckCastPP dst));
 7108 
 7109   size(0);
 7110   format %{ "#checkcastPP of $dst" %}
 7111   ins_encode( /*empty encoding*/ );
 7112   ins_pipe( empty );
 7113 %}
 7114 
 7115 instruct castPP( eRegP dst ) %{
 7116   match(Set dst (CastPP dst));
 7117   format %{ "#castPP of $dst" %}
 7118   ins_encode( /*empty encoding*/ );
 7119   ins_pipe( empty );
 7120 %}
 7121 
 7122 instruct castII( rRegI dst ) %{
 7123   match(Set dst (CastII dst));
 7124   format %{ "#castII of $dst" %}
 7125   ins_encode( /*empty encoding*/ );
 7126   ins_cost(0);
 7127   ins_pipe( empty );
 7128 %}
 7129 
 7130 instruct castLL( eRegL dst ) %{
 7131   match(Set dst (CastLL dst));
 7132   format %{ "#castLL of $dst" %}
 7133   ins_encode( /*empty encoding*/ );
 7134   ins_cost(0);
 7135   ins_pipe( empty );
 7136 %}
 7137 
 7138 instruct castFF( regF dst ) %{
 7139   predicate(UseSSE >= 1);
 7140   match(Set dst (CastFF dst));
 7141   format %{ "#castFF of $dst" %}
 7142   ins_encode( /*empty encoding*/ );
 7143   ins_cost(0);
 7144   ins_pipe( empty );
 7145 %}
 7146 
 7147 instruct castDD( regD dst ) %{
 7148   predicate(UseSSE >= 2);
 7149   match(Set dst (CastDD dst));
 7150   format %{ "#castDD of $dst" %}
 7151   ins_encode( /*empty encoding*/ );
 7152   ins_cost(0);
 7153   ins_pipe( empty );
 7154 %}
 7155 
 7156 instruct castFF_PR( regFPR dst ) %{
 7157   predicate(UseSSE < 1);
 7158   match(Set dst (CastFF dst));
 7159   format %{ "#castFF of $dst" %}
 7160   ins_encode( /*empty encoding*/ );
 7161   ins_cost(0);
 7162   ins_pipe( empty );
 7163 %}
 7164 
 7165 instruct castDD_PR( regDPR dst ) %{
 7166   predicate(UseSSE < 2);
 7167   match(Set dst (CastDD dst));
 7168   format %{ "#castDD of $dst" %}
 7169   ins_encode( /*empty encoding*/ );
 7170   ins_cost(0);
 7171   ins_pipe( empty );
 7172 %}
 7173 
 7174 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
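// CMPXCHG implicitly compares against and writes back through EAX (EDX:EAX
// for CMPXCHG8B), which is why oldval is pinned to those registers below and
// killed in the CompareAndSwap forms, where only the boolean result survives.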
 7175 
 7176 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7177   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7178   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7179   effect(KILL cr, KILL oldval);
 7180   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7181             "MOV    $res,0\n\t"
 7182             "JNE,s  fail\n\t"
 7183             "MOV    $res,1\n"
 7184           "fail:" %}
 7185   ins_encode( enc_cmpxchg8(mem_ptr),
 7186               enc_flags_ne_to_boolean(res) );
 7187   ins_pipe( pipe_cmpxchg );
 7188 %}
 7189 
 7190 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7191   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7192   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7193   effect(KILL cr, KILL oldval);
 7194   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7195             "MOV    $res,0\n\t"
 7196             "JNE,s  fail\n\t"
 7197             "MOV    $res,1\n"
 7198           "fail:" %}
 7199   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7200   ins_pipe( pipe_cmpxchg );
 7201 %}
 7202 
 7203 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7204   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7205   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7206   effect(KILL cr, KILL oldval);
 7207   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7208             "MOV    $res,0\n\t"
 7209             "JNE,s  fail\n\t"
 7210             "MOV    $res,1\n"
 7211           "fail:" %}
 7212   ins_encode( enc_cmpxchgb(mem_ptr),
 7213               enc_flags_ne_to_boolean(res) );
 7214   ins_pipe( pipe_cmpxchg );
 7215 %}
 7216 
 7217 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7218   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7219   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7220   effect(KILL cr, KILL oldval);
 7221   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7222             "MOV    $res,0\n\t"
 7223             "JNE,s  fail\n\t"
 7224             "MOV    $res,1\n"
 7225           "fail:" %}
 7226   ins_encode( enc_cmpxchgw(mem_ptr),
 7227               enc_flags_ne_to_boolean(res) );
 7228   ins_pipe( pipe_cmpxchg );
 7229 %}
 7230 
 7231 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7232   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7233   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7234   effect(KILL cr, KILL oldval);
 7235   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7236             "MOV    $res,0\n\t"
 7237             "JNE,s  fail\n\t"
 7238             "MOV    $res,1\n"
 7239           "fail:" %}
 7240   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7241   ins_pipe( pipe_cmpxchg );
 7242 %}
 7243 
 7244 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7245   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7246   effect(KILL cr);
 7247   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7248   ins_encode( enc_cmpxchg8(mem_ptr) );
 7249   ins_pipe( pipe_cmpxchg );
 7250 %}
 7251 
 7252 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7253   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7254   effect(KILL cr);
 7255   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7256   ins_encode( enc_cmpxchg(mem_ptr) );
 7257   ins_pipe( pipe_cmpxchg );
 7258 %}
 7259 
 7260 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7261   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7262   effect(KILL cr);
 7263   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7264   ins_encode( enc_cmpxchgb(mem_ptr) );
 7265   ins_pipe( pipe_cmpxchg );
 7266 %}
 7267 
 7268 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7269   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7270   effect(KILL cr);
 7271   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7272   ins_encode( enc_cmpxchgw(mem_ptr) );
 7273   ins_pipe( pipe_cmpxchg );
 7274 %}
 7275 
 7276 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7277   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7278   effect(KILL cr);
 7279   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7280   ins_encode( enc_cmpxchg(mem_ptr) );
 7281   ins_pipe( pipe_cmpxchg );
 7282 %}
 7283 
 7284 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7285   predicate(n->as_LoadStore()->result_not_used());
 7286   match(Set dummy (GetAndAddB mem add));
 7287   effect(KILL cr);
 7288   format %{ "ADDB  [$mem],$add" %}
 7289   ins_encode %{
 7290     __ lock();
 7291     __ addb($mem$$Address, $add$$constant);
 7292   %}
 7293   ins_pipe( pipe_cmpxchg );
 7294 %}
 7295 
 7296 // Important to match to xRegI: only 8-bit regs.
 7297 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7298   match(Set newval (GetAndAddB mem newval));
 7299   effect(KILL cr);
 7300   format %{ "XADDB  [$mem],$newval" %}
 7301   ins_encode %{
 7302     __ lock();
 7303     __ xaddb($mem$$Address, $newval$$Register);
 7304   %}
 7305   ins_pipe( pipe_cmpxchg );
 7306 %}
 7307 
 7308 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7309   predicate(n->as_LoadStore()->result_not_used());
 7310   match(Set dummy (GetAndAddS mem add));
 7311   effect(KILL cr);
 7312   format %{ "ADDS  [$mem],$add" %}
 7313   ins_encode %{
 7314     __ lock();
 7315     __ addw($mem$$Address, $add$$constant);
 7316   %}
 7317   ins_pipe( pipe_cmpxchg );
 7318 %}
 7319 
 7320 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7321   match(Set newval (GetAndAddS mem newval));
 7322   effect(KILL cr);
 7323   format %{ "XADDS  [$mem],$newval" %}
 7324   ins_encode %{
 7325     __ lock();
 7326     __ xaddw($mem$$Address, $newval$$Register);
 7327   %}
 7328   ins_pipe( pipe_cmpxchg );
 7329 %}
 7330 
 7331 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7332   predicate(n->as_LoadStore()->result_not_used());
 7333   match(Set dummy (GetAndAddI mem add));
 7334   effect(KILL cr);
 7335   format %{ "ADDL  [$mem],$add" %}
 7336   ins_encode %{
 7337     __ lock();
 7338     __ addl($mem$$Address, $add$$constant);
 7339   %}
 7340   ins_pipe( pipe_cmpxchg );
 7341 %}
 7342 
 7343 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7344   match(Set newval (GetAndAddI mem newval));
 7345   effect(KILL cr);
 7346   format %{ "XADDL  [$mem],$newval" %}
 7347   ins_encode %{
 7348     __ lock();
 7349     __ xaddl($mem$$Address, $newval$$Register);
 7350   %}
 7351   ins_pipe( pipe_cmpxchg );
 7352 %}
 7353 
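// Note: the XCHG-based GetAndSet forms below need no explicit LOCK prefix;
// XCHG with a memory operand is implicitly locked by the processor.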
 7354 // Important to match to xRegI: only 8-bit regs.
 7355 instruct xchgB( memory mem, xRegI newval) %{
 7356   match(Set newval (GetAndSetB mem newval));
 7357   format %{ "XCHGB  $newval,[$mem]" %}
 7358   ins_encode %{
 7359     __ xchgb($newval$$Register, $mem$$Address);
 7360   %}
 7361   ins_pipe( pipe_cmpxchg );
 7362 %}
 7363 
 7364 instruct xchgS( memory mem, rRegI newval) %{
 7365   match(Set newval (GetAndSetS mem newval));
 7366   format %{ "XCHGW  $newval,[$mem]" %}
 7367   ins_encode %{
 7368     __ xchgw($newval$$Register, $mem$$Address);
 7369   %}
 7370   ins_pipe( pipe_cmpxchg );
 7371 %}
 7372 
 7373 instruct xchgI( memory mem, rRegI newval) %{
 7374   match(Set newval (GetAndSetI mem newval));
 7375   format %{ "XCHGL  $newval,[$mem]" %}
 7376   ins_encode %{
 7377     __ xchgl($newval$$Register, $mem$$Address);
 7378   %}
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
 7381 
 7382 instruct xchgP( memory mem, pRegP newval) %{
 7383   match(Set newval (GetAndSetP mem newval));
 7384   format %{ "XCHGL  $newval,[$mem]" %}
 7385   ins_encode %{
 7386     __ xchgl($newval$$Register, $mem$$Address);
 7387   %}
 7388   ins_pipe( pipe_cmpxchg );
 7389 %}
 7390 
 7391 //----------Subtraction Instructions-------------------------------------------
 7392 
 7393 // Integer Subtraction Instructions
 7394 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7395   match(Set dst (SubI dst src));
 7396   effect(KILL cr);
 7397 
 7398   size(2);
 7399   format %{ "SUB    $dst,$src" %}
 7400   opcode(0x2B);
 7401   ins_encode( OpcP, RegReg( dst, src) );
 7402   ins_pipe( ialu_reg_reg );
 7403 %}
 7404 
 7405 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7406   match(Set dst (SubI dst src));
 7407   effect(KILL cr);
 7408 
 7409   format %{ "SUB    $dst,$src" %}
 7410   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7411   // ins_encode( RegImm( dst, src) );
 7412   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7413   ins_pipe( ialu_reg );
 7414 %}
 7415 
 7416 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7417   match(Set dst (SubI dst (LoadI src)));
 7418   effect(KILL cr);
 7419 
 7420   ins_cost(150);
 7421   format %{ "SUB    $dst,$src" %}
 7422   opcode(0x2B);
 7423   ins_encode( OpcP, RegMem( dst, src) );
 7424   ins_pipe( ialu_reg_mem );
 7425 %}
 7426 
 7427 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7428   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7429   effect(KILL cr);
 7430 
 7431   ins_cost(150);
 7432   format %{ "SUB    $dst,$src" %}
 7433   opcode(0x29);  /* Opcode 29 /r */
 7434   ins_encode( OpcP, RegMem( src, dst ) );
 7435   ins_pipe( ialu_mem_reg );
 7436 %}
 7437 
 7438 // Subtract from a pointer
 7439 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7440   match(Set dst (AddP dst (SubI zero src)));
 7441   effect(KILL cr);
 7442 
 7443   size(2);
 7444   format %{ "SUB    $dst,$src" %}
 7445   opcode(0x2B);
 7446   ins_encode( OpcP, RegReg( dst, src) );
 7447   ins_pipe( ialu_reg_reg );
 7448 %}
 7449 
 7450 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7451   match(Set dst (SubI zero dst));
 7452   effect(KILL cr);
 7453 
 7454   size(2);
 7455   format %{ "NEG    $dst" %}
 7456   opcode(0xF7,0x03);  // Opcode F7 /3
 7457   ins_encode( OpcP, RegOpc( dst ) );
 7458   ins_pipe( ialu_reg );
 7459 %}
 7460 
 7461 //----------Multiplication/Division Instructions-------------------------------
 7462 // Integer Multiplication Instructions
 7463 // Multiply Register
 7464 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7465   match(Set dst (MulI dst src));
 7466   effect(KILL cr);
 7467 
 7468   size(3);
 7469   ins_cost(300);
 7470   format %{ "IMUL   $dst,$src" %}
 7471   opcode(0xAF, 0x0F);
 7472   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7473   ins_pipe( ialu_reg_reg_alu0 );
 7474 %}
 7475 
 7476 // Multiply 32-bit Immediate
 7477 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7478   match(Set dst (MulI src imm));
 7479   effect(KILL cr);
 7480 
 7481   ins_cost(300);
 7482   format %{ "IMUL   $dst,$src,$imm" %}
 7483   opcode(0x69);  /* 69 /r id */
 7484   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7485   ins_pipe( ialu_reg_reg_alu0 );
 7486 %}
 7487 
 7488 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7489   match(Set dst src);
 7490   effect(KILL cr);
 7491 
 7492   // Note that this is artificially increased to make it more expensive than loadConL
 7493   ins_cost(250);
 7494   format %{ "MOV    EAX,$src\t// low word only" %}
 7495   opcode(0xB8);
 7496   ins_encode( LdImmL_Lo(dst, src) );
 7497   ins_pipe( ialu_reg_fat );
 7498 %}
 7499 
 7500 // Multiply by 32-bit Immediate, taking the shifted high order results
 7501 //  (special case for shift by 32)
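// (For example, this matches the shape (int)(((long)x * C) >> 32), typically
//  produced when a divide by a constant is strength-reduced into a multiply by
//  a fixed-point reciprocal; with a shift of exactly 32 the high half is
//  already in EDX, so no SAR is needed.)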
 7502 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7503   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7504   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7505              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7506              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7507   effect(USE src1, KILL cr);
 7508 
 7509   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7510   ins_cost(0*100 + 1*400 - 150);
 7511   format %{ "IMUL   EDX:EAX,$src1" %}
 7512   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7513   ins_pipe( pipe_slow );
 7514 %}
 7515 
 7516 // Multiply by 32-bit Immediate, taking the shifted high order results
 7517 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7518   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7519   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7520              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7521              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7522   effect(USE src1, KILL cr);
 7523 
 7524   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7525   ins_cost(1*100 + 1*400 - 150);
 7526   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7527             "SAR    EDX,$cnt-32" %}
 7528   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7529   ins_pipe( pipe_slow );
 7530 %}
 7531 
 7532 // Multiply Memory 32-bit Immediate
 7533 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7534   match(Set dst (MulI (LoadI src) imm));
 7535   effect(KILL cr);
 7536 
 7537   ins_cost(300);
 7538   format %{ "IMUL   $dst,$src,$imm" %}
 7539   opcode(0x69);  /* 69 /r id */
 7540   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7541   ins_pipe( ialu_reg_mem_alu0 );
 7542 %}
 7543 
 7544 // Multiply Memory
 7545 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7546   match(Set dst (MulI dst (LoadI src)));
 7547   effect(KILL cr);
 7548 
 7549   ins_cost(350);
 7550   format %{ "IMUL   $dst,$src" %}
 7551   opcode(0xAF, 0x0F);
 7552   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7553   ins_pipe( ialu_reg_mem_alu0 );
 7554 %}
 7555 
 7556 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7557 %{
 7558   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7559   effect(KILL cr, KILL src2);
 7560 
 7561   expand %{ mulI_eReg(dst, src1, cr);
 7562            mulI_eReg(src2, src3, cr);
 7563            addI_eReg(dst, src2, cr); %}
 7564 %}
 7565 
 7566 // Multiply Register Int to Long
 7567 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7568   // Basic Idea: long = (long)int * (long)int
 7569   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7570   effect(DEF dst, USE src, USE src1, KILL flags);
 7571 
 7572   ins_cost(300);
 7573   format %{ "IMUL   $dst,$src1" %}
 7574 
 7575   ins_encode( long_int_multiply( dst, src1 ) );
 7576   ins_pipe( ialu_reg_reg_alu0 );
 7577 %}
 7578 
 7579 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7580   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7581   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7582   effect(KILL flags);
 7583 
 7584   ins_cost(300);
 7585   format %{ "MUL    $dst,$src1" %}
 7586 
 7587   ins_encode( long_uint_multiply(dst, src1) );
 7588   ins_pipe( ialu_reg_reg_alu0 );
 7589 %}
 7590 
 7591 // Multiply Register Long
 7592 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7593   match(Set dst (MulL dst src));
 7594   effect(KILL cr, TEMP tmp);
 7595   ins_cost(4*100+3*400);
 7596 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7597 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
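// For example, with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
//   x*y = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_hi*y_hi*2^64
// The last term lies entirely above bit 63 and is discarded, which is why
// only the three partial products in the sequence below are formed.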
 7598   format %{ "MOV    $tmp,$src.lo\n\t"
 7599             "IMUL   $tmp,EDX\n\t"
 7600             "MOV    EDX,$src.hi\n\t"
 7601             "IMUL   EDX,EAX\n\t"
 7602             "ADD    $tmp,EDX\n\t"
 7603             "MUL    EDX:EAX,$src.lo\n\t"
 7604             "ADD    EDX,$tmp" %}
 7605   ins_encode( long_multiply( dst, src, tmp ) );
 7606   ins_pipe( pipe_slow );
 7607 %}
 7608 
 7609 // Multiply Register Long where the left operand's high 32 bits are zero
 7610 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7611   predicate(is_operand_hi32_zero(n->in(1)));
 7612   match(Set dst (MulL dst src));
 7613   effect(KILL cr, TEMP tmp);
 7614   ins_cost(2*100+2*400);
 7615 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7616 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7617   format %{ "MOV    $tmp,$src.hi\n\t"
 7618             "IMUL   $tmp,EAX\n\t"
 7619             "MUL    EDX:EAX,$src.lo\n\t"
 7620             "ADD    EDX,$tmp" %}
 7621   ins_encode %{
 7622     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7623     __ imull($tmp$$Register, rax);
 7624     __ mull($src$$Register);
 7625     __ addl(rdx, $tmp$$Register);
 7626   %}
 7627   ins_pipe( pipe_slow );
 7628 %}
 7629 
 7630 // Multiply Register Long where the right operand's high 32 bits are zero
 7631 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7632   predicate(is_operand_hi32_zero(n->in(2)));
 7633   match(Set dst (MulL dst src));
 7634   effect(KILL cr, TEMP tmp);
 7635   ins_cost(2*100+2*400);
 7636 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7637 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7638   format %{ "MOV    $tmp,$src.lo\n\t"
 7639             "IMUL   $tmp,EDX\n\t"
 7640             "MUL    EDX:EAX,$src.lo\n\t"
 7641             "ADD    EDX,$tmp" %}
 7642   ins_encode %{
 7643     __ movl($tmp$$Register, $src$$Register);
 7644     __ imull($tmp$$Register, rdx);
 7645     __ mull($src$$Register);
 7646     __ addl(rdx, $tmp$$Register);
 7647   %}
 7648   ins_pipe( pipe_slow );
 7649 %}
 7650 
 7651 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7652 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7653   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7654   match(Set dst (MulL dst src));
 7655   effect(KILL cr);
 7656   ins_cost(1*400);
 7657 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7658 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7659   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7660   ins_encode %{
 7661     __ mull($src$$Register);
 7662   %}
 7663   ins_pipe( pipe_slow );
 7664 %}
 7665 
 7666 // Multiply Register Long by small constant
 7667 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7668   match(Set dst (MulL dst src));
 7669   effect(KILL cr, TEMP tmp);
 7670   ins_cost(2*100+2*400);
 7671   size(12);
 7672 // Basic idea: lo(result) = lo(src * EAX)
 7673 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7674   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7675             "MOV    EDX,$src\n\t"
 7676             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7677             "ADD    EDX,$tmp" %}
 7678   ins_encode( long_multiply_con( dst, src, tmp ) );
 7679   ins_pipe( pipe_slow );
 7680 %}
 7681 
 7682 // Integer DIV with Register
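// The 0x80000000 / -1 checks below handle the one case where IDIV itself
// would fault: min_jint / -1 overflows and raises #DE, so that case is
// short-circuited to return min_jint with a remainder of zero, as Java
// semantics require.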
 7683 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7684   match(Set rax (DivI rax div));
 7685   effect(KILL rdx, KILL cr);
 7686   size(26);
 7687   ins_cost(30*100+10*100);
 7688   format %{ "CMP    EAX,0x80000000\n\t"
 7689             "JNE,s  normal\n\t"
 7690             "XOR    EDX,EDX\n\t"
 7691             "CMP    ECX,-1\n\t"
 7692             "JE,s   done\n"
 7693     "normal: CDQ\n\t"
 7694             "IDIV   $div\n\t"
 7695     "done:"        %}
 7696   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7697   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7698   ins_pipe( ialu_reg_reg_alu0 );
 7699 %}
 7700 
 7701 // Divide Register Long
 7702 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7703   match(Set dst (DivL src1 src2));
 7704   effect(CALL);
 7705   ins_cost(10000);
 7706   format %{ "PUSH   $src1.hi\n\t"
 7707             "PUSH   $src1.lo\n\t"
 7708             "PUSH   $src2.hi\n\t"
 7709             "PUSH   $src2.lo\n\t"
 7710             "CALL   SharedRuntime::ldiv\n\t"
 7711             "ADD    ESP,16" %}
 7712   ins_encode( long_div(src1,src2) );
 7713   ins_pipe( pipe_slow );
 7714 %}
 7715 
 7716 // Integer DIVMOD with Register, both quotient and mod results
 7717 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7718   match(DivModI rax div);
 7719   effect(KILL cr);
 7720   size(26);
 7721   ins_cost(30*100+10*100);
 7722   format %{ "CMP    EAX,0x80000000\n\t"
 7723             "JNE,s  normal\n\t"
 7724             "XOR    EDX,EDX\n\t"
 7725             "CMP    ECX,-1\n\t"
 7726             "JE,s   done\n"
 7727     "normal: CDQ\n\t"
 7728             "IDIV   $div\n\t"
 7729     "done:"        %}
 7730   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7731   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7732   ins_pipe( pipe_slow );
 7733 %}
 7734 
 7735 // Integer MOD with Register
 7736 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7737   match(Set rdx (ModI rax div));
 7738   effect(KILL rax, KILL cr);
 7739 
 7740   size(26);
 7741   ins_cost(300);
 7742   format %{ "CDQ\n\t"
 7743             "IDIV   $div" %}
 7744   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7745   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7746   ins_pipe( ialu_reg_reg_alu0 );
 7747 %}
 7748 
 7749 // Remainder Register Long
 7750 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7751   match(Set dst (ModL src1 src2));
 7752   effect(CALL);
 7753   ins_cost(10000);
 7754   format %{ "PUSH   $src1.hi\n\t"
 7755             "PUSH   $src1.lo\n\t"
 7756             "PUSH   $src2.hi\n\t"
 7757             "PUSH   $src2.lo\n\t"
 7758             "CALL   SharedRuntime::lrem\n\t"
 7759             "ADD    ESP,16" %}
 7760   ins_encode( long_mod(src1,src2) );
 7761   ins_pipe( pipe_slow );
 7762 %}
 7763 
 7764 // Divide Register Long (no special case since divisor != -1)
 7765 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7766   match(Set dst (DivL dst imm));
 7767   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7768   ins_cost(1000);
 7769   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7770             "XOR    $tmp2,$tmp2\n\t"
 7771             "CMP    $tmp,EDX\n\t"
 7772             "JA,s   fast\n\t"
 7773             "MOV    $tmp2,EAX\n\t"
 7774             "MOV    EAX,EDX\n\t"
 7775             "MOV    EDX,0\n\t"
 7776             "JLE,s  pos\n\t"
 7777             "LNEG   EAX : $tmp2\n\t"
 7778             "DIV    $tmp # unsigned division\n\t"
 7779             "XCHG   EAX,$tmp2\n\t"
 7780             "DIV    $tmp\n\t"
 7781             "LNEG   $tmp2 : EAX\n\t"
 7782             "JMP,s  done\n"
 7783     "pos:\n\t"
 7784             "DIV    $tmp\n\t"
 7785             "XCHG   EAX,$tmp2\n"
 7786     "fast:\n\t"
 7787             "DIV    $tmp\n"
 7788     "done:\n\t"
 7789             "MOV    EDX,$tmp2\n\t"
 7790             "NEG    EDX:EAX # if $imm < 0" %}
 7791   ins_encode %{
 7792     int con = (int)$imm$$constant;
 7793     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7794     int pcon = (con > 0) ? con : -con;
 7795     Label Lfast, Lpos, Ldone;
 7796 
 7797     __ movl($tmp$$Register, pcon);
 7798     __ xorl($tmp2$$Register,$tmp2$$Register);
 7799     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7800     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7801 
 7802     __ movl($tmp2$$Register, $dst$$Register); // save
 7803     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7804     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7805     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7806 
 7807     // Negative dividend.
 7808     // convert value to positive to use unsigned division
 7809     __ lneg($dst$$Register, $tmp2$$Register);
 7810     __ divl($tmp$$Register);
 7811     __ xchgl($dst$$Register, $tmp2$$Register);
 7812     __ divl($tmp$$Register);
 7813     // revert result back to negative
 7814     __ lneg($tmp2$$Register, $dst$$Register);
 7815     __ jmpb(Ldone);
 7816 
 7817     __ bind(Lpos);
 7818     __ divl($tmp$$Register); // Use unsigned division
 7819     __ xchgl($dst$$Register, $tmp2$$Register);
 7820     // Fall through for the final divide; tmp2 has the 32-bit hi result
 7821 
 7822     __ bind(Lfast);
 7823     // fast path: src is positive
 7824     __ divl($tmp$$Register); // Use unsigned division
 7825 
 7826     __ bind(Ldone);
 7827     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7828     if (con < 0) {
 7829       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7830     }
 7831   %}
 7832   ins_pipe( pipe_slow );
 7833 %}
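      // Sketch of the two-step division above (schoolbook long division in base
      // 2^32, illustrative only).  With d = abs($imm) and a non-negative dividend
      // u = u_hi*2^32 + u_lo:
      //   q_hi = u_hi / d;  r = u_hi % d;        // first  DIV: EDX:EAX = 0:u_hi
      //   q_lo = (r*2^32 + u_lo) / d;            // second DIV: EDX:EAX = r:u_lo
      //   u / d == q_hi*2^32 + q_lo              // q_lo < 2^32 because r < d
      // The fast path applies when the dividend is non-negative and u_hi < d, so
      // q_hi == 0 and a single DIV suffices; negative dividends are negated first
      // and the sign of the quotient is restored afterwards.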
 7834 
 7835 // Remainder Register Long (remainder fits into 32 bits)
 7836 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7837   match(Set dst (ModL dst imm));
 7838   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7839   ins_cost(1000);
 7840   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7841             "CMP    $tmp,EDX\n\t"
 7842             "JA,s   fast\n\t"
 7843             "MOV    $tmp2,EAX\n\t"
 7844             "MOV    EAX,EDX\n\t"
 7845             "MOV    EDX,0\n\t"
 7846             "JLE,s  pos\n\t"
 7847             "LNEG   EAX : $tmp2\n\t"
 7848             "DIV    $tmp # unsigned division\n\t"
 7849             "MOV    EAX,$tmp2\n\t"
 7850             "DIV    $tmp\n\t"
 7851             "NEG    EDX\n\t"
 7852             "JMP,s  done\n"
 7853     "pos:\n\t"
 7854             "DIV    $tmp\n\t"
 7855             "MOV    EAX,$tmp2\n"
 7856     "fast:\n\t"
 7857             "DIV    $tmp\n"
 7858     "done:\n\t"
 7859             "MOV    EAX,EDX\n\t"
 7860             "SAR    EDX,31\n\t" %}
 7861   ins_encode %{
 7862     int con = (int)$imm$$constant;
 7863     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7864     int pcon = (con > 0) ? con : -con;
 7865     Label  Lfast, Lpos, Ldone;
 7866 
 7867     __ movl($tmp$$Register, pcon);
 7868     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7869     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7870 
 7871     __ movl($tmp2$$Register, $dst$$Register); // save
 7872     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7873     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7874     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7875 
 7876     // Negative dividend.
 7877     // convert value to positive to use unsigned division
 7878     __ lneg($dst$$Register, $tmp2$$Register);
 7879     __ divl($tmp$$Register);
 7880     __ movl($dst$$Register, $tmp2$$Register);
 7881     __ divl($tmp$$Register);
 7882     // revert remainder back to negative
 7883     __ negl(HIGH_FROM_LOW($dst$$Register));
 7884     __ jmpb(Ldone);
 7885 
 7886     __ bind(Lpos);
 7887     __ divl($tmp$$Register);
 7888     __ movl($dst$$Register, $tmp2$$Register);
 7889 
 7890     __ bind(Lfast);
 7891     // fast path: src is positive
 7892     __ divl($tmp$$Register);
 7893 
 7894     __ bind(Ldone);
 7895     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7896     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7897 
 7898   %}
 7899   ins_pipe( pipe_slow );
 7900 %}
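      // Same two-step scheme as the division above, but keeping the remainder:
      // schematically u % d == ((u_hi % d)*2^32 + u_lo) % d, i.e. the second
      // DIV's remainder in EDX.  It always fits in 32 bits (|remainder| < d), so
      // the trailing MOV/SAR just sign-extends it into EDX:EAX.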
 7901 
 7902 // Integer Shift Instructions
 7903 // Shift Left by one
 7904 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7905   match(Set dst (LShiftI dst shift));
 7906   effect(KILL cr);
 7907 
 7908   size(2);
 7909   format %{ "SHL    $dst,$shift" %}
 7910   opcode(0xD1, 0x4);  /* D1 /4 */
 7911   ins_encode( OpcP, RegOpc( dst ) );
 7912   ins_pipe( ialu_reg );
 7913 %}
 7914 
 7915 // Shift Left by 8-bit immediate
 7916 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7917   match(Set dst (LShiftI dst shift));
 7918   effect(KILL cr);
 7919 
 7920   size(3);
 7921   format %{ "SHL    $dst,$shift" %}
 7922   opcode(0xC1, 0x4);  /* C1 /4 ib */
 7923   ins_encode( RegOpcImm( dst, shift) );
 7924   ins_pipe( ialu_reg );
 7925 %}
 7926 
 7927 // Shift Left by variable
 7928 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7929   match(Set dst (LShiftI dst shift));
 7930   effect(KILL cr);
 7931 
 7932   size(2);
 7933   format %{ "SHL    $dst,$shift" %}
 7934   opcode(0xD3, 0x4);  /* D3 /4 */
 7935   ins_encode( OpcP, RegOpc( dst ) );
 7936   ins_pipe( ialu_reg_reg );
 7937 %}
 7938 
 7939 // Arithmetic shift right by one
 7940 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7941   match(Set dst (RShiftI dst shift));
 7942   effect(KILL cr);
 7943 
 7944   size(2);
 7945   format %{ "SAR    $dst,$shift" %}
 7946   opcode(0xD1, 0x7);  /* D1 /7 */
 7947   ins_encode( OpcP, RegOpc( dst ) );
 7948   ins_pipe( ialu_reg );
 7949 %}
 7950 
 7951 // Arithmetic shift right memory operand by one
 7952 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 7953   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7954   effect(KILL cr);
 7955   format %{ "SAR    $dst,$shift" %}
 7956   opcode(0xD1, 0x7);  /* D1 /7 */
 7957   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 7958   ins_pipe( ialu_mem_imm );
 7959 %}
 7960 
 7961 // Arithmetic Shift Right by 8-bit immediate
 7962 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7963   match(Set dst (RShiftI dst shift));
 7964   effect(KILL cr);
 7965 
 7966   size(3);
 7967   format %{ "SAR    $dst,$shift" %}
 7968   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7969   ins_encode( RegOpcImm( dst, shift ) );
 7970   ins_pipe( ialu_mem_imm );
 7971 %}
 7972 
 7973 // Arithmetic Shift Right memory operand by 8-bit immediate
 7974 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 7975   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7976   effect(KILL cr);
 7977 
 7978   format %{ "SAR    $dst,$shift" %}
 7979   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7980   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 7981   ins_pipe( ialu_mem_imm );
 7982 %}
 7983 
 7984 // Arithmetic Shift Right by variable
 7985 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7986   match(Set dst (RShiftI dst shift));
 7987   effect(KILL cr);
 7988 
 7989   size(2);
 7990   format %{ "SAR    $dst,$shift" %}
 7991   opcode(0xD3, 0x7);  /* D3 /7 */
 7992   ins_encode( OpcP, RegOpc( dst ) );
 7993   ins_pipe( ialu_reg_reg );
 7994 %}
 7995 
 7996 // Logical shift right by one
 7997 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7998   match(Set dst (URShiftI dst shift));
 7999   effect(KILL cr);
 8000 
 8001   size(2);
 8002   format %{ "SHR    $dst,$shift" %}
 8003   opcode(0xD1, 0x5);  /* D1 /5 */
 8004   ins_encode( OpcP, RegOpc( dst ) );
 8005   ins_pipe( ialu_reg );
 8006 %}
 8007 
 8008 // Logical Shift Right by 8-bit immediate
 8009 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8010   match(Set dst (URShiftI dst shift));
 8011   effect(KILL cr);
 8012 
 8013   size(3);
 8014   format %{ "SHR    $dst,$shift" %}
 8015   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8016   ins_encode( RegOpcImm( dst, shift) );
 8017   ins_pipe( ialu_reg );
 8018 %}
 8019 
 8020 
 8021 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8022 // This idiom is used by the compiler for the i2b bytecode.
 8023 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8024   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8025 
 8026   size(3);
 8027   format %{ "MOVSX  $dst,$src :8" %}
 8028   ins_encode %{
 8029     __ movsbl($dst$$Register, $src$$Register);
 8030   %}
 8031   ins_pipe(ialu_reg_reg);
 8032 %}
 8033 
 8034 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8035 // This idiom is used by the compiler for the i2s bytecode.
 8036 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8037   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8038 
 8039   size(3);
 8040   format %{ "MOVSX  $dst,$src :16" %}
 8041   ins_encode %{
 8042     __ movswl($dst$$Register, $src$$Register);
 8043   %}
 8044   ins_pipe(ialu_reg_reg);
 8045 %}
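      // Illustrative identity behind the two rules above (two's complement, with
      // an arithmetic right shift):
      //   (x << 24) >> 24 == (int)(byte)x      (x << 16) >> 16 == (int)(short)x
      // which is why both shift pairs reduce to a single MOVSX.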
 8046 
 8047 
 8048 // Logical Shift Right by variable
 8049 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8050   match(Set dst (URShiftI dst shift));
 8051   effect(KILL cr);
 8052 
 8053   size(2);
 8054   format %{ "SHR    $dst,$shift" %}
 8055   opcode(0xD3, 0x5);  /* D3 /5 */
 8056   ins_encode( OpcP, RegOpc( dst ) );
 8057   ins_pipe( ialu_reg_reg );
 8058 %}
 8059 
 8060 
 8061 //----------Logical Instructions-----------------------------------------------
 8062 //----------Integer Logical Instructions---------------------------------------
 8063 // And Instructions
 8064 // And Register with Register
 8065 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8066   match(Set dst (AndI dst src));
 8067   effect(KILL cr);
 8068 
 8069   size(2);
 8070   format %{ "AND    $dst,$src" %}
 8071   opcode(0x23);
 8072   ins_encode( OpcP, RegReg( dst, src) );
 8073   ins_pipe( ialu_reg_reg );
 8074 %}
 8075 
 8076 // And Register with Immediate
 8077 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8078   match(Set dst (AndI dst src));
 8079   effect(KILL cr);
 8080 
 8081   format %{ "AND    $dst,$src" %}
 8082   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8083   // ins_encode( RegImm( dst, src) );
 8084   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8085   ins_pipe( ialu_reg );
 8086 %}
 8087 
 8088 // And Register with Memory
 8089 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8090   match(Set dst (AndI dst (LoadI src)));
 8091   effect(KILL cr);
 8092 
 8093   ins_cost(150);
 8094   format %{ "AND    $dst,$src" %}
 8095   opcode(0x23);
 8096   ins_encode( OpcP, RegMem( dst, src) );
 8097   ins_pipe( ialu_reg_mem );
 8098 %}
 8099 
 8100 // And Memory with Register
 8101 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8102   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8103   effect(KILL cr);
 8104 
 8105   ins_cost(150);
 8106   format %{ "AND    $dst,$src" %}
 8107   opcode(0x21);  /* Opcode 21 /r */
 8108   ins_encode( OpcP, RegMem( src, dst ) );
 8109   ins_pipe( ialu_mem_reg );
 8110 %}
 8111 
 8112 // And Memory with Immediate
 8113 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8114   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8115   effect(KILL cr);
 8116 
 8117   ins_cost(125);
 8118   format %{ "AND    $dst,$src" %}
 8119   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8120   // ins_encode( MemImm( dst, src) );
 8121   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8122   ins_pipe( ialu_mem_imm );
 8123 %}
 8124 
 8125 // BMI1 instructions
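      // The match trees below spell out the identities these BMI1 instructions
      // implement (sketch): andn(a,b) == ~a & b, blsi(x) == x & -x (isolate the
      // lowest set bit), blsmsk(x) == x ^ (x-1) (mask up to and including the
      // lowest set bit), and blsr(x) == x & (x-1) (clear the lowest set bit).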
 8126 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8127   match(Set dst (AndI (XorI src1 minus_1) src2));
 8128   predicate(UseBMI1Instructions);
 8129   effect(KILL cr);
 8130 
 8131   format %{ "ANDNL  $dst, $src1, $src2" %}
 8132 
 8133   ins_encode %{
 8134     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8135   %}
 8136   ins_pipe(ialu_reg);
 8137 %}
 8138 
 8139 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8140   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8141   predicate(UseBMI1Instructions);
 8142   effect(KILL cr);
 8143 
 8144   ins_cost(125);
 8145   format %{ "ANDNL  $dst, $src1, $src2" %}
 8146 
 8147   ins_encode %{
 8148     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8149   %}
 8150   ins_pipe(ialu_reg_mem);
 8151 %}
 8152 
 8153 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8154   match(Set dst (AndI (SubI imm_zero src) src));
 8155   predicate(UseBMI1Instructions);
 8156   effect(KILL cr);
 8157 
 8158   format %{ "BLSIL  $dst, $src" %}
 8159 
 8160   ins_encode %{
 8161     __ blsil($dst$$Register, $src$$Register);
 8162   %}
 8163   ins_pipe(ialu_reg);
 8164 %}
 8165 
 8166 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8167   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8168   predicate(UseBMI1Instructions);
 8169   effect(KILL cr);
 8170 
 8171   ins_cost(125);
 8172   format %{ "BLSIL  $dst, $src" %}
 8173 
 8174   ins_encode %{
 8175     __ blsil($dst$$Register, $src$$Address);
 8176   %}
 8177   ins_pipe(ialu_reg_mem);
 8178 %}
 8179 
 8180 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8181 %{
 8182   match(Set dst (XorI (AddI src minus_1) src));
 8183   predicate(UseBMI1Instructions);
 8184   effect(KILL cr);
 8185 
 8186   format %{ "BLSMSKL $dst, $src" %}
 8187 
 8188   ins_encode %{
 8189     __ blsmskl($dst$$Register, $src$$Register);
 8190   %}
 8191 
 8192   ins_pipe(ialu_reg);
 8193 %}
 8194 
 8195 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8196 %{
 8197   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8198   predicate(UseBMI1Instructions);
 8199   effect(KILL cr);
 8200 
 8201   ins_cost(125);
 8202   format %{ "BLSMSKL $dst, $src" %}
 8203 
 8204   ins_encode %{
 8205     __ blsmskl($dst$$Register, $src$$Address);
 8206   %}
 8207 
 8208   ins_pipe(ialu_reg_mem);
 8209 %}
 8210 
 8211 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8212 %{
 8213   match(Set dst (AndI (AddI src minus_1) src) );
 8214   predicate(UseBMI1Instructions);
 8215   effect(KILL cr);
 8216 
 8217   format %{ "BLSRL  $dst, $src" %}
 8218 
 8219   ins_encode %{
 8220     __ blsrl($dst$$Register, $src$$Register);
 8221   %}
 8222 
 8223   ins_pipe(ialu_reg);
 8224 %}
 8225 
 8226 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8227 %{
 8228   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8229   predicate(UseBMI1Instructions);
 8230   effect(KILL cr);
 8231 
 8232   ins_cost(125);
 8233   format %{ "BLSRL  $dst, $src" %}
 8234 
 8235   ins_encode %{
 8236     __ blsrl($dst$$Register, $src$$Address);
 8237   %}
 8238 
 8239   ins_pipe(ialu_reg_mem);
 8240 %}
 8241 
 8242 // Or Instructions
 8243 // Or Register with Register
 8244 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8245   match(Set dst (OrI dst src));
 8246   effect(KILL cr);
 8247 
 8248   size(2);
 8249   format %{ "OR     $dst,$src" %}
 8250   opcode(0x0B);
 8251   ins_encode( OpcP, RegReg( dst, src) );
 8252   ins_pipe( ialu_reg_reg );
 8253 %}
 8254 
 8255 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8256   match(Set dst (OrI dst (CastP2X src)));
 8257   effect(KILL cr);
 8258 
 8259   size(2);
 8260   format %{ "OR     $dst,$src" %}
 8261   opcode(0x0B);
 8262   ins_encode( OpcP, RegReg( dst, src) );
 8263   ins_pipe( ialu_reg_reg );
 8264 %}
 8265 
 8266 
 8267 // Or Register with Immediate
 8268 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8269   match(Set dst (OrI dst src));
 8270   effect(KILL cr);
 8271 
 8272   format %{ "OR     $dst,$src" %}
 8273   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8274   // ins_encode( RegImm( dst, src) );
 8275   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8276   ins_pipe( ialu_reg );
 8277 %}
 8278 
 8279 // Or Register with Memory
 8280 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8281   match(Set dst (OrI dst (LoadI src)));
 8282   effect(KILL cr);
 8283 
 8284   ins_cost(150);
 8285   format %{ "OR     $dst,$src" %}
 8286   opcode(0x0B);
 8287   ins_encode( OpcP, RegMem( dst, src) );
 8288   ins_pipe( ialu_reg_mem );
 8289 %}
 8290 
 8291 // Or Memory with Register
 8292 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8293   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8294   effect(KILL cr);
 8295 
 8296   ins_cost(150);
 8297   format %{ "OR     $dst,$src" %}
 8298   opcode(0x09);  /* Opcode 09 /r */
 8299   ins_encode( OpcP, RegMem( src, dst ) );
 8300   ins_pipe( ialu_mem_reg );
 8301 %}
 8302 
 8303 // Or Memory with Immediate
 8304 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8305   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8306   effect(KILL cr);
 8307 
 8308   ins_cost(125);
 8309   format %{ "OR     $dst,$src" %}
 8310   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8311   // ins_encode( MemImm( dst, src) );
 8312   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8313   ins_pipe( ialu_mem_imm );
 8314 %}
 8315 
 8316 // ROL/ROR
 8317 // ROL expand
 8318 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8319   effect(USE_DEF dst, USE shift, KILL cr);
 8320 
 8321   format %{ "ROL    $dst, $shift" %}
 8322   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8323   ins_encode( OpcP, RegOpc( dst ));
 8324   ins_pipe( ialu_reg );
 8325 %}
 8326 
 8327 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8328   effect(USE_DEF dst, USE shift, KILL cr);
 8329 
 8330   format %{ "ROL    $dst, $shift" %}
 8331   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8332   ins_encode( RegOpcImm(dst, shift) );
 8333   ins_pipe(ialu_reg);
 8334 %}
 8335 
 8336 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8337   effect(USE_DEF dst, USE shift, KILL cr);
 8338 
 8339   format %{ "ROL    $dst, $shift" %}
 8340   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8341   ins_encode(OpcP, RegOpc(dst));
 8342   ins_pipe( ialu_reg_reg );
 8343 %}
 8344 // end of ROL expand
 8345 
 8346 // ROL 32bit by one once
 8347 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8348   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8349 
 8350   expand %{
 8351     rolI_eReg_imm1(dst, lshift, cr);
 8352   %}
 8353 %}
 8354 
 8355 // ROL 32bit var by imm8 once
 8356 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8357   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8358   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8359 
 8360   expand %{
 8361     rolI_eReg_imm8(dst, lshift, cr);
 8362   %}
 8363 %}
 8364 
 8365 // ROL 32bit var by var once
 8366 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8367   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8368 
 8369   expand %{
 8370     rolI_eReg_CL(dst, shift, cr);
 8371   %}
 8372 %}
 8373 
 8374 // ROL 32bit var by var once
 8375 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8376   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8377 
 8378   expand %{
 8379     rolI_eReg_CL(dst, shift, cr);
 8380   %}
 8381 %}
 8382 
 8383 // ROR expand
 8384 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8385   effect(USE_DEF dst, USE shift, KILL cr);
 8386 
 8387   format %{ "ROR    $dst, $shift" %}
 8388   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8389   ins_encode( OpcP, RegOpc( dst ) );
 8390   ins_pipe( ialu_reg );
 8391 %}
 8392 
 8393 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8394   effect (USE_DEF dst, USE shift, KILL cr);
 8395 
 8396   format %{ "ROR    $dst, $shift" %}
 8397   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8398   ins_encode( RegOpcImm(dst, shift) );
 8399   ins_pipe( ialu_reg );
 8400 %}
 8401 
 8402 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8403   effect(USE_DEF dst, USE shift, KILL cr);
 8404 
 8405   format %{ "ROR    $dst, $shift" %}
 8406   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8407   ins_encode(OpcP, RegOpc(dst));
 8408   ins_pipe( ialu_reg_reg );
 8409 %}
 8410 // end of ROR expand
 8411 
 8412 // ROR right once
 8413 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8414   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8415 
 8416   expand %{
 8417     rorI_eReg_imm1(dst, rshift, cr);
 8418   %}
 8419 %}
 8420 
 8421 // ROR 32bit by immI8 once
 8422 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8423   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8424   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8425 
 8426   expand %{
 8427     rorI_eReg_imm8(dst, rshift, cr);
 8428   %}
 8429 %}
 8430 
 8431 // ROR 32bit var by var once
 8432 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8433   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8434 
 8435   expand %{
 8436     rorI_eReg_CL(dst, shift, cr);
 8437   %}
 8438 %}
 8439 
 8440 // ROR 32bit var by var once
 8441 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8442   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8443 
 8444   expand %{
 8445     rorI_eReg_CL(dst, shift, cr);
 8446   %}
 8447 %}
 8448 
 8449 // Xor Instructions
 8450 // Xor Register with Register
 8451 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8452   match(Set dst (XorI dst src));
 8453   effect(KILL cr);
 8454 
 8455   size(2);
 8456   format %{ "XOR    $dst,$src" %}
 8457   opcode(0x33);
 8458   ins_encode( OpcP, RegReg( dst, src) );
 8459   ins_pipe( ialu_reg_reg );
 8460 %}
 8461 
 8462 // Xor Register with Immediate -1
 8463 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8464   match(Set dst (XorI dst imm));
 8465 
 8466   size(2);
 8467   format %{ "NOT    $dst" %}
 8468   ins_encode %{
 8469      __ notl($dst$$Register);
 8470   %}
 8471   ins_pipe( ialu_reg );
 8472 %}
 8473 
 8474 // Xor Register with Immediate
 8475 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8476   match(Set dst (XorI dst src));
 8477   effect(KILL cr);
 8478 
 8479   format %{ "XOR    $dst,$src" %}
 8480   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8481   // ins_encode( RegImm( dst, src) );
 8482   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8483   ins_pipe( ialu_reg );
 8484 %}
 8485 
 8486 // Xor Register with Memory
 8487 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8488   match(Set dst (XorI dst (LoadI src)));
 8489   effect(KILL cr);
 8490 
 8491   ins_cost(150);
 8492   format %{ "XOR    $dst,$src" %}
 8493   opcode(0x33);
 8494   ins_encode( OpcP, RegMem(dst, src) );
 8495   ins_pipe( ialu_reg_mem );
 8496 %}
 8497 
 8498 // Xor Memory with Register
 8499 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8500   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8501   effect(KILL cr);
 8502 
 8503   ins_cost(150);
 8504   format %{ "XOR    $dst,$src" %}
 8505   opcode(0x31);  /* Opcode 31 /r */
 8506   ins_encode( OpcP, RegMem( src, dst ) );
 8507   ins_pipe( ialu_mem_reg );
 8508 %}
 8509 
 8510 // Xor Memory with Immediate
 8511 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8512   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8513   effect(KILL cr);
 8514 
 8515   ins_cost(125);
 8516   format %{ "XOR    $dst,$src" %}
 8517   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8518   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8519   ins_pipe( ialu_mem_imm );
 8520 %}
 8521 
 8522 //----------Convert Int to Boolean---------------------------------------------
 8523 
 8524 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8525   effect( DEF dst, USE src );
 8526   format %{ "MOV    $dst,$src" %}
 8527   ins_encode( enc_Copy( dst, src) );
 8528   ins_pipe( ialu_reg_reg );
 8529 %}
 8530 
 8531 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8532   effect( USE_DEF dst, USE src, KILL cr );
 8533 
 8534   size(4);
 8535   format %{ "NEG    $dst\n\t"
 8536             "ADC    $dst,$src" %}
 8537   ins_encode( neg_reg(dst),
 8538               OpcRegReg(0x13,dst,src) );
 8539   ins_pipe( ialu_reg_reg_long );
 8540 %}
 8541 
 8542 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8543   match(Set dst (Conv2B src));
 8544 
 8545   expand %{
 8546     movI_nocopy(dst,src);
 8547     ci2b(dst,src,cr);
 8548   %}
 8549 %}
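      // How the expansion above yields 0/1 (sketch): after the copy, NEG sets CF
      // exactly when the value is nonzero, and ADC then computes
      //   dst = -src + src + CF == CF
      // so Conv2B is 1 for any nonzero input and 0 otherwise.  The pointer form
      // below uses the same trick.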
 8550 
 8551 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8552   effect( DEF dst, USE src );
 8553   format %{ "MOV    $dst,$src" %}
 8554   ins_encode( enc_Copy( dst, src) );
 8555   ins_pipe( ialu_reg_reg );
 8556 %}
 8557 
 8558 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8559   effect( USE_DEF dst, USE src, KILL cr );
 8560   format %{ "NEG    $dst\n\t"
 8561             "ADC    $dst,$src" %}
 8562   ins_encode( neg_reg(dst),
 8563               OpcRegReg(0x13,dst,src) );
 8564   ins_pipe( ialu_reg_reg_long );
 8565 %}
 8566 
 8567 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8568   match(Set dst (Conv2B src));
 8569 
 8570   expand %{
 8571     movP_nocopy(dst,src);
 8572     cp2b(dst,src,cr);
 8573   %}
 8574 %}
 8575 
 8576 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8577   match(Set dst (CmpLTMask p q));
 8578   effect(KILL cr);
 8579   ins_cost(400);
 8580 
 8581   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8582   format %{ "XOR    $dst,$dst\n\t"
 8583             "CMP    $p,$q\n\t"
 8584             "SETlt  $dst\n\t"
 8585             "NEG    $dst" %}
 8586   ins_encode %{
 8587     Register Rp = $p$$Register;
 8588     Register Rq = $q$$Register;
 8589     Register Rd = $dst$$Register;
 8591     __ xorl(Rd, Rd);
 8592     __ cmpl(Rp, Rq);
 8593     __ setb(Assembler::less, Rd);
 8594     __ negl(Rd);
 8595   %}
 8596 
 8597   ins_pipe(pipe_slow);
 8598 %}
 8599 
 8600 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8601   match(Set dst (CmpLTMask dst zero));
 8602   effect(DEF dst, KILL cr);
 8603   ins_cost(100);
 8604 
 8605   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8606   ins_encode %{
 8607   __ sarl($dst$$Register, 31);
 8608   %}
 8609   ins_pipe(ialu_reg);
 8610 %}
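      // CmpLTMask produces an all-ones mask when p < q and zero otherwise
      // (schematically, mask = (p < q) ? -1 : 0).  Compared against zero this is
      // just the sign of dst, so SAR dst,31 replicates the sign bit across the
      // register.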
 8611 
 8612 /* better to save a register than avoid a branch */
 8613 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8614   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8615   effect(KILL cr);
 8616   ins_cost(400);
 8617   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8618             "JGE    done\n\t"
 8619             "ADD    $p,$y\n"
 8620             "done:  " %}
 8621   ins_encode %{
 8622     Register Rp = $p$$Register;
 8623     Register Rq = $q$$Register;
 8624     Register Ry = $y$$Register;
 8625     Label done;
 8626     __ subl(Rp, Rq);
 8627     __ jccb(Assembler::greaterEqual, done);
 8628     __ addl(Rp, Ry);
 8629     __ bind(done);
 8630   %}
 8631 
 8632   ins_pipe(pipe_cmplt);
 8633 %}
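      // Branchy equivalent of p = (p - q) + ((p < q) ? y : 0): subtract, and add
      // y back only when the signed result says p < q, avoiding the extra
      // register a materialized mask would need.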
 8634 
 8635 /* better to save a register than avoid a branch */
 8636 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8637   match(Set y (AndI (CmpLTMask p q) y));
 8638   effect(KILL cr);
 8639 
 8640   ins_cost(300);
 8641 
 8642   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8643             "JLT      done\n\t"
 8644             "XORL     $y, $y\n"
 8645             "done:  " %}
 8646   ins_encode %{
 8647     Register Rp = $p$$Register;
 8648     Register Rq = $q$$Register;
 8649     Register Ry = $y$$Register;
 8650     Label done;
 8651     __ cmpl(Rp, Rq);
 8652     __ jccb(Assembler::less, done);
 8653     __ xorl(Ry, Ry);
 8654     __ bind(done);
 8655   %}
 8656 
 8657   ins_pipe(pipe_cmplt);
 8658 %}
 8659 
 8660 /* If I enable this, I encourage spilling in the inner loop of compress.
 8661 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8662   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8663 */
 8664 //----------Overflow Math Instructions-----------------------------------------
 8665 
 8666 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8667 %{
 8668   match(Set cr (OverflowAddI op1 op2));
 8669   effect(DEF cr, USE_KILL op1, USE op2);
 8670 
 8671   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8672 
 8673   ins_encode %{
 8674     __ addl($op1$$Register, $op2$$Register);
 8675   %}
 8676   ins_pipe(ialu_reg_reg);
 8677 %}
 8678 
 8679 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8680 %{
 8681   match(Set cr (OverflowAddI op1 op2));
 8682   effect(DEF cr, USE_KILL op1, USE op2);
 8683 
 8684   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8685 
 8686   ins_encode %{
 8687     __ addl($op1$$Register, $op2$$constant);
 8688   %}
 8689   ins_pipe(ialu_reg_reg);
 8690 %}
 8691 
 8692 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8693 %{
 8694   match(Set cr (OverflowSubI op1 op2));
 8695 
 8696   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8697   ins_encode %{
 8698     __ cmpl($op1$$Register, $op2$$Register);
 8699   %}
 8700   ins_pipe(ialu_reg_reg);
 8701 %}
 8702 
 8703 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8704 %{
 8705   match(Set cr (OverflowSubI op1 op2));
 8706 
 8707   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8708   ins_encode %{
 8709     __ cmpl($op1$$Register, $op2$$constant);
 8710   %}
 8711   ins_pipe(ialu_reg_reg);
 8712 %}
 8713 
 8714 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8715 %{
 8716   match(Set cr (OverflowSubI zero op2));
 8717   effect(DEF cr, USE_KILL op2);
 8718 
 8719   format %{ "NEG    $op2\t# overflow check int" %}
 8720   ins_encode %{
 8721     __ negl($op2$$Register);
 8722   %}
 8723   ins_pipe(ialu_reg_reg);
 8724 %}
 8725 
 8726 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8727 %{
 8728   match(Set cr (OverflowMulI op1 op2));
 8729   effect(DEF cr, USE_KILL op1, USE op2);
 8730 
 8731   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8732   ins_encode %{
 8733     __ imull($op1$$Register, $op2$$Register);
 8734   %}
 8735   ins_pipe(ialu_reg_reg_alu0);
 8736 %}
 8737 
 8738 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8739 %{
 8740   match(Set cr (OverflowMulI op1 op2));
 8741   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8742 
 8743   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8744   ins_encode %{
 8745     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8746   %}
 8747   ins_pipe(ialu_reg_reg_alu0);
 8748 %}
 8749 
 8750 // Integer Absolute Instructions
 8751 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8752 %{
 8753   match(Set dst (AbsI src));
 8754   effect(TEMP dst, TEMP tmp, KILL cr);
 8755   format %{ "movl $tmp, $src\n\t"
 8756             "sarl $tmp, 31\n\t"
 8757             "movl $dst, $src\n\t"
 8758             "xorl $dst, $tmp\n\t"
 8759             "subl $dst, $tmp\n"
 8760           %}
 8761   ins_encode %{
 8762     __ movl($tmp$$Register, $src$$Register);
 8763     __ sarl($tmp$$Register, 31);
 8764     __ movl($dst$$Register, $src$$Register);
 8765     __ xorl($dst$$Register, $tmp$$Register);
 8766     __ subl($dst$$Register, $tmp$$Register);
 8767   %}
 8768 
 8769   ins_pipe(ialu_reg_reg);
 8770 %}
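      // Illustrative identity: with s = x >> 31 (arithmetic), (x ^ s) - s == |x|
      // for all two's-complement ints except min_jint, which maps to itself --
      // matching Java's Math.abs.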
 8771 
 8772 //----------Long Instructions------------------------------------------------
 8773 // Add Long Register with Register
 8774 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8775   match(Set dst (AddL dst src));
 8776   effect(KILL cr);
 8777   ins_cost(200);
 8778   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8779             "ADC    $dst.hi,$src.hi" %}
 8780   opcode(0x03, 0x13);
 8781   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8782   ins_pipe( ialu_reg_reg_long );
 8783 %}
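      // 64-bit arithmetic on 32-bit halves: add the low words, then add the high
      // words plus the carry out of the low add (ADC); the subtract rules below
      // use SUB/SBB the same way.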
 8784 
 8785 // Add Long Register with Immediate
 8786 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8787   match(Set dst (AddL dst src));
 8788   effect(KILL cr);
 8789   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8790             "ADC    $dst.hi,$src.hi" %}
 8791   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8792   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8793   ins_pipe( ialu_reg_long );
 8794 %}
 8795 
 8796 // Add Long Register with Memory
 8797 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8798   match(Set dst (AddL dst (LoadL mem)));
 8799   effect(KILL cr);
 8800   ins_cost(125);
 8801   format %{ "ADD    $dst.lo,$mem\n\t"
 8802             "ADC    $dst.hi,$mem+4" %}
 8803   opcode(0x03, 0x13);
 8804   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8805   ins_pipe( ialu_reg_long_mem );
 8806 %}
 8807 
 8808 // Subtract Long Register with Register.
 8809 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8810   match(Set dst (SubL dst src));
 8811   effect(KILL cr);
 8812   ins_cost(200);
 8813   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8814             "SBB    $dst.hi,$src.hi" %}
 8815   opcode(0x2B, 0x1B);
 8816   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8817   ins_pipe( ialu_reg_reg_long );
 8818 %}
 8819 
 8820 // Subtract Long Register with Immediate
 8821 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8822   match(Set dst (SubL dst src));
 8823   effect(KILL cr);
 8824   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8825             "SBB    $dst.hi,$src.hi" %}
 8826   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8827   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8828   ins_pipe( ialu_reg_long );
 8829 %}
 8830 
 8831 // Subtract Long Register with Memory
 8832 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8833   match(Set dst (SubL dst (LoadL mem)));
 8834   effect(KILL cr);
 8835   ins_cost(125);
 8836   format %{ "SUB    $dst.lo,$mem\n\t"
 8837             "SBB    $dst.hi,$mem+4" %}
 8838   opcode(0x2B, 0x1B);
 8839   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8840   ins_pipe( ialu_reg_long_mem );
 8841 %}
 8842 
 8843 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8844   match(Set dst (SubL zero dst));
 8845   effect(KILL cr);
 8846   ins_cost(300);
 8847   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8848   ins_encode( neg_long(dst) );
 8849   ins_pipe( ialu_reg_reg_long );
 8850 %}
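      // Sketch of the negation above: -(hi:lo) has high word -hi when lo == 0 and
      // -hi-1 otherwise; NEG $dst.lo leaves exactly that borrow in CF, which the
      // final SBB $dst.hi,0 subtracts.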
 8851 
 8852 // And Long Register with Register
 8853 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8854   match(Set dst (AndL dst src));
 8855   effect(KILL cr);
 8856   format %{ "AND    $dst.lo,$src.lo\n\t"
 8857             "AND    $dst.hi,$src.hi" %}
 8858   opcode(0x23,0x23);
 8859   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8860   ins_pipe( ialu_reg_reg_long );
 8861 %}
 8862 
 8863 // And Long Register with Immediate
 8864 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8865   match(Set dst (AndL dst src));
 8866   effect(KILL cr);
 8867   format %{ "AND    $dst.lo,$src.lo\n\t"
 8868             "AND    $dst.hi,$src.hi" %}
 8869   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8870   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8871   ins_pipe( ialu_reg_long );
 8872 %}
 8873 
 8874 // And Long Register with Memory
 8875 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8876   match(Set dst (AndL dst (LoadL mem)));
 8877   effect(KILL cr);
 8878   ins_cost(125);
 8879   format %{ "AND    $dst.lo,$mem\n\t"
 8880             "AND    $dst.hi,$mem+4" %}
 8881   opcode(0x23, 0x23);
 8882   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8883   ins_pipe( ialu_reg_long_mem );
 8884 %}
 8885 
 8886 // BMI1 instructions
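      // 64-bit forms of the identities above.  ANDN is simply applied to each
      // half; the BLS* forms operate on the low half first, since whenever the
      // low 32 bits are nonzero the lowest set bit lives there and the high half
      // of the result is already known (zero, or the unchanged source half for
      // BLSR).  Only when the low half is zero do the conditional jumps in the
      // encodings below fall through and repeat the operation on the high half.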
 8887 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8888   match(Set dst (AndL (XorL src1 minus_1) src2));
 8889   predicate(UseBMI1Instructions);
 8890   effect(KILL cr, TEMP dst);
 8891 
 8892   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8893             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8894          %}
 8895 
 8896   ins_encode %{
 8897     Register Rdst = $dst$$Register;
 8898     Register Rsrc1 = $src1$$Register;
 8899     Register Rsrc2 = $src2$$Register;
 8900     __ andnl(Rdst, Rsrc1, Rsrc2);
 8901     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8902   %}
 8903   ins_pipe(ialu_reg_reg_long);
 8904 %}
 8905 
 8906 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8907   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8908   predicate(UseBMI1Instructions);
 8909   effect(KILL cr, TEMP dst);
 8910 
 8911   ins_cost(125);
 8912   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8913             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 8914          %}
 8915 
 8916   ins_encode %{
 8917     Register Rdst = $dst$$Register;
 8918     Register Rsrc1 = $src1$$Register;
 8919     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 8920 
 8921     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8922     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 8923   %}
 8924   ins_pipe(ialu_reg_mem);
 8925 %}
 8926 
 8927 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 8928   match(Set dst (AndL (SubL imm_zero src) src));
 8929   predicate(UseBMI1Instructions);
 8930   effect(KILL cr, TEMP dst);
 8931 
 8932   format %{ "MOVL   $dst.hi, 0\n\t"
 8933             "BLSIL  $dst.lo, $src.lo\n\t"
 8934             "JNZ    done\n\t"
 8935             "BLSIL  $dst.hi, $src.hi\n"
 8936             "done:"
 8937          %}
 8938 
 8939   ins_encode %{
 8940     Label done;
 8941     Register Rdst = $dst$$Register;
 8942     Register Rsrc = $src$$Register;
 8943     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8944     __ blsil(Rdst, Rsrc);
 8945     __ jccb(Assembler::notZero, done);
 8946     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8947     __ bind(done);
 8948   %}
 8949   ins_pipe(ialu_reg);
 8950 %}
 8951 
 8952 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 8953   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 8954   predicate(UseBMI1Instructions);
 8955   effect(KILL cr, TEMP dst);
 8956 
 8957   ins_cost(125);
 8958   format %{ "MOVL   $dst.hi, 0\n\t"
 8959             "BLSIL  $dst.lo, $src\n\t"
 8960             "JNZ    done\n\t"
 8961             "BLSIL  $dst.hi, $src+4\n"
 8962             "done:"
 8963          %}
 8964 
 8965   ins_encode %{
 8966     Label done;
 8967     Register Rdst = $dst$$Register;
 8968     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8969 
 8970     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8971     __ blsil(Rdst, $src$$Address);
 8972     __ jccb(Assembler::notZero, done);
 8973     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 8974     __ bind(done);
 8975   %}
 8976   ins_pipe(ialu_reg_mem);
 8977 %}
 8978 
 8979 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8980 %{
 8981   match(Set dst (XorL (AddL src minus_1) src));
 8982   predicate(UseBMI1Instructions);
 8983   effect(KILL cr, TEMP dst);
 8984 
 8985   format %{ "MOVL    $dst.hi, 0\n\t"
 8986             "BLSMSKL $dst.lo, $src.lo\n\t"
 8987             "JNC     done\n\t"
 8988             "BLSMSKL $dst.hi, $src.hi\n"
 8989             "done:"
 8990          %}
 8991 
 8992   ins_encode %{
 8993     Label done;
 8994     Register Rdst = $dst$$Register;
 8995     Register Rsrc = $src$$Register;
 8996     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8997     __ blsmskl(Rdst, Rsrc);
 8998     __ jccb(Assembler::carryClear, done);
 8999     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9000     __ bind(done);
 9001   %}
 9002 
 9003   ins_pipe(ialu_reg);
 9004 %}
 9005 
 9006 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9007 %{
 9008   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9009   predicate(UseBMI1Instructions);
 9010   effect(KILL cr, TEMP dst);
 9011 
 9012   ins_cost(125);
 9013   format %{ "MOVL    $dst.hi, 0\n\t"
 9014             "BLSMSKL $dst.lo, $src\n\t"
 9015             "JNC     done\n\t"
 9016             "BLSMSKL $dst.hi, $src+4\n"
 9017             "done:"
 9018          %}
 9019 
 9020   ins_encode %{
 9021     Label done;
 9022     Register Rdst = $dst$$Register;
 9023     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9024 
 9025     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9026     __ blsmskl(Rdst, $src$$Address);
 9027     __ jccb(Assembler::carryClear, done);
 9028     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9029     __ bind(done);
 9030   %}
 9031 
 9032   ins_pipe(ialu_reg_mem);
 9033 %}
 9034 
 9035 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9036 %{
 9037   match(Set dst (AndL (AddL src minus_1) src) );
 9038   predicate(UseBMI1Instructions);
 9039   effect(KILL cr, TEMP dst);
 9040 
 9041   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9042             "BLSRL  $dst.lo, $src.lo\n\t"
 9043             "JNC    done\n\t"
 9044             "BLSRL  $dst.hi, $src.hi\n"
 9045             "done:"
 9046   %}
 9047 
 9048   ins_encode %{
 9049     Label done;
 9050     Register Rdst = $dst$$Register;
 9051     Register Rsrc = $src$$Register;
 9052     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9053     __ blsrl(Rdst, Rsrc);
 9054     __ jccb(Assembler::carryClear, done);
 9055     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9056     __ bind(done);
 9057   %}
 9058 
 9059   ins_pipe(ialu_reg);
 9060 %}
 9061 
 9062 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9063 %{
 9064   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9065   predicate(UseBMI1Instructions);
 9066   effect(KILL cr, TEMP dst);
 9067 
 9068   ins_cost(125);
 9069   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9070             "BLSRL  $dst.lo, $src\n\t"
 9071             "JNC    done\n\t"
 9072             "BLSRL  $dst.hi, $src+4\n"
 9073             "done:"
 9074   %}
 9075 
 9076   ins_encode %{
 9077     Label done;
 9078     Register Rdst = $dst$$Register;
 9079     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9080     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9081     __ blsrl(Rdst, $src$$Address);
 9082     __ jccb(Assembler::carryClear, done);
 9083     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9084     __ bind(done);
 9085   %}
 9086 
 9087   ins_pipe(ialu_reg_mem);
 9088 %}
 9089 
 9090 // Or Long Register with Register
 9091 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9092   match(Set dst (OrL dst src));
 9093   effect(KILL cr);
 9094   format %{ "OR     $dst.lo,$src.lo\n\t"
 9095             "OR     $dst.hi,$src.hi" %}
 9096   opcode(0x0B,0x0B);
 9097   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9098   ins_pipe( ialu_reg_reg_long );
 9099 %}
 9100 
 9101 // Or Long Register with Immediate
 9102 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9103   match(Set dst (OrL dst src));
 9104   effect(KILL cr);
 9105   format %{ "OR     $dst.lo,$src.lo\n\t"
 9106             "OR     $dst.hi,$src.hi" %}
 9107   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9108   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9109   ins_pipe( ialu_reg_long );
 9110 %}
 9111 
 9112 // Or Long Register with Memory
 9113 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9114   match(Set dst (OrL dst (LoadL mem)));
 9115   effect(KILL cr);
 9116   ins_cost(125);
 9117   format %{ "OR     $dst.lo,$mem\n\t"
 9118             "OR     $dst.hi,$mem+4" %}
 9119   opcode(0x0B,0x0B);
 9120   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9121   ins_pipe( ialu_reg_long_mem );
 9122 %}
 9123 
 9124 // Xor Long Register with Register
 9125 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9126   match(Set dst (XorL dst src));
 9127   effect(KILL cr);
 9128   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9129             "XOR    $dst.hi,$src.hi" %}
 9130   opcode(0x33,0x33);
 9131   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9132   ins_pipe( ialu_reg_reg_long );
 9133 %}
 9134 
 9135 // Xor Long Register with Immediate -1
 9136 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9137   match(Set dst (XorL dst imm));
 9138   format %{ "NOT    $dst.lo\n\t"
 9139             "NOT    $dst.hi" %}
 9140   ins_encode %{
 9141      __ notl($dst$$Register);
 9142      __ notl(HIGH_FROM_LOW($dst$$Register));
 9143   %}
 9144   ins_pipe( ialu_reg_long );
 9145 %}
 9146 
 9147 // Xor Long Register with Immediate
 9148 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9149   match(Set dst (XorL dst src));
 9150   effect(KILL cr);
 9151   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9152             "XOR    $dst.hi,$src.hi" %}
 9153   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9154   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9155   ins_pipe( ialu_reg_long );
 9156 %}
 9157 
 9158 // Xor Long Register with Memory
 9159 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9160   match(Set dst (XorL dst (LoadL mem)));
 9161   effect(KILL cr);
 9162   ins_cost(125);
 9163   format %{ "XOR    $dst.lo,$mem\n\t"
 9164             "XOR    $dst.hi,$mem+4" %}
 9165   opcode(0x33,0x33);
 9166   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9167   ins_pipe( ialu_reg_long_mem );
 9168 %}
 9169 
 9170 // Shift Left Long by 1
 9171 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9172   predicate(UseNewLongLShift);
 9173   match(Set dst (LShiftL dst cnt));
 9174   effect(KILL cr);
 9175   ins_cost(100);
 9176   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9177             "ADC    $dst.hi,$dst.hi" %}
 9178   ins_encode %{
 9179     __ addl($dst$$Register,$dst$$Register);
 9180     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9181   %}
 9182   ins_pipe( ialu_reg_long );
 9183 %}
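      // A long left shift by one is just the 64-bit add of the value to itself
      // (ADD/ADC above), so small constant counts are strength-reduced to short
      // ADD/ADC chains in the rules that follow.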
 9184 
 9185 // Shift Left Long by 2
 9186 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9187   predicate(UseNewLongLShift);
 9188   match(Set dst (LShiftL dst cnt));
 9189   effect(KILL cr);
 9190   ins_cost(100);
 9191   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9192             "ADC    $dst.hi,$dst.hi\n\t"
 9193             "ADD    $dst.lo,$dst.lo\n\t"
 9194             "ADC    $dst.hi,$dst.hi" %}
 9195   ins_encode %{
 9196     __ addl($dst$$Register,$dst$$Register);
 9197     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9198     __ addl($dst$$Register,$dst$$Register);
 9199     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9200   %}
 9201   ins_pipe( ialu_reg_long );
 9202 %}
 9203 
 9204 // Shift Left Long by 3
 9205 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9206   predicate(UseNewLongLShift);
 9207   match(Set dst (LShiftL dst cnt));
 9208   effect(KILL cr);
 9209   ins_cost(100);
 9210   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9211             "ADC    $dst.hi,$dst.hi\n\t"
 9212             "ADD    $dst.lo,$dst.lo\n\t"
 9213             "ADC    $dst.hi,$dst.hi\n\t"
 9214             "ADD    $dst.lo,$dst.lo\n\t"
 9215             "ADC    $dst.hi,$dst.hi" %}
 9216   ins_encode %{
 9217     __ addl($dst$$Register,$dst$$Register);
 9218     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9219     __ addl($dst$$Register,$dst$$Register);
 9220     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9221     __ addl($dst$$Register,$dst$$Register);
 9222     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9223   %}
 9224   ins_pipe( ialu_reg_long );
 9225 %}
 9226 
 9227 // Shift Left Long by 1-31
 9228 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9229   match(Set dst (LShiftL dst cnt));
 9230   effect(KILL cr);
 9231   ins_cost(200);
 9232   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9233             "SHL    $dst.lo,$cnt" %}
 9234   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9235   ins_encode( move_long_small_shift(dst,cnt) );
 9236   ins_pipe( ialu_reg_long );
 9237 %}
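      // SHLD $dst.hi,$dst.lo,$cnt shifts the high word left while filling its low
      // bits from the top of the low word, so together with SHL on the low word
      // it forms a true 64-bit left shift for counts of 1-31.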
 9238 
 9239 // Shift Left Long by 32-63
 9240 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9241   match(Set dst (LShiftL dst cnt));
 9242   effect(KILL cr);
 9243   ins_cost(300);
 9244   format %{ "MOV    $dst.hi,$dst.lo\n"
 9245           "\tSHL    $dst.hi,$cnt-32\n"
 9246           "\tXOR    $dst.lo,$dst.lo" %}
 9247   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9248   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9249   ins_pipe( ialu_reg_long );
 9250 %}
 9251 
 9252 // Shift Left Long by variable
 9253 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9254   match(Set dst (LShiftL dst shift));
 9255   effect(KILL cr);
 9256   ins_cost(500+200);
 9257   size(17);
 9258   format %{ "TEST   $shift,32\n\t"
 9259             "JEQ,s  small\n\t"
 9260             "MOV    $dst.hi,$dst.lo\n\t"
 9261             "XOR    $dst.lo,$dst.lo\n"
 9262     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9263             "SHL    $dst.lo,$shift" %}
 9264   ins_encode( shift_left_long( dst, shift ) );
 9265   ins_pipe( pipe_slow );
 9266 %}
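      // The 32-bit SHLD/SHL pair only uses the shift count modulo 32, so for a
      // variable count the TEST of bit 5 handles counts of 32-63 up front by
      // moving the low word into the high word and clearing the low word; the
      // right-shift variants below are handled analogously.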
 9267 
 9268 // Shift Right Long by 1-31
 9269 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9270   match(Set dst (URShiftL dst cnt));
 9271   effect(KILL cr);
 9272   ins_cost(200);
 9273   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9274             "SHR    $dst.hi,$cnt" %}
 9275   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9276   ins_encode( move_long_small_shift(dst,cnt) );
 9277   ins_pipe( ialu_reg_long );
 9278 %}
 9279 
 9280 // Shift Right Long by 32-63
 9281 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9282   match(Set dst (URShiftL dst cnt));
 9283   effect(KILL cr);
 9284   ins_cost(300);
 9285   format %{ "MOV    $dst.lo,$dst.hi\n"
 9286           "\tSHR    $dst.lo,$cnt-32\n"
 9287           "\tXOR    $dst.hi,$dst.hi" %}
 9288   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9289   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9290   ins_pipe( ialu_reg_long );
 9291 %}
 9292 
 9293 // Shift Right Long by variable
 9294 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9295   match(Set dst (URShiftL dst shift));
 9296   effect(KILL cr);
 9297   ins_cost(600);
 9298   size(17);
 9299   format %{ "TEST   $shift,32\n\t"
 9300             "JEQ,s  small\n\t"
 9301             "MOV    $dst.lo,$dst.hi\n\t"
 9302             "XOR    $dst.hi,$dst.hi\n"
 9303     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9304             "SHR    $dst.hi,$shift" %}
 9305   ins_encode( shift_right_long( dst, shift ) );
 9306   ins_pipe( pipe_slow );
 9307 %}
 9308 
 9309 // Shift Right Long by 1-31
 9310 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9311   match(Set dst (RShiftL dst cnt));
 9312   effect(KILL cr);
 9313   ins_cost(200);
 9314   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9315             "SAR    $dst.hi,$cnt" %}
 9316   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9317   ins_encode( move_long_small_shift(dst,cnt) );
 9318   ins_pipe( ialu_reg_long );
 9319 %}
 9320 
 9321 // Shift Right Long by 32-63
 9322 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9323   match(Set dst (RShiftL dst cnt));
 9324   effect(KILL cr);
 9325   ins_cost(300);
 9326   format %{ "MOV    $dst.lo,$dst.hi\n"
 9327           "\tSAR    $dst.lo,$cnt-32\n"
 9328           "\tSAR    $dst.hi,31" %}
 9329   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9330   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9331   ins_pipe( ialu_reg_long );
 9332 %}
 9333 
 9334 // Shift Right arithmetic Long by variable
 9335 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9336   match(Set dst (RShiftL dst shift));
 9337   effect(KILL cr);
 9338   ins_cost(600);
 9339   size(18);
 9340   format %{ "TEST   $shift,32\n\t"
 9341             "JEQ,s  small\n\t"
 9342             "MOV    $dst.lo,$dst.hi\n\t"
 9343             "SAR    $dst.hi,31\n"
 9344     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9345             "SAR    $dst.hi,$shift" %}
 9346   ins_encode( shift_right_arith_long( dst, shift ) );
 9347   ins_pipe( pipe_slow );
 9348 %}
 9349 
 9350 
 9351 //----------Double Instructions------------------------------------------------
 9352 // Double Math
 9353 
 9354 // Compare & branch
 9355 
 9356 // P6 version of float compare, sets condition codes in EFLAGS
 9357 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9358   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9359   match(Set cr (CmpD src1 src2));
 9360   effect(KILL rax);
 9361   ins_cost(150);
 9362   format %{ "FLD    $src1\n\t"
 9363             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9364             "JNP    exit\n\t"
 9365             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9366             "SAHF\n"
 9367      "exit:\tNOP               // avoid branch to branch" %}
 9368   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9369   ins_encode( Push_Reg_DPR(src1),
 9370               OpcP, RegOpc(src2),
 9371               cmpF_P6_fixup );
 9372   ins_pipe( pipe_slow );
 9373 %}
 9374 
 9375 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9376   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9377   match(Set cr (CmpD src1 src2));
 9378   ins_cost(150);
 9379   format %{ "FLD    $src1\n\t"
 9380             "FUCOMIP ST,$src2  // P6 instruction" %}
 9381   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9382   ins_encode( Push_Reg_DPR(src1),
 9383               OpcP, RegOpc(src2));
 9384   ins_pipe( pipe_slow );
 9385 %}
 9386 
 9387 // Compare & branch
 9388 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9389   predicate(UseSSE<=1);
 9390   match(Set cr (CmpD src1 src2));
 9391   effect(KILL rax);
 9392   ins_cost(200);
 9393   format %{ "FLD    $src1\n\t"
 9394             "FCOMp  $src2\n\t"
 9395             "FNSTSW AX\n\t"
 9396             "TEST   AX,0x400\n\t"
 9397             "JZ,s   flags\n\t"
 9398             "MOV    AH,1\t# unordered treat as LT\n"
 9399     "flags:\tSAHF" %}
 9400   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9401   ins_encode( Push_Reg_DPR(src1),
 9402               OpcP, RegOpc(src2),
 9403               fpu_flags);
 9404   ins_pipe( pipe_slow );
 9405 %}
 9406 
 9407 // Compare vs zero into -1,0,1
 9408 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9409   predicate(UseSSE<=1);
 9410   match(Set dst (CmpD3 src1 zero));
 9411   effect(KILL cr, KILL rax);
 9412   ins_cost(280);
 9413   format %{ "FTSTD  $dst,$src1" %}
 9414   opcode(0xE4, 0xD9);
 9415   ins_encode( Push_Reg_DPR(src1),
 9416               OpcS, OpcP, PopFPU,
 9417               CmpF_Result(dst));
 9418   ins_pipe( pipe_slow );
 9419 %}
 9420 
 9421 // Compare into -1,0,1
 9422 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9423   predicate(UseSSE<=1);
 9424   match(Set dst (CmpD3 src1 src2));
 9425   effect(KILL cr, KILL rax);
 9426   ins_cost(300);
 9427   format %{ "FCMPD  $dst,$src1,$src2" %}
 9428   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9429   ins_encode( Push_Reg_DPR(src1),
 9430               OpcP, RegOpc(src2),
 9431               CmpF_Result(dst));
 9432   ins_pipe( pipe_slow );
 9433 %}
 9434 
 9435 // float compare and set condition codes in EFLAGS by XMM regs
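// UCOMISD sets ZF, PF and CF when either operand is NaN (unordered).  The
// fixup emitted by emit_cmpfp_fixup ANDs EFLAGS with 0xffffff2b, clearing
// ZF and PF (and SF/AF) while keeping CF, so an unordered result reads as a
// plain "below" -- i.e. NaN compares as less-than, just like the FPU-based
// compares above.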
 9436 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9437   predicate(UseSSE>=2);
 9438   match(Set cr (CmpD src1 src2));
 9439   ins_cost(145);
 9440   format %{ "UCOMISD $src1,$src2\n\t"
 9441             "JNP,s   exit\n\t"
 9442             "PUSHF\t# saw NaN, set CF\n\t"
 9443             "AND     [rsp], #0xffffff2b\n\t"
 9444             "POPF\n"
 9445     "exit:" %}
 9446   ins_encode %{
 9447     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9448     emit_cmpfp_fixup(_masm);
 9449   %}
 9450   ins_pipe( pipe_slow );
 9451 %}
 9452 
 9453 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9454   predicate(UseSSE>=2);
 9455   match(Set cr (CmpD src1 src2));
 9456   ins_cost(100);
 9457   format %{ "UCOMISD $src1,$src2" %}
 9458   ins_encode %{
 9459     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9460   %}
 9461   ins_pipe( pipe_slow );
 9462 %}
 9463 
 9464 // float compare and set condition codes in EFLAGS by XMM regs
 9465 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9466   predicate(UseSSE>=2);
 9467   match(Set cr (CmpD src1 (LoadD src2)));
 9468   ins_cost(145);
 9469   format %{ "UCOMISD $src1,$src2\n\t"
 9470             "JNP,s   exit\n\t"
 9471             "PUSHF\t# saw NaN, set CF\n\t"
 9472             "AND     [rsp], #0xffffff2b\n\t"
 9473             "POPF\n"
 9474     "exit:" %}
 9475   ins_encode %{
 9476     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9477     emit_cmpfp_fixup(_masm);
 9478   %}
 9479   ins_pipe( pipe_slow );
 9480 %}
 9481 
 9482 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9483   predicate(UseSSE>=2);
 9484   match(Set cr (CmpD src1 (LoadD src2)));
 9485   ins_cost(100);
 9486   format %{ "UCOMISD $src1,$src2" %}
 9487   ins_encode %{
 9488     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9489   %}
 9490   ins_pipe( pipe_slow );
 9491 %}
 9492 
 9493 // Compare into -1,0,1 in XMM
 9494 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9495   predicate(UseSSE>=2);
 9496   match(Set dst (CmpD3 src1 src2));
 9497   effect(KILL cr);
 9498   ins_cost(255);
 9499   format %{ "UCOMISD $src1, $src2\n\t"
 9500             "MOV     $dst, #-1\n\t"
 9501             "JP,s    done\n\t"
 9502             "JB,s    done\n\t"
 9503             "SETNE   $dst\n\t"
 9504             "MOVZB   $dst, $dst\n"
 9505     "done:" %}
 9506   ins_encode %{
 9507     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9508     emit_cmpfp3(_masm, $dst$$Register);
 9509   %}
 9510   ins_pipe( pipe_slow );
 9511 %}
 9512 
 9513 // Compare into -1,0,1 in XMM and memory
 9514 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9515   predicate(UseSSE>=2);
 9516   match(Set dst (CmpD3 src1 (LoadD src2)));
 9517   effect(KILL cr);
 9518   ins_cost(275);
 9519   format %{ "UCOMISD $src1, $src2\n\t"
 9520             "MOV     $dst, #-1\n\t"
 9521             "JP,s    done\n\t"
 9522             "JB,s    done\n\t"
 9523             "SETNE   $dst\n\t"
 9524             "MOVZB   $dst, $dst\n"
 9525     "done:" %}
 9526   ins_encode %{
 9527     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9528     emit_cmpfp3(_masm, $dst$$Register);
 9529   %}
 9530   ins_pipe( pipe_slow );
 9531 %}
 9532 
 9533 
 9534 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9535   predicate (UseSSE <=1);
 9536   match(Set dst (SubD dst src));
 9537 
 9538   format %{ "FLD    $src\n\t"
 9539             "DSUBp  $dst,ST" %}
 9540   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9541   ins_cost(150);
 9542   ins_encode( Push_Reg_DPR(src),
 9543               OpcP, RegOpc(dst) );
 9544   ins_pipe( fpu_reg_reg );
 9545 %}
 9546 
 9547 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9548   predicate (UseSSE <=1);
 9549   match(Set dst (RoundDouble (SubD src1 src2)));
 9550   ins_cost(250);
 9551 
 9552   format %{ "FLD    $src2\n\t"
 9553             "DSUB   ST,$src1\n\t"
 9554             "FSTP_D $dst\t# D-round" %}
 9555   opcode(0xD8, 0x5);
 9556   ins_encode( Push_Reg_DPR(src2),
 9557               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9558   ins_pipe( fpu_mem_reg_reg );
 9559 %}
 9560 
 9561 
 9562 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9563   predicate (UseSSE <=1);
 9564   match(Set dst (SubD dst (LoadD src)));
 9565   ins_cost(150);
 9566 
 9567   format %{ "FLD    $src\n\t"
 9568             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9570   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9571               OpcP, RegOpc(dst) );
 9572   ins_pipe( fpu_reg_mem );
 9573 %}
 9574 
 9575 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9576   predicate (UseSSE<=1);
 9577   match(Set dst (AbsD src));
 9578   ins_cost(100);
 9579   format %{ "FABS" %}
 9580   opcode(0xE1, 0xD9);
 9581   ins_encode( OpcS, OpcP );
 9582   ins_pipe( fpu_reg_reg );
 9583 %}
 9584 
 9585 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9586   predicate(UseSSE<=1);
 9587   match(Set dst (NegD src));
 9588   ins_cost(100);
 9589   format %{ "FCHS" %}
 9590   opcode(0xE0, 0xD9);
 9591   ins_encode( OpcS, OpcP );
 9592   ins_pipe( fpu_reg_reg );
 9593 %}
 9594 
 9595 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9596   predicate(UseSSE<=1);
 9597   match(Set dst (AddD dst src));
 9598   format %{ "FLD    $src\n\t"
 9599             "DADD   $dst,ST" %}
 9600   size(4);
 9601   ins_cost(150);
 9602   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9603   ins_encode( Push_Reg_DPR(src),
 9604               OpcP, RegOpc(dst) );
 9605   ins_pipe( fpu_reg_reg );
 9606 %}
 9607 
 9608 
 9609 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9610   predicate(UseSSE<=1);
 9611   match(Set dst (RoundDouble (AddD src1 src2)));
 9612   ins_cost(250);
 9613 
 9614   format %{ "FLD    $src2\n\t"
 9615             "DADD   ST,$src1\n\t"
 9616             "FSTP_D $dst\t# D-round" %}
 9617   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9618   ins_encode( Push_Reg_DPR(src2),
 9619               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9620   ins_pipe( fpu_mem_reg_reg );
 9621 %}
 9622 
 9623 
 9624 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9625   predicate(UseSSE<=1);
 9626   match(Set dst (AddD dst (LoadD src)));
 9627   ins_cost(150);
 9628 
 9629   format %{ "FLD    $src\n\t"
 9630             "DADDp  $dst,ST" %}
 9631   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9632   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9633               OpcP, RegOpc(dst) );
 9634   ins_pipe( fpu_reg_mem );
 9635 %}
 9636 
 9637 // add-to-memory
 9638 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9639   predicate(UseSSE<=1);
 9640   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9641   ins_cost(150);
 9642 
 9643   format %{ "FLD_D  $dst\n\t"
 9644             "DADD   ST,$src\n\t"
 9645             "FST_D  $dst" %}
 9646   opcode(0xDD, 0x0);
 9647   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9648               Opcode(0xD8), RegOpc(src),
 9649               set_instruction_start,
 9650               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9651   ins_pipe( fpu_reg_mem );
 9652 %}
 9653 
 9654 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9655   predicate(UseSSE<=1);
 9656   match(Set dst (AddD dst con));
 9657   ins_cost(125);
 9658   format %{ "FLD1\n\t"
 9659             "DADDp  $dst,ST" %}
 9660   ins_encode %{
 9661     __ fld1();
 9662     __ faddp($dst$$reg);
 9663   %}
 9664   ins_pipe(fpu_reg);
 9665 %}
 9666 
 9667 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9668   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9669   match(Set dst (AddD dst con));
 9670   ins_cost(200);
 9671   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9672             "DADDp  $dst,ST" %}
 9673   ins_encode %{
 9674     __ fld_d($constantaddress($con));
 9675     __ faddp($dst$$reg);
 9676   %}
 9677   ins_pipe(fpu_reg_mem);
 9678 %}
 9679 
 9680 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9681   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9682   match(Set dst (RoundDouble (AddD src con)));
 9683   ins_cost(200);
 9684   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9685             "DADD   ST,$src\n\t"
 9686             "FSTP_D $dst\t# D-round" %}
 9687   ins_encode %{
 9688     __ fld_d($constantaddress($con));
 9689     __ fadd($src$$reg);
 9690     __ fstp_d(Address(rsp, $dst$$disp));
 9691   %}
 9692   ins_pipe(fpu_mem_reg_con);
 9693 %}
 9694 
 9695 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9696   predicate(UseSSE<=1);
 9697   match(Set dst (MulD dst src));
 9698   format %{ "FLD    $src\n\t"
 9699             "DMULp  $dst,ST" %}
 9700   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9701   ins_cost(150);
 9702   ins_encode( Push_Reg_DPR(src),
 9703               OpcP, RegOpc(dst) );
 9704   ins_pipe( fpu_reg_reg );
 9705 %}
 9706 
 9707 // Strict FP instruction biases argument before multiply then
 9708 // biases result to avoid double rounding of subnormals.
 9709 //
 9710 // scale arg1 by multiplying arg1 by 2^(-15360)
 9711 // load arg2
 9712 // multiply scaled arg1 by arg2
 9713 // rescale product by 2^(15360)
 9714 //
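// Without the bias, a product whose exact value lies in the double subnormal
// range is first rounded to 53 significand bits in the x87's wider exponent
// range and then rounded again when it is denormalized on the final store --
// the double rounding that strict FP semantics forbid.  Scaling one operand
// by 2^(-15360) (15360 = 16382 - 1022, the gap between the extended and
// double exponent ranges) makes such a product subnormal in the extended
// format too, so it is rounded exactly once; the 2^(+15360) rescale is then
// an exact power-of-two multiply.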
 9715 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9716   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9717   match(Set dst (MulD dst src));
 9718   ins_cost(1);   // Select this instruction for all FP double multiplies
 9719 
 9720   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9721             "DMULp  $dst,ST\n\t"
 9722             "FLD    $src\n\t"
 9723             "DMULp  $dst,ST\n\t"
 9724             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9725             "DMULp  $dst,ST\n\t" %}
 9726   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9727   ins_encode( strictfp_bias1(dst),
 9728               Push_Reg_DPR(src),
 9729               OpcP, RegOpc(dst),
 9730               strictfp_bias2(dst) );
 9731   ins_pipe( fpu_reg_reg );
 9732 %}
 9733 
 9734 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9735   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9736   match(Set dst (MulD dst con));
 9737   ins_cost(200);
 9738   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9739             "DMULp  $dst,ST" %}
 9740   ins_encode %{
 9741     __ fld_d($constantaddress($con));
 9742     __ fmulp($dst$$reg);
 9743   %}
 9744   ins_pipe(fpu_reg_mem);
 9745 %}
 9746 
 9747 
 9748 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9749   predicate( UseSSE<=1 );
 9750   match(Set dst (MulD dst (LoadD src)));
 9751   ins_cost(200);
 9752   format %{ "FLD_D  $src\n\t"
 9753             "DMULp  $dst,ST" %}
 9754   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9755   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9756               OpcP, RegOpc(dst) );
 9757   ins_pipe( fpu_reg_mem );
 9758 %}
 9759 
 9760 //
 9761 // Cisc-alternate to reg-reg multiply
 9762 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9763   predicate( UseSSE<=1 );
 9764   match(Set dst (MulD src (LoadD mem)));
 9765   ins_cost(250);
 9766   format %{ "FLD_D  $mem\n\t"
 9767             "DMUL   ST,$src\n\t"
 9768             "FSTP_D $dst" %}
 9769   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9770   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9771               OpcReg_FPR(src),
 9772               Pop_Reg_DPR(dst) );
 9773   ins_pipe( fpu_reg_reg_mem );
 9774 %}
 9775 
 9776 
 9777 // MACRO3 -- addDPR a mulDPR
 9778 // This instruction is a '2-address' instruction in that the result goes
 9779 // back to src2.  This eliminates a move from the macro; possibly the
 9780 // register allocator will have to add it back (and maybe not).
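// For example, the Ideal subtree for  src2 = src0 * src1 + src2  collapses
// into a single FLD/FMUL/FADDP sequence with no intermediate register.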
 9781 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9782   predicate( UseSSE<=1 );
 9783   match(Set src2 (AddD (MulD src0 src1) src2));
 9784   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9785             "DMUL   ST,$src1\n\t"
 9786             "DADDp  $src2,ST" %}
 9787   ins_cost(250);
 9788   opcode(0xDD); /* LoadD DD /0 */
 9789   ins_encode( Push_Reg_FPR(src0),
 9790               FMul_ST_reg(src1),
 9791               FAddP_reg_ST(src2) );
 9792   ins_pipe( fpu_reg_reg_reg );
 9793 %}
 9794 
 9795 
 9796 // MACRO3 -- subDPR a mulDPR
 9797 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9798   predicate( UseSSE<=1 );
 9799   match(Set src2 (SubD (MulD src0 src1) src2));
 9800   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9801             "DMUL   ST,$src1\n\t"
 9802             "DSUBRp $src2,ST" %}
 9803   ins_cost(250);
 9804   ins_encode( Push_Reg_FPR(src0),
 9805               FMul_ST_reg(src1),
 9806               Opcode(0xDE), Opc_plus(0xE0,src2));
 9807   ins_pipe( fpu_reg_reg_reg );
 9808 %}
 9809 
 9810 
 9811 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9812   predicate( UseSSE<=1 );
 9813   match(Set dst (DivD dst src));
 9814 
 9815   format %{ "FLD    $src\n\t"
 9816             "FDIVp  $dst,ST" %}
 9817   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9818   ins_cost(150);
 9819   ins_encode( Push_Reg_DPR(src),
 9820               OpcP, RegOpc(dst) );
 9821   ins_pipe( fpu_reg_reg );
 9822 %}
 9823 
 9824 // Strict FP instruction biases argument before division then
 9825 // biases result, to avoid double rounding of subnormals.
 9826 //
 9827 // scale dividend by multiplying dividend by 2^(-15360)
 9828 // load divisor
 9829 // divide scaled dividend by divisor
 9830 // rescale quotient by 2^(15360)
 9831 //
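// The biasing follows the same double-rounding argument as in
// strictfp_mulDPR_reg above: scale the dividend into the extended subnormal
// range, divide, then rescale the quotient with an exact power-of-two multiply.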
 9832 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9837 
 9838   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9839             "DMULp  $dst,ST\n\t"
 9840             "FLD    $src\n\t"
 9841             "FDIVp  $dst,ST\n\t"
 9842             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9843             "DMULp  $dst,ST\n\t" %}
 9844   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9845   ins_encode( strictfp_bias1(dst),
 9846               Push_Reg_DPR(src),
 9847               OpcP, RegOpc(dst),
 9848               strictfp_bias2(dst) );
 9849   ins_pipe( fpu_reg_reg );
 9850 %}
 9851 
 9852 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9853   predicate(UseSSE<=1);
 9854   match(Set dst (ModD dst src));
 9855   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9856 
 9857   format %{ "DMOD   $dst,$src" %}
 9858   ins_cost(250);
 9859   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9860               emitModDPR(),
 9861               Push_Result_Mod_DPR(src),
 9862               Pop_Reg_DPR(dst));
 9863   ins_pipe( pipe_slow );
 9864 %}
 9865 
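// FPREM produces only a partial remainder; the loop below re-executes it
// until the FPU's C2 flag is clear.  FNSTSW AX / SAHF maps C2 into PF,
// hence the JP back to the top of the loop.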
 9866 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9867   predicate(UseSSE>=2);
 9868   match(Set dst (ModD src0 src1));
 9869   effect(KILL rax, KILL cr);
 9870 
 9871   format %{ "SUB    ESP,8\t # DMOD\n"
 9872           "\tMOVSD  [ESP+0],$src1\n"
 9873           "\tFLD_D  [ESP+0]\n"
 9874           "\tMOVSD  [ESP+0],$src0\n"
 9875           "\tFLD_D  [ESP+0]\n"
 9876      "loop:\tFPREM\n"
 9877           "\tFWAIT\n"
 9878           "\tFNSTSW AX\n"
 9879           "\tSAHF\n"
 9880           "\tJP     loop\n"
 9881           "\tFSTP_D [ESP+0]\n"
 9882           "\tMOVSD  $dst,[ESP+0]\n"
 9883           "\tADD    ESP,8\n"
 9884           "\tFSTP   ST0\t # Restore FPU Stack"
 9885     %}
 9886   ins_cost(250);
 9887   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9888   ins_pipe( pipe_slow );
 9889 %}
 9890 
 9891 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9892   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
 9894   format %{ "DATA   $dst,$src" %}
 9895   opcode(0xD9, 0xF3);
 9896   ins_encode( Push_Reg_DPR(src),
 9897               OpcP, OpcS, RegOpc(dst) );
 9898   ins_pipe( pipe_slow );
 9899 %}
 9900 
 9901 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9902   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
 9904   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9905   format %{ "DATA   $dst,$src" %}
 9906   opcode(0xD9, 0xF3);
 9907   ins_encode( Push_SrcD(src),
 9908               OpcP, OpcS, Push_ResultD(dst) );
 9909   ins_pipe( pipe_slow );
 9910 %}
 9911 
 9912 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
 9913   predicate (UseSSE<=1);
 9914   match(Set dst (SqrtD src));
 9915   format %{ "DSQRT  $dst,$src" %}
 9916   opcode(0xFA, 0xD9);
 9917   ins_encode( Push_Reg_DPR(src),
 9918               OpcS, OpcP, Pop_Reg_DPR(dst) );
 9919   ins_pipe( pipe_slow );
 9920 %}
 9921 
 9922 //-------------Float Instructions-------------------------------
 9923 // Float Math
 9924 
 9925 // Code for float compare:
 9926 //     fcompp();
 9927 //     fwait(); fnstsw_ax();
 9928 //     sahf();
 9929 //     movl(dst, unordered_result);
 9930 //     jcc(Assembler::parity, exit);
 9931 //     movl(dst, less_result);
 9932 //     jcc(Assembler::below, exit);
 9933 //     movl(dst, equal_result);
 9934 //     jcc(Assembler::equal, exit);
 9935 //     movl(dst, greater_result);
 9936 //   exit:
 9937 
 9938 // P6 version of float compare, sets condition codes in EFLAGS
 9939 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9940   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9941   match(Set cr (CmpF src1 src2));
 9942   effect(KILL rax);
 9943   ins_cost(150);
 9944   format %{ "FLD    $src1\n\t"
 9945             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9946             "JNP    exit\n\t"
 9947             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
 9948             "SAHF\n"
 9949      "exit:\tNOP               // avoid branch to branch" %}
 9950   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9951   ins_encode( Push_Reg_DPR(src1),
 9952               OpcP, RegOpc(src2),
 9953               cmpF_P6_fixup );
 9954   ins_pipe( pipe_slow );
 9955 %}
 9956 
 9957 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
 9958   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9959   match(Set cr (CmpF src1 src2));
 9960   ins_cost(100);
 9961   format %{ "FLD    $src1\n\t"
 9962             "FUCOMIP ST,$src2  // P6 instruction" %}
 9963   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9964   ins_encode( Push_Reg_DPR(src1),
 9965               OpcP, RegOpc(src2));
 9966   ins_pipe( pipe_slow );
 9967 %}
 9968 
 9969 
 9970 // Compare & branch
 9971 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9972   predicate(UseSSE == 0);
 9973   match(Set cr (CmpF src1 src2));
 9974   effect(KILL rax);
 9975   ins_cost(200);
 9976   format %{ "FLD    $src1\n\t"
 9977             "FCOMp  $src2\n\t"
 9978             "FNSTSW AX\n\t"
 9979             "TEST   AX,0x400\n\t"
 9980             "JZ,s   flags\n\t"
 9981             "MOV    AH,1\t# unordered treat as LT\n"
 9982     "flags:\tSAHF" %}
 9983   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9984   ins_encode( Push_Reg_DPR(src1),
 9985               OpcP, RegOpc(src2),
 9986               fpu_flags);
 9987   ins_pipe( pipe_slow );
 9988 %}
 9989 
 9990 // Compare vs zero into -1,0,1
 9991 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9992   predicate(UseSSE == 0);
 9993   match(Set dst (CmpF3 src1 zero));
 9994   effect(KILL cr, KILL rax);
 9995   ins_cost(280);
 9996   format %{ "FTSTF  $dst,$src1" %}
 9997   opcode(0xE4, 0xD9);
 9998   ins_encode( Push_Reg_DPR(src1),
 9999               OpcS, OpcP, PopFPU,
10000               CmpF_Result(dst));
10001   ins_pipe( pipe_slow );
10002 %}
10003 
10004 // Compare into -1,0,1
10005 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10006   predicate(UseSSE == 0);
10007   match(Set dst (CmpF3 src1 src2));
10008   effect(KILL cr, KILL rax);
10009   ins_cost(300);
10010   format %{ "FCMPF  $dst,$src1,$src2" %}
10011   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10012   ins_encode( Push_Reg_DPR(src1),
10013               OpcP, RegOpc(src2),
10014               CmpF_Result(dst));
10015   ins_pipe( pipe_slow );
10016 %}
10017 
10018 // float compare and set condition codes in EFLAGS by XMM regs
10019 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10020   predicate(UseSSE>=1);
10021   match(Set cr (CmpF src1 src2));
10022   ins_cost(145);
10023   format %{ "UCOMISS $src1,$src2\n\t"
10024             "JNP,s   exit\n\t"
10025             "PUSHF\t# saw NaN, set CF\n\t"
10026             "AND     [rsp], #0xffffff2b\n\t"
10027             "POPF\n"
10028     "exit:" %}
10029   ins_encode %{
10030     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10031     emit_cmpfp_fixup(_masm);
10032   %}
10033   ins_pipe( pipe_slow );
10034 %}
10035 
10036 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10037   predicate(UseSSE>=1);
10038   match(Set cr (CmpF src1 src2));
10039   ins_cost(100);
10040   format %{ "UCOMISS $src1,$src2" %}
10041   ins_encode %{
10042     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10043   %}
10044   ins_pipe( pipe_slow );
10045 %}
10046 
10047 // float compare and set condition codes in EFLAGS by XMM regs
10048 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10049   predicate(UseSSE>=1);
10050   match(Set cr (CmpF src1 (LoadF src2)));
10051   ins_cost(165);
10052   format %{ "UCOMISS $src1,$src2\n\t"
10053             "JNP,s   exit\n\t"
10054             "PUSHF\t# saw NaN, set CF\n\t"
10055             "AND     [rsp], #0xffffff2b\n\t"
10056             "POPF\n"
10057     "exit:" %}
10058   ins_encode %{
10059     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10060     emit_cmpfp_fixup(_masm);
10061   %}
10062   ins_pipe( pipe_slow );
10063 %}
10064 
10065 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10066   predicate(UseSSE>=1);
10067   match(Set cr (CmpF src1 (LoadF src2)));
10068   ins_cost(100);
10069   format %{ "UCOMISS $src1,$src2" %}
10070   ins_encode %{
10071     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10072   %}
10073   ins_pipe( pipe_slow );
10074 %}
10075 
10076 // Compare into -1,0,1 in XMM
10077 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10078   predicate(UseSSE>=1);
10079   match(Set dst (CmpF3 src1 src2));
10080   effect(KILL cr);
10081   ins_cost(255);
10082   format %{ "UCOMISS $src1, $src2\n\t"
10083             "MOV     $dst, #-1\n\t"
10084             "JP,s    done\n\t"
10085             "JB,s    done\n\t"
10086             "SETNE   $dst\n\t"
10087             "MOVZB   $dst, $dst\n"
10088     "done:" %}
10089   ins_encode %{
10090     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10091     emit_cmpfp3(_masm, $dst$$Register);
10092   %}
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 // Compare into -1,0,1 in XMM and memory
10097 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10098   predicate(UseSSE>=1);
10099   match(Set dst (CmpF3 src1 (LoadF src2)));
10100   effect(KILL cr);
10101   ins_cost(275);
10102   format %{ "UCOMISS $src1, $src2\n\t"
10103             "MOV     $dst, #-1\n\t"
10104             "JP,s    done\n\t"
10105             "JB,s    done\n\t"
10106             "SETNE   $dst\n\t"
10107             "MOVZB   $dst, $dst\n"
10108     "done:" %}
10109   ins_encode %{
10110     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10111     emit_cmpfp3(_masm, $dst$$Register);
10112   %}
10113   ins_pipe( pipe_slow );
10114 %}
10115 
10116 // Spill to obtain 24-bit precision
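// (Storing the 80-bit x87 result through a 32-bit stack slot with FSTP_S
// rounds it to single precision; the *24 variants below rely on this store
// instead of the FPU control word's precision setting.)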
10117 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10118   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10119   match(Set dst (SubF src1 src2));
10120 
10121   format %{ "FSUB   $dst,$src1 - $src2" %}
10122   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10123   ins_encode( Push_Reg_FPR(src1),
10124               OpcReg_FPR(src2),
10125               Pop_Mem_FPR(dst) );
10126   ins_pipe( fpu_mem_reg_reg );
10127 %}
10128 //
10129 // This instruction does not round to 24-bits
10130 instruct subFPR_reg(regFPR dst, regFPR src) %{
10131   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10132   match(Set dst (SubF dst src));
10133 
10134   format %{ "FSUB   $dst,$src" %}
10135   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10136   ins_encode( Push_Reg_FPR(src),
10137               OpcP, RegOpc(dst) );
10138   ins_pipe( fpu_reg_reg );
10139 %}
10140 
10141 // Spill to obtain 24-bit precision
10142 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10143   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10144   match(Set dst (AddF src1 src2));
10145 
10146   format %{ "FADD   $dst,$src1,$src2" %}
10147   opcode(0xD8, 0x0); /* D8 C0+i */
10148   ins_encode( Push_Reg_FPR(src2),
10149               OpcReg_FPR(src1),
10150               Pop_Mem_FPR(dst) );
10151   ins_pipe( fpu_mem_reg_reg );
10152 %}
10153 //
10154 // This instruction does not round to 24-bits
10155 instruct addFPR_reg(regFPR dst, regFPR src) %{
10156   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10157   match(Set dst (AddF dst src));
10158 
10159   format %{ "FLD    $src\n\t"
10160             "FADDp  $dst,ST" %}
10161   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10162   ins_encode( Push_Reg_FPR(src),
10163               OpcP, RegOpc(dst) );
10164   ins_pipe( fpu_reg_reg );
10165 %}
10166 
10167 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10168   predicate(UseSSE==0);
10169   match(Set dst (AbsF src));
10170   ins_cost(100);
10171   format %{ "FABS" %}
10172   opcode(0xE1, 0xD9);
10173   ins_encode( OpcS, OpcP );
10174   ins_pipe( fpu_reg_reg );
10175 %}
10176 
10177 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10178   predicate(UseSSE==0);
10179   match(Set dst (NegF src));
10180   ins_cost(100);
10181   format %{ "FCHS" %}
10182   opcode(0xE0, 0xD9);
10183   ins_encode( OpcS, OpcP );
10184   ins_pipe( fpu_reg_reg );
10185 %}
10186 
10187 // Cisc-alternate to addFPR_reg
10188 // Spill to obtain 24-bit precision
10189 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10190   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10191   match(Set dst (AddF src1 (LoadF src2)));
10192 
10193   format %{ "FLD    $src2\n\t"
10194             "FADD   ST,$src1\n\t"
10195             "FSTP_S $dst" %}
10196   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10197   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10198               OpcReg_FPR(src1),
10199               Pop_Mem_FPR(dst) );
10200   ins_pipe( fpu_mem_reg_mem );
10201 %}
10202 //
10203 // Cisc-alternate to addFPR_reg
10204 // This instruction does not round to 24-bits
10205 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10206   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10207   match(Set dst (AddF dst (LoadF src)));
10208 
10209   format %{ "FADD   $dst,$src" %}
10210   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10211   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10212               OpcP, RegOpc(dst) );
10213   ins_pipe( fpu_reg_mem );
10214 %}
10215 
// Following two instructions for _222_mpegaudio
10217 // Spill to obtain 24-bit precision
10218 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10219   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10220   match(Set dst (AddF src1 src2));
10221 
10222   format %{ "FADD   $dst,$src1,$src2" %}
10223   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10224   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10225               OpcReg_FPR(src2),
10226               Pop_Mem_FPR(dst) );
10227   ins_pipe( fpu_mem_reg_mem );
10228 %}
10229 
10230 // Cisc-spill variant
10231 // Spill to obtain 24-bit precision
10232 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10233   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10234   match(Set dst (AddF src1 (LoadF src2)));
10235 
10236   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10237   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10238   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10239               set_instruction_start,
10240               OpcP, RMopc_Mem(secondary,src1),
10241               Pop_Mem_FPR(dst) );
10242   ins_pipe( fpu_mem_mem_mem );
10243 %}
10244 
10245 // Spill to obtain 24-bit precision
10246 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10247   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10248   match(Set dst (AddF src1 src2));
10249 
10250   format %{ "FADD   $dst,$src1,$src2" %}
10251   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10252   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10253               set_instruction_start,
10254               OpcP, RMopc_Mem(secondary,src1),
10255               Pop_Mem_FPR(dst) );
10256   ins_pipe( fpu_mem_mem_mem );
10257 %}
10258 
10259 
10260 // Spill to obtain 24-bit precision
10261 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10262   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10263   match(Set dst (AddF src con));
10264   format %{ "FLD    $src\n\t"
10265             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10266             "FSTP_S $dst"  %}
10267   ins_encode %{
10268     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10269     __ fadd_s($constantaddress($con));
10270     __ fstp_s(Address(rsp, $dst$$disp));
10271   %}
10272   ins_pipe(fpu_mem_reg_con);
10273 %}
10274 //
10275 // This instruction does not round to 24-bits
10276 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10277   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10278   match(Set dst (AddF src con));
10279   format %{ "FLD    $src\n\t"
10280             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10281             "FSTP   $dst"  %}
10282   ins_encode %{
10283     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10284     __ fadd_s($constantaddress($con));
10285     __ fstp_d($dst$$reg);
10286   %}
10287   ins_pipe(fpu_reg_reg_con);
10288 %}
10289 
10290 // Spill to obtain 24-bit precision
10291 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10292   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10293   match(Set dst (MulF src1 src2));
10294 
10295   format %{ "FLD    $src1\n\t"
10296             "FMUL   $src2\n\t"
10297             "FSTP_S $dst"  %}
10298   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10299   ins_encode( Push_Reg_FPR(src1),
10300               OpcReg_FPR(src2),
10301               Pop_Mem_FPR(dst) );
10302   ins_pipe( fpu_mem_reg_reg );
10303 %}
10304 //
10305 // This instruction does not round to 24-bits
10306 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10307   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10308   match(Set dst (MulF src1 src2));
10309 
10310   format %{ "FLD    $src1\n\t"
10311             "FMUL   $src2\n\t"
10312             "FSTP_S $dst"  %}
10313   opcode(0xD8, 0x1); /* D8 C8+i */
10314   ins_encode( Push_Reg_FPR(src2),
10315               OpcReg_FPR(src1),
10316               Pop_Reg_FPR(dst) );
10317   ins_pipe( fpu_reg_reg_reg );
10318 %}
10319 
10320 
10321 // Spill to obtain 24-bit precision
10322 // Cisc-alternate to reg-reg multiply
10323 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10324   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10325   match(Set dst (MulF src1 (LoadF src2)));
10326 
10327   format %{ "FLD_S  $src2\n\t"
10328             "FMUL   $src1\n\t"
10329             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10331   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10332               OpcReg_FPR(src1),
10333               Pop_Mem_FPR(dst) );
10334   ins_pipe( fpu_mem_reg_mem );
10335 %}
10336 //
10337 // This instruction does not round to 24-bits
10338 // Cisc-alternate to reg-reg multiply
10339 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10340   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10341   match(Set dst (MulF src1 (LoadF src2)));
10342 
10343   format %{ "FMUL   $dst,$src1,$src2" %}
10344   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10345   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10346               OpcReg_FPR(src1),
10347               Pop_Reg_FPR(dst) );
10348   ins_pipe( fpu_reg_reg_mem );
10349 %}
10350 
10351 // Spill to obtain 24-bit precision
10352 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10353   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10354   match(Set dst (MulF src1 src2));
10355 
10356   format %{ "FMUL   $dst,$src1,$src2" %}
10357   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10358   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10359               set_instruction_start,
10360               OpcP, RMopc_Mem(secondary,src1),
10361               Pop_Mem_FPR(dst) );
10362   ins_pipe( fpu_mem_mem_mem );
10363 %}
10364 
10365 // Spill to obtain 24-bit precision
10366 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10367   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10368   match(Set dst (MulF src con));
10369 
10370   format %{ "FLD    $src\n\t"
10371             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10372             "FSTP_S $dst"  %}
10373   ins_encode %{
10374     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10375     __ fmul_s($constantaddress($con));
10376     __ fstp_s(Address(rsp, $dst$$disp));
10377   %}
10378   ins_pipe(fpu_mem_reg_con);
10379 %}
10380 //
10381 // This instruction does not round to 24-bits
10382 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10383   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10384   match(Set dst (MulF src con));
10385 
10386   format %{ "FLD    $src\n\t"
10387             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10388             "FSTP   $dst"  %}
10389   ins_encode %{
10390     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10391     __ fmul_s($constantaddress($con));
10392     __ fstp_d($dst$$reg);
10393   %}
10394   ins_pipe(fpu_reg_reg_con);
10395 %}
10396 
10397 
10398 //
10399 // MACRO1 -- subsume unshared load into mulFPR
10400 // This instruction does not round to 24-bits
10401 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10402   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10403   match(Set dst (MulF (LoadF mem1) src));
10404 
10405   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10406             "FMUL   ST,$src\n\t"
10407             "FSTP   $dst" %}
10408   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10409   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10410               OpcReg_FPR(src),
10411               Pop_Reg_FPR(dst) );
10412   ins_pipe( fpu_reg_reg_mem );
10413 %}
10414 //
10415 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10416 // This instruction does not round to 24-bits
10417 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10418   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10419   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10420   ins_cost(95);
10421 
10422   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10423             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10424             "FADD   ST,$src2\n\t"
10425             "FSTP   $dst" %}
10426   opcode(0xD9); /* LoadF D9 /0 */
10427   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10428               FMul_ST_reg(src1),
10429               FAdd_ST_reg(src2),
10430               Pop_Reg_FPR(dst) );
10431   ins_pipe( fpu_reg_mem_reg_reg );
10432 %}
10433 
10434 // MACRO3 -- addFPR a mulFPR
10435 // This instruction does not round to 24-bits.  It is a '2-address'
10436 // instruction in that the result goes back to src2.  This eliminates
10437 // a move from the macro; possibly the register allocator will have
10438 // to add it back (and maybe not).
10439 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10440   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10441   match(Set src2 (AddF (MulF src0 src1) src2));
10442 
10443   format %{ "FLD    $src0     ===MACRO3===\n\t"
10444             "FMUL   ST,$src1\n\t"
10445             "FADDP  $src2,ST" %}
10446   opcode(0xD9); /* LoadF D9 /0 */
10447   ins_encode( Push_Reg_FPR(src0),
10448               FMul_ST_reg(src1),
10449               FAddP_reg_ST(src2) );
10450   ins_pipe( fpu_reg_reg_reg );
10451 %}
10452 
10453 // MACRO4 -- divFPR subFPR
10454 // This instruction does not round to 24-bits
10455 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10456   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10457   match(Set dst (DivF (SubF src2 src1) src3));
10458 
10459   format %{ "FLD    $src2   ===MACRO4===\n\t"
10460             "FSUB   ST,$src1\n\t"
10461             "FDIV   ST,$src3\n\t"
10462             "FSTP  $dst" %}
10463   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10464   ins_encode( Push_Reg_FPR(src2),
10465               subFPR_divFPR_encode(src1,src3),
10466               Pop_Reg_FPR(dst) );
10467   ins_pipe( fpu_reg_reg_reg_reg );
10468 %}
10469 
10470 // Spill to obtain 24-bit precision
10471 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10472   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10473   match(Set dst (DivF src1 src2));
10474 
10475   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10477   ins_encode( Push_Reg_FPR(src1),
10478               OpcReg_FPR(src2),
10479               Pop_Mem_FPR(dst) );
10480   ins_pipe( fpu_mem_reg_reg );
10481 %}
10482 //
10483 // This instruction does not round to 24-bits
10484 instruct divFPR_reg(regFPR dst, regFPR src) %{
10485   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10486   match(Set dst (DivF dst src));
10487 
10488   format %{ "FDIV   $dst,$src" %}
10489   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10490   ins_encode( Push_Reg_FPR(src),
10491               OpcP, RegOpc(dst) );
10492   ins_pipe( fpu_reg_reg );
10493 %}
10494 
10495 
10496 // Spill to obtain 24-bit precision
10497 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10498   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10499   match(Set dst (ModF src1 src2));
10500   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10501 
10502   format %{ "FMOD   $dst,$src1,$src2" %}
10503   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10504               emitModDPR(),
10505               Push_Result_Mod_DPR(src2),
10506               Pop_Mem_FPR(dst));
10507   ins_pipe( pipe_slow );
10508 %}
10509 //
10510 // This instruction does not round to 24-bits
10511 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10512   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10513   match(Set dst (ModF dst src));
10514   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10515 
10516   format %{ "FMOD   $dst,$src" %}
10517   ins_encode(Push_Reg_Mod_DPR(dst, src),
10518               emitModDPR(),
10519               Push_Result_Mod_DPR(src),
10520               Pop_Reg_FPR(dst));
10521   ins_pipe( pipe_slow );
10522 %}
10523 
10524 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10525   predicate(UseSSE>=1);
10526   match(Set dst (ModF src0 src1));
10527   effect(KILL rax, KILL cr);
10528   format %{ "SUB    ESP,4\t # FMOD\n"
10529           "\tMOVSS  [ESP+0],$src1\n"
10530           "\tFLD_S  [ESP+0]\n"
10531           "\tMOVSS  [ESP+0],$src0\n"
10532           "\tFLD_S  [ESP+0]\n"
10533      "loop:\tFPREM\n"
10534           "\tFWAIT\n"
10535           "\tFNSTSW AX\n"
10536           "\tSAHF\n"
10537           "\tJP     loop\n"
10538           "\tFSTP_S [ESP+0]\n"
10539           "\tMOVSS  $dst,[ESP+0]\n"
10540           "\tADD    ESP,4\n"
10541           "\tFSTP   ST0\t # Restore FPU Stack"
10542     %}
10543   ins_cost(250);
10544   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10545   ins_pipe( pipe_slow );
10546 %}
10547 
10548 
10549 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10551 
10552 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10553   predicate(UseSSE==0);
10554   match(Set dst (RoundFloat src));
10555   ins_cost(125);
10556   format %{ "FST_S  $dst,$src\t# F-round" %}
10557   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10558   ins_pipe( fpu_mem_reg );
10559 %}
10560 
10561 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10562   predicate(UseSSE<=1);
10563   match(Set dst (RoundDouble src));
10564   ins_cost(125);
10565   format %{ "FST_D  $dst,$src\t# D-round" %}
10566   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10567   ins_pipe( fpu_mem_reg );
10568 %}
10569 
// Force rounding to 24-bit precision and 8-bit exponent
10571 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10572   predicate(UseSSE==0);
10573   match(Set dst (ConvD2F src));
10574   format %{ "FST_S  $dst,$src\t# F-round" %}
10575   expand %{
10576     roundFloat_mem_reg(dst,src);
10577   %}
10578 %}
10579 
// Force rounding to 24-bit precision and 8-bit exponent
10581 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10582   predicate(UseSSE==1);
10583   match(Set dst (ConvD2F src));
10584   effect( KILL cr );
10585   format %{ "SUB    ESP,4\n\t"
10586             "FST_S  [ESP],$src\t# F-round\n\t"
10587             "MOVSS  $dst,[ESP]\n\t"
10588             "ADD ESP,4" %}
10589   ins_encode %{
10590     __ subptr(rsp, 4);
10591     if ($src$$reg != FPR1L_enc) {
10592       __ fld_s($src$$reg-1);
10593       __ fstp_s(Address(rsp, 0));
10594     } else {
10595       __ fst_s(Address(rsp, 0));
10596     }
10597     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10598     __ addptr(rsp, 4);
10599   %}
10600   ins_pipe( pipe_slow );
10601 %}
10602 
10603 // Force rounding double precision to single precision
10604 instruct convD2F_reg(regF dst, regD src) %{
10605   predicate(UseSSE>=2);
10606   match(Set dst (ConvD2F src));
10607   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10608   ins_encode %{
10609     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10610   %}
10611   ins_pipe( pipe_slow );
10612 %}
10613 
10614 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10615   predicate(UseSSE==0);
10616   match(Set dst (ConvF2D src));
10617   format %{ "FST_S  $dst,$src\t# D-round" %}
10618   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10619   ins_pipe( fpu_reg_reg );
10620 %}
10621 
10622 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10623   predicate(UseSSE==1);
10624   match(Set dst (ConvF2D src));
10625   format %{ "FST_D  $dst,$src\t# D-round" %}
10626   expand %{
10627     roundDouble_mem_reg(dst,src);
10628   %}
10629 %}
10630 
10631 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10632   predicate(UseSSE==1);
10633   match(Set dst (ConvF2D src));
10634   effect( KILL cr );
10635   format %{ "SUB    ESP,4\n\t"
10636             "MOVSS  [ESP] $src\n\t"
10637             "FLD_S  [ESP]\n\t"
10638             "ADD    ESP,4\n\t"
10639             "FSTP   $dst\t# D-round" %}
10640   ins_encode %{
10641     __ subptr(rsp, 4);
10642     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10643     __ fld_s(Address(rsp, 0));
10644     __ addptr(rsp, 4);
10645     __ fstp_d($dst$$reg);
10646   %}
10647   ins_pipe( pipe_slow );
10648 %}
10649 
10650 instruct convF2D_reg(regD dst, regF src) %{
10651   predicate(UseSSE>=2);
10652   match(Set dst (ConvF2D src));
10653   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10654   ins_encode %{
10655     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10656   %}
10657   ins_pipe( pipe_slow );
10658 %}
10659 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
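// Per JLS 5.1.3, NaN converts to 0 and out-of-range values saturate to
// Integer.MIN_VALUE/MAX_VALUE.  FIST stores the "integer indefinite" value
// 0x80000000 for NaN and overflow, so that pattern is checked below to
// detect the corner cases and branch to the d2i_wrapper slow path.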
10661 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10662   predicate(UseSSE<=1);
10663   match(Set dst (ConvD2I src));
10664   effect( KILL tmp, KILL cr );
10665   format %{ "FLD    $src\t# Convert double to int \n\t"
10666             "FLDCW  trunc mode\n\t"
10667             "SUB    ESP,4\n\t"
10668             "FISTp  [ESP + #0]\n\t"
10669             "FLDCW  std/24-bit mode\n\t"
10670             "POP    EAX\n\t"
10671             "CMP    EAX,0x80000000\n\t"
10672             "JNE,s  fast\n\t"
10673             "FLD_D  $src\n\t"
10674             "CALL   d2i_wrapper\n"
10675       "fast:" %}
10676   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10677   ins_pipe( pipe_slow );
10678 %}
10679 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10681 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10682   predicate(UseSSE>=2);
10683   match(Set dst (ConvD2I src));
10684   effect( KILL tmp, KILL cr );
10685   format %{ "CVTTSD2SI $dst, $src\n\t"
10686             "CMP    $dst,0x80000000\n\t"
10687             "JNE,s  fast\n\t"
10688             "SUB    ESP, 8\n\t"
10689             "MOVSD  [ESP], $src\n\t"
10690             "FLD_D  [ESP]\n\t"
10691             "ADD    ESP, 8\n\t"
10692             "CALL   d2i_wrapper\n"
10693       "fast:" %}
10694   ins_encode %{
10695     Label fast;
10696     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10697     __ cmpl($dst$$Register, 0x80000000);
10698     __ jccb(Assembler::notEqual, fast);
10699     __ subptr(rsp, 8);
10700     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10701     __ fld_d(Address(rsp, 0));
10702     __ addptr(rsp, 8);
10703     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10704     __ post_call_nop();
10705     __ bind(fast);
10706   %}
10707   ins_pipe( pipe_slow );
10708 %}
10709 
10710 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10711   predicate(UseSSE<=1);
10712   match(Set dst (ConvD2L src));
10713   effect( KILL cr );
10714   format %{ "FLD    $src\t# Convert double to long\n\t"
10715             "FLDCW  trunc mode\n\t"
10716             "SUB    ESP,8\n\t"
10717             "FISTp  [ESP + #0]\n\t"
10718             "FLDCW  std/24-bit mode\n\t"
10719             "POP    EAX\n\t"
10720             "POP    EDX\n\t"
10721             "CMP    EDX,0x80000000\n\t"
10722             "JNE,s  fast\n\t"
10723             "TEST   EAX,EAX\n\t"
10724             "JNE,s  fast\n\t"
10725             "FLD    $src\n\t"
10726             "CALL   d2l_wrapper\n"
10727       "fast:" %}
10728   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10729   ins_pipe( pipe_slow );
10730 %}
10731 
10732 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10733 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10734   predicate (UseSSE>=2);
10735   match(Set dst (ConvD2L src));
10736   effect( KILL cr );
10737   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10738             "MOVSD  [ESP],$src\n\t"
10739             "FLD_D  [ESP]\n\t"
10740             "FLDCW  trunc mode\n\t"
10741             "FISTp  [ESP + #0]\n\t"
10742             "FLDCW  std/24-bit mode\n\t"
10743             "POP    EAX\n\t"
10744             "POP    EDX\n\t"
10745             "CMP    EDX,0x80000000\n\t"
10746             "JNE,s  fast\n\t"
10747             "TEST   EAX,EAX\n\t"
10748             "JNE,s  fast\n\t"
10749             "SUB    ESP,8\n\t"
10750             "MOVSD  [ESP],$src\n\t"
10751             "FLD_D  [ESP]\n\t"
10752             "ADD    ESP,8\n\t"
10753             "CALL   d2l_wrapper\n"
10754       "fast:" %}
10755   ins_encode %{
10756     Label fast;
10757     __ subptr(rsp, 8);
10758     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10759     __ fld_d(Address(rsp, 0));
10760     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10761     __ fistp_d(Address(rsp, 0));
10762     // Restore the rounding mode, mask the exception
10763     if (Compile::current()->in_24_bit_fp_mode()) {
10764       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10765     } else {
10766       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10767     }
10768     // Load the converted long, adjust CPU stack
10769     __ pop(rax);
10770     __ pop(rdx);
10771     __ cmpl(rdx, 0x80000000);
10772     __ jccb(Assembler::notEqual, fast);
10773     __ testl(rax, rax);
10774     __ jccb(Assembler::notEqual, fast);
10775     __ subptr(rsp, 8);
10776     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10777     __ fld_d(Address(rsp, 0));
10778     __ addptr(rsp, 8);
10779     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10780     __ post_call_nop();
10781     __ bind(fast);
10782   %}
10783   ins_pipe( pipe_slow );
10784 %}
10785 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and go
// the slow path if needed.
10792 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10793   predicate(UseSSE==0);
10794   match(Set dst (ConvF2I src));
10795   effect( KILL tmp, KILL cr );
10796   format %{ "FLD    $src\t# Convert float to int \n\t"
10797             "FLDCW  trunc mode\n\t"
10798             "SUB    ESP,4\n\t"
10799             "FISTp  [ESP + #0]\n\t"
10800             "FLDCW  std/24-bit mode\n\t"
10801             "POP    EAX\n\t"
10802             "CMP    EAX,0x80000000\n\t"
10803             "JNE,s  fast\n\t"
10804             "FLD    $src\n\t"
10805             "CALL   d2i_wrapper\n"
10806       "fast:" %}
10807   // DPR2I_encoding works for FPR2I
10808   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10809   ins_pipe( pipe_slow );
10810 %}
10811 
10812 // Convert a float in xmm to an int reg.
10813 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10814   predicate(UseSSE>=1);
10815   match(Set dst (ConvF2I src));
10816   effect( KILL tmp, KILL cr );
10817   format %{ "CVTTSS2SI $dst, $src\n\t"
10818             "CMP    $dst,0x80000000\n\t"
10819             "JNE,s  fast\n\t"
10820             "SUB    ESP, 4\n\t"
10821             "MOVSS  [ESP], $src\n\t"
10822             "FLD    [ESP]\n\t"
10823             "ADD    ESP, 4\n\t"
10824             "CALL   d2i_wrapper\n"
10825       "fast:" %}
10826   ins_encode %{
10827     Label fast;
10828     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10829     __ cmpl($dst$$Register, 0x80000000);
10830     __ jccb(Assembler::notEqual, fast);
10831     __ subptr(rsp, 4);
10832     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10833     __ fld_s(Address(rsp, 0));
10834     __ addptr(rsp, 4);
10835     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10836     __ post_call_nop();
10837     __ bind(fast);
10838   %}
10839   ins_pipe( pipe_slow );
10840 %}
10841 
10842 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10843   predicate(UseSSE==0);
10844   match(Set dst (ConvF2L src));
10845   effect( KILL cr );
10846   format %{ "FLD    $src\t# Convert float to long\n\t"
10847             "FLDCW  trunc mode\n\t"
10848             "SUB    ESP,8\n\t"
10849             "FISTp  [ESP + #0]\n\t"
10850             "FLDCW  std/24-bit mode\n\t"
10851             "POP    EAX\n\t"
10852             "POP    EDX\n\t"
10853             "CMP    EDX,0x80000000\n\t"
10854             "JNE,s  fast\n\t"
10855             "TEST   EAX,EAX\n\t"
10856             "JNE,s  fast\n\t"
10857             "FLD    $src\n\t"
10858             "CALL   d2l_wrapper\n"
10859       "fast:" %}
10860   // DPR2L_encoding works for FPR2L
10861   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10862   ins_pipe( pipe_slow );
10863 %}
10864 
10865 // XMM lacks a float/double->long conversion, so use the old FPU stack.
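// Note: the EDX:EAX == 0x80000000:00000000 check below matches either the hardware
// "integer indefinite" result (overflow or NaN) or a genuine Long.MIN_VALUE answer;
// in the latter case the d2l_wrapper stub merely recomputes the same value, so taking
// the slow path there costs time but not correctness.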
10866 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10867   predicate (UseSSE>=1);
10868   match(Set dst (ConvF2L src));
10869   effect( KILL cr );
10870   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10871             "MOVSS  [ESP],$src\n\t"
10872             "FLD_S  [ESP]\n\t"
10873             "FLDCW  trunc mode\n\t"
10874             "FISTp  [ESP + #0]\n\t"
10875             "FLDCW  std/24-bit mode\n\t"
10876             "POP    EAX\n\t"
10877             "POP    EDX\n\t"
10878             "CMP    EDX,0x80000000\n\t"
10879             "JNE,s  fast\n\t"
10880             "TEST   EAX,EAX\n\t"
10881             "JNE,s  fast\n\t"
10882             "SUB    ESP,4\t# Convert float to long\n\t"
10883             "MOVSS  [ESP],$src\n\t"
10884             "FLD_S  [ESP]\n\t"
10885             "ADD    ESP,4\n\t"
10886             "CALL   d2l_wrapper\n"
10887       "fast:" %}
10888   ins_encode %{
10889     Label fast;
10890     __ subptr(rsp, 8);
10891     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10892     __ fld_s(Address(rsp, 0));
10893     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10894     __ fistp_d(Address(rsp, 0));
10895     // Restore the rounding mode, mask the exception
10896     if (Compile::current()->in_24_bit_fp_mode()) {
10897       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10898     } else {
10899       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10900     }
10901     // Load the converted long, adjust CPU stack
10902     __ pop(rax);
10903     __ pop(rdx);
10904     __ cmpl(rdx, 0x80000000);
10905     __ jccb(Assembler::notEqual, fast);
10906     __ testl(rax, rax);
10907     __ jccb(Assembler::notEqual, fast);
10908     __ subptr(rsp, 4);
10909     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10910     __ fld_s(Address(rsp, 0));
10911     __ addptr(rsp, 4);
10912     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10913     __ post_call_nop();
10914     __ bind(fast);
10915   %}
10916   ins_pipe( pipe_slow );
10917 %}
10918 
10919 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10920   predicate( UseSSE<=1 );
10921   match(Set dst (ConvI2D src));
10922   format %{ "FILD   $src\n\t"
10923             "FSTP   $dst" %}
10924   opcode(0xDB, 0x0);  /* DB /0 */
10925   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10926   ins_pipe( fpu_reg_mem );
10927 %}
10928 
10929 instruct convI2D_reg(regD dst, rRegI src) %{
10930   predicate( UseSSE>=2 && !UseXmmI2D );
10931   match(Set dst (ConvI2D src));
10932   format %{ "CVTSI2SD $dst,$src" %}
10933   ins_encode %{
10934     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10935   %}
10936   ins_pipe( pipe_slow );
10937 %}
10938 
10939 instruct convI2D_mem(regD dst, memory mem) %{
10940   predicate( UseSSE>=2 );
10941   match(Set dst (ConvI2D (LoadI mem)));
10942   format %{ "CVTSI2SD $dst,$mem" %}
10943   ins_encode %{
10944     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10945   %}
10946   ins_pipe( pipe_slow );
10947 %}
10948 
10949 instruct convXI2D_reg(regD dst, rRegI src)
10950 %{
10951   predicate( UseSSE>=2 && UseXmmI2D );
10952   match(Set dst (ConvI2D src));
10953 
10954   format %{ "MOVD  $dst,$src\n\t"
10955             "CVTDQ2PD $dst,$dst\t# i2d" %}
10956   ins_encode %{
10957     __ movdl($dst$$XMMRegister, $src$$Register);
10958     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10959   %}
10960   ins_pipe(pipe_slow); // XXX
10961 %}
10962 
10963 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10964   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10965   match(Set dst (ConvI2D (LoadI mem)));
10966   format %{ "FILD   $mem\n\t"
10967             "FSTP   $dst" %}
10968   opcode(0xDB);      /* DB /0 */
10969   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10970               Pop_Reg_DPR(dst));
10971   ins_pipe( fpu_reg_mem );
10972 %}
10973 
10974 // Convert a byte to a float; no rounding step needed.
10975 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10976   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10977   match(Set dst (ConvI2F src));
10978   format %{ "FILD   $src\n\t"
10979             "FSTP   $dst" %}
10980 
10981   opcode(0xDB, 0x0);  /* DB /0 */
10982   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10983   ins_pipe( fpu_reg_mem );
10984 %}
10985 
10986 // In 24-bit mode, force exponent rounding by storing back out
10987 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10988   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10989   match(Set dst (ConvI2F src));
10990   ins_cost(200);
10991   format %{ "FILD   $src\n\t"
10992             "FSTP_S $dst" %}
10993   opcode(0xDB, 0x0);  /* DB /0 */
10994   ins_encode( Push_Mem_I(src),
10995               Pop_Mem_FPR(dst));
10996   ins_pipe( fpu_mem_mem );
10997 %}
10998 
10999 // In 24-bit mode, force exponent rounding by storing back out
11000 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11001   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11002   match(Set dst (ConvI2F (LoadI mem)));
11003   ins_cost(200);
11004   format %{ "FILD   $mem\n\t"
11005             "FSTP_S $dst" %}
11006   opcode(0xDB);  /* DB /0 */
11007   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11008               Pop_Mem_FPR(dst));
11009   ins_pipe( fpu_mem_mem );
11010 %}
11011 
11012 // This instruction does not round to 24 bits
11013 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11014   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11015   match(Set dst (ConvI2F src));
11016   format %{ "FILD   $src\n\t"
11017             "FSTP   $dst" %}
11018   opcode(0xDB, 0x0);  /* DB /0 */
11019   ins_encode( Push_Mem_I(src),
11020               Pop_Reg_FPR(dst));
11021   ins_pipe( fpu_reg_mem );
11022 %}
11023 
11024 // This instruction does not round to 24 bits
11025 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11026   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11027   match(Set dst (ConvI2F (LoadI mem)));
11028   format %{ "FILD   $mem\n\t"
11029             "FSTP   $dst" %}
11030   opcode(0xDB);      /* DB /0 */
11031   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11032               Pop_Reg_FPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 // Convert an int to a float in xmm; no rounding step needed.
11037 instruct convI2F_reg(regF dst, rRegI src) %{
11038   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
11039   match(Set dst (ConvI2F src));
11040   format %{ "CVTSI2SS $dst, $src" %}
11041   ins_encode %{
11042     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11043   %}
11044   ins_pipe( pipe_slow );
11045 %}
11046 
11047 instruct convXI2F_reg(regF dst, rRegI src)
11048 %{
11049   predicate( UseSSE>=2 && UseXmmI2F );
11050   match(Set dst (ConvI2F src));
11051 
11052   format %{ "MOVD  $dst,$src\n\t"
11053             "CVTDQ2PS $dst,$dst\t# i2f" %}
11054   ins_encode %{
11055     __ movdl($dst$$XMMRegister, $src$$Register);
11056     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11057   %}
11058   ins_pipe(pipe_slow); // XXX
11059 %}
11060 
11061 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11062   match(Set dst (ConvI2L src));
11063   effect(KILL cr);
11064   ins_cost(375);
11065   format %{ "MOV    $dst.lo,$src\n\t"
11066             "MOV    $dst.hi,$src\n\t"
11067             "SAR    $dst.hi,31" %}
11068   ins_encode(convert_int_long(dst,src));
11069   ins_pipe( ialu_reg_reg_long );
11070 %}
11071 
11072 // Zero-extend convert int to long
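// e.g. this matches the AndL(ConvI2L src, 0xFFFFFFFF) shape produced by expressions
// such as Integer.toUnsignedLong(i) or ((long) i) & 0xFFFFFFFFL.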
11073 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11074   match(Set dst (AndL (ConvI2L src) mask) );
11075   effect( KILL flags );
11076   ins_cost(250);
11077   format %{ "MOV    $dst.lo,$src\n\t"
11078             "XOR    $dst.hi,$dst.hi" %}
11079   opcode(0x33); // XOR
11080   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11081   ins_pipe( ialu_reg_reg_long );
11082 %}
11083 
11084 // Zero-extend long
11085 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11086   match(Set dst (AndL src mask) );
11087   effect( KILL flags );
11088   ins_cost(250);
11089   format %{ "MOV    $dst.lo,$src.lo\n\t"
11090             "XOR    $dst.hi,$dst.hi\n\t" %}
11091   opcode(0x33); // XOR
11092   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11093   ins_pipe( ialu_reg_reg_long );
11094 %}
11095 
11096 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11097   predicate (UseSSE<=1);
11098   match(Set dst (ConvL2D src));
11099   effect( KILL cr );
11100   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11101             "PUSH   $src.lo\n\t"
11102             "FILD   ST,[ESP + #0]\n\t"
11103             "ADD    ESP,8\n\t"
11104             "FSTP_D $dst\t# D-round" %}
11105   opcode(0xDF, 0x5);  /* DF /5 */
11106   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11107   ins_pipe( pipe_slow );
11108 %}
11109 
11110 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11111   predicate (UseSSE>=2);
11112   match(Set dst (ConvL2D src));
11113   effect( KILL cr );
11114   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11115             "PUSH   $src.lo\n\t"
11116             "FILD_D [ESP]\n\t"
11117             "FSTP_D [ESP]\n\t"
11118             "MOVSD  $dst,[ESP]\n\t"
11119             "ADD    ESP,8" %}
11120   opcode(0xDF, 0x5);  /* DF /5 */
11121   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11122   ins_pipe( pipe_slow );
11123 %}
11124 
11125 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11126   predicate (UseSSE>=1);
11127   match(Set dst (ConvL2F src));
11128   effect( KILL cr );
11129   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11130             "PUSH   $src.lo\n\t"
11131             "FILD_D [ESP]\n\t"
11132             "FSTP_S [ESP]\n\t"
11133             "MOVSS  $dst,[ESP]\n\t"
11134             "ADD    ESP,8" %}
11135   opcode(0xDF, 0x5);  /* DF /5 */
11136   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11137   ins_pipe( pipe_slow );
11138 %}
11139 
11140 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11141   match(Set dst (ConvL2F src));
11142   effect( KILL cr );
11143   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11144             "PUSH   $src.lo\n\t"
11145             "FILD   ST,[ESP + #0]\n\t"
11146             "ADD    ESP,8\n\t"
11147             "FSTP_S $dst\t# F-round" %}
11148   opcode(0xDF, 0x5);  /* DF /5 */
11149   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11150   ins_pipe( pipe_slow );
11151 %}
11152 
11153 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11154   match(Set dst (ConvL2I src));
11155   effect( DEF dst, USE src );
11156   format %{ "MOV    $dst,$src.lo" %}
11157   ins_encode(enc_CopyL_Lo(dst,src));
11158   ins_pipe( ialu_reg_reg );
11159 %}
11160 
11161 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11162   match(Set dst (MoveF2I src));
11163   effect( DEF dst, USE src );
11164   ins_cost(100);
11165   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11166   ins_encode %{
11167     __ movl($dst$$Register, Address(rsp, $src$$disp));
11168   %}
11169   ins_pipe( ialu_reg_mem );
11170 %}
11171 
11172 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11173   predicate(UseSSE==0);
11174   match(Set dst (MoveF2I src));
11175   effect( DEF dst, USE src );
11176 
11177   ins_cost(125);
11178   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11179   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11180   ins_pipe( fpu_mem_reg );
11181 %}
11182 
11183 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11184   predicate(UseSSE>=1);
11185   match(Set dst (MoveF2I src));
11186   effect( DEF dst, USE src );
11187 
11188   ins_cost(95);
11189   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11190   ins_encode %{
11191     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11192   %}
11193   ins_pipe( pipe_slow );
11194 %}
11195 
11196 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11197   predicate(UseSSE>=2);
11198   match(Set dst (MoveF2I src));
11199   effect( DEF dst, USE src );
11200   ins_cost(85);
11201   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11202   ins_encode %{
11203     __ movdl($dst$$Register, $src$$XMMRegister);
11204   %}
11205   ins_pipe( pipe_slow );
11206 %}
11207 
11208 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11209   match(Set dst (MoveI2F src));
11210   effect( DEF dst, USE src );
11211 
11212   ins_cost(100);
11213   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11214   ins_encode %{
11215     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11216   %}
11217   ins_pipe( ialu_mem_reg );
11218 %}
11219 
11220 
11221 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11222   predicate(UseSSE==0);
11223   match(Set dst (MoveI2F src));
11224   effect(DEF dst, USE src);
11225 
11226   ins_cost(125);
11227   format %{ "FLD_S  $src\n\t"
11228             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11229   opcode(0xD9);               /* D9 /0, FLD m32real */
11230   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11231               Pop_Reg_FPR(dst) );
11232   ins_pipe( fpu_reg_mem );
11233 %}
11234 
11235 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11236   predicate(UseSSE>=1);
11237   match(Set dst (MoveI2F src));
11238   effect( DEF dst, USE src );
11239 
11240   ins_cost(95);
11241   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11242   ins_encode %{
11243     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11244   %}
11245   ins_pipe( pipe_slow );
11246 %}
11247 
11248 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11249   predicate(UseSSE>=2);
11250   match(Set dst (MoveI2F src));
11251   effect( DEF dst, USE src );
11252 
11253   ins_cost(85);
11254   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11255   ins_encode %{
11256     __ movdl($dst$$XMMRegister, $src$$Register);
11257   %}
11258   ins_pipe( pipe_slow );
11259 %}
11260 
11261 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11262   match(Set dst (MoveD2L src));
11263   effect(DEF dst, USE src);
11264 
11265   ins_cost(250);
11266   format %{ "MOV    $dst.lo,$src\n\t"
11267             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11268   opcode(0x8B, 0x8B);
11269   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11270   ins_pipe( ialu_mem_long_reg );
11271 %}
11272 
11273 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11274   predicate(UseSSE<=1);
11275   match(Set dst (MoveD2L src));
11276   effect(DEF dst, USE src);
11277 
11278   ins_cost(125);
11279   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11280   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11281   ins_pipe( fpu_mem_reg );
11282 %}
11283 
11284 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11285   predicate(UseSSE>=2);
11286   match(Set dst (MoveD2L src));
11287   effect(DEF dst, USE src);
11288   ins_cost(95);
11289   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11290   ins_encode %{
11291     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11292   %}
11293   ins_pipe( pipe_slow );
11294 %}
11295 
11296 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11297   predicate(UseSSE>=2);
11298   match(Set dst (MoveD2L src));
11299   effect(DEF dst, USE src, TEMP tmp);
11300   ins_cost(85);
11301   format %{ "MOVD   $dst.lo,$src\n\t"
11302             "PSHUFLW $tmp,$src,0x4E\n\t"
11303             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11304   ins_encode %{
11305     __ movdl($dst$$Register, $src$$XMMRegister);
11306     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11307     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11308   %}
11309   ins_pipe( pipe_slow );
11310 %}
11311 
11312 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11313   match(Set dst (MoveL2D src));
11314   effect(DEF dst, USE src);
11315 
11316   ins_cost(200);
11317   format %{ "MOV    $dst,$src.lo\n\t"
11318             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11319   opcode(0x89, 0x89);
11320   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11321   ins_pipe( ialu_mem_long_reg );
11322 %}
11323 
11324 
11325 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11326   predicate(UseSSE<=1);
11327   match(Set dst (MoveL2D src));
11328   effect(DEF dst, USE src);
11329   ins_cost(125);
11330 
11331   format %{ "FLD_D  $src\n\t"
11332             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11333   opcode(0xDD);               /* DD /0, FLD m64real */
11334   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11335               Pop_Reg_DPR(dst) );
11336   ins_pipe( fpu_reg_mem );
11337 %}
11338 
11339 
11340 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11341   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11342   match(Set dst (MoveL2D src));
11343   effect(DEF dst, USE src);
11344 
11345   ins_cost(95);
11346   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11347   ins_encode %{
11348     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11349   %}
11350   ins_pipe( pipe_slow );
11351 %}
11352 
11353 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11354   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11355   match(Set dst (MoveL2D src));
11356   effect(DEF dst, USE src);
11357 
11358   ins_cost(95);
11359   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11360   ins_encode %{
11361     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11362   %}
11363   ins_pipe( pipe_slow );
11364 %}
11365 
11366 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11367   predicate(UseSSE>=2);
11368   match(Set dst (MoveL2D src));
11369   effect(TEMP dst, USE src, TEMP tmp);
11370   ins_cost(85);
11371   format %{ "MOVD   $dst,$src.lo\n\t"
11372             "MOVD   $tmp,$src.hi\n\t"
11373             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11374   ins_encode %{
11375     __ movdl($dst$$XMMRegister, $src$$Register);
11376     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11377     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11378   %}
11379   ins_pipe( pipe_slow );
11380 %}
11381 
11382 //----------------------------- CompressBits/ExpandBits ------------------------
11383 
11384 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11385   predicate(n->bottom_type()->isa_long());
11386   match(Set dst (CompressBits src mask));
11387   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11388   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11389   ins_encode %{
11390     Label exit, partial_result;
11391     // Extract the upper and lower 32 bits of the source in parallel into the destination
11392     // register pair, then merge the two halves so that the bits extracted from the upper
11393     // half land contiguously after the bits extracted from the lower half.
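    // A rough scalar sketch of the merge below, with n = popcount(mask.lo),
    // lo = pext(src.lo, mask.lo) and hi = pext(src.hi, mask.hi):
    //   dst.lo = lo | (hi << n);
    //   dst.hi = (n == 0) ? 0 : (hi >> (32 - n));
    // (when n == 32 the merge is skipped entirely and dst.lo/dst.hi are already final)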
11394     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11395     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11396     __ popcntl($rtmp$$Register, $mask$$Register);
11397     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11398     __ cmpl($rtmp$$Register, 32);
11399     __ jccb(Assembler::equal, exit);
11400     // Due to the limited number of GPRs on 32-bit targets, use an XMM register as a potential spill slot.
11401     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11402     // Shift left the contents of upper destination register by true bit count of lower mask register
11403     // and merge with lower destination register.
11404     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11405     __ orl($dst$$Register, $rtmp$$Register);
11406     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11407     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11408     // since contents of upper destination have already been copied to lower destination
11409     // register.
11410     __ cmpl($rtmp$$Register, 0);
11411     __ jccb(Assembler::greater, partial_result);
11412     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11413     __ jmp(exit);
11414     __ bind(partial_result);
11415     // Perform right shift over upper destination register to move out bits already copied
11416     // to lower destination register.
11417     __ subl($rtmp$$Register, 32);
11418     __ negl($rtmp$$Register);
11419     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11420     __ bind(exit);
11421   %}
11422   ins_pipe( pipe_slow );
11423 %}
11424 
11425 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11426   predicate(n->bottom_type()->isa_long());
11427   match(Set dst (ExpandBits src mask));
11428   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11429   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11430   ins_encode %{
11431     // The expand (deposit) operation sequentially reads bits from the source register, starting
11432     // at the LSB, and lays them out in the destination register at the bit positions corresponding
11433     // to true bits in the mask register. Thus the number of source bits consumed equals the
11434     // combined true bit count of the mask register pair.
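    // A rough scalar sketch of the expansion below, with n = popcount(mask.lo):
    //   dst.lo = pdep(src.lo, mask.lo);                  // consumes src.lo bits [0, n)
    //   dst.hi = pdep(src.lo >> n, mask.hi)              // leftover low-source bits first
    //          | pdep(src.hi, mask.hi cleared of its lowest (32 - n) set bits);
    // (when n == 32 the initial pdep pair already yields the final result)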
11435     Label exit, mask_clipping;
11436     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11437     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11438     __ popcntl($rtmp$$Register, $mask$$Register);
11439     // If the true bit count of the lower mask register is 32, then none of the bits of the
11440     // lower source register feed into the upper destination register.
11441     __ cmpl($rtmp$$Register, 32);
11442     __ jccb(Assembler::equal, exit);
11443     // Due to the limited number of GPRs on 32-bit targets, use an XMM register as a potential spill slot.
11444     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11445     // Shift right the contents of lower source register to remove already consumed bits.
11446     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11447     // Extract the bits from lower source register starting from LSB under the influence
11448     // of upper mask register.
11449     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11450     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11451     __ subl($rtmp$$Register, 32);
11452     __ negl($rtmp$$Register);
11453     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11454     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11455     // Clear the set bits in upper mask register which have been used to extract the contents
11456     // from lower source register.
11457     __ bind(mask_clipping);
11458     __ blsrl($mask$$Register, $mask$$Register);
11459     __ decrementl($rtmp$$Register, 1);
11460     __ jccb(Assembler::greater, mask_clipping);
11461     // Starting from LSB extract the bits from upper source register under the influence of
11462     // remaining set bits in upper mask register.
11463     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11464     // Merge the partial results extracted from lower and upper source register bits.
11465     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11466     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11467     __ bind(exit);
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 // =======================================================================
11473 // fast clearing of an array
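// In the variants below a short array is cleared with a small inline store loop, while
// large arrays dispatch on UseFastStosb (REP STOSB) or UseXMMForObjInit (a YMM store
// loop), falling back to plain REP STOS; the machine code itself is emitted by the
// clear_mem() helper called from each encoding.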
11474 // Small ClearArray non-AVX512.
11475 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11476   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11477   match(Set dummy (ClearArray cnt base));
11478   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11479 
11480   format %{ $$template
11481     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11482     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11483     $$emit$$"JG     LARGE\n\t"
11484     $$emit$$"SHL    ECX, 1\n\t"
11485     $$emit$$"DEC    ECX\n\t"
11486     $$emit$$"JS     DONE\t# Zero length\n\t"
11487     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11488     $$emit$$"DEC    ECX\n\t"
11489     $$emit$$"JGE    LOOP\n\t"
11490     $$emit$$"JMP    DONE\n\t"
11491     $$emit$$"# LARGE:\n\t"
11492     if (UseFastStosb) {
11493        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11494        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11495     } else if (UseXMMForObjInit) {
11496        $$emit$$"MOV     RDI,RAX\n\t"
11497        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11498        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11499        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11500        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11501        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11502        $$emit$$"ADD     0x40,RAX\n\t"
11503        $$emit$$"# L_zero_64_bytes:\n\t"
11504        $$emit$$"SUB     0x8,RCX\n\t"
11505        $$emit$$"JGE     L_loop\n\t"
11506        $$emit$$"ADD     0x4,RCX\n\t"
11507        $$emit$$"JL      L_tail\n\t"
11508        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11509        $$emit$$"ADD     0x20,RAX\n\t"
11510        $$emit$$"SUB     0x4,RCX\n\t"
11511        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11512        $$emit$$"ADD     0x4,RCX\n\t"
11513        $$emit$$"JLE     L_end\n\t"
11514        $$emit$$"DEC     RCX\n\t"
11515        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11516        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11517        $$emit$$"ADD     0x8,RAX\n\t"
11518        $$emit$$"DEC     RCX\n\t"
11519        $$emit$$"JGE     L_sloop\n\t"
11520        $$emit$$"# L_end:\n\t"
11521     } else {
11522        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11523        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11524     }
11525     $$emit$$"# DONE"
11526   %}
11527   ins_encode %{
11528     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11529                  $tmp$$XMMRegister, false, knoreg);
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 // Small ClearArray AVX512 non-constant length.
11535 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11536   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11537   match(Set dummy (ClearArray cnt base));
11538   ins_cost(125);
11539   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11540 
11541   format %{ $$template
11542     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11543     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11544     $$emit$$"JG     LARGE\n\t"
11545     $$emit$$"SHL    ECX, 1\n\t"
11546     $$emit$$"DEC    ECX\n\t"
11547     $$emit$$"JS     DONE\t# Zero length\n\t"
11548     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11549     $$emit$$"DEC    ECX\n\t"
11550     $$emit$$"JGE    LOOP\n\t"
11551     $$emit$$"JMP    DONE\n\t"
11552     $$emit$$"# LARGE:\n\t"
11553     if (UseFastStosb) {
11554        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11555        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11556     } else if (UseXMMForObjInit) {
11557        $$emit$$"MOV     RDI,RAX\n\t"
11558        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11559        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11560        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11561        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11562        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11563        $$emit$$"ADD     0x40,RAX\n\t"
11564        $$emit$$"# L_zero_64_bytes:\n\t"
11565        $$emit$$"SUB     0x8,RCX\n\t"
11566        $$emit$$"JGE     L_loop\n\t"
11567        $$emit$$"ADD     0x4,RCX\n\t"
11568        $$emit$$"JL      L_tail\n\t"
11569        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11570        $$emit$$"ADD     0x20,RAX\n\t"
11571        $$emit$$"SUB     0x4,RCX\n\t"
11572        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11573        $$emit$$"ADD     0x4,RCX\n\t"
11574        $$emit$$"JLE     L_end\n\t"
11575        $$emit$$"DEC     RCX\n\t"
11576        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11577        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11578        $$emit$$"ADD     0x8,RAX\n\t"
11579        $$emit$$"DEC     RCX\n\t"
11580        $$emit$$"JGE     L_sloop\n\t"
11581        $$emit$$"# L_end:\n\t"
11582     } else {
11583        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11584        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11585     }
11586     $$emit$$"# DONE"
11587   %}
11588   ins_encode %{
11589     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11590                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11591   %}
11592   ins_pipe( pipe_slow );
11593 %}
11594 
11595 // Large ClearArray non-AVX512.
11596 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11597   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11598   match(Set dummy (ClearArray cnt base));
11599   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11600   format %{ $$template
11601     if (UseFastStosb) {
11602        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11603        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11604        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11605     } else if (UseXMMForObjInit) {
11606        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11607        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11608        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11609        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11610        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11611        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11612        $$emit$$"ADD     0x40,RAX\n\t"
11613        $$emit$$"# L_zero_64_bytes:\n\t"
11614        $$emit$$"SUB     0x8,RCX\n\t"
11615        $$emit$$"JGE     L_loop\n\t"
11616        $$emit$$"ADD     0x4,RCX\n\t"
11617        $$emit$$"JL      L_tail\n\t"
11618        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11619        $$emit$$"ADD     0x20,RAX\n\t"
11620        $$emit$$"SUB     0x4,RCX\n\t"
11621        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11622        $$emit$$"ADD     0x4,RCX\n\t"
11623        $$emit$$"JLE     L_end\n\t"
11624        $$emit$$"DEC     RCX\n\t"
11625        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11626        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11627        $$emit$$"ADD     0x8,RAX\n\t"
11628        $$emit$$"DEC     RCX\n\t"
11629        $$emit$$"JGE     L_sloop\n\t"
11630        $$emit$$"# L_end:\n\t"
11631     } else {
11632        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11633        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11634        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11635     }
11636     $$emit$$"# DONE"
11637   %}
11638   ins_encode %{
11639     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11640                  $tmp$$XMMRegister, true, knoreg);
11641   %}
11642   ins_pipe( pipe_slow );
11643 %}
11644 
11645 // Large ClearArray AVX512.
11646 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11647   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11648   match(Set dummy (ClearArray cnt base));
11649   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11650   format %{ $$template
11651     if (UseFastStosb) {
11652        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11653        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11654        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11655     } else if (UseXMMForObjInit) {
11656        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11657        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11658        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11659        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11660        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11661        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11662        $$emit$$"ADD     0x40,RAX\n\t"
11663        $$emit$$"# L_zero_64_bytes:\n\t"
11664        $$emit$$"SUB     0x8,RCX\n\t"
11665        $$emit$$"JGE     L_loop\n\t"
11666        $$emit$$"ADD     0x4,RCX\n\t"
11667        $$emit$$"JL      L_tail\n\t"
11668        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11669        $$emit$$"ADD     0x20,RAX\n\t"
11670        $$emit$$"SUB     0x4,RCX\n\t"
11671        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11672        $$emit$$"ADD     0x4,RCX\n\t"
11673        $$emit$$"JLE     L_end\n\t"
11674        $$emit$$"DEC     RCX\n\t"
11675        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11676        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11677        $$emit$$"ADD     0x8,RAX\n\t"
11678        $$emit$$"DEC     RCX\n\t"
11679        $$emit$$"JGE     L_sloop\n\t"
11680        $$emit$$"# L_end:\n\t"
11681     } else {
11682        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11683        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11684        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11685     }
11686     $$emit$$"# DONE"
11687   %}
11688   ins_encode %{
11689     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11690                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11691   %}
11692   ins_pipe( pipe_slow );
11693 %}
11694 
11695 // Small ClearArray AVX512 constant length.
11696 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11697 %{
11698   predicate(!((ClearArrayNode*)n)->is_large() &&
11699                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11700   match(Set dummy (ClearArray cnt base));
11701   ins_cost(100);
11702   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11703   format %{ "clear_mem_imm $base,$cnt" %}
11704   ins_encode %{
11705    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11706   %}
11707   ins_pipe(pipe_slow);
11708 %}
11709 
11710 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11711                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11712   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11713   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11714   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11715 
11716   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11717   ins_encode %{
11718     __ string_compare($str1$$Register, $str2$$Register,
11719                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11720                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11721   %}
11722   ins_pipe( pipe_slow );
11723 %}
11724 
11725 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11726                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11727   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11728   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11729   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11730 
11731   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11732   ins_encode %{
11733     __ string_compare($str1$$Register, $str2$$Register,
11734                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11735                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11736   %}
11737   ins_pipe( pipe_slow );
11738 %}
11739 
11740 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11741                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11742   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11743   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11744   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11745 
11746   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11747   ins_encode %{
11748     __ string_compare($str1$$Register, $str2$$Register,
11749                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11750                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11751   %}
11752   ins_pipe( pipe_slow );
11753 %}
11754 
11755 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11756                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11757   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11758   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11759   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11760 
11761   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11762   ins_encode %{
11763     __ string_compare($str1$$Register, $str2$$Register,
11764                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11765                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11766   %}
11767   ins_pipe( pipe_slow );
11768 %}
11769 
11770 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11771                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11772   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11773   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11774   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11775 
11776   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11777   ins_encode %{
11778     __ string_compare($str1$$Register, $str2$$Register,
11779                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11780                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11781   %}
11782   ins_pipe( pipe_slow );
11783 %}
11784 
11785 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11786                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11787   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11788   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11789   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11790 
11791   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11792   ins_encode %{
11793     __ string_compare($str1$$Register, $str2$$Register,
11794                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11795                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11796   %}
11797   ins_pipe( pipe_slow );
11798 %}
11799 
11800 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11801                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11802   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11803   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11804   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11805 
11806   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11807   ins_encode %{
11808     __ string_compare($str2$$Register, $str1$$Register,
11809                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11810                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11811   %}
11812   ins_pipe( pipe_slow );
11813 %}
11814 
11815 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11816                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11817   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11818   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11819   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11820 
11821   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11822   ins_encode %{
11823     __ string_compare($str2$$Register, $str1$$Register,
11824                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11825                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11826   %}
11827   ins_pipe( pipe_slow );
11828 %}
11829 
11830 // fast string equals
11831 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11832                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11833   predicate(!VM_Version::supports_avx512vlbw());
11834   match(Set result (StrEquals (Binary str1 str2) cnt));
11835   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11836 
11837   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11838   ins_encode %{
11839     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11840                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11841                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11842   %}
11843 
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11848                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11849   predicate(VM_Version::supports_avx512vlbw());
11850   match(Set result (StrEquals (Binary str1 str2) cnt));
11851   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11852 
11853   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11854   ins_encode %{
11855     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11856                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11857                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11858   %}
11859 
11860   ins_pipe( pipe_slow );
11861 %}
11862 
11863 
11864 // fast search of substring with known size.
11865 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11866                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11867   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11868   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11869   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11870 
11871   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11872   ins_encode %{
11873     int icnt2 = (int)$int_cnt2$$constant;
11874     if (icnt2 >= 16) {
11875       // IndexOf for constant substrings with size >= 16 elements
11876       // which don't need to be loaded through stack.
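      // (A constant needle of at least 16 Latin-1 bytes fills a whole 16-byte XMM
      //  register, so it can be reloaded directly without risking a read across a page
      //  boundary; shorter needles take the else branch below.)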
11877       __ string_indexofC8($str1$$Register, $str2$$Register,
11878                           $cnt1$$Register, $cnt2$$Register,
11879                           icnt2, $result$$Register,
11880                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11881     } else {
11882       // Small strings are loaded through stack if they cross page boundary.
11883       __ string_indexof($str1$$Register, $str2$$Register,
11884                         $cnt1$$Register, $cnt2$$Register,
11885                         icnt2, $result$$Register,
11886                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11887     }
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 // fast search of substring with known size.
11893 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11894                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11895   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11896   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11897   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11898 
11899   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11900   ins_encode %{
11901     int icnt2 = (int)$int_cnt2$$constant;
11902     if (icnt2 >= 8) {
11903       // IndexOf for constant substrings with size >= 8 elements
11904       // which don't need to be loaded through stack.
11905       __ string_indexofC8($str1$$Register, $str2$$Register,
11906                           $cnt1$$Register, $cnt2$$Register,
11907                           icnt2, $result$$Register,
11908                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11909     } else {
11910       // Small strings are loaded through stack if they cross page boundary.
11911       __ string_indexof($str1$$Register, $str2$$Register,
11912                         $cnt1$$Register, $cnt2$$Register,
11913                         icnt2, $result$$Register,
11914                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11915     }
11916   %}
11917   ins_pipe( pipe_slow );
11918 %}
11919 
11920 // fast search of substring with known size.
11921 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11922                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11923   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11924   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11925   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11926 
11927   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11928   ins_encode %{
11929     int icnt2 = (int)$int_cnt2$$constant;
11930     if (icnt2 >= 8) {
11931       // IndexOf for constant substrings with size >= 8 elements
11932       // which don't need to be loaded through stack.
11933       __ string_indexofC8($str1$$Register, $str2$$Register,
11934                           $cnt1$$Register, $cnt2$$Register,
11935                           icnt2, $result$$Register,
11936                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11937     } else {
11938       // Small strings are loaded through stack if they cross page boundary.
11939       __ string_indexof($str1$$Register, $str2$$Register,
11940                         $cnt1$$Register, $cnt2$$Register,
11941                         icnt2, $result$$Register,
11942                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11943     }
11944   %}
11945   ins_pipe( pipe_slow );
11946 %}
11947 
11948 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11949                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11950   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11951   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11952   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11953 
11954   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11955   ins_encode %{
11956     __ string_indexof($str1$$Register, $str2$$Register,
11957                       $cnt1$$Register, $cnt2$$Register,
11958                       (-1), $result$$Register,
11959                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11960   %}
11961   ins_pipe( pipe_slow );
11962 %}
11963 
11964 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11965                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11966   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11967   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11968   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11969 
11970   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11971   ins_encode %{
11972     __ string_indexof($str1$$Register, $str2$$Register,
11973                       $cnt1$$Register, $cnt2$$Register,
11974                       (-1), $result$$Register,
11975                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11976   %}
11977   ins_pipe( pipe_slow );
11978 %}
11979 
11980 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11981                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11982   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11983   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11984   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11985 
11986   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11987   ins_encode %{
11988     __ string_indexof($str1$$Register, $str2$$Register,
11989                       $cnt1$$Register, $cnt2$$Register,
11990                       (-1), $result$$Register,
11991                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11992   %}
11993   ins_pipe( pipe_slow );
11994 %}
11995 
11996 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11997                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11998   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11999   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12000   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12001   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12002   ins_encode %{
12003     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12004                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12005   %}
12006   ins_pipe( pipe_slow );
12007 %}
12008 
12009 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12010                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12011   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12012   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12013   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12014   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12015   ins_encode %{
12016     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12017                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12018   %}
12019   ins_pipe( pipe_slow );
12020 %}
12021 
12022 
12023 // fast array equals
12024 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12025                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12026 %{
12027   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12028   match(Set result (AryEq ary1 ary2));
12029   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12030   //ins_cost(300);
12031 
12032   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12033   ins_encode %{
12034     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12035                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12036                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12037   %}
12038   ins_pipe( pipe_slow );
12039 %}
12040 
12041 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12042                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12043 %{
12044   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12045   match(Set result (AryEq ary1 ary2));
12046   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12047   //ins_cost(300);
12048 
12049   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12050   ins_encode %{
12051     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12052                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12053                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12054   %}
12055   ins_pipe( pipe_slow );
12056 %}
12057 
12058 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12059                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12060 %{
12061   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12062   match(Set result (AryEq ary1 ary2));
12063   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12064   //ins_cost(300);
12065 
12066   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12067   ins_encode %{
12068     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12069                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12070                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12071   %}
12072   ins_pipe( pipe_slow );
12073 %}
12074 
12075 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12076                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12077 %{
12078   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12079   match(Set result (AryEq ary1 ary2));
12080   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12081   //ins_cost(300);
12082 
12083   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12084   ins_encode %{
12085     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12086                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12087                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12088   %}
12089   ins_pipe( pipe_slow );
12090 %}
12091 
12092 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12093                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12094 %{
12095   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12096   match(Set result (CountPositives ary1 len));
12097   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12098 
12099   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12100   ins_encode %{
12101     __ count_positives($ary1$$Register, $len$$Register,
12102                        $result$$Register, $tmp3$$Register,
12103                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12104   %}
12105   ins_pipe( pipe_slow );
12106 %}
12107 
12108 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12109                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12110 %{
12111   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12112   match(Set result (CountPositives ary1 len));
12113   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12114 
12115   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12116   ins_encode %{
12117     __ count_positives($ary1$$Register, $len$$Register,
12118                        $result$$Register, $tmp3$$Register,
12119                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12120   %}
12121   ins_pipe( pipe_slow );
12122 %}
12123 
12124 
12125 // fast char[] to byte[] compression
12126 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12127                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12128   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12129   match(Set result (StrCompressedCopy src (Binary dst len)));
12130   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12131 
12132   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12133   ins_encode %{
12134     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12135                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12136                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12137                            knoreg, knoreg);
12138   %}
12139   ins_pipe( pipe_slow );
12140 %}
12141 
12142 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12143                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12144   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12145   match(Set result (StrCompressedCopy src (Binary dst len)));
12146   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12147 
12148   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12149   ins_encode %{
12150     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12151                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12152                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12153                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12154   %}
12155   ins_pipe( pipe_slow );
12156 %}
12157 
12158 // fast byte[] to char[] inflation
12159 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12160                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12161   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12162   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12163   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12164 
12165   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12166   ins_encode %{
12167     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12168                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12169   %}
12170   ins_pipe( pipe_slow );
12171 %}
12172 
12173 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12174                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12175   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12176   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12177   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12178 
12179   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12180   ins_encode %{
12181     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12182                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12183   %}
12184   ins_pipe( pipe_slow );
12185 %}
12186 
12187 // encode char[] to byte[] in ISO_8859_1
12188 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12189                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12190                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12191   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12192   match(Set result (EncodeISOArray src (Binary dst len)));
12193   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12194 
12195   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12196   ins_encode %{
12197     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12198                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12199                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12200   %}
12201   ins_pipe( pipe_slow );
12202 %}
12203 
12204 // encode char[] to byte[] in ASCII
12205 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12206                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12207                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12208   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12209   match(Set result (EncodeISOArray src (Binary dst len)));
12210   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12211 
12212   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12213   ins_encode %{
12214     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12215                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12216                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12217   %}
12218   ins_pipe( pipe_slow );
12219 %}
12220 
12221 //----------Control Flow Instructions------------------------------------------
12222 // Signed compare Instructions
12223 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12224   match(Set cr (CmpI op1 op2));
12225   effect( DEF cr, USE op1, USE op2 );
12226   format %{ "CMP    $op1,$op2" %}
12227   opcode(0x3B);  /* Opcode 3B /r */
12228   ins_encode( OpcP, RegReg( op1, op2) );
12229   ins_pipe( ialu_cr_reg_reg );
12230 %}
12231 
12232 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12233   match(Set cr (CmpI op1 op2));
12234   effect( DEF cr, USE op1 );
12235   format %{ "CMP    $op1,$op2" %}
12236   opcode(0x81,0x07);  /* Opcode 81 /7 */
12237   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12238   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12239   ins_pipe( ialu_cr_reg_imm );
12240 %}
12241 
12242 // Cisc-spilled version of cmpI_eReg
12243 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12244   match(Set cr (CmpI op1 (LoadI op2)));
12245 
12246   format %{ "CMP    $op1,$op2" %}
12247   ins_cost(500);
12248   opcode(0x3B);  /* Opcode 3B /r */
12249   ins_encode( OpcP, RegMem( op1, op2) );
12250   ins_pipe( ialu_cr_reg_mem );
12251 %}
12252 
12253 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12254   match(Set cr (CmpI src zero));
12255   effect( DEF cr, USE src );
12256 
12257   format %{ "TEST   $src,$src" %}
12258   opcode(0x85);
12259   ins_encode( OpcP, RegReg( src, src ) );
12260   ins_pipe( ialu_cr_reg_imm );
12261 %}
12262 
12263 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12264   match(Set cr (CmpI (AndI src con) zero));
12265 
12266   format %{ "TEST   $src,$con" %}
12267   opcode(0xF7,0x00);
12268   ins_encode( OpcP, RegOpc(src), Con32(con) );
12269   ins_pipe( ialu_cr_reg_imm );
12270 %}
12271 
12272 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12273   match(Set cr (CmpI (AndI src mem) zero));
12274 
12275   format %{ "TEST   $src,$mem" %}
12276   opcode(0x85);
12277   ins_encode( OpcP, RegMem( src, mem ) );
12278   ins_pipe( ialu_cr_reg_mem );
12279 %}
12280 
12281 // Unsigned compare Instructions; really, same as signed except they
12282 // produce an eFlagsRegU instead of eFlagsReg.
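// (The CMP encoding itself is identical to the signed case; only the
//  condition codes used by the consuming branch differ, e.g. JB/JAE
//  instead of JL/JGE.)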
12283 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12284   match(Set cr (CmpU op1 op2));
12285 
12286   format %{ "CMPu   $op1,$op2" %}
12287   opcode(0x3B);  /* Opcode 3B /r */
12288   ins_encode( OpcP, RegReg( op1, op2) );
12289   ins_pipe( ialu_cr_reg_reg );
12290 %}
12291 
12292 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12293   match(Set cr (CmpU op1 op2));
12294 
12295   format %{ "CMPu   $op1,$op2" %}
12296   opcode(0x81,0x07);  /* Opcode 81 /7 */
12297   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12298   ins_pipe( ialu_cr_reg_imm );
12299 %}
12300 
// Cisc-spilled version of cmpU_eReg
12302 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12303   match(Set cr (CmpU op1 (LoadI op2)));
12304 
12305   format %{ "CMPu   $op1,$op2" %}
12306   ins_cost(500);
12307   opcode(0x3B);  /* Opcode 3B /r */
12308   ins_encode( OpcP, RegMem( op1, op2) );
12309   ins_pipe( ialu_cr_reg_mem );
12310 %}
12311 
12312 // // Cisc-spilled version of cmpU_eReg
12313 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12314 //  match(Set cr (CmpU (LoadI op1) op2));
12315 //
12316 //  format %{ "CMPu   $op1,$op2" %}
12317 //  ins_cost(500);
12318 //  opcode(0x39);  /* Opcode 39 /r */
12319 //  ins_encode( OpcP, RegMem( op1, op2) );
12320 //%}
12321 
12322 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12323   match(Set cr (CmpU src zero));
12324 
12325   format %{ "TESTu  $src,$src" %}
12326   opcode(0x85);
12327   ins_encode( OpcP, RegReg( src, src ) );
12328   ins_pipe( ialu_cr_reg_imm );
12329 %}
12330 
12331 // Unsigned pointer compare Instructions
12332 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12333   match(Set cr (CmpP op1 op2));
12334 
12335   format %{ "CMPu   $op1,$op2" %}
12336   opcode(0x3B);  /* Opcode 3B /r */
12337   ins_encode( OpcP, RegReg( op1, op2) );
12338   ins_pipe( ialu_cr_reg_reg );
12339 %}
12340 
12341 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12342   match(Set cr (CmpP op1 op2));
12343 
12344   format %{ "CMPu   $op1,$op2" %}
12345   opcode(0x81,0x07);  /* Opcode 81 /7 */
12346   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12347   ins_pipe( ialu_cr_reg_imm );
12348 %}
12349 
// Cisc-spilled version of cmpP_eReg
12351 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12352   match(Set cr (CmpP op1 (LoadP op2)));
12353 
12354   format %{ "CMPu   $op1,$op2" %}
12355   ins_cost(500);
12356   opcode(0x3B);  /* Opcode 3B /r */
12357   ins_encode( OpcP, RegMem( op1, op2) );
12358   ins_pipe( ialu_cr_reg_mem );
12359 %}
12360 
12361 // // Cisc-spilled version of cmpP_eReg
12362 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12363 //  match(Set cr (CmpP (LoadP op1) op2));
12364 //
12365 //  format %{ "CMPu   $op1,$op2" %}
12366 //  ins_cost(500);
12367 //  opcode(0x39);  /* Opcode 39 /r */
12368 //  ins_encode( OpcP, RegMem( op1, op2) );
12369 //%}
12370 
12371 // Compare raw pointer (used in out-of-heap check).
12372 // Only works because non-oop pointers must be raw pointers
12373 // and raw pointers have no anti-dependencies.
12374 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12375   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12376   match(Set cr (CmpP op1 (LoadP op2)));
12377 
12378   format %{ "CMPu   $op1,$op2" %}
12379   opcode(0x3B);  /* Opcode 3B /r */
12380   ins_encode( OpcP, RegMem( op1, op2) );
12381   ins_pipe( ialu_cr_reg_mem );
12382 %}
12383 
12384 //
12385 // This will generate a signed flags result. This should be ok
12386 // since any compare to a zero should be eq/neq.
12387 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12388   match(Set cr (CmpP src zero));
12389 
12390   format %{ "TEST   $src,$src" %}
12391   opcode(0x85);
12392   ins_encode( OpcP, RegReg( src, src ) );
12393   ins_pipe( ialu_cr_reg_imm );
12394 %}
12395 
12396 // Cisc-spilled version of testP_reg
12397 // This will generate a signed flags result. This should be ok
12398 // since any compare to a zero should be eq/neq.
12399 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12400   match(Set cr (CmpP (LoadP op) zero));
12401 
12402   format %{ "TEST   $op,0xFFFFFFFF" %}
12403   ins_cost(500);
12404   opcode(0xF7);               /* Opcode F7 /0 */
12405   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12406   ins_pipe( ialu_cr_reg_imm );
12407 %}
12408 
12409 // Yanked all unsigned pointer compare operations.
12410 // Pointer compares are done with CmpP which is already unsigned.
12411 
12412 //----------Max and Min--------------------------------------------------------
12413 // Min Instructions
12414 ////
12415 //   *** Min and Max using the conditional move are slower than the
12416 //   *** branch version on a Pentium III.
12417 // // Conditional move for min
12418 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12419 //  effect( USE_DEF op2, USE op1, USE cr );
12420 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12421 //  opcode(0x4C,0x0F);
12422 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12423 //  ins_pipe( pipe_cmov_reg );
12424 //%}
12425 //
12426 //// Min Register with Register (P6 version)
12427 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12428 //  predicate(VM_Version::supports_cmov() );
12429 //  match(Set op2 (MinI op1 op2));
12430 //  ins_cost(200);
12431 //  expand %{
12432 //    eFlagsReg cr;
12433 //    compI_eReg(cr,op1,op2);
12434 //    cmovI_reg_lt(op2,op1,cr);
12435 //  %}
12436 //%}
12437 
12438 // Min Register with Register (generic version)
12439 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12440   match(Set dst (MinI dst src));
12441   effect(KILL flags);
12442   ins_cost(300);
12443 
12444   format %{ "MIN    $dst,$src" %}
12445   opcode(0xCC);
12446   ins_encode( min_enc(dst,src) );
12447   ins_pipe( pipe_slow );
12448 %}
12449 
12450 // Max Register with Register
12451 //   *** Min and Max using the conditional move are slower than the
12452 //   *** branch version on a Pentium III.
12453 // // Conditional move for max
12454 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12455 //  effect( USE_DEF op2, USE op1, USE cr );
12456 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12457 //  opcode(0x4F,0x0F);
12458 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12459 //  ins_pipe( pipe_cmov_reg );
12460 //%}
12461 //
12462 // // Max Register with Register (P6 version)
12463 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12464 //  predicate(VM_Version::supports_cmov() );
12465 //  match(Set op2 (MaxI op1 op2));
12466 //  ins_cost(200);
12467 //  expand %{
12468 //    eFlagsReg cr;
12469 //    compI_eReg(cr,op1,op2);
12470 //    cmovI_reg_gt(op2,op1,cr);
12471 //  %}
12472 //%}
12473 
12474 // Max Register with Register (generic version)
12475 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12476   match(Set dst (MaxI dst src));
12477   effect(KILL flags);
12478   ins_cost(300);
12479 
12480   format %{ "MAX    $dst,$src" %}
12481   opcode(0xCC);
12482   ins_encode( max_enc(dst,src) );
12483   ins_pipe( pipe_slow );
12484 %}
12485 
12486 // ============================================================================
12487 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
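// As a rough illustration (not part of the generated code): with init = 0,
// limit = 10 and stride = 3 the formula below gives
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// i.e. the first iterator value at or past the limit, which is what the
// loop-exit test expects.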
12490 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12491   match(Set limit (LoopLimit (Binary init limit) stride));
12492   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12493   ins_cost(300);
12494 
12495   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12496   ins_encode %{
12497     int strd = (int)$stride$$constant;
12498     assert(strd != 1 && strd != -1, "sanity");
12499     int m1 = (strd > 0) ? 1 : -1;
12500     // Convert limit to long (EAX:EDX)
12501     __ cdql();
12502     // Convert init to long (init:tmp)
12503     __ movl($tmp$$Register, $init$$Register);
12504     __ sarl($tmp$$Register, 31);
12505     // $limit - $init
12506     __ subl($limit$$Register, $init$$Register);
12507     __ sbbl($limit_hi$$Register, $tmp$$Register);
12508     // + ($stride - 1)
12509     if (strd > 0) {
12510       __ addl($limit$$Register, (strd - 1));
12511       __ adcl($limit_hi$$Register, 0);
12512       __ movl($tmp$$Register, strd);
12513     } else {
12514       __ addl($limit$$Register, (strd + 1));
12515       __ adcl($limit_hi$$Register, -1);
12516       __ lneg($limit_hi$$Register, $limit$$Register);
12517       __ movl($tmp$$Register, -strd);
12518     }
12519     // signed division: (EAX:EDX) / pos_stride
12520     __ idivl($tmp$$Register);
12521     if (strd < 0) {
12522       // restore sign
12523       __ negl($tmp$$Register);
12524     }
12525     // (EAX) * stride
12526     __ mull($tmp$$Register);
12527     // + init (ignore upper bits)
12528     __ addl($limit$$Register, $init$$Register);
12529   %}
12530   ins_pipe( pipe_slow );
12531 %}
12532 
12533 // ============================================================================
12534 // Branch Instructions
12535 // Jump Table
12536 instruct jumpXtnd(rRegI switch_val) %{
12537   match(Jump switch_val);
12538   ins_cost(350);
12539   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12540   ins_encode %{
12541     // Jump to Address(table_base + switch_reg)
12542     Address index(noreg, $switch_val$$Register, Address::times_1);
12543     __ jump(ArrayAddress($constantaddress, index), noreg);
12544   %}
12545   ins_pipe(pipe_jmp);
12546 %}
12547 
12548 // Jump Direct - Label defines a relative address from JMP+1
12549 instruct jmpDir(label labl) %{
12550   match(Goto);
12551   effect(USE labl);
12552 
12553   ins_cost(300);
12554   format %{ "JMP    $labl" %}
12555   size(5);
12556   ins_encode %{
12557     Label* L = $labl$$label;
12558     __ jmp(*L, false); // Always long jump
12559   %}
12560   ins_pipe( pipe_jmp );
12561 %}
12562 
12563 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12564 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12565   match(If cop cr);
12566   effect(USE labl);
12567 
12568   ins_cost(300);
12569   format %{ "J$cop    $labl" %}
12570   size(6);
12571   ins_encode %{
12572     Label* L = $labl$$label;
12573     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12574   %}
12575   ins_pipe( pipe_jcc );
12576 %}
12577 
12578 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12579 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12580   match(CountedLoopEnd cop cr);
12581   effect(USE labl);
12582 
12583   ins_cost(300);
12584   format %{ "J$cop    $labl\t# Loop end" %}
12585   size(6);
12586   ins_encode %{
12587     Label* L = $labl$$label;
12588     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12589   %}
12590   ins_pipe( pipe_jcc );
12591 %}
12592 
12593 // Jump Direct Conditional - using unsigned comparison
12594 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12595   match(If cop cmp);
12596   effect(USE labl);
12597 
12598   ins_cost(300);
12599   format %{ "J$cop,u  $labl" %}
12600   size(6);
12601   ins_encode %{
12602     Label* L = $labl$$label;
12603     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12604   %}
12605   ins_pipe(pipe_jcc);
12606 %}
12607 
12608 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12609   match(If cop cmp);
12610   effect(USE labl);
12611 
12612   ins_cost(200);
12613   format %{ "J$cop,u  $labl" %}
12614   size(6);
12615   ins_encode %{
12616     Label* L = $labl$$label;
12617     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12618   %}
12619   ins_pipe(pipe_jcc);
12620 %}
12621 
12622 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12623   match(If cop cmp);
12624   effect(USE labl);
12625 
12626   ins_cost(200);
12627   format %{ $$template
12628     if ($cop$$cmpcode == Assembler::notEqual) {
12629       $$emit$$"JP,u   $labl\n\t"
12630       $$emit$$"J$cop,u   $labl"
12631     } else {
12632       $$emit$$"JP,u   done\n\t"
12633       $$emit$$"J$cop,u   $labl\n\t"
12634       $$emit$$"done:"
12635     }
12636   %}
12637   ins_encode %{
12638     Label* l = $labl$$label;
12639     if ($cop$$cmpcode == Assembler::notEqual) {
12640       __ jcc(Assembler::parity, *l, false);
12641       __ jcc(Assembler::notEqual, *l, false);
12642     } else if ($cop$$cmpcode == Assembler::equal) {
12643       Label done;
12644       __ jccb(Assembler::parity, done);
12645       __ jcc(Assembler::equal, *l, false);
12646       __ bind(done);
12647     } else {
12648        ShouldNotReachHere();
12649     }
12650   %}
12651   ins_pipe(pipe_jcc);
12652 %}
12653 
12654 // ============================================================================
12655 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12656 // array for an instance of the superklass.  Set a hidden internal cache on a
12657 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12658 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
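//
// Roughly equivalent pseudocode (illustration only -- names are simplified;
// the real data lives in Klass::secondary_supers / secondary_super_cache):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers->at(i) == super) {
//       sub->secondary_super_cache = super;
//       return 0;            // hit: result (EDI) zero, flags Z
//     }
//   }
//   return not_zero;         // miss: result (EDI) non-zero, flags NZ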
12659 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12660   match(Set result (PartialSubtypeCheck sub super));
12661   effect( KILL rcx, KILL cr );
12662 
12663   ins_cost(1100);  // slightly larger than the next version
12664   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12665             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12666             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12667             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12668             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12669             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12670             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12671      "miss:\t" %}
12672 
12673   opcode(0x1); // Force a XOR of EDI
12674   ins_encode( enc_PartialSubtypeCheck() );
12675   ins_pipe( pipe_slow );
12676 %}
12677 
12678 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12679   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12680   effect( KILL rcx, KILL result );
12681 
12682   ins_cost(1000);
12683   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12684             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12685             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12686             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12687             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12688             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12689      "miss:\t" %}
12690 
12691   opcode(0x0);  // No need to XOR EDI
12692   ins_encode( enc_PartialSubtypeCheck() );
12693   ins_pipe( pipe_slow );
12694 %}
12695 
12696 // ============================================================================
12697 // Branch Instructions -- short offset versions
12698 //
12699 // These instructions are used to replace jumps of a long offset (the default
12700 // match) with jumps of a shorter offset.  These instructions are all tagged
12701 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12702 // match rules in general matching.  Instead, the ADLC generates a conversion
12703 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether a
// branch can use the short form via the is_short_branch_offset() predicate
// in the machine specific code section of the file.
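//
// For reference: the long forms above are JMP rel32 (0xE9 + 4-byte
// displacement, 5 bytes) and Jcc rel32 (0x0F 0x8x + 4-byte displacement,
// 6 bytes), while the short forms below are JMP rel8 / Jcc rel8 (2 bytes
// each), matching the size() attributes on the corresponding instructs.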
12707 
12708 // Jump Direct - Label defines a relative address from JMP+1
12709 instruct jmpDir_short(label labl) %{
12710   match(Goto);
12711   effect(USE labl);
12712 
12713   ins_cost(300);
12714   format %{ "JMP,s  $labl" %}
12715   size(2);
12716   ins_encode %{
12717     Label* L = $labl$$label;
12718     __ jmpb(*L);
12719   %}
12720   ins_pipe( pipe_jmp );
12721   ins_short_branch(1);
12722 %}
12723 
12724 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12725 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12726   match(If cop cr);
12727   effect(USE labl);
12728 
12729   ins_cost(300);
12730   format %{ "J$cop,s  $labl" %}
12731   size(2);
12732   ins_encode %{
12733     Label* L = $labl$$label;
12734     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12735   %}
12736   ins_pipe( pipe_jcc );
12737   ins_short_branch(1);
12738 %}
12739 
12740 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12741 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12742   match(CountedLoopEnd cop cr);
12743   effect(USE labl);
12744 
12745   ins_cost(300);
12746   format %{ "J$cop,s  $labl\t# Loop end" %}
12747   size(2);
12748   ins_encode %{
12749     Label* L = $labl$$label;
12750     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12751   %}
12752   ins_pipe( pipe_jcc );
12753   ins_short_branch(1);
12754 %}
12755 
12756 // Jump Direct Conditional - using unsigned comparison
12757 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12758   match(If cop cmp);
12759   effect(USE labl);
12760 
12761   ins_cost(300);
12762   format %{ "J$cop,us $labl" %}
12763   size(2);
12764   ins_encode %{
12765     Label* L = $labl$$label;
12766     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12767   %}
12768   ins_pipe( pipe_jcc );
12769   ins_short_branch(1);
12770 %}
12771 
12772 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12773   match(If cop cmp);
12774   effect(USE labl);
12775 
12776   ins_cost(300);
12777   format %{ "J$cop,us $labl" %}
12778   size(2);
12779   ins_encode %{
12780     Label* L = $labl$$label;
12781     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12782   %}
12783   ins_pipe( pipe_jcc );
12784   ins_short_branch(1);
12785 %}
12786 
12787 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12788   match(If cop cmp);
12789   effect(USE labl);
12790 
12791   ins_cost(300);
12792   format %{ $$template
12793     if ($cop$$cmpcode == Assembler::notEqual) {
12794       $$emit$$"JP,u,s   $labl\n\t"
12795       $$emit$$"J$cop,u,s   $labl"
12796     } else {
12797       $$emit$$"JP,u,s   done\n\t"
12798       $$emit$$"J$cop,u,s  $labl\n\t"
12799       $$emit$$"done:"
12800     }
12801   %}
12802   size(4);
12803   ins_encode %{
12804     Label* l = $labl$$label;
12805     if ($cop$$cmpcode == Assembler::notEqual) {
12806       __ jccb(Assembler::parity, *l);
12807       __ jccb(Assembler::notEqual, *l);
12808     } else if ($cop$$cmpcode == Assembler::equal) {
12809       Label done;
12810       __ jccb(Assembler::parity, done);
12811       __ jccb(Assembler::equal, *l);
12812       __ bind(done);
12813     } else {
12814        ShouldNotReachHere();
12815     }
12816   %}
12817   ins_pipe(pipe_jcc);
12818   ins_short_branch(1);
12819 %}
12820 
12821 // ============================================================================
12822 // Long Compare
12823 //
12824 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12825 // is tricky.  The flavor of compare used depends on whether we are testing
12826 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands, which yields a GE test on the swapped values; negating that
// gives the GT test.  The EQ test against zero is done by OR'ing the high
// and low halves together (which sets the flags), and the NE test is the
// negation of that.
12831 
12832 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12833 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12834 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12835 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12836 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12837 // foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the ternary form of long-compare and both are very useful
// on Intel, which has so few registers.
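//
// A sketch of the two flag-setting idioms used below (illustration only):
//
//   // EQ/NE against zero: OR the halves together; ZF is set iff both are zero.
//   MOV  tmp, src.lo
//   OR   tmp, src.hi
//
//   // LT/GE: subtract the low halves, then subtract-with-borrow the high
//   // halves; the sign/overflow flags then describe (src1 - src2) as a
//   // 64-bit value.
//   CMP  src1.lo, src2.lo
//   MOV  tmp, src1.hi
//   SBB  tmp, src2.hi        // now JL / JGE are valid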
12841 
12842 // Manifest a CmpL result in an integer register.  Very painful.
12843 // This is the test to avoid.
12844 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12845   match(Set dst (CmpL3 src1 src2));
12846   effect( KILL flags );
12847   ins_cost(1000);
12848   format %{ "XOR    $dst,$dst\n\t"
12849             "CMP    $src1.hi,$src2.hi\n\t"
12850             "JLT,s  m_one\n\t"
12851             "JGT,s  p_one\n\t"
12852             "CMP    $src1.lo,$src2.lo\n\t"
12853             "JB,s   m_one\n\t"
12854             "JEQ,s  done\n"
12855     "p_one:\tINC    $dst\n\t"
12856             "JMP,s  done\n"
12857     "m_one:\tDEC    $dst\n"
12858      "done:" %}
12859   ins_encode %{
12860     Label p_one, m_one, done;
12861     __ xorptr($dst$$Register, $dst$$Register);
12862     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12863     __ jccb(Assembler::less,    m_one);
12864     __ jccb(Assembler::greater, p_one);
12865     __ cmpl($src1$$Register, $src2$$Register);
12866     __ jccb(Assembler::below,   m_one);
12867     __ jccb(Assembler::equal,   done);
12868     __ bind(p_one);
12869     __ incrementl($dst$$Register);
12870     __ jmpb(done);
12871     __ bind(m_one);
12872     __ decrementl($dst$$Register);
12873     __ bind(done);
12874   %}
12875   ins_pipe( pipe_slow );
12876 %}
12877 
12878 //======
12879 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12880 // compares.  Can be used for LE or GT compares by reversing arguments.
12881 // NOT GOOD FOR EQ/NE tests.
12882 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12883   match( Set flags (CmpL src zero ));
12884   ins_cost(100);
12885   format %{ "TEST   $src.hi,$src.hi" %}
12886   opcode(0x85);
12887   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12888   ins_pipe( ialu_cr_reg_reg );
12889 %}
12890 
12891 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12892 // compares.  Can be used for LE or GT compares by reversing arguments.
12893 // NOT GOOD FOR EQ/NE tests.
12894 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12895   match( Set flags (CmpL src1 src2 ));
12896   effect( TEMP tmp );
12897   ins_cost(300);
12898   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12899             "MOV    $tmp,$src1.hi\n\t"
12900             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12901   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12902   ins_pipe( ialu_cr_reg_reg );
12903 %}
12904 
// Long compares reg < zero/reg OR reg >= zero/reg.
12906 // Just a wrapper for a normal branch, plus the predicate test.
12907 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12908   match(If cmp flags);
12909   effect(USE labl);
12910   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12911   expand %{
12912     jmpCon(cmp,flags,labl);    // JLT or JGE...
12913   %}
12914 %}
12915 
12916 //======
12917 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12918 // compares.  Can be used for LE or GT compares by reversing arguments.
12919 // NOT GOOD FOR EQ/NE tests.
12920 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12921   match(Set flags (CmpUL src zero));
12922   ins_cost(100);
12923   format %{ "TEST   $src.hi,$src.hi" %}
12924   opcode(0x85);
12925   ins_encode(OpcP, RegReg_Hi2(src, src));
12926   ins_pipe(ialu_cr_reg_reg);
12927 %}
12928 
12929 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12930 // compares.  Can be used for LE or GT compares by reversing arguments.
12931 // NOT GOOD FOR EQ/NE tests.
12932 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12933   match(Set flags (CmpUL src1 src2));
12934   effect(TEMP tmp);
12935   ins_cost(300);
12936   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12937             "MOV    $tmp,$src1.hi\n\t"
12938             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12939   ins_encode(long_cmp_flags2(src1, src2, tmp));
12940   ins_pipe(ialu_cr_reg_reg);
12941 %}
12942 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12944 // Just a wrapper for a normal branch, plus the predicate test.
12945 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12946   match(If cmp flags);
12947   effect(USE labl);
12948   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12949   expand %{
12950     jmpCon(cmp, flags, labl);    // JLT or JGE...
12951   %}
12952 %}
12953 
12954 // Compare 2 longs and CMOVE longs.
12955 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12956   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12957   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12958   ins_cost(400);
12959   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12960             "CMOV$cmp $dst.hi,$src.hi" %}
12961   opcode(0x0F,0x40);
12962   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12963   ins_pipe( pipe_cmov_reg_long );
12964 %}
12965 
12966 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12967   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12968   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12969   ins_cost(500);
12970   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12971             "CMOV$cmp $dst.hi,$src.hi" %}
12972   opcode(0x0F,0x40);
12973   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12974   ins_pipe( pipe_cmov_reg_long );
12975 %}
12976 
12977 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
12978   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12979   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12980   ins_cost(400);
12981   expand %{
12982     cmovLL_reg_LTGE(cmp, flags, dst, src);
12983   %}
12984 %}
12985 
12986 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
12987   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12988   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12989   ins_cost(500);
12990   expand %{
12991     cmovLL_mem_LTGE(cmp, flags, dst, src);
12992   %}
12993 %}
12994 
12995 // Compare 2 longs and CMOVE ints.
12996 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12997   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12998   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12999   ins_cost(200);
13000   format %{ "CMOV$cmp $dst,$src" %}
13001   opcode(0x0F,0x40);
13002   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13003   ins_pipe( pipe_cmov_reg );
13004 %}
13005 
13006 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13007   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13008   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13009   ins_cost(250);
13010   format %{ "CMOV$cmp $dst,$src" %}
13011   opcode(0x0F,0x40);
13012   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13013   ins_pipe( pipe_cmov_mem );
13014 %}
13015 
13016 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13017   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13018   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13019   ins_cost(200);
13020   expand %{
13021     cmovII_reg_LTGE(cmp, flags, dst, src);
13022   %}
13023 %}
13024 
13025 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13026   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13027   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13028   ins_cost(250);
13029   expand %{
13030     cmovII_mem_LTGE(cmp, flags, dst, src);
13031   %}
13032 %}
13033 
13034 // Compare 2 longs and CMOVE ptrs.
13035 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13036   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13037   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13038   ins_cost(200);
13039   format %{ "CMOV$cmp $dst,$src" %}
13040   opcode(0x0F,0x40);
13041   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13042   ins_pipe( pipe_cmov_reg );
13043 %}
13044 
13045 // Compare 2 unsigned longs and CMOVE ptrs.
13046 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13047   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13048   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13049   ins_cost(200);
13050   expand %{
13051     cmovPP_reg_LTGE(cmp,flags,dst,src);
13052   %}
13053 %}
13054 
13055 // Compare 2 longs and CMOVE doubles
13056 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13057   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13058   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13059   ins_cost(200);
13060   expand %{
13061     fcmovDPR_regS(cmp,flags,dst,src);
13062   %}
13063 %}
13064 
13065 // Compare 2 longs and CMOVE doubles
13066 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13067   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13068   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13069   ins_cost(200);
13070   expand %{
13071     fcmovD_regS(cmp,flags,dst,src);
13072   %}
13073 %}
13074 
13075 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13076   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13077   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13078   ins_cost(200);
13079   expand %{
13080     fcmovFPR_regS(cmp,flags,dst,src);
13081   %}
13082 %}
13083 
13084 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13085   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13086   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13087   ins_cost(200);
13088   expand %{
13089     fcmovF_regS(cmp,flags,dst,src);
13090   %}
13091 %}
13092 
13093 //======
13094 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13095 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13096   match( Set flags (CmpL src zero ));
13097   effect(TEMP tmp);
13098   ins_cost(200);
13099   format %{ "MOV    $tmp,$src.lo\n\t"
13100             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13101   ins_encode( long_cmp_flags0( src, tmp ) );
13102   ins_pipe( ialu_reg_reg_long );
13103 %}
13104 
13105 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13106 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13107   match( Set flags (CmpL src1 src2 ));
13108   ins_cost(200+300);
13109   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13110             "JNE,s  skip\n\t"
13111             "CMP    $src1.hi,$src2.hi\n\t"
13112      "skip:\t" %}
13113   ins_encode( long_cmp_flags1( src1, src2 ) );
13114   ins_pipe( ialu_cr_reg_reg );
13115 %}
13116 
13117 // Long compare reg == zero/reg OR reg != zero/reg
13118 // Just a wrapper for a normal branch, plus the predicate test.
13119 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13120   match(If cmp flags);
13121   effect(USE labl);
13122   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13123   expand %{
13124     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13125   %}
13126 %}
13127 
13128 //======
13129 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13130 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13131   match(Set flags (CmpUL src zero));
13132   effect(TEMP tmp);
13133   ins_cost(200);
13134   format %{ "MOV    $tmp,$src.lo\n\t"
13135             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13136   ins_encode(long_cmp_flags0(src, tmp));
13137   ins_pipe(ialu_reg_reg_long);
13138 %}
13139 
13140 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13141 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13142   match(Set flags (CmpUL src1 src2));
13143   ins_cost(200+300);
13144   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13145             "JNE,s  skip\n\t"
13146             "CMP    $src1.hi,$src2.hi\n\t"
13147      "skip:\t" %}
13148   ins_encode(long_cmp_flags1(src1, src2));
13149   ins_pipe(ialu_cr_reg_reg);
13150 %}
13151 
13152 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13153 // Just a wrapper for a normal branch, plus the predicate test.
13154 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13155   match(If cmp flags);
13156   effect(USE labl);
13157   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13158   expand %{
13159     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13160   %}
13161 %}
13162 
13163 // Compare 2 longs and CMOVE longs.
13164 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13165   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13166   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13167   ins_cost(400);
13168   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13169             "CMOV$cmp $dst.hi,$src.hi" %}
13170   opcode(0x0F,0x40);
13171   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13172   ins_pipe( pipe_cmov_reg_long );
13173 %}
13174 
13175 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13176   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13177   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178   ins_cost(500);
13179   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13180             "CMOV$cmp $dst.hi,$src.hi" %}
13181   opcode(0x0F,0x40);
13182   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13183   ins_pipe( pipe_cmov_reg_long );
13184 %}
13185 
13186 // Compare 2 longs and CMOVE ints.
13187 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13188   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13189   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13190   ins_cost(200);
13191   format %{ "CMOV$cmp $dst,$src" %}
13192   opcode(0x0F,0x40);
13193   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13194   ins_pipe( pipe_cmov_reg );
13195 %}
13196 
13197 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13198   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13199   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13200   ins_cost(250);
13201   format %{ "CMOV$cmp $dst,$src" %}
13202   opcode(0x0F,0x40);
13203   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13204   ins_pipe( pipe_cmov_mem );
13205 %}
13206 
13207 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13208   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13209   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13210   ins_cost(200);
13211   expand %{
13212     cmovII_reg_EQNE(cmp, flags, dst, src);
13213   %}
13214 %}
13215 
13216 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13217   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13218   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13219   ins_cost(250);
13220   expand %{
13221     cmovII_mem_EQNE(cmp, flags, dst, src);
13222   %}
13223 %}
13224 
13225 // Compare 2 longs and CMOVE ptrs.
13226 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13227   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13228   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13229   ins_cost(200);
13230   format %{ "CMOV$cmp $dst,$src" %}
13231   opcode(0x0F,0x40);
13232   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13233   ins_pipe( pipe_cmov_reg );
13234 %}
13235 
13236 // Compare 2 unsigned longs and CMOVE ptrs.
13237 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13238   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13239   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13240   ins_cost(200);
13241   expand %{
13242     cmovPP_reg_EQNE(cmp,flags,dst,src);
13243   %}
13244 %}
13245 
13246 // Compare 2 longs and CMOVE doubles
13247 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13248   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13249   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13250   ins_cost(200);
13251   expand %{
13252     fcmovDPR_regS(cmp,flags,dst,src);
13253   %}
13254 %}
13255 
13256 // Compare 2 longs and CMOVE doubles
13257 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13258   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13259   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13260   ins_cost(200);
13261   expand %{
13262     fcmovD_regS(cmp,flags,dst,src);
13263   %}
13264 %}
13265 
13266 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13267   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13268   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13269   ins_cost(200);
13270   expand %{
13271     fcmovFPR_regS(cmp,flags,dst,src);
13272   %}
13273 %}
13274 
13275 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13276   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13277   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13278   ins_cost(200);
13279   expand %{
13280     fcmovF_regS(cmp,flags,dst,src);
13281   %}
13282 %}
13283 
13284 //======
13285 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13286 // Same as cmpL_reg_flags_LEGT except must negate src
13287 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13288   match( Set flags (CmpL src zero ));
13289   effect( TEMP tmp );
13290   ins_cost(300);
13291   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13292             "CMP    $tmp,$src.lo\n\t"
13293             "SBB    $tmp,$src.hi\n\t" %}
13294   ins_encode( long_cmp_flags3(src, tmp) );
13295   ins_pipe( ialu_reg_reg_long );
13296 %}
13297 
13298 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13299 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13300 // requires a commuted test to get the same result.
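// For example, src1 <= src2 holds exactly when src2 >= src1, so the flags
// are computed for (src2 - src1) and the branch then tests the swapped
// condition (GE rather than LE); the cmpOp_commute operand supplies that
// swapped condition.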
13301 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13302   match( Set flags (CmpL src1 src2 ));
13303   effect( TEMP tmp );
13304   ins_cost(300);
13305   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13306             "MOV    $tmp,$src2.hi\n\t"
13307             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13308   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13309   ins_pipe( ialu_cr_reg_reg );
13310 %}
13311 
// Long compares reg < zero/reg OR reg >= zero/reg.
13313 // Just a wrapper for a normal branch, plus the predicate test
13314 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13315   match(If cmp flags);
13316   effect(USE labl);
13317   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13318   ins_cost(300);
13319   expand %{
13320     jmpCon(cmp,flags,labl);    // JGT or JLE...
13321   %}
13322 %}
13323 
13324 //======
13325 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13326 // Same as cmpUL_reg_flags_LEGT except must negate src
13327 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13328   match(Set flags (CmpUL src zero));
13329   effect(TEMP tmp);
13330   ins_cost(300);
13331   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13332             "CMP    $tmp,$src.lo\n\t"
13333             "SBB    $tmp,$src.hi\n\t" %}
13334   ins_encode(long_cmp_flags3(src, tmp));
13335   ins_pipe(ialu_reg_reg_long);
13336 %}
13337 
13338 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13339 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13340 // requires a commuted test to get the same result.
13341 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13342   match(Set flags (CmpUL src1 src2));
13343   effect(TEMP tmp);
13344   ins_cost(300);
13345   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13346             "MOV    $tmp,$src2.hi\n\t"
13347             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13348   ins_encode(long_cmp_flags2( src2, src1, tmp));
13349   ins_pipe(ialu_cr_reg_reg);
13350 %}
13351 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13353 // Just a wrapper for a normal branch, plus the predicate test
13354 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13355   match(If cmp flags);
13356   effect(USE labl);
13357   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13358   ins_cost(300);
13359   expand %{
13360     jmpCon(cmp, flags, labl);    // JGT or JLE...
13361   %}
13362 %}
13363 
13364 // Compare 2 longs and CMOVE longs.
13365 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13366   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13367   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13368   ins_cost(400);
13369   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13370             "CMOV$cmp $dst.hi,$src.hi" %}
13371   opcode(0x0F,0x40);
13372   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13373   ins_pipe( pipe_cmov_reg_long );
13374 %}
13375 
13376 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13377   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13378   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13379   ins_cost(500);
13380   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13381             "CMOV$cmp $dst.hi,$src.hi+4" %}
13382   opcode(0x0F,0x40);
13383   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13384   ins_pipe( pipe_cmov_reg_long );
13385 %}
13386 
13387 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13388   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13389   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13390   ins_cost(400);
13391   expand %{
13392     cmovLL_reg_LEGT(cmp, flags, dst, src);
13393   %}
13394 %}
13395 
13396 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13397   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13398   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13399   ins_cost(500);
13400   expand %{
13401     cmovLL_mem_LEGT(cmp, flags, dst, src);
13402   %}
13403 %}
13404 
13405 // Compare 2 longs and CMOVE ints.
13406 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13407   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13408   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13409   ins_cost(200);
13410   format %{ "CMOV$cmp $dst,$src" %}
13411   opcode(0x0F,0x40);
13412   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13413   ins_pipe( pipe_cmov_reg );
13414 %}
13415 
13416 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13417   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13418   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13419   ins_cost(250);
13420   format %{ "CMOV$cmp $dst,$src" %}
13421   opcode(0x0F,0x40);
13422   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13423   ins_pipe( pipe_cmov_mem );
13424 %}
13425 
13426 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13427   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13428   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13429   ins_cost(200);
13430   expand %{
13431     cmovII_reg_LEGT(cmp, flags, dst, src);
13432   %}
13433 %}
13434 
13435 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13436   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13437   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13438   ins_cost(250);
13439   expand %{
13440     cmovII_mem_LEGT(cmp, flags, dst, src);
13441   %}
13442 %}
13443 
13444 // Compare 2 longs and CMOVE ptrs.
13445 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13446   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13447   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13448   ins_cost(200);
13449   format %{ "CMOV$cmp $dst,$src" %}
13450   opcode(0x0F,0x40);
13451   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13452   ins_pipe( pipe_cmov_reg );
13453 %}
13454 
13455 // Compare 2 unsigned longs and CMOVE ptrs.
13456 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13457   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13458   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13459   ins_cost(200);
13460   expand %{
13461     cmovPP_reg_LEGT(cmp,flags,dst,src);
13462   %}
13463 %}
13464 
13465 // Compare 2 longs and CMOVE doubles
13466 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13467   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13468   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13469   ins_cost(200);
13470   expand %{
13471     fcmovDPR_regS(cmp,flags,dst,src);
13472   %}
13473 %}
13474 
13475 // Compare 2 longs and CMOVE doubles
13476 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13477   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13478   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13479   ins_cost(200);
13480   expand %{
13481     fcmovD_regS(cmp,flags,dst,src);
13482   %}
13483 %}
13484 
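// Compare 2 longs and CMOVE floats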
13485 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13486   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13487   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13488   ins_cost(200);
13489   expand %{
13490     fcmovFPR_regS(cmp,flags,dst,src);
13491   %}
13492 %}
13493 
13494 
13495 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13496   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13497   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13498   ins_cost(200);
13499   expand %{
13500     fcmovF_regS(cmp,flags,dst,src);
13501   %}
13502 %}
13503 
13504 
13505 // ============================================================================
13506 // Procedure Call/Return Instructions
13507 // Call Java Static Instruction
13508 // Note: If this code changes, the corresponding ret_addr_offset() and
13509 //       compute_padding() functions will have to be adjusted.
13510 instruct CallStaticJavaDirect(method meth) %{
13511   match(CallStaticJava);
13512   effect(USE meth);
13513 
13514   ins_cost(300);
13515   format %{ "CALL,static " %}
13516   opcode(0xE8); /* E8 cd */
13517   ins_encode( pre_call_resets,
13518               Java_Static_Call( meth ),
13519               call_epilog,
13520               post_call_FPU );
13521   ins_pipe( pipe_slow );
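  // Assumed rationale for the alignment below: keep the call's 32-bit displacement
  // atomically patchable (see also the ret_addr_offset()/compute_padding() note above).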
13522   ins_alignment(4);
13523 %}
13524 
13525 // Call Java Dynamic Instruction
13526 // Note: If this code changes, the corresponding ret_addr_offset() and
13527 //       compute_padding() functions will have to be adjusted.
13528 instruct CallDynamicJavaDirect(method meth) %{
13529   match(CallDynamicJava);
13530   effect(USE meth);
13531 
13532   ins_cost(300);
13533   format %{ "MOV    EAX,(oop)-1\n\t"
13534             "CALL,dynamic" %}
13535   opcode(0xE8); /* E8 cd */
13536   ins_encode( pre_call_resets,
13537               Java_Dynamic_Call( meth ),
13538               call_epilog,
13539               post_call_FPU );
13540   ins_pipe( pipe_slow );
13541   ins_alignment(4);
13542 %}
13543 
13544 // Call Runtime Instruction
13545 instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
13547   effect(USE meth);
13548 
13549   ins_cost(300);
13550   format %{ "CALL,runtime " %}
13551   opcode(0xE8); /* E8 cd */
13552   // Use FFREEs to clear entries in float stack
13553   ins_encode( pre_call_resets,
13554               FFree_Float_Stack_All,
13555               Java_To_Runtime( meth ),
13556               post_call_FPU );
13557   ins_pipe( pipe_slow );
13558 %}
13559 
13560 // Call runtime without safepoint
13561 instruct CallLeafDirect(method meth) %{
13562   match(CallLeaf);
13563   effect(USE meth);
13564 
13565   ins_cost(300);
13566   format %{ "CALL_LEAF,runtime " %}
13567   opcode(0xE8); /* E8 cd */
13568   ins_encode( pre_call_resets,
13569               FFree_Float_Stack_All,
13570               Java_To_Runtime( meth ),
13571               Verify_FPU_For_Leaf, post_call_FPU );
13572   ins_pipe( pipe_slow );
13573 %}
13574 
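// Call runtime leaf without safepoint, no-FP variant: unlike CallLeafDirect above it
// emits no x87 FPU-stack cleanup (no FFree_Float_Stack_All / post_call_FPU), as it is
// used for leaf calls that do not involve FP state.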
13575 instruct CallLeafNoFPDirect(method meth) %{
13576   match(CallLeafNoFP);
13577   effect(USE meth);
13578 
13579   ins_cost(300);
13580   format %{ "CALL_LEAF_NOFP,runtime " %}
13581   opcode(0xE8); /* E8 cd */
13582   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13583   ins_pipe( pipe_slow );
13584 %}
13585 
13586 
13587 // Return Instruction
13588 // Remove the return address & jump to it.
13589 instruct Ret() %{
13590   match(Return);
13591   format %{ "RET" %}
13592   opcode(0xC3);
13593   ins_encode(OpcP);
13594   ins_pipe( pipe_jmp );
13595 %}
13596 
13597 // Tail Call; Jump from runtime stub to Java code.
13598 // Also known as an 'interprocedural jump'.
13599 // Target of jump will eventually return to caller.
13600 // TailJump below removes the return address.
13601 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13602   match(TailCall jump_target method_ptr);
13603   ins_cost(300);
13604   format %{ "JMP    $jump_target \t# EBX holds method" %}
13605   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13606   ins_encode( OpcP, RegOpc(jump_target) );
13607   ins_pipe( pipe_jmp );
13608 %}
13609 
13610 
13611 // Tail Jump; remove the return address; jump to target.
13612 // TailCall above leaves the return address around.
13613 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13614   match( TailJump jump_target ex_oop );
13615   ins_cost(300);
13616   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13617             "JMP    $jump_target " %}
13618   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13619   ins_encode( enc_pop_rdx,
13620               OpcP, RegOpc(jump_target) );
13621   ins_pipe( pipe_jmp );
13622 %}
13623 
13624 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
13626 // just prior to jumping to this handler.  No code emitted.
13627 instruct CreateException( eAXRegP ex_oop )
13628 %{
13629   match(Set ex_oop (CreateEx));
13630 
13631   size(0);
13632   // use the following format syntax
13633   format %{ "# exception oop is in EAX; no code emitted" %}
13634   ins_encode();
13635   ins_pipe( empty );
13636 %}
13637 
13638 
13639 // Rethrow exception:
13640 // The exception oop will come in the first argument position.
13641 // Then JUMP (not call) to the rethrow stub code.
13642 instruct RethrowException()
13643 %{
13644   match(Rethrow);
13645 
13646   // use the following format syntax
13647   format %{ "JMP    rethrow_stub" %}
13648   ins_encode(enc_rethrow);
13649   ins_pipe( pipe_jmp );
13650 %}
13651 
13652 // inlined locking and unlocking
13653 
13654 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13655   predicate(Compile::current()->use_rtm());
13656   match(Set cr (FastLock object box));
13657   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13658   ins_cost(300);
13659   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13660   ins_encode %{
13661     __ get_thread($thread$$Register);
13662     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13663                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13664                  _rtm_counters, _stack_rtm_counters,
13665                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13666                  true, ra_->C->profile_rtm());
13667   %}
13668   ins_pipe(pipe_slow);
13669 %}
13670 
13671 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13672   predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
13673   match(Set cr (FastLock object box));
13674   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13675   ins_cost(300);
13676   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13677   ins_encode %{
13678     __ get_thread($thread$$Register);
13679     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13680                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13681   %}
13682   ins_pipe(pipe_slow);
13683 %}
13684 
13685 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp, eRegP scr) %{
13686   predicate(LockingMode != LM_LIGHTWEIGHT);
13687   match(Set cr (FastUnlock object box));
13688   effect(TEMP tmp, TEMP scr, USE_KILL box);
13689   ins_cost(300);
13690   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13691   ins_encode %{
13692     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, ra_->C->use_rtm());
13693   %}
13694   ins_pipe(pipe_slow);
13695 %}
13696 
13697 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13698   predicate(LockingMode == LM_LIGHTWEIGHT);
13699   match(Set cr (FastLock object box));
13700   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13701   ins_cost(300);
13702   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13703   ins_encode %{
13704     __ get_thread($thread$$Register);
13705     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13706   %}
13707   ins_pipe(pipe_slow);
13708 %}
13709 
13710 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP scr, eRegP thread) %{
13711   predicate(LockingMode == LM_LIGHTWEIGHT);
13712   match(Set cr (FastUnlock object eax_reg));
13713   effect(TEMP tmp, TEMP scr, USE_KILL eax_reg, TEMP thread);
13714   ins_cost(300);
13715   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13716   ins_encode %{
13717     __ get_thread($thread$$Register);
13718     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $scr$$Register, $thread$$Register);
13719   %}
13720   ins_pipe(pipe_slow);
13721 %}
13722 
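// Set every lane of an AVX-512 opmask register (kReg) from a scalar source (MaskAll).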
13723 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13724   predicate(Matcher::vector_length(n) <= 32);
13725   match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LT32 $dst, $src" %}
13727   ins_encode %{
13728     int mask_len = Matcher::vector_length(this);
13729     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13730   %}
13731   ins_pipe( pipe_slow );
13732 %}
13733 
13734 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13735   predicate(Matcher::vector_length(n) > 32);
13736   match(Set dst (MaskAll src));
13737   effect(TEMP ktmp);
13738   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13739   ins_encode %{
13740     int mask_len = Matcher::vector_length(this);
13741     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13742   %}
13743   ins_pipe( pipe_slow );
13744 %}
13745 
13746 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13747   predicate(Matcher::vector_length(n) > 32);
13748   match(Set dst (MaskAll src));
13749   effect(TEMP ktmp);
13750   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13751   ins_encode %{
13752     int mask_len = Matcher::vector_length(this);
13753     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13754   %}
13755   ins_pipe( pipe_slow );
13756 %}
13757 
13758 // ============================================================================
13759 // Safepoint Instruction
13760 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13761   match(SafePoint poll);
13762   effect(KILL cr, USE poll);
13763 
13764   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13765   ins_cost(125);
13766   // EBP would need size(3)
13767   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13768   ins_encode %{
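    // Note (assumed mechanism): the poll_type relocation marks this PC as a safepoint
    // poll; the testl read through $poll faults when the VM arms the polling page.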
13769     __ relocate(relocInfo::poll_type);
13770     address pre_pc = __ pc();
13771     __ testl(rax, Address($poll$$Register, 0));
13772     address post_pc = __ pc();
13773     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13774   %}
13775   ins_pipe(ialu_reg_mem);
13776 %}
13777 
13778 
13779 // ============================================================================
13780 // This name is KNOWN by the ADLC and cannot be changed.
13781 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
13783 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13784   match(Set dst (ThreadLocal));
13785   effect(DEF dst, KILL cr);
13786 
13787   format %{ "MOV    $dst, Thread::current()" %}
13788   ins_encode %{
13789     Register dstReg = as_Register($dst$$reg);
13790     __ get_thread(dstReg);
13791   %}
13792   ins_pipe( ialu_reg_fat );
13793 %}
13794 
13795 
13796 
13797 //----------PEEPHOLE RULES-----------------------------------------------------
13798 // These must follow all instruction definitions as they use the names
13799 // defined in the instructions definitions.
13800 //
13801 // peepmatch ( root_instr_name [preceding_instruction]* );
13802 //
13803 // peepconstraint %{
13804 // (instruction_number.operand_name relational_op instruction_number.operand_name
13805 //  [, ...] );
13806 // // instruction numbers are zero-based using left to right order in peepmatch
13807 //
13808 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13809 // // provide an instruction_number.operand_name for each operand that appears
13810 // // in the replacement instruction's match rule
13811 //
13812 // ---------VM FLAGS---------------------------------------------------------
13813 //
13814 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13815 //
13816 // Each peephole rule is given an identifying number starting with zero and
13817 // increasing by one in the order seen by the parser.  An individual peephole
13818 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13819 // on the command-line.
13820 //
13821 // ---------CURRENT LIMITATIONS----------------------------------------------
13822 //
13823 // Only match adjacent instructions in same basic block
13824 // Only equality constraints
13825 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13826 // Only one replacement instruction
13827 //
13828 // ---------EXAMPLE----------------------------------------------------------
13829 //
13830 // // pertinent parts of existing instructions in architecture description
13831 // instruct movI(rRegI dst, rRegI src) %{
13832 //   match(Set dst (CopyI src));
13833 // %}
13834 //
13835 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13836 //   match(Set dst (AddI dst src));
13837 //   effect(KILL cr);
13838 // %}
13839 //
13840 // // Change (inc mov) to lea
13841 // peephole %{
13842 //   // increment preceded by register-register move
13843 //   peepmatch ( incI_eReg movI );
13844 //   // require that the destination register of the increment
13845 //   // match the destination register of the move
13846 //   peepconstraint ( 0.dst == 1.dst );
13847 //   // construct a replacement instruction that sets
13848 //   // the destination to ( move's source register + one )
13849 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13850 // %}
13851 //
13852 // Implementation no longer uses movX instructions since
13853 // machine-independent system no longer uses CopyX nodes.
13854 //
13855 // peephole %{
13856 //   peepmatch ( incI_eReg movI );
13857 //   peepconstraint ( 0.dst == 1.dst );
13858 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13859 // %}
13860 //
13861 // peephole %{
13862 //   peepmatch ( decI_eReg movI );
13863 //   peepconstraint ( 0.dst == 1.dst );
13864 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13865 // %}
13866 //
13867 // peephole %{
13868 //   peepmatch ( addI_eReg_imm movI );
13869 //   peepconstraint ( 0.dst == 1.dst );
13870 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13871 // %}
13872 //
13873 // peephole %{
13874 //   peepmatch ( addP_eReg_imm movP );
13875 //   peepconstraint ( 0.dst == 1.dst );
13876 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13877 // %}
13878 
13879 // // Change load of spilled value to only a spill
13880 // instruct storeI(memory mem, rRegI src) %{
13881 //   match(Set mem (StoreI mem src));
13882 // %}
13883 //
13884 // instruct loadI(rRegI dst, memory mem) %{
13885 //   match(Set dst (LoadI mem));
13886 // %}
13887 //
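// Descriptive note on the active peephole below: a load that immediately follows a
// store of the same register to the same stack slot is redundant (the register already
// holds the value), so the matched pair is replaced by the store alone.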
13888 peephole %{
13889   peepmatch ( loadI storeI );
13890   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13891   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13892 %}
13893 
13894 //----------SMARTSPILL RULES---------------------------------------------------
13895 // These must follow all instruction definitions as they use the names
13896 // defined in the instructions definitions.