1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code,
   64 // but SOE was turned off in Java code due to frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Here is the trick: FPR1 is really st(0), except in the midst
   82 // of emission of assembly for a machnode.  During emission the FPU stack
   83 // is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
   84 // the stack will not have this extra element, so FPR1 == st(0) from the
   85 // oopMap viewpoint.  This same weirdness with numbering forces the
   86 // instruction encoding to play games with the register
   87 // encoding to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and not EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is never allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
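// Illustrative mapping, derived from the long pairs EDX:EAX, EBX:ECX and EDI:EBP
// listed in the allocation comment above:
//   HIGH_FROM_LOW_ENC(EAX_enc /* 0 */) == 2 == EDX_enc
//   HIGH_FROM_LOW_ENC(ECX_enc /* 1 */) == 3 == EBX_enc
//   HIGH_FROM_LOW_ENC(EBP_enc /* 5 */) == 7 == EDI_enc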
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
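// Worked example of the alignment trick above (addresses are illustrative): if
// &fp_signmask_pool[2] were 0x1038, masking with ~0xF yields 0x1030, a 16-byte
// aligned slot that still lies inside the pool because one extra 128-bit slot is
// reserved below purely for alignment slack.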
  277 
  278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
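// Usage sketch (assuming the usual SSE sign-manipulation idiom): AbsF/AbsD AND an
// XMM register with a *_signmask value (0x7FFF... clears the sign bit), while
// NegF/NegD XOR with a *_signflip value (0x8000... flips the sign bit).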
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
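// Size sketch for the constants above: FLDCW with an absolute memory operand is
// D9 /5 plus a 4-byte address (6 bytes), and VZEROUPPER is C5 F8 77 (3 bytes).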
  299 
  300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
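// Illustrative byte accounting (ignoring pre_call_resets_size()): a static call is
// CALL rel32, i.e. 1 opcode byte + 4 displacement bytes = 5; a dynamic call is
// preceded by a 5-byte MOV of the 32-bit inline-cache immediate, giving 10.  This
// matches the "skip MOV instruction" / "skip call opcode byte" padding logic below.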
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line, so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line, so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
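// Padding example (assuming alignment_required() == 4): if the call displacement
// would otherwise start at offset 13, align_up(13, 4) - 13 == 3 bytes of padding are
// requested so that the 4-byte displacement begins at offset 16 and cannot span a
// cache line.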
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement
  407   }
  408 }
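// Illustrative encoding: store_to_stackslot(cbuf, 0xDB, 0x0, 8) emits
//   DB 44 24 08       ; FILD DWORD PTR [ESP+8]
// (opcode 0xDB, ModRM 0x44 = mod 01 / reg 000 / rm 100, SIB 0x24 for an ESP base,
// then the 8-bit displacement 0x08).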
  409 
  410 // encode_RegMem: encodes a register + memory operand pair (rRegI ereg, memory mem).
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
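// Illustrative encoding: following an 0x8B opcode byte,
// encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4, 0, 8, relocInfo::none) yields
//   8B 41 08          ; MOV EAX, [ECX+8]
// since no index and a small displacement select the 8-bit-displacement form
// (mod 01) with no SIB byte.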
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
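// Example: encode_Copy(cbuf, EDX_enc, ECX_enc) emits 8B D1 (MOV EDX, ECX), where
// ModRM 0xD1 = mod 11 (register-direct), reg = EDX (010), r/m = ECX (001).  A copy
// to the same register emits nothing at all.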
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss set ZF,PF,CF and zero OF,AF,SF; for NaN
  493   // (unordered) values ZF, PF and CF are all set.
  494   // Fix up the flags by zeroing ZF and PF so that comparing NaN
  495   // values returns a 'less than' result (CF remains set).
  496   // Leave the rest of the flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
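// Worked example of the fixup above: after a ucomiss involving NaN the relevant
// flags are ZF=1, PF=1, CF=1 (unordered); AND-ing the saved flags with 0xffffff2b
// clears ZF and PF but keeps CF, so callers observe the NaN case as 'less than'.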
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
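// Result sketch for emit_cmpfp3: dst ends up as -1 when the operands are unordered
// (parity set) or compare 'below', 0 when they are equal, and 1 otherwise, i.e. the
// usual three-way compare with NaN folded into 'less than'.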
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
  622     // NOTE: We set the table base offset here because users might be
  623     // emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  643   // Remove two words for the return addr and rbp.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  683   // Remove two words for the return addr and rbp.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
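// Size sketch: the emitted form is opcode + ModRM + SIB (an ESP base always needs a
// SIB byte) plus 0, 1 or 4 displacement bytes, hence the size+3+offset_size result.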
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
  783     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
  784     //                          since it maps more cases to a single-byte displacement.
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
  836     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
  848     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
  876     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  885   return (UseAVX> 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
  894     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  903   return (UseAVX> 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1002                 "popl    [rsp + #%d]\n\t"
 1003                 "pushl   [rsp + #%d]\n\t"
 1004                 "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
 1022       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     int offset = ra_->reg2offset(src_first);
 1247     if (cbuf != nullptr) {
 1248       MacroAssembler _masm(cbuf);
 1249       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1250 #ifndef PRODUCT
 1251     } else {
 1252       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1253 #endif
 1254     }
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1259     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1260     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1261     int offset = ra_->reg2offset(dst_first);
 1262     if (cbuf != nullptr) {
 1263       MacroAssembler _masm(cbuf);
 1264       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1265 #ifndef PRODUCT
 1266     } else {
 1267       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1268 #endif
 1269     }
 1270     return 0;
 1271   }
 1272 
 1273   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1274     Unimplemented();
 1275     return 0;
 1276   }
 1277 
 1278   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1279     Unimplemented();
 1280     return 0;
 1281   }
 1282 
 1283   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1284     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1285     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1286     if (cbuf != nullptr) {
 1287       MacroAssembler _masm(cbuf);
 1288       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289 #ifndef PRODUCT
 1290     } else {
 1291       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1292 #endif
 1293     }
 1294     return 0;
 1295   }
 1296 
 1297   assert( size > 0, "missed a case" );
 1298 
 1299   // --------------------------------------------------------------------
 1300   // Check for second bits still needing moving.
 1301   if( src_second == dst_second )
 1302     return size;               // Self copy; no move
 1303   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1304 
 1305   // Check for second word int-int move
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1307     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1308 
 1309   // Check for second word integer store
 1310   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1312 
 1313   // Check for second word integer load
 1314   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1315     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1316 
 1317   Unimplemented();
 1318   return 0; // Mute compiler
 1319 }
 1320 
 1321 #ifndef PRODUCT
 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1323   implementation( NULL, ra_, false, st );
 1324 }
 1325 #endif
 1326 
 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   implementation( &cbuf, ra_, false, NULL );
 1329 }
 1330 
 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1332   return MachNode::size(ra_);
 1333 }
 1334 
 1335 
 1336 //=============================================================================
 1337 #ifndef PRODUCT
 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_reg_first(this);
 1341   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1342 }
 1343 #endif
 1344 
 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1347   int reg = ra_->get_encode(this);
 1348   if( offset >= 128 ) {
 1349     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1350     emit_rm(cbuf, 0x2, reg, 0x04);
 1351     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1352     emit_d32(cbuf, offset);
 1353   }
 1354   else {
 1355     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1356     emit_rm(cbuf, 0x1, reg, 0x04);
 1357     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1358     emit_d8(cbuf, offset);
 1359   }
 1360 }
 1361 
 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1364   if( offset >= 128 ) {
 1365     return 7;
 1366   }
 1367   else {
 1368     return 4;
 1369   }
 1370 }
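      // Size sketch (illustrative): the long form above is LEA reg,[ESP+disp32],
      // i.e. opcode + ModRM + SIB + 4-byte displacement = 7 bytes; the short
      // form uses a disp8 and is opcode + ModRM + SIB + 1-byte displacement = 4
      // bytes, matching the two emit paths in BoxLockNode::emit().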
 1371 
 1372 //=============================================================================
 1373 #ifndef PRODUCT
 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1375   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1376   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1377   st->print_cr("\tNOP");
 1378   st->print_cr("\tNOP");
 1379   if( !OptoBreakpoint )
 1380     st->print_cr("\tNOP");
 1381 }
 1382 #endif
 1383 
 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1385   MacroAssembler masm(&cbuf);
 1386 #ifdef ASSERT
 1387   uint insts_size = cbuf.insts_size();
 1388 #endif
 1389   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1390   masm.jump_cc(Assembler::notEqual,
 1391                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1392   /* WARNING these NOPs are critical so that verified entry point is properly
 1393      aligned for patching by NativeJump::patch_verified_entry() */
 1394   int nops_cnt = 2;
 1395   if( !OptoBreakpoint ) // Leave space for int3
 1396      nops_cnt += 1;
 1397   masm.nop(nops_cnt);
 1398 
 1399   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1400 }
 1401 
 1402 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1403   return OptoBreakpoint ? 11 : 12;
 1404 }
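      // Size sketch (illustrative): CMP EAX,[ECX+4] encodes in 3 bytes and the
      // far JNE to the ic-miss stub in 6 bytes (0F 85 rel32). Adding 3 NOPs
      // (or only 2 when OptoBreakpoint reserves a byte for int3) gives the 12
      // or 11 bytes returned above.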
 1405 
 1406 
 1407 //=============================================================================
 1408 
 1409 // Vector calling convention not supported.
 1410 bool Matcher::supports_vector_calling_convention() {
 1411   return false;
 1412 }
 1413 
 1414 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1415   Unimplemented();
 1416   return OptoRegPair(0, 0);
 1417 }
 1418 
 1419 // Is this branch offset short enough that a short branch can be used?
 1420 //
 1421 // NOTE: If the platform does not provide any short branch variants, then
 1422 //       this method should return false for offset 0.
 1423 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1424   // The passed offset is relative to the address of the branch.
 1425   // On x86 a branch displacement is calculated relative to the address
 1426   // of the next instruction.
 1427   offset -= br_size;
 1428 
 1429   // The short version of jmpConUCF2 contains multiple branches,
 1430   // making the reach slightly shorter.
 1431   if (rule == jmpConUCF2_rule)
 1432     return (-126 <= offset && offset <= 125);
 1433   return (-128 <= offset && offset <= 127);
 1434 }
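      // Worked example (illustrative): for a 2-byte short Jcc (br_size == 2)
      // whose target lies 0x40 bytes past the start of the branch, the rel8
      // displacement after the adjustment above is 0x40 - 2 = 0x3E, which is
      // within [-128, 127], so the short form can be used.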
 1435 
 1436 // Return whether or not this register is ever used as an argument.  This
 1437 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1438 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1439 // arguments in those registers will not be available to the callee.
 1440 bool Matcher::can_be_java_arg( int reg ) {
 1441   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1442   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1443   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1444   return false;
 1445 }
 1446 
 1447 bool Matcher::is_spillable_arg( int reg ) {
 1448   return can_be_java_arg(reg);
 1449 }
 1450 
 1451 uint Matcher::int_pressure_limit()
 1452 {
 1453   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1454 }
 1455 
 1456 uint Matcher::float_pressure_limit()
 1457 {
 1458   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1459 }
 1460 
 1461 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1462   // Use the hardware integer DIV instruction when
 1463   // it is faster than code which uses a multiply.
 1464   // Only do so when the constant divisor fits into 32 bits
 1465   // (min_jint is excluded because its absolute value does not
 1466   // fit in a positive 32-bit int).
 1467   return VM_Version::has_fast_idiv() &&
 1468          (divisor == (int)divisor && divisor != min_jint);
 1469 }
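      // Example (illustrative): a constant divisor of 10 satisfies
      // divisor == (int)divisor, so on CPUs with fast IDIV the hardware
      // instruction is preferred over a multiply-by-reciprocal expansion;
      // min_jint is rejected since its magnitude does not fit in a positive
      // 32-bit int.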
 1470 
 1471 // Register for DIVI projection of divmodI
 1472 RegMask Matcher::divI_proj_mask() {
 1473   return EAX_REG_mask();
 1474 }
 1475 
 1476 // Register for MODI projection of divmodI
 1477 RegMask Matcher::modI_proj_mask() {
 1478   return EDX_REG_mask();
 1479 }
 1480 
 1481 // Register for DIVL projection of divmodL
 1482 RegMask Matcher::divL_proj_mask() {
 1483   ShouldNotReachHere();
 1484   return RegMask();
 1485 }
 1486 
 1487 // Register for MODL projection of divmodL
 1488 RegMask Matcher::modL_proj_mask() {
 1489   ShouldNotReachHere();
 1490   return RegMask();
 1491 }
 1492 
 1493 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1494   return NO_REG_mask();
 1495 }
 1496 
 1497 // Returns true if the high 32 bits of the value is known to be zero.
 1498 bool is_operand_hi32_zero(Node* n) {
 1499   int opc = n->Opcode();
 1500   if (opc == Op_AndL) {
 1501     Node* o2 = n->in(2);
 1502     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1503       return true;
 1504     }
 1505   }
 1506   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1507     return true;
 1508   }
 1509   return false;
 1510 }
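      // Examples (illustrative): (AndL x, ConL 0x00000000FFFFFFFF) and ConL 5
      // both have a known-zero high word, while ConL 0x100000000 does not.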
 1511 
 1512 %}
 1513 
 1514 //----------ENCODING BLOCK-----------------------------------------------------
 1515 // This block specifies the encoding classes used by the compiler to output
 1516 // byte streams.  Encoding classes generate functions which are called by
 1517 // Machine Instruction Nodes in order to generate the bit encoding of the
 1518 // instruction.  Operands specify their base encoding interface with the
 1519 // interface keyword.  Four interfaces are currently supported:
 1520 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1521 // operand to generate a function which returns its register number when
 1522 // queried.   CONST_INTER causes an operand to generate a function which
 1523 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1524 // operand to generate four functions which return the Base Register, the
 1525 // Index Register, the Scale Value, and the Offset Value of the operand when
 1526 // queried.  COND_INTER causes an operand to generate six functions which
 1527 // return the encoding code (ie - encoding bits for the instruction)
 1528 // associated with each basic boolean condition for a conditional instruction.
 1529 // Instructions specify two basic values for encoding.  They use the
 1530 // ins_encode keyword to specify their encoding class (which must be one of
 1531 // the class names specified in the encoding block), and they use the
 1532 // opcode keyword to specify, in order, their primary, secondary, and
 1533 // tertiary opcode.  Only the opcode sections which a particular instruction
 1534 // needs for encoding need to be specified.
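      // For example (illustrative), a reg-reg integer add could be described
      // with   opcode(0x03);  ins_encode( OpcP, RegReg(dst, src) );   so that
      // OpcP emits the 0x03 opcode byte and RegReg emits the ModRM byte that
      // selects the two registers.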
 1535 encode %{
 1536   // Build emit functions for each basic byte or larger field in the intel
 1537   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1538   // code in the enc_class source block.  Emit functions will live in the
 1539   // main source block for now.  In future, we can generalize this by
 1540   // adding a syntax that specifies the sizes of fields in an order,
 1541   // so that the adlc can build the emit functions automagically
 1542 
 1543   // Emit primary opcode
 1544   enc_class OpcP %{
 1545     emit_opcode(cbuf, $primary);
 1546   %}
 1547 
 1548   // Emit secondary opcode
 1549   enc_class OpcS %{
 1550     emit_opcode(cbuf, $secondary);
 1551   %}
 1552 
 1553   // Emit opcode directly
 1554   enc_class Opcode(immI d8) %{
 1555     emit_opcode(cbuf, $d8$$constant);
 1556   %}
 1557 
 1558   enc_class SizePrefix %{
 1559     emit_opcode(cbuf,0x66);
 1560   %}
 1561 
 1562   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1563     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1564   %}
 1565 
 1566   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1567     emit_opcode(cbuf,$opcode$$constant);
 1568     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1569   %}
 1570 
 1571   enc_class mov_r32_imm0( rRegI dst ) %{
 1572     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1573     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1574   %}
 1575 
 1576   enc_class cdq_enc %{
 1577     // Full implementation of Java idiv and irem; checks for
 1578     // special case as described in JVM spec., p.243 & p.271.
 1579     //
 1580     //         normal case                           special case
 1581     //
 1582     // input : rax: dividend                          min_int
 1583     //         reg: divisor                           -1
 1584     //
 1585     // output: rax: quotient  (= rax idiv reg)         min_int
 1586     //         rdx: remainder (= rax irem reg)         0
 1587     //
 1588     //  Code sequence:
 1589     //
 1590     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1591     //  0F 85 0B 00 00 00    jne         normal_case
 1592     //  33 D2                xor         edx,edx
 1593     //  83 F9 FF             cmp         ecx,0FFh
 1594     //  0F 84 03 00 00 00    je          done
 1595     //                  normal_case:
 1596     //  99                   cdq
 1597     //  F7 F9                idiv        ecx
 1598     //                  done:
 1599     //
 1600     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1601     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1603     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1604     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1605     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1606     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
 1607     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
 1608     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1609     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1610     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1611     // normal_case:
 1612     emit_opcode(cbuf,0x99);                                         // cdq
 1613     // idiv (note: must be emitted by the user of this rule)
 1614     // normal:
 1615   %}
 1616 
 1617   // Dense encoding for older common ops
 1618   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1619     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1620   %}
 1621 
 1622 
 1623   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1624   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1625     // Check for 8-bit immediate, and set sign extend bit in opcode
 1626     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1627       emit_opcode(cbuf, $primary | 0x02);
 1628     }
 1629     else {                          // If 32-bit immediate
 1630       emit_opcode(cbuf, $primary);
 1631     }
 1632   %}
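        // Example (illustrative): with $primary == 0x81 (the ALU r/m32,imm32
        // group) and an immediate of 5, setting the sign-extend bit yields
        // opcode 0x83, which takes an 8-bit sign-extended immediate instead
        // of a full 32-bit one.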
 1633 
 1634   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1635     // Emit primary opcode and set sign-extend bit
 1636     // Check for 8-bit immediate, and set sign extend bit in opcode
 1637     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1638       emit_opcode(cbuf, $primary | 0x02);    }
 1639     else {                          // If 32-bit immediate
 1640       emit_opcode(cbuf, $primary);
 1641     }
 1642     // Emit r/m byte with secondary opcode, after primary opcode.
 1643     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1644   %}
 1645 
 1646   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1649       $$$emit8$imm$$constant;
 1650     }
 1651     else {                          // If 32-bit immediate
 1652       // Output immediate
 1653       $$$emit32$imm$$constant;
 1654     }
 1655   %}
 1656 
 1657   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1658     // Emit primary opcode and set sign-extend bit
 1659     // Check for 8-bit immediate, and set sign extend bit in opcode
 1660     int con = (int)$imm$$constant; // Throw away top bits
 1661     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1662     // Emit r/m byte with secondary opcode, after primary opcode.
 1663     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1664     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1665     else                               emit_d32(cbuf,con);
 1666   %}
 1667 
 1668   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1669     // Emit primary opcode and set sign-extend bit
 1670     // Check for 8-bit immediate, and set sign extend bit in opcode
 1671     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1672     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1673     // Emit r/m byte with tertiary opcode, after primary opcode.
 1674     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1675     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1676     else                               emit_d32(cbuf,con);
 1677   %}
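        // Illustrative use of the two helpers above: a long add-immediate
        // instruct would typically supply primary 0x81 with secondary /0 (ADD)
        // and tertiary /2 (ADC), so the low word gets ADD lo,imm and the high
        // word gets ADC hi,imm, each in the short imm8 form when that half of
        // the constant fits in 8 bits.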
 1678 
 1679   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1680     emit_cc(cbuf, $secondary, $dst$$reg );
 1681   %}
 1682 
 1683   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1684     int destlo = $dst$$reg;
 1685     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1686     // bswap lo
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, destlo);
 1689     // bswap hi
 1690     emit_opcode(cbuf, 0x0F);
 1691     emit_cc(cbuf, 0xC8, desthi);
 1692     // xchg lo and hi
 1693     emit_opcode(cbuf, 0x87);
 1694     emit_rm(cbuf, 0x3, destlo, desthi);
 1695   %}
 1696 
 1697   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1698     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1699   %}
 1700 
 1701   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1702     $$$emit8$primary;
 1703     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1704   %}
 1705 
 1706   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1707     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1708     emit_d8(cbuf, op >> 8 );
 1709     emit_d8(cbuf, op & 255);
 1710   %}
 1711 
 1712   // emulate a CMOV with a conditional branch around a MOV
 1713   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1714     // Invert sense of branch from sense of CMOV
 1715     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1716     emit_d8( cbuf, $brOffs$$constant );
 1717   %}
 1718 
 1719   enc_class enc_PartialSubtypeCheck( ) %{
 1720     Register Redi = as_Register(EDI_enc); // result register
 1721     Register Reax = as_Register(EAX_enc); // super class
 1722     Register Recx = as_Register(ECX_enc); // killed
 1723     Register Resi = as_Register(ESI_enc); // sub class
 1724     Label miss;
 1725 
 1726     MacroAssembler _masm(&cbuf);
 1727     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1728                                      NULL, &miss,
 1729                                      /*set_cond_codes:*/ true);
 1730     if ($primary) {
 1731       __ xorptr(Redi, Redi);
 1732     }
 1733     __ bind(miss);
 1734   %}
 1735 
 1736   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1737     MacroAssembler masm(&cbuf);
 1738     int start = masm.offset();
 1739     if (UseSSE >= 2) {
 1740       if (VerifyFPU) {
 1741         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1742       }
 1743     } else {
 1744       // External c_calling_convention expects the FPU stack to be 'clean'.
 1745       // Compiled code leaves it dirty.  Do cleanup now.
 1746       masm.empty_FPU_stack();
 1747     }
 1748     if (sizeof_FFree_Float_Stack_All == -1) {
 1749       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1750     } else {
 1751       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1752     }
 1753   %}
 1754 
 1755   enc_class Verify_FPU_For_Leaf %{
 1756     if( VerifyFPU ) {
 1757       MacroAssembler masm(&cbuf);
 1758       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1759     }
 1760   %}
 1761 
 1762   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1763     // This is the instruction starting address for relocation info.
 1764     MacroAssembler _masm(&cbuf);
 1765     cbuf.set_insts_mark();
 1766     $$$emit8$primary;
 1767     // CALL directly to the runtime
 1768     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1769                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1770     __ post_call_nop();
 1771 
 1772     if (UseSSE >= 2) {
 1773       MacroAssembler _masm(&cbuf);
 1774       BasicType rt = tf()->return_type();
 1775 
 1776       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1777         // A C runtime call where the return value is unused.  In SSE2+
 1778         // mode the result needs to be removed from the FPU stack.  It's
 1779         // likely that this function call could be removed by the
 1780         // optimizer if the C function is a pure function.
 1781         __ ffree(0);
 1782       } else if (rt == T_FLOAT) {
 1783         __ lea(rsp, Address(rsp, -4));
 1784         __ fstp_s(Address(rsp, 0));
 1785         __ movflt(xmm0, Address(rsp, 0));
 1786         __ lea(rsp, Address(rsp,  4));
 1787       } else if (rt == T_DOUBLE) {
 1788         __ lea(rsp, Address(rsp, -8));
 1789         __ fstp_d(Address(rsp, 0));
 1790         __ movdbl(xmm0, Address(rsp, 0));
 1791         __ lea(rsp, Address(rsp,  8));
 1792       }
 1793     }
 1794   %}
 1795 
 1796   enc_class pre_call_resets %{
 1797     // If the method runs in 24-bit FP mode, restore the standard FPU control word here
 1798     debug_only(int off0 = cbuf.insts_size());
 1799     if (ra_->C->in_24_bit_fp_mode()) {
 1800       MacroAssembler _masm(&cbuf);
 1801       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1802     }
 1803     // Clear upper bits of YMM registers when current compiled code uses
 1804     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1805     MacroAssembler _masm(&cbuf);
 1806     __ vzeroupper();
 1807     debug_only(int off1 = cbuf.insts_size());
 1808     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1809   %}
 1810 
 1811   enc_class post_call_FPU %{
 1812     // If the method runs in 24-bit FP mode, re-install the 24-bit FPU control word here as well
 1813     if (Compile::current()->in_24_bit_fp_mode()) {
 1814       MacroAssembler masm(&cbuf);
 1815       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1816     }
 1817   %}
 1818 
 1819   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1820     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1821     // who we intended to call.
 1822     MacroAssembler _masm(&cbuf);
 1823     cbuf.set_insts_mark();
 1824     $$$emit8$primary;
 1825 
 1826     if (!_method) {
 1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1828                      runtime_call_Relocation::spec(),
 1829                      RELOC_IMM32);
 1830       __ post_call_nop();
 1831     } else {
 1832       int method_index = resolved_method_index(cbuf);
 1833       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1834                                                   : static_call_Relocation::spec(method_index);
 1835       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1836                      rspec, RELOC_DISP32);
 1837       __ post_call_nop();
 1838       address mark = cbuf.insts_mark();
 1839       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1840         // Calls of the same statically bound method can share
 1841         // a stub to the interpreter.
 1842         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1843       } else {
 1844         // Emit stubs for static call.
 1845         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1846         if (stub == NULL) {
 1847           ciEnv::current()->record_failure("CodeCache is full");
 1848           return;
 1849         }
 1850       }
 1851     }
 1852   %}
 1853 
 1854   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1855     MacroAssembler _masm(&cbuf);
 1856     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1857     __ post_call_nop();
 1858   %}
 1859 
 1860   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1861     int disp = in_bytes(Method::from_compiled_offset());
 1862     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1863 
 1864     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1865     MacroAssembler _masm(&cbuf);
 1866     cbuf.set_insts_mark();
 1867     $$$emit8$primary;
 1868     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1869     emit_d8(cbuf, disp);             // Displacement
 1870     __ post_call_nop();
 1871   %}
 1872 
 1873 //   Following encoding is no longer used, but may be restored if calling
 1874 //   convention changes significantly.
 1875 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1876 //
 1877 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1878 //     // int ic_reg     = Matcher::inline_cache_reg();
 1879 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1880 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1881 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1882 //
 1883 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1884 //     // // so we load it immediately before the call
 1885 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1886 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1887 //
 1888 //     // xor rbp,ebp
 1889 //     emit_opcode(cbuf, 0x33);
 1890 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1891 //
 1892 //     // CALL to interpreter.
 1893 //     cbuf.set_insts_mark();
 1894 //     $$$emit8$primary;
 1895 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1896 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1897 //   %}
 1898 
 1899   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1900     $$$emit8$primary;
 1901     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1902     $$$emit8$shift$$constant;
 1903   %}
 1904 
 1905   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1906     // Load immediate does not have a zero or sign extended version
 1907     // for 8-bit immediates
 1908     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1909     $$$emit32$src$$constant;
 1910   %}
 1911 
 1912   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1913     // Load immediate does not have a zero or sign extended version
 1914     // for 8-bit immediates
 1915     emit_opcode(cbuf, $primary + $dst$$reg);
 1916     $$$emit32$src$$constant;
 1917   %}
 1918 
 1919   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1920     // Load immediate does not have a zero or sign extended version
 1921     // for 8-bit immediates
 1922     int dst_enc = $dst$$reg;
 1923     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1924     if (src_con == 0) {
 1925       // xor dst, dst
 1926       emit_opcode(cbuf, 0x33);
 1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1928     } else {
 1929       emit_opcode(cbuf, $primary + dst_enc);
 1930       emit_d32(cbuf, src_con);
 1931     }
 1932   %}
 1933 
 1934   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1935     // Load immediate does not have a zero or sign extended version
 1936     // for 8-bit immediates
 1937     int dst_enc = $dst$$reg + 2;
 1938     int src_con = ((julong)($src$$constant)) >> 32;
 1939     if (src_con == 0) {
 1940       // xor dst, dst
 1941       emit_opcode(cbuf, 0x33);
 1942       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1943     } else {
 1944       emit_opcode(cbuf, $primary + dst_enc);
 1945       emit_d32(cbuf, src_con);
 1946     }
 1947   %}
 1948 
 1949 
 1950   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1951   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1952     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1953   %}
 1954 
 1955   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1956     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1957   %}
 1958 
 1959   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1960     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1961   %}
 1962 
 1963   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1964     $$$emit8$primary;
 1965     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1966   %}
 1967 
 1968   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1969     $$$emit8$secondary;
 1970     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1971   %}
 1972 
 1973   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1974     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1975   %}
 1976 
 1977   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1978     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1979   %}
 1980 
 1981   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1982     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1983   %}
 1984 
 1985   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1986     // Output immediate
 1987     $$$emit32$src$$constant;
 1988   %}
 1989 
 1990   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1991     // Output Float immediate bits
 1992     jfloat jf = $src$$constant;
 1993     int    jf_as_bits = jint_cast( jf );
 1994     emit_d32(cbuf, jf_as_bits);
 1995   %}
 1996 
 1997   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1998     // Output Float immediate bits
 1999     jfloat jf = $src$$constant;
 2000     int    jf_as_bits = jint_cast( jf );
 2001     emit_d32(cbuf, jf_as_bits);
 2002   %}
 2003 
 2004   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2005     // Output immediate
 2006     $$$emit16$src$$constant;
 2007   %}
 2008 
 2009   enc_class Con_d32(immI src) %{
 2010     emit_d32(cbuf,$src$$constant);
 2011   %}
 2012 
 2013   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2014     // Output immediate memory reference
 2015     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2016     emit_d32(cbuf, 0x00);
 2017   %}
 2018 
 2019   enc_class lock_prefix( ) %{
 2020     emit_opcode(cbuf,0xF0);         // [Lock]
 2021   %}
 2022 
 2023   // Cmp-xchg long value.
 2024   // Note: we need to swap rbx and rcx before and after the
 2025   //       cmpxchg8 instruction because the instruction uses
 2026   //       rcx as the high-order word of the new value to store, but
 2027   //       our register encoding uses rbx.
 2028   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2029 
 2030     // XCHG  rbx,ecx
 2031     emit_opcode(cbuf,0x87);
 2032     emit_opcode(cbuf,0xD9);
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035     // CMPXCHG8 [Eptr]
 2036     emit_opcode(cbuf,0x0F);
 2037     emit_opcode(cbuf,0xC7);
 2038     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2039     // XCHG  rbx,ecx
 2040     emit_opcode(cbuf,0x87);
 2041     emit_opcode(cbuf,0xD9);
 2042   %}
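        // Emitted sequence (illustrative): XCHG EBX,ECX; LOCK CMPXCHG8B [mem];
        // XCHG EBX,ECX.  CMPXCHG8B takes the new value in ECX:EBX and the
        // compare value in EDX:EAX, so the swap reconciles the instruction's
        // fixed register usage with our EBX-based long encoding.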
 2043 
 2044   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2045     // [Lock]
 2046     emit_opcode(cbuf,0xF0);
 2047 
 2048     // CMPXCHG [Eptr]
 2049     emit_opcode(cbuf,0x0F);
 2050     emit_opcode(cbuf,0xB1);
 2051     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2052   %}
 2053 
 2054   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2055     // [Lock]
 2056     emit_opcode(cbuf,0xF0);
 2057 
 2058     // CMPXCHGB [Eptr]
 2059     emit_opcode(cbuf,0x0F);
 2060     emit_opcode(cbuf,0xB0);
 2061     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2062   %}
 2063 
 2064   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2065     // [Lock]
 2066     emit_opcode(cbuf,0xF0);
 2067 
 2068     // operand-size prefix (16-bit operands)
 2069     emit_opcode(cbuf, 0x66);
 2070 
 2071     // CMPXCHGW [Eptr]
 2072     emit_opcode(cbuf,0x0F);
 2073     emit_opcode(cbuf,0xB1);
 2074     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2075   %}
 2076 
 2077   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2078     int res_encoding = $res$$reg;
 2079 
 2080     // MOV  res,0
 2081     emit_opcode( cbuf, 0xB8 + res_encoding);
 2082     emit_d32( cbuf, 0 );
 2083     // JNE,s  fail
 2084     emit_opcode(cbuf,0x75);
 2085     emit_d8(cbuf, 5 );
 2086     // MOV  res,1
 2087     emit_opcode( cbuf, 0xB8 + res_encoding);
 2088     emit_d32( cbuf, 1 );
 2089     // fail:
 2090   %}
 2091 
 2092   enc_class set_instruction_start( ) %{
 2093     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2094   %}
 2095 
 2096   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2097     int reg_encoding = $ereg$$reg;
 2098     int base  = $mem$$base;
 2099     int index = $mem$$index;
 2100     int scale = $mem$$scale;
 2101     int displace = $mem$$disp;
 2102     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2103     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2104   %}
 2105 
 2106   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2107     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2108     int base  = $mem$$base;
 2109     int index = $mem$$index;
 2110     int scale = $mem$$scale;
 2111     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2112     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2113     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2114   %}
 2115 
 2116   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2117     int r1, r2;
 2118     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2119     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2120     emit_opcode(cbuf,0x0F);
 2121     emit_opcode(cbuf,$tertiary);
 2122     emit_rm(cbuf, 0x3, r1, r2);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124     emit_d8(cbuf,$primary);
 2125     emit_rm(cbuf, 0x3, $secondary, r1);
 2126     emit_d8(cbuf,$cnt$$constant);
 2127   %}
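        // Illustrative expansion: for a long left shift by 5 the instruct
        // supplies tertiary 0xA4 (SHLD), so the sequence is SHLD hi,lo,5
        // followed by SHL lo,5; for right shifts the tertiary opcode is 0xAC
        // (SHRD) and the roles of the low and high halves are swapped.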
 2128 
 2129   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2130     emit_opcode( cbuf, 0x8B ); // Move
 2131     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2132     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2133       emit_d8(cbuf,$primary);
 2134       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2135       emit_d8(cbuf,$cnt$$constant-32);
 2136     }
 2137     emit_d8(cbuf,$primary);
 2138     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2139     emit_d8(cbuf,31);
 2140   %}
 2141 
 2142   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2143     int r1, r2;
 2144     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2145     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2146 
 2147     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2148     emit_rm(cbuf, 0x3, r1, r2);
 2149     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2150       emit_opcode(cbuf,$primary);
 2151       emit_rm(cbuf, 0x3, $secondary, r1);
 2152       emit_d8(cbuf,$cnt$$constant-32);
 2153     }
 2154     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2155     emit_rm(cbuf, 0x3, r2, r2);
 2156   %}
 2157 
 2158   // Clone of RegMem but accepts an extra parameter to access each
 2159   // half of a double in memory; it never needs relocation info.
 2160   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2161     emit_opcode(cbuf,$opcode$$constant);
 2162     int reg_encoding = $rm_reg$$reg;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp + $disp_for_half$$constant;
 2167     relocInfo::relocType disp_reloc = relocInfo::none;
 2168     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2169   %}
 2170 
 2171   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2172   //
 2173   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2174   // and it never needs relocation information.
 2175   // Frequently used to move data between FPU's Stack Top and memory.
 2176   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2177     int rm_byte_opcode = $rm_opcode$$constant;
 2178     int base     = $mem$$base;
 2179     int index    = $mem$$index;
 2180     int scale    = $mem$$scale;
 2181     int displace = $mem$$disp;
 2182     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2183     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2184   %}
 2185 
 2186   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2187     int rm_byte_opcode = $rm_opcode$$constant;
 2188     int base     = $mem$$base;
 2189     int index    = $mem$$index;
 2190     int scale    = $mem$$scale;
 2191     int displace = $mem$$disp;
 2192     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2193     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2194   %}
 2195 
 2196   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2197     int reg_encoding = $dst$$reg;
 2198     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2199     int index        = 0x04;            // 0x04 indicates no index
 2200     int scale        = 0x00;            // 0x00 indicates no scale
 2201     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2202     relocInfo::relocType disp_reloc = relocInfo::none;
 2203     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2204   %}
 2205 
 2206   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2207     // Compare dst,src
 2208     emit_opcode(cbuf,0x3B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210     // jmp dst < src around move
 2211     emit_opcode(cbuf,0x7C);
 2212     emit_d8(cbuf,2);
 2213     // move dst,src
 2214     emit_opcode(cbuf,0x8B);
 2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2216   %}
 2217 
 2218   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2219     // Compare dst,src
 2220     emit_opcode(cbuf,0x3B);
 2221     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2222     // jmp dst > src around move
 2223     emit_opcode(cbuf,0x7F);
 2224     emit_d8(cbuf,2);
 2225     // move dst,src
 2226     emit_opcode(cbuf,0x8B);
 2227     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2228   %}
 2229 
 2230   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2231     // If src is FPR1, we can just FST to store it.
 2232     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2233     int reg_encoding = 0x2; // Just store
 2234     int base  = $mem$$base;
 2235     int index = $mem$$index;
 2236     int scale = $mem$$scale;
 2237     int displace = $mem$$disp;
 2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2239     if( $src$$reg != FPR1L_enc ) {
 2240       reg_encoding = 0x3;  // Store & pop
 2241       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2242       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2243     }
 2244     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2245     emit_opcode(cbuf,$primary);
 2246     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2247   %}
 2248 
 2249   enc_class neg_reg(rRegI dst) %{
 2250     // NEG $dst
 2251     emit_opcode(cbuf,0xF7);
 2252     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2253   %}
 2254 
 2255   enc_class setLT_reg(eCXRegI dst) %{
 2256     // SETLT $dst
 2257     emit_opcode(cbuf,0x0F);
 2258     emit_opcode(cbuf,0x9C);
 2259     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2260   %}
 2261 
 2262   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2263     int tmpReg = $tmp$$reg;
 2264 
 2265     // SUB $p,$q
 2266     emit_opcode(cbuf,0x2B);
 2267     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2268     // SBB $tmp,$tmp
 2269     emit_opcode(cbuf,0x1B);
 2270     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2271     // AND $tmp,$y
 2272     emit_opcode(cbuf,0x23);
 2273     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2274     // ADD $p,$tmp
 2275     emit_opcode(cbuf,0x03);
 2276     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2277   %}
 2278 
 2279   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2280     // TEST shift,32
 2281     emit_opcode(cbuf,0xF7);
 2282     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2283     emit_d32(cbuf,0x20);
 2284     // JEQ,s small
 2285     emit_opcode(cbuf, 0x74);
 2286     emit_d8(cbuf, 0x04);
 2287     // MOV    $dst.hi,$dst.lo
 2288     emit_opcode( cbuf, 0x8B );
 2289     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2290     // CLR    $dst.lo
 2291     emit_opcode(cbuf, 0x33);
 2292     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2293 // small:
 2294     // SHLD   $dst.hi,$dst.lo,$shift
 2295     emit_opcode(cbuf,0x0F);
 2296     emit_opcode(cbuf,0xA5);
 2297     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2298     // SHL    $dst.lo,$shift
 2299     emit_opcode(cbuf,0xD3);
 2300     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2301   %}
 2302 
 2303   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2304     // TEST shift,32
 2305     emit_opcode(cbuf,0xF7);
 2306     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2307     emit_d32(cbuf,0x20);
 2308     // JEQ,s small
 2309     emit_opcode(cbuf, 0x74);
 2310     emit_d8(cbuf, 0x04);
 2311     // MOV    $dst.lo,$dst.hi
 2312     emit_opcode( cbuf, 0x8B );
 2313     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2314     // CLR    $dst.hi
 2315     emit_opcode(cbuf, 0x33);
 2316     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2317 // small:
 2318     // SHRD   $dst.lo,$dst.hi,$shift
 2319     emit_opcode(cbuf,0x0F);
 2320     emit_opcode(cbuf,0xAD);
 2321     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2322     // SHR    $dst.hi,$shift
 2323     emit_opcode(cbuf,0xD3);
 2324     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2325   %}
 2326 
 2327   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2328     // TEST shift,32
 2329     emit_opcode(cbuf,0xF7);
 2330     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2331     emit_d32(cbuf,0x20);
 2332     // JEQ,s small
 2333     emit_opcode(cbuf, 0x74);
 2334     emit_d8(cbuf, 0x05);
 2335     // MOV    $dst.lo,$dst.hi
 2336     emit_opcode( cbuf, 0x8B );
 2337     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     // SAR    $dst.hi,31
 2339     emit_opcode(cbuf, 0xC1);
 2340     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2341     emit_d8(cbuf, 0x1F );
 2342 // small:
 2343     // SHRD   $dst.lo,$dst.hi,$shift
 2344     emit_opcode(cbuf,0x0F);
 2345     emit_opcode(cbuf,0xAD);
 2346     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2347     // SAR    $dst.hi,$shift
 2348     emit_opcode(cbuf,0xD3);
 2349     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2350   %}
 2351 
 2352 
 2353   // ----------------- Encodings for floating point unit -----------------
 2354   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2355   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2356     $$$emit8$primary;
 2357     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2358   %}
 2359 
 2360   // Pop argument in FPR0 with FSTP ST(0)
 2361   enc_class PopFPU() %{
 2362     emit_opcode( cbuf, 0xDD );
 2363     emit_d8( cbuf, 0xD8 );
 2364   %}
 2365 
 2366   // !!!!! equivalent to Pop_Reg_F
 2367   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2368     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2369     emit_d8( cbuf, 0xD8+$dst$$reg );
 2370   %}
 2371 
 2372   enc_class Push_Reg_DPR( regDPR dst ) %{
 2373     emit_opcode( cbuf, 0xD9 );
 2374     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2375   %}
 2376 
 2377   enc_class strictfp_bias1( regDPR dst ) %{
 2378     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2379     emit_opcode( cbuf, 0x2D );
 2380     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2381     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2382     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2383   %}
 2384 
 2385   enc_class strictfp_bias2( regDPR dst ) %{
 2386     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2387     emit_opcode( cbuf, 0x2D );
 2388     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2389     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2390     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2391   %}
 2392 
 2393   // Special case for moving an integer register to a stack slot.
 2394   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2395     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2396   %}
 2397 
 2398   // Special case for moving a register to a stack slot.
 2399   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2400     // Opcode already emitted
 2401     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2402     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2403     emit_d32(cbuf, $dst$$disp);   // Displacement
 2404   %}
 2405 
 2406   // Push the integer in stackSlot 'src' onto FP-stack
 2407   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2408     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2409   %}
 2410 
 2411   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2412   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2413     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2414   %}
 2415 
 2416   // Same as Pop_Mem_F except for opcode
 2417   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2418   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2419     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2420   %}
 2421 
 2422   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2423     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2424     emit_d8( cbuf, 0xD8+$dst$$reg );
 2425   %}
 2426 
 2427   enc_class Push_Reg_FPR( regFPR dst ) %{
 2428     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2429     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2430   %}
 2431 
 2432   // Push FPU's float to a stack-slot, and pop FPU-stack
 2433   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2434     int pop = 0x02;
 2435     if ($src$$reg != FPR1L_enc) {
 2436       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2437       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2438       pop = 0x03;
 2439     }
 2440     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2441   %}
 2442 
 2443   // Push FPU's double to a stack-slot, and pop FPU-stack
 2444   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2445     int pop = 0x02;
 2446     if ($src$$reg != FPR1L_enc) {
 2447       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2448       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2449       pop = 0x03;
 2450     }
 2451     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2452   %}
 2453 
 2454   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2455   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2456     int pop = 0xD0 - 1; // -1 since we skip FLD
 2457     if ($src$$reg != FPR1L_enc) {
 2458       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2459       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2460       pop = 0xD8;
 2461     }
 2462     emit_opcode( cbuf, 0xDD );
 2463     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2464   %}
 2465 
 2466 
 2467   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2468     // load dst in FPR0
 2469     emit_opcode( cbuf, 0xD9 );
 2470     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2471     if ($src$$reg != FPR1L_enc) {
 2472       // fincstp
 2473       emit_opcode (cbuf, 0xD9);
 2474       emit_opcode (cbuf, 0xF7);
 2475       // swap src with FPR1:
 2476       // FXCH FPR1 with src
 2477       emit_opcode(cbuf, 0xD9);
 2478       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2479       // fdecstp
 2480       emit_opcode (cbuf, 0xD9);
 2481       emit_opcode (cbuf, 0xF6);
 2482     }
 2483   %}
 2484 
 2485   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2486     MacroAssembler _masm(&cbuf);
 2487     __ subptr(rsp, 8);
 2488     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2489     __ fld_d(Address(rsp, 0));
 2490     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2491     __ fld_d(Address(rsp, 0));
 2492   %}
 2493 
 2494   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2495     MacroAssembler _masm(&cbuf);
 2496     __ subptr(rsp, 4);
 2497     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2498     __ fld_s(Address(rsp, 0));
 2499     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2500     __ fld_s(Address(rsp, 0));
 2501   %}
 2502 
 2503   enc_class Push_ResultD(regD dst) %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ fstp_d(Address(rsp, 0));
 2506     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2507     __ addptr(rsp, 8);
 2508   %}
 2509 
 2510   enc_class Push_ResultF(regF dst, immI d8) %{
 2511     MacroAssembler _masm(&cbuf);
 2512     __ fstp_s(Address(rsp, 0));
 2513     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2514     __ addptr(rsp, $d8$$constant);
 2515   %}
 2516 
 2517   enc_class Push_SrcD(regD src) %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ subptr(rsp, 8);
 2520     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2521     __ fld_d(Address(rsp, 0));
 2522   %}
 2523 
 2524   enc_class push_stack_temp_qword() %{
 2525     MacroAssembler _masm(&cbuf);
 2526     __ subptr(rsp, 8);
 2527   %}
 2528 
 2529   enc_class pop_stack_temp_qword() %{
 2530     MacroAssembler _masm(&cbuf);
 2531     __ addptr(rsp, 8);
 2532   %}
 2533 
 2534   enc_class push_xmm_to_fpr1(regD src) %{
 2535     MacroAssembler _masm(&cbuf);
 2536     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2537     __ fld_d(Address(rsp, 0));
 2538   %}
 2539 
 2540   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2541     if ($src$$reg != FPR1L_enc) {
 2542       // fincstp
 2543       emit_opcode (cbuf, 0xD9);
 2544       emit_opcode (cbuf, 0xF7);
 2545       // FXCH FPR1 with src
 2546       emit_opcode(cbuf, 0xD9);
 2547       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2548       // fdecstp
 2549       emit_opcode (cbuf, 0xD9);
 2550       emit_opcode (cbuf, 0xF6);
 2551     }
 2552     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2553     // // FSTP   FPR$dst$$reg
 2554     // emit_opcode( cbuf, 0xDD );
 2555     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2556   %}
 2557 
 2558   enc_class fnstsw_sahf_skip_parity() %{
 2559     // fnstsw ax
 2560     emit_opcode( cbuf, 0xDF );
 2561     emit_opcode( cbuf, 0xE0 );
 2562     // sahf
 2563     emit_opcode( cbuf, 0x9E );
 2564     // jnp  ::skip
 2565     emit_opcode( cbuf, 0x7B );
 2566     emit_opcode( cbuf, 0x05 );
 2567   %}
 2568 
 2569   enc_class emitModDPR() %{
 2570     // fprem must be iterative
 2571     // :: loop
 2572     // fprem
 2573     emit_opcode( cbuf, 0xD9 );
 2574     emit_opcode( cbuf, 0xF8 );
 2575     // wait
 2576     emit_opcode( cbuf, 0x9b );
 2577     // fnstsw ax
 2578     emit_opcode( cbuf, 0xDF );
 2579     emit_opcode( cbuf, 0xE0 );
 2580     // sahf
 2581     emit_opcode( cbuf, 0x9E );
 2582     // jp  ::loop
 2583     emit_opcode( cbuf, 0x0F );
 2584     emit_opcode( cbuf, 0x8A );
 2585     emit_opcode( cbuf, 0xF4 );
 2586     emit_opcode( cbuf, 0xFF );
 2587     emit_opcode( cbuf, 0xFF );
 2588     emit_opcode( cbuf, 0xFF );
 2589   %}
 2590 
 2591   enc_class fpu_flags() %{
 2592     // fnstsw_ax
 2593     emit_opcode( cbuf, 0xDF);
 2594     emit_opcode( cbuf, 0xE0);
 2595     // test ax,0x0400
 2596     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2597     emit_opcode( cbuf, 0xA9 );
 2598     emit_d16   ( cbuf, 0x0400 );
 2599     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2600     // // test rax,0x0400
 2601     // emit_opcode( cbuf, 0xA9 );
 2602     // emit_d32   ( cbuf, 0x00000400 );
 2603     //
 2604     // jz exit (no unordered comparison)
 2605     emit_opcode( cbuf, 0x74 );
 2606     emit_d8    ( cbuf, 0x02 );
 2607     // mov ah,1 - treat as LT case (set carry flag)
 2608     emit_opcode( cbuf, 0xB4 );
 2609     emit_d8    ( cbuf, 0x01 );
 2610     // sahf
 2611     emit_opcode( cbuf, 0x9E);
 2612   %}
 2613 
 2614   enc_class cmpF_P6_fixup() %{
 2615     // Fixup the integer flags in case comparison involved a NaN
 2616     //
 2617     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2618     emit_opcode( cbuf, 0x7B );
 2619     emit_d8    ( cbuf, 0x03 );
 2620     // MOV AH,1 - treat as LT case (set carry flag)
 2621     emit_opcode( cbuf, 0xB4 );
 2622     emit_d8    ( cbuf, 0x01 );
 2623     // SAHF
 2624     emit_opcode( cbuf, 0x9E);
 2625     // NOP     // target for branch to avoid branch to branch
 2626     emit_opcode( cbuf, 0x90);
 2627   %}
 2628 
 2629 //     fnstsw_ax();
 2630 //     sahf();
 2631 //     movl(dst, nan_result);
 2632 //     jcc(Assembler::parity, exit);
 2633 //     movl(dst, less_result);
 2634 //     jcc(Assembler::below, exit);
 2635 //     movl(dst, equal_result);
 2636 //     jcc(Assembler::equal, exit);
 2637 //     movl(dst, greater_result);
 2638 
 2639 // less_result     =  1;
 2640 // greater_result  = -1;
 2641 // equal_result    = 0;
 2642 // nan_result      = -1;
 2643 
 2644   enc_class CmpF_Result(rRegI dst) %{
 2645     // fnstsw_ax();
 2646     emit_opcode( cbuf, 0xDF);
 2647     emit_opcode( cbuf, 0xE0);
 2648     // sahf
 2649     emit_opcode( cbuf, 0x9E);
 2650     // movl(dst, nan_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, -1 );
 2653     // jcc(Assembler::parity, exit);
 2654     emit_opcode( cbuf, 0x7A );
 2655     emit_d8    ( cbuf, 0x13 );
 2656     // movl(dst, less_result);
 2657     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2658     emit_d32( cbuf, -1 );
 2659     // jcc(Assembler::below, exit);
 2660     emit_opcode( cbuf, 0x72 );
 2661     emit_d8    ( cbuf, 0x0C );
 2662     // movl(dst, equal_result);
 2663     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2664     emit_d32( cbuf, 0 );
 2665     // jcc(Assembler::equal, exit);
 2666     emit_opcode( cbuf, 0x74 );
 2667     emit_d8    ( cbuf, 0x05 );
 2668     // movl(dst, greater_result);
 2669     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2670     emit_d32( cbuf, 1 );
 2671   %}
 2672 
 2673 
 2674   // Compare the longs and set flags
 2675   // BROKEN!  Do Not use as-is
 2676   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2677     // CMP    $src1.hi,$src2.hi
 2678     emit_opcode( cbuf, 0x3B );
 2679     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2680     // JNE,s  done
 2681     emit_opcode(cbuf,0x75);
 2682     emit_d8(cbuf, 2 );
 2683     // CMP    $src1.lo,$src2.lo
 2684     emit_opcode( cbuf, 0x3B );
 2685     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2686 // done:
 2687   %}
 2688 
 2689   enc_class convert_int_long( regL dst, rRegI src ) %{
 2690     // mov $dst.lo,$src
 2691     int dst_encoding = $dst$$reg;
 2692     int src_encoding = $src$$reg;
 2693     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2694     // mov $dst.hi,$src
 2695     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2696     // sar $dst.hi,31
 2697     emit_opcode( cbuf, 0xC1 );
 2698     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2699     emit_d8(cbuf, 0x1F );
 2700   %}
 2701 
 2702   enc_class convert_long_double( eRegL src ) %{
 2703     // push $src.hi
 2704     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2705     // push $src.lo
 2706     emit_opcode(cbuf, 0x50+$src$$reg  );
 2707     // fild 64-bits at [SP]
 2708     emit_opcode(cbuf,0xdf);
 2709     emit_d8(cbuf, 0x6C);
 2710     emit_d8(cbuf, 0x24);
 2711     emit_d8(cbuf, 0x00);
 2712     // pop stack
 2713     emit_opcode(cbuf, 0x83); // add  SP, #8
 2714     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2715     emit_d8(cbuf, 0x8);
 2716   %}
 2717 
 2718   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2719     // IMUL   EDX:EAX,$src1
 2720     emit_opcode( cbuf, 0xF7 );
 2721     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2722     // SAR    EDX,$cnt-32
 2723     int shift_count = ((int)$cnt$$constant) - 32;
 2724     if (shift_count > 0) {
 2725       emit_opcode(cbuf, 0xC1);
 2726       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2727       emit_d8(cbuf, shift_count);
 2728     }
 2729   %}
 2730 
  // Same as convert_long_double, but without the trailing ADD ESP, 8
 2732   enc_class convert_long_double2( eRegL src ) %{
 2733     // push $src.hi
 2734     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2735     // push $src.lo
 2736     emit_opcode(cbuf, 0x50+$src$$reg  );
 2737     // fild 64-bits at [SP]
 2738     emit_opcode(cbuf,0xdf);
 2739     emit_d8(cbuf, 0x6C);
 2740     emit_d8(cbuf, 0x24);
 2741     emit_d8(cbuf, 0x00);
 2742   %}
 2743 
 2744   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2745     // Basic idea: long = (long)int * (long)int
 2746     // IMUL EDX:EAX, src
 2747     emit_opcode( cbuf, 0xF7 );
 2748     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2749   %}
 2750 
 2751   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2752     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2753     // MUL EDX:EAX, src
 2754     emit_opcode( cbuf, 0xF7 );
 2755     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2756   %}
 2757 
 2758   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2759     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2760     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2761     // MOV    $tmp,$src.lo
 2762     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2763     // IMUL   $tmp,EDX
 2764     emit_opcode( cbuf, 0x0F );
 2765     emit_opcode( cbuf, 0xAF );
 2766     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2767     // MOV    EDX,$src.hi
 2768     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2769     // IMUL   EDX,EAX
 2770     emit_opcode( cbuf, 0x0F );
 2771     emit_opcode( cbuf, 0xAF );
 2772     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2773     // ADD    $tmp,EDX
 2774     emit_opcode( cbuf, 0x03 );
 2775     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2776     // MUL   EDX:EAX,$src.lo
 2777     emit_opcode( cbuf, 0xF7 );
 2778     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2780     emit_opcode( cbuf, 0x03 );
 2781     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2782   %}
 2783 
 2784   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2785     // Basic idea: lo(result) = lo(src * y_lo)
 2786     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2787     // IMUL   $tmp,EDX,$src
 2788     emit_opcode( cbuf, 0x6B );
 2789     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2790     emit_d8( cbuf, (int)$src$$constant );
 2791     // MOV    EDX,$src
 2792     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2793     emit_d32( cbuf, (int)$src$$constant );
 2794     // MUL   EDX:EAX,EDX
 2795     emit_opcode( cbuf, 0xF7 );
 2796     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2798     emit_opcode( cbuf, 0x03 );
 2799     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2800   %}
 2801 
 2802   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2803     // PUSH src1.hi
 2804     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2805     // PUSH src1.lo
 2806     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2807     // PUSH src2.hi
 2808     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2809     // PUSH src2.lo
 2810     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2811     // CALL directly to the runtime
 2812     MacroAssembler _masm(&cbuf);
 2813     cbuf.set_insts_mark();
 2814     emit_opcode(cbuf,0xE8);       // Call into runtime
 2815     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2816     __ post_call_nop();
 2817     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed argument words)
 2819     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2820     emit_d8(cbuf, 4*4);
 2821   %}
 2822 
 2823   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2824     // PUSH src1.hi
 2825     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2826     // PUSH src1.lo
 2827     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2828     // PUSH src2.hi
 2829     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2830     // PUSH src2.lo
 2831     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2832     // CALL directly to the runtime
 2833     MacroAssembler _masm(&cbuf);
 2834     cbuf.set_insts_mark();
 2835     emit_opcode(cbuf,0xE8);       // Call into runtime
 2836     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2837     __ post_call_nop();
 2838     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed argument words)
 2840     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2841     emit_d8(cbuf, 4*4);
 2842   %}
 2843 
 2844   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2845     // MOV   $tmp,$src.lo
 2846     emit_opcode(cbuf, 0x8B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2848     // OR    $tmp,$src.hi
 2849     emit_opcode(cbuf, 0x0B);
 2850     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2851   %}
 2852 
 2853   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2854     // CMP    $src1.lo,$src2.lo
 2855     emit_opcode( cbuf, 0x3B );
 2856     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2857     // JNE,s  skip
 2858     emit_cc(cbuf, 0x70, 0x5);
 2859     emit_d8(cbuf,2);
 2860     // CMP    $src1.hi,$src2.hi
 2861     emit_opcode( cbuf, 0x3B );
 2862     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2863   %}
 2864 
 2865   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo   (long compare; set flags for low bits)
 2867     emit_opcode( cbuf, 0x3B );
 2868     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2869     // MOV    $tmp,$src1.hi
 2870     emit_opcode( cbuf, 0x8B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi   (compute flags for long compare)
 2873     emit_opcode( cbuf, 0x1B );
 2874     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2875   %}
 2876 
 2877   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2878     // XOR    $tmp,$tmp
 2879     emit_opcode(cbuf,0x33);  // XOR
 2880     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2881     // CMP    $tmp,$src.lo
 2882     emit_opcode( cbuf, 0x3B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2884     // SBB    $tmp,$src.hi
 2885     emit_opcode( cbuf, 0x1B );
 2886     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2887   %}
 2888 
 2889  // Sniff, sniff... smells like Gnu Superoptimizer
 2890   enc_class neg_long( eRegL dst ) %{
 2891     emit_opcode(cbuf,0xF7);    // NEG hi
 2892     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2893     emit_opcode(cbuf,0xF7);    // NEG lo
 2894     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2895     emit_opcode(cbuf,0x83);    // SBB hi,0
 2896     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2897     emit_d8    (cbuf,0 );
 2898   %}
 2899 
 2900   enc_class enc_pop_rdx() %{
 2901     emit_opcode(cbuf,0x5A);
 2902   %}
 2903 
 2904   enc_class enc_rethrow() %{
 2905     MacroAssembler _masm(&cbuf);
 2906     cbuf.set_insts_mark();
 2907     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2908     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2909                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2910     __ post_call_nop();
 2911   %}
 2912 
 2913 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  For NaN and out-of-range values the
  // hardware stores the sentinel 0x80000000, which we test for below and
  // patch up with a call into the runtime.
 2919   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (while normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
 2927     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2928     emit_opcode(cbuf,0x2D);
 2929     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2930     // Allocate a word
 2931     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2932     emit_opcode(cbuf,0xEC);
 2933     emit_d8(cbuf,0x04);
 2934     // Encoding assumes a double has been pushed into FPR0.
 2935     // Store down the double as an int, popping the FPU stack
 2936     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2937     emit_opcode(cbuf,0x1C);
 2938     emit_d8(cbuf,0x24);
 2939     // Restore the rounding mode; mask the exception
 2940     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2941     emit_opcode(cbuf,0x2D);
 2942     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2943         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2944         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2945 
 2946     // Load the converted int; adjust CPU stack
 2947     emit_opcode(cbuf,0x58);       // POP EAX
 2948     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2949     emit_d32   (cbuf,0x80000000); //         0x80000000
 2950     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2951     emit_d8    (cbuf,0x07);       // Size of slow_call
 2952     // Push src onto stack slow-path
 2953     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2954     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2955     // CALL directly to the runtime
 2956     MacroAssembler _masm(&cbuf);
 2957     cbuf.set_insts_mark();
 2958     emit_opcode(cbuf,0xE8);       // Call into runtime
 2959     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2960     __ post_call_nop();
 2961     // Carry on here...
 2962   %}
 2963 
 2964   enc_class DPR2L_encoding( regDPR src ) %{
 2965     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2966     emit_opcode(cbuf,0x2D);
 2967     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (room for the 64-bit result)
 2969     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2970     emit_opcode(cbuf,0xEC);
 2971     emit_d8(cbuf,0x08);
 2972     // Encoding assumes a double has been pushed into FPR0.
 2973     // Store down the double as a long, popping the FPU stack
 2974     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2975     emit_opcode(cbuf,0x3C);
 2976     emit_d8(cbuf,0x24);
 2977     // Restore the rounding mode; mask the exception
 2978     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2979     emit_opcode(cbuf,0x2D);
 2980     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2981         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2982         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2983 
    // Load the converted long; adjust CPU stack
 2985     emit_opcode(cbuf,0x58);       // POP EAX
 2986     emit_opcode(cbuf,0x5A);       // POP EDX
 2987     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2988     emit_d8    (cbuf,0xFA);       // rdx
 2989     emit_d32   (cbuf,0x80000000); //         0x80000000
 2990     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2991     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2992     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
 2994     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2995     emit_d8    (cbuf,0x07);       // Size of slow_call
 2996     // Push src onto stack slow-path
 2997     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2998     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2999     // CALL directly to the runtime
 3000     MacroAssembler _masm(&cbuf);
 3001     cbuf.set_insts_mark();
 3002     emit_opcode(cbuf,0xE8);       // Call into runtime
 3003     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3004     __ post_call_nop();
 3005     // Carry on here...
 3006   %}
 3007 
 3008   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3009     // Operand was loaded from memory into fp ST (stack top)
 3010     // FMUL   ST,$src  /* D8 C8+i */
 3011     emit_opcode(cbuf, 0xD8);
 3012     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3013   %}
 3014 
 3015   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,ST  /* DE C0+i */
 3020   %}
 3021 
 3022   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3023     // FADDP  src2,ST  /* DE C0+i */
 3024     emit_opcode(cbuf, 0xDE);
 3025     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3026   %}
 3027 
 3028   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3029     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3037   %}
 3038 
 3039   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3040     // Operand was loaded from memory into fp ST (stack top)
 3041     // FADD   ST,$src  /* D8 C0+i */
 3042     emit_opcode(cbuf, 0xD8);
 3043     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3044 
    // FMUL   ST,src2  /* D8 C8+i */
 3046     emit_opcode(cbuf, 0xD8);
 3047     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3048   %}
 3049 
 3050 
 3051   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3052     // Operand was loaded from memory into fp ST (stack top)
 3053     // FADD   ST,$src  /* D8 C0+i */
 3054     emit_opcode(cbuf, 0xD8);
 3055     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3056 
 3057     // FMULP  src2,ST  /* DE C8+i */
 3058     emit_opcode(cbuf, 0xDE);
 3059     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3060   %}
 3061 
 3062   // Atomically load the volatile long
 3063   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3064     emit_opcode(cbuf,0xDF);
 3065     int rm_byte_opcode = 0x05;
 3066     int base     = $mem$$base;
 3067     int index    = $mem$$index;
 3068     int scale    = $mem$$scale;
 3069     int displace = $mem$$disp;
 3070     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3071     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3072     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3073   %}
 3074 
 3075   // Volatile Store Long.  Must be atomic, so move it into
 3076   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3077   // target address before the store (for null-ptr checks)
 3078   // so the memory operand is used twice in the encoding.
 3079   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3080     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3081     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3082     emit_opcode(cbuf,0xDF);
 3083     int rm_byte_opcode = 0x07;
 3084     int base     = $mem$$base;
 3085     int index    = $mem$$index;
 3086     int scale    = $mem$$scale;
 3087     int displace = $mem$$disp;
 3088     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3089     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3090   %}
 3091 
 3092 %}
 3093 
 3094 
 3095 //----------FRAME--------------------------------------------------------------
 3096 // Definition of frame structure and management information.
 3097 //
 3098 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3099 //                             |   (to get allocators register number
 3100 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3101 //  r   CALLER     |        |
 3102 //  o     |        +--------+      pad to even-align allocators stack-slot
 3103 //  w     V        |  pad0  |        numbers; owned by CALLER
 3104 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3105 //  h     ^        |   in   |  5
 3106 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3107 //  |     |        |        |  3
 3108 //  |     |        +--------+
 3109 //  V     |        | old out|      Empty on Intel, window on Sparc
 3110 //        |    old |preserve|      Must be even aligned.
 3111 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3112 //        |        |   in   |  3   area for Intel ret address
 3113 //     Owned by    |preserve|      Empty on Sparc.
 3114 //       SELF      +--------+
 3115 //        |        |  pad2  |  2   pad to align old SP
 3116 //        |        +--------+  1
 3117 //        |        | locks  |  0
 3118 //        |        +--------+----> OptoReg::stack0(), even aligned
 3119 //        |        |  pad1  | 11   pad to align new SP
 3120 //        |        +--------+
 3121 //        |        |        | 10
 3122 //        |        | spills |  9   spills
 3123 //        V        |        |  8   (pad0 slot for callee)
 3124 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3125 //        ^        |  out   |  7
 3126 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3127 //     Owned by    +--------+
 3128 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3129 //        |    new |preserve|      Must be even-aligned.
 3130 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3131 //        |        |        |
 3132 //
 3133 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3134 //         known from SELF's arguments and the Java calling convention.
 3135 //         Region 6-7 is determined per call site.
 3136 // Note 2: If the calling convention leaves holes in the incoming argument
 3137 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3138 //         are owned by the CALLEE.  Holes should not be necessary in the
 3139 //         incoming area, as the Java calling convention is completely under
 3140 //         the control of the AD file.  Doubles can be sorted and packed to
 3141 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3142 //         varargs C calling conventions.
 3143 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3144 //         even aligned with pad0 as needed.
 3145 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3146 //         region 6-11 is even aligned; it may be padded out more so that
 3147 //         the region from SP to FP meets the minimum stack alignment.
 3148 
 3149 frame %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
 3152   inline_cache_reg(EAX);                // Inline Cache Register
 3153 
 3154   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3155   cisc_spilling_operand_name(indOffset32);
 3156 
 3157   // Number of stack slots consumed by locking an object
 3158   sync_stack_slots(1);
 3159 
 3160   // Compiled code's Frame Pointer
 3161   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3165   interpreter_frame_pointer(EBP);
 3166 
 3167   // Stack alignment requirement
 3168   // Alignment size in bytes (128-bit -> 16 bytes)
 3169   stack_alignment(StackAlignmentInBytes);
 3170 
 3171   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3172   // for calls to C.  Supports the var-args backing area for register parms.
 3173   varargs_C_out_slots_killed(0);
 3174 
 3175   // The after-PROLOG location of the return address.  Location of
 3176   // return address specifies a type (REG or STACK) and a number
 3177   // representing the register number (i.e. - use a register name) or
 3178   // stack slot.
 3179   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
 3181   return_addr(STACK - 1 +
 3182               align_up((Compile::current()->in_preserve_stack_slots() +
 3183                         Compile::current()->fixed_slots()),
 3184                        stack_alignment_in_slots()));
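
  // Worked example with hypothetical values: two in-preserve slots, one fixed
  // slot and a two-slot stack alignment give align_up(2 + 1, 2) = 4, so the
  // return address would sit at stack slot (STACK - 1 + 4), i.e. three slots
  // above OptoReg::stack0().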
 3185 
 3186   // Location of C & interpreter return values
 3187   c_return_value %{
 3188     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3189     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3190     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3191 
 3192     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3193     // that C functions return float and double results in XMM0.
 3194     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3195       return OptoRegPair(XMM0b_num,XMM0_num);
 3196     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3197       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3198 
 3199     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3200   %}
 3201 
 3202   // Location of return values
 3203   return_value %{
 3204     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3205     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3206     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3207     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3208       return OptoRegPair(XMM0b_num,XMM0_num);
 3209     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3210       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3211     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3212   %}
 3213 
 3214 %}
 3215 
 3216 //----------ATTRIBUTES---------------------------------------------------------
 3217 //----------Operand Attributes-------------------------------------------------
 3218 op_attrib op_cost(0);        // Required cost attribute
 3219 
 3220 //----------Instruction Attributes---------------------------------------------
 3221 ins_attrib ins_cost(100);       // Required cost attribute
 3222 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
 3226 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3227                                 // specifies the alignment that some part of the instruction (not
 3228                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3229                                 // function must be provided for the instruction
 3230 
 3231 //----------OPERANDS-----------------------------------------------------------
 3232 // Operand definitions must precede instruction definitions for correct parsing
 3233 // in the ADLC because operands constitute user defined types which are used in
 3234 // instruction definitions.
 3235 
 3236 //----------Simple Operands----------------------------------------------------
 3237 // Immediate Operands
 3238 // Integer Immediate
 3239 operand immI() %{
 3240   match(ConI);
 3241 
 3242   op_cost(10);
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 // Constant for test vs zero
 3248 operand immI_0() %{
 3249   predicate(n->get_int() == 0);
 3250   match(ConI);
 3251 
 3252   op_cost(0);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Constant for increment
 3258 operand immI_1() %{
 3259   predicate(n->get_int() == 1);
 3260   match(ConI);
 3261 
 3262   op_cost(0);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Constant for decrement
 3268 operand immI_M1() %{
 3269   predicate(n->get_int() == -1);
 3270   match(ConI);
 3271 
 3272   op_cost(0);
 3273   format %{ %}
 3274   interface(CONST_INTER);
 3275 %}
 3276 
 3277 // Valid scale values for addressing modes
 3278 operand immI2() %{
 3279   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3280   match(ConI);
 3281 
 3282   format %{ %}
 3283   interface(CONST_INTER);
 3284 %}
 3285 
 3286 operand immI8() %{
 3287   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3288   match(ConI);
 3289 
 3290   op_cost(5);
 3291   format %{ %}
 3292   interface(CONST_INTER);
 3293 %}
 3294 
 3295 operand immU8() %{
 3296   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3297   match(ConI);
 3298 
 3299   op_cost(5);
 3300   format %{ %}
 3301   interface(CONST_INTER);
 3302 %}
 3303 
 3304 operand immI16() %{
 3305   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3306   match(ConI);
 3307 
 3308   op_cost(10);
 3309   format %{ %}
 3310   interface(CONST_INTER);
 3311 %}
 3312 
 3313 // Int Immediate non-negative
 3314 operand immU31()
 3315 %{
 3316   predicate(n->get_int() >= 0);
 3317   match(ConI);
 3318 
 3319   op_cost(0);
 3320   format %{ %}
 3321   interface(CONST_INTER);
 3322 %}
 3323 
 3324 // Constant for long shifts
 3325 operand immI_32() %{
 3326   predicate( n->get_int() == 32 );
 3327   match(ConI);
 3328 
 3329   op_cost(0);
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 operand immI_1_31() %{
 3335   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3336   match(ConI);
 3337 
 3338   op_cost(0);
 3339   format %{ %}
 3340   interface(CONST_INTER);
 3341 %}
 3342 
 3343 operand immI_32_63() %{
 3344   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3345   match(ConI);
 3346   op_cost(0);
 3347 
 3348   format %{ %}
 3349   interface(CONST_INTER);
 3350 %}
 3351 
 3352 operand immI_2() %{
 3353   predicate( n->get_int() == 2 );
 3354   match(ConI);
 3355 
 3356   op_cost(0);
 3357   format %{ %}
 3358   interface(CONST_INTER);
 3359 %}
 3360 
 3361 operand immI_3() %{
 3362   predicate( n->get_int() == 3 );
 3363   match(ConI);
 3364 
 3365   op_cost(0);
 3366   format %{ %}
 3367   interface(CONST_INTER);
 3368 %}
 3369 
 3370 operand immI_4()
 3371 %{
 3372   predicate(n->get_int() == 4);
 3373   match(ConI);
 3374 
 3375   op_cost(0);
 3376   format %{ %}
 3377   interface(CONST_INTER);
 3378 %}
 3379 
 3380 operand immI_8()
 3381 %{
 3382   predicate(n->get_int() == 8);
 3383   match(ConI);
 3384 
 3385   op_cost(0);
 3386   format %{ %}
 3387   interface(CONST_INTER);
 3388 %}
 3389 
 3390 // Pointer Immediate
 3391 operand immP() %{
 3392   match(ConP);
 3393 
 3394   op_cost(10);
 3395   format %{ %}
 3396   interface(CONST_INTER);
 3397 %}
 3398 
 3399 // NULL Pointer Immediate
 3400 operand immP0() %{
 3401   predicate( n->get_ptr() == 0 );
 3402   match(ConP);
 3403   op_cost(0);
 3404 
 3405   format %{ %}
 3406   interface(CONST_INTER);
 3407 %}
 3408 
 3409 // Long Immediate
 3410 operand immL() %{
 3411   match(ConL);
 3412 
 3413   op_cost(20);
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long Immediate zero
 3419 operand immL0() %{
 3420   predicate( n->get_long() == 0L );
 3421   match(ConL);
 3422   op_cost(0);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
// Long Immediate minus-one
 3429 operand immL_M1() %{
 3430   predicate( n->get_long() == -1L );
 3431   match(ConL);
 3432   op_cost(0);
 3433 
 3434   format %{ %}
 3435   interface(CONST_INTER);
 3436 %}
 3437 
 3438 // Long immediate from 0 to 127.
 3439 // Used for a shorter form of long mul by 10.
 3440 operand immL_127() %{
 3441   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3442   match(ConL);
 3443   op_cost(0);
 3444 
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 // Long Immediate: low 32-bit mask
 3450 operand immL_32bits() %{
 3451   predicate(n->get_long() == 0xFFFFFFFFL);
 3452   match(ConL);
 3453   op_cost(0);
 3454 
 3455   format %{ %}
 3456   interface(CONST_INTER);
 3457 %}
 3458 
// Long Immediate: value fits in a sign-extended 32-bit immediate
 3460 operand immL32() %{
 3461   predicate(n->get_long() == (int)(n->get_long()));
 3462   match(ConL);
 3463   op_cost(20);
 3464 
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
// Double Immediate zero
 3470 operand immDPR0() %{
 3471   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3472   // bug that generates code such that NaNs compare equal to 0.0
 3473   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3474   match(ConD);
 3475 
 3476   op_cost(5);
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 // Double Immediate one
 3482 operand immDPR1() %{
 3483   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3484   match(ConD);
 3485 
 3486   op_cost(5);
 3487   format %{ %}
 3488   interface(CONST_INTER);
 3489 %}
 3490 
 3491 // Double Immediate
 3492 operand immDPR() %{
 3493   predicate(UseSSE<=1);
 3494   match(ConD);
 3495 
 3496   op_cost(5);
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 operand immD() %{
 3502   predicate(UseSSE>=2);
 3503   match(ConD);
 3504 
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Double Immediate zero
 3511 operand immD0() %{
  // Compare the bit pattern instead of the value: only +0.0 matches.  This
  // also sidesteps a VC++ bug that generates code such that NaNs compare equal
  // to 0.0 AND do not compare equal to -0.0.
 3515   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3516   match(ConD);
 3517 
 3518   format %{ %}
 3519   interface(CONST_INTER);
 3520 %}
 3521 
 3522 // Float Immediate zero
 3523 operand immFPR0() %{
 3524   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3525   match(ConF);
 3526 
 3527   op_cost(5);
 3528   format %{ %}
 3529   interface(CONST_INTER);
 3530 %}
 3531 
 3532 // Float Immediate one
 3533 operand immFPR1() %{
 3534   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3535   match(ConF);
 3536 
 3537   op_cost(5);
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 // Float Immediate
 3543 operand immFPR() %{
 3544   predicate( UseSSE == 0 );
 3545   match(ConF);
 3546 
 3547   op_cost(5);
 3548   format %{ %}
 3549   interface(CONST_INTER);
 3550 %}
 3551 
 3552 // Float Immediate
 3553 operand immF() %{
 3554   predicate(UseSSE >= 1);
 3555   match(ConF);
 3556 
 3557   op_cost(5);
 3558   format %{ %}
 3559   interface(CONST_INTER);
 3560 %}
 3561 
 3562 // Float Immediate zero.  Zero and not -0.0
 3563 operand immF0() %{
 3564   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3565   match(ConF);
 3566 
 3567   op_cost(5);
 3568   format %{ %}
 3569   interface(CONST_INTER);
 3570 %}
 3571 
 3572 // Immediates for special shifts (sign extend)
 3573 
// Shift counts for sign extension
 3575 operand immI_16() %{
 3576   predicate( n->get_int() == 16 );
 3577   match(ConI);
 3578 
 3579   format %{ %}
 3580   interface(CONST_INTER);
 3581 %}
 3582 
 3583 operand immI_24() %{
 3584   predicate( n->get_int() == 24 );
 3585   match(ConI);
 3586 
 3587   format %{ %}
 3588   interface(CONST_INTER);
 3589 %}
 3590 
 3591 // Constant for byte-wide masking
 3592 operand immI_255() %{
 3593   predicate( n->get_int() == 255 );
 3594   match(ConI);
 3595 
 3596   format %{ %}
 3597   interface(CONST_INTER);
 3598 %}
 3599 
 3600 // Constant for short-wide masking
 3601 operand immI_65535() %{
 3602   predicate(n->get_int() == 65535);
 3603   match(ConI);
 3604 
 3605   format %{ %}
 3606   interface(CONST_INTER);
 3607 %}
 3608 
 3609 operand kReg()
 3610 %{
 3611   constraint(ALLOC_IN_RC(vectmask_reg));
 3612   match(RegVectMask);
 3613   format %{%}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand kReg_K1()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 operand kReg_K2()
 3626 %{
 3627   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3628   match(RegVectMask);
 3629   format %{%}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 // Special Registers
 3634 operand kReg_K3()
 3635 %{
 3636   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3637   match(RegVectMask);
 3638   format %{%}
 3639   interface(REG_INTER);
 3640 %}
 3641 
 3642 operand kReg_K4()
 3643 %{
 3644   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3645   match(RegVectMask);
 3646   format %{%}
 3647   interface(REG_INTER);
 3648 %}
 3649 
 3650 operand kReg_K5()
 3651 %{
 3652   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3653   match(RegVectMask);
 3654   format %{%}
 3655   interface(REG_INTER);
 3656 %}
 3657 
 3658 operand kReg_K6()
 3659 %{
 3660   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3661   match(RegVectMask);
 3662   format %{%}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // Special Registers
 3667 operand kReg_K7()
 3668 %{
 3669   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3670   match(RegVectMask);
 3671   format %{%}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 // Register Operands
 3676 // Integer Register
 3677 operand rRegI() %{
 3678   constraint(ALLOC_IN_RC(int_reg));
 3679   match(RegI);
 3680   match(xRegI);
 3681   match(eAXRegI);
 3682   match(eBXRegI);
 3683   match(eCXRegI);
 3684   match(eDXRegI);
 3685   match(eDIRegI);
 3686   match(eSIRegI);
 3687 
 3688   format %{ %}
 3689   interface(REG_INTER);
 3690 %}
 3691 
 3692 // Subset of Integer Register
 3693 operand xRegI(rRegI reg) %{
 3694   constraint(ALLOC_IN_RC(int_x_reg));
 3695   match(reg);
 3696   match(eAXRegI);
 3697   match(eBXRegI);
 3698   match(eCXRegI);
 3699   match(eDXRegI);
 3700 
 3701   format %{ %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 // Special Registers
 3706 operand eAXRegI(xRegI reg) %{
 3707   constraint(ALLOC_IN_RC(eax_reg));
 3708   match(reg);
 3709   match(rRegI);
 3710 
 3711   format %{ "EAX" %}
 3712   interface(REG_INTER);
 3713 %}
 3714 
 3715 // Special Registers
 3716 operand eBXRegI(xRegI reg) %{
 3717   constraint(ALLOC_IN_RC(ebx_reg));
 3718   match(reg);
 3719   match(rRegI);
 3720 
 3721   format %{ "EBX" %}
 3722   interface(REG_INTER);
 3723 %}
 3724 
 3725 operand eCXRegI(xRegI reg) %{
 3726   constraint(ALLOC_IN_RC(ecx_reg));
 3727   match(reg);
 3728   match(rRegI);
 3729 
 3730   format %{ "ECX" %}
 3731   interface(REG_INTER);
 3732 %}
 3733 
 3734 operand eDXRegI(xRegI reg) %{
 3735   constraint(ALLOC_IN_RC(edx_reg));
 3736   match(reg);
 3737   match(rRegI);
 3738 
 3739   format %{ "EDX" %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 operand eDIRegI(xRegI reg) %{
 3744   constraint(ALLOC_IN_RC(edi_reg));
 3745   match(reg);
 3746   match(rRegI);
 3747 
 3748   format %{ "EDI" %}
 3749   interface(REG_INTER);
 3750 %}
 3751 
 3752 operand naxRegI() %{
 3753   constraint(ALLOC_IN_RC(nax_reg));
 3754   match(RegI);
 3755   match(eCXRegI);
 3756   match(eDXRegI);
 3757   match(eSIRegI);
 3758   match(eDIRegI);
 3759 
 3760   format %{ %}
 3761   interface(REG_INTER);
 3762 %}
 3763 
 3764 operand nadxRegI() %{
 3765   constraint(ALLOC_IN_RC(nadx_reg));
 3766   match(RegI);
 3767   match(eBXRegI);
 3768   match(eCXRegI);
 3769   match(eSIRegI);
 3770   match(eDIRegI);
 3771 
 3772   format %{ %}
 3773   interface(REG_INTER);
 3774 %}
 3775 
 3776 operand ncxRegI() %{
 3777   constraint(ALLOC_IN_RC(ncx_reg));
 3778   match(RegI);
 3779   match(eAXRegI);
 3780   match(eDXRegI);
 3781   match(eSIRegI);
 3782   match(eDIRegI);
 3783 
 3784   format %{ %}
 3785   interface(REG_INTER);
 3786 %}
 3787 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3790 operand eSIRegI(xRegI reg) %{
 3791    constraint(ALLOC_IN_RC(esi_reg));
 3792    match(reg);
 3793    match(rRegI);
 3794 
 3795    format %{ "ESI" %}
 3796    interface(REG_INTER);
 3797 %}
 3798 
 3799 // Pointer Register
 3800 operand anyRegP() %{
 3801   constraint(ALLOC_IN_RC(any_reg));
 3802   match(RegP);
 3803   match(eAXRegP);
 3804   match(eBXRegP);
 3805   match(eCXRegP);
 3806   match(eDIRegP);
 3807   match(eRegP);
 3808 
 3809   format %{ %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eRegP() %{
 3814   constraint(ALLOC_IN_RC(int_reg));
 3815   match(RegP);
 3816   match(eAXRegP);
 3817   match(eBXRegP);
 3818   match(eCXRegP);
 3819   match(eDIRegP);
 3820 
 3821   format %{ %}
 3822   interface(REG_INTER);
 3823 %}
 3824 
 3825 operand rRegP() %{
 3826   constraint(ALLOC_IN_RC(int_reg));
 3827   match(RegP);
 3828   match(eAXRegP);
 3829   match(eBXRegP);
 3830   match(eCXRegP);
 3831   match(eDIRegP);
 3832 
 3833   format %{ %}
 3834   interface(REG_INTER);
 3835 %}
 3836 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3838 operand eRegP_no_EBP() %{
 3839   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3840   match(RegP);
 3841   match(eAXRegP);
 3842   match(eBXRegP);
 3843   match(eCXRegP);
 3844   match(eDIRegP);
 3845 
 3846   op_cost(100);
 3847   format %{ %}
 3848   interface(REG_INTER);
 3849 %}
 3850 
 3851 operand naxRegP() %{
 3852   constraint(ALLOC_IN_RC(nax_reg));
 3853   match(RegP);
 3854   match(eBXRegP);
 3855   match(eDXRegP);
 3856   match(eCXRegP);
 3857   match(eSIRegP);
 3858   match(eDIRegP);
 3859 
 3860   format %{ %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 operand nabxRegP() %{
 3865   constraint(ALLOC_IN_RC(nabx_reg));
 3866   match(RegP);
 3867   match(eCXRegP);
 3868   match(eDXRegP);
 3869   match(eSIRegP);
 3870   match(eDIRegP);
 3871 
 3872   format %{ %}
 3873   interface(REG_INTER);
 3874 %}
 3875 
 3876 operand pRegP() %{
 3877   constraint(ALLOC_IN_RC(p_reg));
 3878   match(RegP);
 3879   match(eBXRegP);
 3880   match(eDXRegP);
 3881   match(eSIRegP);
 3882   match(eDIRegP);
 3883 
 3884   format %{ %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 // Special Registers
 3889 // Return a pointer value
 3890 operand eAXRegP(eRegP reg) %{
 3891   constraint(ALLOC_IN_RC(eax_reg));
 3892   match(reg);
 3893   format %{ "EAX" %}
 3894   interface(REG_INTER);
 3895 %}
 3896 
 3897 // Used in AtomicAdd
 3898 operand eBXRegP(eRegP reg) %{
 3899   constraint(ALLOC_IN_RC(ebx_reg));
 3900   match(reg);
 3901   format %{ "EBX" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Tail-call (interprocedural jump) to interpreter
 3906 operand eCXRegP(eRegP reg) %{
 3907   constraint(ALLOC_IN_RC(ecx_reg));
 3908   match(reg);
 3909   format %{ "ECX" %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 operand eDXRegP(eRegP reg) %{
 3914   constraint(ALLOC_IN_RC(edx_reg));
 3915   match(reg);
 3916   format %{ "EDX" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eSIRegP(eRegP reg) %{
 3921   constraint(ALLOC_IN_RC(esi_reg));
 3922   match(reg);
 3923   format %{ "ESI" %}
 3924   interface(REG_INTER);
 3925 %}
 3926 
 3927 // Used in rep stosw
 3928 operand eDIRegP(eRegP reg) %{
 3929   constraint(ALLOC_IN_RC(edi_reg));
 3930   match(reg);
 3931   format %{ "EDI" %}
 3932   interface(REG_INTER);
 3933 %}
 3934 
 3935 operand eRegL() %{
 3936   constraint(ALLOC_IN_RC(long_reg));
 3937   match(RegL);
 3938   match(eADXRegL);
 3939 
 3940   format %{ %}
 3941   interface(REG_INTER);
 3942 %}
 3943 
 3944 operand eADXRegL( eRegL reg ) %{
 3945   constraint(ALLOC_IN_RC(eadx_reg));
 3946   match(reg);
 3947 
 3948   format %{ "EDX:EAX" %}
 3949   interface(REG_INTER);
 3950 %}
 3951 
 3952 operand eBCXRegL( eRegL reg ) %{
 3953   constraint(ALLOC_IN_RC(ebcx_reg));
 3954   match(reg);
 3955 
 3956   format %{ "EBX:ECX" %}
 3957   interface(REG_INTER);
 3958 %}
 3959 
 3960 operand eBDPRegL( eRegL reg ) %{
 3961   constraint(ALLOC_IN_RC(ebpd_reg));
 3962   match(reg);
 3963 
 3964   format %{ "EBP:EDI" %}
 3965   interface(REG_INTER);
 3966 %}
 3967 // Special case for integer high multiply
 3968 operand eADXRegL_low_only() %{
 3969   constraint(ALLOC_IN_RC(eadx_reg));
 3970   match(RegL);
 3971 
 3972   format %{ "EAX" %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Flags register, used as output of compare instructions
 3977 operand rFlagsReg() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980 
 3981   format %{ "EFLAGS" %}
 3982   interface(REG_INTER);
 3983 %}
 3984 
 3985 // Flags register, used as output of compare instructions
 3986 operand eFlagsReg() %{
 3987   constraint(ALLOC_IN_RC(int_flags));
 3988   match(RegFlags);
 3989 
 3990   format %{ "EFLAGS" %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Flags register, used as output of FLOATING POINT compare instructions
 3995 operand eFlagsRegU() %{
 3996   constraint(ALLOC_IN_RC(int_flags));
 3997   match(RegFlags);
 3998 
 3999   format %{ "EFLAGS_U" %}
 4000   interface(REG_INTER);
 4001 %}
 4002 
 4003 operand eFlagsRegUCF() %{
 4004   constraint(ALLOC_IN_RC(int_flags));
 4005   match(RegFlags);
 4006   predicate(false);
 4007 
 4008   format %{ "EFLAGS_U_CF" %}
 4009   interface(REG_INTER);
 4010 %}
 4011 
 4012 // Condition Code Register used by long compare
 4013 operand flagsReg_long_LTGE() %{
 4014   constraint(ALLOC_IN_RC(int_flags));
 4015   match(RegFlags);
 4016   format %{ "FLAGS_LTGE" %}
 4017   interface(REG_INTER);
 4018 %}
 4019 operand flagsReg_long_EQNE() %{
 4020   constraint(ALLOC_IN_RC(int_flags));
 4021   match(RegFlags);
 4022   format %{ "FLAGS_EQNE" %}
 4023   interface(REG_INTER);
 4024 %}
 4025 operand flagsReg_long_LEGT() %{
 4026   constraint(ALLOC_IN_RC(int_flags));
 4027   match(RegFlags);
 4028   format %{ "FLAGS_LEGT" %}
 4029   interface(REG_INTER);
 4030 %}
 4031 
 4032 // Condition Code Register used by unsigned long compare
 4033 operand flagsReg_ulong_LTGE() %{
 4034   constraint(ALLOC_IN_RC(int_flags));
 4035   match(RegFlags);
 4036   format %{ "FLAGS_U_LTGE" %}
 4037   interface(REG_INTER);
 4038 %}
 4039 operand flagsReg_ulong_EQNE() %{
 4040   constraint(ALLOC_IN_RC(int_flags));
 4041   match(RegFlags);
 4042   format %{ "FLAGS_U_EQNE" %}
 4043   interface(REG_INTER);
 4044 %}
 4045 operand flagsReg_ulong_LEGT() %{
 4046   constraint(ALLOC_IN_RC(int_flags));
 4047   match(RegFlags);
 4048   format %{ "FLAGS_U_LEGT" %}
 4049   interface(REG_INTER);
 4050 %}
 4051 
 4052 // Float register operands
 4053 operand regDPR() %{
 4054   predicate( UseSSE < 2 );
 4055   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4056   match(RegD);
 4057   match(regDPR1);
 4058   match(regDPR2);
 4059   format %{ %}
 4060   interface(REG_INTER);
 4061 %}
 4062 
 4063 operand regDPR1(regDPR reg) %{
 4064   predicate( UseSSE < 2 );
 4065   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4066   match(reg);
 4067   format %{ "FPR1" %}
 4068   interface(REG_INTER);
 4069 %}
 4070 
 4071 operand regDPR2(regDPR reg) %{
 4072   predicate( UseSSE < 2 );
 4073   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4074   match(reg);
 4075   format %{ "FPR2" %}
 4076   interface(REG_INTER);
 4077 %}
 4078 
 4079 operand regnotDPR1(regDPR reg) %{
 4080   predicate( UseSSE < 2 );
 4081   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4082   match(reg);
 4083   format %{ %}
 4084   interface(REG_INTER);
 4085 %}
 4086 
 4087 // Float register operands
 4088 operand regFPR() %{
 4089   predicate( UseSSE < 2 );
 4090   constraint(ALLOC_IN_RC(fp_flt_reg));
 4091   match(RegF);
 4092   match(regFPR1);
 4093   format %{ %}
 4094   interface(REG_INTER);
 4095 %}
 4096 
 4097 // Float register operands
 4098 operand regFPR1(regFPR reg) %{
 4099   predicate( UseSSE < 2 );
 4100   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4101   match(reg);
 4102   format %{ "FPR1" %}
 4103   interface(REG_INTER);
 4104 %}
 4105 
 4106 // XMM Float register operands
 4107 operand regF() %{
 4108   predicate( UseSSE>=1 );
 4109   constraint(ALLOC_IN_RC(float_reg_legacy));
 4110   match(RegF);
 4111   format %{ %}
 4112   interface(REG_INTER);
 4113 %}
 4114 
 4115 operand legRegF() %{
 4116   predicate( UseSSE>=1 );
 4117   constraint(ALLOC_IN_RC(float_reg_legacy));
 4118   match(RegF);
 4119   format %{ %}
 4120   interface(REG_INTER);
 4121 %}
 4122 
 4123 // Float register operands
 4124 operand vlRegF() %{
 4125    constraint(ALLOC_IN_RC(float_reg_vl));
 4126    match(RegF);
 4127 
 4128    format %{ %}
 4129    interface(REG_INTER);
 4130 %}
 4131 
 4132 // XMM Double register operands
 4133 operand regD() %{
 4134   predicate( UseSSE>=2 );
 4135   constraint(ALLOC_IN_RC(double_reg_legacy));
 4136   match(RegD);
 4137   format %{ %}
 4138   interface(REG_INTER);
 4139 %}
 4140 
 4141 // Double register operands
 4142 operand legRegD() %{
 4143   predicate( UseSSE>=2 );
 4144   constraint(ALLOC_IN_RC(double_reg_legacy));
 4145   match(RegD);
 4146   format %{ %}
 4147   interface(REG_INTER);
 4148 %}
 4149 
 4150 operand vlRegD() %{
 4151    constraint(ALLOC_IN_RC(double_reg_vl));
 4152    match(RegD);
 4153 
 4154    format %{ %}
 4155    interface(REG_INTER);
 4156 %}
 4157 
 4158 //----------Memory Operands----------------------------------------------------
 4159 // Direct Memory Operand
 4160 operand direct(immP addr) %{
 4161   match(addr);
 4162 
 4163   format %{ "[$addr]" %}
 4164   interface(MEMORY_INTER) %{
 4165     base(0xFFFFFFFF);
 4166     index(0x4);
 4167     scale(0x0);
 4168     disp($addr);
 4169   %}
 4170 %}
 4171 
 4172 // Indirect Memory Operand
 4173 operand indirect(eRegP reg) %{
 4174   constraint(ALLOC_IN_RC(int_reg));
 4175   match(reg);
 4176 
 4177   format %{ "[$reg]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index(0x4);
 4181     scale(0x0);
 4182     disp(0x0);
 4183   %}
 4184 %}
 4185 
 4186 // Indirect Memory Plus Short Offset Operand
 4187 operand indOffset8(eRegP reg, immI8 off) %{
 4188   match(AddP reg off);
 4189 
 4190   format %{ "[$reg + $off]" %}
 4191   interface(MEMORY_INTER) %{
 4192     base($reg);
 4193     index(0x4);
 4194     scale(0x0);
 4195     disp($off);
 4196   %}
 4197 %}
 4198 
 4199 // Indirect Memory Plus Long Offset Operand
 4200 operand indOffset32(eRegP reg, immI off) %{
 4201   match(AddP reg off);
 4202 
 4203   format %{ "[$reg + $off]" %}
 4204   interface(MEMORY_INTER) %{
 4205     base($reg);
 4206     index(0x4);
 4207     scale(0x0);
 4208     disp($off);
 4209   %}
 4210 %}
 4211 
 4212 // Indirect Memory Plus Long Offset Operand
 4213 operand indOffset32X(rRegI reg, immP off) %{
 4214   match(AddP off reg);
 4215 
 4216   format %{ "[$reg + $off]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base($reg);
 4219     index(0x4);
 4220     scale(0x0);
 4221     disp($off);
 4222   %}
 4223 %}
 4224 
 4225 // Indirect Memory Plus Index Register Plus Offset Operand
 4226 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4227   match(AddP (AddP reg ireg) off);
 4228 
 4229   op_cost(10);
 4230   format %{"[$reg + $off + $ireg]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base($reg);
 4233     index($ireg);
 4234     scale(0x0);
 4235     disp($off);
 4236   %}
 4237 %}
 4238 
 4239 // Indirect Memory Plus Index Register Plus Offset Operand
 4240 operand indIndex(eRegP reg, rRegI ireg) %{
 4241   match(AddP reg ireg);
 4242 
 4243   op_cost(10);
 4244   format %{"[$reg + $ireg]" %}
 4245   interface(MEMORY_INTER) %{
 4246     base($reg);
 4247     index($ireg);
 4248     scale(0x0);
 4249     disp(0x0);
 4250   %}
 4251 %}
 4252 
 4253 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4255 // // -------------------------------------------------------------------------
 4256 // // Scaled Memory Operands
 4257 // // Indirect Memory Times Scale Plus Offset Operand
 4258 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4259 //   match(AddP off (LShiftI ireg scale));
 4260 //
 4261 //   op_cost(10);
 4262 //   format %{"[$off + $ireg << $scale]" %}
 4263 //   interface(MEMORY_INTER) %{
 4264 //     base(0x4);
 4265 //     index($ireg);
 4266 //     scale($scale);
 4267 //     disp($off);
 4268 //   %}
 4269 // %}
 4270 
 4271 // Indirect Memory Times Scale Plus Index Register
 4272 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4273   match(AddP reg (LShiftI ireg scale));
 4274 
 4275   op_cost(10);
 4276   format %{"[$reg + $ireg << $scale]" %}
 4277   interface(MEMORY_INTER) %{
 4278     base($reg);
 4279     index($ireg);
 4280     scale($scale);
 4281     disp(0x0);
 4282   %}
 4283 %}
 4284 
 4285 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4286 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4287   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4288 
 4289   op_cost(10);
 4290   format %{"[$reg + $off + $ireg << $scale]" %}
 4291   interface(MEMORY_INTER) %{
 4292     base($reg);
 4293     index($ireg);
 4294     scale($scale);
 4295     disp($off);
 4296   %}
 4297 %}
 4298 
 4299 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4301 // the first word of the long.  If the load-long destination overlaps with
 4302 // registers used in the addressing expression, the 2nd half will be loaded
 4303 // from a clobbered address.  Fix this by requiring that load-long use
 4304 // address registers that do not overlap with the load-long target.
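// For example (illustrative register choice), if the base register were also
// the low half of the destination:
//   mov  eax, [eax]        // first half overwrites the base register
//   mov  edx, [eax+4]      // second half now reads from a clobbered address
// hence the dedicated, non-overlapping base register class below.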
 4305 
 4306 // load-long support
 4307 operand load_long_RegP() %{
 4308   constraint(ALLOC_IN_RC(esi_reg));
 4309   match(RegP);
 4310   match(eSIRegP);
 4311   op_cost(100);
 4312   format %{  %}
 4313   interface(REG_INTER);
 4314 %}
 4315 
 4316 // Indirect Memory Operand Long
 4317 operand load_long_indirect(load_long_RegP reg) %{
 4318   constraint(ALLOC_IN_RC(esi_reg));
 4319   match(reg);
 4320 
 4321   format %{ "[$reg]" %}
 4322   interface(MEMORY_INTER) %{
 4323     base($reg);
 4324     index(0x4);
 4325     scale(0x0);
 4326     disp(0x0);
 4327   %}
 4328 %}
 4329 
 4330 // Indirect Memory Plus Long Offset Operand
 4331 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4332   match(AddP reg off);
 4333 
 4334   format %{ "[$reg + $off]" %}
 4335   interface(MEMORY_INTER) %{
 4336     base($reg);
 4337     index(0x4);
 4338     scale(0x0);
 4339     disp($off);
 4340   %}
 4341 %}
 4342 
 4343 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4344 
 4345 
 4346 //----------Special Memory Operands--------------------------------------------
 4347 // Stack Slot Operand - This operand is used for loading and storing temporary
 4348 //                      values on the stack where a match requires a value to
 4349 //                      flow through memory.
 4350 operand stackSlotP(sRegP reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 operand stackSlotI(sRegI reg) %{
 4363   constraint(ALLOC_IN_RC(stack_slots));
 4364   // No match rule because this operand is only generated in matching
 4365   format %{ "[$reg]" %}
 4366   interface(MEMORY_INTER) %{
 4367     base(0x4);   // ESP
 4368     index(0x4);  // No Index
 4369     scale(0x0);  // No Scale
 4370     disp($reg);  // Stack Offset
 4371   %}
 4372 %}
 4373 
 4374 operand stackSlotF(sRegF reg) %{
 4375   constraint(ALLOC_IN_RC(stack_slots));
 4376   // No match rule because this operand is only generated in matching
 4377   format %{ "[$reg]" %}
 4378   interface(MEMORY_INTER) %{
 4379     base(0x4);   // ESP
 4380     index(0x4);  // No Index
 4381     scale(0x0);  // No Scale
 4382     disp($reg);  // Stack Offset
 4383   %}
 4384 %}
 4385 
 4386 operand stackSlotD(sRegD reg) %{
 4387   constraint(ALLOC_IN_RC(stack_slots));
 4388   // No match rule because this operand is only generated in matching
 4389   format %{ "[$reg]" %}
 4390   interface(MEMORY_INTER) %{
 4391     base(0x4);   // ESP
 4392     index(0x4);  // No Index
 4393     scale(0x0);  // No Scale
 4394     disp($reg);  // Stack Offset
 4395   %}
 4396 %}
 4397 
 4398 operand stackSlotL(sRegL reg) %{
 4399   constraint(ALLOC_IN_RC(stack_slots));
 4400   // No match rule because this operand is only generated in matching
 4401   format %{ "[$reg]" %}
 4402   interface(MEMORY_INTER) %{
 4403     base(0x4);   // ESP
 4404     index(0x4);  // No Index
 4405     scale(0x0);  // No Scale
 4406     disp($reg);  // Stack Offset
 4407   %}
 4408 %}
 4409 
 4410 //----------Conditional Branch Operands----------------------------------------
 4411 // Comparison Op  - This is the operation of the comparison, and is limited to
 4412 //                  the following set of codes:
 4413 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4414 //
 4415 // Other attributes of the comparison, such as unsignedness, are specified
 4416 // by the comparison instruction that sets a condition code flags register.
 4417 // That result is represented by a flags operand whose subtype is appropriate
 4418 // to the unsignedness (etc.) of the comparison.
 4419 //
 4420 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4421 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4422 // by matching a specific subtype of Bool operand below, such as cmpOpU.
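// For example, an instruction matching (If cmpOp (CmpI a b)) takes the
// condition code from the cmpOp operand below and adds it to the Jcc opcode
// base (0x70 short form, 0x0F 0x80 long form), so "less" (0xC) encodes as
// JL (0x7C / 0x0F 0x8C).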
 4423 
 4424 // Comparison Code
 4425 operand cmpOp() %{
 4426   match(Bool);
 4427 
 4428   format %{ "" %}
 4429   interface(COND_INTER) %{
 4430     equal(0x4, "e");
 4431     not_equal(0x5, "ne");
 4432     less(0xC, "l");
 4433     greater_equal(0xD, "ge");
 4434     less_equal(0xE, "le");
 4435     greater(0xF, "g");
 4436     overflow(0x0, "o");
 4437     no_overflow(0x1, "no");
 4438   %}
 4439 %}
 4440 
 4441 // Comparison Code, unsigned compare.  Used by FP also, with
 4442 // C2 (unordered) turned into GT or LT already.  The other bits
 4443 // C0 and C3 are turned into Carry & Zero flags.
 4444 operand cmpOpU() %{
 4445   match(Bool);
 4446 
 4447   format %{ "" %}
 4448   interface(COND_INTER) %{
 4449     equal(0x4, "e");
 4450     not_equal(0x5, "ne");
 4451     less(0x2, "b");
 4452     greater_equal(0x3, "nb");
 4453     less_equal(0x6, "be");
 4454     greater(0x7, "nbe");
 4455     overflow(0x0, "o");
 4456     no_overflow(0x1, "no");
 4457   %}
 4458 %}
 4459 
 4460 // Floating comparisons that don't require any fixup for the unordered case
 4461 operand cmpOpUCF() %{
 4462   match(Bool);
 4463   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4464             n->as_Bool()->_test._test == BoolTest::ge ||
 4465             n->as_Bool()->_test._test == BoolTest::le ||
 4466             n->as_Bool()->_test._test == BoolTest::gt);
 4467   format %{ "" %}
 4468   interface(COND_INTER) %{
 4469     equal(0x4, "e");
 4470     not_equal(0x5, "ne");
 4471     less(0x2, "b");
 4472     greater_equal(0x3, "nb");
 4473     less_equal(0x6, "be");
 4474     greater(0x7, "nbe");
 4475     overflow(0x0, "o");
 4476     no_overflow(0x1, "no");
 4477   %}
 4478 %}
 4479 
 4480 
 4481 // Floating comparisons that can be fixed up with extra conditional jumps
 4482 operand cmpOpUCF2() %{
 4483   match(Bool);
 4484   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4485             n->as_Bool()->_test._test == BoolTest::eq);
 4486   format %{ "" %}
 4487   interface(COND_INTER) %{
 4488     equal(0x4, "e");
 4489     not_equal(0x5, "ne");
 4490     less(0x2, "b");
 4491     greater_equal(0x3, "nb");
 4492     less_equal(0x6, "be");
 4493     greater(0x7, "nbe");
 4494     overflow(0x0, "o");
 4495     no_overflow(0x1, "no");
 4496   %}
 4497 %}
 4498 
 4499 // Comparison Code for FP conditional move
 4500 operand cmpOp_fcmov() %{
 4501   match(Bool);
 4502 
 4503   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4504             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4505   format %{ "" %}
 4506   interface(COND_INTER) %{
 4507     equal        (0x0C8);
 4508     not_equal    (0x1C8);
 4509     less         (0x0C0);
 4510     greater_equal(0x1C0);
 4511     less_equal   (0x0D0);
 4512     greater      (0x1D0);
 4513     overflow(0x0, "o"); // not really supported by the instruction
 4514     no_overflow(0x1, "no"); // not really supported by the instruction
 4515   %}
 4516 %}
 4517 
 4518 // Comparison Code used in long compares
 4519 operand cmpOp_commute() %{
 4520   match(Bool);
 4521 
 4522   format %{ "" %}
 4523   interface(COND_INTER) %{
 4524     equal(0x4, "e");
 4525     not_equal(0x5, "ne");
 4526     less(0xF, "g");
 4527     greater_equal(0xE, "le");
 4528     less_equal(0xD, "ge");
 4529     greater(0xC, "l");
 4530     overflow(0x0, "o");
 4531     no_overflow(0x1, "no");
 4532   %}
 4533 %}
 4534 
 4535 // Comparison Code used in unsigned long compares
 4536 operand cmpOpU_commute() %{
 4537   match(Bool);
 4538 
 4539   format %{ "" %}
 4540   interface(COND_INTER) %{
 4541     equal(0x4, "e");
 4542     not_equal(0x5, "ne");
 4543     less(0x7, "nbe");
 4544     greater_equal(0x6, "be");
 4545     less_equal(0x3, "nb");
 4546     greater(0x2, "b");
 4547     overflow(0x0, "o");
 4548     no_overflow(0x1, "no");
 4549   %}
 4550 %}
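
      // The *_commute operands above mirror each signed/unsigned condition
      // (l<->g, le<->ge, b<->nbe, be<->nb): they are meant for instructions
      // that test the flags of a compare performed with its two inputs
      // swapped, so the mirrored condition preserves the original meaning.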
 4551 
 4552 //----------OPERAND CLASSES----------------------------------------------------
 4553 // Operand Classes are groups of operands that are used to simplify
 4554 // instruction definitions by not requiring the AD writer to specify separate
 4555 // instructions for every form of operand when the instruction accepts
 4556 // multiple operand types with the same basic encoding and format.  The classic
 4557 // case of this is memory operands.
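      //
      // For example, an instruct whose operand is declared with the "memory"
      // opclass below (such as loadI further down) matches any of the listed
      // addressing forms, so no separate instruct is needed per addressing
      // mode.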
 4558 
 4559 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4560                indIndex, indIndexScale, indIndexScaleOffset);
 4561 
 4562 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4563 // This means some kind of offset is always required and you cannot use
 4564 // an oop as the offset (done when working on static globals).
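      // For example, loadL further below is formatted as "MOV $dst.lo,$mem" /
      // "MOV $dst.hi,$mem+4", so the operand must admit the extra +4
      // displacement.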
 4565 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4566                     indIndex, indIndexScale, indIndexScaleOffset);
 4567 
 4568 
 4569 //----------PIPELINE-----------------------------------------------------------
 4570 // Rules which define the behavior of the target architecture's pipeline.
 4571 pipeline %{
 4572 
 4573 //----------ATTRIBUTES---------------------------------------------------------
 4574 attributes %{
 4575   variable_size_instructions;        // Variable-sized instructions
 4576   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4577   instruction_unit_size = 1;         // An instruction is 1 byte long
 4578   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4579   instruction_fetch_units = 1;       // of 16 bytes
 4580 
 4581   // List of nop instructions
 4582   nops( MachNop );
 4583 %}
 4584 
 4585 //----------RESOURCES----------------------------------------------------------
 4586 // Resources are the functional units available to the machine
 4587 
 4588 // Generic P2/P3 pipeline
 4589 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4590 // 3 instructions decoded per cycle.
 4591 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4592 // 2 ALU ops, only ALU0 handles mul/div instructions.
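      // A pipe_class request for ALU can be filled by either ALU0 or ALU1,
      // while a class that names ALU0 explicitly (e.g. ialu_reg_reg_alu0
      // below) needs that specific unit; likewise DECODE (any of D0-D2)
      // versus the big decoder D0.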
 4593 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4594            MS0, MS1, MEM = MS0 | MS1,
 4595            BR, FPU,
 4596            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4597 
 4598 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4599 // Pipeline Description specifies the stages in the machine's pipeline
 4600 
 4601 // Generic P2/P3 pipeline
 4602 pipe_desc(S0, S1, S2, S3, S4, S5);
 4603 
 4604 //----------PIPELINE CLASSES---------------------------------------------------
 4605 // Pipeline Classes describe the stages in which input and output are
 4606 // referenced by the hardware pipeline.
 4607 
 4608 // Naming convention: ialu or fpu
 4609 // Then: _reg
 4610 // Then: _reg if there is a 2nd register
 4611 // Then: _long if it's a pair of instructions implementing a long
 4612 // Then: _fat if it requires the big decoder
 4613 //   Or: _mem if it requires the big decoder and a memory unit.
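      //
      // For example, ialu_reg_long_fat below is an integer ALU operation with
      // a register destination, expanded as a pair of instructions for a long,
      // and requiring the big decoder; fpu_reg_mem is an FPU operation on a
      // register and a memory input.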
 4614 
 4615 // Integer ALU reg operation
 4616 pipe_class ialu_reg(rRegI dst) %{
 4617     single_instruction;
 4618     dst    : S4(write);
 4619     dst    : S3(read);
 4620     DECODE : S0;        // any decoder
 4621     ALU    : S3;        // any alu
 4622 %}
 4623 
 4624 // Long ALU reg operation
 4625 pipe_class ialu_reg_long(eRegL dst) %{
 4626     instruction_count(2);
 4627     dst    : S4(write);
 4628     dst    : S3(read);
 4629     DECODE : S0(2);     // any 2 decoders
 4630     ALU    : S3(2);     // both alus
 4631 %}
 4632 
 4633 // Integer ALU reg operation using big decoder
 4634 pipe_class ialu_reg_fat(rRegI dst) %{
 4635     single_instruction;
 4636     dst    : S4(write);
 4637     dst    : S3(read);
 4638     D0     : S0;        // big decoder only
 4639     ALU    : S3;        // any alu
 4640 %}
 4641 
 4642 // Long ALU reg operation using big decoder
 4643 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4644     instruction_count(2);
 4645     dst    : S4(write);
 4646     dst    : S3(read);
 4647     D0     : S0(2);     // big decoder only; twice
 4648     ALU    : S3(2);     // any 2 alus
 4649 %}
 4650 
 4651 // Integer ALU reg-reg operation
 4652 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4653     single_instruction;
 4654     dst    : S4(write);
 4655     src    : S3(read);
 4656     DECODE : S0;        // any decoder
 4657     ALU    : S3;        // any alu
 4658 %}
 4659 
 4660 // Long ALU reg-reg operation
 4661 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4662     instruction_count(2);
 4663     dst    : S4(write);
 4664     src    : S3(read);
 4665     DECODE : S0(2);     // any 2 decoders
 4666     ALU    : S3(2);     // both alus
 4667 %}
 4668 
 4669 // Integer ALU reg-reg operation
 4670 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4671     single_instruction;
 4672     dst    : S4(write);
 4673     src    : S3(read);
 4674     D0     : S0;        // big decoder only
 4675     ALU    : S3;        // any alu
 4676 %}
 4677 
 4678 // Long ALU reg-reg operation
 4679 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4680     instruction_count(2);
 4681     dst    : S4(write);
 4682     src    : S3(read);
 4683     D0     : S0(2);     // big decoder only; twice
 4684     ALU    : S3(2);     // both alus
 4685 %}
 4686 
 4687 // Integer ALU reg-mem operation
 4688 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4689     single_instruction;
 4690     dst    : S5(write);
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     ALU    : S4;        // any alu
 4694     MEM    : S3;        // any mem
 4695 %}
 4696 
 4697 // Long ALU reg-mem operation
 4698 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4699     instruction_count(2);
 4700     dst    : S5(write);
 4701     mem    : S3(read);
 4702     D0     : S0(2);     // big decoder only; twice
 4703     ALU    : S4(2);     // any 2 alus
 4704     MEM    : S3(2);     // both mems
 4705 %}
 4706 
 4707 // Integer mem operation (prefetch)
 4708 pipe_class ialu_mem(memory mem)
 4709 %{
 4710     single_instruction;
 4711     mem    : S3(read);
 4712     D0     : S0;        // big decoder only
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 // Integer Store to Memory
 4717 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4718     single_instruction;
 4719     mem    : S3(read);
 4720     src    : S5(read);
 4721     D0     : S0;        // big decoder only
 4722     ALU    : S4;        // any alu
 4723     MEM    : S3;
 4724 %}
 4725 
 4726 // Long Store to Memory
 4727 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4728     instruction_count(2);
 4729     mem    : S3(read);
 4730     src    : S5(read);
 4731     D0     : S0(2);     // big decoder only; twice
 4732     ALU    : S4(2);     // any 2 alus
 4733     MEM    : S3(2);     // Both mems
 4734 %}
 4735 
 4736 // Integer Store to Memory
 4737 pipe_class ialu_mem_imm(memory mem) %{
 4738     single_instruction;
 4739     mem    : S3(read);
 4740     D0     : S0;        // big decoder only
 4741     ALU    : S4;        // any alu
 4742     MEM    : S3;
 4743 %}
 4744 
 4745 // Integer ALU0 reg-reg operation
 4746 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4747     single_instruction;
 4748     dst    : S4(write);
 4749     src    : S3(read);
 4750     D0     : S0;        // Big decoder only
 4751     ALU0   : S3;        // only alu0
 4752 %}
 4753 
 4754 // Integer ALU0 reg-mem operation
 4755 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4756     single_instruction;
 4757     dst    : S5(write);
 4758     mem    : S3(read);
 4759     D0     : S0;        // big decoder only
 4760     ALU0   : S4;        // ALU0 only
 4761     MEM    : S3;        // any mem
 4762 %}
 4763 
 4764 // Integer ALU reg-reg operation
 4765 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4766     single_instruction;
 4767     cr     : S4(write);
 4768     src1   : S3(read);
 4769     src2   : S3(read);
 4770     DECODE : S0;        // any decoder
 4771     ALU    : S3;        // any alu
 4772 %}
 4773 
 4774 // Integer ALU reg-imm operation
 4775 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4776     single_instruction;
 4777     cr     : S4(write);
 4778     src1   : S3(read);
 4779     DECODE : S0;        // any decoder
 4780     ALU    : S3;        // any alu
 4781 %}
 4782 
 4783 // Integer ALU reg-mem operation
 4784 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4785     single_instruction;
 4786     cr     : S4(write);
 4787     src1   : S3(read);
 4788     src2   : S3(read);
 4789     D0     : S0;        // big decoder only
 4790     ALU    : S4;        // any alu
 4791     MEM    : S3;
 4792 %}
 4793 
 4794 // Conditional move reg-reg
 4795 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4796     instruction_count(4);
 4797     y      : S4(read);
 4798     q      : S3(read);
 4799     p      : S3(read);
 4800     DECODE : S0(4);     // any decoder
 4801 %}
 4802 
 4803 // Conditional move reg-reg
 4804 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4805     single_instruction;
 4806     dst    : S4(write);
 4807     src    : S3(read);
 4808     cr     : S3(read);
 4809     DECODE : S0;        // any decoder
 4810 %}
 4811 
 4812 // Conditional move reg-mem
 4813 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4814     single_instruction;
 4815     dst    : S4(write);
 4816     src    : S3(read);
 4817     cr     : S3(read);
 4818     DECODE : S0;        // any decoder
 4819     MEM    : S3;
 4820 %}
 4821 
 4822 // Conditional move reg-reg long
 4823 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4824     single_instruction;
 4825     dst    : S4(write);
 4826     src    : S3(read);
 4827     cr     : S3(read);
 4828     DECODE : S0(2);     // any 2 decoders
 4829 %}
 4830 
 4831 // Conditional move double reg-reg
 4832 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4833     single_instruction;
 4834     dst    : S4(write);
 4835     src    : S3(read);
 4836     cr     : S3(read);
 4837     DECODE : S0;        // any decoder
 4838 %}
 4839 
 4840 // Float reg-reg operation
 4841 pipe_class fpu_reg(regDPR dst) %{
 4842     instruction_count(2);
 4843     dst    : S3(read);
 4844     DECODE : S0(2);     // any 2 decoders
 4845     FPU    : S3;
 4846 %}
 4847 
 4848 // Float reg-reg operation
 4849 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4850     instruction_count(2);
 4851     dst    : S4(write);
 4852     src    : S3(read);
 4853     DECODE : S0(2);     // any 2 decoders
 4854     FPU    : S3;
 4855 %}
 4856 
 4857 // Float reg-reg operation
 4858 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4859     instruction_count(3);
 4860     dst    : S4(write);
 4861     src1   : S3(read);
 4862     src2   : S3(read);
 4863     DECODE : S0(3);     // any 3 decoders
 4864     FPU    : S3(2);
 4865 %}
 4866 
 4867 // Float reg-reg operation
 4868 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4869     instruction_count(4);
 4870     dst    : S4(write);
 4871     src1   : S3(read);
 4872     src2   : S3(read);
 4873     src3   : S3(read);
 4874     DECODE : S0(4);     // any 4 decoder slots
 4875     FPU    : S3(2);
 4876 %}
 4877 
 4878 // Float reg-reg operation
 4879 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4880     instruction_count(4);
 4881     dst    : S4(write);
 4882     src1   : S3(read);
 4883     src2   : S3(read);
 4884     src3   : S3(read);
 4885     DECODE : S1(3);     // any 3 decoders
 4886     D0     : S0;        // Big decoder only
 4887     FPU    : S3(2);
 4888     MEM    : S3;
 4889 %}
 4890 
 4891 // Float reg-mem operation
 4892 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4893     instruction_count(2);
 4894     dst    : S5(write);
 4895     mem    : S3(read);
 4896     D0     : S0;        // big decoder only
 4897     DECODE : S1;        // any decoder for FPU POP
 4898     FPU    : S4;
 4899     MEM    : S3;        // any mem
 4900 %}
 4901 
 4902 // Float reg-mem operation
 4903 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4904     instruction_count(3);
 4905     dst    : S5(write);
 4906     src1   : S3(read);
 4907     mem    : S3(read);
 4908     D0     : S0;        // big decoder only
 4909     DECODE : S1(2);     // any decoder for FPU POP
 4910     FPU    : S4;
 4911     MEM    : S3;        // any mem
 4912 %}
 4913 
 4914 // Float mem-reg operation
 4915 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4916     instruction_count(2);
 4917     src    : S5(read);
 4918     mem    : S3(read);
 4919     DECODE : S0;        // any decoder for FPU PUSH
 4920     D0     : S1;        // big decoder only
 4921     FPU    : S4;
 4922     MEM    : S3;        // any mem
 4923 %}
 4924 
 4925 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4926     instruction_count(3);
 4927     src1   : S3(read);
 4928     src2   : S3(read);
 4929     mem    : S3(read);
 4930     DECODE : S0(2);     // any decoder for FPU PUSH
 4931     D0     : S1;        // big decoder only
 4932     FPU    : S4;
 4933     MEM    : S3;        // any mem
 4934 %}
 4935 
 4936 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4937     instruction_count(3);
 4938     src1   : S3(read);
 4939     src2   : S3(read);
 4940     mem    : S4(read);
 4941     DECODE : S0;        // any decoder for FPU PUSH
 4942     D0     : S0(2);     // big decoder only
 4943     FPU    : S4;
 4944     MEM    : S3(2);     // any mem
 4945 %}
 4946 
 4947 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4948     instruction_count(2);
 4949     src1   : S3(read);
 4950     dst    : S4(read);
 4951     D0     : S0(2);     // big decoder only
 4952     MEM    : S3(2);     // any mem
 4953 %}
 4954 
 4955 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4956     instruction_count(3);
 4957     src1   : S3(read);
 4958     src2   : S3(read);
 4959     dst    : S4(read);
 4960     D0     : S0(3);     // big decoder only
 4961     FPU    : S4;
 4962     MEM    : S3(3);     // any mem
 4963 %}
 4964 
 4965 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4966     instruction_count(3);
 4967     src1   : S4(read);
 4968     mem    : S4(read);
 4969     DECODE : S0;        // any decoder for FPU PUSH
 4970     D0     : S0(2);     // big decoder only
 4971     FPU    : S4;
 4972     MEM    : S3(2);     // any mem
 4973 %}
 4974 
 4975 // Float load constant
 4976 pipe_class fpu_reg_con(regDPR dst) %{
 4977     instruction_count(2);
 4978     dst    : S5(write);
 4979     D0     : S0;        // big decoder only for the load
 4980     DECODE : S1;        // any decoder for FPU POP
 4981     FPU    : S4;
 4982     MEM    : S3;        // any mem
 4983 %}
 4984 
 4985 // Float load constant
 4986 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4987     instruction_count(3);
 4988     dst    : S5(write);
 4989     src    : S3(read);
 4990     D0     : S0;        // big decoder only for the load
 4991     DECODE : S1(2);     // any decoder for FPU POP
 4992     FPU    : S4;
 4993     MEM    : S3;        // any mem
 4994 %}
 4995 
 4996 // UnConditional branch
 4997 pipe_class pipe_jmp( label labl ) %{
 4998     single_instruction;
 4999     BR   : S3;
 5000 %}
 5001 
 5002 // Conditional branch
 5003 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5004     single_instruction;
 5005     cr    : S1(read);
 5006     BR    : S3;
 5007 %}
 5008 
 5009 // Allocation idiom
 5010 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5011     instruction_count(1); force_serialization;
 5012     fixed_latency(6);
 5013     heap_ptr : S3(read);
 5014     DECODE   : S0(3);
 5015     D0       : S2;
 5016     MEM      : S3;
 5017     ALU      : S3(2);
 5018     dst      : S5(write);
 5019     BR       : S5;
 5020 %}
 5021 
 5022 // Generic big/slow expanded idiom
 5023 pipe_class pipe_slow(  ) %{
 5024     instruction_count(10); multiple_bundles; force_serialization;
 5025     fixed_latency(100);
 5026     D0  : S0(2);
 5027     MEM : S3(2);
 5028 %}
 5029 
 5030 // The real do-nothing guy
 5031 pipe_class empty( ) %{
 5032     instruction_count(0);
 5033 %}
 5034 
 5035 // Define the class for the Nop node
 5036 define %{
 5037    MachNop = empty;
 5038 %}
 5039 
 5040 %}
 5041 
 5042 //----------INSTRUCTIONS-------------------------------------------------------
 5043 //
 5044 // match      -- States which machine-independent subtree may be replaced
 5045 //               by this instruction.
 5046 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5047 //               selection to identify a minimum cost tree of machine
 5048 //               instructions that matches a tree of machine-independent
 5049 //               instructions.
 5050 // format     -- A string providing the disassembly for this instruction.
 5051 //               The value of an instruction's operand may be inserted
 5052 //               by referring to it with a '$' prefix.
 5053 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5054 //               to within an encode class as $primary, $secondary, and $tertiary
 5055 //               respectively.  The primary opcode is commonly used to
 5056 //               indicate the type of machine instruction, while secondary
 5057 //               and tertiary are often used for prefix options or addressing
 5058 //               modes.
 5059 // ins_encode -- A list of encode classes with parameters. The encode class
 5060 //               name must have been defined in an 'enc_class' specification
 5061 //               in the encode section of the architecture description.
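      //
      // As a concrete example, loadP further below combines these pieces:
      // match(Set dst (LoadP mem)) selects it for pointer loads, ins_cost(125)
      // steers selection, format supplies "MOV $dst,$mem" for disassembly,
      // opcode(0x8B) becomes $primary, and ins_encode(OpcP, RegMem(dst,mem))
      // emits the opcode byte followed by the reg/mem form of the operands.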
 5062 
 5063 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5064 // Load Float
 5065 instruct MoveF2LEG(legRegF dst, regF src) %{
 5066   match(Set dst src);
 5067   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5068   ins_encode %{
 5069     ShouldNotReachHere();
 5070   %}
 5071   ins_pipe( fpu_reg_reg );
 5072 %}
 5073 
 5074 // Load Float
 5075 instruct MoveLEG2F(regF dst, legRegF src) %{
 5076   match(Set dst src);
 5077   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5078   ins_encode %{
 5079     ShouldNotReachHere();
 5080   %}
 5081   ins_pipe( fpu_reg_reg );
 5082 %}
 5083 
 5084 // Load Float
 5085 instruct MoveF2VL(vlRegF dst, regF src) %{
 5086   match(Set dst src);
 5087   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5088   ins_encode %{
 5089     ShouldNotReachHere();
 5090   %}
 5091   ins_pipe( fpu_reg_reg );
 5092 %}
 5093 
 5094 // Load Float
 5095 instruct MoveVL2F(regF dst, vlRegF src) %{
 5096   match(Set dst src);
 5097   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 
 5105 
 5106 // Load Double
 5107 instruct MoveD2LEG(legRegD dst, regD src) %{
 5108   match(Set dst src);
 5109   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5110   ins_encode %{
 5111     ShouldNotReachHere();
 5112   %}
 5113   ins_pipe( fpu_reg_reg );
 5114 %}
 5115 
 5116 // Load Double
 5117 instruct MoveLEG2D(regD dst, legRegD src) %{
 5118   match(Set dst src);
 5119   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5120   ins_encode %{
 5121     ShouldNotReachHere();
 5122   %}
 5123   ins_pipe( fpu_reg_reg );
 5124 %}
 5125 
 5126 // Load Double
 5127 instruct MoveD2VL(vlRegD dst, regD src) %{
 5128   match(Set dst src);
 5129   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5130   ins_encode %{
 5131     ShouldNotReachHere();
 5132   %}
 5133   ins_pipe( fpu_reg_reg );
 5134 %}
 5135 
 5136 // Load Double
 5137 instruct MoveVL2D(regD dst, vlRegD src) %{
 5138   match(Set dst src);
 5139   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5140   ins_encode %{
 5141     ShouldNotReachHere();
 5142   %}
 5143   ins_pipe( fpu_reg_reg );
 5144 %}
 5145 
 5146 //----------BSWAP-Instruction--------------------------------------------------
 5147 instruct bytes_reverse_int(rRegI dst) %{
 5148   match(Set dst (ReverseBytesI dst));
 5149 
 5150   format %{ "BSWAP  $dst" %}
 5151   opcode(0x0F, 0xC8);
 5152   ins_encode( OpcP, OpcSReg(dst) );
 5153   ins_pipe( ialu_reg );
 5154 %}
 5155 
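      // Reversing a long held as a register pair: byte-swap each 32-bit half,
      // then exchange the halves.  E.g. hi:lo = 0x11223344:0x55667788 becomes
      // 0x44332211:0x88776655 after the two BSWAPs and 0x88776655:0x44332211
      // after the XCHG, i.e. the byte-reversed 64-bit value.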
 5156 instruct bytes_reverse_long(eRegL dst) %{
 5157   match(Set dst (ReverseBytesL dst));
 5158 
 5159   format %{ "BSWAP  $dst.lo\n\t"
 5160             "BSWAP  $dst.hi\n\t"
 5161             "XCHG   $dst.lo $dst.hi" %}
 5162 
 5163   ins_cost(125);
 5164   ins_encode( bswap_long_bytes(dst) );
 5165   ins_pipe( ialu_reg_reg);
 5166 %}
 5167 
 5168 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5169   match(Set dst (ReverseBytesUS dst));
 5170   effect(KILL cr);
 5171 
 5172   format %{ "BSWAP  $dst\n\t"
 5173             "SHR    $dst,16\n\t" %}
 5174   ins_encode %{
 5175     __ bswapl($dst$$Register);
 5176     __ shrl($dst$$Register, 16);
 5177   %}
 5178   ins_pipe( ialu_reg );
 5179 %}
 5180 
 5181 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5182   match(Set dst (ReverseBytesS dst));
 5183   effect(KILL cr);
 5184 
 5185   format %{ "BSWAP  $dst\n\t"
 5186             "SAR    $dst,16\n\t" %}
 5187   ins_encode %{
 5188     __ bswapl($dst$$Register);
 5189     __ sarl($dst$$Register, 16);
 5190   %}
 5191   ins_pipe( ialu_reg );
 5192 %}
 5193 
 5194 
 5195 //---------- Zeros Count Instructions ------------------------------------------
 5196 
 5197 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5198   predicate(UseCountLeadingZerosInstruction);
 5199   match(Set dst (CountLeadingZerosI src));
 5200   effect(KILL cr);
 5201 
 5202   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5203   ins_encode %{
 5204     __ lzcntl($dst$$Register, $src$$Register);
 5205   %}
 5206   ins_pipe(ialu_reg);
 5207 %}
 5208 
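      // BSR fallback when LZCNT is unavailable: BSR yields the bit index of
      // the highest set bit, so the leading-zero count is 31 - index, computed
      // here as NEG followed by ADD 31.  A zero input sets ZF and $dst is
      // loaded with -1, giving -(-1) + 31 = 32, the defined result for zero.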
 5209 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5210   predicate(!UseCountLeadingZerosInstruction);
 5211   match(Set dst (CountLeadingZerosI src));
 5212   effect(KILL cr);
 5213 
 5214   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5215             "JNZ    skip\n\t"
 5216             "MOV    $dst, -1\n"
 5217       "skip:\n\t"
 5218             "NEG    $dst\n\t"
 5219             "ADD    $dst, 31" %}
 5220   ins_encode %{
 5221     Register Rdst = $dst$$Register;
 5222     Register Rsrc = $src$$Register;
 5223     Label skip;
 5224     __ bsrl(Rdst, Rsrc);
 5225     __ jccb(Assembler::notZero, skip);
 5226     __ movl(Rdst, -1);
 5227     __ bind(skip);
 5228     __ negl(Rdst);
 5229     __ addl(Rdst, BitsPerInt - 1);
 5230   %}
 5231   ins_pipe(ialu_reg);
 5232 %}
 5233 
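      // LZCNT (and TZCNT further below) sets the carry flag when its source is
      // zero, so JNC falls through to the second count only when the first
      // word examined was all zeroes; the ADD of 32 then accounts for the word
      // already scanned.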
 5234 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5235   predicate(UseCountLeadingZerosInstruction);
 5236   match(Set dst (CountLeadingZerosL src));
 5237   effect(TEMP dst, KILL cr);
 5238 
 5239   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5240             "JNC    done\n\t"
 5241             "LZCNT  $dst, $src.lo\n\t"
 5242             "ADD    $dst, 32\n"
 5243       "done:" %}
 5244   ins_encode %{
 5245     Register Rdst = $dst$$Register;
 5246     Register Rsrc = $src$$Register;
 5247     Label done;
 5248     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5249     __ jccb(Assembler::carryClear, done);
 5250     __ lzcntl(Rdst, Rsrc);
 5251     __ addl(Rdst, BitsPerInt);
 5252     __ bind(done);
 5253   %}
 5254   ins_pipe(ialu_reg);
 5255 %}
 5256 
 5257 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5258   predicate(!UseCountLeadingZerosInstruction);
 5259   match(Set dst (CountLeadingZerosL src));
 5260   effect(TEMP dst, KILL cr);
 5261 
 5262   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5263             "JZ     msw_is_zero\n\t"
 5264             "ADD    $dst, 32\n\t"
 5265             "JMP    not_zero\n"
 5266       "msw_is_zero:\n\t"
 5267             "BSR    $dst, $src.lo\n\t"
 5268             "JNZ    not_zero\n\t"
 5269             "MOV    $dst, -1\n"
 5270       "not_zero:\n\t"
 5271             "NEG    $dst\n\t"
 5272             "ADD    $dst, 63\n" %}
 5273   ins_encode %{
 5274     Register Rdst = $dst$$Register;
 5275     Register Rsrc = $src$$Register;
 5276     Label msw_is_zero;
 5277     Label not_zero;
 5278     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5279     __ jccb(Assembler::zero, msw_is_zero);
 5280     __ addl(Rdst, BitsPerInt);
 5281     __ jmpb(not_zero);
 5282     __ bind(msw_is_zero);
 5283     __ bsrl(Rdst, Rsrc);
 5284     __ jccb(Assembler::notZero, not_zero);
 5285     __ movl(Rdst, -1);
 5286     __ bind(not_zero);
 5287     __ negl(Rdst);
 5288     __ addl(Rdst, BitsPerLong - 1);
 5289   %}
 5290   ins_pipe(ialu_reg);
 5291 %}
 5292 
 5293 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5294   predicate(UseCountTrailingZerosInstruction);
 5295   match(Set dst (CountTrailingZerosI src));
 5296   effect(KILL cr);
 5297 
 5298   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5299   ins_encode %{
 5300     __ tzcntl($dst$$Register, $src$$Register);
 5301   %}
 5302   ins_pipe(ialu_reg);
 5303 %}
 5304 
 5305 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5306   predicate(!UseCountTrailingZerosInstruction);
 5307   match(Set dst (CountTrailingZerosI src));
 5308   effect(KILL cr);
 5309 
 5310   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5311             "JNZ    done\n\t"
 5312             "MOV    $dst, 32\n"
 5313       "done:" %}
 5314   ins_encode %{
 5315     Register Rdst = $dst$$Register;
 5316     Label done;
 5317     __ bsfl(Rdst, $src$$Register);
 5318     __ jccb(Assembler::notZero, done);
 5319     __ movl(Rdst, BitsPerInt);
 5320     __ bind(done);
 5321   %}
 5322   ins_pipe(ialu_reg);
 5323 %}
 5324 
 5325 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5326   predicate(UseCountTrailingZerosInstruction);
 5327   match(Set dst (CountTrailingZerosL src));
 5328   effect(TEMP dst, KILL cr);
 5329 
 5330   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5331             "JNC    done\n\t"
 5332             "TZCNT  $dst, $src.hi\n\t"
 5333             "ADD    $dst, 32\n"
 5334       "done:" %}
 5335   ins_encode %{
 5336     Register Rdst = $dst$$Register;
 5337     Register Rsrc = $src$$Register;
 5338     Label done;
 5339     __ tzcntl(Rdst, Rsrc);
 5340     __ jccb(Assembler::carryClear, done);
 5341     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5342     __ addl(Rdst, BitsPerInt);
 5343     __ bind(done);
 5344   %}
 5345   ins_pipe(ialu_reg);
 5346 %}
 5347 
 5348 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5349   predicate(!UseCountTrailingZerosInstruction);
 5350   match(Set dst (CountTrailingZerosL src));
 5351   effect(TEMP dst, KILL cr);
 5352 
 5353   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5354             "JNZ    done\n\t"
 5355             "BSF    $dst, $src.hi\n\t"
 5356             "JNZ    msw_not_zero\n\t"
 5357             "MOV    $dst, 32\n"
 5358       "msw_not_zero:\n\t"
 5359             "ADD    $dst, 32\n"
 5360       "done:" %}
 5361   ins_encode %{
 5362     Register Rdst = $dst$$Register;
 5363     Register Rsrc = $src$$Register;
 5364     Label msw_not_zero;
 5365     Label done;
 5366     __ bsfl(Rdst, Rsrc);
 5367     __ jccb(Assembler::notZero, done);
 5368     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5369     __ jccb(Assembler::notZero, msw_not_zero);
 5370     __ movl(Rdst, BitsPerInt);
 5371     __ bind(msw_not_zero);
 5372     __ addl(Rdst, BitsPerInt);
 5373     __ bind(done);
 5374   %}
 5375   ins_pipe(ialu_reg);
 5376 %}
 5377 
 5378 
 5379 //---------- Population Count Instructions -------------------------------------
 5380 
 5381 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5382   predicate(UsePopCountInstruction);
 5383   match(Set dst (PopCountI src));
 5384   effect(KILL cr);
 5385 
 5386   format %{ "POPCNT $dst, $src" %}
 5387   ins_encode %{
 5388     __ popcntl($dst$$Register, $src$$Register);
 5389   %}
 5390   ins_pipe(ialu_reg);
 5391 %}
 5392 
 5393 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5394   predicate(UsePopCountInstruction);
 5395   match(Set dst (PopCountI (LoadI mem)));
 5396   effect(KILL cr);
 5397 
 5398   format %{ "POPCNT $dst, $mem" %}
 5399   ins_encode %{
 5400     __ popcntl($dst$$Register, $mem$$Address);
 5401   %}
 5402   ins_pipe(ialu_reg);
 5403 %}
 5404 
 5405 // Note: Long.bitCount(long) returns an int.
 5406 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5407   predicate(UsePopCountInstruction);
 5408   match(Set dst (PopCountL src));
 5409   effect(KILL cr, TEMP tmp, TEMP dst);
 5410 
 5411   format %{ "POPCNT $dst, $src.lo\n\t"
 5412             "POPCNT $tmp, $src.hi\n\t"
 5413             "ADD    $dst, $tmp" %}
 5414   ins_encode %{
 5415     __ popcntl($dst$$Register, $src$$Register);
 5416     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5417     __ addl($dst$$Register, $tmp$$Register);
 5418   %}
 5419   ins_pipe(ialu_reg);
 5420 %}
 5421 
 5422 // Note: Long.bitCount(long) returns an int.
 5423 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5424   predicate(UsePopCountInstruction);
 5425   match(Set dst (PopCountL (LoadL mem)));
 5426   effect(KILL cr, TEMP tmp, TEMP dst);
 5427 
 5428   format %{ "POPCNT $dst, $mem\n\t"
 5429             "POPCNT $tmp, $mem+4\n\t"
 5430             "ADD    $dst, $tmp" %}
 5431   ins_encode %{
 5432     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5433     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5434     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5435     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5436     __ addl($dst$$Register, $tmp$$Register);
 5437   %}
 5438   ins_pipe(ialu_reg);
 5439 %}
 5440 
 5441 
 5442 //----------Load/Store/Move Instructions---------------------------------------
 5443 //----------Load Instructions--------------------------------------------------
 5444 // Load Byte (8bit signed)
 5445 instruct loadB(xRegI dst, memory mem) %{
 5446   match(Set dst (LoadB mem));
 5447 
 5448   ins_cost(125);
 5449   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5450 
 5451   ins_encode %{
 5452     __ movsbl($dst$$Register, $mem$$Address);
 5453   %}
 5454 
 5455   ins_pipe(ialu_reg_mem);
 5456 %}
 5457 
 5458 // Load Byte (8bit signed) into Long Register
 5459 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5460   match(Set dst (ConvI2L (LoadB mem)));
 5461   effect(KILL cr);
 5462 
 5463   ins_cost(375);
 5464   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5465             "MOV    $dst.hi,$dst.lo\n\t"
 5466             "SAR    $dst.hi,7" %}
 5467 
 5468   ins_encode %{
 5469     __ movsbl($dst$$Register, $mem$$Address);
 5470     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5471     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5472   %}
 5473 
 5474   ins_pipe(ialu_reg_mem);
 5475 %}
 5476 
 5477 // Load Unsigned Byte (8bit UNsigned)
 5478 instruct loadUB(xRegI dst, memory mem) %{
 5479   match(Set dst (LoadUB mem));
 5480 
 5481   ins_cost(125);
 5482   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5483 
 5484   ins_encode %{
 5485     __ movzbl($dst$$Register, $mem$$Address);
 5486   %}
 5487 
 5488   ins_pipe(ialu_reg_mem);
 5489 %}
 5490 
 5491 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5492 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5493   match(Set dst (ConvI2L (LoadUB mem)));
 5494   effect(KILL cr);
 5495 
 5496   ins_cost(250);
 5497   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5498             "XOR    $dst.hi,$dst.hi" %}
 5499 
 5500   ins_encode %{
 5501     Register Rdst = $dst$$Register;
 5502     __ movzbl(Rdst, $mem$$Address);
 5503     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5504   %}
 5505 
 5506   ins_pipe(ialu_reg_mem);
 5507 %}
 5508 
 5509 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5510 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5511   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5512   effect(KILL cr);
 5513 
 5514   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5515             "XOR    $dst.hi,$dst.hi\n\t"
 5516             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5517   ins_encode %{
 5518     Register Rdst = $dst$$Register;
 5519     __ movzbl(Rdst, $mem$$Address);
 5520     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5521     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5522   %}
 5523   ins_pipe(ialu_reg_mem);
 5524 %}
 5525 
 5526 // Load Short (16bit signed)
 5527 instruct loadS(rRegI dst, memory mem) %{
 5528   match(Set dst (LoadS mem));
 5529 
 5530   ins_cost(125);
 5531   format %{ "MOVSX  $dst,$mem\t# short" %}
 5532 
 5533   ins_encode %{
 5534     __ movswl($dst$$Register, $mem$$Address);
 5535   %}
 5536 
 5537   ins_pipe(ialu_reg_mem);
 5538 %}
 5539 
 5540 // Load Short (16 bit signed) to Byte (8 bit signed)
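      // The (LoadS << 24) >> 24 pattern is how the ideal graph expresses a
      // narrowing conversion to byte, so it can be matched as a single
      // sign-extending byte load; the loadUS2B/loadI2B/loadI2S forms below
      // fold the analogous patterns the same way.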
 5541 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5542   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5543 
 5544   ins_cost(125);
 5545   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5546   ins_encode %{
 5547     __ movsbl($dst$$Register, $mem$$Address);
 5548   %}
 5549   ins_pipe(ialu_reg_mem);
 5550 %}
 5551 
 5552 // Load Short (16bit signed) into Long Register
 5553 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5554   match(Set dst (ConvI2L (LoadS mem)));
 5555   effect(KILL cr);
 5556 
 5557   ins_cost(375);
 5558   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5559             "MOV    $dst.hi,$dst.lo\n\t"
 5560             "SAR    $dst.hi,15" %}
 5561 
 5562   ins_encode %{
 5563     __ movswl($dst$$Register, $mem$$Address);
 5564     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5565     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5566   %}
 5567 
 5568   ins_pipe(ialu_reg_mem);
 5569 %}
 5570 
 5571 // Load Unsigned Short/Char (16bit unsigned)
 5572 instruct loadUS(rRegI dst, memory mem) %{
 5573   match(Set dst (LoadUS mem));
 5574 
 5575   ins_cost(125);
 5576   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5577 
 5578   ins_encode %{
 5579     __ movzwl($dst$$Register, $mem$$Address);
 5580   %}
 5581 
 5582   ins_pipe(ialu_reg_mem);
 5583 %}
 5584 
 5585 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5586 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5587   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5588 
 5589   ins_cost(125);
 5590   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5591   ins_encode %{
 5592     __ movsbl($dst$$Register, $mem$$Address);
 5593   %}
 5594   ins_pipe(ialu_reg_mem);
 5595 %}
 5596 
 5597 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5598 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5599   match(Set dst (ConvI2L (LoadUS mem)));
 5600   effect(KILL cr);
 5601 
 5602   ins_cost(250);
 5603   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5604             "XOR    $dst.hi,$dst.hi" %}
 5605 
 5606   ins_encode %{
 5607     __ movzwl($dst$$Register, $mem$$Address);
 5608     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5609   %}
 5610 
 5611   ins_pipe(ialu_reg_mem);
 5612 %}
 5613 
 5614 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5615 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5616   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5617   effect(KILL cr);
 5618 
 5619   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5620             "XOR    $dst.hi,$dst.hi" %}
 5621   ins_encode %{
 5622     Register Rdst = $dst$$Register;
 5623     __ movzbl(Rdst, $mem$$Address);
 5624     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5630 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5631   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5632   effect(KILL cr);
 5633 
 5634   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5635             "XOR    $dst.hi,$dst.hi\n\t"
 5636             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5637   ins_encode %{
 5638     Register Rdst = $dst$$Register;
 5639     __ movzwl(Rdst, $mem$$Address);
 5640     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5641     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5642   %}
 5643   ins_pipe(ialu_reg_mem);
 5644 %}
 5645 
 5646 // Load Integer
 5647 instruct loadI(rRegI dst, memory mem) %{
 5648   match(Set dst (LoadI mem));
 5649 
 5650   ins_cost(125);
 5651   format %{ "MOV    $dst,$mem\t# int" %}
 5652 
 5653   ins_encode %{
 5654     __ movl($dst$$Register, $mem$$Address);
 5655   %}
 5656 
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5661 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5662   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5663 
 5664   ins_cost(125);
 5665   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5666   ins_encode %{
 5667     __ movsbl($dst$$Register, $mem$$Address);
 5668   %}
 5669   ins_pipe(ialu_reg_mem);
 5670 %}
 5671 
 5672 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5673 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5674   match(Set dst (AndI (LoadI mem) mask));
 5675 
 5676   ins_cost(125);
 5677   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5678   ins_encode %{
 5679     __ movzbl($dst$$Register, $mem$$Address);
 5680   %}
 5681   ins_pipe(ialu_reg_mem);
 5682 %}
 5683 
 5684 // Load Integer (32 bit signed) to Short (16 bit signed)
 5685 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5686   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5687 
 5688   ins_cost(125);
 5689   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5690   ins_encode %{
 5691     __ movswl($dst$$Register, $mem$$Address);
 5692   %}
 5693   ins_pipe(ialu_reg_mem);
 5694 %}
 5695 
 5696 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5697 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5698   match(Set dst (AndI (LoadI mem) mask));
 5699 
 5700   ins_cost(125);
 5701   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5702   ins_encode %{
 5703     __ movzwl($dst$$Register, $mem$$Address);
 5704   %}
 5705   ins_pipe(ialu_reg_mem);
 5706 %}
 5707 
 5708 // Load Integer into Long Register
 5709 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5710   match(Set dst (ConvI2L (LoadI mem)));
 5711   effect(KILL cr);
 5712 
 5713   ins_cost(375);
 5714   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5715             "MOV    $dst.hi,$dst.lo\n\t"
 5716             "SAR    $dst.hi,31" %}
 5717 
 5718   ins_encode %{
 5719     __ movl($dst$$Register, $mem$$Address);
 5720     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5721     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5722   %}
 5723 
 5724   ins_pipe(ialu_reg_mem);
 5725 %}
 5726 
 5727 // Load Integer with mask 0xFF into Long Register
 5728 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5729   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5730   effect(KILL cr);
 5731 
 5732   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5733             "XOR    $dst.hi,$dst.hi" %}
 5734   ins_encode %{
 5735     Register Rdst = $dst$$Register;
 5736     __ movzbl(Rdst, $mem$$Address);
 5737     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5738   %}
 5739   ins_pipe(ialu_reg_mem);
 5740 %}
 5741 
 5742 // Load Integer with mask 0xFFFF into Long Register
 5743 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5744   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5745   effect(KILL cr);
 5746 
 5747   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5748             "XOR    $dst.hi,$dst.hi" %}
 5749   ins_encode %{
 5750     Register Rdst = $dst$$Register;
 5751     __ movzwl(Rdst, $mem$$Address);
 5752     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5753   %}
 5754   ins_pipe(ialu_reg_mem);
 5755 %}
 5756 
 5757 // Load Integer with 31-bit mask into Long Register
 5758 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5759   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5760   effect(KILL cr);
 5761 
 5762   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5763             "XOR    $dst.hi,$dst.hi\n\t"
 5764             "AND    $dst.lo,$mask" %}
 5765   ins_encode %{
 5766     Register Rdst = $dst$$Register;
 5767     __ movl(Rdst, $mem$$Address);
 5768     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5769     __ andl(Rdst, $mask$$constant);
 5770   %}
 5771   ins_pipe(ialu_reg_mem);
 5772 %}
 5773 
 5774 // Load Unsigned Integer into Long Register
 5775 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5776   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5777   effect(KILL cr);
 5778 
 5779   ins_cost(250);
 5780   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5781             "XOR    $dst.hi,$dst.hi" %}
 5782 
 5783   ins_encode %{
 5784     __ movl($dst$$Register, $mem$$Address);
 5785     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5786   %}
 5787 
 5788   ins_pipe(ialu_reg_mem);
 5789 %}
 5790 
 5791 // Load Long.  Cannot clobber address while loading, so restrict address
 5792 // register to ESI
 5793 instruct loadL(eRegL dst, load_long_memory mem) %{
 5794   predicate(!((LoadLNode*)n)->require_atomic_access());
 5795   match(Set dst (LoadL mem));
 5796 
 5797   ins_cost(250);
 5798   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5799             "MOV    $dst.hi,$mem+4" %}
 5800 
 5801   ins_encode %{
 5802     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5803     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5804     __ movl($dst$$Register, Amemlo);
 5805     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5806   %}
 5807 
 5808   ins_pipe(ialu_reg_long_mem);
 5809 %}
 5810 
 5811 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5812 // then store it down to the stack and reload on the int
 5813 // side.
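      // The 64-bit FILD/FISTP pair moves the whole value in a single memory
      // access in each direction, which is what provides the required
      // atomicity; the SSE variants below rely on MOVSD the same way.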
 5814 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5815   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5816   match(Set dst (LoadL mem));
 5817 
 5818   ins_cost(200);
 5819   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5820             "FISTp  $dst" %}
 5821   ins_encode(enc_loadL_volatile(mem,dst));
 5822   ins_pipe( fpu_reg_mem );
 5823 %}
 5824 
 5825 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5826   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5827   match(Set dst (LoadL mem));
 5828   effect(TEMP tmp);
 5829   ins_cost(180);
 5830   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5831             "MOVSD  $dst,$tmp" %}
 5832   ins_encode %{
 5833     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5834     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5835   %}
 5836   ins_pipe( pipe_slow );
 5837 %}
 5838 
 5839 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5840   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5841   match(Set dst (LoadL mem));
 5842   effect(TEMP tmp);
 5843   ins_cost(160);
 5844   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5845             "MOVD   $dst.lo,$tmp\n\t"
 5846             "PSRLQ  $tmp,32\n\t"
 5847             "MOVD   $dst.hi,$tmp" %}
 5848   ins_encode %{
 5849     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5850     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5851     __ psrlq($tmp$$XMMRegister, 32);
 5852     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5853   %}
 5854   ins_pipe( pipe_slow );
 5855 %}
 5856 
 5857 // Load Range
 5858 instruct loadRange(rRegI dst, memory mem) %{
 5859   match(Set dst (LoadRange mem));
 5860 
 5861   ins_cost(125);
 5862   format %{ "MOV    $dst,$mem" %}
 5863   opcode(0x8B);
 5864   ins_encode( OpcP, RegMem(dst,mem));
 5865   ins_pipe( ialu_reg_mem );
 5866 %}
 5867 
 5868 
 5869 // Load Pointer
 5870 instruct loadP(eRegP dst, memory mem) %{
 5871   match(Set dst (LoadP mem));
 5872 
 5873   ins_cost(125);
 5874   format %{ "MOV    $dst,$mem" %}
 5875   opcode(0x8B);
 5876   ins_encode( OpcP, RegMem(dst,mem));
 5877   ins_pipe( ialu_reg_mem );
 5878 %}
 5879 
 5880 // Load Klass Pointer
 5881 instruct loadKlass(eRegP dst, memory mem) %{
 5882   match(Set dst (LoadKlass mem));
 5883 
 5884   ins_cost(125);
 5885   format %{ "MOV    $dst,$mem" %}
 5886   opcode(0x8B);
 5887   ins_encode( OpcP, RegMem(dst,mem));
 5888   ins_pipe( ialu_reg_mem );
 5889 %}
 5890 
 5891 // Load Double
 5892 instruct loadDPR(regDPR dst, memory mem) %{
 5893   predicate(UseSSE<=1);
 5894   match(Set dst (LoadD mem));
 5895 
 5896   ins_cost(150);
 5897   format %{ "FLD_D  ST,$mem\n\t"
 5898             "FSTP   $dst" %}
 5899   opcode(0xDD);               /* DD /0 */
 5900   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5901               Pop_Reg_DPR(dst) );
 5902   ins_pipe( fpu_reg_mem );
 5903 %}
 5904 
 5905 // Load Double to XMM
 5906 instruct loadD(regD dst, memory mem) %{
 5907   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5908   match(Set dst (LoadD mem));
 5909   ins_cost(145);
 5910   format %{ "MOVSD  $dst,$mem" %}
 5911   ins_encode %{
 5912     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5913   %}
 5914   ins_pipe( pipe_slow );
 5915 %}
 5916 
 5917 instruct loadD_partial(regD dst, memory mem) %{
 5918   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5919   match(Set dst (LoadD mem));
 5920   ins_cost(145);
 5921   format %{ "MOVLPD $dst,$mem" %}
 5922   ins_encode %{
 5923     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5924   %}
 5925   ins_pipe( pipe_slow );
 5926 %}
 5927 
 5928 // Load to XMM register (single-precision floating point)
 5929 // MOVSS instruction
 5930 instruct loadF(regF dst, memory mem) %{
 5931   predicate(UseSSE>=1);
 5932   match(Set dst (LoadF mem));
 5933   ins_cost(145);
 5934   format %{ "MOVSS  $dst,$mem" %}
 5935   ins_encode %{
 5936     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5937   %}
 5938   ins_pipe( pipe_slow );
 5939 %}
 5940 
 5941 // Load Float
 5942 instruct loadFPR(regFPR dst, memory mem) %{
 5943   predicate(UseSSE==0);
 5944   match(Set dst (LoadF mem));
 5945 
 5946   ins_cost(150);
 5947   format %{ "FLD_S  ST,$mem\n\t"
 5948             "FSTP   $dst" %}
 5949   opcode(0xD9);               /* D9 /0 */
 5950   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5951               Pop_Reg_FPR(dst) );
 5952   ins_pipe( fpu_reg_mem );
 5953 %}
 5954 
 5955 // Load Effective Address
 5956 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5957   match(Set dst mem);
 5958 
 5959   ins_cost(110);
 5960   format %{ "LEA    $dst,$mem" %}
 5961   opcode(0x8D);
 5962   ins_encode( OpcP, RegMem(dst,mem));
 5963   ins_pipe( ialu_reg_reg_fat );
 5964 %}
 5965 
 5966 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5967   match(Set dst mem);
 5968 
 5969   ins_cost(110);
 5970   format %{ "LEA    $dst,$mem" %}
 5971   opcode(0x8D);
 5972   ins_encode( OpcP, RegMem(dst,mem));
 5973   ins_pipe( ialu_reg_reg_fat );
 5974 %}
 5975 
 5976 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5977   match(Set dst mem);
 5978 
 5979   ins_cost(110);
 5980   format %{ "LEA    $dst,$mem" %}
 5981   opcode(0x8D);
 5982   ins_encode( OpcP, RegMem(dst,mem));
 5983   ins_pipe( ialu_reg_reg_fat );
 5984 %}
 5985 
 5986 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5987   match(Set dst mem);
 5988 
 5989   ins_cost(110);
 5990   format %{ "LEA    $dst,$mem" %}
 5991   opcode(0x8D);
 5992   ins_encode( OpcP, RegMem(dst,mem));
 5993   ins_pipe( ialu_reg_reg_fat );
 5994 %}
 5995 
 5996 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5997   match(Set dst mem);
 5998 
 5999   ins_cost(110);
 6000   format %{ "LEA    $dst,$mem" %}
 6001   opcode(0x8D);
 6002   ins_encode( OpcP, RegMem(dst,mem));
 6003   ins_pipe( ialu_reg_reg_fat );
 6004 %}
 6005 
 6006 // Load Constant
 6007 instruct loadConI(rRegI dst, immI src) %{
 6008   match(Set dst src);
 6009 
 6010   format %{ "MOV    $dst,$src" %}
 6011   ins_encode( LdImmI(dst, src) );
 6012   ins_pipe( ialu_reg_fat );
 6013 %}
 6014 
 6015 // Load Constant zero
 6016 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6017   match(Set dst src);
 6018   effect(KILL cr);
 6019 
 6020   ins_cost(50);
 6021   format %{ "XOR    $dst,$dst" %}
 6022   opcode(0x33);  /* + rd */
 6023   ins_encode( OpcP, RegReg( dst, dst ) );
 6024   ins_pipe( ialu_reg );
 6025 %}
 6026 
 6027 instruct loadConP(eRegP dst, immP src) %{
 6028   match(Set dst src);
 6029 
 6030   format %{ "MOV    $dst,$src" %}
 6031   opcode(0xB8);  /* + rd */
 6032   ins_encode( LdImmP(dst, src) );
 6033   ins_pipe( ialu_reg_fat );
 6034 %}
 6035 
 6036 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6037   match(Set dst src);
 6038   effect(KILL cr);
 6039   ins_cost(200);
 6040   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6041             "MOV    $dst.hi,$src.hi" %}
 6042   opcode(0xB8);
 6043   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6044   ins_pipe( ialu_reg_long_fat );
 6045 %}
 6046 
 6047 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6048   match(Set dst src);
 6049   effect(KILL cr);
 6050   ins_cost(150);
 6051   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6052             "XOR    $dst.hi,$dst.hi" %}
 6053   opcode(0x33,0x33);
 6054   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6055   ins_pipe( ialu_reg_long );
 6056 %}
 6057 
 6058 // The instruction usage is guarded by predicate in operand immFPR().
 6059 instruct loadConFPR(regFPR dst, immFPR con) %{
 6060   match(Set dst con);
 6061   ins_cost(125);
 6062   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6063             "FSTP   $dst" %}
 6064   ins_encode %{
 6065     __ fld_s($constantaddress($con));
 6066     __ fstp_d($dst$$reg);
 6067   %}
 6068   ins_pipe(fpu_reg_con);
 6069 %}
 6070 
 6071 // The instruction usage is guarded by predicate in operand immFPR0().
 6072 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6073   match(Set dst con);
 6074   ins_cost(125);
 6075   format %{ "FLDZ   ST\n\t"
 6076             "FSTP   $dst" %}
 6077   ins_encode %{
 6078     __ fldz();
 6079     __ fstp_d($dst$$reg);
 6080   %}
 6081   ins_pipe(fpu_reg_con);
 6082 %}
 6083 
 6084 // The instruction usage is guarded by predicate in operand immFPR1().
 6085 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6086   match(Set dst con);
 6087   ins_cost(125);
 6088   format %{ "FLD1   ST\n\t"
 6089             "FSTP   $dst" %}
 6090   ins_encode %{
 6091     __ fld1();
 6092     __ fstp_d($dst$$reg);
 6093   %}
 6094   ins_pipe(fpu_reg_con);
 6095 %}
 6096 
 6097 // The instruction usage is guarded by predicate in operand immF().
 6098 instruct loadConF(regF dst, immF con) %{
 6099   match(Set dst con);
 6100   ins_cost(125);
 6101   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6102   ins_encode %{
 6103     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6104   %}
 6105   ins_pipe(pipe_slow);
 6106 %}
 6107 
 6108 // The instruction usage is guarded by predicate in operand immF0().
 6109 instruct loadConF0(regF dst, immF0 src) %{
 6110   match(Set dst src);
 6111   ins_cost(100);
 6112   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6113   ins_encode %{
 6114     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6115   %}
 6116   ins_pipe(pipe_slow);
 6117 %}
 6118 
 6119 // The instruction usage is guarded by predicate in operand immDPR().
 6120 instruct loadConDPR(regDPR dst, immDPR con) %{
 6121   match(Set dst con);
 6122   ins_cost(125);
 6123 
 6124   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6125             "FSTP   $dst" %}
 6126   ins_encode %{
 6127     __ fld_d($constantaddress($con));
 6128     __ fstp_d($dst$$reg);
 6129   %}
 6130   ins_pipe(fpu_reg_con);
 6131 %}
 6132 
 6133 // The instruction usage is guarded by predicate in operand immDPR0().
 6134 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6135   match(Set dst con);
 6136   ins_cost(125);
 6137 
 6138   format %{ "FLDZ   ST\n\t"
 6139             "FSTP   $dst" %}
 6140   ins_encode %{
 6141     __ fldz();
 6142     __ fstp_d($dst$$reg);
 6143   %}
 6144   ins_pipe(fpu_reg_con);
 6145 %}
 6146 
 6147 // The instruction usage is guarded by predicate in operand immDPR1().
 6148 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6149   match(Set dst con);
 6150   ins_cost(125);
 6151 
 6152   format %{ "FLD1   ST\n\t"
 6153             "FSTP   $dst" %}
 6154   ins_encode %{
 6155     __ fld1();
 6156     __ fstp_d($dst$$reg);
 6157   %}
 6158   ins_pipe(fpu_reg_con);
 6159 %}
 6160 
 6161 // The instruction usage is guarded by predicate in operand immD().
 6162 instruct loadConD(regD dst, immD con) %{
 6163   match(Set dst con);
 6164   ins_cost(125);
 6165   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6166   ins_encode %{
 6167     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6168   %}
 6169   ins_pipe(pipe_slow);
 6170 %}
 6171 
 6172 // The instruction usage is guarded by predicate in operand immD0().
 6173 instruct loadConD0(regD dst, immD0 src) %{
 6174   match(Set dst src);
 6175   ins_cost(100);
 6176   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6177   ins_encode %{
 6178     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6179   %}
 6180   ins_pipe( pipe_slow );
 6181 %}
 6182 
 6183 // Load Stack Slot
 6184 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6185   match(Set dst src);
 6186   ins_cost(125);
 6187 
 6188   format %{ "MOV    $dst,$src" %}
 6189   opcode(0x8B);
 6190   ins_encode( OpcP, RegMem(dst,src));
 6191   ins_pipe( ialu_reg_mem );
 6192 %}
 6193 
 6194 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6195   match(Set dst src);
 6196 
 6197   ins_cost(200);
 6198   format %{ "MOV    $dst.lo,$src\n\t"
 6199             "MOV    $dst.hi,$src+4" %}
 6200   opcode(0x8B, 0x8B);
 6201   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6202   ins_pipe( ialu_mem_long_reg );
 6203 %}
 6204 
 6205 // Load Stack Slot
 6206 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6207   match(Set dst src);
 6208   ins_cost(125);
 6209 
 6210   format %{ "MOV    $dst,$src" %}
 6211   opcode(0x8B);
 6212   ins_encode( OpcP, RegMem(dst,src));
 6213   ins_pipe( ialu_reg_mem );
 6214 %}
 6215 
 6216 // Load Stack Slot
 6217 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6218   match(Set dst src);
 6219   ins_cost(125);
 6220 
 6221   format %{ "FLD_S  $src\n\t"
 6222             "FSTP   $dst" %}
 6223   opcode(0xD9);               /* D9 /0, FLD m32real */
 6224   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6225               Pop_Reg_FPR(dst) );
 6226   ins_pipe( fpu_reg_mem );
 6227 %}
 6228 
 6229 // Load Stack Slot
 6230 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6231   match(Set dst src);
 6232   ins_cost(125);
 6233 
 6234   format %{ "FLD_D  $src\n\t"
 6235             "FSTP   $dst" %}
 6236   opcode(0xDD);               /* DD /0, FLD m64real */
 6237   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6238               Pop_Reg_DPR(dst) );
 6239   ins_pipe( fpu_reg_mem );
 6240 %}
 6241 
 6242 // Prefetch instructions for allocation.
 6243 // Must be safe to execute with invalid address (cannot fault).
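      // AllocatePrefetchInstr selects the variant: 0 emits PREFETCHNTA,
      // 1 emits PREFETCHT0, 2 emits PREFETCHT2, 3 emits PREFETCHW; with
      // UseSSE==0 and AllocatePrefetchInstr!=3 no prefetch is emitted at all.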
 6244 
 6245 instruct prefetchAlloc0( memory mem ) %{
 6246   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6247   match(PrefetchAllocation mem);
 6248   ins_cost(0);
 6249   size(0);
 6250   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6251   ins_encode();
 6252   ins_pipe(empty);
 6253 %}
 6254 
 6255 instruct prefetchAlloc( memory mem ) %{
 6256   predicate(AllocatePrefetchInstr==3);
 6257   match( PrefetchAllocation mem );
 6258   ins_cost(100);
 6259 
 6260   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6261   ins_encode %{
 6262     __ prefetchw($mem$$Address);
 6263   %}
 6264   ins_pipe(ialu_mem);
 6265 %}
 6266 
 6267 instruct prefetchAllocNTA( memory mem ) %{
 6268   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6269   match(PrefetchAllocation mem);
 6270   ins_cost(100);
 6271 
 6272   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6273   ins_encode %{
 6274     __ prefetchnta($mem$$Address);
 6275   %}
 6276   ins_pipe(ialu_mem);
 6277 %}
 6278 
 6279 instruct prefetchAllocT0( memory mem ) %{
 6280   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6281   match(PrefetchAllocation mem);
 6282   ins_cost(100);
 6283 
 6284   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6285   ins_encode %{
 6286     __ prefetcht0($mem$$Address);
 6287   %}
 6288   ins_pipe(ialu_mem);
 6289 %}
 6290 
 6291 instruct prefetchAllocT2( memory mem ) %{
 6292   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6293   match(PrefetchAllocation mem);
 6294   ins_cost(100);
 6295 
 6296   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6297   ins_encode %{
 6298     __ prefetcht2($mem$$Address);
 6299   %}
 6300   ins_pipe(ialu_mem);
 6301 %}
 6302 
 6303 //----------Store Instructions-------------------------------------------------
 6304 
 6305 // Store Byte
 6306 instruct storeB(memory mem, xRegI src) %{
 6307   match(Set mem (StoreB mem src));
 6308 
 6309   ins_cost(125);
 6310   format %{ "MOV8   $mem,$src" %}
 6311   opcode(0x88);
 6312   ins_encode( OpcP, RegMem( src, mem ) );
 6313   ins_pipe( ialu_mem_reg );
 6314 %}
 6315 
 6316 // Store Char/Short
 6317 instruct storeC(memory mem, rRegI src) %{
 6318   match(Set mem (StoreC mem src));
 6319 
 6320   ins_cost(125);
 6321   format %{ "MOV16  $mem,$src" %}
 6322   opcode(0x89, 0x66);
 6323   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6324   ins_pipe( ialu_mem_reg );
 6325 %}
 6326 
 6327 // Store Integer
 6328 instruct storeI(memory mem, rRegI src) %{
 6329   match(Set mem (StoreI mem src));
 6330 
 6331   ins_cost(125);
 6332   format %{ "MOV    $mem,$src" %}
 6333   opcode(0x89);
 6334   ins_encode( OpcP, RegMem( src, mem ) );
 6335   ins_pipe( ialu_mem_reg );
 6336 %}
 6337 
 6338 // Store Long
 6339 instruct storeL(long_memory mem, eRegL src) %{
 6340   predicate(!((StoreLNode*)n)->require_atomic_access());
 6341   match(Set mem (StoreL mem src));
 6342 
 6343   ins_cost(200);
 6344   format %{ "MOV    $mem,$src.lo\n\t"
 6345             "MOV    $mem+4,$src.hi" %}
 6346   opcode(0x89, 0x89);
 6347   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6348   ins_pipe( ialu_mem_long_reg );
 6349 %}
 6350 
 6351 // Store Long to Integer
 6352 instruct storeL2I(memory mem, eRegL src) %{
 6353   match(Set mem (StoreI mem (ConvL2I src)));
 6354 
 6355   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6356   ins_encode %{
 6357     __ movl($mem$$Address, $src$$Register);
 6358   %}
 6359   ins_pipe(ialu_mem_reg);
 6360 %}
 6361 
 6362 // Volatile Store Long.  Must be atomic, so move it into
 6363 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6364 // target address before the store (for null-ptr checks)
 6365 // so the memory operand is used twice in the encoding.
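      // Note: FILD m64 and FISTP m64 move all 64 bits in a single memory
      // access, which is what makes this store atomic where a pair of
      // 32-bit MOVs would not be.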
 6366 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6367   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6368   match(Set mem (StoreL mem src));
 6369   effect( KILL cr );
 6370   ins_cost(400);
 6371   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6372             "FILD   $src\n\t"
 6373             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6374   opcode(0x3B);
 6375   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6376   ins_pipe( fpu_reg_mem );
 6377 %}
 6378 
 6379 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6380   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6381   match(Set mem (StoreL mem src));
 6382   effect( TEMP tmp, KILL cr );
 6383   ins_cost(380);
 6384   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6385             "MOVSD  $tmp,$src\n\t"
 6386             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6387   ins_encode %{
 6388     __ cmpl(rax, $mem$$Address);
 6389     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6390     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6391   %}
 6392   ins_pipe( pipe_slow );
 6393 %}
 6394 
 6395 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6396   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6397   match(Set mem (StoreL mem src));
 6398   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6399   ins_cost(360);
 6400   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6401             "MOVD   $tmp,$src.lo\n\t"
 6402             "MOVD   $tmp2,$src.hi\n\t"
 6403             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6404             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6405   ins_encode %{
 6406     __ cmpl(rax, $mem$$Address);
 6407     __ movdl($tmp$$XMMRegister, $src$$Register);
 6408     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6409     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6410     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6411   %}
 6412   ins_pipe( pipe_slow );
 6413 %}
 6414 
 6415 // Store Pointer; for storing unknown oops and raw pointers
 6416 instruct storeP(memory mem, anyRegP src) %{
 6417   match(Set mem (StoreP mem src));
 6418 
 6419   ins_cost(125);
 6420   format %{ "MOV    $mem,$src" %}
 6421   opcode(0x89);
 6422   ins_encode( OpcP, RegMem( src, mem ) );
 6423   ins_pipe( ialu_mem_reg );
 6424 %}
 6425 
 6426 // Store Integer Immediate
 6427 instruct storeImmI(memory mem, immI src) %{
 6428   match(Set mem (StoreI mem src));
 6429 
 6430   ins_cost(150);
 6431   format %{ "MOV    $mem,$src" %}
 6432   opcode(0xC7);               /* C7 /0 */
 6433   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6434   ins_pipe( ialu_mem_imm );
 6435 %}
 6436 
 6437 // Store Short/Char Immediate
 6438 instruct storeImmI16(memory mem, immI16 src) %{
 6439   predicate(UseStoreImmI16);
 6440   match(Set mem (StoreC mem src));
 6441 
 6442   ins_cost(150);
 6443   format %{ "MOV16  $mem,$src" %}
 6444   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6445   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6446   ins_pipe( ialu_mem_imm );
 6447 %}
 6448 
 6449 // Store Pointer Immediate; null pointers or constant oops that do not
 6450 // need card-mark barriers.
 6451 instruct storeImmP(memory mem, immP src) %{
 6452   match(Set mem (StoreP mem src));
 6453 
 6454   ins_cost(150);
 6455   format %{ "MOV    $mem,$src" %}
 6456   opcode(0xC7);               /* C7 /0 */
 6457   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6458   ins_pipe( ialu_mem_imm );
 6459 %}
 6460 
 6461 // Store Byte Immediate
 6462 instruct storeImmB(memory mem, immI8 src) %{
 6463   match(Set mem (StoreB mem src));
 6464 
 6465   ins_cost(150);
 6466   format %{ "MOV8   $mem,$src" %}
 6467   opcode(0xC6);               /* C6 /0 */
 6468   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6469   ins_pipe( ialu_mem_imm );
 6470 %}
 6471 
 6472 // Store CMS card-mark Immediate
 6473 instruct storeImmCM(memory mem, immI8 src) %{
 6474   match(Set mem (StoreCM mem src));
 6475 
 6476   ins_cost(150);
 6477   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6478   opcode(0xC6);               /* C6 /0 */
 6479   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6480   ins_pipe( ialu_mem_imm );
 6481 %}
 6482 
 6483 // Store Double
 6484 instruct storeDPR( memory mem, regDPR1 src) %{
 6485   predicate(UseSSE<=1);
 6486   match(Set mem (StoreD mem src));
 6487 
 6488   ins_cost(100);
 6489   format %{ "FST_D  $mem,$src" %}
 6490   opcode(0xDD);       /* DD /2 */
 6491   ins_encode( enc_FPR_store(mem,src) );
 6492   ins_pipe( fpu_mem_reg );
 6493 %}
 6494 
 6495 // Store double does rounding on x86
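      // The x87 stack holds values in 80-bit extended precision; FST_D
      // rounds to 64-bit double as part of the store, which is what
      // implements the RoundDouble node matched below.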
 6496 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6497   predicate(UseSSE<=1);
 6498   match(Set mem (StoreD mem (RoundDouble src)));
 6499 
 6500   ins_cost(100);
 6501   format %{ "FST_D  $mem,$src\t# round" %}
 6502   opcode(0xDD);       /* DD /2 */
 6503   ins_encode( enc_FPR_store(mem,src) );
 6504   ins_pipe( fpu_mem_reg );
 6505 %}
 6506 
 6507 // Store XMM register to memory (double-precision floating point)
 6508 // MOVSD instruction
 6509 instruct storeD(memory mem, regD src) %{
 6510   predicate(UseSSE>=2);
 6511   match(Set mem (StoreD mem src));
 6512   ins_cost(95);
 6513   format %{ "MOVSD  $mem,$src" %}
 6514   ins_encode %{
 6515     __ movdbl($mem$$Address, $src$$XMMRegister);
 6516   %}
 6517   ins_pipe( pipe_slow );
 6518 %}
 6519 
 6520 // Store XMM register to memory (single-precision floating point)
 6521 // MOVSS instruction
 6522 instruct storeF(memory mem, regF src) %{
 6523   predicate(UseSSE>=1);
 6524   match(Set mem (StoreF mem src));
 6525   ins_cost(95);
 6526   format %{ "MOVSS  $mem,$src" %}
 6527   ins_encode %{
 6528     __ movflt($mem$$Address, $src$$XMMRegister);
 6529   %}
 6530   ins_pipe( pipe_slow );
 6531 %}
 6532 
 6533 
 6534 // Store Float
 6535 instruct storeFPR( memory mem, regFPR1 src) %{
 6536   predicate(UseSSE==0);
 6537   match(Set mem (StoreF mem src));
 6538 
 6539   ins_cost(100);
 6540   format %{ "FST_S  $mem,$src" %}
 6541   opcode(0xD9);       /* D9 /2 */
 6542   ins_encode( enc_FPR_store(mem,src) );
 6543   ins_pipe( fpu_mem_reg );
 6544 %}
 6545 
 6546 // Store Float does rounding on x86
 6547 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6548   predicate(UseSSE==0);
 6549   match(Set mem (StoreF mem (RoundFloat src)));
 6550 
 6551   ins_cost(100);
 6552   format %{ "FST_S  $mem,$src\t# round" %}
 6553   opcode(0xD9);       /* D9 /2 */
 6554   ins_encode( enc_FPR_store(mem,src) );
 6555   ins_pipe( fpu_mem_reg );
 6556 %}
 6557 
 6558 // Store Float does rounding on x86; here the 32-bit store also performs the double-to-float conversion
 6559 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6560   predicate(UseSSE<=1);
 6561   match(Set mem (StoreF mem (ConvD2F src)));
 6562 
 6563   ins_cost(100);
 6564   format %{ "FST_S  $mem,$src\t# D-round" %}
 6565   opcode(0xD9);       /* D9 /2 */
 6566   ins_encode( enc_FPR_store(mem,src) );
 6567   ins_pipe( fpu_mem_reg );
 6568 %}
 6569 
 6570 // Store immediate Float value (faster than storing from an FPU register)
 6571 // The instruction usage is guarded by predicate in operand immFPR().
 6572 instruct storeFPR_imm( memory mem, immFPR src) %{
 6573   match(Set mem (StoreF mem src));
 6574 
 6575   ins_cost(50);
 6576   format %{ "MOV    $mem,$src\t# store float" %}
 6577   opcode(0xC7);               /* C7 /0 */
 6578   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6579   ins_pipe( ialu_mem_imm );
 6580 %}
 6581 
 6582 // Store immediate Float value (faster than storing from an XMM register)
 6583 // The instruction usage is guarded by predicate in operand immF().
 6584 instruct storeF_imm( memory mem, immF src) %{
 6585   match(Set mem (StoreF mem src));
 6586 
 6587   ins_cost(50);
 6588   format %{ "MOV    $mem,$src\t# store float" %}
 6589   opcode(0xC7);               /* C7 /0 */
 6590   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6591   ins_pipe( ialu_mem_imm );
 6592 %}
 6593 
 6594 // Store Integer to stack slot
 6595 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6596   match(Set dst src);
 6597 
 6598   ins_cost(100);
 6599   format %{ "MOV    $dst,$src" %}
 6600   opcode(0x89);
 6601   ins_encode( OpcPRegSS( dst, src ) );
 6602   ins_pipe( ialu_mem_reg );
 6603 %}
 6604 
 6605 // Store Pointer to stack slot
 6606 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6607   match(Set dst src);
 6608 
 6609   ins_cost(100);
 6610   format %{ "MOV    $dst,$src" %}
 6611   opcode(0x89);
 6612   ins_encode( OpcPRegSS( dst, src ) );
 6613   ins_pipe( ialu_mem_reg );
 6614 %}
 6615 
 6616 // Store Long to stack slot
 6617 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6618   match(Set dst src);
 6619 
 6620   ins_cost(200);
 6621   format %{ "MOV    $dst,$src.lo\n\t"
 6622             "MOV    $dst+4,$src.hi" %}
 6623   opcode(0x89, 0x89);
 6624   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6625   ins_pipe( ialu_mem_long_reg );
 6626 %}
 6627 
 6628 //----------MemBar Instructions-----------------------------------------------
 6629 // Memory barrier flavors
 6630 
 6631 instruct membar_acquire() %{
 6632   match(MemBarAcquire);
 6633   match(LoadFence);
 6634   ins_cost(400);
 6635 
 6636   size(0);
 6637   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6638   ins_encode();
 6639   ins_pipe(empty);
 6640 %}
 6641 
 6642 instruct membar_acquire_lock() %{
 6643   match(MemBarAcquireLock);
 6644   ins_cost(0);
 6645 
 6646   size(0);
 6647   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6648   ins_encode( );
 6649   ins_pipe(empty);
 6650 %}
 6651 
 6652 instruct membar_release() %{
 6653   match(MemBarRelease);
 6654   match(StoreFence);
 6655   ins_cost(400);
 6656 
 6657   size(0);
 6658   format %{ "MEMBAR-release ! (empty encoding)" %}
 6659   ins_encode( );
 6660   ins_pipe(empty);
 6661 %}
 6662 
 6663 instruct membar_release_lock() %{
 6664   match(MemBarReleaseLock);
 6665   ins_cost(0);
 6666 
 6667   size(0);
 6668   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6669   ins_encode( );
 6670   ins_pipe(empty);
 6671 %}
 6672 
 6673 instruct membar_volatile(eFlagsReg cr) %{
 6674   match(MemBarVolatile);
 6675   effect(KILL cr);
 6676   ins_cost(400);
 6677 
 6678   format %{
 6679     $$template
 6680     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6681   %}
 6682   ins_encode %{
 6683     __ membar(Assembler::StoreLoad);
 6684   %}
 6685   ins_pipe(pipe_slow);
 6686 %}
 6687 
 6688 instruct unnecessary_membar_volatile() %{
 6689   match(MemBarVolatile);
 6690   predicate(Matcher::post_store_load_barrier(n));
 6691   ins_cost(0);
 6692 
 6693   size(0);
 6694   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6695   ins_encode( );
 6696   ins_pipe(empty);
 6697 %}
 6698 
 6699 instruct membar_storestore() %{
 6700   match(MemBarStoreStore);
 6701   match(StoreStoreFence);
 6702   ins_cost(0);
 6703 
 6704   size(0);
 6705   format %{ "MEMBAR-storestore (empty encoding)" %}
 6706   ins_encode( );
 6707   ins_pipe(empty);
 6708 %}
 6709 
 6710 //----------Move Instructions--------------------------------------------------
 6711 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6712   match(Set dst (CastX2P src));
 6713   format %{ "# X2P  $dst, $src" %}
 6714   ins_encode( /*empty encoding*/ );
 6715   ins_cost(0);
 6716   ins_pipe(empty);
 6717 %}
 6718 
 6719 instruct castP2X(rRegI dst, eRegP src ) %{
 6720   match(Set dst (CastP2X src));
 6721   ins_cost(50);
 6722   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6723   ins_encode( enc_Copy( dst, src) );
 6724   ins_pipe( ialu_reg_reg );
 6725 %}
 6726 
 6727 //----------Conditional Move---------------------------------------------------
 6728 // Conditional move
 6729 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6730   predicate(!VM_Version::supports_cmov() );
 6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6732   ins_cost(200);
 6733   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6734             "MOV    $dst,$src\n"
 6735       "skip:" %}
 6736   ins_encode %{
 6737     Label Lskip;
 6738     // Invert sense of branch from sense of CMOV
 6739     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6740     __ movl($dst$$Register, $src$$Register);
 6741     __ bind(Lskip);
 6742   %}
 6743   ins_pipe( pipe_cmov_reg );
 6744 %}
 6745 
 6746 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6747   predicate(!VM_Version::supports_cmov() );
 6748   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6749   ins_cost(200);
 6750   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6751             "MOV    $dst,$src\n"
 6752       "skip:" %}
 6753   ins_encode %{
 6754     Label Lskip;
 6755     // Invert sense of branch from sense of CMOV
 6756     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6757     __ movl($dst$$Register, $src$$Register);
 6758     __ bind(Lskip);
 6759   %}
 6760   ins_pipe( pipe_cmov_reg );
 6761 %}
 6762 
 6763 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6764   predicate(VM_Version::supports_cmov() );
 6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6766   ins_cost(200);
 6767   format %{ "CMOV$cop $dst,$src" %}
 6768   opcode(0x0F,0x40);
 6769   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6770   ins_pipe( pipe_cmov_reg );
 6771 %}
 6772 
 6773 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6774   predicate(VM_Version::supports_cmov() );
 6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6776   ins_cost(200);
 6777   format %{ "CMOV$cop $dst,$src" %}
 6778   opcode(0x0F,0x40);
 6779   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6780   ins_pipe( pipe_cmov_reg );
 6781 %}
 6782 
 6783 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6784   predicate(VM_Version::supports_cmov() );
 6785   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6786   ins_cost(200);
 6787   expand %{
 6788     cmovI_regU(cop, cr, dst, src);
 6789   %}
 6790 %}
 6791 
 6792 // Conditional move
 6793 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6794   predicate(VM_Version::supports_cmov() );
 6795   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6796   ins_cost(250);
 6797   format %{ "CMOV$cop $dst,$src" %}
 6798   opcode(0x0F,0x40);
 6799   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6800   ins_pipe( pipe_cmov_mem );
 6801 %}
 6802 
 6803 // Conditional move
 6804 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6805   predicate(VM_Version::supports_cmov() );
 6806   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6807   ins_cost(250);
 6808   format %{ "CMOV$cop $dst,$src" %}
 6809   opcode(0x0F,0x40);
 6810   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6811   ins_pipe( pipe_cmov_mem );
 6812 %}
 6813 
 6814 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6815   predicate(VM_Version::supports_cmov() );
 6816   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6817   ins_cost(250);
 6818   expand %{
 6819     cmovI_memU(cop, cr, dst, src);
 6820   %}
 6821 %}
 6822 
 6823 // Conditional move
 6824 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6825   predicate(VM_Version::supports_cmov() );
 6826   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6827   ins_cost(200);
 6828   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6829   opcode(0x0F,0x40);
 6830   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6831   ins_pipe( pipe_cmov_reg );
 6832 %}
 6833 
 6834 // Conditional move (non-P6 version)
 6835 // Note:  a CMoveP is generated for  stubs and native wrappers
 6836 //        regardless of whether we are on a P6, so we
 6837 //        emulate a cmov here
 6838 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6840   ins_cost(300);
 6841   format %{ "Jn$cop   skip\n\t"
 6842           "MOV    $dst,$src\t# pointer\n"
 6843       "skip:" %}
 6844   opcode(0x8b);
 6845   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6846   ins_pipe( pipe_cmov_reg );
 6847 %}
 6848 
 6849 // Conditional move
 6850 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6851   predicate(VM_Version::supports_cmov() );
 6852   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6853   ins_cost(200);
 6854   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6855   opcode(0x0F,0x40);
 6856   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6857   ins_pipe( pipe_cmov_reg );
 6858 %}
 6859 
 6860 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6861   predicate(VM_Version::supports_cmov() );
 6862   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6863   ins_cost(200);
 6864   expand %{
 6865     cmovP_regU(cop, cr, dst, src);
 6866   %}
 6867 %}
 6868 
 6869 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6870 // correctly meets the two pointer arguments; one is an incoming
 6871 // register but the other is a memory operand.  ALSO appears to
 6872 // be buggy with implicit null checks.
 6873 //
 6874 //// Conditional move
 6875 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6876 //  predicate(VM_Version::supports_cmov() );
 6877 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6878 //  ins_cost(250);
 6879 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6880 //  opcode(0x0F,0x40);
 6881 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6882 //  ins_pipe( pipe_cmov_mem );
 6883 //%}
 6884 //
 6885 //// Conditional move
 6886 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6887 //  predicate(VM_Version::supports_cmov() );
 6888 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6889 //  ins_cost(250);
 6890 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6891 //  opcode(0x0F,0x40);
 6892 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6893 //  ins_pipe( pipe_cmov_mem );
 6894 //%}
 6895 
 6896 // Conditional move
 6897 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6898   predicate(UseSSE<=1);
 6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6900   ins_cost(200);
 6901   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6902   opcode(0xDA);
 6903   ins_encode( enc_cmov_dpr(cop,src) );
 6904   ins_pipe( pipe_cmovDPR_reg );
 6905 %}
 6906 
 6907 // Conditional move
 6908 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6909   predicate(UseSSE==0);
 6910   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6911   ins_cost(200);
 6912   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6913   opcode(0xDA);
 6914   ins_encode( enc_cmov_dpr(cop,src) );
 6915   ins_pipe( pipe_cmovDPR_reg );
 6916 %}
 6917 
 6918 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
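      // FCMOVcc only tests CF/ZF/PF (the B/E/BE/U conditions and their
      // negations), so a signed integer compare has to be handled with a
      // short branch around the register move instead.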
 6919 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6920   predicate(UseSSE<=1);
 6921   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6922   ins_cost(200);
 6923   format %{ "Jn$cop   skip\n\t"
 6924             "MOV    $dst,$src\t# double\n"
 6925       "skip:" %}
 6926   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6927   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6928   ins_pipe( pipe_cmovDPR_reg );
 6929 %}
 6930 
 6931 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6932 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6933   predicate(UseSSE==0);
 6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6935   ins_cost(200);
 6936   format %{ "Jn$cop    skip\n\t"
 6937             "MOV    $dst,$src\t# float\n"
 6938       "skip:" %}
 6939   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6940   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6941   ins_pipe( pipe_cmovDPR_reg );
 6942 %}
 6943 
 6944 // No CMOVE with SSE/SSE2
 6945 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6946   predicate (UseSSE>=1);
 6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   format %{ "Jn$cop   skip\n\t"
 6950             "MOVSS  $dst,$src\t# float\n"
 6951       "skip:" %}
 6952   ins_encode %{
 6953     Label skip;
 6954     // Invert sense of branch from sense of CMOV
 6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6956     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6957     __ bind(skip);
 6958   %}
 6959   ins_pipe( pipe_slow );
 6960 %}
 6961 
 6962 // No CMOVE with SSE/SSE2
 6963 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6964   predicate (UseSSE>=2);
 6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6966   ins_cost(200);
 6967   format %{ "Jn$cop   skip\n\t"
 6968             "MOVSD  $dst,$src\t# double\n"
 6969       "skip:" %}
 6970   ins_encode %{
 6971     Label skip;
 6972     // Invert sense of branch from sense of CMOV
 6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6974     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6975     __ bind(skip);
 6976   %}
 6977   ins_pipe( pipe_slow );
 6978 %}
 6979 
 6980 // unsigned version
 6981 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6982   predicate (UseSSE>=1);
 6983   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   format %{ "Jn$cop   skip\n\t"
 6986             "MOVSS  $dst,$src\t# float\n"
 6987       "skip:" %}
 6988   ins_encode %{
 6989     Label skip;
 6990     // Invert sense of branch from sense of CMOV
 6991     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6992     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6993     __ bind(skip);
 6994   %}
 6995   ins_pipe( pipe_slow );
 6996 %}
 6997 
 6998 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6999   predicate (UseSSE>=1);
 7000   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7001   ins_cost(200);
 7002   expand %{
 7003     fcmovF_regU(cop, cr, dst, src);
 7004   %}
 7005 %}
 7006 
 7007 // unsigned version
 7008 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7009   predicate (UseSSE>=2);
 7010   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7011   ins_cost(200);
 7012   format %{ "Jn$cop   skip\n\t"
 7013             "MOVSD  $dst,$src\t# double\n"
 7014       "skip:" %}
 7015   ins_encode %{
 7016     Label skip;
 7017     // Invert sense of branch from sense of CMOV
 7018     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7019     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7020     __ bind(skip);
 7021   %}
 7022   ins_pipe( pipe_slow );
 7023 %}
 7024 
 7025 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7026   predicate (UseSSE>=2);
 7027   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7028   ins_cost(200);
 7029   expand %{
 7030     fcmovD_regU(cop, cr, dst, src);
 7031   %}
 7032 %}
 7033 
 7034 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7035   predicate(VM_Version::supports_cmov() );
 7036   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7037   ins_cost(200);
 7038   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7039             "CMOV$cop $dst.hi,$src.hi" %}
 7040   opcode(0x0F,0x40);
 7041   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7042   ins_pipe( pipe_cmov_reg_long );
 7043 %}
 7044 
 7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7046   predicate(VM_Version::supports_cmov() );
 7047   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7048   ins_cost(200);
 7049   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7050             "CMOV$cop $dst.hi,$src.hi" %}
 7051   opcode(0x0F,0x40);
 7052   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7053   ins_pipe( pipe_cmov_reg_long );
 7054 %}
 7055 
 7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7057   predicate(VM_Version::supports_cmov() );
 7058   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7059   ins_cost(200);
 7060   expand %{
 7061     cmovL_regU(cop, cr, dst, src);
 7062   %}
 7063 %}
 7064 
 7065 //----------Arithmetic Instructions--------------------------------------------
 7066 //----------Addition Instructions----------------------------------------------
 7067 
 7068 // Integer Addition Instructions
 7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7070   match(Set dst (AddI dst src));
 7071   effect(KILL cr);
 7072 
 7073   size(2);
 7074   format %{ "ADD    $dst,$src" %}
 7075   opcode(0x03);
 7076   ins_encode( OpcP, RegReg( dst, src) );
 7077   ins_pipe( ialu_reg_reg );
 7078 %}
 7079 
 7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7081   match(Set dst (AddI dst src));
 7082   effect(KILL cr);
 7083 
 7084   format %{ "ADD    $dst,$src" %}
 7085   opcode(0x81, 0x00); /* /0 id */
 7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7087   ins_pipe( ialu_reg );
 7088 %}
 7089 
 7090 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7091   predicate(UseIncDec);
 7092   match(Set dst (AddI dst src));
 7093   effect(KILL cr);
 7094 
 7095   size(1);
 7096   format %{ "INC    $dst" %}
 7097   opcode(0x40); /* INC r32 is 0x40 + register encoding */
 7098   ins_encode( Opc_plus( primary, dst ) );
 7099   ins_pipe( ialu_reg );
 7100 %}
 7101 
 7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7103   match(Set dst (AddI src0 src1));
 7104   ins_cost(110);
 7105 
 7106   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7107   opcode(0x8D); /* 0x8D /r */
 7108   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7109   ins_pipe( ialu_reg_reg );
 7110 %}
 7111 
 7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7113   match(Set dst (AddP src0 src1));
 7114   ins_cost(110);
 7115 
 7116   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7117   opcode(0x8D); /* 0x8D /r */
 7118   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7119   ins_pipe( ialu_reg_reg );
 7120 %}
 7121 
 7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7123   predicate(UseIncDec);
 7124   match(Set dst (AddI dst src));
 7125   effect(KILL cr);
 7126 
 7127   size(1);
 7128   format %{ "DEC    $dst" %}
 7129   opcode(0x48); /* DEC r32 is 0x48 + register encoding */
 7130   ins_encode( Opc_plus( primary, dst ) );
 7131   ins_pipe( ialu_reg );
 7132 %}
 7133 
 7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7135   match(Set dst (AddP dst src));
 7136   effect(KILL cr);
 7137 
 7138   size(2);
 7139   format %{ "ADD    $dst,$src" %}
 7140   opcode(0x03);
 7141   ins_encode( OpcP, RegReg( dst, src) );
 7142   ins_pipe( ialu_reg_reg );
 7143 %}
 7144 
 7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7146   match(Set dst (AddP dst src));
 7147   effect(KILL cr);
 7148 
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7151   // ins_encode( RegImm( dst, src) );
 7152   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7153   ins_pipe( ialu_reg );
 7154 %}
 7155 
 7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7157   match(Set dst (AddI dst (LoadI src)));
 7158   effect(KILL cr);
 7159 
 7160   ins_cost(150);
 7161   format %{ "ADD    $dst,$src" %}
 7162   opcode(0x03);
 7163   ins_encode( OpcP, RegMem( dst, src) );
 7164   ins_pipe( ialu_reg_mem );
 7165 %}
 7166 
 7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(150);
 7172   format %{ "ADD    $dst,$src" %}
 7173   opcode(0x01);  /* Opcode 01 /r */
 7174   ins_encode( OpcP, RegMem( src, dst ) );
 7175   ins_pipe( ialu_mem_reg );
 7176 %}
 7177 
 7178 // Add Memory with Immediate
 7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7180   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7181   effect(KILL cr);
 7182 
 7183   ins_cost(125);
 7184   format %{ "ADD    $dst,$src" %}
 7185   opcode(0x81);               /* Opcode 81 /0 id */
 7186   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7187   ins_pipe( ialu_mem_imm );
 7188 %}
 7189 
 7190 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7191   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7192   effect(KILL cr);
 7193 
 7194   ins_cost(125);
 7195   format %{ "INC    $dst" %}
 7196   opcode(0xFF);               /* Opcode FF /0 */
 7197   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7198   ins_pipe( ialu_mem_imm );
 7199 %}
 7200 
 7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7202   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7203   effect(KILL cr);
 7204 
 7205   ins_cost(125);
 7206   format %{ "DEC    $dst" %}
 7207   opcode(0xFF);               /* Opcode FF /1 */
 7208   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7209   ins_pipe( ialu_mem_imm );
 7210 %}
 7211 
 7212 
 7213 instruct checkCastPP( eRegP dst ) %{
 7214   match(Set dst (CheckCastPP dst));
 7215 
 7216   size(0);
 7217   format %{ "#checkcastPP of $dst" %}
 7218   ins_encode( /*empty encoding*/ );
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castPP( eRegP dst ) %{
 7223   match(Set dst (CastPP dst));
 7224   format %{ "#castPP of $dst" %}
 7225   ins_encode( /*empty encoding*/ );
 7226   ins_pipe( empty );
 7227 %}
 7228 
 7229 instruct castII( rRegI dst ) %{
 7230   match(Set dst (CastII dst));
 7231   format %{ "#castII of $dst" %}
 7232   ins_encode( /*empty encoding*/ );
 7233   ins_cost(0);
 7234   ins_pipe( empty );
 7235 %}
 7236 
 7237 instruct castLL( eRegL dst ) %{
 7238   match(Set dst (CastLL dst));
 7239   format %{ "#castLL of $dst" %}
 7240   ins_encode( /*empty encoding*/ );
 7241   ins_cost(0);
 7242   ins_pipe( empty );
 7243 %}
 7244 
 7245 instruct castFF( regF dst ) %{
 7246   predicate(UseSSE >= 1);
 7247   match(Set dst (CastFF dst));
 7248   format %{ "#castFF of $dst" %}
 7249   ins_encode( /*empty encoding*/ );
 7250   ins_cost(0);
 7251   ins_pipe( empty );
 7252 %}
 7253 
 7254 instruct castDD( regD dst ) %{
 7255   predicate(UseSSE >= 2);
 7256   match(Set dst (CastDD dst));
 7257   format %{ "#castDD of $dst" %}
 7258   ins_encode( /*empty encoding*/ );
 7259   ins_cost(0);
 7260   ins_pipe( empty );
 7261 %}
 7262 
 7263 instruct castFF_PR( regFPR dst ) %{
 7264   predicate(UseSSE < 1);
 7265   match(Set dst (CastFF dst));
 7266   format %{ "#castFF of $dst" %}
 7267   ins_encode( /*empty encoding*/ );
 7268   ins_cost(0);
 7269   ins_pipe( empty );
 7270 %}
 7271 
 7272 instruct castDD_PR( regDPR dst ) %{
 7273   predicate(UseSSE < 2);
 7274   match(Set dst (CastDD dst));
 7275   format %{ "#castDD of $dst" %}
 7276   ins_encode( /*empty encoding*/ );
 7277   ins_cost(0);
 7278   ins_pipe( empty );
 7279 %}
 7280 
 7281 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7282 
 7283 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7284   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7285   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7286   effect(KILL cr, KILL oldval);
 7287   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7288             "MOV    $res,0\n\t"
 7289             "JNE,s  fail\n\t"
 7290             "MOV    $res,1\n"
 7291           "fail:" %}
 7292   ins_encode( enc_cmpxchg8(mem_ptr),
 7293               enc_flags_ne_to_boolean(res) );
 7294   ins_pipe( pipe_cmpxchg );
 7295 %}
 7296 
 7297 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7298   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7299   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7300   effect(KILL cr, KILL oldval);
 7301   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7302             "MOV    $res,0\n\t"
 7303             "JNE,s  fail\n\t"
 7304             "MOV    $res,1\n"
 7305           "fail:" %}
 7306   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7307   ins_pipe( pipe_cmpxchg );
 7308 %}
 7309 
 7310 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7311   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7312   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7313   effect(KILL cr, KILL oldval);
 7314   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7315             "MOV    $res,0\n\t"
 7316             "JNE,s  fail\n\t"
 7317             "MOV    $res,1\n"
 7318           "fail:" %}
 7319   ins_encode( enc_cmpxchgb(mem_ptr),
 7320               enc_flags_ne_to_boolean(res) );
 7321   ins_pipe( pipe_cmpxchg );
 7322 %}
 7323 
 7324 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7325   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7326   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7327   effect(KILL cr, KILL oldval);
 7328   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7329             "MOV    $res,0\n\t"
 7330             "JNE,s  fail\n\t"
 7331             "MOV    $res,1\n"
 7332           "fail:" %}
 7333   ins_encode( enc_cmpxchgw(mem_ptr),
 7334               enc_flags_ne_to_boolean(res) );
 7335   ins_pipe( pipe_cmpxchg );
 7336 %}
 7337 
 7338 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7339   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7340   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7341   effect(KILL cr, KILL oldval);
 7342   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7343             "MOV    $res,0\n\t"
 7344             "JNE,s  fail\n\t"
 7345             "MOV    $res,1\n"
 7346           "fail:" %}
 7347   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7348   ins_pipe( pipe_cmpxchg );
 7349 %}
 7350 
 7351 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7352   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7353   effect(KILL cr);
 7354   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7355   ins_encode( enc_cmpxchg8(mem_ptr) );
 7356   ins_pipe( pipe_cmpxchg );
 7357 %}
 7358 
 7359 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7360   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7361   effect(KILL cr);
 7362   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7363   ins_encode( enc_cmpxchg(mem_ptr) );
 7364   ins_pipe( pipe_cmpxchg );
 7365 %}
 7366 
 7367 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7368   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7369   effect(KILL cr);
 7370   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7371   ins_encode( enc_cmpxchgb(mem_ptr) );
 7372   ins_pipe( pipe_cmpxchg );
 7373 %}
 7374 
 7375 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7376   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7377   effect(KILL cr);
 7378   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7379   ins_encode( enc_cmpxchgw(mem_ptr) );
 7380   ins_pipe( pipe_cmpxchg );
 7381 %}
 7382 
 7383 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7384   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7385   effect(KILL cr);
 7386   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7387   ins_encode( enc_cmpxchg(mem_ptr) );
 7388   ins_pipe( pipe_cmpxchg );
 7389 %}
 7390 
 7391 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7392   predicate(n->as_LoadStore()->result_not_used());
 7393   match(Set dummy (GetAndAddB mem add));
 7394   effect(KILL cr);
 7395   format %{ "ADDB  [$mem],$add" %}
 7396   ins_encode %{
 7397     __ lock();
 7398     __ addb($mem$$Address, $add$$constant);
 7399   %}
 7400   ins_pipe( pipe_cmpxchg );
 7401 %}
 7402 
 7403 // Important to match to xRegI: only 8-bit regs.
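      // In 32-bit mode only EAX, EBX, ECX and EDX have byte-addressable
      // low halves (AL/BL/CL/DL), so the operand class must exclude ESI/EDI.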
 7404 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7405   match(Set newval (GetAndAddB mem newval));
 7406   effect(KILL cr);
 7407   format %{ "XADDB  [$mem],$newval" %}
 7408   ins_encode %{
 7409     __ lock();
 7410     __ xaddb($mem$$Address, $newval$$Register);
 7411   %}
 7412   ins_pipe( pipe_cmpxchg );
 7413 %}
 7414 
 7415 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7416   predicate(n->as_LoadStore()->result_not_used());
 7417   match(Set dummy (GetAndAddS mem add));
 7418   effect(KILL cr);
 7419   format %{ "ADDS  [$mem],$add" %}
 7420   ins_encode %{
 7421     __ lock();
 7422     __ addw($mem$$Address, $add$$constant);
 7423   %}
 7424   ins_pipe( pipe_cmpxchg );
 7425 %}
 7426 
 7427 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7428   match(Set newval (GetAndAddS mem newval));
 7429   effect(KILL cr);
 7430   format %{ "XADDS  [$mem],$newval" %}
 7431   ins_encode %{
 7432     __ lock();
 7433     __ xaddw($mem$$Address, $newval$$Register);
 7434   %}
 7435   ins_pipe( pipe_cmpxchg );
 7436 %}
 7437 
 7438 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7439   predicate(n->as_LoadStore()->result_not_used());
 7440   match(Set dummy (GetAndAddI mem add));
 7441   effect(KILL cr);
 7442   format %{ "ADDL  [$mem],$add" %}
 7443   ins_encode %{
 7444     __ lock();
 7445     __ addl($mem$$Address, $add$$constant);
 7446   %}
 7447   ins_pipe( pipe_cmpxchg );
 7448 %}
 7449 
 7450 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7451   match(Set newval (GetAndAddI mem newval));
 7452   effect(KILL cr);
 7453   format %{ "XADDL  [$mem],$newval" %}
 7454   ins_encode %{
 7455     __ lock();
 7456     __ xaddl($mem$$Address, $newval$$Register);
 7457   %}
 7458   ins_pipe( pipe_cmpxchg );
 7459 %}
 7460 
 7461 // Important to match to xRegI: only 8-bit regs.
 7462 instruct xchgB( memory mem, xRegI newval) %{
 7463   match(Set newval (GetAndSetB mem newval));
 7464   format %{ "XCHGB  $newval,[$mem]" %}
 7465   ins_encode %{
 7466     __ xchgb($newval$$Register, $mem$$Address);
 7467   %}
 7468   ins_pipe( pipe_cmpxchg );
 7469 %}
 7470 
 7471 instruct xchgS( memory mem, rRegI newval) %{
 7472   match(Set newval (GetAndSetS mem newval));
 7473   format %{ "XCHGW  $newval,[$mem]" %}
 7474   ins_encode %{
 7475     __ xchgw($newval$$Register, $mem$$Address);
 7476   %}
 7477   ins_pipe( pipe_cmpxchg );
 7478 %}
 7479 
 7480 instruct xchgI( memory mem, rRegI newval) %{
 7481   match(Set newval (GetAndSetI mem newval));
 7482   format %{ "XCHGL  $newval,[$mem]" %}
 7483   ins_encode %{
 7484     __ xchgl($newval$$Register, $mem$$Address);
 7485   %}
 7486   ins_pipe( pipe_cmpxchg );
 7487 %}
 7488 
 7489 instruct xchgP( memory mem, pRegP newval) %{
 7490   match(Set newval (GetAndSetP mem newval));
 7491   format %{ "XCHGL  $newval,[$mem]" %}
 7492   ins_encode %{
 7493     __ xchgl($newval$$Register, $mem$$Address);
 7494   %}
 7495   ins_pipe( pipe_cmpxchg );
 7496 %}
 7497 
 7498 //----------Subtraction Instructions-------------------------------------------
 7499 
 7500 // Integer Subtraction Instructions
 7501 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7502   match(Set dst (SubI dst src));
 7503   effect(KILL cr);
 7504 
 7505   size(2);
 7506   format %{ "SUB    $dst,$src" %}
 7507   opcode(0x2B);
 7508   ins_encode( OpcP, RegReg( dst, src) );
 7509   ins_pipe( ialu_reg_reg );
 7510 %}
 7511 
 7512 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7513   match(Set dst (SubI dst src));
 7514   effect(KILL cr);
 7515 
 7516   format %{ "SUB    $dst,$src" %}
 7517   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7518   // ins_encode( RegImm( dst, src) );
 7519   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7520   ins_pipe( ialu_reg );
 7521 %}
 7522 
 7523 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7524   match(Set dst (SubI dst (LoadI src)));
 7525   effect(KILL cr);
 7526 
 7527   ins_cost(150);
 7528   format %{ "SUB    $dst,$src" %}
 7529   opcode(0x2B);
 7530   ins_encode( OpcP, RegMem( dst, src) );
 7531   ins_pipe( ialu_reg_mem );
 7532 %}
 7533 
 7534 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7535   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7536   effect(KILL cr);
 7537 
 7538   ins_cost(150);
 7539   format %{ "SUB    $dst,$src" %}
 7540   opcode(0x29);  /* Opcode 29 /r */
 7541   ins_encode( OpcP, RegMem( src, dst ) );
 7542   ins_pipe( ialu_mem_reg );
 7543 %}
 7544 
 7545 // Subtract from a pointer
 7546 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7547   match(Set dst (AddP dst (SubI zero src)));
 7548   effect(KILL cr);
 7549 
 7550   size(2);
 7551   format %{ "SUB    $dst,$src" %}
 7552   opcode(0x2B);
 7553   ins_encode( OpcP, RegReg( dst, src) );
 7554   ins_pipe( ialu_reg_reg );
 7555 %}
 7556 
 7557 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7558   match(Set dst (SubI zero dst));
 7559   effect(KILL cr);
 7560 
 7561   size(2);
 7562   format %{ "NEG    $dst" %}
 7563   opcode(0xF7,0x03);  // Opcode F7 /3
 7564   ins_encode( OpcP, RegOpc( dst ) );
 7565   ins_pipe( ialu_reg );
 7566 %}
 7567 
 7568 //----------Multiplication/Division Instructions-------------------------------
 7569 // Integer Multiplication Instructions
 7570 // Multiply Register
 7571 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7572   match(Set dst (MulI dst src));
 7573   effect(KILL cr);
 7574 
 7575   size(3);
 7576   ins_cost(300);
 7577   format %{ "IMUL   $dst,$src" %}
 7578   opcode(0xAF, 0x0F);
 7579   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7580   ins_pipe( ialu_reg_reg_alu0 );
 7581 %}
 7582 
 7583 // Multiply 32-bit Immediate
 7584 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7585   match(Set dst (MulI src imm));
 7586   effect(KILL cr);
 7587 
 7588   ins_cost(300);
 7589   format %{ "IMUL   $dst,$src,$imm" %}
 7590   opcode(0x69);  /* 69 /r id */
 7591   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7592   ins_pipe( ialu_reg_reg_alu0 );
 7593 %}
 7594 
 7595 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7596   match(Set dst src);
 7597   effect(KILL cr);
 7598 
 7599   // Note that this is artificially increased to make it more expensive than loadConL
 7600   ins_cost(250);
 7601   format %{ "MOV    EAX,$src\t// low word only" %}
 7602   opcode(0xB8);
 7603   ins_encode( LdImmL_Lo(dst, src) );
 7604   ins_pipe( ialu_reg_fat );
 7605 %}
 7606 
 7607 // Multiply by 32-bit Immediate, taking the shifted high order results
 7608 //  (special case for shift by 32)
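      // The one-operand IMUL leaves the 64-bit product in EDX:EAX; a shift
      // right by exactly 32 means the desired bits are already in EDX, so
      // no SAR is needed (compare mulI_imm_RShift_high below).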
 7609 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7610   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7611   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7613              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7614   effect(USE src1, KILL cr);
 7615 
 7616   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7617   ins_cost(0*100 + 1*400 - 150);
 7618   format %{ "IMUL   EDX:EAX,$src1" %}
 7619   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7620   ins_pipe( pipe_slow );
 7621 %}
 7622 
 7623 // Multiply by 32-bit Immediate, taking the shifted high order results
 7624 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7625   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7626   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7629   effect(USE src1, KILL cr);
 7630 
 7631   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7632   ins_cost(1*100 + 1*400 - 150);
 7633   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7634             "SAR    EDX,$cnt-32" %}
 7635   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7636   ins_pipe( pipe_slow );
 7637 %}
 7638 
 7639 // Multiply Memory 32-bit Immediate
 7640 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7641   match(Set dst (MulI (LoadI src) imm));
 7642   effect(KILL cr);
 7643 
 7644   ins_cost(300);
 7645   format %{ "IMUL   $dst,$src,$imm" %}
 7646   opcode(0x69);  /* 69 /r id */
 7647   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7648   ins_pipe( ialu_reg_mem_alu0 );
 7649 %}
 7650 
 7651 // Multiply Memory
 7652 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7653   match(Set dst (MulI dst (LoadI src)));
 7654   effect(KILL cr);
 7655 
 7656   ins_cost(350);
 7657   format %{ "IMUL   $dst,$src" %}
 7658   opcode(0xAF, 0x0F);
 7659   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7660   ins_pipe( ialu_reg_mem_alu0 );
 7661 %}
 7662 
 7663 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7664 %{
 7665   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7666   effect(KILL cr, KILL src2);
 7667 
 7668   expand %{ mulI_eReg(dst, src1, cr);
 7669            mulI_eReg(src2, src3, cr);
 7670            addI_eReg(dst, src2, cr); %}
 7671 %}
 7672 
 7673 // Multiply Register Int to Long
 7674 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7675   // Basic Idea: long = (long)int * (long)int
 7676   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7677   effect(DEF dst, USE src, USE src1, KILL flags);
 7678 
 7679   ins_cost(300);
 7680   format %{ "IMUL   $dst,$src1" %}
 7681 
 7682   ins_encode( long_int_multiply( dst, src1 ) );
 7683   ins_pipe( ialu_reg_reg_alu0 );
 7684 %}
 7685 
 7686 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7687   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7688   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7689   effect(KILL flags);
 7690 
 7691   ins_cost(300);
 7692   format %{ "MUL    $dst,$src1" %}
 7693 
 7694   ins_encode( long_uint_multiply(dst, src1) );
 7695   ins_pipe( ialu_reg_reg_alu0 );
 7696 %}
 7697 
 7698 // Multiply Register Long
 7699 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7700   match(Set dst (MulL dst src));
 7701   effect(KILL cr, TEMP tmp);
 7702   ins_cost(4*100+3*400);
 7703 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7704 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
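      // Derivation: (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
      //           = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo
      // The 2^64 term is beyond the 64-bit result, so only the last two terms
      // are computed: MUL produces x_lo*y_lo in EDX:EAX and the two IMULs
      // produce the cross terms that are added into EDX.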
 7705   format %{ "MOV    $tmp,$src.lo\n\t"
 7706             "IMUL   $tmp,EDX\n\t"
 7707             "MOV    EDX,$src.hi\n\t"
 7708             "IMUL   EDX,EAX\n\t"
 7709             "ADD    $tmp,EDX\n\t"
 7710             "MUL    EDX:EAX,$src.lo\n\t"
 7711             "ADD    EDX,$tmp" %}
 7712   ins_encode( long_multiply( dst, src, tmp ) );
 7713   ins_pipe( pipe_slow );
 7714 %}
 7715 
 7716 // Multiply Register Long where the left operand's high 32 bits are zero
 7717 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7718   predicate(is_operand_hi32_zero(n->in(1)));
 7719   match(Set dst (MulL dst src));
 7720   effect(KILL cr, TEMP tmp);
 7721   ins_cost(2*100+2*400);
 7722 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7723 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7724   format %{ "MOV    $tmp,$src.hi\n\t"
 7725             "IMUL   $tmp,EAX\n\t"
 7726             "MUL    EDX:EAX,$src.lo\n\t"
 7727             "ADD    EDX,$tmp" %}
 7728   ins_encode %{
 7729     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7730     __ imull($tmp$$Register, rax);
 7731     __ mull($src$$Register);
 7732     __ addl(rdx, $tmp$$Register);
 7733   %}
 7734   ins_pipe( pipe_slow );
 7735 %}
 7736 
 7737 // Multiply Register Long where the right operand's high 32 bits are zero
 7738 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7739   predicate(is_operand_hi32_zero(n->in(2)));
 7740   match(Set dst (MulL dst src));
 7741   effect(KILL cr, TEMP tmp);
 7742   ins_cost(2*100+2*400);
 7743 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7744 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7745   format %{ "MOV    $tmp,$src.lo\n\t"
 7746             "IMUL   $tmp,EDX\n\t"
 7747             "MUL    EDX:EAX,$src.lo\n\t"
 7748             "ADD    EDX,$tmp" %}
 7749   ins_encode %{
 7750     __ movl($tmp$$Register, $src$$Register);
 7751     __ imull($tmp$$Register, rdx);
 7752     __ mull($src$$Register);
 7753     __ addl(rdx, $tmp$$Register);
 7754   %}
 7755   ins_pipe( pipe_slow );
 7756 %}
 7757 
 7758 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7759 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7760   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7761   match(Set dst (MulL dst src));
 7762   effect(KILL cr);
 7763   ins_cost(1*400);
 7764 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7765 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7766   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7767   ins_encode %{
 7768     __ mull($src$$Register);
 7769   %}
 7770   ins_pipe( pipe_slow );
 7771 %}
 7772 
 7773 // Multiply Register Long by small constant
 7774 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7775   match(Set dst (MulL dst src));
 7776   effect(KILL cr, TEMP tmp);
 7777   ins_cost(2*100+2*400);
 7778   size(12);
 7779 // Basic idea: lo(result) = lo(src * EAX)
 7780 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7781   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7782             "MOV    EDX,$src\n\t"
 7783             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7784             "ADD    EDX,$tmp" %}
 7785   ins_encode( long_multiply_con( dst, src, tmp ) );
 7786   ins_pipe( pipe_slow );
 7787 %}
 7788 
 7789 // Integer DIV with Register
 7790 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7791   match(Set rax (DivI rax div));
 7792   effect(KILL rdx, KILL cr);
 7793   size(26);
 7794   ins_cost(30*100+10*100);
 7795   format %{ "CMP    EAX,0x80000000\n\t"
 7796             "JNE,s  normal\n\t"
 7797             "XOR    EDX,EDX\n\t"
 7798             "CMP    ECX,-1\n\t"
 7799             "JE,s   done\n"
 7800     "normal: CDQ\n\t"
 7801             "IDIV   $div\n\t"
 7802     "done:"        %}
 7803   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7804   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7805   ins_pipe( ialu_reg_reg_alu0 );
 7806 %}
 7807 
 7808 // Divide Register Long
 7809 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7810   match(Set dst (DivL src1 src2));
 7811   effect(CALL);
 7812   ins_cost(10000);
 7813   format %{ "PUSH   $src1.hi\n\t"
 7814             "PUSH   $src1.lo\n\t"
 7815             "PUSH   $src2.hi\n\t"
 7816             "PUSH   $src2.lo\n\t"
 7817             "CALL   SharedRuntime::ldiv\n\t"
 7818             "ADD    ESP,16" %}
 7819   ins_encode( long_div(src1,src2) );
 7820   ins_pipe( pipe_slow );
 7821 %}
 7822 
 7823 // Integer DIVMOD with Register, both quotient and mod results
 7824 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7825   match(DivModI rax div);
 7826   effect(KILL cr);
 7827   size(26);
 7828   ins_cost(30*100+10*100);
 7829   format %{ "CMP    EAX,0x80000000\n\t"
 7830             "JNE,s  normal\n\t"
 7831             "XOR    EDX,EDX\n\t"
 7832             "CMP    ECX,-1\n\t"
 7833             "JE,s   done\n"
 7834     "normal: CDQ\n\t"
 7835             "IDIV   $div\n\t"
 7836     "done:"        %}
 7837   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7838   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7839   ins_pipe( pipe_slow );
 7840 %}
 7841 
 7842 // Integer MOD with Register
 7843 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7844   match(Set rdx (ModI rax div));
 7845   effect(KILL rax, KILL cr);
 7846 
 7847   size(26);
 7848   ins_cost(300);
 7849   format %{ "CDQ\n\t"
 7850             "IDIV   $div" %}
 7851   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7852   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7853   ins_pipe( ialu_reg_reg_alu0 );
 7854 %}
 7855 
 7856 // Remainder Register Long
 7857 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7858   match(Set dst (ModL src1 src2));
 7859   effect(CALL);
 7860   ins_cost(10000);
 7861   format %{ "PUSH   $src1.hi\n\t"
 7862             "PUSH   $src1.lo\n\t"
 7863             "PUSH   $src2.hi\n\t"
 7864             "PUSH   $src2.lo\n\t"
 7865             "CALL   SharedRuntime::lrem\n\t"
 7866             "ADD    ESP,16" %}
 7867   ins_encode( long_mod(src1,src2) );
 7868   ins_pipe( pipe_slow );
 7869 %}
 7870 
 7871 // Divide Register Long (no special case since divisor != -1)
 7872 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7873   match(Set dst (DivL dst imm));
 7874   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7875   ins_cost(1000);
 7876   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7877             "XOR    $tmp2,$tmp2\n\t"
 7878             "CMP    $tmp,EDX\n\t"
 7879             "JA,s   fast\n\t"
 7880             "MOV    $tmp2,EAX\n\t"
 7881             "MOV    EAX,EDX\n\t"
 7882             "MOV    EDX,0\n\t"
 7883             "JLE,s  pos\n\t"
 7884             "LNEG   EAX : $tmp2\n\t"
 7885             "DIV    $tmp # unsigned division\n\t"
 7886             "XCHG   EAX,$tmp2\n\t"
 7887             "DIV    $tmp\n\t"
 7888             "LNEG   $tmp2 : EAX\n\t"
 7889             "JMP,s  done\n"
 7890     "pos:\n\t"
 7891             "DIV    $tmp\n\t"
 7892             "XCHG   EAX,$tmp2\n"
 7893     "fast:\n\t"
 7894             "DIV    $tmp\n"
 7895     "done:\n\t"
 7896             "MOV    EDX,$tmp2\n\t"
 7897             "NEG    EDX:EAX # if $imm < 0" %}
 7898   ins_encode %{
 7899     int con = (int)$imm$$constant;
 7900     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7901     int pcon = (con > 0) ? con : -con;
 7902     Label Lfast, Lpos, Ldone;
 7903 
 7904     __ movl($tmp$$Register, pcon);
 7905     __ xorl($tmp2$$Register,$tmp2$$Register);
 7906     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7907     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7908 
 7909     __ movl($tmp2$$Register, $dst$$Register); // save
 7910     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7911     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7912     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7913 
 7914     // Negative dividend.
 7915     // convert value to positive to use unsigned division
 7916     __ lneg($dst$$Register, $tmp2$$Register);
 7917     __ divl($tmp$$Register);
 7918     __ xchgl($dst$$Register, $tmp2$$Register);
 7919     __ divl($tmp$$Register);
 7920     // revert result back to negative
 7921     __ lneg($tmp2$$Register, $dst$$Register);
 7922     __ jmpb(Ldone);
 7923 
 7924     __ bind(Lpos);
 7925     __ divl($tmp$$Register); // Use unsigned division
 7926     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7928 
 7929     __ bind(Lfast);
 7930     // fast path: src is positive
 7931     __ divl($tmp$$Register); // Use unsigned division
 7932 
 7933     __ bind(Ldone);
 7934     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7935     if (con < 0) {
 7936       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7937     }
 7938   %}
 7939   ins_pipe( pipe_slow );
 7940 %}
 7941 
// Remainder Register Long (remainder fits into 32 bits)
 7943 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7944   match(Set dst (ModL dst imm));
 7945   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7946   ins_cost(1000);
 7947   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7948             "CMP    $tmp,EDX\n\t"
 7949             "JA,s   fast\n\t"
 7950             "MOV    $tmp2,EAX\n\t"
 7951             "MOV    EAX,EDX\n\t"
 7952             "MOV    EDX,0\n\t"
 7953             "JLE,s  pos\n\t"
 7954             "LNEG   EAX : $tmp2\n\t"
 7955             "DIV    $tmp # unsigned division\n\t"
 7956             "MOV    EAX,$tmp2\n\t"
 7957             "DIV    $tmp\n\t"
 7958             "NEG    EDX\n\t"
 7959             "JMP,s  done\n"
 7960     "pos:\n\t"
 7961             "DIV    $tmp\n\t"
 7962             "MOV    EAX,$tmp2\n"
 7963     "fast:\n\t"
 7964             "DIV    $tmp\n"
 7965     "done:\n\t"
 7966             "MOV    EAX,EDX\n\t"
 7967             "SAR    EDX,31\n\t" %}
 7968   ins_encode %{
 7969     int con = (int)$imm$$constant;
 7970     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7971     int pcon = (con > 0) ? con : -con;
 7972     Label  Lfast, Lpos, Ldone;
 7973 
 7974     __ movl($tmp$$Register, pcon);
 7975     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7976     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7977 
 7978     __ movl($tmp2$$Register, $dst$$Register); // save
 7979     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7980     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7981     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7982 
 7983     // Negative dividend.
 7984     // convert value to positive to use unsigned division
 7985     __ lneg($dst$$Register, $tmp2$$Register);
 7986     __ divl($tmp$$Register);
 7987     __ movl($dst$$Register, $tmp2$$Register);
 7988     __ divl($tmp$$Register);
 7989     // revert remainder back to negative
 7990     __ negl(HIGH_FROM_LOW($dst$$Register));
 7991     __ jmpb(Ldone);
 7992 
 7993     __ bind(Lpos);
 7994     __ divl($tmp$$Register);
 7995     __ movl($dst$$Register, $tmp2$$Register);
 7996 
 7997     __ bind(Lfast);
 7998     // fast path: src is positive
 7999     __ divl($tmp$$Register);
 8000 
 8001     __ bind(Ldone);
 8002     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8003     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8004 
 8005   %}
 8006   ins_pipe( pipe_slow );
 8007 %}
 8008 
 8009 // Integer Shift Instructions
 8010 // Shift Left by one
 8011 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8012   match(Set dst (LShiftI dst shift));
 8013   effect(KILL cr);
 8014 
 8015   size(2);
 8016   format %{ "SHL    $dst,$shift" %}
 8017   opcode(0xD1, 0x4);  /* D1 /4 */
 8018   ins_encode( OpcP, RegOpc( dst ) );
 8019   ins_pipe( ialu_reg );
 8020 %}
 8021 
 8022 // Shift Left by 8-bit immediate
 8023 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8024   match(Set dst (LShiftI dst shift));
 8025   effect(KILL cr);
 8026 
 8027   size(3);
 8028   format %{ "SHL    $dst,$shift" %}
 8029   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8030   ins_encode( RegOpcImm( dst, shift) );
 8031   ins_pipe( ialu_reg );
 8032 %}
 8033 
 8034 // Shift Left by variable
 8035 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8036   match(Set dst (LShiftI dst shift));
 8037   effect(KILL cr);
 8038 
 8039   size(2);
 8040   format %{ "SHL    $dst,$shift" %}
 8041   opcode(0xD3, 0x4);  /* D3 /4 */
 8042   ins_encode( OpcP, RegOpc( dst ) );
 8043   ins_pipe( ialu_reg_reg );
 8044 %}
 8045 
 8046 // Arithmetic shift right by one
 8047 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8048   match(Set dst (RShiftI dst shift));
 8049   effect(KILL cr);
 8050 
 8051   size(2);
 8052   format %{ "SAR    $dst,$shift" %}
 8053   opcode(0xD1, 0x7);  /* D1 /7 */
 8054   ins_encode( OpcP, RegOpc( dst ) );
 8055   ins_pipe( ialu_reg );
 8056 %}
 8057 
// Arithmetic shift right memory operand by one
 8059 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8060   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8061   effect(KILL cr);
 8062   format %{ "SAR    $dst,$shift" %}
 8063   opcode(0xD1, 0x7);  /* D1 /7 */
 8064   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8065   ins_pipe( ialu_mem_imm );
 8066 %}
 8067 
 8068 // Arithmetic Shift Right by 8-bit immediate
 8069 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8070   match(Set dst (RShiftI dst shift));
 8071   effect(KILL cr);
 8072 
 8073   size(3);
 8074   format %{ "SAR    $dst,$shift" %}
 8075   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8076   ins_encode( RegOpcImm( dst, shift ) );
 8077   ins_pipe( ialu_mem_imm );
 8078 %}
 8079 
// Arithmetic Shift Right memory operand by 8-bit immediate
 8081 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8082   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8083   effect(KILL cr);
 8084 
 8085   format %{ "SAR    $dst,$shift" %}
 8086   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8087   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8088   ins_pipe( ialu_mem_imm );
 8089 %}
 8090 
 8091 // Arithmetic Shift Right by variable
 8092 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8093   match(Set dst (RShiftI dst shift));
 8094   effect(KILL cr);
 8095 
 8096   size(2);
 8097   format %{ "SAR    $dst,$shift" %}
 8098   opcode(0xD3, 0x7);  /* D3 /7 */
 8099   ins_encode( OpcP, RegOpc( dst ) );
 8100   ins_pipe( ialu_reg_reg );
 8101 %}
 8102 
 8103 // Logical shift right by one
 8104 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8105   match(Set dst (URShiftI dst shift));
 8106   effect(KILL cr);
 8107 
 8108   size(2);
 8109   format %{ "SHR    $dst,$shift" %}
 8110   opcode(0xD1, 0x5);  /* D1 /5 */
 8111   ins_encode( OpcP, RegOpc( dst ) );
 8112   ins_pipe( ialu_reg );
 8113 %}
 8114 
 8115 // Logical Shift Right by 8-bit immediate
 8116 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8117   match(Set dst (URShiftI dst shift));
 8118   effect(KILL cr);
 8119 
 8120   size(3);
 8121   format %{ "SHR    $dst,$shift" %}
 8122   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8123   ins_encode( RegOpcImm( dst, shift) );
 8124   ins_pipe( ialu_reg );
 8125 %}
 8126 
 8127 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8129 // This idiom is used by the compiler for the i2b bytecode.
 8130 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8131   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8132 
 8133   size(3);
 8134   format %{ "MOVSX  $dst,$src :8" %}
 8135   ins_encode %{
 8136     __ movsbl($dst$$Register, $src$$Register);
 8137   %}
 8138   ins_pipe(ialu_reg_reg);
 8139 %}
 8140 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8143 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8144   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8145 
 8146   size(3);
 8147   format %{ "MOVSX  $dst,$src :16" %}
 8148   ins_encode %{
 8149     __ movswl($dst$$Register, $src$$Register);
 8150   %}
 8151   ins_pipe(ialu_reg_reg);
 8152 %}
 8153 
 8154 
 8155 // Logical Shift Right by variable
 8156 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8157   match(Set dst (URShiftI dst shift));
 8158   effect(KILL cr);
 8159 
 8160   size(2);
 8161   format %{ "SHR    $dst,$shift" %}
 8162   opcode(0xD3, 0x5);  /* D3 /5 */
 8163   ins_encode( OpcP, RegOpc( dst ) );
 8164   ins_pipe( ialu_reg_reg );
 8165 %}
 8166 
 8167 
 8168 //----------Logical Instructions-----------------------------------------------
 8169 //----------Integer Logical Instructions---------------------------------------
 8170 // And Instructions
 8171 // And Register with Register
 8172 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8173   match(Set dst (AndI dst src));
 8174   effect(KILL cr);
 8175 
 8176   size(2);
 8177   format %{ "AND    $dst,$src" %}
 8178   opcode(0x23);
 8179   ins_encode( OpcP, RegReg( dst, src) );
 8180   ins_pipe( ialu_reg_reg );
 8181 %}
 8182 
 8183 // And Register with Immediate
 8184 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8185   match(Set dst (AndI dst src));
 8186   effect(KILL cr);
 8187 
 8188   format %{ "AND    $dst,$src" %}
 8189   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8190   // ins_encode( RegImm( dst, src) );
 8191   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8192   ins_pipe( ialu_reg );
 8193 %}
 8194 
 8195 // And Register with Memory
 8196 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8197   match(Set dst (AndI dst (LoadI src)));
 8198   effect(KILL cr);
 8199 
 8200   ins_cost(150);
 8201   format %{ "AND    $dst,$src" %}
 8202   opcode(0x23);
 8203   ins_encode( OpcP, RegMem( dst, src) );
 8204   ins_pipe( ialu_reg_mem );
 8205 %}
 8206 
 8207 // And Memory with Register
 8208 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8209   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8210   effect(KILL cr);
 8211 
 8212   ins_cost(150);
 8213   format %{ "AND    $dst,$src" %}
 8214   opcode(0x21);  /* Opcode 21 /r */
 8215   ins_encode( OpcP, RegMem( src, dst ) );
 8216   ins_pipe( ialu_mem_reg );
 8217 %}
 8218 
 8219 // And Memory with Immediate
 8220 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8221   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8222   effect(KILL cr);
 8223 
 8224   ins_cost(125);
 8225   format %{ "AND    $dst,$src" %}
 8226   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8227   // ins_encode( MemImm( dst, src) );
 8228   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8229   ins_pipe( ialu_mem_imm );
 8230 %}
 8231 
 8232 // BMI1 instructions
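// The integer BMI1 forms below match the usual bit-trick identities and
// replace them with a single instruction.  Rough C-style meaning of each
// pattern (illustration only):
//
//   andn(x, y)  == ~x & y         // matched as (x ^ -1) & y
//   blsi(x)     ==  x & -x        // isolate lowest set bit, matched as (0 - x) & x
//   blsmsk(x)   ==  x ^ (x - 1)   // mask up to and including the lowest set bit
//   blsr(x)     ==  x & (x - 1)   // clear the lowest set bit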
 8233 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8234   match(Set dst (AndI (XorI src1 minus_1) src2));
 8235   predicate(UseBMI1Instructions);
 8236   effect(KILL cr);
 8237 
 8238   format %{ "ANDNL  $dst, $src1, $src2" %}
 8239 
 8240   ins_encode %{
 8241     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8242   %}
 8243   ins_pipe(ialu_reg);
 8244 %}
 8245 
 8246 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8247   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8248   predicate(UseBMI1Instructions);
 8249   effect(KILL cr);
 8250 
 8251   ins_cost(125);
 8252   format %{ "ANDNL  $dst, $src1, $src2" %}
 8253 
 8254   ins_encode %{
 8255     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8256   %}
 8257   ins_pipe(ialu_reg_mem);
 8258 %}
 8259 
 8260 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8261   match(Set dst (AndI (SubI imm_zero src) src));
 8262   predicate(UseBMI1Instructions);
 8263   effect(KILL cr);
 8264 
 8265   format %{ "BLSIL  $dst, $src" %}
 8266 
 8267   ins_encode %{
 8268     __ blsil($dst$$Register, $src$$Register);
 8269   %}
 8270   ins_pipe(ialu_reg);
 8271 %}
 8272 
 8273 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8274   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8275   predicate(UseBMI1Instructions);
 8276   effect(KILL cr);
 8277 
 8278   ins_cost(125);
 8279   format %{ "BLSIL  $dst, $src" %}
 8280 
 8281   ins_encode %{
 8282     __ blsil($dst$$Register, $src$$Address);
 8283   %}
 8284   ins_pipe(ialu_reg_mem);
 8285 %}
 8286 
 8287 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8288 %{
 8289   match(Set dst (XorI (AddI src minus_1) src));
 8290   predicate(UseBMI1Instructions);
 8291   effect(KILL cr);
 8292 
 8293   format %{ "BLSMSKL $dst, $src" %}
 8294 
 8295   ins_encode %{
 8296     __ blsmskl($dst$$Register, $src$$Register);
 8297   %}
 8298 
 8299   ins_pipe(ialu_reg);
 8300 %}
 8301 
 8302 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8303 %{
 8304   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8305   predicate(UseBMI1Instructions);
 8306   effect(KILL cr);
 8307 
 8308   ins_cost(125);
 8309   format %{ "BLSMSKL $dst, $src" %}
 8310 
 8311   ins_encode %{
 8312     __ blsmskl($dst$$Register, $src$$Address);
 8313   %}
 8314 
 8315   ins_pipe(ialu_reg_mem);
 8316 %}
 8317 
 8318 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8319 %{
 8320   match(Set dst (AndI (AddI src minus_1) src) );
 8321   predicate(UseBMI1Instructions);
 8322   effect(KILL cr);
 8323 
 8324   format %{ "BLSRL  $dst, $src" %}
 8325 
 8326   ins_encode %{
 8327     __ blsrl($dst$$Register, $src$$Register);
 8328   %}
 8329 
 8330   ins_pipe(ialu_reg);
 8331 %}
 8332 
 8333 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8334 %{
 8335   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8336   predicate(UseBMI1Instructions);
 8337   effect(KILL cr);
 8338 
 8339   ins_cost(125);
 8340   format %{ "BLSRL  $dst, $src" %}
 8341 
 8342   ins_encode %{
 8343     __ blsrl($dst$$Register, $src$$Address);
 8344   %}
 8345 
 8346   ins_pipe(ialu_reg_mem);
 8347 %}
 8348 
 8349 // Or Instructions
 8350 // Or Register with Register
 8351 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8352   match(Set dst (OrI dst src));
 8353   effect(KILL cr);
 8354 
 8355   size(2);
 8356   format %{ "OR     $dst,$src" %}
 8357   opcode(0x0B);
 8358   ins_encode( OpcP, RegReg( dst, src) );
 8359   ins_pipe( ialu_reg_reg );
 8360 %}
 8361 
 8362 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8363   match(Set dst (OrI dst (CastP2X src)));
 8364   effect(KILL cr);
 8365 
 8366   size(2);
 8367   format %{ "OR     $dst,$src" %}
 8368   opcode(0x0B);
 8369   ins_encode( OpcP, RegReg( dst, src) );
 8370   ins_pipe( ialu_reg_reg );
 8371 %}
 8372 
 8373 
 8374 // Or Register with Immediate
 8375 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8376   match(Set dst (OrI dst src));
 8377   effect(KILL cr);
 8378 
 8379   format %{ "OR     $dst,$src" %}
 8380   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8381   // ins_encode( RegImm( dst, src) );
 8382   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8383   ins_pipe( ialu_reg );
 8384 %}
 8385 
 8386 // Or Register with Memory
 8387 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8388   match(Set dst (OrI dst (LoadI src)));
 8389   effect(KILL cr);
 8390 
 8391   ins_cost(150);
 8392   format %{ "OR     $dst,$src" %}
 8393   opcode(0x0B);
 8394   ins_encode( OpcP, RegMem( dst, src) );
 8395   ins_pipe( ialu_reg_mem );
 8396 %}
 8397 
 8398 // Or Memory with Register
 8399 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8400   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8401   effect(KILL cr);
 8402 
 8403   ins_cost(150);
 8404   format %{ "OR     $dst,$src" %}
 8405   opcode(0x09);  /* Opcode 09 /r */
 8406   ins_encode( OpcP, RegMem( src, dst ) );
 8407   ins_pipe( ialu_mem_reg );
 8408 %}
 8409 
 8410 // Or Memory with Immediate
 8411 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8412   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8413   effect(KILL cr);
 8414 
 8415   ins_cost(125);
 8416   format %{ "OR     $dst,$src" %}
 8417   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8418   // ins_encode( MemImm( dst, src) );
 8419   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8420   ins_pipe( ialu_mem_imm );
 8421 %}
 8422 
 8423 // ROL/ROR
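// In this file rotates are not matched as primitives; they are recognized
// from the shift-or idiom and then expanded into ROL/ROR.  Rough C-style form
// of the patterns below (illustration only; the imm8 forms carry a predicate
// requiring the two shift counts to sum to 0 mod 32):
//
//   rotl32(x, s) == (x << s) | (x >> (32 - s))   // ">>" here is the unsigned shift
//   rotr32(x, s) == (x >> s) | (x << (32 - s))
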
 8424 // ROL expand
 8425 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8426   effect(USE_DEF dst, USE shift, KILL cr);
 8427 
 8428   format %{ "ROL    $dst, $shift" %}
 8429   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8430   ins_encode( OpcP, RegOpc( dst ));
 8431   ins_pipe( ialu_reg );
 8432 %}
 8433 
 8434 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8435   effect(USE_DEF dst, USE shift, KILL cr);
 8436 
 8437   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
 8439   ins_encode( RegOpcImm(dst, shift) );
 8440   ins_pipe(ialu_reg);
 8441 %}
 8442 
 8443 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8444   effect(USE_DEF dst, USE shift, KILL cr);
 8445 
 8446   format %{ "ROL    $dst, $shift" %}
 8447   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8448   ins_encode(OpcP, RegOpc(dst));
 8449   ins_pipe( ialu_reg_reg );
 8450 %}
 8451 // end of ROL expand
 8452 
// ROL 32bit by one
 8454 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8455   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8456 
 8457   expand %{
 8458     rolI_eReg_imm1(dst, lshift, cr);
 8459   %}
 8460 %}
 8461 
 8462 // ROL 32bit var by imm8 once
 8463 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8464   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8465   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8466 
 8467   expand %{
 8468     rolI_eReg_imm8(dst, lshift, cr);
 8469   %}
 8470 %}
 8471 
 8472 // ROL 32bit var by var once
 8473 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8474   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8475 
 8476   expand %{
 8477     rolI_eReg_CL(dst, shift, cr);
 8478   %}
 8479 %}
 8480 
 8481 // ROL 32bit var by var once
 8482 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8483   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8484 
 8485   expand %{
 8486     rolI_eReg_CL(dst, shift, cr);
 8487   %}
 8488 %}
 8489 
 8490 // ROR expand
 8491 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8492   effect(USE_DEF dst, USE shift, KILL cr);
 8493 
 8494   format %{ "ROR    $dst, $shift" %}
 8495   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8496   ins_encode( OpcP, RegOpc( dst ) );
 8497   ins_pipe( ialu_reg );
 8498 %}
 8499 
 8500 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
 8502 
 8503   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
 8505   ins_encode( RegOpcImm(dst, shift) );
 8506   ins_pipe( ialu_reg );
 8507 %}
 8508 
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8510   effect(USE_DEF dst, USE shift, KILL cr);
 8511 
 8512   format %{ "ROR    $dst, $shift" %}
 8513   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8514   ins_encode(OpcP, RegOpc(dst));
 8515   ins_pipe( ialu_reg_reg );
 8516 %}
 8517 // end of ROR expand
 8518 
// ROR 32bit by one
 8520 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8521   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8522 
 8523   expand %{
 8524     rorI_eReg_imm1(dst, rshift, cr);
 8525   %}
 8526 %}
 8527 
 8528 // ROR 32bit by immI8 once
 8529 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8530   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8531   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8532 
 8533   expand %{
 8534     rorI_eReg_imm8(dst, rshift, cr);
 8535   %}
 8536 %}
 8537 
 8538 // ROR 32bit var by var once
 8539 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8540   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8541 
 8542   expand %{
 8543     rorI_eReg_CL(dst, shift, cr);
 8544   %}
 8545 %}
 8546 
 8547 // ROR 32bit var by var once
 8548 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8549   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8550 
 8551   expand %{
 8552     rorI_eReg_CL(dst, shift, cr);
 8553   %}
 8554 %}
 8555 
 8556 // Xor Instructions
 8557 // Xor Register with Register
 8558 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8559   match(Set dst (XorI dst src));
 8560   effect(KILL cr);
 8561 
 8562   size(2);
 8563   format %{ "XOR    $dst,$src" %}
 8564   opcode(0x33);
 8565   ins_encode( OpcP, RegReg( dst, src) );
 8566   ins_pipe( ialu_reg_reg );
 8567 %}
 8568 
 8569 // Xor Register with Immediate -1
 8570 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8571   match(Set dst (XorI dst imm));
 8572 
 8573   size(2);
 8574   format %{ "NOT    $dst" %}
 8575   ins_encode %{
 8576      __ notl($dst$$Register);
 8577   %}
 8578   ins_pipe( ialu_reg );
 8579 %}
 8580 
 8581 // Xor Register with Immediate
 8582 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8583   match(Set dst (XorI dst src));
 8584   effect(KILL cr);
 8585 
 8586   format %{ "XOR    $dst,$src" %}
 8587   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8588   // ins_encode( RegImm( dst, src) );
 8589   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8590   ins_pipe( ialu_reg );
 8591 %}
 8592 
 8593 // Xor Register with Memory
 8594 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8595   match(Set dst (XorI dst (LoadI src)));
 8596   effect(KILL cr);
 8597 
 8598   ins_cost(150);
 8599   format %{ "XOR    $dst,$src" %}
 8600   opcode(0x33);
 8601   ins_encode( OpcP, RegMem(dst, src) );
 8602   ins_pipe( ialu_reg_mem );
 8603 %}
 8604 
 8605 // Xor Memory with Register
 8606 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8607   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8608   effect(KILL cr);
 8609 
 8610   ins_cost(150);
 8611   format %{ "XOR    $dst,$src" %}
 8612   opcode(0x31);  /* Opcode 31 /r */
 8613   ins_encode( OpcP, RegMem( src, dst ) );
 8614   ins_pipe( ialu_mem_reg );
 8615 %}
 8616 
 8617 // Xor Memory with Immediate
 8618 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8619   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8620   effect(KILL cr);
 8621 
 8622   ins_cost(125);
 8623   format %{ "XOR    $dst,$src" %}
 8624   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8625   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8626   ins_pipe( ialu_mem_imm );
 8627 %}
 8628 
 8629 //----------Convert Int to Boolean---------------------------------------------
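// Conv2B turns an int or pointer into 0 or 1 without a branch: after the
// register copy, NEG sets CF exactly when the value is non-zero, and ADC then
// adds the original value plus that carry back in, leaving just the carry.
// Rough C-style sketch (illustration only; the helper name is made up):
//
//   uint32_t conv2b(uint32_t src) {
//     uint32_t dst   = src;                 // movI_nocopy / movP_nocopy
//     uint32_t carry = (dst != 0);          // CF as produced by NEG
//     dst = (uint32_t)(0 - dst);            // NEG dst
//     return dst + src + carry;             // ADC dst,src  ->  0 or 1
//   }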
 8630 
 8631 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8632   effect( DEF dst, USE src );
 8633   format %{ "MOV    $dst,$src" %}
 8634   ins_encode( enc_Copy( dst, src) );
 8635   ins_pipe( ialu_reg_reg );
 8636 %}
 8637 
 8638 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8639   effect( USE_DEF dst, USE src, KILL cr );
 8640 
 8641   size(4);
 8642   format %{ "NEG    $dst\n\t"
 8643             "ADC    $dst,$src" %}
 8644   ins_encode( neg_reg(dst),
 8645               OpcRegReg(0x13,dst,src) );
 8646   ins_pipe( ialu_reg_reg_long );
 8647 %}
 8648 
 8649 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8650   match(Set dst (Conv2B src));
 8651 
 8652   expand %{
 8653     movI_nocopy(dst,src);
 8654     ci2b(dst,src,cr);
 8655   %}
 8656 %}
 8657 
 8658 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8659   effect( DEF dst, USE src );
 8660   format %{ "MOV    $dst,$src" %}
 8661   ins_encode( enc_Copy( dst, src) );
 8662   ins_pipe( ialu_reg_reg );
 8663 %}
 8664 
 8665 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8666   effect( USE_DEF dst, USE src, KILL cr );
 8667   format %{ "NEG    $dst\n\t"
 8668             "ADC    $dst,$src" %}
 8669   ins_encode( neg_reg(dst),
 8670               OpcRegReg(0x13,dst,src) );
 8671   ins_pipe( ialu_reg_reg_long );
 8672 %}
 8673 
 8674 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8675   match(Set dst (Conv2B src));
 8676 
 8677   expand %{
 8678     movP_nocopy(dst,src);
 8679     cp2b(dst,src,cr);
 8680   %}
 8681 %}
 8682 
 8683 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8684   match(Set dst (CmpLTMask p q));
 8685   effect(KILL cr);
 8686   ins_cost(400);
 8687 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8689   format %{ "XOR    $dst,$dst\n\t"
 8690             "CMP    $p,$q\n\t"
 8691             "SETlt  $dst\n\t"
 8692             "NEG    $dst" %}
 8693   ins_encode %{
 8694     Register Rp = $p$$Register;
 8695     Register Rq = $q$$Register;
 8696     Register Rd = $dst$$Register;
 8698     __ xorl(Rd, Rd);
 8699     __ cmpl(Rp, Rq);
 8700     __ setb(Assembler::less, Rd);
 8701     __ negl(Rd);
 8702   %}
 8703 
 8704   ins_pipe(pipe_slow);
 8705 %}
 8706 
 8707 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8708   match(Set dst (CmpLTMask dst zero));
 8709   effect(DEF dst, KILL cr);
 8710   ins_cost(100);
 8711 
 8712   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8713   ins_encode %{
    __ sarl($dst$$Register, 31);
 8715   %}
 8716   ins_pipe(ialu_reg);
 8717 %}
 8718 
 8719 /* better to save a register than avoid a branch */
 8720 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8721   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8722   effect(KILL cr);
 8723   ins_cost(400);
 8724   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8725             "JGE    done\n\t"
 8726             "ADD    $p,$y\n"
 8727             "done:  " %}
 8728   ins_encode %{
 8729     Register Rp = $p$$Register;
 8730     Register Rq = $q$$Register;
 8731     Register Ry = $y$$Register;
 8732     Label done;
 8733     __ subl(Rp, Rq);
 8734     __ jccb(Assembler::greaterEqual, done);
 8735     __ addl(Rp, Ry);
 8736     __ bind(done);
 8737   %}
 8738 
 8739   ins_pipe(pipe_cmplt);
 8740 %}
 8741 
 8742 /* better to save a register than avoid a branch */
 8743 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8744   match(Set y (AndI (CmpLTMask p q) y));
 8745   effect(KILL cr);
 8746 
 8747   ins_cost(300);
 8748 
 8749   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8750             "JLT      done\n\t"
 8751             "XORL     $y, $y\n"
 8752             "done:  " %}
 8753   ins_encode %{
 8754     Register Rp = $p$$Register;
 8755     Register Rq = $q$$Register;
 8756     Register Ry = $y$$Register;
 8757     Label done;
 8758     __ cmpl(Rp, Rq);
 8759     __ jccb(Assembler::less, done);
 8760     __ xorl(Ry, Ry);
 8761     __ bind(done);
 8762   %}
 8763 
 8764   ins_pipe(pipe_cmplt);
 8765 %}
 8766 
 8767 /* If I enable this, I encourage spilling in the inner loop of compress.
 8768 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8769   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8770 */
 8771 //----------Overflow Math Instructions-----------------------------------------
 8772 
 8773 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8774 %{
 8775   match(Set cr (OverflowAddI op1 op2));
 8776   effect(DEF cr, USE_KILL op1, USE op2);
 8777 
 8778   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8779 
 8780   ins_encode %{
 8781     __ addl($op1$$Register, $op2$$Register);
 8782   %}
 8783   ins_pipe(ialu_reg_reg);
 8784 %}
 8785 
 8786 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8787 %{
 8788   match(Set cr (OverflowAddI op1 op2));
 8789   effect(DEF cr, USE_KILL op1, USE op2);
 8790 
 8791   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8792 
 8793   ins_encode %{
 8794     __ addl($op1$$Register, $op2$$constant);
 8795   %}
 8796   ins_pipe(ialu_reg_reg);
 8797 %}
 8798 
 8799 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8800 %{
 8801   match(Set cr (OverflowSubI op1 op2));
 8802 
 8803   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8804   ins_encode %{
 8805     __ cmpl($op1$$Register, $op2$$Register);
 8806   %}
 8807   ins_pipe(ialu_reg_reg);
 8808 %}
 8809 
 8810 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8811 %{
 8812   match(Set cr (OverflowSubI op1 op2));
 8813 
 8814   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8815   ins_encode %{
 8816     __ cmpl($op1$$Register, $op2$$constant);
 8817   %}
 8818   ins_pipe(ialu_reg_reg);
 8819 %}
 8820 
 8821 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8822 %{
 8823   match(Set cr (OverflowSubI zero op2));
 8824   effect(DEF cr, USE_KILL op2);
 8825 
 8826   format %{ "NEG    $op2\t# overflow check int" %}
 8827   ins_encode %{
 8828     __ negl($op2$$Register);
 8829   %}
 8830   ins_pipe(ialu_reg_reg);
 8831 %}
 8832 
 8833 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8834 %{
 8835   match(Set cr (OverflowMulI op1 op2));
 8836   effect(DEF cr, USE_KILL op1, USE op2);
 8837 
 8838   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8839   ins_encode %{
 8840     __ imull($op1$$Register, $op2$$Register);
 8841   %}
 8842   ins_pipe(ialu_reg_reg_alu0);
 8843 %}
 8844 
 8845 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8846 %{
 8847   match(Set cr (OverflowMulI op1 op2));
 8848   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8849 
 8850   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8851   ins_encode %{
 8852     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8853   %}
 8854   ins_pipe(ialu_reg_reg_alu0);
 8855 %}
 8856 
 8857 // Integer Absolute Instructions
 8858 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8859 %{
 8860   match(Set dst (AbsI src));
 8861   effect(TEMP dst, TEMP tmp, KILL cr);
 8862   format %{ "movl $tmp, $src\n\t"
 8863             "sarl $tmp, 31\n\t"
 8864             "movl $dst, $src\n\t"
 8865             "xorl $dst, $tmp\n\t"
 8866             "subl $dst, $tmp\n"
 8867           %}
 8868   ins_encode %{
 8869     __ movl($tmp$$Register, $src$$Register);
 8870     __ sarl($tmp$$Register, 31);
 8871     __ movl($dst$$Register, $src$$Register);
 8872     __ xorl($dst$$Register, $tmp$$Register);
 8873     __ subl($dst$$Register, $tmp$$Register);
 8874   %}
 8875 
 8876   ins_pipe(ialu_reg_reg);
 8877 %}
 8878 
 8879 //----------Long Instructions------------------------------------------------
 8880 // Add Long Register with Register
 8881 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8882   match(Set dst (AddL dst src));
 8883   effect(KILL cr);
 8884   ins_cost(200);
 8885   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8886             "ADC    $dst.hi,$src.hi" %}
 8887   opcode(0x03, 0x13);
 8888   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8889   ins_pipe( ialu_reg_reg_long );
 8890 %}
 8891 
 8892 // Add Long Register with Immediate
 8893 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8894   match(Set dst (AddL dst src));
 8895   effect(KILL cr);
 8896   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8897             "ADC    $dst.hi,$src.hi" %}
 8898   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8899   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8900   ins_pipe( ialu_reg_long );
 8901 %}
 8902 
 8903 // Add Long Register with Memory
 8904 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8905   match(Set dst (AddL dst (LoadL mem)));
 8906   effect(KILL cr);
 8907   ins_cost(125);
 8908   format %{ "ADD    $dst.lo,$mem\n\t"
 8909             "ADC    $dst.hi,$mem+4" %}
 8910   opcode(0x03, 0x13);
 8911   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8912   ins_pipe( ialu_reg_long_mem );
 8913 %}
 8914 
 8915 // Subtract Long Register with Register.
 8916 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8917   match(Set dst (SubL dst src));
 8918   effect(KILL cr);
 8919   ins_cost(200);
 8920   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8921             "SBB    $dst.hi,$src.hi" %}
 8922   opcode(0x2B, 0x1B);
 8923   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8924   ins_pipe( ialu_reg_reg_long );
 8925 %}
 8926 
 8927 // Subtract Long Register with Immediate
 8928 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8929   match(Set dst (SubL dst src));
 8930   effect(KILL cr);
 8931   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8932             "SBB    $dst.hi,$src.hi" %}
 8933   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8934   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8935   ins_pipe( ialu_reg_long );
 8936 %}
 8937 
 8938 // Subtract Long Register with Memory
 8939 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8940   match(Set dst (SubL dst (LoadL mem)));
 8941   effect(KILL cr);
 8942   ins_cost(125);
 8943   format %{ "SUB    $dst.lo,$mem\n\t"
 8944             "SBB    $dst.hi,$mem+4" %}
 8945   opcode(0x2B, 0x1B);
 8946   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8947   ins_pipe( ialu_reg_long_mem );
 8948 %}
 8949 
 8950 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8951   match(Set dst (SubL zero dst));
 8952   effect(KILL cr);
 8953   ins_cost(300);
 8954   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8955   ins_encode( neg_long(dst) );
 8956   ins_pipe( ialu_reg_reg_long );
 8957 %}
 8958 
 8959 // And Long Register with Register
 8960 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8961   match(Set dst (AndL dst src));
 8962   effect(KILL cr);
 8963   format %{ "AND    $dst.lo,$src.lo\n\t"
 8964             "AND    $dst.hi,$src.hi" %}
 8965   opcode(0x23,0x23);
 8966   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8967   ins_pipe( ialu_reg_reg_long );
 8968 %}
 8969 
 8970 // And Long Register with Immediate
 8971 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8972   match(Set dst (AndL dst src));
 8973   effect(KILL cr);
 8974   format %{ "AND    $dst.lo,$src.lo\n\t"
 8975             "AND    $dst.hi,$src.hi" %}
 8976   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8977   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8978   ins_pipe( ialu_reg_long );
 8979 %}
 8980 
 8981 // And Long Register with Memory
 8982 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8983   match(Set dst (AndL dst (LoadL mem)));
 8984   effect(KILL cr);
 8985   ins_cost(125);
 8986   format %{ "AND    $dst.lo,$mem\n\t"
 8987             "AND    $dst.hi,$mem+4" %}
 8988   opcode(0x23, 0x23);
 8989   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8990   ins_pipe( ialu_reg_long_mem );
 8991 %}
 8992 
 8993 // BMI1 instructions
 8994 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8995   match(Set dst (AndL (XorL src1 minus_1) src2));
 8996   predicate(UseBMI1Instructions);
 8997   effect(KILL cr, TEMP dst);
 8998 
 8999   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9000             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9001          %}
 9002 
 9003   ins_encode %{
 9004     Register Rdst = $dst$$Register;
 9005     Register Rsrc1 = $src1$$Register;
 9006     Register Rsrc2 = $src2$$Register;
 9007     __ andnl(Rdst, Rsrc1, Rsrc2);
 9008     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9009   %}
 9010   ins_pipe(ialu_reg_reg_long);
 9011 %}
 9012 
 9013 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9014   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9015   predicate(UseBMI1Instructions);
 9016   effect(KILL cr, TEMP dst);
 9017 
 9018   ins_cost(125);
 9019   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9020             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9021          %}
 9022 
 9023   ins_encode %{
 9024     Register Rdst = $dst$$Register;
 9025     Register Rsrc1 = $src1$$Register;
 9026     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9027 
 9028     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9029     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9030   %}
 9031   ins_pipe(ialu_reg_mem);
 9032 %}
 9033 
 9034 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9035   match(Set dst (AndL (SubL imm_zero src) src));
 9036   predicate(UseBMI1Instructions);
 9037   effect(KILL cr, TEMP dst);
 9038 
 9039   format %{ "MOVL   $dst.hi, 0\n\t"
 9040             "BLSIL  $dst.lo, $src.lo\n\t"
 9041             "JNZ    done\n\t"
 9042             "BLSIL  $dst.hi, $src.hi\n"
 9043             "done:"
 9044          %}
 9045 
 9046   ins_encode %{
 9047     Label done;
 9048     Register Rdst = $dst$$Register;
 9049     Register Rsrc = $src$$Register;
 9050     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9051     __ blsil(Rdst, Rsrc);
 9052     __ jccb(Assembler::notZero, done);
 9053     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9054     __ bind(done);
 9055   %}
 9056   ins_pipe(ialu_reg);
 9057 %}
 9058 
 9059 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9060   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9061   predicate(UseBMI1Instructions);
 9062   effect(KILL cr, TEMP dst);
 9063 
 9064   ins_cost(125);
 9065   format %{ "MOVL   $dst.hi, 0\n\t"
 9066             "BLSIL  $dst.lo, $src\n\t"
 9067             "JNZ    done\n\t"
 9068             "BLSIL  $dst.hi, $src+4\n"
 9069             "done:"
 9070          %}
 9071 
 9072   ins_encode %{
 9073     Label done;
 9074     Register Rdst = $dst$$Register;
 9075     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9076 
 9077     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9078     __ blsil(Rdst, $src$$Address);
 9079     __ jccb(Assembler::notZero, done);
 9080     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9081     __ bind(done);
 9082   %}
 9083   ins_pipe(ialu_reg_mem);
 9084 %}
 9085 
 9086 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9087 %{
 9088   match(Set dst (XorL (AddL src minus_1) src));
 9089   predicate(UseBMI1Instructions);
 9090   effect(KILL cr, TEMP dst);
 9091 
 9092   format %{ "MOVL    $dst.hi, 0\n\t"
 9093             "BLSMSKL $dst.lo, $src.lo\n\t"
 9094             "JNC     done\n\t"
 9095             "BLSMSKL $dst.hi, $src.hi\n"
 9096             "done:"
 9097          %}
 9098 
 9099   ins_encode %{
 9100     Label done;
 9101     Register Rdst = $dst$$Register;
 9102     Register Rsrc = $src$$Register;
 9103     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9104     __ blsmskl(Rdst, Rsrc);
 9105     __ jccb(Assembler::carryClear, done);
 9106     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9107     __ bind(done);
 9108   %}
 9109 
 9110   ins_pipe(ialu_reg);
 9111 %}
 9112 
 9113 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9114 %{
 9115   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9116   predicate(UseBMI1Instructions);
 9117   effect(KILL cr, TEMP dst);
 9118 
 9119   ins_cost(125);
 9120   format %{ "MOVL    $dst.hi, 0\n\t"
 9121             "BLSMSKL $dst.lo, $src\n\t"
 9122             "JNC     done\n\t"
 9123             "BLSMSKL $dst.hi, $src+4\n"
 9124             "done:"
 9125          %}
 9126 
 9127   ins_encode %{
 9128     Label done;
 9129     Register Rdst = $dst$$Register;
 9130     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9131 
 9132     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9133     __ blsmskl(Rdst, $src$$Address);
 9134     __ jccb(Assembler::carryClear, done);
 9135     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9136     __ bind(done);
 9137   %}
 9138 
 9139   ins_pipe(ialu_reg_mem);
 9140 %}
 9141 
 9142 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9143 %{
 9144   match(Set dst (AndL (AddL src minus_1) src) );
 9145   predicate(UseBMI1Instructions);
 9146   effect(KILL cr, TEMP dst);
 9147 
 9148   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9149             "BLSRL  $dst.lo, $src.lo\n\t"
 9150             "JNC    done\n\t"
 9151             "BLSRL  $dst.hi, $src.hi\n"
 9152             "done:"
 9153   %}
 9154 
 9155   ins_encode %{
 9156     Label done;
 9157     Register Rdst = $dst$$Register;
 9158     Register Rsrc = $src$$Register;
 9159     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9160     __ blsrl(Rdst, Rsrc);
 9161     __ jccb(Assembler::carryClear, done);
 9162     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9163     __ bind(done);
 9164   %}
 9165 
 9166   ins_pipe(ialu_reg);
 9167 %}
 9168 
 9169 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9170 %{
 9171   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9172   predicate(UseBMI1Instructions);
 9173   effect(KILL cr, TEMP dst);
 9174 
 9175   ins_cost(125);
 9176   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9177             "BLSRL  $dst.lo, $src\n\t"
 9178             "JNC    done\n\t"
 9179             "BLSRL  $dst.hi, $src+4\n"
 9180             "done:"
 9181   %}
 9182 
 9183   ins_encode %{
 9184     Label done;
 9185     Register Rdst = $dst$$Register;
 9186     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9187     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9188     __ blsrl(Rdst, $src$$Address);
 9189     __ jccb(Assembler::carryClear, done);
 9190     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9191     __ bind(done);
 9192   %}
 9193 
 9194   ins_pipe(ialu_reg_mem);
 9195 %}
 9196 
 9197 // Or Long Register with Register
 9198 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9199   match(Set dst (OrL dst src));
 9200   effect(KILL cr);
 9201   format %{ "OR     $dst.lo,$src.lo\n\t"
 9202             "OR     $dst.hi,$src.hi" %}
 9203   opcode(0x0B,0x0B);
 9204   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9205   ins_pipe( ialu_reg_reg_long );
 9206 %}
 9207 
 9208 // Or Long Register with Immediate
 9209 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9210   match(Set dst (OrL dst src));
 9211   effect(KILL cr);
 9212   format %{ "OR     $dst.lo,$src.lo\n\t"
 9213             "OR     $dst.hi,$src.hi" %}
 9214   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9215   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9216   ins_pipe( ialu_reg_long );
 9217 %}
 9218 
 9219 // Or Long Register with Memory
 9220 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9221   match(Set dst (OrL dst (LoadL mem)));
 9222   effect(KILL cr);
 9223   ins_cost(125);
 9224   format %{ "OR     $dst.lo,$mem\n\t"
 9225             "OR     $dst.hi,$mem+4" %}
 9226   opcode(0x0B,0x0B);
 9227   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9228   ins_pipe( ialu_reg_long_mem );
 9229 %}
 9230 
 9231 // Xor Long Register with Register
 9232 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9233   match(Set dst (XorL dst src));
 9234   effect(KILL cr);
 9235   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9236             "XOR    $dst.hi,$src.hi" %}
 9237   opcode(0x33,0x33);
 9238   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9239   ins_pipe( ialu_reg_reg_long );
 9240 %}
 9241 
 9242 // Xor Long Register with Immediate -1
 9243 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9244   match(Set dst (XorL dst imm));
 9245   format %{ "NOT    $dst.lo\n\t"
 9246             "NOT    $dst.hi" %}
 9247   ins_encode %{
 9248      __ notl($dst$$Register);
 9249      __ notl(HIGH_FROM_LOW($dst$$Register));
 9250   %}
 9251   ins_pipe( ialu_reg_long );
 9252 %}
 9253 
 9254 // Xor Long Register with Immediate
 9255 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9256   match(Set dst (XorL dst src));
 9257   effect(KILL cr);
 9258   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9259             "XOR    $dst.hi,$src.hi" %}
 9260   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9261   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9262   ins_pipe( ialu_reg_long );
 9263 %}
 9264 
 9265 // Xor Long Register with Memory
 9266 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9267   match(Set dst (XorL dst (LoadL mem)));
 9268   effect(KILL cr);
 9269   ins_cost(125);
 9270   format %{ "XOR    $dst.lo,$mem\n\t"
 9271             "XOR    $dst.hi,$mem+4" %}
 9272   opcode(0x33,0x33);
 9273   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9274   ins_pipe( ialu_reg_long_mem );
 9275 %}
 9276 
 9277 // Shift Left Long by 1
 9278 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9279   predicate(UseNewLongLShift);
 9280   match(Set dst (LShiftL dst cnt));
 9281   effect(KILL cr);
 9282   ins_cost(100);
 9283   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9284             "ADC    $dst.hi,$dst.hi" %}
 9285   ins_encode %{
 9286     __ addl($dst$$Register,$dst$$Register);
 9287     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9288   %}
 9289   ins_pipe( ialu_reg_long );
 9290 %}
 9291 
 9292 // Shift Left Long by 2
 9293 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9294   predicate(UseNewLongLShift);
 9295   match(Set dst (LShiftL dst cnt));
 9296   effect(KILL cr);
 9297   ins_cost(100);
 9298   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9299             "ADC    $dst.hi,$dst.hi\n\t"
 9300             "ADD    $dst.lo,$dst.lo\n\t"
 9301             "ADC    $dst.hi,$dst.hi" %}
 9302   ins_encode %{
 9303     __ addl($dst$$Register,$dst$$Register);
 9304     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9305     __ addl($dst$$Register,$dst$$Register);
 9306     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9307   %}
 9308   ins_pipe( ialu_reg_long );
 9309 %}
 9310 
 9311 // Shift Left Long by 3
 9312 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9313   predicate(UseNewLongLShift);
 9314   match(Set dst (LShiftL dst cnt));
 9315   effect(KILL cr);
 9316   ins_cost(100);
 9317   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9318             "ADC    $dst.hi,$dst.hi\n\t"
 9319             "ADD    $dst.lo,$dst.lo\n\t"
 9320             "ADC    $dst.hi,$dst.hi\n\t"
 9321             "ADD    $dst.lo,$dst.lo\n\t"
 9322             "ADC    $dst.hi,$dst.hi" %}
 9323   ins_encode %{
 9324     __ addl($dst$$Register,$dst$$Register);
 9325     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9326     __ addl($dst$$Register,$dst$$Register);
 9327     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9328     __ addl($dst$$Register,$dst$$Register);
 9329     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9330   %}
 9331   ins_pipe( ialu_reg_long );
 9332 %}
 9333 
 9334 // Shift Left Long by 1-31
 9335 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9336   match(Set dst (LShiftL dst cnt));
 9337   effect(KILL cr);
 9338   ins_cost(200);
 9339   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9340             "SHL    $dst.lo,$cnt" %}
 9341   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9342   ins_encode( move_long_small_shift(dst,cnt) );
 9343   ins_pipe( ialu_reg_long );
 9344 %}
 9345 
 9346 // Shift Left Long by 32-63
 9347 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9348   match(Set dst (LShiftL dst cnt));
 9349   effect(KILL cr);
 9350   ins_cost(300);
 9351   format %{ "MOV    $dst.hi,$dst.lo\n"
 9352           "\tSHL    $dst.hi,$cnt-32\n"
 9353           "\tXOR    $dst.lo,$dst.lo" %}
 9354   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9355   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9356   ins_pipe( ialu_reg_long );
 9357 %}
 9358 
 9359 // Shift Left Long by variable
 9360 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9361   match(Set dst (LShiftL dst shift));
 9362   effect(KILL cr);
 9363   ins_cost(500+200);
 9364   size(17);
 9365   format %{ "TEST   $shift,32\n\t"
 9366             "JEQ,s  small\n\t"
 9367             "MOV    $dst.hi,$dst.lo\n\t"
 9368             "XOR    $dst.lo,$dst.lo\n"
 9369     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9370             "SHL    $dst.lo,$shift" %}
 9371   ins_encode( shift_left_long( dst, shift ) );
 9372   ins_pipe( pipe_slow );
 9373 %}
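// With a CL count the hardware masks the shift amount to 5 bits for 32-bit
// operands, so SHLD/SHL alone cannot handle counts of 32-63.  The TEST of
// bit 5 (value 32) detects that case and first moves the low word into the
// high word and zeroes the low word; the masked SHLD/SHL pair then finishes
// the job.  The variable right-shift forms below use the same trick with
// SHRD/SHR (and SAR to replicate the sign for the arithmetic variant).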
 9374 
 9375 // Shift Right Long by 1-31
 9376 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9377   match(Set dst (URShiftL dst cnt));
 9378   effect(KILL cr);
 9379   ins_cost(200);
 9380   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9381             "SHR    $dst.hi,$cnt" %}
 9382   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9383   ins_encode( move_long_small_shift(dst,cnt) );
 9384   ins_pipe( ialu_reg_long );
 9385 %}
 9386 
 9387 // Shift Right Long by 32-63
 9388 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9389   match(Set dst (URShiftL dst cnt));
 9390   effect(KILL cr);
 9391   ins_cost(300);
 9392   format %{ "MOV    $dst.lo,$dst.hi\n"
 9393           "\tSHR    $dst.lo,$cnt-32\n"
 9394           "\tXOR    $dst.hi,$dst.hi" %}
 9395   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9396   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9397   ins_pipe( ialu_reg_long );
 9398 %}
 9399 
 9400 // Shift Right Long by variable
 9401 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9402   match(Set dst (URShiftL dst shift));
 9403   effect(KILL cr);
 9404   ins_cost(600);
 9405   size(17);
 9406   format %{ "TEST   $shift,32\n\t"
 9407             "JEQ,s  small\n\t"
 9408             "MOV    $dst.lo,$dst.hi\n\t"
 9409             "XOR    $dst.hi,$dst.hi\n"
 9410     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9411             "SHR    $dst.hi,$shift" %}
 9412   ins_encode( shift_right_long( dst, shift ) );
 9413   ins_pipe( pipe_slow );
 9414 %}
 9415 
// Shift Right arithmetic Long by 1-31
 9417 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9418   match(Set dst (RShiftL dst cnt));
 9419   effect(KILL cr);
 9420   ins_cost(200);
 9421   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9422             "SAR    $dst.hi,$cnt" %}
 9423   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9424   ins_encode( move_long_small_shift(dst,cnt) );
 9425   ins_pipe( ialu_reg_long );
 9426 %}
 9427 
// Shift Right arithmetic Long by 32-63
 9429 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9430   match(Set dst (RShiftL dst cnt));
 9431   effect(KILL cr);
 9432   ins_cost(300);
 9433   format %{ "MOV    $dst.lo,$dst.hi\n"
 9434           "\tSAR    $dst.lo,$cnt-32\n"
 9435           "\tSAR    $dst.hi,31" %}
 9436   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9437   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9438   ins_pipe( ialu_reg_long );
 9439 %}
 9440 
 9441 // Shift Right arithmetic Long by variable
 9442 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9443   match(Set dst (RShiftL dst shift));
 9444   effect(KILL cr);
 9445   ins_cost(600);
 9446   size(18);
 9447   format %{ "TEST   $shift,32\n\t"
 9448             "JEQ,s  small\n\t"
 9449             "MOV    $dst.lo,$dst.hi\n\t"
 9450             "SAR    $dst.hi,31\n"
 9451     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9452             "SAR    $dst.hi,$shift" %}
 9453   ins_encode( shift_right_arith_long( dst, shift ) );
 9454   ins_pipe( pipe_slow );
 9455 %}
 9456 
 9457 
 9458 //----------Double Instructions------------------------------------------------
 9459 // Double Math
 9460 
 9461 // Compare & branch
 9462 
// P6 version of double compare, sets condition codes in EFLAGS
 9464 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9465   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9466   match(Set cr (CmpD src1 src2));
 9467   effect(KILL rax);
 9468   ins_cost(150);
 9469   format %{ "FLD    $src1\n\t"
 9470             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9471             "JNP    exit\n\t"
 9472             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9473             "SAHF\n"
 9474      "exit:\tNOP               // avoid branch to branch" %}
 9475   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9476   ins_encode( Push_Reg_DPR(src1),
 9477               OpcP, RegOpc(src2),
 9478               cmpF_P6_fixup );
 9479   ins_pipe( pipe_slow );
 9480 %}
 9481 
 9482 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9483   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9484   match(Set cr (CmpD src1 src2));
 9485   ins_cost(150);
 9486   format %{ "FLD    $src1\n\t"
 9487             "FUCOMIP ST,$src2  // P6 instruction" %}
 9488   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9489   ins_encode( Push_Reg_DPR(src1),
 9490               OpcP, RegOpc(src2));
 9491   ins_pipe( pipe_slow );
 9492 %}
 9493 
 9494 // Compare & branch
 9495 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9496   predicate(UseSSE<=1);
 9497   match(Set cr (CmpD src1 src2));
 9498   effect(KILL rax);
 9499   ins_cost(200);
 9500   format %{ "FLD    $src1\n\t"
 9501             "FCOMp  $src2\n\t"
 9502             "FNSTSW AX\n\t"
 9503             "TEST   AX,0x400\n\t"
 9504             "JZ,s   flags\n\t"
 9505             "MOV    AH,1\t# unordered treat as LT\n"
 9506     "flags:\tSAHF" %}
 9507   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9508   ins_encode( Push_Reg_DPR(src1),
 9509               OpcP, RegOpc(src2),
 9510               fpu_flags);
 9511   ins_pipe( pipe_slow );
 9512 %}
 9513 
 9514 // Compare vs zero into -1,0,1
 9515 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9516   predicate(UseSSE<=1);
 9517   match(Set dst (CmpD3 src1 zero));
 9518   effect(KILL cr, KILL rax);
 9519   ins_cost(280);
 9520   format %{ "FTSTD  $dst,$src1" %}
 9521   opcode(0xE4, 0xD9);
 9522   ins_encode( Push_Reg_DPR(src1),
 9523               OpcS, OpcP, PopFPU,
 9524               CmpF_Result(dst));
 9525   ins_pipe( pipe_slow );
 9526 %}
 9527 
 9528 // Compare into -1,0,1
 9529 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9530   predicate(UseSSE<=1);
 9531   match(Set dst (CmpD3 src1 src2));
 9532   effect(KILL cr, KILL rax);
 9533   ins_cost(300);
 9534   format %{ "FCMPD  $dst,$src1,$src2" %}
 9535   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9536   ins_encode( Push_Reg_DPR(src1),
 9537               OpcP, RegOpc(src2),
 9538               CmpF_Result(dst));
 9539   ins_pipe( pipe_slow );
 9540 %}
 9541 
// double compare and set condition codes in EFLAGS by XMM regs
 9543 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9544   predicate(UseSSE>=2);
 9545   match(Set cr (CmpD src1 src2));
 9546   ins_cost(145);
 9547   format %{ "UCOMISD $src1,$src2\n\t"
 9548             "JNP,s   exit\n\t"
 9549             "PUSHF\t# saw NaN, set CF\n\t"
 9550             "AND     [rsp], #0xffffff2b\n\t"
 9551             "POPF\n"
 9552     "exit:" %}
 9553   ins_encode %{
 9554     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9555     emit_cmpfp_fixup(_masm);
 9556   %}
 9557   ins_pipe( pipe_slow );
 9558 %}
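// An unordered UCOMISD leaves ZF,PF,CF = 1,1,1.  The fixup emitted by
// emit_cmpfp_fixup() skips ordered results via JNP and otherwise ANDs EFLAGS
// with 0xffffff2b, clearing ZF and PF (and SF/AF) while keeping CF, so a NaN
// operand reads as "below".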
 9559 
 9560 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9561   predicate(UseSSE>=2);
 9562   match(Set cr (CmpD src1 src2));
 9563   ins_cost(100);
 9564   format %{ "UCOMISD $src1,$src2" %}
 9565   ins_encode %{
 9566     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9567   %}
 9568   ins_pipe( pipe_slow );
 9569 %}
 9570 
// double compare and set condition codes in EFLAGS by XMM regs
 9572 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9573   predicate(UseSSE>=2);
 9574   match(Set cr (CmpD src1 (LoadD src2)));
 9575   ins_cost(145);
 9576   format %{ "UCOMISD $src1,$src2\n\t"
 9577             "JNP,s   exit\n\t"
 9578             "PUSHF\t# saw NaN, set CF\n\t"
 9579             "AND     [rsp], #0xffffff2b\n\t"
 9580             "POPF\n"
 9581     "exit:" %}
 9582   ins_encode %{
 9583     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9584     emit_cmpfp_fixup(_masm);
 9585   %}
 9586   ins_pipe( pipe_slow );
 9587 %}
 9588 
 9589 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9590   predicate(UseSSE>=2);
 9591   match(Set cr (CmpD src1 (LoadD src2)));
 9592   ins_cost(100);
 9593   format %{ "UCOMISD $src1,$src2" %}
 9594   ins_encode %{
 9595     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9596   %}
 9597   ins_pipe( pipe_slow );
 9598 %}
 9599 
 9600 // Compare into -1,0,1 in XMM
 9601 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9602   predicate(UseSSE>=2);
 9603   match(Set dst (CmpD3 src1 src2));
 9604   effect(KILL cr);
 9605   ins_cost(255);
 9606   format %{ "UCOMISD $src1, $src2\n\t"
 9607             "MOV     $dst, #-1\n\t"
 9608             "JP,s    done\n\t"
 9609             "JB,s    done\n\t"
 9610             "SETNE   $dst\n\t"
 9611             "MOVZB   $dst, $dst\n"
 9612     "done:" %}
 9613   ins_encode %{
 9614     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9615     emit_cmpfp3(_masm, $dst$$Register);
 9616   %}
 9617   ins_pipe( pipe_slow );
 9618 %}
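// In the emit_cmpfp3() sequence above the preloaded -1 survives when the
// compare is unordered (JP) or below (JB); otherwise SETNE produces 0 for
// equal and 1 for above, zero-extended by MOVZB, so NaN operands yield the
// unordered result -1.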
 9619 
 9620 // Compare into -1,0,1 in XMM and memory
 9621 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9622   predicate(UseSSE>=2);
 9623   match(Set dst (CmpD3 src1 (LoadD src2)));
 9624   effect(KILL cr);
 9625   ins_cost(275);
 9626   format %{ "UCOMISD $src1, $src2\n\t"
 9627             "MOV     $dst, #-1\n\t"
 9628             "JP,s    done\n\t"
 9629             "JB,s    done\n\t"
 9630             "SETNE   $dst\n\t"
 9631             "MOVZB   $dst, $dst\n"
 9632     "done:" %}
 9633   ins_encode %{
 9634     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9635     emit_cmpfp3(_masm, $dst$$Register);
 9636   %}
 9637   ins_pipe( pipe_slow );
 9638 %}
 9639 
 9640 
 9641 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9642   predicate (UseSSE <=1);
 9643   match(Set dst (SubD dst src));
 9644 
 9645   format %{ "FLD    $src\n\t"
 9646             "DSUBp  $dst,ST" %}
 9647   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9648   ins_cost(150);
 9649   ins_encode( Push_Reg_DPR(src),
 9650               OpcP, RegOpc(dst) );
 9651   ins_pipe( fpu_reg_reg );
 9652 %}
 9653 
 9654 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9655   predicate (UseSSE <=1);
 9656   match(Set dst (RoundDouble (SubD src1 src2)));
 9657   ins_cost(250);
 9658 
 9659   format %{ "FLD    $src2\n\t"
 9660             "DSUB   ST,$src1\n\t"
 9661             "FSTP_D $dst\t# D-round" %}
 9662   opcode(0xD8, 0x5);
 9663   ins_encode( Push_Reg_DPR(src2),
 9664               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9665   ins_pipe( fpu_mem_reg_reg );
 9666 %}
 9667 
 9668 
 9669 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9670   predicate (UseSSE <=1);
 9671   match(Set dst (SubD dst (LoadD src)));
 9672   ins_cost(150);
 9673 
 9674   format %{ "FLD    $src\n\t"
 9675             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9677   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9678               OpcP, RegOpc(dst) );
 9679   ins_pipe( fpu_reg_mem );
 9680 %}
 9681 
 9682 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9683   predicate (UseSSE<=1);
 9684   match(Set dst (AbsD src));
 9685   ins_cost(100);
 9686   format %{ "FABS" %}
 9687   opcode(0xE1, 0xD9);
 9688   ins_encode( OpcS, OpcP );
 9689   ins_pipe( fpu_reg_reg );
 9690 %}
 9691 
 9692 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9693   predicate(UseSSE<=1);
 9694   match(Set dst (NegD src));
 9695   ins_cost(100);
 9696   format %{ "FCHS" %}
 9697   opcode(0xE0, 0xD9);
 9698   ins_encode( OpcS, OpcP );
 9699   ins_pipe( fpu_reg_reg );
 9700 %}
 9701 
 9702 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9703   predicate(UseSSE<=1);
 9704   match(Set dst (AddD dst src));
 9705   format %{ "FLD    $src\n\t"
 9706             "DADD   $dst,ST" %}
 9707   size(4);
 9708   ins_cost(150);
 9709   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9710   ins_encode( Push_Reg_DPR(src),
 9711               OpcP, RegOpc(dst) );
 9712   ins_pipe( fpu_reg_reg );
 9713 %}
 9714 
 9715 
 9716 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9717   predicate(UseSSE<=1);
 9718   match(Set dst (RoundDouble (AddD src1 src2)));
 9719   ins_cost(250);
 9720 
 9721   format %{ "FLD    $src2\n\t"
 9722             "DADD   ST,$src1\n\t"
 9723             "FSTP_D $dst\t# D-round" %}
 9724   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9725   ins_encode( Push_Reg_DPR(src2),
 9726               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9727   ins_pipe( fpu_mem_reg_reg );
 9728 %}
 9729 
 9730 
 9731 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9732   predicate(UseSSE<=1);
 9733   match(Set dst (AddD dst (LoadD src)));
 9734   ins_cost(150);
 9735 
 9736   format %{ "FLD    $src\n\t"
 9737             "DADDp  $dst,ST" %}
 9738   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9739   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9740               OpcP, RegOpc(dst) );
 9741   ins_pipe( fpu_reg_mem );
 9742 %}
 9743 
 9744 // add-to-memory
 9745 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9746   predicate(UseSSE<=1);
 9747   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9748   ins_cost(150);
 9749 
 9750   format %{ "FLD_D  $dst\n\t"
 9751             "DADD   ST,$src\n\t"
 9752             "FST_D  $dst" %}
 9753   opcode(0xDD, 0x0);
 9754   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9755               Opcode(0xD8), RegOpc(src),
 9756               set_instruction_start,
 9757               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9758   ins_pipe( fpu_reg_mem );
 9759 %}
 9760 
 9761 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9762   predicate(UseSSE<=1);
 9763   match(Set dst (AddD dst con));
 9764   ins_cost(125);
 9765   format %{ "FLD1\n\t"
 9766             "DADDp  $dst,ST" %}
 9767   ins_encode %{
 9768     __ fld1();
 9769     __ faddp($dst$$reg);
 9770   %}
 9771   ins_pipe(fpu_reg);
 9772 %}
 9773 
 9774 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9775   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9776   match(Set dst (AddD dst con));
 9777   ins_cost(200);
 9778   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9779             "DADDp  $dst,ST" %}
 9780   ins_encode %{
 9781     __ fld_d($constantaddress($con));
 9782     __ faddp($dst$$reg);
 9783   %}
 9784   ins_pipe(fpu_reg_mem);
 9785 %}
 9786 
 9787 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9788   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9789   match(Set dst (RoundDouble (AddD src con)));
 9790   ins_cost(200);
 9791   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9792             "DADD   ST,$src\n\t"
 9793             "FSTP_D $dst\t# D-round" %}
 9794   ins_encode %{
 9795     __ fld_d($constantaddress($con));
 9796     __ fadd($src$$reg);
 9797     __ fstp_d(Address(rsp, $dst$$disp));
 9798   %}
 9799   ins_pipe(fpu_mem_reg_con);
 9800 %}
 9801 
 9802 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9803   predicate(UseSSE<=1);
 9804   match(Set dst (MulD dst src));
 9805   format %{ "FLD    $src\n\t"
 9806             "DMULp  $dst,ST" %}
 9807   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9808   ins_cost(150);
 9809   ins_encode( Push_Reg_DPR(src),
 9810               OpcP, RegOpc(dst) );
 9811   ins_pipe( fpu_reg_reg );
 9812 %}
 9813 
 9814 // Strict FP instruction biases argument before multiply then
 9815 // biases result to avoid double rounding of subnormals.
 9816 //
 9817 // scale arg1 by multiplying arg1 by 2^(-15360)
 9818 // load arg2
 9819 // multiply scaled arg1 by arg2
 9820 // rescale product by 2^(15360)
 9821 //
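// The bias constants are 2^(-15360) and 2^(+15360); 15360 = 16383 - 1023 is
// the difference between the x87 extended-precision exponent bias and the
// IEEE double exponent bias.  Pre-scaling pushes a product that would be a
// double subnormal into the extended-precision subnormal range, so its
// significand is rounded only once, at the correct bit position; the final
// multiply restores the magnitude.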
 9822 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9823   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9824   match(Set dst (MulD dst src));
 9825   ins_cost(1);   // Select this instruction for all FP double multiplies
 9826 
 9827   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9828             "DMULp  $dst,ST\n\t"
 9829             "FLD    $src\n\t"
 9830             "DMULp  $dst,ST\n\t"
 9831             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9832             "DMULp  $dst,ST\n\t" %}
 9833   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9834   ins_encode( strictfp_bias1(dst),
 9835               Push_Reg_DPR(src),
 9836               OpcP, RegOpc(dst),
 9837               strictfp_bias2(dst) );
 9838   ins_pipe( fpu_reg_reg );
 9839 %}
 9840 
 9841 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9842   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9843   match(Set dst (MulD dst con));
 9844   ins_cost(200);
 9845   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9846             "DMULp  $dst,ST" %}
 9847   ins_encode %{
 9848     __ fld_d($constantaddress($con));
 9849     __ fmulp($dst$$reg);
 9850   %}
 9851   ins_pipe(fpu_reg_mem);
 9852 %}
 9853 
 9854 
 9855 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9856   predicate( UseSSE<=1 );
 9857   match(Set dst (MulD dst (LoadD src)));
 9858   ins_cost(200);
 9859   format %{ "FLD_D  $src\n\t"
 9860             "DMULp  $dst,ST" %}
 9861   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9862   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9863               OpcP, RegOpc(dst) );
 9864   ins_pipe( fpu_reg_mem );
 9865 %}
 9866 
 9867 //
 9868 // Cisc-alternate to reg-reg multiply
 9869 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9870   predicate( UseSSE<=1 );
 9871   match(Set dst (MulD src (LoadD mem)));
 9872   ins_cost(250);
 9873   format %{ "FLD_D  $mem\n\t"
 9874             "DMUL   ST,$src\n\t"
 9875             "FSTP_D $dst" %}
 9876   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9877   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9878               OpcReg_FPR(src),
 9879               Pop_Reg_DPR(dst) );
 9880   ins_pipe( fpu_reg_reg_mem );
 9881 %}
 9882 
 9883 
 9884 // MACRO3 -- addDPR a mulDPR
 9885 // This instruction is a '2-address' instruction in that the result goes
 9886 // back to src2.  This eliminates a move from the macro; possibly the
 9887 // register allocator will have to add it back (and maybe not).
 9888 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9889   predicate( UseSSE<=1 );
 9890   match(Set src2 (AddD (MulD src0 src1) src2));
 9891   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9892             "DMUL   ST,$src1\n\t"
 9893             "DADDp  $src2,ST" %}
 9894   ins_cost(250);
 9895   opcode(0xDD); /* LoadD DD /0 */
 9896   ins_encode( Push_Reg_FPR(src0),
 9897               FMul_ST_reg(src1),
 9898               FAddP_reg_ST(src2) );
 9899   ins_pipe( fpu_reg_reg_reg );
 9900 %}
 9901 
 9902 
 9903 // MACRO3 -- subDPR a mulDPR
 9904 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9905   predicate( UseSSE<=1 );
 9906   match(Set src2 (SubD (MulD src0 src1) src2));
 9907   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9908             "DMUL   ST,$src1\n\t"
 9909             "DSUBRp $src2,ST" %}
 9910   ins_cost(250);
 9911   ins_encode( Push_Reg_FPR(src0),
 9912               FMul_ST_reg(src1),
 9913               Opcode(0xDE), Opc_plus(0xE0,src2));
 9914   ins_pipe( fpu_reg_reg_reg );
 9915 %}
 9916 
 9917 
 9918 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9919   predicate( UseSSE<=1 );
 9920   match(Set dst (DivD dst src));
 9921 
 9922   format %{ "FLD    $src\n\t"
 9923             "FDIVp  $dst,ST" %}
 9924   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9925   ins_cost(150);
 9926   ins_encode( Push_Reg_DPR(src),
 9927               OpcP, RegOpc(dst) );
 9928   ins_pipe( fpu_reg_reg );
 9929 %}
 9930 
 9931 // Strict FP instruction biases argument before division then
 9932 // biases result, to avoid double rounding of subnormals.
 9933 //
 9934 // scale dividend by multiplying dividend by 2^(-15360)
 9935 // load divisor
 9936 // divide scaled dividend by divisor
 9937 // rescale quotient by 2^(15360)
 9938 //
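// (Same 2^(-15360)/2^(+15360) biasing as strictfp_mulDPR_reg above.)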
 9939 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9944 
 9945   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9946             "DMULp  $dst,ST\n\t"
 9947             "FLD    $src\n\t"
 9948             "FDIVp  $dst,ST\n\t"
 9949             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9950             "DMULp  $dst,ST\n\t" %}
 9951   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9952   ins_encode( strictfp_bias1(dst),
 9953               Push_Reg_DPR(src),
 9954               OpcP, RegOpc(dst),
 9955               strictfp_bias2(dst) );
 9956   ins_pipe( fpu_reg_reg );
 9957 %}
 9958 
 9959 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9960   predicate(UseSSE<=1);
 9961   match(Set dst (ModD dst src));
 9962   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9963 
 9964   format %{ "DMOD   $dst,$src" %}
 9965   ins_cost(250);
 9966   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9967               emitModDPR(),
 9968               Push_Result_Mod_DPR(src),
 9969               Pop_Reg_DPR(dst));
 9970   ins_pipe( pipe_slow );
 9971 %}
 9972 
 9973 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9974   predicate(UseSSE>=2);
 9975   match(Set dst (ModD src0 src1));
 9976   effect(KILL rax, KILL cr);
 9977 
 9978   format %{ "SUB    ESP,8\t # DMOD\n"
 9979           "\tMOVSD  [ESP+0],$src1\n"
 9980           "\tFLD_D  [ESP+0]\n"
 9981           "\tMOVSD  [ESP+0],$src0\n"
 9982           "\tFLD_D  [ESP+0]\n"
 9983      "loop:\tFPREM\n"
 9984           "\tFWAIT\n"
 9985           "\tFNSTSW AX\n"
 9986           "\tSAHF\n"
 9987           "\tJP     loop\n"
 9988           "\tFSTP_D [ESP+0]\n"
 9989           "\tMOVSD  $dst,[ESP+0]\n"
 9990           "\tADD    ESP,8\n"
 9991           "\tFSTP   ST0\t # Restore FPU Stack"
 9992     %}
 9993   ins_cost(250);
 9994   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9995   ins_pipe( pipe_slow );
 9996 %}
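// About the FPREM loop above: FPREM produces only a partial remainder and
// sets C2 in the FPU status word while the reduction is incomplete.
// FNSTSW AX / SAHF copies C0/C2/C3 into CF/PF/ZF, so JP re-runs FPREM until
// C2 clears and the remainder is exact.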
 9997 
 9998 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9999   predicate (UseSSE<=1);
10000   match(Set dst(AtanD dst src));
10001   format %{ "DATA   $dst,$src" %}
10002   opcode(0xD9, 0xF3);
10003   ins_encode( Push_Reg_DPR(src),
10004               OpcP, OpcS, RegOpc(dst) );
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10009   predicate (UseSSE>=2);
10010   match(Set dst(AtanD dst src));
10011   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10012   format %{ "DATA   $dst,$src" %}
10013   opcode(0xD9, 0xF3);
10014   ins_encode( Push_SrcD(src),
10015               OpcP, OpcS, Push_ResultD(dst) );
10016   ins_pipe( pipe_slow );
10017 %}
10018 
10019 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10020   predicate (UseSSE<=1);
10021   match(Set dst (SqrtD src));
10022   format %{ "DSQRT  $dst,$src" %}
10023   opcode(0xFA, 0xD9);
10024   ins_encode( Push_Reg_DPR(src),
10025               OpcS, OpcP, Pop_Reg_DPR(dst) );
10026   ins_pipe( pipe_slow );
10027 %}
10028 
10029 //-------------Float Instructions-------------------------------
10030 // Float Math
10031 
10032 // Code for float compare:
10033 //     fcompp();
10034 //     fwait(); fnstsw_ax();
10035 //     sahf();
10036 //     movl(dst, unordered_result);
10037 //     jcc(Assembler::parity, exit);
10038 //     movl(dst, less_result);
10039 //     jcc(Assembler::below, exit);
10040 //     movl(dst, equal_result);
10041 //     jcc(Assembler::equal, exit);
10042 //     movl(dst, greater_result);
10043 //   exit:
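//   (After SAHF the x87 condition codes appear as CF=C0, PF=C2, ZF=C3; an
//   unordered compare sets all three, so the parity branch selects the
//   unordered result first.)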
10044 
10045 // P6 version of float compare, sets condition codes in EFLAGS
10046 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10047   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10048   match(Set cr (CmpF src1 src2));
10049   effect(KILL rax);
10050   ins_cost(150);
10051   format %{ "FLD    $src1\n\t"
10052             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10053             "JNP    exit\n\t"
10054             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10055             "SAHF\n"
10056      "exit:\tNOP               // avoid branch to branch" %}
10057   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10058   ins_encode( Push_Reg_DPR(src1),
10059               OpcP, RegOpc(src2),
10060               cmpF_P6_fixup );
10061   ins_pipe( pipe_slow );
10062 %}
10063 
10064 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10065   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10066   match(Set cr (CmpF src1 src2));
10067   ins_cost(100);
10068   format %{ "FLD    $src1\n\t"
10069             "FUCOMIP ST,$src2  // P6 instruction" %}
10070   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10071   ins_encode( Push_Reg_DPR(src1),
10072               OpcP, RegOpc(src2));
10073   ins_pipe( pipe_slow );
10074 %}
10075 
10076 
10077 // Compare & branch
10078 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10079   predicate(UseSSE == 0);
10080   match(Set cr (CmpF src1 src2));
10081   effect(KILL rax);
10082   ins_cost(200);
10083   format %{ "FLD    $src1\n\t"
10084             "FCOMp  $src2\n\t"
10085             "FNSTSW AX\n\t"
10086             "TEST   AX,0x400\n\t"
10087             "JZ,s   flags\n\t"
10088             "MOV    AH,1\t# unordered treat as LT\n"
10089     "flags:\tSAHF" %}
10090   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10091   ins_encode( Push_Reg_DPR(src1),
10092               OpcP, RegOpc(src2),
10093               fpu_flags);
10094   ins_pipe( pipe_slow );
10095 %}
10096 
10097 // Compare vs zero into -1,0,1
10098 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10099   predicate(UseSSE == 0);
10100   match(Set dst (CmpF3 src1 zero));
10101   effect(KILL cr, KILL rax);
10102   ins_cost(280);
10103   format %{ "FTSTF  $dst,$src1" %}
10104   opcode(0xE4, 0xD9);
10105   ins_encode( Push_Reg_DPR(src1),
10106               OpcS, OpcP, PopFPU,
10107               CmpF_Result(dst));
10108   ins_pipe( pipe_slow );
10109 %}
10110 
10111 // Compare into -1,0,1
10112 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10113   predicate(UseSSE == 0);
10114   match(Set dst (CmpF3 src1 src2));
10115   effect(KILL cr, KILL rax);
10116   ins_cost(300);
10117   format %{ "FCMPF  $dst,$src1,$src2" %}
10118   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10119   ins_encode( Push_Reg_DPR(src1),
10120               OpcP, RegOpc(src2),
10121               CmpF_Result(dst));
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 // float compare and set condition codes in EFLAGS by XMM regs
10126 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10127   predicate(UseSSE>=1);
10128   match(Set cr (CmpF src1 src2));
10129   ins_cost(145);
10130   format %{ "UCOMISS $src1,$src2\n\t"
10131             "JNP,s   exit\n\t"
10132             "PUSHF\t# saw NaN, set CF\n\t"
10133             "AND     [rsp], #0xffffff2b\n\t"
10134             "POPF\n"
10135     "exit:" %}
10136   ins_encode %{
10137     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10138     emit_cmpfp_fixup(_masm);
10139   %}
10140   ins_pipe( pipe_slow );
10141 %}
10142 
10143 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10144   predicate(UseSSE>=1);
10145   match(Set cr (CmpF src1 src2));
10146   ins_cost(100);
10147   format %{ "UCOMISS $src1,$src2" %}
10148   ins_encode %{
10149     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10150   %}
10151   ins_pipe( pipe_slow );
10152 %}
10153 
10154 // float compare and set condition codes in EFLAGS by XMM regs
10155 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10156   predicate(UseSSE>=1);
10157   match(Set cr (CmpF src1 (LoadF src2)));
10158   ins_cost(165);
10159   format %{ "UCOMISS $src1,$src2\n\t"
10160             "JNP,s   exit\n\t"
10161             "PUSHF\t# saw NaN, set CF\n\t"
10162             "AND     [rsp], #0xffffff2b\n\t"
10163             "POPF\n"
10164     "exit:" %}
10165   ins_encode %{
10166     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10167     emit_cmpfp_fixup(_masm);
10168   %}
10169   ins_pipe( pipe_slow );
10170 %}
10171 
10172 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10173   predicate(UseSSE>=1);
10174   match(Set cr (CmpF src1 (LoadF src2)));
10175   ins_cost(100);
10176   format %{ "UCOMISS $src1,$src2" %}
10177   ins_encode %{
10178     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10179   %}
10180   ins_pipe( pipe_slow );
10181 %}
10182 
10183 // Compare into -1,0,1 in XMM
10184 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10185   predicate(UseSSE>=1);
10186   match(Set dst (CmpF3 src1 src2));
10187   effect(KILL cr);
10188   ins_cost(255);
10189   format %{ "UCOMISS $src1, $src2\n\t"
10190             "MOV     $dst, #-1\n\t"
10191             "JP,s    done\n\t"
10192             "JB,s    done\n\t"
10193             "SETNE   $dst\n\t"
10194             "MOVZB   $dst, $dst\n"
10195     "done:" %}
10196   ins_encode %{
10197     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10198     emit_cmpfp3(_masm, $dst$$Register);
10199   %}
10200   ins_pipe( pipe_slow );
10201 %}
10202 
10203 // Compare into -1,0,1 in XMM and memory
10204 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10205   predicate(UseSSE>=1);
10206   match(Set dst (CmpF3 src1 (LoadF src2)));
10207   effect(KILL cr);
10208   ins_cost(275);
10209   format %{ "UCOMISS $src1, $src2\n\t"
10210             "MOV     $dst, #-1\n\t"
10211             "JP,s    done\n\t"
10212             "JB,s    done\n\t"
10213             "SETNE   $dst\n\t"
10214             "MOVZB   $dst, $dst\n"
10215     "done:" %}
10216   ins_encode %{
10217     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10218     emit_cmpfp3(_masm, $dst$$Register);
10219   %}
10220   ins_pipe( pipe_slow );
10221 %}
10222 
10223 // Spill to obtain 24-bit precision
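// Storing the x87 result through a 32-bit stack slot (FSTP_S) forces it to be
// rounded to single precision even when the FPU control word is left at a
// wider precision; the "does not round to 24-bits" forms keep the result on
// the FPU stack and skip that store.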
10224 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10225   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10226   match(Set dst (SubF src1 src2));
10227 
10228   format %{ "FSUB   $dst,$src1 - $src2" %}
10229   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10230   ins_encode( Push_Reg_FPR(src1),
10231               OpcReg_FPR(src2),
10232               Pop_Mem_FPR(dst) );
10233   ins_pipe( fpu_mem_reg_reg );
10234 %}
10235 //
10236 // This instruction does not round to 24-bits
10237 instruct subFPR_reg(regFPR dst, regFPR src) %{
10238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239   match(Set dst (SubF dst src));
10240 
10241   format %{ "FSUB   $dst,$src" %}
10242   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10243   ins_encode( Push_Reg_FPR(src),
10244               OpcP, RegOpc(dst) );
10245   ins_pipe( fpu_reg_reg );
10246 %}
10247 
10248 // Spill to obtain 24-bit precision
10249 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10250   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10251   match(Set dst (AddF src1 src2));
10252 
10253   format %{ "FADD   $dst,$src1,$src2" %}
10254   opcode(0xD8, 0x0); /* D8 C0+i */
10255   ins_encode( Push_Reg_FPR(src2),
10256               OpcReg_FPR(src1),
10257               Pop_Mem_FPR(dst) );
10258   ins_pipe( fpu_mem_reg_reg );
10259 %}
10260 //
10261 // This instruction does not round to 24-bits
10262 instruct addFPR_reg(regFPR dst, regFPR src) %{
10263   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10264   match(Set dst (AddF dst src));
10265 
10266   format %{ "FLD    $src\n\t"
10267             "FADDp  $dst,ST" %}
10268   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10269   ins_encode( Push_Reg_FPR(src),
10270               OpcP, RegOpc(dst) );
10271   ins_pipe( fpu_reg_reg );
10272 %}
10273 
10274 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10275   predicate(UseSSE==0);
10276   match(Set dst (AbsF src));
10277   ins_cost(100);
10278   format %{ "FABS" %}
10279   opcode(0xE1, 0xD9);
10280   ins_encode( OpcS, OpcP );
10281   ins_pipe( fpu_reg_reg );
10282 %}
10283 
10284 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10285   predicate(UseSSE==0);
10286   match(Set dst (NegF src));
10287   ins_cost(100);
10288   format %{ "FCHS" %}
10289   opcode(0xE0, 0xD9);
10290   ins_encode( OpcS, OpcP );
10291   ins_pipe( fpu_reg_reg );
10292 %}
10293 
10294 // Cisc-alternate to addFPR_reg
10295 // Spill to obtain 24-bit precision
10296 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10297   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10298   match(Set dst (AddF src1 (LoadF src2)));
10299 
10300   format %{ "FLD    $src2\n\t"
10301             "FADD   ST,$src1\n\t"
10302             "FSTP_S $dst" %}
10303   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10304   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10305               OpcReg_FPR(src1),
10306               Pop_Mem_FPR(dst) );
10307   ins_pipe( fpu_mem_reg_mem );
10308 %}
10309 //
10310 // Cisc-alternate to addFPR_reg
10311 // This instruction does not round to 24-bits
10312 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10313   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10314   match(Set dst (AddF dst (LoadF src)));
10315 
10316   format %{ "FADD   $dst,$src" %}
10317   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10319               OpcP, RegOpc(dst) );
10320   ins_pipe( fpu_reg_mem );
10321 %}
10322 
// Following two instructions for _222_mpegaudio
10324 // Spill to obtain 24-bit precision
10325 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10326   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10327   match(Set dst (AddF src1 src2));
10328 
10329   format %{ "FADD   $dst,$src1,$src2" %}
10330   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10331   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10332               OpcReg_FPR(src2),
10333               Pop_Mem_FPR(dst) );
10334   ins_pipe( fpu_mem_reg_mem );
10335 %}
10336 
10337 // Cisc-spill variant
10338 // Spill to obtain 24-bit precision
10339 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10340   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10341   match(Set dst (AddF src1 (LoadF src2)));
10342 
10343   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10344   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10345   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10346               set_instruction_start,
10347               OpcP, RMopc_Mem(secondary,src1),
10348               Pop_Mem_FPR(dst) );
10349   ins_pipe( fpu_mem_mem_mem );
10350 %}
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (AddF src1 src2));
10356 
10357   format %{ "FADD   $dst,$src1,$src2" %}
10358   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10359   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10360               set_instruction_start,
10361               OpcP, RMopc_Mem(secondary,src1),
10362               Pop_Mem_FPR(dst) );
10363   ins_pipe( fpu_mem_mem_mem );
10364 %}
10365 
10366 
10367 // Spill to obtain 24-bit precision
10368 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10369   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10370   match(Set dst (AddF src con));
10371   format %{ "FLD    $src\n\t"
10372             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10373             "FSTP_S $dst"  %}
10374   ins_encode %{
10375     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10376     __ fadd_s($constantaddress($con));
10377     __ fstp_s(Address(rsp, $dst$$disp));
10378   %}
10379   ins_pipe(fpu_mem_reg_con);
10380 %}
10381 //
10382 // This instruction does not round to 24-bits
10383 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10384   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10385   match(Set dst (AddF src con));
10386   format %{ "FLD    $src\n\t"
10387             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10388             "FSTP   $dst"  %}
10389   ins_encode %{
10390     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10391     __ fadd_s($constantaddress($con));
10392     __ fstp_d($dst$$reg);
10393   %}
10394   ins_pipe(fpu_reg_reg_con);
10395 %}
10396 
10397 // Spill to obtain 24-bit precision
10398 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10399   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10400   match(Set dst (MulF src1 src2));
10401 
10402   format %{ "FLD    $src1\n\t"
10403             "FMUL   $src2\n\t"
10404             "FSTP_S $dst"  %}
10405   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10406   ins_encode( Push_Reg_FPR(src1),
10407               OpcReg_FPR(src2),
10408               Pop_Mem_FPR(dst) );
10409   ins_pipe( fpu_mem_reg_reg );
10410 %}
10411 //
10412 // This instruction does not round to 24-bits
10413 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10414   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10415   match(Set dst (MulF src1 src2));
10416 
10417   format %{ "FLD    $src1\n\t"
10418             "FMUL   $src2\n\t"
            "FSTP   $dst"  %}
10420   opcode(0xD8, 0x1); /* D8 C8+i */
10421   ins_encode( Push_Reg_FPR(src2),
10422               OpcReg_FPR(src1),
10423               Pop_Reg_FPR(dst) );
10424   ins_pipe( fpu_reg_reg_reg );
10425 %}
10426 
10427 
10428 // Spill to obtain 24-bit precision
10429 // Cisc-alternate to reg-reg multiply
10430 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10431   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10432   match(Set dst (MulF src1 (LoadF src2)));
10433 
10434   format %{ "FLD_S  $src2\n\t"
10435             "FMUL   $src1\n\t"
10436             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10438   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10439               OpcReg_FPR(src1),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_reg_mem );
10442 %}
10443 //
10444 // This instruction does not round to 24-bits
10445 // Cisc-alternate to reg-reg multiply
10446 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10447   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10448   match(Set dst (MulF src1 (LoadF src2)));
10449 
10450   format %{ "FMUL   $dst,$src1,$src2" %}
10451   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10452   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10453               OpcReg_FPR(src1),
10454               Pop_Reg_FPR(dst) );
10455   ins_pipe( fpu_reg_reg_mem );
10456 %}
10457 
10458 // Spill to obtain 24-bit precision
10459 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10460   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10461   match(Set dst (MulF src1 src2));
10462 
10463   format %{ "FMUL   $dst,$src1,$src2" %}
10464   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10465   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10466               set_instruction_start,
10467               OpcP, RMopc_Mem(secondary,src1),
10468               Pop_Mem_FPR(dst) );
10469   ins_pipe( fpu_mem_mem_mem );
10470 %}
10471 
10472 // Spill to obtain 24-bit precision
10473 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10474   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10475   match(Set dst (MulF src con));
10476 
10477   format %{ "FLD    $src\n\t"
10478             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10479             "FSTP_S $dst"  %}
10480   ins_encode %{
10481     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10482     __ fmul_s($constantaddress($con));
10483     __ fstp_s(Address(rsp, $dst$$disp));
10484   %}
10485   ins_pipe(fpu_mem_reg_con);
10486 %}
10487 //
10488 // This instruction does not round to 24-bits
10489 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10490   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF src con));
10492 
10493   format %{ "FLD    $src\n\t"
10494             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10495             "FSTP   $dst"  %}
10496   ins_encode %{
10497     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10498     __ fmul_s($constantaddress($con));
10499     __ fstp_d($dst$$reg);
10500   %}
10501   ins_pipe(fpu_reg_reg_con);
10502 %}
10503 
10504 
10505 //
10506 // MACRO1 -- subsume unshared load into mulFPR
10507 // This instruction does not round to 24-bits
10508 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10509   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10510   match(Set dst (MulF (LoadF mem1) src));
10511 
10512   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10513             "FMUL   ST,$src\n\t"
10514             "FSTP   $dst" %}
10515   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10516   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10517               OpcReg_FPR(src),
10518               Pop_Reg_FPR(dst) );
10519   ins_pipe( fpu_reg_reg_mem );
10520 %}
10521 //
10522 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10523 // This instruction does not round to 24-bits
10524 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10525   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10526   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10527   ins_cost(95);
10528 
10529   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10530             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10531             "FADD   ST,$src2\n\t"
10532             "FSTP   $dst" %}
10533   opcode(0xD9); /* LoadF D9 /0 */
10534   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10535               FMul_ST_reg(src1),
10536               FAdd_ST_reg(src2),
10537               Pop_Reg_FPR(dst) );
10538   ins_pipe( fpu_reg_mem_reg_reg );
10539 %}
10540 
10541 // MACRO3 -- addFPR a mulFPR
10542 // This instruction does not round to 24-bits.  It is a '2-address'
10543 // instruction in that the result goes back to src2.  This eliminates
10544 // a move from the macro; possibly the register allocator will have
10545 // to add it back (and maybe not).
10546 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10547   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10548   match(Set src2 (AddF (MulF src0 src1) src2));
10549 
10550   format %{ "FLD    $src0     ===MACRO3===\n\t"
10551             "FMUL   ST,$src1\n\t"
10552             "FADDP  $src2,ST" %}
10553   opcode(0xD9); /* LoadF D9 /0 */
10554   ins_encode( Push_Reg_FPR(src0),
10555               FMul_ST_reg(src1),
10556               FAddP_reg_ST(src2) );
10557   ins_pipe( fpu_reg_reg_reg );
10558 %}
10559 
10560 // MACRO4 -- divFPR subFPR
10561 // This instruction does not round to 24-bits
10562 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10563   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10564   match(Set dst (DivF (SubF src2 src1) src3));
10565 
10566   format %{ "FLD    $src2   ===MACRO4===\n\t"
10567             "FSUB   ST,$src1\n\t"
10568             "FDIV   ST,$src3\n\t"
10569             "FSTP  $dst" %}
10570   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10571   ins_encode( Push_Reg_FPR(src2),
10572               subFPR_divFPR_encode(src1,src3),
10573               Pop_Reg_FPR(dst) );
10574   ins_pipe( fpu_reg_reg_reg_reg );
10575 %}
10576 
10577 // Spill to obtain 24-bit precision
10578 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10579   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10580   match(Set dst (DivF src1 src2));
10581 
10582   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10584   ins_encode( Push_Reg_FPR(src1),
10585               OpcReg_FPR(src2),
10586               Pop_Mem_FPR(dst) );
10587   ins_pipe( fpu_mem_reg_reg );
10588 %}
10589 //
10590 // This instruction does not round to 24-bits
10591 instruct divFPR_reg(regFPR dst, regFPR src) %{
10592   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10593   match(Set dst (DivF dst src));
10594 
10595   format %{ "FDIV   $dst,$src" %}
10596   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10597   ins_encode( Push_Reg_FPR(src),
10598               OpcP, RegOpc(dst) );
10599   ins_pipe( fpu_reg_reg );
10600 %}
10601 
10602 
10603 // Spill to obtain 24-bit precision
10604 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10605   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10606   match(Set dst (ModF src1 src2));
10607   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10608 
10609   format %{ "FMOD   $dst,$src1,$src2" %}
10610   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10611               emitModDPR(),
10612               Push_Result_Mod_DPR(src2),
10613               Pop_Mem_FPR(dst));
10614   ins_pipe( pipe_slow );
10615 %}
10616 //
10617 // This instruction does not round to 24-bits
10618 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10619   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10620   match(Set dst (ModF dst src));
10621   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10622 
10623   format %{ "FMOD   $dst,$src" %}
10624   ins_encode(Push_Reg_Mod_DPR(dst, src),
10625               emitModDPR(),
10626               Push_Result_Mod_DPR(src),
10627               Pop_Reg_FPR(dst));
10628   ins_pipe( pipe_slow );
10629 %}
10630 
10631 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10632   predicate(UseSSE>=1);
10633   match(Set dst (ModF src0 src1));
10634   effect(KILL rax, KILL cr);
10635   format %{ "SUB    ESP,4\t # FMOD\n"
10636           "\tMOVSS  [ESP+0],$src1\n"
10637           "\tFLD_S  [ESP+0]\n"
10638           "\tMOVSS  [ESP+0],$src0\n"
10639           "\tFLD_S  [ESP+0]\n"
10640      "loop:\tFPREM\n"
10641           "\tFWAIT\n"
10642           "\tFNSTSW AX\n"
10643           "\tSAHF\n"
10644           "\tJP     loop\n"
10645           "\tFSTP_S [ESP+0]\n"
10646           "\tMOVSS  $dst,[ESP+0]\n"
10647           "\tADD    ESP,4\n"
10648           "\tFSTP   ST0\t # Restore FPU Stack"
10649     %}
10650   ins_cost(250);
10651   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10652   ins_pipe( pipe_slow );
10653 %}
10654 
10655 
10656 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10658 
10659 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10660   predicate(UseSSE==0);
10661   match(Set dst (RoundFloat src));
10662   ins_cost(125);
10663   format %{ "FST_S  $dst,$src\t# F-round" %}
10664   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10665   ins_pipe( fpu_mem_reg );
10666 %}
10667 
10668 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10669   predicate(UseSSE<=1);
10670   match(Set dst (RoundDouble src));
10671   ins_cost(125);
10672   format %{ "FST_D  $dst,$src\t# D-round" %}
10673   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10674   ins_pipe( fpu_mem_reg );
10675 %}
10676 
// Force rounding to 24-bit precision and 8-bit exponent
10678 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10679   predicate(UseSSE==0);
10680   match(Set dst (ConvD2F src));
10681   format %{ "FST_S  $dst,$src\t# F-round" %}
10682   expand %{
10683     roundFloat_mem_reg(dst,src);
10684   %}
10685 %}
10686 
// Force rounding to 24-bit precision and 8-bit exponent
10688 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10689   predicate(UseSSE==1);
10690   match(Set dst (ConvD2F src));
10691   effect( KILL cr );
10692   format %{ "SUB    ESP,4\n\t"
10693             "FST_S  [ESP],$src\t# F-round\n\t"
10694             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10696   ins_encode %{
10697     __ subptr(rsp, 4);
10698     if ($src$$reg != FPR1L_enc) {
10699       __ fld_s($src$$reg-1);
10700       __ fstp_s(Address(rsp, 0));
10701     } else {
10702       __ fst_s(Address(rsp, 0));
10703     }
10704     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10705     __ addptr(rsp, 4);
10706   %}
10707   ins_pipe( pipe_slow );
10708 %}
10709 
10710 // Force rounding double precision to single precision
10711 instruct convD2F_reg(regF dst, regD src) %{
10712   predicate(UseSSE>=2);
10713   match(Set dst (ConvD2F src));
10714   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10715   ins_encode %{
10716     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10717   %}
10718   ins_pipe( pipe_slow );
10719 %}
10720 
10721 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10722   predicate(UseSSE==0);
10723   match(Set dst (ConvF2D src));
10724   format %{ "FST_S  $dst,$src\t# D-round" %}
10725   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10726   ins_pipe( fpu_reg_reg );
10727 %}
10728 
10729 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10730   predicate(UseSSE==1);
10731   match(Set dst (ConvF2D src));
10732   format %{ "FST_D  $dst,$src\t# D-round" %}
10733   expand %{
10734     roundDouble_mem_reg(dst,src);
10735   %}
10736 %}
10737 
10738 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10739   predicate(UseSSE==1);
10740   match(Set dst (ConvF2D src));
10741   effect( KILL cr );
10742   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10744             "FLD_S  [ESP]\n\t"
10745             "ADD    ESP,4\n\t"
10746             "FSTP   $dst\t# D-round" %}
10747   ins_encode %{
10748     __ subptr(rsp, 4);
10749     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10750     __ fld_s(Address(rsp, 0));
10751     __ addptr(rsp, 4);
10752     __ fstp_d($dst$$reg);
10753   %}
10754   ins_pipe( pipe_slow );
10755 %}
10756 
10757 instruct convF2D_reg(regD dst, regF src) %{
10758   predicate(UseSSE>=2);
10759   match(Set dst (ConvF2D src));
10760   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10761   ins_encode %{
10762     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10763   %}
10764   ins_pipe( pipe_slow );
10765 %}
10766 
10767 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10768 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10769   predicate(UseSSE<=1);
10770   match(Set dst (ConvD2I src));
10771   effect( KILL tmp, KILL cr );
10772   format %{ "FLD    $src\t# Convert double to int \n\t"
10773             "FLDCW  trunc mode\n\t"
10774             "SUB    ESP,4\n\t"
10775             "FISTp  [ESP + #0]\n\t"
10776             "FLDCW  std/24-bit mode\n\t"
10777             "POP    EAX\n\t"
10778             "CMP    EAX,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "FLD_D  $src\n\t"
10781             "CALL   d2i_wrapper\n"
10782       "fast:" %}
10783   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10788 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10789   predicate(UseSSE>=2);
10790   match(Set dst (ConvD2I src));
10791   effect( KILL tmp, KILL cr );
10792   format %{ "CVTTSD2SI $dst, $src\n\t"
10793             "CMP    $dst,0x80000000\n\t"
10794             "JNE,s  fast\n\t"
10795             "SUB    ESP, 8\n\t"
10796             "MOVSD  [ESP], $src\n\t"
10797             "FLD_D  [ESP]\n\t"
10798             "ADD    ESP, 8\n\t"
10799             "CALL   d2i_wrapper\n"
10800       "fast:" %}
10801   ins_encode %{
10802     Label fast;
10803     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10804     __ cmpl($dst$$Register, 0x80000000);
10805     __ jccb(Assembler::notEqual, fast);
10806     __ subptr(rsp, 8);
10807     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10808     __ fld_d(Address(rsp, 0));
10809     __ addptr(rsp, 8);
10810     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10811     __ post_call_nop();
10812     __ bind(fast);
10813   %}
10814   ins_pipe( pipe_slow );
10815 %}
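// CVTTSD2SI returns the "integer indefinite" value 0x80000000 when the input
// is NaN or outside int range, so that value routes the conversion to the
// slow path, where d2i_wrapper applies the Java result (0 for NaN, saturation
// at Integer.MIN_VALUE/MAX_VALUE otherwise).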
10816 
10817 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10818   predicate(UseSSE<=1);
10819   match(Set dst (ConvD2L src));
10820   effect( KILL cr );
10821   format %{ "FLD    $src\t# Convert double to long\n\t"
10822             "FLDCW  trunc mode\n\t"
10823             "SUB    ESP,8\n\t"
10824             "FISTp  [ESP + #0]\n\t"
10825             "FLDCW  std/24-bit mode\n\t"
10826             "POP    EAX\n\t"
10827             "POP    EDX\n\t"
10828             "CMP    EDX,0x80000000\n\t"
10829             "JNE,s  fast\n\t"
10830             "TEST   EAX,EAX\n\t"
10831             "JNE,s  fast\n\t"
10832             "FLD    $src\n\t"
10833             "CALL   d2l_wrapper\n"
10834       "fast:" %}
10835   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10836   ins_pipe( pipe_slow );
10837 %}
10838 
10839 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10840 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10841   predicate (UseSSE>=2);
10842   match(Set dst (ConvD2L src));
10843   effect( KILL cr );
10844   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10845             "MOVSD  [ESP],$src\n\t"
10846             "FLD_D  [ESP]\n\t"
10847             "FLDCW  trunc mode\n\t"
10848             "FISTp  [ESP + #0]\n\t"
10849             "FLDCW  std/24-bit mode\n\t"
10850             "POP    EAX\n\t"
10851             "POP    EDX\n\t"
10852             "CMP    EDX,0x80000000\n\t"
10853             "JNE,s  fast\n\t"
10854             "TEST   EAX,EAX\n\t"
10855             "JNE,s  fast\n\t"
10856             "SUB    ESP,8\n\t"
10857             "MOVSD  [ESP],$src\n\t"
10858             "FLD_D  [ESP]\n\t"
10859             "ADD    ESP,8\n\t"
10860             "CALL   d2l_wrapper\n"
10861       "fast:" %}
10862   ins_encode %{
10863     Label fast;
10864     __ subptr(rsp, 8);
10865     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10866     __ fld_d(Address(rsp, 0));
10867     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10868     __ fistp_d(Address(rsp, 0));
10869     // Restore the rounding mode, mask the exception
10870     if (Compile::current()->in_24_bit_fp_mode()) {
10871       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10872     } else {
10873       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10874     }
10875     // Load the converted long, adjust CPU stack
10876     __ pop(rax);
10877     __ pop(rdx);
10878     __ cmpl(rdx, 0x80000000);
10879     __ jccb(Assembler::notEqual, fast);
10880     __ testl(rax, rax);
10881     __ jccb(Assembler::notEqual, fast);
10882     __ subptr(rsp, 8);
10883     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10884     __ fld_d(Address(rsp, 0));
10885     __ addptr(rsp, 8);
10886     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10887     __ post_call_nop();
10888     __ bind(fast);
10889   %}
10890   ins_pipe( pipe_slow );
10891 %}
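// On overflow or NaN, FISTP stores the 64-bit integer indefinite value
// 0x8000000000000000, so the EDX==0x80000000 && EAX==0 test above catches
// both a genuine Long.MIN_VALUE result and the trouble cases and lets
// d2l_wrapper sort them out.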
10892 
// Convert a float or double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and go
// the slow path if needed.
10899 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10900   predicate(UseSSE==0);
10901   match(Set dst (ConvF2I src));
10902   effect( KILL tmp, KILL cr );
10903   format %{ "FLD    $src\t# Convert float to int \n\t"
10904             "FLDCW  trunc mode\n\t"
10905             "SUB    ESP,4\n\t"
10906             "FISTp  [ESP + #0]\n\t"
10907             "FLDCW  std/24-bit mode\n\t"
10908             "POP    EAX\n\t"
10909             "CMP    EAX,0x80000000\n\t"
10910             "JNE,s  fast\n\t"
10911             "FLD    $src\n\t"
10912             "CALL   d2i_wrapper\n"
10913       "fast:" %}
10914   // DPR2I_encoding works for FPR2I
10915   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10916   ins_pipe( pipe_slow );
10917 %}
10918 
10919 // Convert a float in xmm to an int reg.
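// CVTTSS2SI also reports NaN/overflow by writing the integer-indefinite value 0x80000000,
// so a single compare against that pattern is enough to select the slow-path wrapper.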
10920 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10921   predicate(UseSSE>=1);
10922   match(Set dst (ConvF2I src));
10923   effect( KILL tmp, KILL cr );
10924   format %{ "CVTTSS2SI $dst, $src\n\t"
10925             "CMP    $dst,0x80000000\n\t"
10926             "JNE,s  fast\n\t"
10927             "SUB    ESP, 4\n\t"
10928             "MOVSS  [ESP], $src\n\t"
10929             "FLD    [ESP]\n\t"
10930             "ADD    ESP, 4\n\t"
10931             "CALL   d2i_wrapper\n"
10932       "fast:" %}
10933   ins_encode %{
10934     Label fast;
10935     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10936     __ cmpl($dst$$Register, 0x80000000);
10937     __ jccb(Assembler::notEqual, fast);
10938     __ subptr(rsp, 4);
10939     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10940     __ fld_s(Address(rsp, 0));
10941     __ addptr(rsp, 4);
10942     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10943     __ post_call_nop();
10944     __ bind(fast);
10945   %}
10946   ins_pipe( pipe_slow );
10947 %}
10948 
10949 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10950   predicate(UseSSE==0);
10951   match(Set dst (ConvF2L src));
10952   effect( KILL cr );
10953   format %{ "FLD    $src\t# Convert float to long\n\t"
10954             "FLDCW  trunc mode\n\t"
10955             "SUB    ESP,8\n\t"
10956             "FISTp  [ESP + #0]\n\t"
10957             "FLDCW  std/24-bit mode\n\t"
10958             "POP    EAX\n\t"
10959             "POP    EDX\n\t"
10960             "CMP    EDX,0x80000000\n\t"
10961             "JNE,s  fast\n\t"
10962             "TEST   EAX,EAX\n\t"
10963             "JNE,s  fast\n\t"
10964             "FLD    $src\n\t"
10965             "CALL   d2l_wrapper\n"
10966       "fast:" %}
10967   // DPR2L_encoding works for FPR2L
10968   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10969   ins_pipe( pipe_slow );
10970 %}
10971 
10972 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10973 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10974   predicate (UseSSE>=1);
10975   match(Set dst (ConvF2L src));
10976   effect( KILL cr );
10977   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10978             "MOVSS  [ESP],$src\n\t"
10979             "FLD_S  [ESP]\n\t"
10980             "FLDCW  trunc mode\n\t"
10981             "FISTp  [ESP + #0]\n\t"
10982             "FLDCW  std/24-bit mode\n\t"
10983             "POP    EAX\n\t"
10984             "POP    EDX\n\t"
10985             "CMP    EDX,0x80000000\n\t"
10986             "JNE,s  fast\n\t"
10987             "TEST   EAX,EAX\n\t"
10988             "JNE,s  fast\n\t"
10989             "SUB    ESP,4\t# Convert float to long\n\t"
10990             "MOVSS  [ESP],$src\n\t"
10991             "FLD_S  [ESP]\n\t"
10992             "ADD    ESP,4\n\t"
10993             "CALL   d2l_wrapper\n"
10994       "fast:" %}
10995   ins_encode %{
10996     Label fast;
10997     __ subptr(rsp, 8);
10998     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10999     __ fld_s(Address(rsp, 0));
11000     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11001     __ fistp_d(Address(rsp, 0));
11002     // Restore the rounding mode, mask the exception
11003     if (Compile::current()->in_24_bit_fp_mode()) {
11004       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11005     } else {
11006       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11007     }
11008     // Load the converted long, adjust CPU stack
11009     __ pop(rax);
11010     __ pop(rdx);
11011     __ cmpl(rdx, 0x80000000);
11012     __ jccb(Assembler::notEqual, fast);
11013     __ testl(rax, rax);
11014     __ jccb(Assembler::notEqual, fast);
11015     __ subptr(rsp, 4);
11016     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11017     __ fld_s(Address(rsp, 0));
11018     __ addptr(rsp, 4);
11019     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11020     __ post_call_nop();
11021     __ bind(fast);
11022   %}
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11027   predicate( UseSSE<=1 );
11028   match(Set dst (ConvI2D src));
11029   format %{ "FILD   $src\n\t"
11030             "FSTP   $dst" %}
11031   opcode(0xDB, 0x0);  /* DB /0 */
11032   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 instruct convI2D_reg(regD dst, rRegI src) %{
11037   predicate( UseSSE>=2 && !UseXmmI2D );
11038   match(Set dst (ConvI2D src));
11039   format %{ "CVTSI2SD $dst,$src" %}
11040   ins_encode %{
11041     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11042   %}
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convI2D_mem(regD dst, memory mem) %{
11047   predicate( UseSSE>=2 );
11048   match(Set dst (ConvI2D (LoadI mem)));
11049   format %{ "CVTSI2SD $dst,$mem" %}
11050   ins_encode %{
11051     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11052   %}
11053   ins_pipe( pipe_slow );
11054 %}
11055 
11056 instruct convXI2D_reg(regD dst, rRegI src)
11057 %{
11058   predicate( UseSSE>=2 && UseXmmI2D );
11059   match(Set dst (ConvI2D src));
11060 
11061   format %{ "MOVD  $dst,$src\n\t"
11062             "CVTDQ2PD $dst,$dst\t# i2d" %}
11063   ins_encode %{
11064     __ movdl($dst$$XMMRegister, $src$$Register);
11065     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11066   %}
11067   ins_pipe(pipe_slow); // XXX
11068 %}
11069 
11070 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11071   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11072   match(Set dst (ConvI2D (LoadI mem)));
11073   format %{ "FILD   $mem\n\t"
11074             "FSTP   $dst" %}
11075   opcode(0xDB);      /* DB /0 */
11076   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11077               Pop_Reg_DPR(dst));
11078   ins_pipe( fpu_reg_mem );
11079 %}
11080 
11081 // Convert a byte to a float; no rounding step needed.
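// The predicate only accepts an input masked with 0xFF (AndI with 255), so the value fits
// in 8 bits and is exactly representable even under 24-bit rounding; no store/reload needed.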
11082 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11083   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11084   match(Set dst (ConvI2F src));
11085   format %{ "FILD   $src\n\t"
11086             "FSTP   $dst" %}
11087 
11088   opcode(0xDB, 0x0);  /* DB /0 */
11089   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11090   ins_pipe( fpu_reg_mem );
11091 %}
11092 
11093 // In 24-bit mode, force exponent rounding by storing back out
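// (FSTP_S through a 32-bit stack slot rounds the x87 result to single precision, which is
// what the 24-bit mode of this compile expects subsequent uses to observe.)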
11094 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11095   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11096   match(Set dst (ConvI2F src));
11097   ins_cost(200);
11098   format %{ "FILD   $src\n\t"
11099             "FSTP_S $dst" %}
11100   opcode(0xDB, 0x0);  /* DB /0 */
11101   ins_encode( Push_Mem_I(src),
11102               Pop_Mem_FPR(dst));
11103   ins_pipe( fpu_mem_mem );
11104 %}
11105 
11106 // In 24-bit mode, force exponent rounding by storing back out
11107 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11108   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11109   match(Set dst (ConvI2F (LoadI mem)));
11110   ins_cost(200);
11111   format %{ "FILD   $mem\n\t"
11112             "FSTP_S $dst" %}
11113   opcode(0xDB);  /* DB /0 */
11114   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11115               Pop_Mem_FPR(dst));
11116   ins_pipe( fpu_mem_mem );
11117 %}
11118 
// This instruction does not round to 24 bits
11120 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   format %{ "FILD   $src\n\t"
11124             "FSTP   $dst" %}
11125   opcode(0xDB, 0x0);  /* DB /0 */
11126   ins_encode( Push_Mem_I(src),
11127               Pop_Reg_FPR(dst));
11128   ins_pipe( fpu_reg_mem );
11129 %}
11130 
// This instruction does not round to 24 bits
11132 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11133   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134   match(Set dst (ConvI2F (LoadI mem)));
11135   format %{ "FILD   $mem\n\t"
11136             "FSTP   $dst" %}
11137   opcode(0xDB);      /* DB /0 */
11138   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11139               Pop_Reg_FPR(dst));
11140   ins_pipe( fpu_reg_mem );
11141 %}
11142 
11143 // Convert an int to a float in xmm; no rounding step needed.
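// (CVTSI2SS already produces a correctly rounded single-precision value, so there is no
// 24-bit store/reload step as in the x87 rules above.)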
11144 instruct convI2F_reg(regF dst, rRegI src) %{
11145   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11146   match(Set dst (ConvI2F src));
11147   format %{ "CVTSI2SS $dst, $src" %}
11148   ins_encode %{
11149     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11150   %}
11151   ins_pipe( pipe_slow );
11152 %}
11153 
instruct convXI2F_reg(regF dst, rRegI src)
11155 %{
11156   predicate( UseSSE>=2 && UseXmmI2F );
11157   match(Set dst (ConvI2F src));
11158 
11159   format %{ "MOVD  $dst,$src\n\t"
11160             "CVTDQ2PS $dst,$dst\t# i2f" %}
11161   ins_encode %{
11162     __ movdl($dst$$XMMRegister, $src$$Register);
11163     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11164   %}
11165   ins_pipe(pipe_slow); // XXX
11166 %}
11167 
11168 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11169   match(Set dst (ConvI2L src));
11170   effect(KILL cr);
11171   ins_cost(375);
11172   format %{ "MOV    $dst.lo,$src\n\t"
11173             "MOV    $dst.hi,$src\n\t"
11174             "SAR    $dst.hi,31" %}
11175   ins_encode(convert_int_long(dst,src));
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 // Zero-extend convert int to long
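// Matching the AndL of the conversion with the 0xFFFFFFFF mask lets the zero-extension be
// done as a plain move plus clearing the high half, instead of the sign-extending
// MOV/MOV/SAR sequence above.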
11180 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11181   match(Set dst (AndL (ConvI2L src) mask) );
11182   effect( KILL flags );
11183   ins_cost(250);
11184   format %{ "MOV    $dst.lo,$src\n\t"
11185             "XOR    $dst.hi,$dst.hi" %}
11186   opcode(0x33); // XOR
11187   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 // Zero-extend long
11192 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11193   match(Set dst (AndL src mask) );
11194   effect( KILL flags );
11195   ins_cost(250);
11196   format %{ "MOV    $dst.lo,$src.lo\n\t"
11197             "XOR    $dst.hi,$dst.hi\n\t" %}
11198   opcode(0x33); // XOR
11199   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11200   ins_pipe( ialu_reg_reg_long );
11201 %}
11202 
11203 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11204   predicate (UseSSE<=1);
11205   match(Set dst (ConvL2D src));
11206   effect( KILL cr );
11207   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11208             "PUSH   $src.lo\n\t"
11209             "FILD   ST,[ESP + #0]\n\t"
11210             "ADD    ESP,8\n\t"
11211             "FSTP_D $dst\t# D-round" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=2);
11219   match(Set dst (ConvL2D src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_D [ESP]\n\t"
11225             "MOVSD  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11233   predicate (UseSSE>=1);
11234   match(Set dst (ConvL2F src));
11235   effect( KILL cr );
11236   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11237             "PUSH   $src.lo\n\t"
11238             "FILD_D [ESP]\n\t"
11239             "FSTP_S [ESP]\n\t"
11240             "MOVSS  $dst,[ESP]\n\t"
11241             "ADD    ESP,8" %}
11242   opcode(0xDF, 0x5);  /* DF /5 */
11243   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11244   ins_pipe( pipe_slow );
11245 %}
11246 
11247 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11248   match(Set dst (ConvL2F src));
11249   effect( KILL cr );
11250   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11251             "PUSH   $src.lo\n\t"
11252             "FILD   ST,[ESP + #0]\n\t"
11253             "ADD    ESP,8\n\t"
11254             "FSTP_S $dst\t# F-round" %}
11255   opcode(0xDF, 0x5);  /* DF /5 */
11256   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11257   ins_pipe( pipe_slow );
11258 %}
11259 
11260 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11261   match(Set dst (ConvL2I src));
11262   effect( DEF dst, USE src );
11263   format %{ "MOV    $dst,$src.lo" %}
11264   ins_encode(enc_CopyL_Lo(dst,src));
11265   ins_pipe( ialu_reg_reg );
11266 %}
11267 
11268 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11269   match(Set dst (MoveF2I src));
11270   effect( DEF dst, USE src );
11271   ins_cost(100);
11272   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11273   ins_encode %{
11274     __ movl($dst$$Register, Address(rsp, $src$$disp));
11275   %}
11276   ins_pipe( ialu_reg_mem );
11277 %}
11278 
11279 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11280   predicate(UseSSE==0);
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283 
11284   ins_cost(125);
11285   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11286   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11287   ins_pipe( fpu_mem_reg );
11288 %}
11289 
11290 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11291   predicate(UseSSE>=1);
11292   match(Set dst (MoveF2I src));
11293   effect( DEF dst, USE src );
11294 
11295   ins_cost(95);
11296   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11297   ins_encode %{
11298     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11304   predicate(UseSSE>=2);
11305   match(Set dst (MoveF2I src));
11306   effect( DEF dst, USE src );
11307   ins_cost(85);
11308   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11309   ins_encode %{
11310     __ movdl($dst$$Register, $src$$XMMRegister);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(100);
11320   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11321   ins_encode %{
11322     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11323   %}
11324   ins_pipe( ialu_mem_reg );
11325 %}
11326 
11327 
11328 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11329   predicate(UseSSE==0);
11330   match(Set dst (MoveI2F src));
11331   effect(DEF dst, USE src);
11332 
11333   ins_cost(125);
11334   format %{ "FLD_S  $src\n\t"
11335             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11336   opcode(0xD9);               /* D9 /0, FLD m32real */
11337   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11338               Pop_Reg_FPR(dst) );
11339   ins_pipe( fpu_reg_mem );
11340 %}
11341 
11342 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11343   predicate(UseSSE>=1);
11344   match(Set dst (MoveI2F src));
11345   effect( DEF dst, USE src );
11346 
11347   ins_cost(95);
11348   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11349   ins_encode %{
11350     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11356   predicate(UseSSE>=2);
11357   match(Set dst (MoveI2F src));
11358   effect( DEF dst, USE src );
11359 
11360   ins_cost(85);
11361   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11362   ins_encode %{
11363     __ movdl($dst$$XMMRegister, $src$$Register);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11369   match(Set dst (MoveD2L src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(250);
11373   format %{ "MOV    $dst.lo,$src\n\t"
11374             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11375   opcode(0x8B, 0x8B);
11376   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11377   ins_pipe( ialu_mem_long_reg );
11378 %}
11379 
11380 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11381   predicate(UseSSE<=1);
11382   match(Set dst (MoveD2L src));
11383   effect(DEF dst, USE src);
11384 
11385   ins_cost(125);
11386   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11387   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11388   ins_pipe( fpu_mem_reg );
11389 %}
11390 
11391 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveD2L src));
11394   effect(DEF dst, USE src);
11395   ins_cost(95);
11396   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11397   ins_encode %{
11398     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11404   predicate(UseSSE>=2);
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src, TEMP tmp);
11407   ins_cost(85);
11408   format %{ "MOVD   $dst.lo,$src\n\t"
11409             "PSHUFLW $tmp,$src,0x4E\n\t"
11410             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11411   ins_encode %{
11412     __ movdl($dst$$Register, $src$$XMMRegister);
11413     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11414     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11420   match(Set dst (MoveL2D src));
11421   effect(DEF dst, USE src);
11422 
11423   ins_cost(200);
11424   format %{ "MOV    $dst,$src.lo\n\t"
11425             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11426   opcode(0x89, 0x89);
11427   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11428   ins_pipe( ialu_mem_long_reg );
11429 %}
11430 
11431 
11432 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11433   predicate(UseSSE<=1);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436   ins_cost(125);
11437 
11438   format %{ "FLD_D  $src\n\t"
11439             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11440   opcode(0xDD);               /* DD /0, FLD m64real */
11441   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11442               Pop_Reg_DPR(dst) );
11443   ins_pipe( fpu_reg_mem );
11444 %}
11445 
11446 
11447 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11448   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11449   match(Set dst (MoveL2D src));
11450   effect(DEF dst, USE src);
11451 
11452   ins_cost(95);
11453   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11454   ins_encode %{
11455     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11461   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11462   match(Set dst (MoveL2D src));
11463   effect(DEF dst, USE src);
11464 
11465   ins_cost(95);
11466   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11467   ins_encode %{
11468     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11474   predicate(UseSSE>=2);
11475   match(Set dst (MoveL2D src));
11476   effect(TEMP dst, USE src, TEMP tmp);
11477   ins_cost(85);
11478   format %{ "MOVD   $dst,$src.lo\n\t"
11479             "MOVD   $tmp,$src.hi\n\t"
11480             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11481   ins_encode %{
11482     __ movdl($dst$$XMMRegister, $src$$Register);
11483     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11484     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11485   %}
11486   ins_pipe( pipe_slow );
11487 %}
11488 
11489 //----------------------------- CompressBits/ExpandBits ------------------------
11490 
11491 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11492   predicate(n->bottom_type()->isa_long());
11493   match(Set dst (CompressBits src mask));
11494   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11495   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11496   ins_encode %{
    Label exit, partial_result;
    // Extract the lower and upper 32 bits of the source into the destination register pair in parallel.
    // Then merge the two results so that the bits extracted into the upper destination register are
    // laid out contiguously after those in the lower destination register.
11501     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11502     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11503     __ popcntl($rtmp$$Register, $mask$$Register);
11504     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11505     __ cmpl($rtmp$$Register, 32);
11506     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11508     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11509     // Shift left the contents of upper destination register by true bit count of lower mask register
11510     // and merge with lower destination register.
11511     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11512     __ orl($dst$$Register, $rtmp$$Register);
11513     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11514     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11515     // since contents of upper destination have already been copied to lower destination
11516     // register.
11517     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11519     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11520     __ jmp(exit);
    __ bind(partial_result);
11522     // Perform right shift over upper destination register to move out bits already copied
11523     // to lower destination register.
11524     __ subl($rtmp$$Register, 32);
11525     __ negl($rtmp$$Register);
11526     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11527     __ bind(exit);
11528   %}
11529   ins_pipe( pipe_slow );
11530 %}
11531 
11532 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11533   predicate(n->bottom_type()->isa_long());
11534   match(Set dst (ExpandBits src mask));
11535   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11536   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11537   ins_encode %{
    // The expand (deposit) operation reads bits sequentially from the source register, starting
    // at the LSB, and places them in the destination register at the bit positions corresponding
    // to the set bits of the mask register. Thus the number of source bits consumed equals the
    // combined set-bit count of the mask register pair.
11542     Label exit, mask_clipping;
11543     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11544     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11545     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the set-bit count of the lower mask register is 32, then no bits of the lower source
    // register feed into the upper destination register.
11548     __ cmpl($rtmp$$Register, 32);
11549     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11551     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11552     // Shift right the contents of lower source register to remove already consumed bits.
11553     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11554     // Extract the bits from lower source register starting from LSB under the influence
11555     // of upper mask register.
11556     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11557     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11558     __ subl($rtmp$$Register, 32);
11559     __ negl($rtmp$$Register);
11560     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11561     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11562     // Clear the set bits in upper mask register which have been used to extract the contents
11563     // from lower source register.
11564     __ bind(mask_clipping);
11565     __ blsrl($mask$$Register, $mask$$Register);
11566     __ decrementl($rtmp$$Register, 1);
11567     __ jccb(Assembler::greater, mask_clipping);
11568     // Starting from LSB extract the bits from upper source register under the influence of
11569     // remaining set bits in upper mask register.
11570     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11571     // Merge the partial results extracted from lower and upper source register bits.
11572     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11573     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11574     __ bind(exit);
11575   %}
11576   ins_pipe( pipe_slow );
11577 %}
11578 
11579 // =======================================================================
11580 // fast clearing of an array
11581 // Small ClearArray non-AVX512.
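// clear_mem() chooses among REP STOSB, an XMM/YMM zeroing loop, or REP STOS at code-emit
// time (UseFastStosb / UseXMMForObjInit), which is why the format string below lists all
// three variants.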
11582 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11583   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11584   match(Set dummy (ClearArray cnt base));
11585   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11586 
11587   format %{ $$template
11588     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11589     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11590     $$emit$$"JG     LARGE\n\t"
11591     $$emit$$"SHL    ECX, 1\n\t"
11592     $$emit$$"DEC    ECX\n\t"
11593     $$emit$$"JS     DONE\t# Zero length\n\t"
11594     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11595     $$emit$$"DEC    ECX\n\t"
11596     $$emit$$"JGE    LOOP\n\t"
11597     $$emit$$"JMP    DONE\n\t"
11598     $$emit$$"# LARGE:\n\t"
11599     if (UseFastStosb) {
11600        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11601        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11602     } else if (UseXMMForObjInit) {
11603        $$emit$$"MOV     RDI,RAX\n\t"
11604        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11605        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11606        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11607        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11608        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11609        $$emit$$"ADD     0x40,RAX\n\t"
11610        $$emit$$"# L_zero_64_bytes:\n\t"
11611        $$emit$$"SUB     0x8,RCX\n\t"
11612        $$emit$$"JGE     L_loop\n\t"
11613        $$emit$$"ADD     0x4,RCX\n\t"
11614        $$emit$$"JL      L_tail\n\t"
11615        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11616        $$emit$$"ADD     0x20,RAX\n\t"
11617        $$emit$$"SUB     0x4,RCX\n\t"
11618        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11619        $$emit$$"ADD     0x4,RCX\n\t"
11620        $$emit$$"JLE     L_end\n\t"
11621        $$emit$$"DEC     RCX\n\t"
11622        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11623        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11624        $$emit$$"ADD     0x8,RAX\n\t"
11625        $$emit$$"DEC     RCX\n\t"
11626        $$emit$$"JGE     L_sloop\n\t"
11627        $$emit$$"# L_end:\n\t"
11628     } else {
11629        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11630        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11631     }
11632     $$emit$$"# DONE"
11633   %}
11634   ins_encode %{
11635     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11636                  $tmp$$XMMRegister, false, knoreg);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 // Small ClearArray AVX512 non-constant length.
11642 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11643   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11644   match(Set dummy (ClearArray cnt base));
11645   ins_cost(125);
11646   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11647 
11648   format %{ $$template
11649     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11650     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11651     $$emit$$"JG     LARGE\n\t"
11652     $$emit$$"SHL    ECX, 1\n\t"
11653     $$emit$$"DEC    ECX\n\t"
11654     $$emit$$"JS     DONE\t# Zero length\n\t"
11655     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11656     $$emit$$"DEC    ECX\n\t"
11657     $$emit$$"JGE    LOOP\n\t"
11658     $$emit$$"JMP    DONE\n\t"
11659     $$emit$$"# LARGE:\n\t"
11660     if (UseFastStosb) {
11661        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11662        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11663     } else if (UseXMMForObjInit) {
11664        $$emit$$"MOV     RDI,RAX\n\t"
11665        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11666        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11667        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11668        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11669        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11670        $$emit$$"ADD     0x40,RAX\n\t"
11671        $$emit$$"# L_zero_64_bytes:\n\t"
11672        $$emit$$"SUB     0x8,RCX\n\t"
11673        $$emit$$"JGE     L_loop\n\t"
11674        $$emit$$"ADD     0x4,RCX\n\t"
11675        $$emit$$"JL      L_tail\n\t"
11676        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11677        $$emit$$"ADD     0x20,RAX\n\t"
11678        $$emit$$"SUB     0x4,RCX\n\t"
11679        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11680        $$emit$$"ADD     0x4,RCX\n\t"
11681        $$emit$$"JLE     L_end\n\t"
11682        $$emit$$"DEC     RCX\n\t"
11683        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11684        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11685        $$emit$$"ADD     0x8,RAX\n\t"
11686        $$emit$$"DEC     RCX\n\t"
11687        $$emit$$"JGE     L_sloop\n\t"
11688        $$emit$$"# L_end:\n\t"
11689     } else {
11690        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11691        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11692     }
11693     $$emit$$"# DONE"
11694   %}
11695   ins_encode %{
11696     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11697                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11698   %}
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 // Large ClearArray non-AVX512.
11703 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11704   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11705   match(Set dummy (ClearArray cnt base));
11706   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11707   format %{ $$template
11708     if (UseFastStosb) {
11709        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11710        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11711        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11712     } else if (UseXMMForObjInit) {
11713        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11714        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11715        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11716        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11717        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11718        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11719        $$emit$$"ADD     0x40,RAX\n\t"
11720        $$emit$$"# L_zero_64_bytes:\n\t"
11721        $$emit$$"SUB     0x8,RCX\n\t"
11722        $$emit$$"JGE     L_loop\n\t"
11723        $$emit$$"ADD     0x4,RCX\n\t"
11724        $$emit$$"JL      L_tail\n\t"
11725        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11726        $$emit$$"ADD     0x20,RAX\n\t"
11727        $$emit$$"SUB     0x4,RCX\n\t"
11728        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11729        $$emit$$"ADD     0x4,RCX\n\t"
11730        $$emit$$"JLE     L_end\n\t"
11731        $$emit$$"DEC     RCX\n\t"
11732        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11733        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11734        $$emit$$"ADD     0x8,RAX\n\t"
11735        $$emit$$"DEC     RCX\n\t"
11736        $$emit$$"JGE     L_sloop\n\t"
11737        $$emit$$"# L_end:\n\t"
11738     } else {
11739        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11740        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11741        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11742     }
11743     $$emit$$"# DONE"
11744   %}
11745   ins_encode %{
11746     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11747                  $tmp$$XMMRegister, true, knoreg);
11748   %}
11749   ins_pipe( pipe_slow );
11750 %}
11751 
11752 // Large ClearArray AVX512.
11753 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11754   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11755   match(Set dummy (ClearArray cnt base));
11756   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11757   format %{ $$template
11758     if (UseFastStosb) {
11759        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11760        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11761        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11762     } else if (UseXMMForObjInit) {
11763        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11764        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11765        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11766        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11767        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11768        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11769        $$emit$$"ADD     0x40,RAX\n\t"
11770        $$emit$$"# L_zero_64_bytes:\n\t"
11771        $$emit$$"SUB     0x8,RCX\n\t"
11772        $$emit$$"JGE     L_loop\n\t"
11773        $$emit$$"ADD     0x4,RCX\n\t"
11774        $$emit$$"JL      L_tail\n\t"
11775        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11776        $$emit$$"ADD     0x20,RAX\n\t"
11777        $$emit$$"SUB     0x4,RCX\n\t"
11778        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11779        $$emit$$"ADD     0x4,RCX\n\t"
11780        $$emit$$"JLE     L_end\n\t"
11781        $$emit$$"DEC     RCX\n\t"
11782        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11783        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11784        $$emit$$"ADD     0x8,RAX\n\t"
11785        $$emit$$"DEC     RCX\n\t"
11786        $$emit$$"JGE     L_sloop\n\t"
11787        $$emit$$"# L_end:\n\t"
11788     } else {
11789        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11790        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11791        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11792     }
11793     $$emit$$"# DONE"
11794   %}
11795   ins_encode %{
11796     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11797                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11798   %}
11799   ins_pipe( pipe_slow );
11800 %}
11801 
11802 // Small ClearArray AVX512 constant length.
11803 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11804 %{
11805   predicate(!((ClearArrayNode*)n)->is_large() &&
11806                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11807   match(Set dummy (ClearArray cnt base));
11808   ins_cost(100);
11809   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11810   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11811   ins_encode %{
11812    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11813   %}
11814   ins_pipe(pipe_slow);
11815 %}
11816 
11817 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11818                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11819   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11820   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11821   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11822 
11823   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11824   ins_encode %{
11825     __ string_compare($str1$$Register, $str2$$Register,
11826                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11827                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11828   %}
11829   ins_pipe( pipe_slow );
11830 %}
11831 
11832 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11833                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11834   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11835   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11836   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11837 
11838   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11839   ins_encode %{
11840     __ string_compare($str1$$Register, $str2$$Register,
11841                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11842                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11848                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11849   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11850   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11851   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11852 
11853   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11854   ins_encode %{
11855     __ string_compare($str1$$Register, $str2$$Register,
11856                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11857                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11863                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11864   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11865   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11866   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11867 
11868   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11869   ins_encode %{
11870     __ string_compare($str1$$Register, $str2$$Register,
11871                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11872                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11873   %}
11874   ins_pipe( pipe_slow );
11875 %}
11876 
11877 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11878                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11879   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11880   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11881   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11882 
11883   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11884   ins_encode %{
11885     __ string_compare($str1$$Register, $str2$$Register,
11886                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11887                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11893                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11894   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11895   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11896   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11897 
11898   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11899   ins_encode %{
11900     __ string_compare($str1$$Register, $str2$$Register,
11901                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11902                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11903   %}
11904   ins_pipe( pipe_slow );
11905 %}
11906 
11907 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11908                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11909   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11910   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11911   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11912 
11913   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11914   ins_encode %{
11915     __ string_compare($str2$$Register, $str1$$Register,
11916                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11917                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
11922 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11923                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11924   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11925   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11926   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11927 
11928   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11929   ins_encode %{
11930     __ string_compare($str2$$Register, $str1$$Register,
11931                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11932                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11933   %}
11934   ins_pipe( pipe_slow );
11935 %}
11936 
11937 // fast string equals
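// The same arrays_equals() macro backs the AryEq rules further down; the leading boolean
// selects the array form (true) versus this string form (false), and the trailing kReg
// argument is knoreg unless the AVX-512 variant supplies a mask register.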
11938 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11939                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11940   predicate(!VM_Version::supports_avx512vlbw());
11941   match(Set result (StrEquals (Binary str1 str2) cnt));
11942   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11943 
11944   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11945   ins_encode %{
11946     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11947                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11948                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11949   %}
11950 
11951   ins_pipe( pipe_slow );
11952 %}
11953 
11954 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11955                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11956   predicate(VM_Version::supports_avx512vlbw());
11957   match(Set result (StrEquals (Binary str1 str2) cnt));
11958   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11959 
11960   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11961   ins_encode %{
11962     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11963                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11964                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11965   %}
11966 
11967   ins_pipe( pipe_slow );
11968 %}
11969 
11970 
11971 // fast search of substring with known size.
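// When the constant needle is long enough (>= 16 bytes for LL, >= 8 chars for UU/UL) it never
// needs the page-boundary stack copy, so the specialized string_indexofC8 path is taken;
// shorter constants fall back to the general string_indexof code.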
11972 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11973                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11974   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11975   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11976   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11977 
11978   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11979   ins_encode %{
11980     int icnt2 = (int)$int_cnt2$$constant;
11981     if (icnt2 >= 16) {
11982       // IndexOf for constant substrings with size >= 16 elements
11983       // which don't need to be loaded through stack.
11984       __ string_indexofC8($str1$$Register, $str2$$Register,
11985                           $cnt1$$Register, $cnt2$$Register,
11986                           icnt2, $result$$Register,
11987                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11988     } else {
11989       // Small strings are loaded through stack if they cross page boundary.
11990       __ string_indexof($str1$$Register, $str2$$Register,
11991                         $cnt1$$Register, $cnt2$$Register,
11992                         icnt2, $result$$Register,
11993                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11994     }
11995   %}
11996   ins_pipe( pipe_slow );
11997 %}
11998 
11999 // fast search of substring with known size.
12000 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12001                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12002   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12003   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12004   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12005 
12006   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12007   ins_encode %{
12008     int icnt2 = (int)$int_cnt2$$constant;
12009     if (icnt2 >= 8) {
12010       // IndexOf for constant substrings with size >= 8 elements
12011       // which don't need to be loaded through stack.
12012       __ string_indexofC8($str1$$Register, $str2$$Register,
12013                           $cnt1$$Register, $cnt2$$Register,
12014                           icnt2, $result$$Register,
12015                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12016     } else {
12017       // Small strings are loaded through stack if they cross page boundary.
12018       __ string_indexof($str1$$Register, $str2$$Register,
12019                         $cnt1$$Register, $cnt2$$Register,
12020                         icnt2, $result$$Register,
12021                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12022     }
12023   %}
12024   ins_pipe( pipe_slow );
12025 %}
12026 
12027 // fast search of substring with known size.
12028 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12029                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12030   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12031   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12032   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12033 
12034   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12035   ins_encode %{
12036     int icnt2 = (int)$int_cnt2$$constant;
12037     if (icnt2 >= 8) {
12038       // IndexOf for constant substrings with size >= 8 elements
12039       // which don't need to be loaded through stack.
12040       __ string_indexofC8($str1$$Register, $str2$$Register,
12041                           $cnt1$$Register, $cnt2$$Register,
12042                           icnt2, $result$$Register,
12043                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12044     } else {
12045       // Small strings are loaded through stack if they cross page boundary.
12046       __ string_indexof($str1$$Register, $str2$$Register,
12047                         $cnt1$$Register, $cnt2$$Register,
12048                         icnt2, $result$$Register,
12049                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12050     }
12051   %}
12052   ins_pipe( pipe_slow );
12053 %}
12054 
12055 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12056                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12057   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12058   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12059   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12060 
12061   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12062   ins_encode %{
12063     __ string_indexof($str1$$Register, $str2$$Register,
12064                       $cnt1$$Register, $cnt2$$Register,
12065                       (-1), $result$$Register,
12066                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12067   %}
12068   ins_pipe( pipe_slow );
12069 %}
12070 
12071 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12072                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12073   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12074   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12075   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12076 
12077   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12078   ins_encode %{
12079     __ string_indexof($str1$$Register, $str2$$Register,
12080                       $cnt1$$Register, $cnt2$$Register,
12081                       (-1), $result$$Register,
12082                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12083   %}
12084   ins_pipe( pipe_slow );
12085 %}
12086 
12087 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12088                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12089   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12090   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12091   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12092 
12093   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12094   ins_encode %{
12095     __ string_indexof($str1$$Register, $str2$$Register,
12096                       $cnt1$$Register, $cnt2$$Register,
12097                       (-1), $result$$Register,
12098                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12099   %}
12100   ins_pipe( pipe_slow );
12101 %}
12102 
12103 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12104                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12105   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12106   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12107   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12108   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12109   ins_encode %{
12110     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12111                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12112   %}
12113   ins_pipe( pipe_slow );
12114 %}
12115 
12116 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12117                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12118   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12119   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12120   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12121   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12122   ins_encode %{
12123     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12124                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12125   %}
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 
12130 // fast array equals
12131 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12132                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12133 %{
12134   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12135   match(Set result (AryEq ary1 ary2));
12136   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12137   //ins_cost(300);
12138 
12139   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12140   ins_encode %{
12141     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12142                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12143                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12144   %}
12145   ins_pipe( pipe_slow );
12146 %}
12147 
12148 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12149                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12150 %{
12151   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12152   match(Set result (AryEq ary1 ary2));
12153   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12154   //ins_cost(300);
12155 
12156   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12157   ins_encode %{
12158     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12159                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12160                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12161   %}
12162   ins_pipe( pipe_slow );
12163 %}
12164 
12165 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12166                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12167 %{
12168   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12169   match(Set result (AryEq ary1 ary2));
12170   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12171   //ins_cost(300);
12172 
12173   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12174   ins_encode %{
12175     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12176                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12177                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12178   %}
12179   ins_pipe( pipe_slow );
12180 %}
12181 
12182 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12183                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12184 %{
12185   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12186   match(Set result (AryEq ary1 ary2));
12187   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12188   //ins_cost(300);
12189 
12190   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12191   ins_encode %{
12192     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12193                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12194                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12195   %}
12196   ins_pipe( pipe_slow );
12197 %}
12198 
12199 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12200                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12201 %{
12202   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12203   match(Set result (CountPositives ary1 len));
12204   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12205 
12206   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12207   ins_encode %{
12208     __ count_positives($ary1$$Register, $len$$Register,
12209                        $result$$Register, $tmp3$$Register,
12210                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12211   %}
12212   ins_pipe( pipe_slow );
12213 %}
12214 
12215 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12216                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12217 %{
12218   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12219   match(Set result (CountPositives ary1 len));
12220   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12221 
12222   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12223   ins_encode %{
12224     __ count_positives($ary1$$Register, $len$$Register,
12225                        $result$$Register, $tmp3$$Register,
12226                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12227   %}
12228   ins_pipe( pipe_slow );
12229 %}
12230 
12231 
12232 // fast char[] to byte[] compression
12233 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12234                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12235   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12236   match(Set result (StrCompressedCopy src (Binary dst len)));
12237   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12238 
12239   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12240   ins_encode %{
12241     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12242                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12243                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12244                            knoreg, knoreg);
12245   %}
12246   ins_pipe( pipe_slow );
12247 %}
12248 
12249 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12250                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12251   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12252   match(Set result (StrCompressedCopy src (Binary dst len)));
12253   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12254 
12255   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12256   ins_encode %{
12257     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12258                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12259                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12260                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12261   %}
12262   ins_pipe( pipe_slow );
12263 %}
12264 
12265 // fast byte[] to char[] inflation
12266 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12267                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12268   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12269   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12270   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12271 
12272   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12273   ins_encode %{
12274     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12275                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12276   %}
12277   ins_pipe( pipe_slow );
12278 %}
12279 
12280 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12281                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12282   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12283   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12284   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12285 
12286   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12287   ins_encode %{
12288     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12289                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12290   %}
12291   ins_pipe( pipe_slow );
12292 %}
12293 
12294 // encode char[] to byte[] in ISO_8859_1
12295 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12296                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12297                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12298   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12299   match(Set result (EncodeISOArray src (Binary dst len)));
12300   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12301 
12302   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12303   ins_encode %{
12304     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12305                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12306                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12307   %}
12308   ins_pipe( pipe_slow );
12309 %}
12310 
12311 // encode char[] to byte[] in ASCII
12312 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12313                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12314                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12315   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12316   match(Set result (EncodeISOArray src (Binary dst len)));
12317   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12318 
12319   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12320   ins_encode %{
12321     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12322                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12323                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12324   %}
12325   ins_pipe( pipe_slow );
12326 %}
12327 
12328 //----------Control Flow Instructions------------------------------------------
12329 // Signed compare Instructions
12330 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12331   match(Set cr (CmpI op1 op2));
12332   effect( DEF cr, USE op1, USE op2 );
12333   format %{ "CMP    $op1,$op2" %}
12334   opcode(0x3B);  /* Opcode 3B /r */
12335   ins_encode( OpcP, RegReg( op1, op2) );
12336   ins_pipe( ialu_cr_reg_reg );
12337 %}
12338 
12339 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12340   match(Set cr (CmpI op1 op2));
12341   effect( DEF cr, USE op1 );
12342   format %{ "CMP    $op1,$op2" %}
12343   opcode(0x81,0x07);  /* Opcode 81 /7 */
12344   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12345   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12346   ins_pipe( ialu_cr_reg_imm );
12347 %}
12348 
12349 // Cisc-spilled version of cmpI_eReg
12350 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12351   match(Set cr (CmpI op1 (LoadI op2)));
12352 
12353   format %{ "CMP    $op1,$op2" %}
12354   ins_cost(500);
12355   opcode(0x3B);  /* Opcode 3B /r */
12356   ins_encode( OpcP, RegMem( op1, op2) );
12357   ins_pipe( ialu_cr_reg_mem );
12358 %}
12359 
12360 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12361   match(Set cr (CmpI src zero));
12362   effect( DEF cr, USE src );
12363 
12364   format %{ "TEST   $src,$src" %}
12365   opcode(0x85);
12366   ins_encode( OpcP, RegReg( src, src ) );
12367   ins_pipe( ialu_cr_reg_imm );
12368 %}
12369 
12370 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12371   match(Set cr (CmpI (AndI src con) zero));
12372 
12373   format %{ "TEST   $src,$con" %}
12374   opcode(0xF7,0x00);
12375   ins_encode( OpcP, RegOpc(src), Con32(con) );
12376   ins_pipe( ialu_cr_reg_imm );
12377 %}
12378 
12379 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12380   match(Set cr (CmpI (AndI src mem) zero));
12381 
12382   format %{ "TEST   $src,$mem" %}
12383   opcode(0x85);
12384   ins_encode( OpcP, RegMem( src, mem ) );
12385   ins_pipe( ialu_cr_reg_mem );
12386 %}
12387 
12388 // Unsigned compare Instructions; really, same as signed except they
12389 // produce an eFlagsRegU instead of eFlagsReg.
12390 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12391   match(Set cr (CmpU op1 op2));
12392 
12393   format %{ "CMPu   $op1,$op2" %}
12394   opcode(0x3B);  /* Opcode 3B /r */
12395   ins_encode( OpcP, RegReg( op1, op2) );
12396   ins_pipe( ialu_cr_reg_reg );
12397 %}
12398 
12399 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12400   match(Set cr (CmpU op1 op2));
12401 
12402   format %{ "CMPu   $op1,$op2" %}
12403   opcode(0x81,0x07);  /* Opcode 81 /7 */
12404   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12405   ins_pipe( ialu_cr_reg_imm );
12406 %}
12407 
// Cisc-spilled version of cmpU_eReg
12409 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12410   match(Set cr (CmpU op1 (LoadI op2)));
12411 
12412   format %{ "CMPu   $op1,$op2" %}
12413   ins_cost(500);
12414   opcode(0x3B);  /* Opcode 3B /r */
12415   ins_encode( OpcP, RegMem( op1, op2) );
12416   ins_pipe( ialu_cr_reg_mem );
12417 %}
12418 
12419 // // Cisc-spilled version of cmpU_eReg
12420 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12421 //  match(Set cr (CmpU (LoadI op1) op2));
12422 //
12423 //  format %{ "CMPu   $op1,$op2" %}
12424 //  ins_cost(500);
12425 //  opcode(0x39);  /* Opcode 39 /r */
12426 //  ins_encode( OpcP, RegMem( op1, op2) );
12427 //%}
12428 
12429 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12430   match(Set cr (CmpU src zero));
12431 
12432   format %{ "TESTu  $src,$src" %}
12433   opcode(0x85);
12434   ins_encode( OpcP, RegReg( src, src ) );
12435   ins_pipe( ialu_cr_reg_imm );
12436 %}
12437 
12438 // Unsigned pointer compare Instructions
12439 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12440   match(Set cr (CmpP op1 op2));
12441 
12442   format %{ "CMPu   $op1,$op2" %}
12443   opcode(0x3B);  /* Opcode 3B /r */
12444   ins_encode( OpcP, RegReg( op1, op2) );
12445   ins_pipe( ialu_cr_reg_reg );
12446 %}
12447 
12448 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12449   match(Set cr (CmpP op1 op2));
12450 
12451   format %{ "CMPu   $op1,$op2" %}
12452   opcode(0x81,0x07);  /* Opcode 81 /7 */
12453   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12454   ins_pipe( ialu_cr_reg_imm );
12455 %}
12456 
// Cisc-spilled version of cmpP_eReg
12458 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12459   match(Set cr (CmpP op1 (LoadP op2)));
12460 
12461   format %{ "CMPu   $op1,$op2" %}
12462   ins_cost(500);
12463   opcode(0x3B);  /* Opcode 3B /r */
12464   ins_encode( OpcP, RegMem( op1, op2) );
12465   ins_pipe( ialu_cr_reg_mem );
12466 %}
12467 
12468 // // Cisc-spilled version of cmpP_eReg
12469 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12470 //  match(Set cr (CmpP (LoadP op1) op2));
12471 //
12472 //  format %{ "CMPu   $op1,$op2" %}
12473 //  ins_cost(500);
12474 //  opcode(0x39);  /* Opcode 39 /r */
12475 //  ins_encode( OpcP, RegMem( op1, op2) );
12476 //%}
12477 
12478 // Compare raw pointer (used in out-of-heap check).
12479 // Only works because non-oop pointers must be raw pointers
12480 // and raw pointers have no anti-dependencies.
12481 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12482   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12483   match(Set cr (CmpP op1 (LoadP op2)));
12484 
12485   format %{ "CMPu   $op1,$op2" %}
12486   opcode(0x3B);  /* Opcode 3B /r */
12487   ins_encode( OpcP, RegMem( op1, op2) );
12488   ins_pipe( ialu_cr_reg_mem );
12489 %}
12490 
12491 //
12492 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
12494 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12495   match(Set cr (CmpP src zero));
12496 
12497   format %{ "TEST   $src,$src" %}
12498   opcode(0x85);
12499   ins_encode( OpcP, RegReg( src, src ) );
12500   ins_pipe( ialu_cr_reg_imm );
12501 %}
12502 
12503 // Cisc-spilled version of testP_reg
12504 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
12506 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12507   match(Set cr (CmpP (LoadP op) zero));
12508 
12509   format %{ "TEST   $op,0xFFFFFFFF" %}
12510   ins_cost(500);
12511   opcode(0xF7);               /* Opcode F7 /0 */
12512   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12513   ins_pipe( ialu_cr_reg_imm );
12514 %}
12515 
12516 // Yanked all unsigned pointer compare operations.
12517 // Pointer compares are done with CmpP which is already unsigned.
12518 
12519 //----------Max and Min--------------------------------------------------------
12520 // Min Instructions
12521 ////
12522 //   *** Min and Max using the conditional move are slower than the
12523 //   *** branch version on a Pentium III.
12524 // // Conditional move for min
12525 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12526 //  effect( USE_DEF op2, USE op1, USE cr );
12527 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12528 //  opcode(0x4C,0x0F);
12529 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12530 //  ins_pipe( pipe_cmov_reg );
12531 //%}
12532 //
12533 //// Min Register with Register (P6 version)
12534 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12535 //  predicate(VM_Version::supports_cmov() );
12536 //  match(Set op2 (MinI op1 op2));
12537 //  ins_cost(200);
12538 //  expand %{
12539 //    eFlagsReg cr;
12540 //    compI_eReg(cr,op1,op2);
12541 //    cmovI_reg_lt(op2,op1,cr);
12542 //  %}
12543 //%}
12544 
12545 // Min Register with Register (generic version)
12546 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12547   match(Set dst (MinI dst src));
12548   effect(KILL flags);
12549   ins_cost(300);
12550 
12551   format %{ "MIN    $dst,$src" %}
12552   opcode(0xCC);
12553   ins_encode( min_enc(dst,src) );
12554   ins_pipe( pipe_slow );
12555 %}
12556 
12557 // Max Register with Register
12558 //   *** Min and Max using the conditional move are slower than the
12559 //   *** branch version on a Pentium III.
12560 // // Conditional move for max
12561 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12562 //  effect( USE_DEF op2, USE op1, USE cr );
12563 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12564 //  opcode(0x4F,0x0F);
12565 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12566 //  ins_pipe( pipe_cmov_reg );
12567 //%}
12568 //
12569 // // Max Register with Register (P6 version)
12570 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12571 //  predicate(VM_Version::supports_cmov() );
12572 //  match(Set op2 (MaxI op1 op2));
12573 //  ins_cost(200);
12574 //  expand %{
12575 //    eFlagsReg cr;
12576 //    compI_eReg(cr,op1,op2);
12577 //    cmovI_reg_gt(op2,op1,cr);
12578 //  %}
12579 //%}
12580 
12581 // Max Register with Register (generic version)
12582 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12583   match(Set dst (MaxI dst src));
12584   effect(KILL flags);
12585   ins_cost(300);
12586 
12587   format %{ "MAX    $dst,$src" %}
12588   opcode(0xCC);
12589   ins_encode( max_enc(dst,src) );
12590   ins_pipe( pipe_slow );
12591 %}
12592 
12593 // ============================================================================
// Counted Loop limit node, which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
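// Illustrative example (not part of the instruct below): with init = 0,
// limit = 10 and stride = 3 the trip count is (10 - 0 + 3 - 1) / 3 = 4,
// so the exact limit produced is 0 + 3 * 4 = 12, the first value the
// induction variable never takes.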
12597 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12598   match(Set limit (LoopLimit (Binary init limit) stride));
12599   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12600   ins_cost(300);
12601 
12602   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12603   ins_encode %{
12604     int strd = (int)$stride$$constant;
12605     assert(strd != 1 && strd != -1, "sanity");
12606     int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EDX:EAX, hi:lo)
    __ cdql();
    // Convert init to long (tmp:init, hi:lo)
12610     __ movl($tmp$$Register, $init$$Register);
12611     __ sarl($tmp$$Register, 31);
12612     // $limit - $init
12613     __ subl($limit$$Register, $init$$Register);
12614     __ sbbl($limit_hi$$Register, $tmp$$Register);
12615     // + ($stride - 1)
12616     if (strd > 0) {
12617       __ addl($limit$$Register, (strd - 1));
12618       __ adcl($limit_hi$$Register, 0);
12619       __ movl($tmp$$Register, strd);
12620     } else {
12621       __ addl($limit$$Register, (strd + 1));
12622       __ adcl($limit_hi$$Register, -1);
12623       __ lneg($limit_hi$$Register, $limit$$Register);
12624       __ movl($tmp$$Register, -strd);
12625     }
    // signed division: (EDX:EAX) / pos_stride
12627     __ idivl($tmp$$Register);
12628     if (strd < 0) {
12629       // restore sign
12630       __ negl($tmp$$Register);
12631     }
12632     // (EAX) * stride
12633     __ mull($tmp$$Register);
12634     // + init (ignore upper bits)
12635     __ addl($limit$$Register, $init$$Register);
12636   %}
12637   ins_pipe( pipe_slow );
12638 %}
12639 
12640 // ============================================================================
12641 // Branch Instructions
12642 // Jump Table
12643 instruct jumpXtnd(rRegI switch_val) %{
12644   match(Jump switch_val);
12645   ins_cost(350);
12646   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12647   ins_encode %{
12648     // Jump to Address(table_base + switch_reg)
12649     Address index(noreg, $switch_val$$Register, Address::times_1);
12650     __ jump(ArrayAddress($constantaddress, index), noreg);
12651   %}
12652   ins_pipe(pipe_jmp);
12653 %}
12654 
12655 // Jump Direct - Label defines a relative address from JMP+1
12656 instruct jmpDir(label labl) %{
12657   match(Goto);
12658   effect(USE labl);
12659 
12660   ins_cost(300);
12661   format %{ "JMP    $labl" %}
12662   size(5);
12663   ins_encode %{
12664     Label* L = $labl$$label;
12665     __ jmp(*L, false); // Always long jump
12666   %}
12667   ins_pipe( pipe_jmp );
12668 %}
12669 
12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12671 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12672   match(If cop cr);
12673   effect(USE labl);
12674 
12675   ins_cost(300);
12676   format %{ "J$cop    $labl" %}
12677   size(6);
12678   ins_encode %{
12679     Label* L = $labl$$label;
12680     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12681   %}
12682   ins_pipe( pipe_jcc );
12683 %}
12684 
12685 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12686 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12687   match(CountedLoopEnd cop cr);
12688   effect(USE labl);
12689 
12690   ins_cost(300);
12691   format %{ "J$cop    $labl\t# Loop end" %}
12692   size(6);
12693   ins_encode %{
12694     Label* L = $labl$$label;
12695     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12696   %}
12697   ins_pipe( pipe_jcc );
12698 %}
12699 
12700 // Jump Direct Conditional - using unsigned comparison
12701 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12702   match(If cop cmp);
12703   effect(USE labl);
12704 
12705   ins_cost(300);
12706   format %{ "J$cop,u  $labl" %}
12707   size(6);
12708   ins_encode %{
12709     Label* L = $labl$$label;
12710     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12711   %}
12712   ins_pipe(pipe_jcc);
12713 %}
12714 
12715 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12716   match(If cop cmp);
12717   effect(USE labl);
12718 
12719   ins_cost(200);
12720   format %{ "J$cop,u  $labl" %}
12721   size(6);
12722   ins_encode %{
12723     Label* L = $labl$$label;
12724     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12725   %}
12726   ins_pipe(pipe_jcc);
12727 %}
12728 
12729 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12730   match(If cop cmp);
12731   effect(USE labl);
12732 
12733   ins_cost(200);
12734   format %{ $$template
12735     if ($cop$$cmpcode == Assembler::notEqual) {
12736       $$emit$$"JP,u   $labl\n\t"
12737       $$emit$$"J$cop,u   $labl"
12738     } else {
12739       $$emit$$"JP,u   done\n\t"
12740       $$emit$$"J$cop,u   $labl\n\t"
12741       $$emit$$"done:"
12742     }
12743   %}
12744   ins_encode %{
12745     Label* l = $labl$$label;
12746     if ($cop$$cmpcode == Assembler::notEqual) {
12747       __ jcc(Assembler::parity, *l, false);
12748       __ jcc(Assembler::notEqual, *l, false);
12749     } else if ($cop$$cmpcode == Assembler::equal) {
12750       Label done;
12751       __ jccb(Assembler::parity, done);
12752       __ jcc(Assembler::equal, *l, false);
12753       __ bind(done);
12754     } else {
12755        ShouldNotReachHere();
12756     }
12757   %}
12758   ins_pipe(pipe_jcc);
12759 %}
12760 
12761 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
// Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12766 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12767   match(Set result (PartialSubtypeCheck sub super));
12768   effect( KILL rcx, KILL cr );
12769 
12770   ins_cost(1100);  // slightly larger than the next version
12771   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12772             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12773             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12774             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12775             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12776             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12777             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12778      "miss:\t" %}
12779 
12780   opcode(0x1); // Force a XOR of EDI
12781   ins_encode( enc_PartialSubtypeCheck() );
12782   ins_pipe( pipe_slow );
12783 %}
12784 
12785 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12786   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12787   effect( KILL rcx, KILL result );
12788 
12789   ins_cost(1000);
12790   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12791             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12792             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12793             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12794             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12795             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12796      "miss:\t" %}
12797 
12798   opcode(0x0);  // No need to XOR EDI
12799   ins_encode( enc_PartialSubtypeCheck() );
12800   ins_pipe( pipe_slow );
12801 %}
12802 
12803 // ============================================================================
12804 // Branch Instructions -- short offset versions
12805 //
12806 // These instructions are used to replace jumps of a long offset (the default
12807 // match) with jumps of a shorter offset.  These instructions are all tagged
12808 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12809 // match rules in general matching.  Instead, the ADLC generates a conversion
12810 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
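
// For reference: the short forms are two bytes (opcode + rel8, reaching
// -128..+127 from the next instruction), while the long JMP is five bytes
// (E9 rel32) and the long Jcc is six bytes (0F 8x rel32); this is what the
// size() attributes of the plain variants above and the short variants
// below reflect.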
12814 
12815 // Jump Direct - Label defines a relative address from JMP+1
12816 instruct jmpDir_short(label labl) %{
12817   match(Goto);
12818   effect(USE labl);
12819 
12820   ins_cost(300);
12821   format %{ "JMP,s  $labl" %}
12822   size(2);
12823   ins_encode %{
12824     Label* L = $labl$$label;
12825     __ jmpb(*L);
12826   %}
12827   ins_pipe( pipe_jmp );
12828   ins_short_branch(1);
12829 %}
12830 
12831 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12832 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12833   match(If cop cr);
12834   effect(USE labl);
12835 
12836   ins_cost(300);
12837   format %{ "J$cop,s  $labl" %}
12838   size(2);
12839   ins_encode %{
12840     Label* L = $labl$$label;
12841     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12842   %}
12843   ins_pipe( pipe_jcc );
12844   ins_short_branch(1);
12845 %}
12846 
12847 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12848 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12849   match(CountedLoopEnd cop cr);
12850   effect(USE labl);
12851 
12852   ins_cost(300);
12853   format %{ "J$cop,s  $labl\t# Loop end" %}
12854   size(2);
12855   ins_encode %{
12856     Label* L = $labl$$label;
12857     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12858   %}
12859   ins_pipe( pipe_jcc );
12860   ins_short_branch(1);
12861 %}
12862 
12863 // Jump Direct Conditional - using unsigned comparison
12864 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12865   match(If cop cmp);
12866   effect(USE labl);
12867 
12868   ins_cost(300);
12869   format %{ "J$cop,us $labl" %}
12870   size(2);
12871   ins_encode %{
12872     Label* L = $labl$$label;
12873     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12874   %}
12875   ins_pipe( pipe_jcc );
12876   ins_short_branch(1);
12877 %}
12878 
12879 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12880   match(If cop cmp);
12881   effect(USE labl);
12882 
12883   ins_cost(300);
12884   format %{ "J$cop,us $labl" %}
12885   size(2);
12886   ins_encode %{
12887     Label* L = $labl$$label;
12888     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12889   %}
12890   ins_pipe( pipe_jcc );
12891   ins_short_branch(1);
12892 %}
12893 
12894 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12895   match(If cop cmp);
12896   effect(USE labl);
12897 
12898   ins_cost(300);
12899   format %{ $$template
12900     if ($cop$$cmpcode == Assembler::notEqual) {
12901       $$emit$$"JP,u,s   $labl\n\t"
12902       $$emit$$"J$cop,u,s   $labl"
12903     } else {
12904       $$emit$$"JP,u,s   done\n\t"
12905       $$emit$$"J$cop,u,s  $labl\n\t"
12906       $$emit$$"done:"
12907     }
12908   %}
12909   size(4);
12910   ins_encode %{
12911     Label* l = $labl$$label;
12912     if ($cop$$cmpcode == Assembler::notEqual) {
12913       __ jccb(Assembler::parity, *l);
12914       __ jccb(Assembler::notEqual, *l);
12915     } else if ($cop$$cmpcode == Assembler::equal) {
12916       Label done;
12917       __ jccb(Assembler::parity, done);
12918       __ jccb(Assembler::equal, *l);
12919       __ bind(done);
12920     } else {
12921        ShouldNotReachHere();
12922     }
12923   %}
12924   ins_pipe(pipe_jcc);
12925   ins_short_branch(1);
12926 %}
12927 
12928 // ============================================================================
12929 // Long Compare
12930 //
12931 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12932 // is tricky.  The flavor of compare used depends on whether we are testing
12933 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12934 // The GE test is the negated LT test.  The LE test can be had by commuting
12935 // the operands (yielding a GE test) and then negating; negate again for the
12936 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12937 // NE test is negated from that.
12938 
12939 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12940 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12941 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12942 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12943 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12944 // foo match ends up with the wrong leaf.  One fix is to not match both
12945 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12946 // both forms beat the trinary form of long-compare and both are very useful
12947 // on Intel which has so few registers.
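
// Illustrative sketch (pseudo-assembly, not an instruct) of the two
// flag-setting idioms encoded by the instructs below:
//   LT/GE, reg vs. reg:   CMP src1.lo,src2.lo; MOV tmp,src1.hi; SBB tmp,src2.hi
//                         ! SF/OF now encode the signed 64-bit src1 < src2
//   EQ/NE, reg vs. zero:  MOV tmp,src.lo; OR tmp,src.hi
//                         ! ZF is set iff the whole long is zero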
12948 
12949 // Manifest a CmpL result in an integer register.  Very painful.
12950 // This is the test to avoid.
12951 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12952   match(Set dst (CmpL3 src1 src2));
12953   effect( KILL flags );
12954   ins_cost(1000);
12955   format %{ "XOR    $dst,$dst\n\t"
12956             "CMP    $src1.hi,$src2.hi\n\t"
12957             "JLT,s  m_one\n\t"
12958             "JGT,s  p_one\n\t"
12959             "CMP    $src1.lo,$src2.lo\n\t"
12960             "JB,s   m_one\n\t"
12961             "JEQ,s  done\n"
12962     "p_one:\tINC    $dst\n\t"
12963             "JMP,s  done\n"
12964     "m_one:\tDEC    $dst\n"
12965      "done:" %}
12966   ins_encode %{
12967     Label p_one, m_one, done;
12968     __ xorptr($dst$$Register, $dst$$Register);
12969     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12970     __ jccb(Assembler::less,    m_one);
12971     __ jccb(Assembler::greater, p_one);
12972     __ cmpl($src1$$Register, $src2$$Register);
12973     __ jccb(Assembler::below,   m_one);
12974     __ jccb(Assembler::equal,   done);
12975     __ bind(p_one);
12976     __ incrementl($dst$$Register);
12977     __ jmpb(done);
12978     __ bind(m_one);
12979     __ decrementl($dst$$Register);
12980     __ bind(done);
12981   %}
12982   ins_pipe( pipe_slow );
12983 %}
12984 
12985 //======
12986 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12987 // compares.  Can be used for LE or GT compares by reversing arguments.
12988 // NOT GOOD FOR EQ/NE tests.
12989 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12990   match( Set flags (CmpL src zero ));
12991   ins_cost(100);
12992   format %{ "TEST   $src.hi,$src.hi" %}
12993   opcode(0x85);
12994   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12995   ins_pipe( ialu_cr_reg_reg );
12996 %}
12997 
12998 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12999 // compares.  Can be used for LE or GT compares by reversing arguments.
13000 // NOT GOOD FOR EQ/NE tests.
13001 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13002   match( Set flags (CmpL src1 src2 ));
13003   effect( TEMP tmp );
13004   ins_cost(300);
13005   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13006             "MOV    $tmp,$src1.hi\n\t"
13007             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13008   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13009   ins_pipe( ialu_cr_reg_reg );
13010 %}
13011 
// Long compares reg < zero/reg OR reg >= zero/reg.
13013 // Just a wrapper for a normal branch, plus the predicate test.
13014 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13015   match(If cmp flags);
13016   effect(USE labl);
13017   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13018   expand %{
13019     jmpCon(cmp,flags,labl);    // JLT or JGE...
13020   %}
13021 %}
13022 
13023 //======
13024 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13025 // compares.  Can be used for LE or GT compares by reversing arguments.
13026 // NOT GOOD FOR EQ/NE tests.
13027 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13028   match(Set flags (CmpUL src zero));
13029   ins_cost(100);
13030   format %{ "TEST   $src.hi,$src.hi" %}
13031   opcode(0x85);
13032   ins_encode(OpcP, RegReg_Hi2(src, src));
13033   ins_pipe(ialu_cr_reg_reg);
13034 %}
13035 
13036 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13037 // compares.  Can be used for LE or GT compares by reversing arguments.
13038 // NOT GOOD FOR EQ/NE tests.
13039 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13040   match(Set flags (CmpUL src1 src2));
13041   effect(TEMP tmp);
13042   ins_cost(300);
13043   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13044             "MOV    $tmp,$src1.hi\n\t"
13045             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13046   ins_encode(long_cmp_flags2(src1, src2, tmp));
13047   ins_pipe(ialu_cr_reg_reg);
13048 %}
13049 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13051 // Just a wrapper for a normal branch, plus the predicate test.
13052 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13053   match(If cmp flags);
13054   effect(USE labl);
13055   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13056   expand %{
13057     jmpCon(cmp, flags, labl);    // JLT or JGE...
13058   %}
13059 %}
13060 
13061 // Compare 2 longs and CMOVE longs.
13062 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13063   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13064   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13065   ins_cost(400);
13066   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13067             "CMOV$cmp $dst.hi,$src.hi" %}
13068   opcode(0x0F,0x40);
13069   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13070   ins_pipe( pipe_cmov_reg_long );
13071 %}
13072 
13073 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13074   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13075   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13076   ins_cost(500);
13077   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13078             "CMOV$cmp $dst.hi,$src.hi" %}
13079   opcode(0x0F,0x40);
13080   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13081   ins_pipe( pipe_cmov_reg_long );
13082 %}
13083 
13084 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13085   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13086   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13087   ins_cost(400);
13088   expand %{
13089     cmovLL_reg_LTGE(cmp, flags, dst, src);
13090   %}
13091 %}
13092 
13093 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13094   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13095   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13096   ins_cost(500);
13097   expand %{
13098     cmovLL_mem_LTGE(cmp, flags, dst, src);
13099   %}
13100 %}
13101 
13102 // Compare 2 longs and CMOVE ints.
13103 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13104   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13105   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13106   ins_cost(200);
13107   format %{ "CMOV$cmp $dst,$src" %}
13108   opcode(0x0F,0x40);
13109   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13110   ins_pipe( pipe_cmov_reg );
13111 %}
13112 
13113 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13114   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13115   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13116   ins_cost(250);
13117   format %{ "CMOV$cmp $dst,$src" %}
13118   opcode(0x0F,0x40);
13119   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13120   ins_pipe( pipe_cmov_mem );
13121 %}
13122 
13123 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13124   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13125   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13126   ins_cost(200);
13127   expand %{
13128     cmovII_reg_LTGE(cmp, flags, dst, src);
13129   %}
13130 %}
13131 
13132 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13133   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13134   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13135   ins_cost(250);
13136   expand %{
13137     cmovII_mem_LTGE(cmp, flags, dst, src);
13138   %}
13139 %}
13140 
13141 // Compare 2 longs and CMOVE ptrs.
13142 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13143   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13144   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13145   ins_cost(200);
13146   format %{ "CMOV$cmp $dst,$src" %}
13147   opcode(0x0F,0x40);
13148   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13149   ins_pipe( pipe_cmov_reg );
13150 %}
13151 
13152 // Compare 2 unsigned longs and CMOVE ptrs.
13153 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13154   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13155   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13156   ins_cost(200);
13157   expand %{
13158     cmovPP_reg_LTGE(cmp,flags,dst,src);
13159   %}
13160 %}
13161 
13162 // Compare 2 longs and CMOVE doubles
13163 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13165   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13166   ins_cost(200);
13167   expand %{
13168     fcmovDPR_regS(cmp,flags,dst,src);
13169   %}
13170 %}
13171 
13172 // Compare 2 longs and CMOVE doubles
13173 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13175   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13176   ins_cost(200);
13177   expand %{
13178     fcmovD_regS(cmp,flags,dst,src);
13179   %}
13180 %}
13181 
13182 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13184   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13185   ins_cost(200);
13186   expand %{
13187     fcmovFPR_regS(cmp,flags,dst,src);
13188   %}
13189 %}
13190 
13191 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13193   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13194   ins_cost(200);
13195   expand %{
13196     fcmovF_regS(cmp,flags,dst,src);
13197   %}
13198 %}
13199 
13200 //======
13201 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13202 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13203   match( Set flags (CmpL src zero ));
13204   effect(TEMP tmp);
13205   ins_cost(200);
13206   format %{ "MOV    $tmp,$src.lo\n\t"
13207             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13208   ins_encode( long_cmp_flags0( src, tmp ) );
13209   ins_pipe( ialu_reg_reg_long );
13210 %}
13211 
13212 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13213 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13214   match( Set flags (CmpL src1 src2 ));
13215   ins_cost(200+300);
13216   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13217             "JNE,s  skip\n\t"
13218             "CMP    $src1.hi,$src2.hi\n\t"
13219      "skip:\t" %}
13220   ins_encode( long_cmp_flags1( src1, src2 ) );
13221   ins_pipe( ialu_cr_reg_reg );
13222 %}
13223 
13224 // Long compare reg == zero/reg OR reg != zero/reg
13225 // Just a wrapper for a normal branch, plus the predicate test.
13226 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13227   match(If cmp flags);
13228   effect(USE labl);
13229   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13230   expand %{
13231     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13232   %}
13233 %}
13234 
13235 //======
13236 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13237 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13238   match(Set flags (CmpUL src zero));
13239   effect(TEMP tmp);
13240   ins_cost(200);
13241   format %{ "MOV    $tmp,$src.lo\n\t"
13242             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13243   ins_encode(long_cmp_flags0(src, tmp));
13244   ins_pipe(ialu_reg_reg_long);
13245 %}
13246 
13247 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13248 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13249   match(Set flags (CmpUL src1 src2));
13250   ins_cost(200+300);
13251   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13252             "JNE,s  skip\n\t"
13253             "CMP    $src1.hi,$src2.hi\n\t"
13254      "skip:\t" %}
13255   ins_encode(long_cmp_flags1(src1, src2));
13256   ins_pipe(ialu_cr_reg_reg);
13257 %}
13258 
13259 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13260 // Just a wrapper for a normal branch, plus the predicate test.
13261 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13262   match(If cmp flags);
13263   effect(USE labl);
13264   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13265   expand %{
13266     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13267   %}
13268 %}
13269 
13270 // Compare 2 longs and CMOVE longs.
13271 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13272   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13273   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13274   ins_cost(400);
13275   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13276             "CMOV$cmp $dst.hi,$src.hi" %}
13277   opcode(0x0F,0x40);
13278   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13279   ins_pipe( pipe_cmov_reg_long );
13280 %}
13281 
13282 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13283   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13284   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13285   ins_cost(500);
13286   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13287             "CMOV$cmp $dst.hi,$src.hi" %}
13288   opcode(0x0F,0x40);
13289   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13290   ins_pipe( pipe_cmov_reg_long );
13291 %}
13292 
13293 // Compare 2 longs and CMOVE ints.
13294 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13295   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13296   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13297   ins_cost(200);
13298   format %{ "CMOV$cmp $dst,$src" %}
13299   opcode(0x0F,0x40);
13300   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13301   ins_pipe( pipe_cmov_reg );
13302 %}
13303 
13304 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13305   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13306   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13307   ins_cost(250);
13308   format %{ "CMOV$cmp $dst,$src" %}
13309   opcode(0x0F,0x40);
13310   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13311   ins_pipe( pipe_cmov_mem );
13312 %}
13313 
13314 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13315   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13316   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13317   ins_cost(200);
13318   expand %{
13319     cmovII_reg_EQNE(cmp, flags, dst, src);
13320   %}
13321 %}
13322 
13323 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13324   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13325   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13326   ins_cost(250);
13327   expand %{
13328     cmovII_mem_EQNE(cmp, flags, dst, src);
13329   %}
13330 %}
13331 
13332 // Compare 2 longs and CMOVE ptrs.
13333 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13334   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13335   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13336   ins_cost(200);
13337   format %{ "CMOV$cmp $dst,$src" %}
13338   opcode(0x0F,0x40);
13339   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13340   ins_pipe( pipe_cmov_reg );
13341 %}
13342 
13343 // Compare 2 unsigned longs and CMOVE ptrs.
13344 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13345   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13346   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13347   ins_cost(200);
13348   expand %{
13349     cmovPP_reg_EQNE(cmp,flags,dst,src);
13350   %}
13351 %}
13352 
13353 // Compare 2 longs and CMOVE doubles
13354 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13356   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13357   ins_cost(200);
13358   expand %{
13359     fcmovDPR_regS(cmp,flags,dst,src);
13360   %}
13361 %}
13362 
13363 // Compare 2 longs and CMOVE doubles
13364 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13366   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13367   ins_cost(200);
13368   expand %{
13369     fcmovD_regS(cmp,flags,dst,src);
13370   %}
13371 %}
13372 
13373 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13375   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13376   ins_cost(200);
13377   expand %{
13378     fcmovFPR_regS(cmp,flags,dst,src);
13379   %}
13380 %}
13381 
13382 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13384   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13385   ins_cost(200);
13386   expand %{
13387     fcmovF_regS(cmp,flags,dst,src);
13388   %}
13389 %}
13390 
13391 //======
13392 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13393 // Same as cmpL_reg_flags_LEGT except must negate src
13394 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13395   match( Set flags (CmpL src zero ));
13396   effect( TEMP tmp );
13397   ins_cost(300);
13398   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13399             "CMP    $tmp,$src.lo\n\t"
13400             "SBB    $tmp,$src.hi\n\t" %}
13401   ins_encode( long_cmp_flags3(src, tmp) );
13402   ins_pipe( ialu_reg_reg_long );
13403 %}
13404 
13405 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13406 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13407 // requires a commuted test to get the same result.
13408 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13409   match( Set flags (CmpL src1 src2 ));
13410   effect( TEMP tmp );
13411   ins_cost(300);
13412   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13413             "MOV    $tmp,$src2.hi\n\t"
13414             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13415   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13416   ins_pipe( ialu_cr_reg_reg );
13417 %}
13418 
// Long compares reg < zero/reg OR reg >= zero/reg.
13420 // Just a wrapper for a normal branch, plus the predicate test
13421 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13422   match(If cmp flags);
13423   effect(USE labl);
13424   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13425   ins_cost(300);
13426   expand %{
13427     jmpCon(cmp,flags,labl);    // JGT or JLE...
13428   %}
13429 %}
13430 
13431 //======
13432 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13433 // Same as cmpUL_reg_flags_LEGT except must negate src
13434 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13435   match(Set flags (CmpUL src zero));
13436   effect(TEMP tmp);
13437   ins_cost(300);
13438   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13439             "CMP    $tmp,$src.lo\n\t"
13440             "SBB    $tmp,$src.hi" %}
13441   ins_encode(long_cmp_flags3(src, tmp));
13442   ins_pipe(ialu_reg_reg_long);
13443 %}
13444 
13445 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13446 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13447 // requires a commuted test to get the same result.
13448 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13449   match(Set flags (CmpUL src1 src2));
13450   effect(TEMP tmp);
13451   ins_cost(300);
13452   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13453             "MOV    $tmp,$src2.hi\n\t"
13454             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13455   ins_encode(long_cmp_flags2( src2, src1, tmp));
13456   ins_pipe(ialu_cr_reg_reg);
13457 %}
13458 
13459 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13460 // Just a wrapper for a normal branch, plus the predicate test
13461 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13462   match(If cmp flags);
13463   effect(USE labl);
13464   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13465   ins_cost(300);
13466   expand %{
13467     jmpCon(cmp, flags, labl);    // JGT or JLE...
13468   %}
13469 %}
13470 
13471 // Compare 2 longs and CMOVE longs.
13472 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13473   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13474   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13475   ins_cost(400);
13476   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13477             "CMOV$cmp $dst.hi,$src.hi" %}
13478   opcode(0x0F,0x40);
13479   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13480   ins_pipe( pipe_cmov_reg_long );
13481 %}
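
// Note: the 64-bit conditional move above is synthesized as two 32-bit CMOVs
// on the same condition, e.g. (illustration only):
//
//   CMOVcc dst.lo,src.lo
//   CMOVcc dst.hi,src.hi   // flags are unchanged by the first CMOV
//
// This is safe because CMOVcc reads EFLAGS but never writes them.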
13482 
13483 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13484   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13485   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13486   ins_cost(500);
13487   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13488             "CMOV$cmp $dst.hi,$src.hi+4" %}
13489   opcode(0x0F,0x40);
13490   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13491   ins_pipe( pipe_cmov_reg_long );
13492 %}
13493 
13494 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13495   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13496   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13497   ins_cost(400);
13498   expand %{
13499     cmovLL_reg_LEGT(cmp, flags, dst, src);
13500   %}
13501 %}
13502 
13503 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13504   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13505   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13506   ins_cost(500);
13507   expand %{
13508     cmovLL_mem_LEGT(cmp, flags, dst, src);
13509   %}
13510 %}
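
// The *_U rules above differ only in taking the unsigned operands
// (cmpOpU_commute, flagsReg_ulong_LEGT); they expand to the signed cmov
// encodings because the emitted CMOVcc bytes differ only in the
// condition-code field, which comes from the cmp operand itself.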
13511 
13512 // Compare 2 longs and CMOVE ints.
13513 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13514   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13515   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13516   ins_cost(200);
13517   format %{ "CMOV$cmp $dst,$src" %}
13518   opcode(0x0F,0x40);
13519   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13520   ins_pipe( pipe_cmov_reg );
13521 %}
13522 
13523 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13524   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13525   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13526   ins_cost(250);
13527   format %{ "CMOV$cmp $dst,$src" %}
13528   opcode(0x0F,0x40);
13529   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13530   ins_pipe( pipe_cmov_mem );
13531 %}
13532 
13533 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13534   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13535   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13536   ins_cost(200);
13537   expand %{
13538     cmovII_reg_LEGT(cmp, flags, dst, src);
13539   %}
13540 %}
13541 
13542 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13543   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13544   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13545   ins_cost(250);
13546   expand %{
13547     cmovII_mem_LEGT(cmp, flags, dst, src);
13548   %}
13549 %}
13550 
13551 // Compare 2 longs and CMOVE ptrs.
13552 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13553   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13554   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13555   ins_cost(200);
13556   format %{ "CMOV$cmp $dst,$src" %}
13557   opcode(0x0F,0x40);
13558   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13559   ins_pipe( pipe_cmov_reg );
13560 %}
13561 
13562 // Compare 2 unsigned longs and CMOVE ptrs.
13563 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13564   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13565   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13566   ins_cost(200);
13567   expand %{
13568     cmovPP_reg_LEGT(cmp,flags,dst,src);
13569   %}
13570 %}
13571 
13572 // Compare 2 longs and CMOVE doubles
13573 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13574   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13575   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13576   ins_cost(200);
13577   expand %{
13578     fcmovDPR_regS(cmp,flags,dst,src);
13579   %}
13580 %}
13581 
13582 // Compare 2 longs and CMOVE doubles
13583 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13584   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13585   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13586   ins_cost(200);
13587   expand %{
13588     fcmovD_regS(cmp,flags,dst,src);
13589   %}
13590 %}
13591 
13592 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13593   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13594   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13595   ins_cost(200);
13596   expand %{
13597     fcmovFPR_regS(cmp,flags,dst,src);
13598   %}
13599 %}
13600 
13601 
13602 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13603   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13604   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13605   ins_cost(200);
13606   expand %{
13607     fcmovF_regS(cmp,flags,dst,src);
13608   %}
13609 %}
13610 
13611 
13612 // ============================================================================
13613 // Procedure Call/Return Instructions
13614 // Call Java Static Instruction
13615 // Note: If this code changes, the corresponding ret_addr_offset() and
13616 //       compute_padding() functions will have to be adjusted.
13617 instruct CallStaticJavaDirect(method meth) %{
13618   match(CallStaticJava);
13619   effect(USE meth);
13620 
13621   ins_cost(300);
13622   format %{ "CALL,static " %}
13623   opcode(0xE8); /* E8 cd */
13624   ins_encode( pre_call_resets,
13625               Java_Static_Call( meth ),
13626               call_epilog,
13627               post_call_FPU );
13628   ins_pipe( pipe_slow );
13629   ins_alignment(4);
13630 %}
13631 
13632 // Call Java Dynamic Instruction
13633 // Note: If this code changes, the corresponding ret_addr_offset() and
13634 //       compute_padding() functions will have to be adjusted.
13635 instruct CallDynamicJavaDirect(method meth) %{
13636   match(CallDynamicJava);
13637   effect(USE meth);
13638 
13639   ins_cost(300);
13640   format %{ "MOV    EAX,(oop)-1\n\t"
13641             "CALL,dynamic" %}
13642   opcode(0xE8); /* E8 cd */
13643   ins_encode( pre_call_resets,
13644               Java_Dynamic_Call( meth ),
13645               call_epilog,
13646               post_call_FPU );
13647   ins_pipe( pipe_slow );
13648   ins_alignment(4);
13649 %}
13650 
13651 // Call Runtime Instruction
13652 instruct CallRuntimeDirect(method meth) %{
13653   match(CallRuntime );
13654   effect(USE meth);
13655 
13656   ins_cost(300);
13657   format %{ "CALL,runtime " %}
13658   opcode(0xE8); /* E8 cd */
13659   // Use FFREEs to clear entries in float stack
13660   ins_encode( pre_call_resets,
13661               FFree_Float_Stack_All,
13662               Java_To_Runtime( meth ),
13663               post_call_FPU );
13664   ins_pipe( pipe_slow );
13665 %}
13666 
13667 // Call runtime without safepoint
13668 instruct CallLeafDirect(method meth) %{
13669   match(CallLeaf);
13670   effect(USE meth);
13671 
13672   ins_cost(300);
13673   format %{ "CALL_LEAF,runtime " %}
13674   opcode(0xE8); /* E8 cd */
13675   ins_encode( pre_call_resets,
13676               FFree_Float_Stack_All,
13677               Java_To_Runtime( meth ),
13678               Verify_FPU_For_Leaf, post_call_FPU );
13679   ins_pipe( pipe_slow );
13680 %}
13681 
13682 instruct CallLeafNoFPDirect(method meth) %{
13683   match(CallLeafNoFP);
13684   effect(USE meth);
13685 
13686   ins_cost(300);
13687   format %{ "CALL_LEAF_NOFP,runtime " %}
13688   opcode(0xE8); /* E8 cd */
13689   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13690   ins_pipe( pipe_slow );
13691 %}
13692 
13693 
13694 // Return Instruction
13695 // Remove the return address & jump to it.
13696 instruct Ret() %{
13697   match(Return);
13698   format %{ "RET" %}
13699   opcode(0xC3);
13700   ins_encode(OpcP);
13701   ins_pipe( pipe_jmp );
13702 %}
13703 
13704 // Tail Call; Jump from runtime stub to Java code.
13705 // Also known as an 'interprocedural jump'.
13706 // Target of jump will eventually return to caller.
13707 // TailJump below removes the return address.
13708 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13709   match(TailCall jump_target method_ptr);
13710   ins_cost(300);
13711   format %{ "JMP    $jump_target \t# EBX holds method" %}
13712   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13713   ins_encode( OpcP, RegOpc(jump_target) );
13714   ins_pipe( pipe_jmp );
13715 %}
13716 
13717 
13718 // Tail Jump; remove the return address; jump to target.
13719 // TailCall above leaves the return address around.
13720 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13721   match( TailJump jump_target ex_oop );
13722   ins_cost(300);
13723   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13724             "JMP    $jump_target " %}
13725   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13726   ins_encode( enc_pop_rdx,
13727               OpcP, RegOpc(jump_target) );
13728   ins_pipe( pipe_jmp );
13729 %}
13730 
13731 // Create exception oop: created by stack-crawling runtime code.
13732 // Created exception is now available to this handler, and is setup
13733 // just prior to jumping to this handler.  No code emitted.
13734 instruct CreateException( eAXRegP ex_oop )
13735 %{
13736   match(Set ex_oop (CreateEx));
13737 
13738   size(0);
13739   // use the following format syntax
13740   format %{ "# exception oop is in EAX; no code emitted" %}
13741   ins_encode();
13742   ins_pipe( empty );
13743 %}
13744 
13745 
13746 // Rethrow exception:
13747 // The exception oop will come in the first argument position.
13748 // Then JUMP (not call) to the rethrow stub code.
13749 instruct RethrowException()
13750 %{
13751   match(Rethrow);
13752 
13753   // use the following format syntax
13754   format %{ "JMP    rethrow_stub" %}
13755   ins_encode(enc_rethrow);
13756   ins_pipe( pipe_jmp );
13757 %}
13758 
13759 // inlined locking and unlocking
13760 
13761 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13762   predicate(Compile::current()->use_rtm());
13763   match(Set cr (FastLock object box));
13764   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13765   ins_cost(300);
13766   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13767   ins_encode %{
13768     __ get_thread($thread$$Register);
13769     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13770                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13771                  _rtm_counters, _stack_rtm_counters,
13772                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13773                  true, ra_->C->profile_rtm());
13774   %}
13775   ins_pipe(pipe_slow);
13776 %}
13777 
13778 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13779   predicate(!Compile::current()->use_rtm());
13780   match(Set cr (FastLock object box));
13781   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13782   ins_cost(300);
13783   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13784   ins_encode %{
13785     __ get_thread($thread$$Register);
13786     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13787                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13788   %}
13789   ins_pipe(pipe_slow);
13790 %}
13791 
13792 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13793   match(Set cr (FastUnlock object box));
13794   effect(TEMP tmp, USE_KILL box);
13795   ins_cost(300);
13796   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13797   ins_encode %{
13798     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13799   %}
13800   ins_pipe(pipe_slow);
13801 %}
13802 
13803 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13804   predicate(Matcher::vector_length(n) <= 32);
13805   match(Set dst (MaskAll src));
13806   format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13807   ins_encode %{
13808     int mask_len = Matcher::vector_length(this);
13809     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13810   %}
13811   ins_pipe( pipe_slow );
13812 %}
13813 
13814 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13815   predicate(Matcher::vector_length(n) > 32);
13816   match(Set dst (MaskAll src));
13817   effect(TEMP ktmp);
13818   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13819   ins_encode %{
13820     int mask_len = Matcher::vector_length(this);
13821     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13822   %}
13823   ins_pipe( pipe_slow );
13824 %}
13825 
13826 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13827   predicate(Matcher::vector_length(n) > 32);
13828   match(Set dst (MaskAll src));
13829   effect(TEMP ktmp);
13830   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13831   ins_encode %{
13832     int mask_len = Matcher::vector_length(this);
13833     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13834   %}
13835   ins_pipe( pipe_slow );
13836 %}
13837 
13838 // ============================================================================
13839 // Safepoint Instruction
13840 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13841   match(SafePoint poll);
13842   effect(KILL cr, USE poll);
13843 
13844   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13845   ins_cost(125);
13846   // EBP would need size(3)
13847   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13848   ins_encode %{
13849     __ relocate(relocInfo::poll_type);
13850     address pre_pc = __ pc();
13851     __ testl(rax, Address($poll$$Register, 0));
13852     address post_pc = __ pc();
13853     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13854   %}
13855   ins_pipe(ialu_reg_mem);
13856 %}
13857 
13858 
13859 // ============================================================================
13860 // This name is KNOWN by the ADLC and cannot be changed.
13861 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13862 // for this guy.
13863 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13864   match(Set dst (ThreadLocal));
13865   effect(DEF dst, KILL cr);
13866 
13867   format %{ "MOV    $dst, Thread::current()" %}
13868   ins_encode %{
13869     Register dstReg = as_Register($dst$$reg);
13870     __ get_thread(dstReg);
13871   %}
13872   ins_pipe( ialu_reg_fat );
13873 %}
13874 
13875 
13876 
13877 //----------PEEPHOLE RULES-----------------------------------------------------
13878 // These must follow all instruction definitions as they use the names
13879 // defined in the instructions definitions.
13880 //
13881 // peepmatch ( root_instr_name [preceding_instruction]* );
13882 //
13883 // peepconstraint %{
13884 // (instruction_number.operand_name relational_op instruction_number.operand_name
13885 //  [, ...] );
13886 // // instruction numbers are zero-based using left to right order in peepmatch
13887 //
13888 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13889 // // provide an instruction_number.operand_name for each operand that appears
13890 // // in the replacement instruction's match rule
13891 //
13892 // ---------VM FLAGS---------------------------------------------------------
13893 //
13894 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13895 //
13896 // Each peephole rule is given an identifying number starting with zero and
13897 // increasing by one in the order seen by the parser.  An individual peephole
13898 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13899 // on the command-line.
13900 //
13901 // ---------CURRENT LIMITATIONS----------------------------------------------
13902 //
13903 // Only match adjacent instructions in same basic block
13904 // Only equality constraints
13905 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13906 // Only one replacement instruction
13907 //
13908 // ---------EXAMPLE----------------------------------------------------------
13909 //
13910 // // pertinent parts of existing instructions in architecture description
13911 // instruct movI(rRegI dst, rRegI src) %{
13912 //   match(Set dst (CopyI src));
13913 // %}
13914 //
13915 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13916 //   match(Set dst (AddI dst src));
13917 //   effect(KILL cr);
13918 // %}
13919 //
13920 // // Change (inc mov) to lea
13921 // peephole %{
13922 //   // increment preceded by register-register move
13923 //   peepmatch ( incI_eReg movI );
13924 //   // require that the destination register of the increment
13925 //   // match the destination register of the move
13926 //   peepconstraint ( 0.dst == 1.dst );
13927 //   // construct a replacement instruction that sets
13928 //   // the destination to ( move's source register + one )
13929 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13930 // %}
13931 //
13932 // Implementation no longer uses movX instructions since
13933 // machine-independent system no longer uses CopyX nodes.
13934 //
13935 // peephole %{
13936 //   peepmatch ( incI_eReg movI );
13937 //   peepconstraint ( 0.dst == 1.dst );
13938 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13939 // %}
13940 //
13941 // peephole %{
13942 //   peepmatch ( decI_eReg movI );
13943 //   peepconstraint ( 0.dst == 1.dst );
13944 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13945 // %}
13946 //
13947 // peephole %{
13948 //   peepmatch ( addI_eReg_imm movI );
13949 //   peepconstraint ( 0.dst == 1.dst );
13950 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13951 // %}
13952 //
13953 // peephole %{
13954 //   peepmatch ( addP_eReg_imm movP );
13955 //   peepconstraint ( 0.dst == 1.dst );
13956 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13957 // %}
13958 
13959 // // Change load of spilled value to only a spill
13960 // instruct storeI(memory mem, rRegI src) %{
13961 //   match(Set mem (StoreI mem src));
13962 // %}
13963 //
13964 // instruct loadI(rRegI dst, memory mem) %{
13965 //   match(Set dst (LoadI mem));
13966 // %}
13967 //
13968 peephole %{
13969   peepmatch ( loadI storeI );
13970   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13971   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13972 %}
13973 
13974 //----------SMARTSPILL RULES---------------------------------------------------
13975 // These must follow all instruction definitions as they use the names
13976 // defined in the instructions definitions.